Merge branch 'main' into ntran/table_cli

pull/24376/head
NGA-TRAN 2023-07-21 14:49:02 -04:00
commit 144778430e
98 changed files with 3661 additions and 1449 deletions

Cargo.lock generated (688 lines changed)

File diff suppressed because it is too large.

@ -121,8 +121,8 @@ license = "MIT OR Apache-2.0"
[workspace.dependencies] [workspace.dependencies]
arrow = { version = "43.0.0" } arrow = { version = "43.0.0" }
arrow-flight = { version = "43.0.0" } arrow-flight = { version = "43.0.0" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "46182c894e5106adba7fb53e9848ce666fb6129b", default-features = false } datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "44008d71180f2d03e9d21944788e61cb8845abc7", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "46182c894e5106adba7fb53e9848ce666fb6129b" } datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "44008d71180f2d03e9d21944788e61cb8845abc7" }
hashbrown = { version = "0.14.0" } hashbrown = { version = "0.14.0" }
object_store = { version = "0.6.0" } object_store = { version = "0.6.0" }


@ -315,7 +315,7 @@ struct TestStateTtlAndRefresh {
ttl_provider: Arc<TestTtlProvider>, ttl_provider: Arc<TestTtlProvider>,
refresh_duration_provider: Arc<TestRefreshDurationProvider>, refresh_duration_provider: Arc<TestRefreshDurationProvider>,
time_provider: Arc<MockProvider>, time_provider: Arc<MockProvider>,
loader: Arc<TestLoader<u8, String, ()>>, loader: Arc<TestLoader<u8, (), String>>,
notify_idle: Arc<Notify>, notify_idle: Arc<Notify>,
} }
@ -365,7 +365,7 @@ struct TestStateLRUAndRefresh {
size_estimator: Arc<TestSizeEstimator>, size_estimator: Arc<TestSizeEstimator>,
refresh_duration_provider: Arc<TestRefreshDurationProvider>, refresh_duration_provider: Arc<TestRefreshDurationProvider>,
time_provider: Arc<MockProvider>, time_provider: Arc<MockProvider>,
loader: Arc<TestLoader<u8, String, ()>>, loader: Arc<TestLoader<u8, (), String>>,
pool: Arc<ResourcePool<TestSize>>, pool: Arc<ResourcePool<TestSize>>,
notify_idle: Arc<Notify>, notify_idle: Arc<Notify>,
} }
@ -505,7 +505,7 @@ struct TestStateLruAndRefresh {
size_estimator: Arc<TestSizeEstimator>, size_estimator: Arc<TestSizeEstimator>,
refresh_duration_provider: Arc<TestRefreshDurationProvider>, refresh_duration_provider: Arc<TestRefreshDurationProvider>,
time_provider: Arc<MockProvider>, time_provider: Arc<MockProvider>,
loader: Arc<TestLoader<u8, String, ()>>, loader: Arc<TestLoader<u8, (), String>>,
notify_idle: Arc<Notify>, notify_idle: Arc<Notify>,
} }


@ -963,7 +963,7 @@ mod tests {
metric_registry: metric::Registry, metric_registry: metric::Registry,
refresh_duration_provider: Arc<TestRefreshDurationProvider>, refresh_duration_provider: Arc<TestRefreshDurationProvider>,
time_provider: Arc<MockProvider>, time_provider: Arc<MockProvider>,
loader: Arc<TestLoader<u8, String, ()>>, loader: Arc<TestLoader<u8, (), String>>,
notify_idle: Arc<Notify>, notify_idle: Arc<Notify>,
} }


@ -254,9 +254,10 @@ mod tests {
use crate::{ use crate::{
cache::{ cache::{
driver::CacheDriver, driver::CacheDriver,
test_util::{run_test_generic, AbortAndWaitExt, EnsurePendingExt, TestAdapter}, test_util::{run_test_generic, TestAdapter},
}, },
loader::test_util::TestLoader, loader::test_util::TestLoader,
test_util::{AbortAndWaitExt, EnsurePendingExt},
}; };
use super::*; use super::*;


@ -1,12 +1,11 @@
use std::{sync::Arc, time::Duration}; use std::{sync::Arc, time::Duration};
use async_trait::async_trait; use tokio::sync::Barrier;
use futures::{Future, FutureExt};
use tokio::{sync::Barrier, task::JoinHandle};
use crate::{ use crate::{
cache::{CacheGetStatus, CachePeekStatus}, cache::{CacheGetStatus, CachePeekStatus},
loader::test_util::TestLoader, loader::test_util::TestLoader,
test_util::{AbortAndWaitExt, EnsurePendingExt},
}; };
use super::Cache; use super::Cache;
@ -461,60 +460,3 @@ where
assert_eq!(res, String::from("foo")); assert_eq!(res, String::from("foo"));
assert_eq!(loader.loaded(), vec![(1, true)]); assert_eq!(loader.loaded(), vec![(1, true)]);
} }
#[async_trait]
pub trait EnsurePendingExt {
type Out;
/// Ensure that the future is pending. In the pending case, try to pass the given barrier. Afterwards await the future again.
///
/// This is helpful to ensure a future is in a pending state before continuing with the test setup.
async fn ensure_pending(self, barrier: Arc<Barrier>) -> Self::Out;
}
#[async_trait]
impl<F> EnsurePendingExt for F
where
F: Future + Send + Unpin,
{
type Out = F::Output;
async fn ensure_pending(self, barrier: Arc<Barrier>) -> Self::Out {
let mut fut = self.fuse();
futures::select_biased! {
_ = fut => panic!("fut should be pending"),
_ = barrier.wait().fuse() => (),
}
fut.await
}
}
#[async_trait]
pub trait AbortAndWaitExt {
/// Abort handle and wait for completion.
///
/// Note that this is NOT just a "wait with timeout or panic". This extension is specific to [`JoinHandle`] and will:
///
/// 1. Call [`JoinHandle::abort`].
/// 2. Await the [`JoinHandle`] with a timeout (or panic if the timeout is reached).
/// 3. Check that the handle returned a [`JoinError`] that signals that the tracked task was indeed cancelled and
/// didn't exit otherwise (either by finishing or by panicking).
async fn abort_and_wait(self);
}
#[async_trait]
impl<T> AbortAndWaitExt for JoinHandle<T>
where
T: std::fmt::Debug + Send,
{
async fn abort_and_wait(mut self) {
self.abort();
let join_err = tokio::time::timeout(Duration::from_secs(1), self)
.await
.expect("no timeout")
.expect_err("handle was aborted and therefore MUST fail");
assert!(join_err.is_cancelled());
}
}


@ -24,3 +24,5 @@ pub mod cache;
mod cancellation_safe_future; mod cancellation_safe_future;
pub mod loader; pub mod loader;
pub mod resource_consumption; pub mod resource_consumption;
#[cfg(test)]
mod test_util;


@ -0,0 +1,485 @@
//! Batching of loader requests.
use std::{
collections::HashMap,
fmt::Debug,
future::Future,
hash::Hash,
sync::{
atomic::{AtomicU64, Ordering},
Arc,
},
task::Poll,
};
use async_trait::async_trait;
use futures::FutureExt;
use observability_deps::tracing::trace;
use parking_lot::Mutex;
use tokio::sync::oneshot::{channel, Sender};
use crate::cancellation_safe_future::{CancellationSafeFuture, CancellationSafeFutureReceiver};
use super::Loader;
/// Batch [load](Loader::load) requests.
///
/// Requests against this loader will be [pending](std::task::Poll::Pending) until [flush](BatchLoaderFlusher::flush) is
/// called. To simplify the usage -- esp. in combination with [`Cache::get`] -- use [`BatchLoaderFlusherExt`].
///
///
/// [`Cache::get`]: crate::cache::Cache::get
#[derive(Debug)]
pub struct BatchLoader<K, Extra, V, L>
where
K: Debug + Hash + Send + 'static,
Extra: Debug + Send + 'static,
V: Debug + Send + 'static,
L: Loader<K = Vec<K>, Extra = Vec<Extra>, V = Vec<V>>,
{
inner: Arc<BatchLoaderInner<K, Extra, V, L>>,
}
impl<K, Extra, V, L> BatchLoader<K, Extra, V, L>
where
K: Debug + Hash + Send + 'static,
Extra: Debug + Send + 'static,
V: Debug + Send + 'static,
L: Loader<K = Vec<K>, Extra = Vec<Extra>, V = Vec<V>>,
{
/// Create new batch loader based on a non-batched, vector-based one.
pub fn new(inner: L) -> Self {
Self {
inner: Arc::new(BatchLoaderInner {
inner,
pending: Default::default(),
job_id_counter: Default::default(),
job_handles: Default::default(),
}),
}
}
}
/// State of [`BatchLoader`].
///
/// This is an extra struct so it can be wrapped into an [`Arc`] and shared with the futures that are spawned into
/// [`CancellationSafeFuture`]
#[derive(Debug)]
struct BatchLoaderInner<K, Extra, V, L>
where
K: Debug + Hash + Send + 'static,
Extra: Debug + Send + 'static,
V: Debug + Send + 'static,
L: Loader<K = Vec<K>, Extra = Vec<Extra>, V = Vec<V>>,
{
inner: L,
pending: Mutex<Vec<(K, Extra, Sender<V>)>>,
job_id_counter: AtomicU64,
job_handles: Mutex<HashMap<u64, CancellationSafeFutureReceiver<()>>>,
}
/// Flush interface for [`BatchLoader`].
///
/// This is a trait so you can [type-erase](https://en.wikipedia.org/wiki/Type_erasure) it by putting it into an
/// [`Arc`],
///
/// This trait is object-safe.
#[async_trait]
pub trait BatchLoaderFlusher: Debug + Send + Sync + 'static {
/// Flush all batched requests.
async fn flush(&self);
}
#[async_trait]
impl BatchLoaderFlusher for Arc<dyn BatchLoaderFlusher> {
async fn flush(&self) {
self.as_ref().flush().await;
}
}
#[async_trait]
impl<K, Extra, V, L> BatchLoaderFlusher for BatchLoader<K, Extra, V, L>
where
K: Debug + Hash + Send + 'static,
Extra: Debug + Send + 'static,
V: Debug + Send + 'static,
L: Loader<K = Vec<K>, Extra = Vec<Extra>, V = Vec<V>>,
{
async fn flush(&self) {
trace!("flushing batch loader");
let pending: Vec<_> = {
let mut pending = self.inner.pending.lock();
std::mem::take(pending.as_mut())
};
if pending.is_empty() {
return;
}
let job_id = self.inner.job_id_counter.fetch_add(1, Ordering::SeqCst);
let handle_recv = CancellationSafeFutureReceiver::default();
{
let mut job_handles = self.inner.job_handles.lock();
job_handles.insert(job_id, handle_recv.clone());
}
let inner = Arc::clone(&self.inner);
let fut = CancellationSafeFuture::new(
async move {
let mut keys = Vec::with_capacity(pending.len());
let mut extras = Vec::with_capacity(pending.len());
let mut senders = Vec::with_capacity(pending.len());
for (k, extra, sender) in pending {
keys.push(k);
extras.push(extra);
senders.push(sender);
}
let values = inner.inner.load(keys, extras).await;
assert_eq!(values.len(), senders.len());
for (value, sender) in values.into_iter().zip(senders) {
sender.send(value).unwrap();
}
let mut job_handles = inner.job_handles.lock();
job_handles.remove(&job_id);
},
handle_recv,
);
fut.await;
}
}
#[async_trait]
impl<K, Extra, V, L> Loader for BatchLoader<K, Extra, V, L>
where
K: Debug + Hash + Send + 'static,
Extra: Debug + Send + 'static,
V: Debug + Send + 'static,
L: Loader<K = Vec<K>, Extra = Vec<Extra>, V = Vec<V>>,
{
type K = K;
type Extra = Extra;
type V = V;
async fn load(&self, k: Self::K, extra: Self::Extra) -> Self::V {
let (tx, rx) = channel();
{
let mut pending = self.inner.pending.lock();
pending.push((k, extra, tx));
}
rx.await.unwrap()
}
}
/// Extension trait for [`BatchLoaderFlusher`] because the methods on this extension trait are not object safe.
#[async_trait]
pub trait BatchLoaderFlusherExt {
/// Try to poll all given futures and automatically [flush](BatchLoaderFlusher) if any of them end up in a pending state.
///
/// This guarantees that the order of the results is identical to the order of the futures.
async fn auto_flush<F>(&self, futures: Vec<F>) -> Vec<F::Output>
where
F: Future + Send,
F::Output: Send;
}
#[async_trait]
impl<B> BatchLoaderFlusherExt for B
where
B: BatchLoaderFlusher,
{
async fn auto_flush<F>(&self, futures: Vec<F>) -> Vec<F::Output>
where
F: Future + Send,
F::Output: Send,
{
let mut futures = futures
.into_iter()
.map(|f| f.boxed())
.enumerate()
.collect::<Vec<_>>();
let mut output: Vec<Option<F::Output>> = (0..futures.len()).map(|_| None).collect();
while !futures.is_empty() {
let mut pending = Vec::with_capacity(futures.len());
for (idx, mut f) in futures.into_iter() {
match futures::poll!(&mut f) {
Poll::Ready(res) => {
output[idx] = Some(res);
}
Poll::Pending => {
pending.push((idx, f));
}
}
}
if !pending.is_empty() {
self.flush().await;
}
futures = pending;
}
output
.into_iter()
.map(|o| o.expect("all futures finished"))
.collect()
}
}
#[cfg(test)]
mod tests {
use tokio::sync::Barrier;
use crate::{
cache::{driver::CacheDriver, Cache},
loader::test_util::TestLoader,
test_util::EnsurePendingExt,
};
use super::*;
type TestLoaderT = Arc<TestLoader<Vec<u8>, Vec<bool>, Vec<String>>>;
#[tokio::test]
async fn test_flush_empty() {
let (inner, batch) = setup();
batch.flush().await;
assert_eq!(inner.loaded(), vec![],);
}
#[tokio::test]
async fn test_flush_manual() {
let (inner, batch) = setup();
let pending_barrier_1 = Arc::new(Barrier::new(2));
let pending_barrier_1_captured = Arc::clone(&pending_barrier_1);
let batch_captured = Arc::clone(&batch);
let handle_1 = tokio::spawn(async move {
batch_captured
.load(1, true)
.ensure_pending(pending_barrier_1_captured)
.await
});
pending_barrier_1.wait().await;
let pending_barrier_2 = Arc::new(Barrier::new(2));
let pending_barrier_2_captured = Arc::clone(&pending_barrier_2);
let batch_captured = Arc::clone(&batch);
let handle_2 = tokio::spawn(async move {
batch_captured
.load(2, false)
.ensure_pending(pending_barrier_2_captured)
.await
});
pending_barrier_2.wait().await;
inner.mock_next(vec![1, 2], vec![String::from("foo"), String::from("bar")]);
batch.flush().await;
assert_eq!(inner.loaded(), vec![(vec![1, 2], vec![true, false])],);
assert_eq!(handle_1.await.unwrap(), String::from("foo"));
assert_eq!(handle_2.await.unwrap(), String::from("bar"));
}
/// Simulate the following scenario:
///
/// 1. load `1`, flush it, inner load starts processing `[1]`
/// 2. load `2`, flush it, inner load starts processing `[2]`
/// 3. inner loader returns result for `[2]`, batch loader returns that result as well
/// 4. inner loader returns result for `[1]`, batch loader returns that result as well
#[tokio::test]
async fn test_concurrent_load() {
let (inner, batch) = setup();
let load_barrier_1 = inner.block_next(vec![1], vec![String::from("foo")]);
inner.mock_next(vec![2], vec![String::from("bar")]);
// set up first load
let pending_barrier_1 = Arc::new(Barrier::new(2));
let pending_barrier_1_captured = Arc::clone(&pending_barrier_1);
let batch_captured = Arc::clone(&batch);
let handle_1 = tokio::spawn(async move {
batch_captured
.load(1, true)
.ensure_pending(pending_barrier_1_captured)
.await
});
pending_barrier_1.wait().await;
// flush first load, this is blocked by the load barrier
let pending_barrier_2 = Arc::new(Barrier::new(2));
let pending_barrier_2_captured = Arc::clone(&pending_barrier_2);
let batch_captured = Arc::clone(&batch);
let handle_2 = tokio::spawn(async move {
batch_captured
.flush()
.ensure_pending(pending_barrier_2_captured)
.await;
});
pending_barrier_2.wait().await;
// set up second load
let pending_barrier_3 = Arc::new(Barrier::new(2));
let pending_barrier_3_captured = Arc::clone(&pending_barrier_3);
let batch_captured = Arc::clone(&batch);
let handle_3 = tokio::spawn(async move {
batch_captured
.load(2, false)
.ensure_pending(pending_barrier_3_captured)
.await
});
pending_barrier_3.wait().await;
// flush 2nd load and get result
batch.flush().await;
assert_eq!(handle_3.await.unwrap(), String::from("bar"));
// flush 1st load and get result
load_barrier_1.wait().await;
handle_2.await.unwrap();
assert_eq!(handle_1.await.unwrap(), String::from("foo"));
assert_eq!(
inner.loaded(),
vec![(vec![1], vec![true]), (vec![2], vec![false])],
);
}
#[tokio::test]
async fn test_cancel_flush() {
let (inner, batch) = setup();
let load_barrier_1 = inner.block_next(vec![1], vec![String::from("foo")]);
// set up load
let pending_barrier_1 = Arc::new(Barrier::new(2));
let pending_barrier_1_captured = Arc::clone(&pending_barrier_1);
let batch_captured = Arc::clone(&batch);
let handle_1 = tokio::spawn(async move {
batch_captured
.load(1, true)
.ensure_pending(pending_barrier_1_captured)
.await
});
pending_barrier_1.wait().await;
// flush load, this is blocked by the load barrier
let pending_barrier_2 = Arc::new(Barrier::new(2));
let pending_barrier_2_captured = Arc::clone(&pending_barrier_2);
let batch_captured = Arc::clone(&batch);
let handle_2 = tokio::spawn(async move {
batch_captured
.flush()
.ensure_pending(pending_barrier_2_captured)
.await;
});
pending_barrier_2.wait().await;
// abort flush
handle_2.abort();
// flush load and get result
load_barrier_1.wait().await;
assert_eq!(handle_1.await.unwrap(), String::from("foo"));
assert_eq!(inner.loaded(), vec![(vec![1], vec![true])],);
}
#[tokio::test]
async fn test_cancel_load_and_flush() {
let (inner, batch) = setup();
let load_barrier_1 = inner.block_next(vec![1], vec![String::from("foo")]);
// set up load
let pending_barrier_1 = Arc::new(Barrier::new(2));
let pending_barrier_1_captured = Arc::clone(&pending_barrier_1);
let batch_captured = Arc::clone(&batch);
let handle_1 = tokio::spawn(async move {
batch_captured
.load(1, true)
.ensure_pending(pending_barrier_1_captured)
.await
});
pending_barrier_1.wait().await;
// flush load, this is blocked by the load barrier
let pending_barrier_2 = Arc::new(Barrier::new(2));
let pending_barrier_2_captured = Arc::clone(&pending_barrier_2);
let batch_captured = Arc::clone(&batch);
let handle_2 = tokio::spawn(async move {
batch_captured
.flush()
.ensure_pending(pending_barrier_2_captured)
.await;
});
pending_barrier_2.wait().await;
// abort load and flush
handle_1.abort();
handle_2.abort();
// unblock
load_barrier_1.wait().await;
// load was still driven to completion
assert_eq!(inner.loaded(), vec![(vec![1], vec![true])],);
}
#[tokio::test]
async fn test_auto_flush_with_loader() {
let (inner, batch) = setup();
inner.mock_next(vec![1, 2], vec![String::from("foo"), String::from("bar")]);
assert_eq!(
batch
.auto_flush(vec![batch.load(1, true), batch.load(2, false)])
.await,
vec![String::from("foo"), String::from("bar")],
);
assert_eq!(inner.loaded(), vec![(vec![1, 2], vec![true, false])],);
}
#[tokio::test]
async fn test_auto_flush_integration_with_cache_driver() {
let (inner, batch) = setup();
let cache = CacheDriver::new(Arc::clone(&batch), HashMap::new());
inner.mock_next(vec![1, 2], vec![String::from("foo"), String::from("bar")]);
inner.mock_next(vec![3], vec![String::from("baz")]);
assert_eq!(
batch
.auto_flush(vec![cache.get(1, true), cache.get(2, false)])
.await,
vec![String::from("foo"), String::from("bar")],
);
assert_eq!(
batch
.auto_flush(vec![cache.get(2, true), cache.get(3, true)])
.await,
vec![String::from("bar"), String::from("baz")],
);
assert_eq!(
inner.loaded(),
vec![(vec![1, 2], vec![true, false]), (vec![3], vec![true])],
);
}
fn setup() -> (TestLoaderT, Arc<BatchLoader<u8, bool, String, TestLoaderT>>) {
let inner = TestLoaderT::default();
let batch = Arc::new(BatchLoader::new(Arc::clone(&inner)));
(inner, batch)
}
}
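For orientation, here is a minimal, hypothetical sketch (not part of this commit) of the kind of vector-based loader that `BatchLoader` is meant to wrap, and of issuing requests through `auto_flush`. The `CatalogBatchFetcher` name is invented, and the sketch assumes it lives alongside the module above so that `Loader`, `BatchLoader`, and `BatchLoaderFlusherExt` are in scope.

```rust
use std::sync::Arc;

use async_trait::async_trait;

/// Hypothetical vector-based loader: one call resolves a whole batch of keys.
#[derive(Debug)]
struct CatalogBatchFetcher;

#[async_trait]
impl Loader for CatalogBatchFetcher {
    type K = Vec<u64>;
    type Extra = Vec<()>;
    type V = Vec<String>;

    async fn load(&self, keys: Vec<u64>, _extra: Vec<()>) -> Vec<String> {
        // One round trip for the whole batch. It must return exactly one value
        // per key, in order, because `BatchLoader::flush` zips the values back
        // onto the waiting `load` calls.
        keys.into_iter().map(|k| format!("row-{k}")).collect()
    }
}

#[tokio::test]
async fn sketch_batched_lookup() {
    let batch = Arc::new(BatchLoader::new(CatalogBatchFetcher));

    // Both `load` calls stay pending; `auto_flush` notices that, flushes them
    // as a single `vec![1, 2]` request, and preserves the result order.
    let res = batch
        .auto_flush(vec![batch.load(1, ()), batch.load(2, ())])
        .await;
    assert_eq!(res, vec![String::from("row-1"), String::from("row-2")]);
}
```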


@ -2,6 +2,7 @@
use async_trait::async_trait; use async_trait::async_trait;
use std::{fmt::Debug, future::Future, hash::Hash, marker::PhantomData, sync::Arc}; use std::{fmt::Debug, future::Future, hash::Hash, marker::PhantomData, sync::Arc};
pub mod batch;
pub mod metrics; pub mod metrics;
#[cfg(test)] #[cfg(test)]


@ -14,7 +14,7 @@ enum TestLoaderResponse<V> {
/// An easy-to-mock [`Loader`]. /// An easy-to-mock [`Loader`].
#[derive(Debug, Default)] #[derive(Debug, Default)]
pub struct TestLoader<K = u8, V = String, Extra = bool> pub struct TestLoader<K = u8, Extra = bool, V = String>
where where
K: Clone + Debug + Eq + Hash + Send + 'static, K: Clone + Debug + Eq + Hash + Send + 'static,
Extra: Clone + Debug + Send + 'static, Extra: Clone + Debug + Send + 'static,
@ -25,7 +25,7 @@ where
loaded: Mutex<Vec<(K, Extra)>>, loaded: Mutex<Vec<(K, Extra)>>,
} }
impl<K, V, Extra> TestLoader<K, V, Extra> impl<K, V, Extra> TestLoader<K, Extra, V>
where where
K: Clone + Debug + Eq + Hash + Send + 'static, K: Clone + Debug + Eq + Hash + Send + 'static,
Extra: Clone + Debug + Send + 'static, Extra: Clone + Debug + Send + 'static,
@ -93,7 +93,7 @@ where
} }
} }
impl<K, V, Extra> Drop for TestLoader<K, V, Extra> impl<K, Extra, V> Drop for TestLoader<K, Extra, V>
where where
K: Clone + Debug + Eq + Hash + Send + 'static, K: Clone + Debug + Eq + Hash + Send + 'static,
Extra: Clone + Debug + Send + 'static, Extra: Clone + Debug + Send + 'static,
@ -110,15 +110,15 @@ where
} }
#[async_trait] #[async_trait]
impl<K, V, Extra> Loader for TestLoader<K, V, Extra> impl<K, V, Extra> Loader for TestLoader<K, Extra, V>
where where
K: Clone + Debug + Eq + Hash + Send + 'static, K: Clone + Debug + Eq + Hash + Send + 'static,
Extra: Clone + Debug + Send + 'static, Extra: Clone + Debug + Send + 'static,
V: Clone + Debug + Send + 'static, V: Clone + Debug + Send + 'static,
{ {
type K = K; type K = K;
type V = V;
type Extra = Extra; type Extra = Extra;
type V = V;
async fn load(&self, k: Self::K, extra: Self::Extra) -> Self::V { async fn load(&self, k: Self::K, extra: Self::Extra) -> Self::V {
self.loaded.lock().push((k.clone(), extra)); self.loaded.lock().push((k.clone(), extra));
@ -163,7 +163,7 @@ mod tests {
#[tokio::test] #[tokio::test]
#[should_panic(expected = "entry not mocked")] #[should_panic(expected = "entry not mocked")]
async fn test_loader_panic_entry_unknown() { async fn test_loader_panic_entry_unknown() {
let loader = TestLoader::<u8, String, ()>::default(); let loader = TestLoader::<u8, (), String>::default();
loader.load(1, ()).await; loader.load(1, ()).await;
} }
@ -179,14 +179,14 @@ mod tests {
#[test] #[test]
#[should_panic(expected = "mocked response left")] #[should_panic(expected = "mocked response left")]
fn test_loader_panic_requests_left() { fn test_loader_panic_requests_left() {
let loader = TestLoader::<u8, String, ()>::default(); let loader = TestLoader::<u8, (), String>::default();
loader.mock_next(1, String::from("foo")); loader.mock_next(1, String::from("foo"));
} }
#[test] #[test]
#[should_panic(expected = "panic-by-choice")] #[should_panic(expected = "panic-by-choice")]
fn test_loader_no_double_panic() { fn test_loader_no_double_panic() {
let loader = TestLoader::<u8, String, ()>::default(); let loader = TestLoader::<u8, (), String>::default();
loader.mock_next(1, String::from("foo")); loader.mock_next(1, String::from("foo"));
panic!("panic-by-choice"); panic!("panic-by-choice");
} }


@ -0,0 +1,62 @@
use std::{future::Future, sync::Arc, time::Duration};
use async_trait::async_trait;
use futures::FutureExt;
use tokio::{sync::Barrier, task::JoinHandle};
#[async_trait]
pub trait EnsurePendingExt {
type Out;
/// Ensure that the future is pending. In the pending case, try to pass the given barrier. Afterwards await the future again.
///
/// This is helpful to ensure a future is in a pending state before continuing with the test setup.
async fn ensure_pending(self, barrier: Arc<Barrier>) -> Self::Out;
}
#[async_trait]
impl<F> EnsurePendingExt for F
where
F: Future + Send + Unpin,
{
type Out = F::Output;
async fn ensure_pending(self, barrier: Arc<Barrier>) -> Self::Out {
let mut fut = self.fuse();
futures::select_biased! {
_ = fut => panic!("fut should be pending"),
_ = barrier.wait().fuse() => (),
}
fut.await
}
}
#[async_trait]
pub trait AbortAndWaitExt {
/// Abort handle and wait for completion.
///
/// Note that this is NOT just a "wait with timeout or panic". This extension is specific to [`JoinHandle`] and will:
///
/// 1. Call [`JoinHandle::abort`].
/// 2. Await the [`JoinHandle`] with a timeout (or panic if the timeout is reached).
/// 3. Check that the handle returned a [`JoinError`] that signals that the tracked task was indeed cancelled and
/// didn't exit otherwise (either by finishing or by panicking).
async fn abort_and_wait(self);
}
#[async_trait]
impl<T> AbortAndWaitExt for JoinHandle<T>
where
T: std::fmt::Debug + Send,
{
async fn abort_and_wait(mut self) {
self.abort();
let join_err = tokio::time::timeout(Duration::from_secs(1), self)
.await
.expect("no timeout")
.expect_err("handle was aborted and therefore MUST fail");
assert!(join_err.is_cancelled());
}
}
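As a quick, hypothetical illustration of `AbortAndWaitExt` (mirroring how the batch tests above use `EnsurePendingExt`): spawn a task that never completes, then assert that aborting it really ends in cancellation rather than a normal exit or a panic.

```rust
use crate::test_util::AbortAndWaitExt;

#[tokio::test]
async fn sketch_abort_and_wait() {
    // A task that would run forever unless it is aborted.
    let handle = tokio::spawn(futures::future::pending::<()>());

    // Aborts the handle, awaits it with a one-second timeout, and asserts
    // that the resulting `JoinError` is a cancellation.
    handle.abort_and_wait().await;
}
```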


@ -21,7 +21,7 @@ uuid = { version = "1", features = ["v4"] }
workspace-hack = { version = "0.1", path = "../workspace-hack" } workspace-hack = { version = "0.1", path = "../workspace-hack" }
[dev-dependencies] [dev-dependencies]
tempfile = "3.6.0" tempfile = "3.7.0"
test_helpers = { path = "../test_helpers" } test_helpers = { path = "../test_helpers" }
[features] [features]


@ -9,6 +9,7 @@ license.workspace = true
async-trait = "0.1.71" async-trait = "0.1.71"
backoff = { path = "../backoff" } backoff = { path = "../backoff" }
bytes = "1.4" bytes = "1.4"
chrono = { version = "0.4", default-features = false }
compactor_scheduler = { path = "../compactor_scheduler" } compactor_scheduler = { path = "../compactor_scheduler" }
datafusion = { workspace = true } datafusion = { workspace = true }
data_types = { path = "../data_types" } data_types = { path = "../data_types" }


@ -69,7 +69,7 @@ mod tests {
let err = stream.try_collect::<Vec<_>>().await.unwrap_err(); let err = stream.try_collect::<Vec<_>>().await.unwrap_err();
assert_eq!( assert_eq!(
err.to_string(), err.to_string(),
"Join Error (panic)\ncaused by\nExternal error: foo" "Join Error (panic)\ncaused by\nExternal error: Panic: foo"
); );
} }
} }


@ -2,6 +2,7 @@ use std::{fmt::Display, sync::Arc};
use data_types::{CompactionLevel, ParquetFile}; use data_types::{CompactionLevel, ParquetFile};
use observability_deps::tracing::info; use observability_deps::tracing::info;
use parquet_file::ParquetFilePath;
use uuid::Uuid; use uuid::Uuid;
use crate::{ use crate::{
@ -48,14 +49,21 @@ where
target_level: CompactionLevel, target_level: CompactionLevel,
split_or_compact: FilesToSplitOrCompact, split_or_compact: FilesToSplitOrCompact,
object_store_ids: Vec<Uuid>, object_store_ids: Vec<Uuid>,
object_store_paths: Vec<ParquetFilePath>,
) -> Vec<PlanIR> { ) -> Vec<PlanIR> {
self.inner self.inner.create_plans(
.create_plans(partition, target_level, split_or_compact, object_store_ids) partition,
target_level,
split_or_compact,
object_store_ids,
object_store_paths,
)
} }
fn compact_plan( fn compact_plan(
&self, &self,
files: Vec<ParquetFile>, files: Vec<ParquetFile>,
object_store_paths: Vec<ParquetFilePath>,
object_store_ids: Vec<Uuid>, object_store_ids: Vec<Uuid>,
reason: CompactReason, reason: CompactReason,
partition: Arc<PartitionInfo>, partition: Arc<PartitionInfo>,
@ -65,9 +73,14 @@ where
let n_input_files = files.len(); let n_input_files = files.len();
let column_count = partition.column_count(); let column_count = partition.column_count();
let input_file_size_bytes = files.iter().map(|f| f.file_size_bytes).sum::<i64>(); let input_file_size_bytes = files.iter().map(|f| f.file_size_bytes).sum::<i64>();
let plan = let plan = self.inner.compact_plan(
self.inner files,
.compact_plan(files, object_store_ids, reason, partition, compaction_level); object_store_paths,
object_store_ids,
reason,
partition,
compaction_level,
);
info!( info!(
partition_id = partition_id.get(), partition_id = partition_id.get(),
@ -87,6 +100,7 @@ where
fn split_plan( fn split_plan(
&self, &self,
file_to_split: FileToSplit, file_to_split: FileToSplit,
object_store_path: ParquetFilePath,
object_store_id: Uuid, object_store_id: Uuid,
reason: SplitReason, reason: SplitReason,
partition: Arc<PartitionInfo>, partition: Arc<PartitionInfo>,
@ -98,6 +112,7 @@ where
let input_file_size_bytes = file_to_split.file.file_size_bytes; let input_file_size_bytes = file_to_split.file.file_size_bytes;
let plan = self.inner.split_plan( let plan = self.inner.split_plan(
file_to_split, file_to_split,
object_store_path,
object_store_id, object_store_id,
reason, reason,
partition, partition,


@ -4,6 +4,7 @@ use std::{
}; };
use data_types::{CompactionLevel, ParquetFile}; use data_types::{CompactionLevel, ParquetFile};
use parquet_file::ParquetFilePath;
use uuid::Uuid; use uuid::Uuid;
pub mod logging; pub mod logging;
@ -24,12 +25,14 @@ pub trait IRPlanner: Debug + Display + Send + Sync {
target_level: CompactionLevel, target_level: CompactionLevel,
split_or_compact: FilesToSplitOrCompact, split_or_compact: FilesToSplitOrCompact,
object_store_ids: Vec<Uuid>, object_store_ids: Vec<Uuid>,
object_store_paths: Vec<ParquetFilePath>,
) -> Vec<PlanIR>; ) -> Vec<PlanIR>;
/// Build a plan to compact give files /// Build a plan to compact give files
fn compact_plan( fn compact_plan(
&self, &self,
files: Vec<ParquetFile>, files: Vec<ParquetFile>,
paths: Vec<ParquetFilePath>,
object_store_ids: Vec<Uuid>, object_store_ids: Vec<Uuid>,
reason: CompactReason, reason: CompactReason,
partition: Arc<PartitionInfo>, partition: Arc<PartitionInfo>,
@ -40,6 +43,7 @@ pub trait IRPlanner: Debug + Display + Send + Sync {
fn split_plan( fn split_plan(
&self, &self,
file_to_split: FileToSplit, file_to_split: FileToSplit,
path: ParquetFilePath,
object_store_id: Uuid, object_store_id: Uuid,
reason: SplitReason, reason: SplitReason,
partition: Arc<PartitionInfo>, partition: Arc<PartitionInfo>,


@ -1,6 +1,7 @@
use std::{fmt::Display, sync::Arc}; use std::{fmt::Display, sync::Arc};
use data_types::{ChunkOrder, CompactionLevel, ParquetFile, Timestamp, TimestampMinMax}; use data_types::{ChunkOrder, CompactionLevel, ParquetFile, Timestamp, TimestampMinMax};
use parquet_file::ParquetFilePath;
use uuid::Uuid; use uuid::Uuid;
use crate::{ use crate::{
@ -125,21 +126,31 @@ impl IRPlanner for V1IRPlanner {
target_level: CompactionLevel, target_level: CompactionLevel,
split_or_compact: FilesToSplitOrCompact, split_or_compact: FilesToSplitOrCompact,
object_store_ids: Vec<Uuid>, object_store_ids: Vec<Uuid>,
object_store_paths: Vec<ParquetFilePath>,
) -> Vec<PlanIR> { ) -> Vec<PlanIR> {
match split_or_compact { match split_or_compact {
FilesToSplitOrCompact::Compact(files, reason) => { FilesToSplitOrCompact::Compact(files, reason) => {
vec![self.compact_plan(files, object_store_ids, reason, partition, target_level)] vec![self.compact_plan(
files,
object_store_paths,
object_store_ids,
reason,
partition,
target_level,
)]
} }
FilesToSplitOrCompact::Split(files, reason) => { FilesToSplitOrCompact::Split(files, reason) => {
files files
.into_iter() .into_iter()
.zip(object_store_ids) .zip(object_store_ids)
.map(|(file_to_split, object_store_id)| { .zip(object_store_paths)
.map(|((file_to_split, object_store_id), object_store_path)| {
// target level of a split file is the same as its level // target level of a split file is the same as its level
let target_level = file_to_split.file.compaction_level; let target_level = file_to_split.file.compaction_level;
self.split_plan( self.split_plan(
file_to_split, file_to_split,
object_store_path,
object_store_id, object_store_id,
reason, reason,
Arc::clone(&partition), Arc::clone(&partition),
@ -157,6 +168,7 @@ impl IRPlanner for V1IRPlanner {
fn compact_plan( fn compact_plan(
&self, &self,
files: Vec<ParquetFile>, files: Vec<ParquetFile>,
paths: Vec<ParquetFilePath>,
object_store_ids: Vec<Uuid>, object_store_ids: Vec<Uuid>,
reason: CompactReason, reason: CompactReason,
_partition: Arc<PartitionInfo>, _partition: Arc<PartitionInfo>,
@ -188,13 +200,15 @@ impl IRPlanner for V1IRPlanner {
let files = files let files = files
.into_iter() .into_iter()
.zip(object_store_ids) .zip(object_store_ids)
.map(|(file, object_store_id)| { .zip(paths)
.map(|((file, object_store_id), path)| {
let order = order(file.compaction_level, target_level, file.max_l0_created_at); let order = order(file.compaction_level, target_level, file.max_l0_created_at);
FileIR { FileIR {
file: ParquetFile { file: ParquetFile {
object_store_id, object_store_id,
..file ..file
}, },
path,
order, order,
} }
}) })
@ -248,6 +262,7 @@ impl IRPlanner for V1IRPlanner {
fn split_plan( fn split_plan(
&self, &self,
file_to_split: FileToSplit, file_to_split: FileToSplit,
path: ParquetFilePath,
object_store_id: Uuid, object_store_id: Uuid,
reason: SplitReason, reason: SplitReason,
_partition: Arc<PartitionInfo>, _partition: Arc<PartitionInfo>,
@ -261,6 +276,7 @@ impl IRPlanner for V1IRPlanner {
object_store_id, object_store_id,
..file ..file
}, },
path,
order, order,
}; };


@ -102,6 +102,6 @@ mod tests {
.store(stream, partition, level, max_l0_created_at) .store(stream, partition, level, max_l0_created_at)
.await .await
.unwrap_err(); .unwrap_err();
assert_eq!(err.to_string(), "External error: foo",); assert_eq!(err.to_string(), "External error: Panic: foo",);
} }
} }


@ -49,6 +49,7 @@ pub trait ScratchpadGen: Debug + Display + Send + Sync {
/// SMALLER than the uncompressed Arrow data during compaction itself. /// SMALLER than the uncompressed Arrow data during compaction itself.
#[async_trait] #[async_trait]
pub trait Scratchpad: Debug + Send + Sync + 'static { pub trait Scratchpad: Debug + Send + Sync + 'static {
fn uuids(&self, files: &[ParquetFilePath]) -> Vec<Uuid>;
async fn load_to_scratchpad(&self, files: &[ParquetFilePath]) -> Vec<Uuid>; async fn load_to_scratchpad(&self, files: &[ParquetFilePath]) -> Vec<Uuid>;
async fn make_public(&self, files: &[ParquetFilePath]) -> Vec<Uuid>; async fn make_public(&self, files: &[ParquetFilePath]) -> Vec<Uuid>;
async fn clean_from_scratchpad(&self, files: &[ParquetFilePath]); async fn clean_from_scratchpad(&self, files: &[ParquetFilePath]);


@ -33,6 +33,10 @@ struct NoopScratchpad;
#[async_trait] #[async_trait]
impl Scratchpad for NoopScratchpad { impl Scratchpad for NoopScratchpad {
fn uuids(&self, files: &[ParquetFilePath]) -> Vec<Uuid> {
files.iter().map(|f| f.objest_store_id()).collect()
}
async fn load_to_scratchpad(&self, files: &[ParquetFilePath]) -> Vec<Uuid> { async fn load_to_scratchpad(&self, files: &[ParquetFilePath]) -> Vec<Uuid> {
files.iter().map(|f| f.objest_store_id()).collect() files.iter().map(|f| f.objest_store_id()).collect()
} }


@ -178,6 +178,11 @@ impl Drop for ProdScratchpad {
#[async_trait] #[async_trait]
impl Scratchpad for ProdScratchpad { impl Scratchpad for ProdScratchpad {
fn uuids(&self, files: &[ParquetFilePath]) -> Vec<Uuid> {
let (_, uuids) = self.apply_mask(files);
uuids
}
async fn load_to_scratchpad(&self, files: &[ParquetFilePath]) -> Vec<Uuid> { async fn load_to_scratchpad(&self, files: &[ParquetFilePath]) -> Vec<Uuid> {
let (files_to, uuids) = self.apply_mask(files); let (files_to, uuids) = self.apply_mask(files);
let (files_from, files_to) = self.check_known(files, &files_to, false); let (files_from, files_to) = self.check_known(files, &files_to, false);
@ -323,8 +328,11 @@ mod tests {
assert_content(&store_scratchpad, []).await; assert_content(&store_scratchpad, []).await;
assert_content(&store_output, []).await; assert_content(&store_output, []).await;
let early_get_uuids = pad.uuids(&[f1.clone(), f2.clone()]);
let uuids = pad.load_to_scratchpad(&[f1.clone(), f2.clone()]).await; let uuids = pad.load_to_scratchpad(&[f1.clone(), f2.clone()]).await;
assert_eq!(uuids.len(), 2); assert_eq!(uuids.len(), 2);
assert_eq!(early_get_uuids, uuids);
let f1_masked = f1.clone().with_object_store_id(uuids[0]); let f1_masked = f1.clone().with_object_store_id(uuids[0]);
let f2_masked = f2.clone().with_object_store_id(uuids[1]); let f2_masked = f2.clone().with_object_store_id(uuids[1]);


@ -1,7 +1,9 @@
use std::{num::NonZeroUsize, sync::Arc, time::Duration}; use std::{num::NonZeroUsize, sync::Arc, time::Duration};
use chrono::Utc;
use data_types::{CompactionLevel, ParquetFile, ParquetFileParams, PartitionId}; use data_types::{CompactionLevel, ParquetFile, ParquetFileParams, PartitionId};
use futures::{stream, StreamExt, TryStreamExt}; use futures::{stream, StreamExt, TryStreamExt};
use iox_query::exec::query_tracing::send_metrics_to_tracing;
use observability_deps::tracing::info; use observability_deps::tracing::info;
use parquet_file::ParquetFilePath; use parquet_file::ParquetFilePath;
use tokio::sync::watch::Sender; use tokio::sync::watch::Sender;
@ -17,7 +19,7 @@ use crate::{
Components, Components,
}, },
error::{DynError, ErrorKind, SimpleError}, error::{DynError, ErrorKind, SimpleError},
file_classification::{FileClassification, FilesForProgress, FilesToSplitOrCompact}, file_classification::{FileClassification, FilesForProgress},
partition_info::PartitionInfo, partition_info::PartitionInfo,
PlanIR, RoundInfo, PlanIR, RoundInfo,
}; };
@ -301,8 +303,6 @@ async fn execute_branch(
// throw away the compaction work we've done. // throw away the compaction work we've done.
let saved_parquet_file_state = SavedParquetFileState::from(&branch); let saved_parquet_file_state = SavedParquetFileState::from(&branch);
let input_paths: Vec<ParquetFilePath> = branch.iter().map(ParquetFilePath::from).collect();
// Identify the target level and files that should be // Identify the target level and files that should be
// compacted together, upgraded, and kept for next round of // compacted together, upgraded, and kept for next round of
// compaction // compaction
@ -329,105 +329,128 @@ async fn execute_branch(
} }
let FilesForProgress { let FilesForProgress {
upgrade, mut upgrade,
split_or_compact, split_or_compact,
} = files_to_make_progress_on; } = files_to_make_progress_on;
// Compact & Split let paths = split_or_compact.file_input_paths();
let created_file_params = run_plans( let object_store_ids = scratchpad_ctx.uuids(&paths);
span.child("run_plans"), let plans = components.ir_planner.create_plans(
split_or_compact.clone(), Arc::clone(&partition_info),
&partition_info,
&components,
target_level, target_level,
Arc::clone(&df_semaphore), split_or_compact.clone(),
Arc::<dyn Scratchpad>::clone(&scratchpad_ctx), object_store_ids,
) paths,
.await?; );
// inputs can be removed from the scratchpad as soon as we're done with compaction. let mut files_next: Vec<ParquetFile> = Vec::new();
scratchpad_ctx.clean_from_scratchpad(&input_paths).await;
// upload files to real object store // The number of plans is often small (1), but can be thousands, especially in vertical splitting
let upload_span = span.child("upload_objects"); // scenarios when the partition is highly backlogged. So we chunk the plans into groups to control
let created_file_params = upload_files_to_object_store( // memory usage (all files for all plans in a chunk are loaded to the scratchpad at once), and to
created_file_params, // allow incremental catalog & progress updates. But the chunk size should still be large enough
Arc::<dyn Scratchpad>::clone(&scratchpad_ctx), // to facilitate concurrency in plan execution, which can be accomplished with a small multiple on
) // the concurrency limit.
.await; let mut chunks = plans.into_iter().peekable();
drop(upload_span); while chunks.peek().is_some() {
// 4x run_plans' concurrency limit will allow adequate concurrency.
let chunk: Vec<PlanIR> = chunks
.by_ref()
.take(df_semaphore.total_permits() * 4)
.collect();
for file_param in &created_file_params { let files_to_delete = chunk
info!( .iter()
partition_id = partition_info.partition_id.get(), .flat_map(|plan| plan.input_parquet_files())
uuid = file_param.object_store_id.to_string(), .collect();
bytes = file_param.file_size_bytes,
"uploaded file to objectstore",
);
}
let created_file_paths: Vec<ParquetFilePath> = created_file_params // Compact & Split
.iter() let created_file_params = run_plans(
.map(ParquetFilePath::from) span.child("run_plans"),
.collect(); chunk,
&partition_info,
&components,
Arc::clone(&df_semaphore),
Arc::<dyn Scratchpad>::clone(&scratchpad_ctx),
)
.await?;
// conditionally (if not shaddow mode) remove the newly created files from the scratchpad. // upload files to real object store
scratchpad_ctx let upload_span = span.child("upload_objects");
.clean_written_from_scratchpad(&created_file_paths) let created_file_params = upload_files_to_object_store(
created_file_params,
Arc::<dyn Scratchpad>::clone(&scratchpad_ctx),
)
.await;
drop(upload_span);
for file_param in &created_file_params {
info!(
partition_id = partition_info.partition_id.get(),
uuid = file_param.object_store_id.to_string(),
bytes = file_param.file_size_bytes,
"uploaded file to objectstore",
);
}
let created_file_paths: Vec<ParquetFilePath> = created_file_params
.iter()
.map(ParquetFilePath::from)
.collect();
// conditionally (if not shaddow mode) remove the newly created files from the scratchpad.
scratchpad_ctx
.clean_written_from_scratchpad(&created_file_paths)
.await;
// Update the catalog to reflect the newly created files, soft delete the compacted
// files and update the upgraded files
let (created_files, upgraded_files) = update_catalog(
Arc::clone(&components),
partition_id,
&saved_parquet_file_state,
files_to_delete,
upgrade,
created_file_params,
target_level,
)
.await; .await;
// Update the catalog to reflect the newly created files, soft delete the compacted // we only need to upgrade files on the first iteration, so empty the upgrade list for next loop.
// files and update the upgraded files upgrade = Vec::new();
let files_to_delete = split_or_compact.into_files();
let (created_files, upgraded_files) = update_catalog(
Arc::clone(&components),
partition_id,
saved_parquet_file_state,
files_to_delete,
upgrade,
created_file_params,
target_level,
)
.await;
// Report to `timeout_with_progress_checking` that some progress has been made; stop // Report to `timeout_with_progress_checking` that some progress has been made; stop
// if sending this signal fails because something has gone terribly wrong for the other // if sending this signal fails because something has gone terribly wrong for the other
// end of the channel to not be listening anymore. // end of the channel to not be listening anymore.
if let Err(e) = transmit_progress_signal.send(true) { if let Err(e) = transmit_progress_signal.send(true) {
return Err(Box::new(e)); return Err(Box::new(e));
}
// track this chunk files to return later
files_next.extend(created_files);
files_next.extend(upgraded_files);
} }
// Extend created files, upgraded files and files_to_keep to files_next
let mut files_next = created_files;
files_next.extend(upgraded_files);
files_next.extend(files_to_keep); files_next.extend(files_to_keep);
Ok(files_next) Ok(files_next)
} }
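The `execute_branch` changes above drain the generated plans in chunks of four times the DataFusion semaphore's permit count. As a standalone sketch of just that iterator pattern (placeholder types and values, not the compactor's own):

```rust
// Minimal illustration of chunking with `peekable()` + `by_ref().take(n)`.
fn main() {
    let plans: Vec<u32> = (0..10).collect(); // stand-in for Vec<PlanIR>
    let concurrency_limit = 2; // stand-in for df_semaphore.total_permits()

    let mut chunks = plans.into_iter().peekable();
    while chunks.peek().is_some() {
        // 4x the concurrency limit keeps plan execution busy while bounding
        // how many plan inputs sit in the scratchpad at once.
        let chunk: Vec<u32> = chunks.by_ref().take(concurrency_limit * 4).collect();
        println!("processing {} plans in this chunk", chunk.len());
    }
}
```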
/// Compact or split given files /// Compact or split given files
async fn run_plans( async fn run_plans(
span: SpanRecorder, span: SpanRecorder,
split_or_compact: FilesToSplitOrCompact, plans: Vec<PlanIR>,
partition_info: &Arc<PartitionInfo>, partition_info: &Arc<PartitionInfo>,
components: &Arc<Components>, components: &Arc<Components>,
target_level: CompactionLevel,
df_semaphore: Arc<InstrumentedAsyncSemaphore>, df_semaphore: Arc<InstrumentedAsyncSemaphore>,
scratchpad_ctx: Arc<dyn Scratchpad>, scratchpad_ctx: Arc<dyn Scratchpad>,
) -> Result<Vec<ParquetFileParams>, DynError> { ) -> Result<Vec<ParquetFileParams>, DynError> {
// stage files let paths: Vec<ParquetFilePath> = plans.iter().flat_map(|plan| plan.input_paths()).collect();
let download_span = span.child("download_objects");
let input_uuids_inpad = scratchpad_ctx
.load_to_scratchpad(&split_or_compact.file_input_paths())
.await;
drop(download_span);
let plans = components.ir_planner.create_plans( // stage files. This could move to execute_plan to reduce peak scratchpad memory use, but that would
Arc::clone(partition_info), // cost some concurrency in object downloads.
target_level, let download_span = span.child("download_objects");
split_or_compact, let _ = scratchpad_ctx.load_to_scratchpad(&paths).await;
input_uuids_inpad, drop(download_span);
);
info!( info!(
partition_id = partition_info.partition_id.get(), partition_id = partition_info.partition_id.get(),
@ -448,6 +471,7 @@ async fn run_plans(
partition_info, partition_info,
components, components,
Arc::clone(&df_semaphore), Arc::clone(&df_semaphore),
Arc::<dyn Scratchpad>::clone(&scratchpad_ctx),
) )
}) })
.buffer_unordered(df_semaphore.total_permits()) .buffer_unordered(df_semaphore.total_permits())
@ -463,6 +487,7 @@ async fn execute_plan(
partition_info: &Arc<PartitionInfo>, partition_info: &Arc<PartitionInfo>,
components: &Arc<Components>, components: &Arc<Components>,
df_semaphore: Arc<InstrumentedAsyncSemaphore>, df_semaphore: Arc<InstrumentedAsyncSemaphore>,
scratchpad_ctx: Arc<dyn Scratchpad>,
) -> Result<Vec<ParquetFileParams>, DynError> { ) -> Result<Vec<ParquetFileParams>, DynError> {
span.set_metadata("input_files", plan_ir.input_files().len().to_string()); span.set_metadata("input_files", plan_ir.input_files().len().to_string());
span.set_metadata("input_bytes", plan_ir.input_bytes().to_string()); span.set_metadata("input_bytes", plan_ir.input_bytes().to_string());
@ -508,12 +533,14 @@ async fn execute_plan(
"job semaphore acquired", "job semaphore acquired",
); );
let df_span = span.child("data_fusion"); let df_span = span.child_span("data_fusion");
let plan = components let plan = components
.df_planner .df_planner
.plan(&plan_ir, Arc::clone(partition_info)) .plan(&plan_ir, Arc::clone(partition_info))
.await?; .await?;
let streams = components.df_plan_exec.exec(plan); let streams = components.df_plan_exec.exec(Arc::<
dyn datafusion::physical_plan::ExecutionPlan,
>::clone(&plan));
let job = components.parquet_files_sink.stream_into_file_sink( let job = components.parquet_files_sink.stream_into_file_sink(
streams, streams,
Arc::clone(partition_info), Arc::clone(partition_info),
@ -524,8 +551,18 @@ async fn execute_plan(
// TODO: react to OOM and try to divide branch // TODO: react to OOM and try to divide branch
let res = job.await; let res = job.await;
if let Some(span) = &df_span {
send_metrics_to_tracing(Utc::now(), span, plan.as_ref(), true);
};
drop(permit); drop(permit);
drop(df_span); drop(df_span);
// inputs can be removed from the scratchpad as soon as we're done with compaction.
scratchpad_ctx
.clean_from_scratchpad(&plan_ir.input_paths())
.await;
info!( info!(
partition_id = partition_info.partition_id.get(), partition_id = partition_info.partition_id.get(),
plan_id, "job semaphore released", plan_id, "job semaphore released",
@ -580,7 +617,7 @@ async fn fetch_and_save_parquet_file_state(
async fn update_catalog( async fn update_catalog(
components: Arc<Components>, components: Arc<Components>,
partition_id: PartitionId, partition_id: PartitionId,
saved_parquet_file_state: SavedParquetFileState, saved_parquet_file_state: &SavedParquetFileState,
files_to_delete: Vec<ParquetFile>, files_to_delete: Vec<ParquetFile>,
files_to_upgrade: Vec<ParquetFile>, files_to_upgrade: Vec<ParquetFile>,
file_params_to_create: Vec<ParquetFileParams>, file_params_to_create: Vec<ParquetFileParams>,
@ -592,7 +629,7 @@ async fn update_catalog(
// Right now this only logs; in the future we might decide not to commit these changes // Right now this only logs; in the future we might decide not to commit these changes
let _ignore = components let _ignore = components
.changed_files_filter .changed_files_filter
.apply(&saved_parquet_file_state, &current_parquet_file_state); .apply(saved_parquet_file_state, &current_parquet_file_state);
let created_ids = components let created_ids = components
.commit .commit


@ -1,6 +1,7 @@
use std::fmt::Display; use std::fmt::Display;
use data_types::{ChunkOrder, CompactionLevel, ParquetFile}; use data_types::{ChunkOrder, CompactionLevel, ParquetFile};
use parquet_file::ParquetFilePath;
use crate::file_classification::{CompactReason, NoneReason, SplitReason}; use crate::file_classification::{CompactReason, NoneReason, SplitReason};
@ -78,6 +79,22 @@ impl PlanIR {
} }
} }
/// return the ParquetFiles that will be compacted together
pub fn input_parquet_files(&self) -> Vec<ParquetFile> {
self.input_files()
.iter()
.map(|ir| ir.file.clone())
.collect::<Vec<_>>()
}
/// return the paths of the input files that will be compacted together
pub fn input_paths(&self) -> Vec<ParquetFilePath> {
self.input_files()
.iter()
.map(|ir| ir.path.clone())
.collect::<Vec<_>>()
}
/// return the total bytes of the input files that will be compacted together /// return the total bytes of the input files that will be compacted together
pub fn input_bytes(&self) -> i64 { pub fn input_bytes(&self) -> i64 {
self.input_files() self.input_files()
@ -109,5 +126,6 @@ impl Display for PlanIR {
#[derive(Debug)] #[derive(Debug)]
pub struct FileIR { pub struct FileIR {
pub file: ParquetFile, pub file: ParquetFile,
pub path: ParquetFilePath,
pub order: ChunkOrder, pub order: ChunkOrder,
} }


@ -460,7 +460,7 @@ async fn test_partition_fail() {
&setup, &setup,
[( [(
setup.partition_info.partition_id, setup.partition_info.partition_id,
"serialize\ncaused by\nJoin Error (panic)\ncaused by\nExternal error: foo", "serialize\ncaused by\nJoin Error (panic)\ncaused by\nExternal error: Panic: foo",
)], )],
) )
.await; .await;


@ -17,7 +17,7 @@ once_cell = "1"
ordered-float = "3" ordered-float = "3"
schema = { path = "../schema" } schema = { path = "../schema" }
sha2 = "0.10" sha2 = "0.10"
sqlx = { version = "0.6", features = ["runtime-tokio-rustls", "postgres", "uuid"] } sqlx = { version = "0.7.1", features = ["runtime-tokio-rustls", "postgres", "uuid"] }
thiserror = "1.0.43" thiserror = "1.0.43"
uuid = { version = "1", features = ["v4"] } uuid = { version = "1", features = ["v4"] }
workspace-hack = { version = "0.1", path = "../workspace-hack" } workspace-hack = { version = "0.1", path = "../workspace-hack" }


@ -4,7 +4,6 @@ use super::TableId;
use generated_types::influxdata::iox::schema::v1 as proto; use generated_types::influxdata::iox::schema::v1 as proto;
use influxdb_line_protocol::FieldValue; use influxdb_line_protocol::FieldValue;
use schema::{builder::SchemaBuilder, InfluxColumnType, InfluxFieldType, Schema}; use schema::{builder::SchemaBuilder, InfluxColumnType, InfluxFieldType, Schema};
use sqlx::postgres::PgHasArrayType;
use std::{ use std::{
collections::{BTreeMap, BTreeSet, HashMap}, collections::{BTreeMap, BTreeSet, HashMap},
convert::TryFrom, convert::TryFrom,
@ -26,12 +25,6 @@ impl ColumnId {
} }
} }
impl PgHasArrayType for ColumnId {
fn array_type_info() -> sqlx::postgres::PgTypeInfo {
<i64 as PgHasArrayType>::array_type_info()
}
}
/// Column definitions for a table indexed by their name /// Column definitions for a table indexed by their name
#[derive(Debug, Clone, Eq, PartialEq)] #[derive(Debug, Clone, Eq, PartialEq)]
pub struct ColumnsByName(BTreeMap<String, ColumnSchema>); pub struct ColumnsByName(BTreeMap<String, ColumnSchema>);
@ -328,7 +321,7 @@ impl TryFrom<proto::column_schema::ColumnType> for ColumnType {
/// Set of columns. /// Set of columns.
#[derive(Debug, Clone, PartialEq, Eq, sqlx::Type)] #[derive(Debug, Clone, PartialEq, Eq, sqlx::Type)]
#[sqlx(transparent)] #[sqlx(transparent, no_pg_array)]
pub struct ColumnSet(Vec<ColumnId>); pub struct ColumnSet(Vec<ColumnId>);
impl ColumnSet { impl ColumnSet {


@ -244,7 +244,7 @@ pub static PARTITION_BY_DAY_PROTO: Lazy<Arc<proto::PartitionTemplate>> = Lazy::n
/// A partition template specified by a namespace record. /// A partition template specified by a namespace record.
#[derive(Debug, PartialEq, Clone, Default, sqlx::Type)] #[derive(Debug, PartialEq, Clone, Default, sqlx::Type)]
#[sqlx(transparent)] #[sqlx(transparent, no_pg_array)]
pub struct NamespacePartitionTemplateOverride(Option<serialization::Wrapper>); pub struct NamespacePartitionTemplateOverride(Option<serialization::Wrapper>);
impl TryFrom<proto::PartitionTemplate> for NamespacePartitionTemplateOverride { impl TryFrom<proto::PartitionTemplate> for NamespacePartitionTemplateOverride {
@ -259,7 +259,7 @@ impl TryFrom<proto::PartitionTemplate> for NamespacePartitionTemplateOverride {
/// A partition template specified by a table record. /// A partition template specified by a table record.
#[derive(Debug, PartialEq, Eq, Clone, Default, sqlx::Type)] #[derive(Debug, PartialEq, Eq, Clone, Default, sqlx::Type)]
#[sqlx(transparent)] #[sqlx(transparent, no_pg_array)]
pub struct TablePartitionTemplateOverride(Option<serialization::Wrapper>); pub struct TablePartitionTemplateOverride(Option<serialization::Wrapper>);
impl TablePartitionTemplateOverride { impl TablePartitionTemplateOverride {


@ -7,14 +7,6 @@ yanked = "deny"
unmaintained = "warn" unmaintained = "warn"
notice = "warn" notice = "warn"
ignore = [ ignore = [
# "It was sometimes possible for SQLite versions >= 1.0.12, < 3.39.2 to allow an array-bounds overflow when large
# string were input into SQLite's printf function."
#
# We are not using `printf` with untrusted inputs.
#
# This is currently blocked by upstream:
# https://github.com/launchbadge/sqlx/issues/2346
"RUSTSEC-2022-0090",
] ]
git-fetch-with-cli = true git-fetch-with-cli = true


@ -51,3 +51,4 @@ We hold monthly Tech Talks that explain the project's technical underpinnings. Y
* [Querier <> Ingester Query Protocol](ingester_querier_protocol.md) * [Querier <> Ingester Query Protocol](ingester_querier_protocol.md)
* [Underground Guide to Running IOx Locally](underground_guide.md) * [Underground Guide to Running IOx Locally](underground_guide.md)
* [Query Processing](query_processing.md) * [Query Processing](query_processing.md)
* [How to Reproduce and Debug Production Data Locally](debug.md)

docs/debug.md (new file, 105 lines)

@ -0,0 +1,105 @@
# How to Reproduce and Debug Production Data Locally
Here is a way to reproduce issues using production data locally, with `influxdb_iox` running in all-in-one mode.
## Summary of steps
Reproduce the error locally by building a local catalog from the output of `influxdb_iox remote store get-table`:
1. Download contents of table_name into a directory named 'table_name'
```
influxdb_iox remote store get-table <namespace> <table_name>
```
1. Create a catalog and object_store in /tmp/data_dir
```
influxdb_iox debug build-catalog <table_dir> /tmp/data_dir
```
1. Start iox using this data directory (you can now query `table_name` locally):
```
influxdb_iox --data-dir /tmp/data_dir
```
## Demonstration
## Setup
Running `influxdb_iox` and collecting local Telegraf data
```shell
$ influxdb_iox namespace list
[
{
"id": "1",
"name": "26f7e5a4b7be365b_917b97a92e883afc",
"maxTables": 500,
"maxColumnsPerTable": 200
}
]
```
## Export `cpu` table:
```shell
$ influxdb_iox remote store get-table 26f7e5a4b7be365b_917b97a92e883afc cpu
found 11 Parquet files, exporting...
downloading file 1 of 11 (1b8eb36a-7a34-4635-9156-251efcb1c024.4.parquet)...
downloading file 2 of 11 (1819137f-7cb5-4dc8-8051-6fa0b42990cb.4.parquet)...
downloading file 3 of 11 (4931cad7-7aaf-4b41-8f46-2d3be85c492b.4.parquet)...
downloading file 4 of 11 (be75f5fb-a8bc-4646-893a-70d496b13f3d.4.parquet)...
downloading file 5 of 11 (5235b87d-19ee-48ae-830f-b19d81bfe915.4.parquet)...
downloading file 6 of 11 (a8f7be33-42b6-4353-8735-51b245196d39.4.parquet)...
downloading file 7 of 11 (3b43c4ee-7500-47f9-9c0f-76d4f80b480e.4.parquet)...
downloading file 8 of 11 (081da5be-e0f9-4b42-8cd2-45bfebbd934c.4.parquet)...
downloading file 9 of 11 (f29ba3b4-53b1-4c68-9287-4bcea7c4e86b.4.parquet)...
downloading file 10 of 11 (1ce94ce2-1200-4516-950a-64828a7cebba.4.parquet)...
downloading file 11 of 11 (3a2b5525-3be5-41ef-b082-b279edc32acb.4.parquet)...
Done.
$ ls cpu/
081da5be-e0f9-4b42-8cd2-45bfebbd934c.4.parquet 1ce94ce2-1200-4516-950a-64828a7cebba.4.parquet 4931cad7-7aaf-4b41-8f46-2d3be85c492b.4.parquet be75f5fb-a8bc-4646-893a-70d496b13f3d.4.parquet
081da5be-e0f9-4b42-8cd2-45bfebbd934c.4.parquet.json 1ce94ce2-1200-4516-950a-64828a7cebba.4.parquet.json 4931cad7-7aaf-4b41-8f46-2d3be85c492b.4.parquet.json be75f5fb-a8bc-4646-893a-70d496b13f3d.4.parquet.json
1819137f-7cb5-4dc8-8051-6fa0b42990cb.4.parquet 3a2b5525-3be5-41ef-b082-b279edc32acb.4.parquet 5235b87d-19ee-48ae-830f-b19d81bfe915.4.parquet f29ba3b4-53b1-4c68-9287-4bcea7c4e86b.4.parquet
1819137f-7cb5-4dc8-8051-6fa0b42990cb.4.parquet.json 3a2b5525-3be5-41ef-b082-b279edc32acb.4.parquet.json 5235b87d-19ee-48ae-830f-b19d81bfe915.4.parquet.json f29ba3b4-53b1-4c68-9287-4bcea7c4e86b.4.parquet.json
1b8eb36a-7a34-4635-9156-251efcb1c024.4.parquet 3b43c4ee-7500-47f9-9c0f-76d4f80b480e.4.parquet a8f7be33-42b6-4353-8735-51b245196d39.4.parquet partition.4.json
1b8eb36a-7a34-4635-9156-251efcb1c024.4.parquet.json 3b43c4ee-7500-47f9-9c0f-76d4f80b480e.4.parquet.json a8f7be33-42b6-4353-8735-51b245196d39.4.parquet.json table.1.json
```
## Build a new `new_data_dir` from export:
```shell
$ influxdb_iox debug build-catalog cpu new_data_dir
Beginning catalog / object_store build from "cpu" in "new_data_dir"....
Done
$ ls new_data_dir/
catalog.sqlite object_store/
```
## Run `influxdb_iox` with `new_data_dir`:
```shell
$ influxdb_iox --data-dir new_data_dir/
```
And in a separate shell, you can query the data and see it is present:
```shell
$ influxdb_iox query 26f7e5a4b7be365b_917b97a92e883afc 'select * from cpu limit 10';
+-----------+---------------------+----------------------+-------------+------------------+-------------------+--------------+-----------+------------+---------------+-------------+--------------------+--------------------+
| cpu | host | time | usage_guest | usage_guest_nice | usage_idle | usage_iowait | usage_irq | usage_nice | usage_softirq | usage_steal | usage_system | usage_user |
+-----------+---------------------+----------------------+-------------+------------------+-------------------+--------------+-----------+------------+---------------+-------------+--------------------+--------------------+
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:13:40Z | 0.0 | 0.0 | 95.6668753914105 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.4902943018170824 | 2.8428303068453085 |
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:13:50Z | 0.0 | 0.0 | 95.9551687433697 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.4213261536472683 | 2.6235051029648098 |
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:14:00Z | 0.0 | 0.0 | 96.52108622167991 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.37029157802418 | 2.108622199968126 |
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:14:10Z | 0.0 | 0.0 | 95.26819803491809 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.752519246414341 | 2.979282718922596 |
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:14:20Z | 0.0 | 0.0 | 95.28402329791422 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.6408843239063593 | 3.0750923780335997 |
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:14:30Z | 0.0 | 0.0 | 93.97484827633119 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0271538509716924 | 3.9979978727699588 |
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:14:40Z | 0.0 | 0.0 | 95.69219209824692 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.458894245831095 | 2.848913656031324 |
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:14:50Z | 0.0 | 0.0 | 94.78402607970591 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.9685286188771443 | 3.2474453011797517 |
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:15:00Z | 0.0 | 0.0 | 95.85132344665212 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.5706151054475623 | 2.5780614479731607 |
| cpu0 | MacBook-Pro-8.local | 2023-07-06T17:13:40Z | 0.0 | 0.0 | 78.65055387717186 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 7.452165156077374 | 13.897280966824042 |
+-----------+---------------------+----------------------+-------------+------------------+-------------------+--------------+-----------+------------+---------------+-------------+--------------------+--------------------+
```
View File
@ -327,7 +327,7 @@ Each querier process has a set of in-memory caches. These are:
| ---- | ---- | -------------- | --- | ----- | ------------------------------ | ----- | | ---- | ---- | -------------- | --- | ----- | ------------------------------ | ----- |
| Namespace | Metadata | Catalog | Namespace Name | `CachedNamespace` | refresh policy, TTL, invalidation by unknown table/columns | Unknown entries NOT cached (assumes upstream DDoS protection) | | Namespace | Metadata | Catalog | Namespace Name | `CachedNamespace` | refresh policy, TTL, invalidation by unknown table/columns | Unknown entries NOT cached (assumes upstream DDoS protection) |
| Object Store | Data | Object Store | Path | Raw object store bytes for the entire object | -- | | | Object Store | Data | Object Store | Path | Raw object store bytes for the entire object | -- | |
| Parquet File | Metadata | Catalog | Table ID | Parquet files (all the data that the catalog has, i.e. the entire row) for all files that are NOT marked for deletion. | No refresh yet (see #5718), can be invalidated by ingester watermark. | | | Parquet File | Metadata | Catalog | Table ID | Parquet files (all the data that the catalog has, i.e. the entire row) for all files that are NOT marked for deletion. | TTL, but no refresh yet (see #5718), can be invalidated by ingester watermark. | |
| Partition | Metadata | Catalog | Partition ID | `CachedPartition` | Invalidated if ingester data or any parquet files have columns that are NOT covered by the sort key. | Needs `CachedTable` for access | | Partition | Metadata | Catalog | Partition ID | `CachedPartition` | Invalidated if ingester data or any parquet files have columns that are NOT covered by the sort key. | Needs `CachedTable` for access |
| Projected Schema | Metadata | Querier | Table ID, Column IDs | `ProjectedSchema` | -- | Needs `CachedTable` for access | | Projected Schema | Metadata | Querier | Table ID, Column IDs | `ProjectedSchema` | -- | Needs `CachedTable` for access |
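To make the refresh/TTL/invalidation column above concrete, below is a minimal, self-contained sketch of how a refresh deadline, a TTL, and out-of-band invalidation can be layered on a single cached entry. The names (`CachedEntry`, `Freshness`) are illustrative and are not the querier's actual cache API:
```rust
use std::time::{Duration, Instant};

/// Illustrative only: one cached value plus the policy state that governs it.
struct CachedEntry<V> {
    value: V,
    loaded_at: Instant,
    /// After this age the entry should be reloaded in the background.
    refresh_after: Duration,
    /// After this age the entry must not be served at all.
    ttl: Duration,
    /// Set by out-of-band events, e.g. an ingester watermark showing that the
    /// cached parquet-file list is stale.
    invalidated: bool,
}

#[derive(Debug, PartialEq)]
enum Freshness {
    /// Serve the cached value as-is.
    Fresh,
    /// Serve the cached value, but kick off a background refresh.
    NeedsRefresh,
    /// Drop the entry and reload before answering.
    Expired,
}

impl<V> CachedEntry<V> {
    fn freshness(&self, now: Instant) -> Freshness {
        let age = now.duration_since(self.loaded_at);
        if self.invalidated || age >= self.ttl {
            Freshness::Expired
        } else if age >= self.refresh_after {
            Freshness::NeedsRefresh
        } else {
            Freshness::Fresh
        }
    }
}

fn main() {
    let mut entry = CachedEntry {
        value: "parquet file list",
        loaded_at: Instant::now(),
        refresh_after: Duration::from_secs(30),
        ttl: Duration::from_secs(300),
        invalidated: false,
    };
    println!("cached value: {}", entry.value);
    assert_eq!(entry.freshness(Instant::now()), Freshness::Fresh);

    // Invalidation wins regardless of age.
    entry.invalidated = true;
    assert_eq!(entry.freshness(Instant::now()), Freshness::Expired);
}
```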
View File
@ -12,6 +12,7 @@ observability_deps = { path = "../observability_deps" }
once_cell = { version = "1.18", features = ["parking_lot"] } once_cell = { version = "1.18", features = ["parking_lot"] }
parking_lot = "0.12" parking_lot = "0.12"
pin-project = "1.1" pin-project = "1.1"
snafu = "0.7"
tokio = { version = "1.29" } tokio = { version = "1.29" }
tokio-util = { version = "0.7.8" } tokio-util = { version = "0.7.8" }
tokio_metrics_bridge = { path = "../tokio_metrics_bridge" } tokio_metrics_bridge = { path = "../tokio_metrics_bridge" }
View File
@ -16,6 +16,7 @@
)] )]
use metric::Registry; use metric::Registry;
use snafu::Snafu;
#[cfg(tokio_unstable)] #[cfg(tokio_unstable)]
use tokio_metrics_bridge::setup_tokio_metrics; use tokio_metrics_bridge::setup_tokio_metrics;
// Workaround for "unused crate" lint false positives. // Workaround for "unused crate" lint false positives.
@ -68,8 +69,16 @@ impl Task {
} }
} }
/// The type of error that is returned from tasks in this module /// Errors occurring when polling [`Job`].
pub type Error = String; #[derive(Debug, Snafu)]
#[allow(missing_docs)]
pub enum JobError {
#[snafu(display("Worker thread gone, executor was likely shut down"))]
WorkerGone,
#[snafu(display("Panic: {msg}"))]
Panic { msg: String },
}
/// Job within the executor. /// Job within the executor.
/// ///
@ -80,7 +89,7 @@ pub struct Job<T> {
cancel: CancellationToken, cancel: CancellationToken,
detached: bool, detached: bool,
#[pin] #[pin]
rx: Receiver<Result<T, String>>, rx: Receiver<Result<T, JobError>>,
} }
impl<T> Job<T> { impl<T> Job<T> {
@ -94,7 +103,7 @@ impl<T> Job<T> {
} }
impl<T> Future for Job<T> { impl<T> Future for Job<T> {
type Output = Result<T, Error>; type Output = Result<T, JobError>;
fn poll( fn poll(
self: Pin<&mut Self>, self: Pin<&mut Self>,
@ -103,9 +112,7 @@ impl<T> Future for Job<T> {
let this = self.project(); let this = self.project();
match ready!(this.rx.poll(cx)) { match ready!(this.rx.poll(cx)) {
Ok(res) => std::task::Poll::Ready(res), Ok(res) => std::task::Poll::Ready(res),
Err(_) => std::task::Poll::Ready(Err(String::from( Err(_) => std::task::Poll::Ready(Err(JobError::WorkerGone)),
"Worker thread gone, executor was likely shut down",
))),
} }
} }
} }
@ -315,13 +322,15 @@ impl DedicatedExecutor {
let fut = Box::pin(async move { let fut = Box::pin(async move {
let task_output = AssertUnwindSafe(task).catch_unwind().await.map_err(|e| { let task_output = AssertUnwindSafe(task).catch_unwind().await.map_err(|e| {
if let Some(s) = e.downcast_ref::<String>() { let s = if let Some(s) = e.downcast_ref::<String>() {
s.clone() s.clone()
} else if let Some(s) = e.downcast_ref::<&str>() { } else if let Some(s) = e.downcast_ref::<&str>() {
s.to_string() s.to_string()
} else { } else {
"unknown internal error".to_string() "unknown internal error".to_string()
} };
JobError::Panic { msg: s }
}); });
if tx.send(task_output).is_err() { if tx.send(task_output).is_err() {
@ -571,7 +580,7 @@ mod tests {
let err = dedicated_task.await.unwrap_err(); let err = dedicated_task.await.unwrap_err();
assert_eq!( assert_eq!(
err.to_string(), err.to_string(),
"At the disco, on the dedicated task scheduler", "Panic: At the disco, on the dedicated task scheduler",
); );
exec.join().await; exec.join().await;
@ -590,7 +599,7 @@ mod tests {
// should not be able to get the result // should not be able to get the result
let err = dedicated_task.await.unwrap_err(); let err = dedicated_task.await.unwrap_err();
assert_eq!(err.to_string(), "1 2",); assert_eq!(err.to_string(), "Panic: 1 2",);
exec.join().await; exec.join().await;
} }
@ -608,7 +617,7 @@ mod tests {
// should not be able to get the result // should not be able to get the result
let err = dedicated_task.await.unwrap_err(); let err = dedicated_task.await.unwrap_err();
assert_eq!(err.to_string(), "unknown internal error",); assert_eq!(err.to_string(), "Panic: unknown internal error",);
exec.join().await; exec.join().await;
} }
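For orientation, here is a self-contained sketch of the panic-to-error mapping introduced above, using only `std` rather than the crate's real `DedicatedExecutor`/`Job` types; `run_task` and `panic_msg` are illustrative helpers, while the variant names and messages mirror the diff:
```rust
use std::panic::{catch_unwind, AssertUnwindSafe};

/// Illustrative stand-in for the `JobError` added above; not the crate's type.
#[derive(Debug)]
#[allow(dead_code)]
enum JobError {
    WorkerGone,
    Panic { msg: String },
}

/// Turn a panic payload into a readable message, mirroring the downcast chain
/// in the executor change: `String`, then `&str`, then a generic fallback.
fn panic_msg(e: Box<dyn std::any::Any + Send>) -> String {
    if let Some(s) = e.downcast_ref::<String>() {
        s.clone()
    } else if let Some(s) = e.downcast_ref::<&str>() {
        s.to_string()
    } else {
        "unknown internal error".to_string()
    }
}

/// Run a closure, converting a panic into `JobError::Panic` instead of a bare string.
fn run_task<T>(task: impl FnOnce() -> T) -> Result<T, JobError> {
    catch_unwind(AssertUnwindSafe(task)).map_err(|e| JobError::Panic { msg: panic_msg(e) })
}

fn main() {
    // Silence the default panic hook so only our own output is printed.
    std::panic::set_hook(Box::new(|_| {}));

    let res: Result<(), JobError> = run_task(|| panic!("At the disco"));
    match res {
        Err(JobError::Panic { msg }) => println!("Panic: {msg}"),
        Err(JobError::WorkerGone) => println!("Worker thread gone, executor was likely shut down"),
        Ok(()) => println!("ok"),
    }
}
```
This prints `Panic: At the disco`, matching the message format the updated tests assert on.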
View File
@ -32,5 +32,5 @@ metric = { path = "../metric" }
once_cell = { version = "1.18", features = ["parking_lot"] } once_cell = { version = "1.18", features = ["parking_lot"] }
parquet_file = { path = "../parquet_file" } parquet_file = { path = "../parquet_file" }
tempfile = "3" tempfile = "3"
sqlx = { version = "0.6", features = [ "runtime-tokio-rustls" ] } sqlx = { version = "0.7.1", features = [ "runtime-tokio-rustls" ] }
View File
@ -14,7 +14,7 @@ prost = "0.11.9"
thiserror = "1.0.40" thiserror = "1.0.40"
tokio = { version = "1.28.2", features = ["net", "io-util", "time", "rt", "sync", "macros"] } tokio = { version = "1.28.2", features = ["net", "io-util", "time", "rt", "sync", "macros"] }
tracing = "0.1.37" tracing = "0.1.37"
uuid = { version = "1.3.3", features = ["v4"] } uuid = { version = "1.4.1", features = ["v4"] }
workspace-hack = { version = "0.1", path = "../workspace-hack" } workspace-hack = { version = "0.1", path = "../workspace-hack" }
[build-dependencies] [build-dependencies]
View File
@ -69,7 +69,7 @@ once_cell = { version = "1.18", features = ["parking_lot"] }
rustyline = { version = "12.0", default-features = false, features = ["with-file-history"]} rustyline = { version = "12.0", default-features = false, features = ["with-file-history"]}
serde_json = "1.0.103" serde_json = "1.0.103"
snafu = "0.7" snafu = "0.7"
tempfile = "3.6.0" tempfile = "3.7.0"
thiserror = "1.0.43" thiserror = "1.0.43"
tikv-jemalloc-ctl = { version = "0.5.0", optional = true } tikv-jemalloc-ctl = { version = "0.5.0", optional = true }
tokio = { version = "1.29", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time", "io-std"] } tokio = { version = "1.29", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time", "io-std"] }
@ -93,7 +93,7 @@ predicate = { path = "../predicate" }
predicates = "3.0.3" predicates = "3.0.3"
pretty_assertions = "1.4.0" pretty_assertions = "1.4.0"
proptest = { version = "1.2.0", default-features = false } proptest = { version = "1.2.0", default-features = false }
serde = "1.0.171" serde = "1.0.173"
test_helpers = { path = "../test_helpers", features = ["future_timeout"] } test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
test_helpers_end_to_end = { path = "../test_helpers_end_to_end" } test_helpers_end_to_end = { path = "../test_helpers_end_to_end" }
insta = { version = "1", features = ["yaml"] } insta = { version = "1", features = ["yaml"] }
View File
@ -50,12 +50,12 @@ enum Command {
Schema(schema::Config), Schema(schema::Config),
// NB: The example formatting below is weird so that Clap makes a nice help text // NB: The example formatting below is weird so that Clap makes a nice help text
/// Build a local catalog from the output of `remote get-table`. /// Build a local catalog from the output of `remote store get-table`.
/// ///
/// For example: /// For example:
/// ```text /// ```text
/// # download contents of table_name into a directory named 'table_name' /// # download contents of table_name into a directory named 'table_name'
/// influxdb_iox remote get-table <namespace> <table_name> /// influxdb_iox remote store get-table <namespace> <table_name>
/// ///
/// # Create a catalog and object_store in /tmp/data_dir /// # Create a catalog and object_store in /tmp/data_dir
/// influxdb_iox debug build-catalog <table_dir> /tmp/data_dir /// influxdb_iox debug build-catalog <table_dir> /tmp/data_dir
View File
@ -957,7 +957,7 @@ async fn query_ingester() {
test_helpers::maybe_start_logging(); test_helpers::maybe_start_logging();
let database_url = maybe_skip_integration!(); let database_url = maybe_skip_integration!();
let mut cluster = MiniCluster::create_shared(database_url).await; let mut cluster = MiniCluster::create_shared_never_persist(database_url).await;
StepTest::new( StepTest::new(
&mut cluster, &mut cluster,
View File
@ -1,10 +1,5 @@
//! Tests the `influxdb_iox debug` commands //! Tests the `influxdb_iox debug` commands
use std::{ use std::path::Path;
collections::VecDeque,
io::Write,
path::{Path, PathBuf},
time::Duration,
};
use arrow::record_batch::RecordBatch; use arrow::record_batch::RecordBatch;
use arrow_util::assert_batches_sorted_eq; use arrow_util::assert_batches_sorted_eq;
@ -12,7 +7,6 @@ use assert_cmd::Command;
use futures::FutureExt; use futures::FutureExt;
use predicates::prelude::*; use predicates::prelude::*;
use tempfile::TempDir; use tempfile::TempDir;
use test_helpers::timeout::FutureTimeout;
use test_helpers_end_to_end::{ use test_helpers_end_to_end::{
maybe_skip_integration, run_sql, MiniCluster, ServerFixture, Step, StepTest, StepTestState, maybe_skip_integration, run_sql, MiniCluster, ServerFixture, Step, StepTest, StepTestState,
TestConfig, TestConfig,
@ -52,8 +46,6 @@ async fn test_print_cpu() {
/// 3. Start an all-in-one instance from that rebuilt catalog /// 3. Start an all-in-one instance from that rebuilt catalog
/// 4. Can run a query successfully /// 4. Can run a query successfully
#[tokio::test] #[tokio::test]
// Ignore due to https://github.com/influxdata/influxdb_iox/issues/8203
#[ignore]
async fn build_catalog() { async fn build_catalog() {
test_helpers::maybe_start_logging(); test_helpers::maybe_start_logging();
let database_url = maybe_skip_integration!(); let database_url = maybe_skip_integration!();
@ -111,20 +103,11 @@ async fn build_catalog() {
let table_dir = export_dir.path().join(table_name); let table_dir = export_dir.path().join(table_name);
// We can build a catalog and start up the server and run a query // We can build a catalog and start up the server and run a query
let restarted = RestartedServer::build_catalog_and_start(&table_dir).await; rebuild_and_query(&table_dir, &namespace, sql, &expected).await;
let batches = restarted
.run_sql_until_non_empty(sql, namespace.as_str())
.await;
assert_batches_sorted_eq!(&expected, &batches);
// We can also rebuild a catalog from just the parquet files // We can also rebuild a catalog from just the parquet files
let only_parquet_dir = copy_only_parquet_files(&table_dir); let only_parquet_dir = copy_only_parquet_files(&table_dir);
let restarted = rebuild_and_query(only_parquet_dir.path(), &namespace, sql, &expected).await;
RestartedServer::build_catalog_and_start(only_parquet_dir.path()).await;
let batches = restarted
.run_sql_until_non_empty(sql, namespace.as_str())
.await;
assert_batches_sorted_eq!(&expected, &batches);
} }
.boxed() .boxed()
})), })),
@ -134,6 +117,30 @@ async fn build_catalog() {
.await .await
} }
/// Rebuilds a catalog from an export directory, starts up a server,
/// and verifies that running `sql` in `namespace` produces `expected`.
async fn rebuild_and_query(table_dir: &Path, namespace: &str, sql: &str, expected: &[&str]) {
// Very occasionally, something goes wrong with the sqlite-based
// catalog and it doesn't get the new files, so try a few times.
//
// See https://github.com/influxdata/influxdb_iox/issues/8287
let mut retries = 5;
while retries > 0 {
println!("** Retries remaining: {retries}");
let restarted = RestartedServer::build_catalog_and_start(table_dir).await;
let batches = restarted.run_sql(sql, namespace).await;
// if we got results, great, otherwise try again
if !batches.is_empty() {
assert_batches_sorted_eq!(expected, &batches);
return;
}
retries -= 1;
}
panic!("query did not return results after retries; see https://github.com/influxdata/influxdb_iox/issues/8287");
}
/// An all in one instance, with data directory of `data_dir` /// An all in one instance, with data directory of `data_dir`
struct RestartedServer { struct RestartedServer {
all_in_one: ServerFixture, all_in_one: ServerFixture,
@ -171,7 +178,7 @@ impl RestartedServer {
println!("target_directory: {data_dir:?}"); println!("target_directory: {data_dir:?}");
// call `influxdb_iox debug build-catalog <table_dir> <new_data_dir>` // call `influxdb_iox debug build-catalog <table_dir> <new_data_dir>`
let cmd = Command::cargo_bin("influxdb_iox") Command::cargo_bin("influxdb_iox")
.unwrap() .unwrap()
// use -v to enable logging so we can check the status messages // use -v to enable logging so we can check the status messages
.arg("-vv") .arg("-vv")
@ -180,31 +187,18 @@ impl RestartedServer {
.arg(exported_table_dir.as_os_str().to_str().unwrap()) .arg(exported_table_dir.as_os_str().to_str().unwrap())
.arg(data_dir.path().as_os_str().to_str().unwrap()) .arg(data_dir.path().as_os_str().to_str().unwrap())
.assert() .assert()
.success(); .success()
.stdout(
// debug information to track down https://github.com/influxdata/influxdb_iox/issues/8203 predicate::str::contains("Beginning catalog / object_store build")
println!("***** Begin build-catalog STDOUT ****"); .and(predicate::str::contains(
std::io::stdout() "Begin importing files total_files=1",
.write_all(&cmd.get_output().stdout) ))
.unwrap(); .and(predicate::str::contains(
println!("***** Begin build-catalog STDERR ****"); "Completed importing files total_files=1",
std::io::stdout() )),
.write_all(&cmd.get_output().stderr) );
.unwrap();
println!("***** DONE ****");
cmd.stdout(
predicate::str::contains("Beginning catalog / object_store build")
.and(predicate::str::contains(
"Begin importing files total_files=1",
))
.and(predicate::str::contains(
"Completed importing files total_files=1",
)),
);
println!("Completed rebuild in {data_dir:?}"); println!("Completed rebuild in {data_dir:?}");
RecursiveDirPrinter::new().print(data_dir.path());
// now, start up a new server in all-in-one mode // now, start up a new server in all-in-one mode
// using the newly built data directory // using the newly built data directory
@ -216,27 +210,6 @@ impl RestartedServer {
data_dir, data_dir,
} }
} }
/// Runs the SQL query against this server, in a loop until
/// results are returned. Panics if the results are not produced
/// within a 5 seconds
async fn run_sql_until_non_empty(&self, sql: &str, namespace: &str) -> Vec<RecordBatch> {
let timeout = Duration::from_secs(5);
let loop_sleep = Duration::from_millis(500);
let fut = async {
loop {
let batches = self.run_sql(sql, namespace).await;
if !batches.is_empty() {
return batches;
}
tokio::time::sleep(loop_sleep).await;
}
};
fut.with_timeout(timeout)
.await
.expect("timed out waiting for non-empty batches in result")
}
} }
/// Copies only parquet files from the source directory to a new /// Copies only parquet files from the source directory to a new
@ -262,43 +235,3 @@ fn copy_only_parquet_files(src: &Path) -> TempDir {
} }
target_dir target_dir
} }
/// Prints out the contents of the directory recursively
/// for debugging.
///
/// ```text
/// RecursiveDirPrinter All files rooted at "/tmp/.tmpvf16r0"
/// "/tmp/.tmpvf16r0"
/// "/tmp/.tmpvf16r0/catalog.sqlite"
/// "/tmp/.tmpvf16r0/object_store"
/// "/tmp/.tmpvf16r0/object_store/1"
/// "/tmp/.tmpvf16r0/object_store/1/1"
/// "/tmp/.tmpvf16r0/object_store/1/1/b862a7e9b329ee6a418cde191198eaeb1512753f19b87a81def2ae6c3d0ed237"
/// "/tmp/.tmpvf16r0/object_store/1/1/b862a7e9b329ee6a418cde191198eaeb1512753f19b87a81def2ae6c3d0ed237/d78abef6-6859-48eb-aa62-3518097fbb9b.parquet"
///
struct RecursiveDirPrinter {
paths: VecDeque<PathBuf>,
}
impl RecursiveDirPrinter {
fn new() -> Self {
Self {
paths: VecDeque::new(),
}
}
// print root and all directories
fn print(mut self, root: &Path) {
println!("RecursiveDirPrinter All files rooted at {root:?}");
self.paths.push_back(PathBuf::from(root));
while let Some(path) = self.paths.pop_front() {
println!("{path:?}");
if path.is_dir() {
for entry in std::fs::read_dir(path).unwrap() {
self.paths.push_front(entry.unwrap().path());
}
}
}
}
}
View File
@ -1,8 +1,8 @@
use std::{collections::HashMap, path::PathBuf, sync::Arc}; use std::path::PathBuf;
use arrow::{ use arrow::{
array::as_generic_binary_array, array::as_generic_binary_array,
datatypes::{DataType, Fields, Schema, SchemaRef, TimeUnit}, datatypes::{DataType, Schema, TimeUnit},
record_batch::RecordBatch, record_batch::RecordBatch,
}; };
use arrow_flight::{ use arrow_flight::{
@ -1592,10 +1592,7 @@ async fn assert_schema(client: &mut FlightClient, cmd: Any) {
let mut saw_data = false; let mut saw_data = false;
while let Some(batch) = result_stream.try_next().await.unwrap() { while let Some(batch) = result_stream.try_next().await.unwrap() {
saw_data = true; saw_data = true;
// strip metadata (GetFlightInfo doesn't include metadata for let batch_schema = batch.schema();
// some reason) before comparison
// https://github.com/influxdata/influxdb_iox/issues/7282
let batch_schema = strip_metadata(&batch.schema());
assert_eq!( assert_eq!(
batch_schema.as_ref(), batch_schema.as_ref(),
&flight_info_schema, &flight_info_schema,
@ -1603,10 +1600,6 @@ async fn assert_schema(client: &mut FlightClient, cmd: Any) {
); );
// The stream itself also may report a schema // The stream itself also may report a schema
if let Some(stream_schema) = result_stream.schema() { if let Some(stream_schema) = result_stream.schema() {
// strip metadata (GetFlightInfo doesn't include metadata for
// some reason) before comparison
// https://github.com/influxdata/influxdb_iox/issues/7282
let stream_schema = strip_metadata(stream_schema);
assert_eq!(stream_schema.as_ref(), &flight_info_schema); assert_eq!(stream_schema.as_ref(), &flight_info_schema);
} }
} }
@ -1615,16 +1608,6 @@ async fn assert_schema(client: &mut FlightClient, cmd: Any) {
assert!(saw_data); assert!(saw_data);
} }
fn strip_metadata(schema: &Schema) -> SchemaRef {
let stripped_fields: Fields = schema
.fields()
.iter()
.map(|f| f.as_ref().clone().with_metadata(HashMap::new()))
.collect();
Arc::new(Schema::new(stripped_fields))
}
#[tokio::test] #[tokio::test]
async fn authz() { async fn authz() {
test_helpers::maybe_start_logging(); test_helpers::maybe_start_logging();
View File
@ -235,9 +235,9 @@ async fn test_tracing_create_compactor_trace() {
// "shallow" packet inspection and verify the UDP server got omething that had some expected // "shallow" packet inspection and verify the UDP server got omething that had some expected
// results. We could look for any text of any of the compaction spans. The name of the span // results. We could look for any text of any of the compaction spans. The name of the span
// for data fusion execution is arbitrarily chosen. // for acquiring permit is arbitrarily chosen.
udp_capture udp_capture
.wait_for(|m| m.to_string().contains("data_fusion")) .wait_for(|m| m.to_string().contains("acquire_permit"))
.await; .await;
// debugging assistance // debugging assistance
View File
@ -135,3 +135,28 @@ SELECT
from cpu from cpu
where time between timestamp '2000-05-05T12:00:00Z' and timestamp '2000-05-05T12:59:00Z' where time between timestamp '2000-05-05T12:00:00Z' and timestamp '2000-05-05T12:59:00Z'
group by region, minute; group by region, minute;
-- With a VALUES clause, which affects how the range is found
-- Fix for https://github.com/influxdata/idpe/issues/17880
SELECT
date_bin_gapfill(INTERVAL '1 minute', time) as _time,
pod,
locf(selector_last(image, time))
FROM
(VALUES ('2023-06-10T12:00:00Z'::timestamp, 'pod1', 'imageA'),
('2023-06-10T12:00:00Z'::timestamp, 'pod2', 'imageA'),
('2023-06-10T12:00:01Z'::timestamp, 'pod1', 'imageB'),
('2023-06-10T12:00:02Z'::timestamp, 'pod1', 'imageB'),
('2023-06-10T12:00:02Z'::timestamp, 'pod2', 'imageB')
) AS data(time, pod, image)
WHERE time >= timestamp '2023-06-10T11:55:00Z' AND time < timestamp '2023-06-10T12:05:00Z'
GROUP BY _time, pod;
-- This is not supported since the grouping is not on the values produced by
-- date_bin_gapfill. The query should fail with a reasonable message.
select
date_bin_gapfill('60 seconds'::interval, time)::bigint as time,
sum(idle)
from cpu
WHERE time >= '2020-06-11T16:52:00Z' AND time < '2020-06-11T16:54:00Z'
group by 1;
View File
@ -223,3 +223,32 @@ Error during planning: gap-filling query is missing lower time bound
| b | 2000-05-05T12:40:00Z | 27.049999999999997 | | b | 2000-05-05T12:40:00Z | 27.049999999999997 |
| b | 2000-05-05T12:50:00Z | 27.049999999999997 | | b | 2000-05-05T12:50:00Z | 27.049999999999997 |
+--------+----------------------+--------------------+ +--------+----------------------+--------------------+
-- SQL: SELECT date_bin_gapfill(INTERVAL '1 minute', time) as _time, pod, locf(selector_last(image, time)) FROM (VALUES ('2023-06-10T12:00:00Z'::timestamp, 'pod1', 'imageA'), ('2023-06-10T12:00:00Z'::timestamp, 'pod2', 'imageA'), ('2023-06-10T12:00:01Z'::timestamp, 'pod1', 'imageB'), ('2023-06-10T12:00:02Z'::timestamp, 'pod1', 'imageB'), ('2023-06-10T12:00:02Z'::timestamp, 'pod2', 'imageB') ) AS data(time, pod, image) WHERE time >= timestamp '2023-06-10T11:55:00Z' AND time < timestamp '2023-06-10T12:05:00Z' GROUP BY _time, pod;
+----------------------+------+--------------------------------------------+
| _time | pod | locf(selector_last(image,time)) |
+----------------------+------+--------------------------------------------+
| 2023-06-10T11:55:00Z | pod1 | |
| 2023-06-10T11:56:00Z | pod1 | |
| 2023-06-10T11:57:00Z | pod1 | |
| 2023-06-10T11:58:00Z | pod1 | |
| 2023-06-10T11:59:00Z | pod1 | |
| 2023-06-10T12:00:00Z | pod1 | {value: imageB, time: 2023-06-10T12:00:02} |
| 2023-06-10T12:01:00Z | pod1 | {value: imageB, time: 2023-06-10T12:00:02} |
| 2023-06-10T12:02:00Z | pod1 | {value: imageB, time: 2023-06-10T12:00:02} |
| 2023-06-10T12:03:00Z | pod1 | {value: imageB, time: 2023-06-10T12:00:02} |
| 2023-06-10T12:04:00Z | pod1 | {value: imageB, time: 2023-06-10T12:00:02} |
| 2023-06-10T11:55:00Z | pod2 | |
| 2023-06-10T11:56:00Z | pod2 | |
| 2023-06-10T11:57:00Z | pod2 | |
| 2023-06-10T11:58:00Z | pod2 | |
| 2023-06-10T11:59:00Z | pod2 | |
| 2023-06-10T12:00:00Z | pod2 | {value: imageB, time: 2023-06-10T12:00:02} |
| 2023-06-10T12:01:00Z | pod2 | {value: imageB, time: 2023-06-10T12:00:02} |
| 2023-06-10T12:02:00Z | pod2 | {value: imageB, time: 2023-06-10T12:00:02} |
| 2023-06-10T12:03:00Z | pod2 | {value: imageB, time: 2023-06-10T12:00:02} |
| 2023-06-10T12:04:00Z | pod2 | {value: imageB, time: 2023-06-10T12:00:02} |
+----------------------+------+--------------------------------------------+
-- SQL: select date_bin_gapfill('60 seconds'::interval, time)::bigint as time, sum(idle) from cpu WHERE time >= '2020-06-11T16:52:00Z' AND time < '2020-06-11T16:54:00Z' group by 1;
Error while planning query: Optimizer rule 'handle_gap_fill' failed
caused by
Error during planning: DATE_BIN_GAPFILL must a top-level expression in the GROUP BY clause when gap filling. It cannot be part of another expression or cast
View File
@ -339,6 +339,12 @@ SELECT COUNT(f64), SUM(f64) FROM m0 GROUP BY TIME(30s) FILL(none);
-- supports offset parameter -- supports offset parameter
SELECT COUNT(f64), SUM(f64) FROM m0 GROUP BY TIME(30s, 1s) FILL(none); SELECT COUNT(f64), SUM(f64) FROM m0 GROUP BY TIME(30s, 1s) FILL(none);
-- N.B. The gap filling of the COUNT(usage_idle) and COUNT(bytes_free)
-- columns happens before the two measurements are UNIONed together
-- when producing the output table. This means that a COUNT column for
-- a field that is not present for a measurement will contain NULLs,
-- rather than being filled with 0s. This is consistent with older
-- versions of influxdb.
SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk; SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk;
SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk GROUP BY TIME(1s) FILL(none); SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk GROUP BY TIME(1s) FILL(none);
SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk GROUP BY cpu; SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk GROUP BY cpu;
@ -360,7 +366,9 @@ SELECT COUNT(usage_idle), usage_idle FROM cpu;
-- Default FILL(null) when FILL is omitted -- Default FILL(null) when FILL is omitted
SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s); SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s);
SELECT COUNT(usage_idle)+2 FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s);
SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s); SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s);
SELECT COUNT(usage_idle)+1, COUNT(bytes_free)+2 FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s);
SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(null); SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(null);
SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(null); SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(null);
SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(previous); SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(previous);
@ -655,3 +663,6 @@ SELECT SUM(bytes_free) / SUM(bytes_used) AS result FROM disk WHERE time >= '2022
-- Unsupported: host is a field in one subquery and a tag in the other -- Unsupported: host is a field in one subquery and a tag in the other
SELECT count(host) FROM (SELECT usage_idle AS host FROM cpu), (SELECT host, usage_idle FROM cpu); SELECT count(host) FROM (SELECT usage_idle AS host FROM cpu), (SELECT host, usage_idle FROM cpu);
-- Using a selector or an aggregate function on a tag column returns NULL
SELECT last(host) AS host, first(usage_idle) AS usage_idle FROM cpu GROUP BY host;
View File
@ -919,10 +919,10 @@ name: logical_plan
plan plan
Sort: iox::measurement ASC NULLS LAST, tag0 ASC NULLS LAST, time ASC NULLS LAST Sort: iox::measurement ASC NULLS LAST, tag0 ASC NULLS LAST, time ASC NULLS LAST
Union Union
Projection: Dictionary(Int32, Utf8("m0")) AS iox::measurement, TimestampNanosecond(0, None) AS time, m0.tag0 AS tag0, COUNT(m0.f64) AS count, SUM(m0.f64) AS sum, STDDEV(m0.f64) AS stddev Projection: Dictionary(Int32, Utf8("m0")) AS iox::measurement, TimestampNanosecond(0, None) AS time, m0.tag0 AS tag0, coalesce_struct(COUNT(m0.f64), Int64(0)) AS count, SUM(m0.f64) AS sum, STDDEV(m0.f64) AS stddev
Aggregate: groupBy=[[m0.tag0]], aggr=[[COUNT(m0.f64), SUM(m0.f64), STDDEV(m0.f64)]] Aggregate: groupBy=[[m0.tag0]], aggr=[[COUNT(m0.f64), SUM(m0.f64), STDDEV(m0.f64)]]
TableScan: m0 projection=[f64, tag0] TableScan: m0 projection=[f64, tag0]
Projection: Dictionary(Int32, Utf8("m1")) AS iox::measurement, TimestampNanosecond(0, None) AS time, m1.tag0 AS tag0, COUNT(m1.f64) AS count, SUM(m1.f64) AS sum, STDDEV(m1.f64) AS stddev Projection: Dictionary(Int32, Utf8("m1")) AS iox::measurement, TimestampNanosecond(0, None) AS time, m1.tag0 AS tag0, coalesce_struct(COUNT(m1.f64), Int64(0)) AS count, SUM(m1.f64) AS sum, STDDEV(m1.f64) AS stddev
Aggregate: groupBy=[[m1.tag0]], aggr=[[COUNT(m1.f64), SUM(m1.f64), STDDEV(m1.f64)]] Aggregate: groupBy=[[m1.tag0]], aggr=[[COUNT(m1.f64), SUM(m1.f64), STDDEV(m1.f64)]]
TableScan: m1 projection=[f64, tag0] TableScan: m1 projection=[f64, tag0]
name: physical_plan name: physical_plan
@ -930,7 +930,7 @@ name: physical_plan
SortPreservingMergeExec: [iox::measurement@0 ASC NULLS LAST,tag0@2 ASC NULLS LAST,time@1 ASC NULLS LAST] SortPreservingMergeExec: [iox::measurement@0 ASC NULLS LAST,tag0@2 ASC NULLS LAST,time@1 ASC NULLS LAST]
UnionExec UnionExec
SortExec: expr=[iox::measurement@0 ASC NULLS LAST,tag0@2 ASC NULLS LAST,time@1 ASC NULLS LAST] SortExec: expr=[iox::measurement@0 ASC NULLS LAST,tag0@2 ASC NULLS LAST,time@1 ASC NULLS LAST]
ProjectionExec: expr=[m0 as iox::measurement, 0 as time, tag0@0 as tag0, COUNT(m0.f64)@1 as count, SUM(m0.f64)@2 as sum, STDDEV(m0.f64)@3 as stddev] ProjectionExec: expr=[m0 as iox::measurement, 0 as time, tag0@0 as tag0, coalesce_struct(COUNT(m0.f64)@1, 0) as count, SUM(m0.f64)@2 as sum, STDDEV(m0.f64)@3 as stddev]
AggregateExec: mode=FinalPartitioned, gby=[tag0@0 as tag0], aggr=[COUNT(m0.f64), SUM(m0.f64), STDDEV(m0.f64)] AggregateExec: mode=FinalPartitioned, gby=[tag0@0 as tag0], aggr=[COUNT(m0.f64), SUM(m0.f64), STDDEV(m0.f64)]
CoalesceBatchesExec: target_batch_size=8192 CoalesceBatchesExec: target_batch_size=8192
RepartitionExec: partitioning=Hash([tag0@0], 4), input_partitions=4 RepartitionExec: partitioning=Hash([tag0@0], 4), input_partitions=4
@ -938,7 +938,7 @@ name: physical_plan
AggregateExec: mode=Partial, gby=[tag0@1 as tag0], aggr=[COUNT(m0.f64), SUM(m0.f64), STDDEV(m0.f64)] AggregateExec: mode=Partial, gby=[tag0@1 as tag0], aggr=[COUNT(m0.f64), SUM(m0.f64), STDDEV(m0.f64)]
ParquetExec: file_groups={1 group: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, projection=[f64, tag0] ParquetExec: file_groups={1 group: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, projection=[f64, tag0]
SortExec: expr=[iox::measurement@0 ASC NULLS LAST,tag0@2 ASC NULLS LAST,time@1 ASC NULLS LAST] SortExec: expr=[iox::measurement@0 ASC NULLS LAST,tag0@2 ASC NULLS LAST,time@1 ASC NULLS LAST]
ProjectionExec: expr=[m1 as iox::measurement, 0 as time, tag0@0 as tag0, COUNT(m1.f64)@1 as count, SUM(m1.f64)@2 as sum, STDDEV(m1.f64)@3 as stddev] ProjectionExec: expr=[m1 as iox::measurement, 0 as time, tag0@0 as tag0, coalesce_struct(COUNT(m1.f64)@1, 0) as count, SUM(m1.f64)@2 as sum, STDDEV(m1.f64)@3 as stddev]
RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=4 RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=4
AggregateExec: mode=FinalPartitioned, gby=[tag0@0 as tag0], aggr=[COUNT(m1.f64), SUM(m1.f64), STDDEV(m1.f64)], ordering_mode=FullyOrdered AggregateExec: mode=FinalPartitioned, gby=[tag0@0 as tag0], aggr=[COUNT(m1.f64), SUM(m1.f64), STDDEV(m1.f64)], ordering_mode=FullyOrdered
CoalesceBatchesExec: target_batch_size=8192 CoalesceBatchesExec: target_batch_size=8192
@ -1267,9 +1267,19 @@ name: cpu
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:00:00 | 6 | | 2022-10-31T02:00:00 | 6 |
| 2022-10-31T02:00:30 | | | 2022-10-31T02:00:30 | 0 |
| 2022-10-31T02:01:00 | | | 2022-10-31T02:01:00 | 0 |
| 2022-10-31T02:01:30 | | | 2022-10-31T02:01:30 | 0 |
+---------------------+-------+
-- InfluxQL: SELECT COUNT(usage_idle)+2 FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s);
name: cpu
+---------------------+-------+
| time | count |
+---------------------+-------+
| 2022-10-31T02:00:00 | 8 |
| 2022-10-31T02:00:30 | 2 |
| 2022-10-31T02:01:00 | 2 |
| 2022-10-31T02:01:30 | 2 |
+---------------------+-------+ +---------------------+-------+
-- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s); -- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s);
name: cpu name: cpu
@ -1277,18 +1287,37 @@ name: cpu
| time | count | count_1 | | time | count | count_1 |
+---------------------+-------+---------+ +---------------------+-------+---------+
| 2022-10-31T02:00:00 | 6 | | | 2022-10-31T02:00:00 | 6 | |
| 2022-10-31T02:00:30 | | | | 2022-10-31T02:00:30 | 0 | |
| 2022-10-31T02:01:00 | | | | 2022-10-31T02:01:00 | 0 | |
| 2022-10-31T02:01:30 | | | | 2022-10-31T02:01:30 | 0 | |
+---------------------+-------+---------+ +---------------------+-------+---------+
name: disk name: disk
+---------------------+-------+---------+ +---------------------+-------+---------+
| time | count | count_1 | | time | count | count_1 |
+---------------------+-------+---------+ +---------------------+-------+---------+
| 2022-10-31T02:00:00 | | 6 | | 2022-10-31T02:00:00 | | 6 |
| 2022-10-31T02:00:30 | | | | 2022-10-31T02:00:30 | | 0 |
| 2022-10-31T02:01:00 | | | | 2022-10-31T02:01:00 | | 0 |
| 2022-10-31T02:01:30 | | | | 2022-10-31T02:01:30 | | 0 |
+---------------------+-------+---------+
-- InfluxQL: SELECT COUNT(usage_idle)+1, COUNT(bytes_free)+2 FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s);
name: cpu
+---------------------+-------+---------+
| time | count | count_1 |
+---------------------+-------+---------+
| 2022-10-31T02:00:00 | 7 | |
| 2022-10-31T02:00:30 | 1 | |
| 2022-10-31T02:01:00 | 1 | |
| 2022-10-31T02:01:30 | 1 | |
+---------------------+-------+---------+
name: disk
+---------------------+-------+---------+
| time | count | count_1 |
+---------------------+-------+---------+
| 2022-10-31T02:00:00 | | 8 |
| 2022-10-31T02:00:30 | | 2 |
| 2022-10-31T02:01:00 | | 2 |
| 2022-10-31T02:01:30 | | 2 |
+---------------------+-------+---------+ +---------------------+-------+---------+
-- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(null); -- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(null);
name: cpu name: cpu
@ -1296,9 +1325,9 @@ name: cpu
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:00:00 | 6 | | 2022-10-31T02:00:00 | 6 |
| 2022-10-31T02:00:30 | | | 2022-10-31T02:00:30 | 0 |
| 2022-10-31T02:01:00 | | | 2022-10-31T02:01:00 | 0 |
| 2022-10-31T02:01:30 | | | 2022-10-31T02:01:30 | 0 |
+---------------------+-------+ +---------------------+-------+
-- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(null); -- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(null);
name: cpu name: cpu
@ -1306,18 +1335,18 @@ name: cpu
| time | count | count_1 | | time | count | count_1 |
+---------------------+-------+---------+ +---------------------+-------+---------+
| 2022-10-31T02:00:00 | 6 | | | 2022-10-31T02:00:00 | 6 | |
| 2022-10-31T02:00:30 | | | | 2022-10-31T02:00:30 | 0 | |
| 2022-10-31T02:01:00 | | | | 2022-10-31T02:01:00 | 0 | |
| 2022-10-31T02:01:30 | | | | 2022-10-31T02:01:30 | 0 | |
+---------------------+-------+---------+ +---------------------+-------+---------+
name: disk name: disk
+---------------------+-------+---------+ +---------------------+-------+---------+
| time | count | count_1 | | time | count | count_1 |
+---------------------+-------+---------+ +---------------------+-------+---------+
| 2022-10-31T02:00:00 | | 6 | | 2022-10-31T02:00:00 | | 6 |
| 2022-10-31T02:00:30 | | | | 2022-10-31T02:00:30 | | 0 |
| 2022-10-31T02:01:00 | | | | 2022-10-31T02:01:00 | | 0 |
| 2022-10-31T02:01:30 | | | | 2022-10-31T02:01:30 | | 0 |
+---------------------+-------+---------+ +---------------------+-------+---------+
-- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(previous); -- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(previous);
name: cpu name: cpu
@ -1507,9 +1536,9 @@ tags: cpu=cpu-total
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:00:00 | 2 | | 2022-10-31T02:00:00 | 2 |
| 2022-10-31T02:00:30 | | | 2022-10-31T02:00:30 | 0 |
| 2022-10-31T02:01:00 | | | 2022-10-31T02:01:00 | 0 |
| 2022-10-31T02:01:30 | | | 2022-10-31T02:01:30 | 0 |
+---------------------+-------+ +---------------------+-------+
name: cpu name: cpu
tags: cpu=cpu0 tags: cpu=cpu0
@ -1517,9 +1546,9 @@ tags: cpu=cpu0
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:00:00 | 2 | | 2022-10-31T02:00:00 | 2 |
| 2022-10-31T02:00:30 | | | 2022-10-31T02:00:30 | 0 |
| 2022-10-31T02:01:00 | | | 2022-10-31T02:01:00 | 0 |
| 2022-10-31T02:01:30 | | | 2022-10-31T02:01:30 | 0 |
+---------------------+-------+ +---------------------+-------+
name: cpu name: cpu
tags: cpu=cpu1 tags: cpu=cpu1
@ -1527,9 +1556,9 @@ tags: cpu=cpu1
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:00:00 | 2 | | 2022-10-31T02:00:00 | 2 |
| 2022-10-31T02:00:30 | | | 2022-10-31T02:00:30 | 0 |
| 2022-10-31T02:01:00 | | | 2022-10-31T02:01:00 | 0 |
| 2022-10-31T02:01:30 | | | 2022-10-31T02:01:30 | 0 |
+---------------------+-------+ +---------------------+-------+
-- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s), cpu FILL(null); -- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s), cpu FILL(null);
name: cpu name: cpu
@ -1538,9 +1567,9 @@ tags: cpu=cpu-total
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:00:00 | 2 | | 2022-10-31T02:00:00 | 2 |
| 2022-10-31T02:00:30 | | | 2022-10-31T02:00:30 | 0 |
| 2022-10-31T02:01:00 | | | 2022-10-31T02:01:00 | 0 |
| 2022-10-31T02:01:30 | | | 2022-10-31T02:01:30 | 0 |
+---------------------+-------+ +---------------------+-------+
name: cpu name: cpu
tags: cpu=cpu0 tags: cpu=cpu0
@ -1548,9 +1577,9 @@ tags: cpu=cpu0
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:00:00 | 2 | | 2022-10-31T02:00:00 | 2 |
| 2022-10-31T02:00:30 | | | 2022-10-31T02:00:30 | 0 |
| 2022-10-31T02:01:00 | | | 2022-10-31T02:01:00 | 0 |
| 2022-10-31T02:01:30 | | | 2022-10-31T02:01:30 | 0 |
+---------------------+-------+ +---------------------+-------+
name: cpu name: cpu
tags: cpu=cpu1 tags: cpu=cpu1
@ -1558,9 +1587,9 @@ tags: cpu=cpu1
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:00:00 | 2 | | 2022-10-31T02:00:00 | 2 |
| 2022-10-31T02:00:30 | | | 2022-10-31T02:00:30 | 0 |
| 2022-10-31T02:01:00 | | | 2022-10-31T02:01:00 | 0 |
| 2022-10-31T02:01:30 | | | 2022-10-31T02:01:30 | 0 |
+---------------------+-------+ +---------------------+-------+
-- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s), cpu, device FILL(null); -- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s), cpu, device FILL(null);
name: cpu name: cpu
@ -1569,9 +1598,9 @@ tags: cpu=cpu-total, device=
| time | count | count_1 | | time | count | count_1 |
+---------------------+-------+---------+ +---------------------+-------+---------+
| 2022-10-31T02:00:00 | 2 | | | 2022-10-31T02:00:00 | 2 | |
| 2022-10-31T02:00:30 | | | | 2022-10-31T02:00:30 | 0 | |
| 2022-10-31T02:01:00 | | | | 2022-10-31T02:01:00 | 0 | |
| 2022-10-31T02:01:30 | | | | 2022-10-31T02:01:30 | 0 | |
+---------------------+-------+---------+ +---------------------+-------+---------+
name: cpu name: cpu
tags: cpu=cpu0, device= tags: cpu=cpu0, device=
@ -1579,9 +1608,9 @@ tags: cpu=cpu0, device=
| time | count | count_1 | | time | count | count_1 |
+---------------------+-------+---------+ +---------------------+-------+---------+
| 2022-10-31T02:00:00 | 2 | | | 2022-10-31T02:00:00 | 2 | |
| 2022-10-31T02:00:30 | | | | 2022-10-31T02:00:30 | 0 | |
| 2022-10-31T02:01:00 | | | | 2022-10-31T02:01:00 | 0 | |
| 2022-10-31T02:01:30 | | | | 2022-10-31T02:01:30 | 0 | |
+---------------------+-------+---------+ +---------------------+-------+---------+
name: cpu name: cpu
tags: cpu=cpu1, device= tags: cpu=cpu1, device=
@ -1589,9 +1618,9 @@ tags: cpu=cpu1, device=
| time | count | count_1 | | time | count | count_1 |
+---------------------+-------+---------+ +---------------------+-------+---------+
| 2022-10-31T02:00:00 | 2 | | | 2022-10-31T02:00:00 | 2 | |
| 2022-10-31T02:00:30 | | | | 2022-10-31T02:00:30 | 0 | |
| 2022-10-31T02:01:00 | | | | 2022-10-31T02:01:00 | 0 | |
| 2022-10-31T02:01:30 | | | | 2022-10-31T02:01:30 | 0 | |
+---------------------+-------+---------+ +---------------------+-------+---------+
name: disk name: disk
tags: cpu=, device=disk1s1 tags: cpu=, device=disk1s1
@ -1599,9 +1628,9 @@ tags: cpu=, device=disk1s1
| time | count | count_1 | | time | count | count_1 |
+---------------------+-------+---------+ +---------------------+-------+---------+
| 2022-10-31T02:00:00 | | 2 | | 2022-10-31T02:00:00 | | 2 |
| 2022-10-31T02:00:30 | | | | 2022-10-31T02:00:30 | | 0 |
| 2022-10-31T02:01:00 | | | | 2022-10-31T02:01:00 | | 0 |
| 2022-10-31T02:01:30 | | | | 2022-10-31T02:01:30 | | 0 |
+---------------------+-------+---------+ +---------------------+-------+---------+
name: disk name: disk
tags: cpu=, device=disk1s2 tags: cpu=, device=disk1s2
@ -1609,9 +1638,9 @@ tags: cpu=, device=disk1s2
| time | count | count_1 | | time | count | count_1 |
+---------------------+-------+---------+ +---------------------+-------+---------+
| 2022-10-31T02:00:00 | | 2 | | 2022-10-31T02:00:00 | | 2 |
| 2022-10-31T02:00:30 | | | | 2022-10-31T02:00:30 | | 0 |
| 2022-10-31T02:01:00 | | | | 2022-10-31T02:01:00 | | 0 |
| 2022-10-31T02:01:30 | | | | 2022-10-31T02:01:30 | | 0 |
+---------------------+-------+---------+ +---------------------+-------+---------+
name: disk name: disk
tags: cpu=, device=disk1s5 tags: cpu=, device=disk1s5
@ -1619,9 +1648,9 @@ tags: cpu=, device=disk1s5
| time | count | count_1 | | time | count | count_1 |
+---------------------+-------+---------+ +---------------------+-------+---------+
| 2022-10-31T02:00:00 | | 2 | | 2022-10-31T02:00:00 | | 2 |
| 2022-10-31T02:00:30 | | | | 2022-10-31T02:00:30 | | 0 |
| 2022-10-31T02:01:00 | | | | 2022-10-31T02:01:00 | | 0 |
| 2022-10-31T02:01:30 | | | | 2022-10-31T02:01:30 | | 0 |
+---------------------+-------+---------+ +---------------------+-------+---------+
-- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s), cpu FILL(previous); -- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s), cpu FILL(previous);
name: cpu name: cpu
@ -2202,15 +2231,15 @@ name: cpu
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:00:00 | 6 | | 2022-10-31T02:00:00 | 6 |
| 2022-10-31T02:00:30 | | | 2022-10-31T02:00:30 | 0 |
+---------------------+-------+ +---------------------+-------+
-- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:05:00Z' GROUP BY TIME(30s) LIMIT 2 OFFSET 2; -- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:05:00Z' GROUP BY TIME(30s) LIMIT 2 OFFSET 2;
name: cpu name: cpu
+---------------------+-------+ +---------------------+-------+
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:01:00 | | | 2022-10-31T02:01:00 | 0 |
| 2022-10-31T02:01:30 | | | 2022-10-31T02:01:30 | 0 |
+---------------------+-------+ +---------------------+-------+
-- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:05:00Z' GROUP BY TIME(30s), cpu LIMIT 2; -- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:05:00Z' GROUP BY TIME(30s), cpu LIMIT 2;
name: cpu name: cpu
@ -2219,7 +2248,7 @@ tags: cpu=cpu-total
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:00:00 | 2 | | 2022-10-31T02:00:00 | 2 |
| 2022-10-31T02:00:30 | | | 2022-10-31T02:00:30 | 0 |
+---------------------+-------+ +---------------------+-------+
name: cpu name: cpu
tags: cpu=cpu0 tags: cpu=cpu0
@ -2227,7 +2256,7 @@ tags: cpu=cpu0
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:00:00 | 2 | | 2022-10-31T02:00:00 | 2 |
| 2022-10-31T02:00:30 | | | 2022-10-31T02:00:30 | 0 |
+---------------------+-------+ +---------------------+-------+
name: cpu name: cpu
tags: cpu=cpu1 tags: cpu=cpu1
@ -2235,7 +2264,7 @@ tags: cpu=cpu1
| time | count | | time | count |
+---------------------+-------+ +---------------------+-------+
| 2022-10-31T02:00:00 | 2 | | 2022-10-31T02:00:00 | 2 |
| 2022-10-31T02:00:30 | | | 2022-10-31T02:00:30 | 0 |
+---------------------+-------+ +---------------------+-------+
-- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) LIMIT 1; -- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) LIMIT 1;
name: cpu name: cpu
@ -2268,13 +2297,13 @@ name: cpu
+---------------------+-------+---------+ +---------------------+-------+---------+
| time | count | count_1 | | time | count | count_1 |
+---------------------+-------+---------+ +---------------------+-------+---------+
| 2022-10-31T02:01:30 | | | | 2022-10-31T02:01:30 | 0 | |
+---------------------+-------+---------+ +---------------------+-------+---------+
name: disk name: disk
+---------------------+-------+---------+ +---------------------+-------+---------+
| time | count | count_1 | | time | count | count_1 |
+---------------------+-------+---------+ +---------------------+-------+---------+
| 2022-10-31T02:01:30 | | | | 2022-10-31T02:01:30 | | 0 |
+---------------------+-------+---------+ +---------------------+-------+---------+
-- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s), cpu, device LIMIT 1; -- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s), cpu, device LIMIT 1;
name: cpu name: cpu
@ -3088,3 +3117,11 @@ name: disk
+---------------------+----------------------+ +---------------------+----------------------+
-- InfluxQL: SELECT count(host) FROM (SELECT usage_idle AS host FROM cpu), (SELECT host, usage_idle FROM cpu); -- InfluxQL: SELECT count(host) FROM (SELECT usage_idle AS host FROM cpu), (SELECT host, usage_idle FROM cpu);
Error while planning query: This feature is not implemented: cannot mix tag and field columns with the same name: host Error while planning query: This feature is not implemented: cannot mix tag and field columns with the same name: host
-- InfluxQL: SELECT last(host) AS host, first(usage_idle) AS usage_idle FROM cpu GROUP BY host;
name: cpu
tags: host=
+---------------------+------+------------+
| time | host | usage_idle |
+---------------------+------+------------+
| 1970-01-01T00:00:00 | | 2.98 |
+---------------------+------+------------+
View File
@ -21,6 +21,19 @@ SELECT difference(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AN
-- group by time and a tag -- group by time and a tag
SELECT difference(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; SELECT difference(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
--
-- difference + selector
--
SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
-- the input data is regular data at 10s intervals, so 7s windows ensure the `first` generates windows with NULL values to test NULL handling of difference
SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
-- linear filling of selector functions produces an execution error
-- (see https://github.com/influxdata/influxdb_iox/issues/8302).
-- SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
-- group by time and a tag
SELECT difference(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
-- --
-- non_negative_difference -- non_negative_difference
@ -35,6 +48,11 @@ SELECT non_negative_difference(usage_idle) FROM cpu WHERE time >= 00000001300000
-- --
SELECT non_negative_difference(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; SELECT non_negative_difference(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
--
-- non_negative_difference + selector
--
SELECT non_negative_difference(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
-- --
-- moving_average -- moving_average
-- --
@ -61,6 +79,17 @@ SELECT moving_average(mean(writes), 3) FROM diskio WHERE time >= 000000013000000
SELECT moving_average(mean(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous); SELECT moving_average(mean(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
SELECT moving_average(mean(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear); SELECT moving_average(mean(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
--
-- moving_average + selector
--
-- the input data is regular data at 10s intervals, so 7s windows ensure the `first` generates windows with NULL values to test NULL handling of moving_average
SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
-- linear filling of selector functions produces an execution error
-- (see https://github.com/influxdata/influxdb_iox/issues/8302).
-- SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
-- --
-- combining window functions -- combining window functions
-- --
@ -109,7 +138,7 @@ SELECT derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AN
SELECT derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s); SELECT derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
SELECT derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s); SELECT derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
SELECT derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s); SELECT derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
-- the input data is regular data at 10s intervals, so 7s windows ensure the `mean` generates windows with NULL values to test NULL handling of difference -- the input data is regular data at 10s intervals, so 7s windows ensure the `mean` generates windows with NULL values to test NULL handling of derivative
SELECT derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0); SELECT derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
SELECT derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0); SELECT derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
SELECT derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous); SELECT derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
@ -120,6 +149,26 @@ SELECT derivative(mean(writes), 500ms) FROM diskio WHERE time >= 000000013000000
SELECT derivative(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; SELECT derivative(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
SELECT derivative(mean(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; SELECT derivative(mean(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
--
-- derivative + selector
--
SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
-- the input data is regular data at 10s intervals, so 7s windows ensure the `first` generates windows with NULL values to test NULL handling of derivative
SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
-- linear filling of selector functions produces an execution error
-- (see https://github.com/influxdata/influxdb_iox/issues/8302).
-- SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
-- SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
-- group by time and a tag
SELECT derivative(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
SELECT derivative(first(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
-- --
-- non_negative_derivative -- non_negative_derivative
-- --
@ -138,7 +187,7 @@ SELECT non_negative_derivative(mean(writes)) FROM diskio WHERE time >= 000000013
SELECT non_negative_derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s); SELECT non_negative_derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
SELECT non_negative_derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s); SELECT non_negative_derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
SELECT non_negative_derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s); SELECT non_negative_derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
-- the input data is regular data at 10s intervals, so 7s windows ensure the `mean` generates windows with NULL values to test NULL handling of difference -- the input data is regular data at 10s intervals, so 7s windows ensure the `mean` generates windows with NULL values to test NULL handling of non_negative_derivative
SELECT non_negative_derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0); SELECT non_negative_derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
SELECT non_negative_derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0); SELECT non_negative_derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
SELECT non_negative_derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous); SELECT non_negative_derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
@ -148,3 +197,57 @@ SELECT non_negative_derivative(mean(writes), 500ms) FROM diskio WHERE time >= 00
-- group by time and a tag -- group by time and a tag
SELECT non_negative_derivative(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; SELECT non_negative_derivative(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
SELECT non_negative_derivative(mean(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu; SELECT non_negative_derivative(mean(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
--
-- non_negative_derivative + selector
--
SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
-- the input data is regular data at 10s intervals, so 7s windows ensure the `first` generates windows with NULL values to test NULL handling of non_negative_derivative
SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
-- linear filling of selector functions produces an execution error
-- (see https://github.com/influxdata/influxdb_iox/issues/8302).
-- SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
-- SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
-- group by time and a tag
SELECT non_negative_derivative(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
SELECT non_negative_derivative(first(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
--
-- cumulative_sum
--
SELECT cumulative_sum(writes) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001;
SELECT cumulative_sum(usage_system) FROM cpu WHERE time >= 0000000060000000000 AND time < 0000000210000000001 AND cpu = 'cpu0';
SELECT cumulative_sum(usage_idle), cumulative_sum(usage_system) FROM cpu WHERE time >= 0000000060000000000 AND time < 0000000210000000001 AND cpu = 'cpu0';
SELECT cumulative_sum(usage_idle) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY cpu;
--
-- cumulative_sum + aggregate
--
SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
-- the input data is regular data at 10s intervals, so 7s windows ensure the `mean` generates windows with NULL values to test NULL handling of cumulative_sum
SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
-- group by time and a tag
SELECT cumulative_sum(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
--
-- cumulative_sum + selector
--
SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
-- the input data is regular data at 10s intervals, so 7s windows ensure the `first` generates windows with NULL values to test NULL handling of cumulative_sum
SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
-- linear filling of selector functions produces an execution error
-- (see https://github.com/influxdata/influxdb_iox/issues/8302).
-- SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
-- group by time and a tag
SELECT cumulative_sum(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
@ -148,6 +148,86 @@ tags: cpu=cpu1
| 1970-01-01T00:02:30 | -0.03333333333334565 | | 1970-01-01T00:02:30 | -0.03333333333334565 |
| 1970-01-01T00:03:00 | -0.03333333333333144 | | 1970-01-01T00:03:00 | -0.03333333333333144 |
+---------------------+----------------------+ +---------------------+----------------------+
-- InfluxQL: SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
name: diskio
+---------------------+------------+
| time | difference |
+---------------------+------------+
| 1970-01-01T00:02:20 | 164 |
| 1970-01-01T00:02:27 | 187 |
| 1970-01-01T00:02:34 | 112 |
| 1970-01-01T00:02:48 | 110 |
| 1970-01-01T00:02:55 | 219 |
| 1970-01-01T00:03:09 | 75 |
| 1970-01-01T00:03:16 | 76 |
| 1970-01-01T00:03:30 | 146 |
+---------------------+------------+
-- InfluxQL: SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
name: diskio
+---------------------+------------+
| time | difference |
+---------------------+------------+
| 1970-01-01T00:02:00 | 366 |
| 1970-01-01T00:02:30 | 421 |
| 1970-01-01T00:03:00 | 441 |
| 1970-01-01T00:03:30 | 297 |
+---------------------+------------+
-- InfluxQL: SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
name: diskio
+---------------------+------------+
| time | difference |
+---------------------+------------+
| 1970-01-01T00:02:06 | 5592646 |
| 1970-01-01T00:02:13 | -5592646 |
| 1970-01-01T00:02:20 | 5592810 |
| 1970-01-01T00:02:27 | 187 |
| 1970-01-01T00:02:34 | 112 |
| 1970-01-01T00:02:41 | -5593109 |
| 1970-01-01T00:02:48 | 5593219 |
| 1970-01-01T00:02:55 | 219 |
| 1970-01-01T00:03:02 | -5593438 |
| 1970-01-01T00:03:09 | 5593513 |
| 1970-01-01T00:03:16 | 76 |
| 1970-01-01T00:03:23 | -5593589 |
| 1970-01-01T00:03:30 | 5593735 |
+---------------------+------------+
-- InfluxQL: SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
name: diskio
+---------------------+------------+
| time | difference |
+---------------------+------------+
| 1970-01-01T00:02:13 | 0 |
| 1970-01-01T00:02:20 | 164 |
| 1970-01-01T00:02:27 | 187 |
| 1970-01-01T00:02:34 | 112 |
| 1970-01-01T00:02:41 | 0 |
| 1970-01-01T00:02:48 | 110 |
| 1970-01-01T00:02:55 | 219 |
| 1970-01-01T00:03:02 | 0 |
| 1970-01-01T00:03:09 | 75 |
| 1970-01-01T00:03:16 | 76 |
| 1970-01-01T00:03:23 | 0 |
| 1970-01-01T00:03:30 | 146 |
+---------------------+------------+
-- InfluxQL: SELECT difference(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
name: cpu
tags: cpu=cpu0
+---------------------+---------------------+
| time | difference |
+---------------------+---------------------+
| 1970-01-01T00:02:00 | -0.7999999999999972 |
| 1970-01-01T00:02:30 | 3.5 |
| 1970-01-01T00:03:00 | -0.4000000000000057 |
+---------------------+---------------------+
name: cpu
tags: cpu=cpu1
+---------------------+----------------------+
| time | difference |
+---------------------+----------------------+
| 1970-01-01T00:02:00 | 0.20000000000000284 |
| 1970-01-01T00:02:30 | 0.0 |
| 1970-01-01T00:03:00 | -0.10000000000000853 |
+---------------------+----------------------+
-- InfluxQL: SELECT non_negative_difference(usage_system) FROM cpu WHERE time >= 0000000060000000000 AND time < 0000000210000000001 AND cpu = 'cpu0'; -- InfluxQL: SELECT non_negative_difference(usage_system) FROM cpu WHERE time >= 0000000060000000000 AND time < 0000000210000000001 AND cpu = 'cpu0';
name: cpu name: cpu
+---------------------+-------------------------+ +---------------------+-------------------------+
@ -202,6 +282,22 @@ tags: cpu=cpu1
+---------------------+-------------------------+ +---------------------+-------------------------+
| 1970-01-01T00:02:00 | 0.36666666666667425 | | 1970-01-01T00:02:00 | 0.36666666666667425 |
+---------------------+-------------------------+ +---------------------+-------------------------+
-- InfluxQL: SELECT non_negative_difference(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
name: cpu
tags: cpu=cpu0
+---------------------+-------------------------+
| time | non_negative_difference |
+---------------------+-------------------------+
| 1970-01-01T00:02:30 | 3.5 |
+---------------------+-------------------------+
name: cpu
tags: cpu=cpu1
+---------------------+-------------------------+
| time | non_negative_difference |
+---------------------+-------------------------+
| 1970-01-01T00:02:00 | 0.20000000000000284 |
| 1970-01-01T00:02:30 | 0.0 |
+---------------------+-------------------------+
-- InfluxQL: SELECT moving_average(writes, 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001; -- InfluxQL: SELECT moving_average(writes, 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001;
name: diskio name: diskio
+---------------------+-------------------+ +---------------------+-------------------+
@ -307,6 +403,54 @@ name: diskio
| 1970-01-01T00:03:23 | 5593588.0 | | 1970-01-01T00:03:23 | 5593588.0 |
| 1970-01-01T00:03:30 | 5593662.0 | | 1970-01-01T00:03:30 | 5593662.0 |
+---------------------+-------------------+ +---------------------+-------------------+
-- InfluxQL: SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
name: diskio
+---------------------+-------------------+
| time | moving_average |
+---------------------+-------------------+
| 1970-01-01T00:02:27 | 5592817.666666667 |
| 1970-01-01T00:02:34 | 5592972.0 |
| 1970-01-01T00:02:48 | 5593108.333333333 |
| 1970-01-01T00:02:55 | 5593255.333333333 |
| 1970-01-01T00:03:09 | 5593390.0 |
| 1970-01-01T00:03:16 | 5593513.333333333 |
| 1970-01-01T00:03:30 | 5593612.333333333 |
+---------------------+-------------------+
-- InfluxQL: SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
name: diskio
+---------------------+--------------------+
| time | moving_average |
+---------------------+--------------------+
| 1970-01-01T00:02:13 | 1864215.3333333333 |
| 1970-01-01T00:02:20 | 3728485.3333333335 |
| 1970-01-01T00:02:27 | 3728602.3333333335 |
| 1970-01-01T00:02:34 | 5592972.0 |
| 1970-01-01T00:02:41 | 3728702.0 |
| 1970-01-01T00:02:48 | 3728776.0 |
| 1970-01-01T00:02:55 | 3728885.6666666665 |
| 1970-01-01T00:03:02 | 3728885.6666666665 |
| 1970-01-01T00:03:09 | 3728983.6666666665 |
| 1970-01-01T00:03:16 | 3729034.0 |
| 1970-01-01T00:03:23 | 3729034.0 |
| 1970-01-01T00:03:30 | 3729108.0 |
+---------------------+--------------------+
-- InfluxQL: SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
name: diskio
+---------------------+-------------------+
| time | moving_average |
+---------------------+-------------------+
| 1970-01-01T00:02:20 | 5592700.666666667 |
| 1970-01-01T00:02:27 | 5592817.666666667 |
| 1970-01-01T00:02:34 | 5592972.0 |
| 1970-01-01T00:02:41 | 5593071.666666667 |
| 1970-01-01T00:02:48 | 5593145.666666667 |
| 1970-01-01T00:02:55 | 5593255.333333333 |
| 1970-01-01T00:03:02 | 5593365.0 |
| 1970-01-01T00:03:09 | 5593463.0 |
| 1970-01-01T00:03:16 | 5593513.333333333 |
| 1970-01-01T00:03:23 | 5593563.666666667 |
| 1970-01-01T00:03:30 | 5593637.666666667 |
+---------------------+-------------------+
-- InfluxQL: SELECT difference(usage_idle), non_negative_difference(usage_idle), moving_average(usage_idle, 4) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY cpu; -- InfluxQL: SELECT difference(usage_idle), non_negative_difference(usage_idle), moving_average(usage_idle, 4) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY cpu;
name: cpu name: cpu
tags: cpu=cpu0 tags: cpu=cpu0
@ -649,6 +793,166 @@ tags: cpu=cpu1
| 1970-01-01T00:02:30 | -0.0005555555555557608 | | 1970-01-01T00:02:30 | -0.0005555555555557608 |
| 1970-01-01T00:03:00 | -0.000555555555555524 | | 1970-01-01T00:03:00 | -0.000555555555555524 |
+---------------------+------------------------+ +---------------------+------------------------+
-- InfluxQL: SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
name: diskio
+---------------------+------------+
| time | derivative |
+---------------------+------------+
| 1970-01-01T00:02:20 | 82.0 |
| 1970-01-01T00:02:27 | 187.0 |
| 1970-01-01T00:02:34 | 112.0 |
| 1970-01-01T00:02:48 | 55.0 |
| 1970-01-01T00:02:55 | 219.0 |
| 1970-01-01T00:03:09 | 37.5 |
| 1970-01-01T00:03:16 | 76.0 |
| 1970-01-01T00:03:30 | 73.0 |
+---------------------+------------+
-- InfluxQL: SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
name: diskio
+---------------------+--------------------+
| time | derivative |
+---------------------+--------------------+
| 1970-01-01T00:02:20 | 5.857142857142857 |
| 1970-01-01T00:02:27 | 13.357142857142858 |
| 1970-01-01T00:02:34 | 8.0 |
| 1970-01-01T00:02:48 | 3.9285714285714284 |
| 1970-01-01T00:02:55 | 15.642857142857142 |
| 1970-01-01T00:03:09 | 2.6785714285714284 |
| 1970-01-01T00:03:16 | 5.428571428571429 |
| 1970-01-01T00:03:30 | 5.214285714285714 |
+---------------------+--------------------+
-- InfluxQL: SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
name: diskio
+---------------------+------------+
| time | derivative |
+---------------------+------------+
| 1970-01-01T00:02:00 | 366.0 |
| 1970-01-01T00:02:30 | 421.0 |
| 1970-01-01T00:03:00 | 441.0 |
| 1970-01-01T00:03:30 | 297.0 |
+---------------------+------------+
-- InfluxQL: SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
name: diskio
+---------------------+-------------------+
| time | derivative |
+---------------------+-------------------+
| 1970-01-01T00:02:00 | 6.1 |
| 1970-01-01T00:02:30 | 7.016666666666667 |
| 1970-01-01T00:03:00 | 7.35 |
| 1970-01-01T00:03:30 | 4.95 |
+---------------------+-------------------+
-- InfluxQL: SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
name: diskio
+---------------------+------------+
| time | derivative |
+---------------------+------------+
| 1970-01-01T00:02:06 | 5592646.0 |
| 1970-01-01T00:02:13 | -5592646.0 |
| 1970-01-01T00:02:20 | 5592810.0 |
| 1970-01-01T00:02:27 | 187.0 |
| 1970-01-01T00:02:34 | 112.0 |
| 1970-01-01T00:02:41 | -5593109.0 |
| 1970-01-01T00:02:48 | 5593219.0 |
| 1970-01-01T00:02:55 | 219.0 |
| 1970-01-01T00:03:02 | -5593438.0 |
| 1970-01-01T00:03:09 | 5593513.0 |
| 1970-01-01T00:03:16 | 76.0 |
| 1970-01-01T00:03:23 | -5593589.0 |
| 1970-01-01T00:03:30 | 5593735.0 |
+---------------------+------------+
-- InfluxQL: SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
name: diskio
+---------------------+---------------------+
| time | derivative |
+---------------------+---------------------+
| 1970-01-01T00:02:06 | 399474.71428571426 |
| 1970-01-01T00:02:13 | -399474.71428571426 |
| 1970-01-01T00:02:20 | 399486.4285714286 |
| 1970-01-01T00:02:27 | 13.357142857142858 |
| 1970-01-01T00:02:34 | 8.0 |
| 1970-01-01T00:02:41 | -399507.78571428574 |
| 1970-01-01T00:02:48 | 399515.64285714284 |
| 1970-01-01T00:02:55 | 15.642857142857142 |
| 1970-01-01T00:03:02 | -399531.28571428574 |
| 1970-01-01T00:03:09 | 399536.64285714284 |
| 1970-01-01T00:03:16 | 5.428571428571429 |
| 1970-01-01T00:03:23 | -399542.0714285714 |
| 1970-01-01T00:03:30 | 399552.5 |
+---------------------+---------------------+
-- InfluxQL: SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
name: diskio
+---------------------+------------+
| time | derivative |
+---------------------+------------+
| 1970-01-01T00:02:13 | 0.0 |
| 1970-01-01T00:02:20 | 164.0 |
| 1970-01-01T00:02:27 | 187.0 |
| 1970-01-01T00:02:34 | 112.0 |
| 1970-01-01T00:02:41 | 0.0 |
| 1970-01-01T00:02:48 | 110.0 |
| 1970-01-01T00:02:55 | 219.0 |
| 1970-01-01T00:03:02 | 0.0 |
| 1970-01-01T00:03:09 | 75.0 |
| 1970-01-01T00:03:16 | 76.0 |
| 1970-01-01T00:03:23 | 0.0 |
| 1970-01-01T00:03:30 | 146.0 |
+---------------------+------------+
-- InfluxQL: SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
name: diskio
+---------------------+--------------------+
| time | derivative |
+---------------------+--------------------+
| 1970-01-01T00:02:13 | 0.0 |
| 1970-01-01T00:02:20 | 11.714285714285714 |
| 1970-01-01T00:02:27 | 13.357142857142858 |
| 1970-01-01T00:02:34 | 8.0 |
| 1970-01-01T00:02:41 | 0.0 |
| 1970-01-01T00:02:48 | 7.857142857142857 |
| 1970-01-01T00:02:55 | 15.642857142857142 |
| 1970-01-01T00:03:02 | 0.0 |
| 1970-01-01T00:03:09 | 5.357142857142857 |
| 1970-01-01T00:03:16 | 5.428571428571429 |
| 1970-01-01T00:03:23 | 0.0 |
| 1970-01-01T00:03:30 | 10.428571428571429 |
+---------------------+--------------------+
-- InfluxQL: SELECT derivative(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
name: cpu
tags: cpu=cpu0
+---------------------+---------------------+
| time | derivative |
+---------------------+---------------------+
| 1970-01-01T00:02:00 | -0.7999999999999972 |
| 1970-01-01T00:02:30 | 3.5 |
| 1970-01-01T00:03:00 | -0.4000000000000057 |
+---------------------+---------------------+
name: cpu
tags: cpu=cpu1
+---------------------+----------------------+
| time | derivative |
+---------------------+----------------------+
| 1970-01-01T00:02:00 | 0.20000000000000284 |
| 1970-01-01T00:02:30 | 0.0 |
| 1970-01-01T00:03:00 | -0.10000000000000853 |
+---------------------+----------------------+
-- InfluxQL: SELECT derivative(first(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
name: cpu
tags: cpu=cpu0
+---------------------+-----------------------+
| time | derivative |
+---------------------+-----------------------+
| 1970-01-01T00:02:00 | -0.013333333333333286 |
| 1970-01-01T00:02:30 | 0.058333333333333334 |
| 1970-01-01T00:03:00 | -0.006666666666666762 |
+---------------------+-----------------------+
name: cpu
tags: cpu=cpu1
+---------------------+------------------------+
| time | derivative |
+---------------------+------------------------+
| 1970-01-01T00:02:00 | 0.003333333333333381 |
| 1970-01-01T00:02:30 | 0.0 |
| 1970-01-01T00:03:00 | -0.0016666666666668088 |
+---------------------+------------------------+
-- InfluxQL: SELECT non_negative_derivative(writes) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001; -- InfluxQL: SELECT non_negative_derivative(writes) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001;
name: diskio name: diskio
+---------------------+-------------------------+ +---------------------+-------------------------+
@ -918,3 +1222,407 @@ tags: cpu=cpu1
+---------------------+-------------------------+ +---------------------+-------------------------+
| 1970-01-01T00:02:00 | 0.006111111111111237 | | 1970-01-01T00:02:00 | 0.006111111111111237 |
+---------------------+-------------------------+ +---------------------+-------------------------+
-- InfluxQL: SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
name: diskio
+---------------------+-------------------------+
| time | non_negative_derivative |
+---------------------+-------------------------+
| 1970-01-01T00:02:20 | 82.0 |
| 1970-01-01T00:02:27 | 187.0 |
| 1970-01-01T00:02:34 | 112.0 |
| 1970-01-01T00:02:48 | 55.0 |
| 1970-01-01T00:02:55 | 219.0 |
| 1970-01-01T00:03:09 | 37.5 |
| 1970-01-01T00:03:16 | 76.0 |
| 1970-01-01T00:03:30 | 73.0 |
+---------------------+-------------------------+
-- InfluxQL: SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
name: diskio
+---------------------+-------------------------+
| time | non_negative_derivative |
+---------------------+-------------------------+
| 1970-01-01T00:02:20 | 5.857142857142857 |
| 1970-01-01T00:02:27 | 13.357142857142858 |
| 1970-01-01T00:02:34 | 8.0 |
| 1970-01-01T00:02:48 | 3.9285714285714284 |
| 1970-01-01T00:02:55 | 15.642857142857142 |
| 1970-01-01T00:03:09 | 2.6785714285714284 |
| 1970-01-01T00:03:16 | 5.428571428571429 |
| 1970-01-01T00:03:30 | 5.214285714285714 |
+---------------------+-------------------------+
-- InfluxQL: SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
name: diskio
+---------------------+-------------------------+
| time | non_negative_derivative |
+---------------------+-------------------------+
| 1970-01-01T00:02:00 | 366.0 |
| 1970-01-01T00:02:30 | 421.0 |
| 1970-01-01T00:03:00 | 441.0 |
| 1970-01-01T00:03:30 | 297.0 |
+---------------------+-------------------------+
-- InfluxQL: SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
name: diskio
+---------------------+-------------------------+
| time | non_negative_derivative |
+---------------------+-------------------------+
| 1970-01-01T00:02:00 | 6.1 |
| 1970-01-01T00:02:30 | 7.016666666666667 |
| 1970-01-01T00:03:00 | 7.35 |
| 1970-01-01T00:03:30 | 4.95 |
+---------------------+-------------------------+
-- InfluxQL: SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
name: diskio
+---------------------+-------------------------+
| time | non_negative_derivative |
+---------------------+-------------------------+
| 1970-01-01T00:02:06 | 5592646.0 |
| 1970-01-01T00:02:20 | 5592810.0 |
| 1970-01-01T00:02:27 | 187.0 |
| 1970-01-01T00:02:34 | 112.0 |
| 1970-01-01T00:02:48 | 5593219.0 |
| 1970-01-01T00:02:55 | 219.0 |
| 1970-01-01T00:03:09 | 5593513.0 |
| 1970-01-01T00:03:16 | 76.0 |
| 1970-01-01T00:03:30 | 5593735.0 |
+---------------------+-------------------------+
-- InfluxQL: SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
name: diskio
+---------------------+-------------------------+
| time | non_negative_derivative |
+---------------------+-------------------------+
| 1970-01-01T00:02:06 | 399474.71428571426 |
| 1970-01-01T00:02:20 | 399486.4285714286 |
| 1970-01-01T00:02:27 | 13.357142857142858 |
| 1970-01-01T00:02:34 | 8.0 |
| 1970-01-01T00:02:48 | 399515.64285714284 |
| 1970-01-01T00:02:55 | 15.642857142857142 |
| 1970-01-01T00:03:09 | 399536.64285714284 |
| 1970-01-01T00:03:16 | 5.428571428571429 |
| 1970-01-01T00:03:30 | 399552.5 |
+---------------------+-------------------------+
-- InfluxQL: SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
name: diskio
+---------------------+-------------------------+
| time | non_negative_derivative |
+---------------------+-------------------------+
| 1970-01-01T00:02:13 | 0.0 |
| 1970-01-01T00:02:20 | 164.0 |
| 1970-01-01T00:02:27 | 187.0 |
| 1970-01-01T00:02:34 | 112.0 |
| 1970-01-01T00:02:41 | 0.0 |
| 1970-01-01T00:02:48 | 110.0 |
| 1970-01-01T00:02:55 | 219.0 |
| 1970-01-01T00:03:02 | 0.0 |
| 1970-01-01T00:03:09 | 75.0 |
| 1970-01-01T00:03:16 | 76.0 |
| 1970-01-01T00:03:23 | 0.0 |
| 1970-01-01T00:03:30 | 146.0 |
+---------------------+-------------------------+
-- InfluxQL: SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
name: diskio
+---------------------+-------------------------+
| time | non_negative_derivative |
+---------------------+-------------------------+
| 1970-01-01T00:02:13 | 0.0 |
| 1970-01-01T00:02:20 | 11.714285714285714 |
| 1970-01-01T00:02:27 | 13.357142857142858 |
| 1970-01-01T00:02:34 | 8.0 |
| 1970-01-01T00:02:41 | 0.0 |
| 1970-01-01T00:02:48 | 7.857142857142857 |
| 1970-01-01T00:02:55 | 15.642857142857142 |
| 1970-01-01T00:03:02 | 0.0 |
| 1970-01-01T00:03:09 | 5.357142857142857 |
| 1970-01-01T00:03:16 | 5.428571428571429 |
| 1970-01-01T00:03:23 | 0.0 |
| 1970-01-01T00:03:30 | 10.428571428571429 |
+---------------------+-------------------------+
-- InfluxQL: SELECT non_negative_derivative(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
name: cpu
tags: cpu=cpu0
+---------------------+-------------------------+
| time | non_negative_derivative |
+---------------------+-------------------------+
| 1970-01-01T00:02:30 | 3.5 |
+---------------------+-------------------------+
name: cpu
tags: cpu=cpu1
+---------------------+-------------------------+
| time | non_negative_derivative |
+---------------------+-------------------------+
| 1970-01-01T00:02:00 | 0.20000000000000284 |
| 1970-01-01T00:02:30 | 0.0 |
+---------------------+-------------------------+
-- InfluxQL: SELECT non_negative_derivative(first(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
name: cpu
tags: cpu=cpu0
+---------------------+-------------------------+
| time | non_negative_derivative |
+---------------------+-------------------------+
| 1970-01-01T00:02:30 | 0.058333333333333334 |
+---------------------+-------------------------+
name: cpu
tags: cpu=cpu1
+---------------------+-------------------------+
| time | non_negative_derivative |
+---------------------+-------------------------+
| 1970-01-01T00:02:00 | 0.003333333333333381 |
| 1970-01-01T00:02:30 | 0.0 |
+---------------------+-------------------------+
-- InfluxQL: SELECT cumulative_sum(writes) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001;
name: diskio
+---------------------+----------------+
| time | cumulative_sum |
+---------------------+----------------+
| 1970-01-01T00:02:10 | 5592646 |
| 1970-01-01T00:02:20 | 11185456 |
| 1970-01-01T00:02:30 | 16778453 |
| 1970-01-01T00:02:40 | 22371562 |
| 1970-01-01T00:02:50 | 27964781 |
| 1970-01-01T00:03:00 | 33558219 |
| 1970-01-01T00:03:10 | 39151732 |
| 1970-01-01T00:03:20 | 44745321 |
| 1970-01-01T00:03:30 | 50339056 |
+---------------------+----------------+
-- InfluxQL: SELECT cumulative_sum(usage_system) FROM cpu WHERE time >= 0000000060000000000 AND time < 0000000210000000001 AND cpu = 'cpu0';
name: cpu
+---------------------+----------------+
| time | cumulative_sum |
+---------------------+----------------+
| 1970-01-01T00:01:00 | 89.5 |
| 1970-01-01T00:01:10 | 178.1 |
| 1970-01-01T00:01:30 | 261.5 |
| 1970-01-01T00:01:40 | 349.2 |
| 1970-01-01T00:02:10 | 439.0 |
| 1970-01-01T00:02:50 | 528.8 |
| 1970-01-01T00:03:00 | 618.8 |
+---------------------+----------------+
-- InfluxQL: SELECT cumulative_sum(usage_idle), cumulative_sum(usage_system) FROM cpu WHERE time >= 0000000060000000000 AND time < 0000000210000000001 AND cpu = 'cpu0';
name: cpu
+---------------------+--------------------+------------------+
| time | cumulative_sum | cumulative_sum_1 |
+---------------------+--------------------+------------------+
| 1970-01-01T00:01:00 | 89.5 | 89.5 |
| 1970-01-01T00:01:10 | 178.1 | 178.1 |
| 1970-01-01T00:01:20 | 266.7 | |
| 1970-01-01T00:01:30 | 350.1 | 261.5 |
| 1970-01-01T00:01:40 | 437.8 | 349.2 |
| 1970-01-01T00:01:50 | 526.5 | |
| 1970-01-01T00:02:00 | 613.4 | |
| 1970-01-01T00:02:10 | 703.1999999999999 | 439.0 |
| 1970-01-01T00:02:20 | 792.1999999999999 | |
| 1970-01-01T00:02:30 | 882.5999999999999 | |
| 1970-01-01T00:02:40 | 972.8 | |
| 1970-01-01T00:02:50 | 1062.6 | 528.8 |
| 1970-01-01T00:03:00 | 1152.6 | 618.8 |
| 1970-01-01T00:03:10 | 1241.3999999999999 | |
+---------------------+--------------------+------------------+
-- InfluxQL: SELECT cumulative_sum(usage_idle) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY cpu;
name: cpu
tags: cpu=cpu0
+---------------------+--------------------+
| time | cumulative_sum |
+---------------------+--------------------+
| 1970-01-01T00:02:10 | 89.8 |
| 1970-01-01T00:02:20 | 178.8 |
| 1970-01-01T00:02:30 | 269.20000000000005 |
| 1970-01-01T00:02:40 | 359.40000000000003 |
| 1970-01-01T00:02:50 | 449.20000000000005 |
| 1970-01-01T00:03:00 | 539.2 |
| 1970-01-01T00:03:10 | 628.0 |
+---------------------+--------------------+
name: cpu
tags: cpu=cpu1
+---------------------+--------------------+
| time | cumulative_sum |
+---------------------+--------------------+
| 1970-01-01T00:02:10 | 99.8 |
| 1970-01-01T00:02:20 | 199.7 |
| 1970-01-01T00:02:30 | 299.6 |
| 1970-01-01T00:02:40 | 399.40000000000003 |
| 1970-01-01T00:02:50 | 499.20000000000005 |
| 1970-01-01T00:03:00 | 599.0 |
| 1970-01-01T00:03:10 | 698.8 |
+---------------------+--------------------+
-- InfluxQL: SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
name: diskio
+---------------------+----------------+
| time | cumulative_sum |
+---------------------+----------------+
| 1970-01-01T00:02:06 | 5592646.0 |
| 1970-01-01T00:02:20 | 11185456.0 |
| 1970-01-01T00:02:27 | 16778453.0 |
| 1970-01-01T00:02:34 | 22371562.0 |
| 1970-01-01T00:02:48 | 27964781.0 |
| 1970-01-01T00:02:55 | 33558219.0 |
| 1970-01-01T00:03:09 | 39151732.0 |
| 1970-01-01T00:03:16 | 44745321.0 |
| 1970-01-01T00:03:30 | 50339056.0 |
+---------------------+----------------+
-- InfluxQL: SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
name: diskio
+---------------------+--------------------+
| time | cumulative_sum |
+---------------------+--------------------+
| 1970-01-01T00:02:00 | 5592728.0 |
| 1970-01-01T00:02:30 | 11185836.333333332 |
| 1970-01-01T00:03:00 | 16779349.666666664 |
| 1970-01-01T00:03:30 | 22373084.666666664 |
+---------------------+--------------------+
-- InfluxQL: SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
name: diskio
+---------------------+----------------+
| time | cumulative_sum |
+---------------------+----------------+
| 1970-01-01T00:02:06 | 5592646.0 |
| 1970-01-01T00:02:13 | 5592646.0 |
| 1970-01-01T00:02:20 | 11185456.0 |
| 1970-01-01T00:02:27 | 16778453.0 |
| 1970-01-01T00:02:34 | 22371562.0 |
| 1970-01-01T00:02:41 | 22371562.0 |
| 1970-01-01T00:02:48 | 27964781.0 |
| 1970-01-01T00:02:55 | 33558219.0 |
| 1970-01-01T00:03:02 | 33558219.0 |
| 1970-01-01T00:03:09 | 39151732.0 |
| 1970-01-01T00:03:16 | 44745321.0 |
| 1970-01-01T00:03:23 | 44745321.0 |
| 1970-01-01T00:03:30 | 50339056.0 |
+---------------------+----------------+
-- InfluxQL: SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
name: diskio
+---------------------+----------------+
| time | cumulative_sum |
+---------------------+----------------+
| 1970-01-01T00:02:06 | 5592646.0 |
| 1970-01-01T00:02:13 | 11185292.0 |
| 1970-01-01T00:02:20 | 16778102.0 |
| 1970-01-01T00:02:27 | 22371099.0 |
| 1970-01-01T00:02:34 | 27964208.0 |
| 1970-01-01T00:02:41 | 33557317.0 |
| 1970-01-01T00:02:48 | 39150536.0 |
| 1970-01-01T00:02:55 | 44743974.0 |
| 1970-01-01T00:03:02 | 50337412.0 |
| 1970-01-01T00:03:09 | 55930925.0 |
| 1970-01-01T00:03:16 | 61524514.0 |
| 1970-01-01T00:03:23 | 67118103.0 |
| 1970-01-01T00:03:30 | 72711838.0 |
+---------------------+----------------+
-- InfluxQL: SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
name: diskio
+---------------------+----------------+
| time | cumulative_sum |
+---------------------+----------------+
| 1970-01-01T00:02:06 | 5592646.0 |
| 1970-01-01T00:02:13 | 11185374.0 |
| 1970-01-01T00:02:20 | 16778184.0 |
| 1970-01-01T00:02:27 | 22371181.0 |
| 1970-01-01T00:02:34 | 27964290.0 |
| 1970-01-01T00:02:41 | 33557454.0 |
| 1970-01-01T00:02:48 | 39150673.0 |
| 1970-01-01T00:02:55 | 44744111.0 |
| 1970-01-01T00:03:02 | 50337586.5 |
| 1970-01-01T00:03:09 | 55931099.5 |
| 1970-01-01T00:03:16 | 61524688.5 |
| 1970-01-01T00:03:23 | 67118350.5 |
| 1970-01-01T00:03:30 | 72712085.5 |
+---------------------+----------------+
-- InfluxQL: SELECT cumulative_sum(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
name: cpu
tags: cpu=cpu0
+---------------------+--------------------+
| time | cumulative_sum |
+---------------------+--------------------+
| 1970-01-01T00:02:00 | 89.4 |
| 1970-01-01T00:02:30 | 179.53333333333336 |
| 1970-01-01T00:03:00 | 268.9333333333334 |
+---------------------+--------------------+
name: cpu
tags: cpu=cpu1
+---------------------+--------------------+
| time | cumulative_sum |
+---------------------+--------------------+
| 1970-01-01T00:02:00 | 99.85 |
| 1970-01-01T00:02:30 | 199.68333333333334 |
| 1970-01-01T00:03:00 | 299.48333333333335 |
+---------------------+--------------------+
-- InfluxQL: SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
name: diskio
+---------------------+----------------+
| time | cumulative_sum |
+---------------------+----------------+
| 1970-01-01T00:02:06 | 5592646 |
| 1970-01-01T00:02:20 | 11185456 |
| 1970-01-01T00:02:27 | 16778453 |
| 1970-01-01T00:02:34 | 22371562 |
| 1970-01-01T00:02:48 | 27964781 |
| 1970-01-01T00:02:55 | 33558219 |
| 1970-01-01T00:03:09 | 39151732 |
| 1970-01-01T00:03:16 | 44745321 |
| 1970-01-01T00:03:30 | 50339056 |
+---------------------+----------------+
-- InfluxQL: SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
name: diskio
+---------------------+----------------+
| time | cumulative_sum |
+---------------------+----------------+
| 1970-01-01T00:02:00 | 5592646 |
| 1970-01-01T00:02:30 | 11185643 |
| 1970-01-01T00:03:00 | 16779081 |
| 1970-01-01T00:03:30 | 22372816 |
+---------------------+----------------+
-- InfluxQL: SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
name: diskio
+---------------------+----------------+
| time | cumulative_sum |
+---------------------+----------------+
| 1970-01-01T00:02:06 | 5592646 |
| 1970-01-01T00:02:13 | 5592646 |
| 1970-01-01T00:02:20 | 11185456 |
| 1970-01-01T00:02:27 | 16778453 |
| 1970-01-01T00:02:34 | 22371562 |
| 1970-01-01T00:02:41 | 22371562 |
| 1970-01-01T00:02:48 | 27964781 |
| 1970-01-01T00:02:55 | 33558219 |
| 1970-01-01T00:03:02 | 33558219 |
| 1970-01-01T00:03:09 | 39151732 |
| 1970-01-01T00:03:16 | 44745321 |
| 1970-01-01T00:03:23 | 44745321 |
| 1970-01-01T00:03:30 | 50339056 |
+---------------------+----------------+
-- InfluxQL: SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
name: diskio
+---------------------+----------------+
| time | cumulative_sum |
+---------------------+----------------+
| 1970-01-01T00:02:06 | 5592646 |
| 1970-01-01T00:02:13 | 11185292 |
| 1970-01-01T00:02:20 | 16778102 |
| 1970-01-01T00:02:27 | 22371099 |
| 1970-01-01T00:02:34 | 27964208 |
| 1970-01-01T00:02:41 | 33557317 |
| 1970-01-01T00:02:48 | 39150536 |
| 1970-01-01T00:02:55 | 44743974 |
| 1970-01-01T00:03:02 | 50337412 |
| 1970-01-01T00:03:09 | 55930925 |
| 1970-01-01T00:03:16 | 61524514 |
| 1970-01-01T00:03:23 | 67118103 |
| 1970-01-01T00:03:30 | 72711838 |
+---------------------+----------------+
-- InfluxQL: SELECT cumulative_sum(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
name: cpu
tags: cpu=cpu0
+---------------------+----------------+
| time | cumulative_sum |
+---------------------+----------------+
| 1970-01-01T00:02:00 | 89.8 |
| 1970-01-01T00:02:30 | 180.2 |
| 1970-01-01T00:03:00 | 270.2 |
+---------------------+----------------+
name: cpu
tags: cpu=cpu1
+---------------------+----------------+
| time | cumulative_sum |
+---------------------+----------------+
| 1970-01-01T00:02:00 | 99.8 |
| 1970-01-01T00:02:30 | 199.7 |
| 1970-01-01T00:03:00 | 299.5 |
+---------------------+----------------+
@ -6,7 +6,7 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
sqlparser = "0.35.0" sqlparser = "0.36.0"
snafu = "0.7.5" snafu = "0.7.5"
generated_types = { path = "../generated_types" } generated_types = { path = "../generated_types" }
@ -45,7 +45,7 @@ tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-threa
tokio-util = "0.7.8" tokio-util = "0.7.8"
tonic = { workspace = true } tonic = { workspace = true }
trace = { version = "0.1.0", path = "../trace" } trace = { version = "0.1.0", path = "../trace" }
uuid = "1.4.0" uuid = "1.4.1"
wal = { version = "0.1.0", path = "../wal" } wal = { version = "0.1.0", path = "../wal" }
workspace-hack = { version = "0.1", path = "../workspace-hack" } workspace-hack = { version = "0.1", path = "../workspace-hack" }
@ -60,7 +60,7 @@ lazy_static = "1.4.0"
mutable_batch_lp = { path = "../mutable_batch_lp" } mutable_batch_lp = { path = "../mutable_batch_lp" }
object_store = { workspace = true } object_store = { workspace = true }
paste = "1.0.14" paste = "1.0.14"
tempfile = "3.6.0" tempfile = "3.7.0"
test_helpers = { path = "../test_helpers", features = ["future_timeout"] } test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
tokio = { version = "1.29", features = ["macros", "time", "test-util"] } tokio = { version = "1.29", features = ["macros", "time", "test-util"] }
@ -2,9 +2,7 @@ use std::{collections::HashMap, sync::Arc, time::Duration};
use async_trait::async_trait; use async_trait::async_trait;
use backoff::BackoffConfig; use backoff::BackoffConfig;
use data_types::{ use data_types::{NamespaceId, Partition, PartitionHashId, PartitionId, PartitionKey, TableId};
NamespaceId, Partition, PartitionHashId, PartitionId, PartitionKey, SequenceNumber, TableId,
};
use iox_catalog::interface::Catalog; use iox_catalog::interface::Catalog;
use observability_deps::tracing::debug; use observability_deps::tracing::debug;
use parking_lot::Mutex; use parking_lot::Mutex;
@ -222,6 +220,7 @@ mod tests {
// Harmless in tests - saves a bunch of extra vars. // Harmless in tests - saves a bunch of extra vars.
#![allow(clippy::await_holding_lock)] #![allow(clippy::await_holding_lock)]
use data_types::PartitionId;
use iox_catalog::mem::MemCatalog; use iox_catalog::mem::MemCatalog;
use super::*; use super::*;
@ -6,7 +6,6 @@ use std::{
}, },
}; };
use arrow::compute::kernels::partition;
use async_trait::async_trait; use async_trait::async_trait;
use data_types::{NamespaceId, PartitionKey, TableId}; use data_types::{NamespaceId, PartitionKey, TableId};
use futures::{future::Shared, FutureExt}; use futures::{future::Shared, FutureExt};
@ -25,11 +24,10 @@ use super::PartitionProvider;
type BoxedResolveFuture = type BoxedResolveFuture =
Pin<Box<dyn std::future::Future<Output = Arc<Mutex<PartitionData>>> + Send>>; Pin<Box<dyn std::future::Future<Output = Arc<Mutex<PartitionData>>> + Send>>;
/// A compound key of `(namespace, table, partition_key)` which uniquely /// A compound key of `(table, partition_key)` which uniquely
/// identifies a single partition. /// identifies a single partition.
#[derive(Debug, Clone, PartialEq, Eq, Hash)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct Key { struct Key {
namespace_id: NamespaceId,
table_id: TableId, table_id: TableId,
partition_key: PartitionKey, partition_key: PartitionKey,
} }
@ -149,7 +147,6 @@ where
table: Arc<DeferredLoad<TableMetadata>>, table: Arc<DeferredLoad<TableMetadata>>,
) -> Arc<Mutex<PartitionData>> { ) -> Arc<Mutex<PartitionData>> {
let key = Key { let key = Key {
namespace_id,
table_id, table_id,
partition_key: partition_key.clone(), // Ref-counted anyway! partition_key: partition_key.clone(), // Ref-counted anyway!
}; };
@ -267,12 +264,11 @@ mod tests {
use assert_matches::assert_matches; use assert_matches::assert_matches;
use futures::Future; use futures::Future;
use futures::{stream::FuturesUnordered, StreamExt}; use futures::{stream::FuturesUnordered, StreamExt};
use lazy_static::lazy_static;
use test_helpers::timeout::FutureTimeout; use test_helpers::timeout::FutureTimeout;
use tokio::sync::{Notify, Semaphore}; use tokio::sync::{Notify, Semaphore};
use crate::{ use crate::{
buffer_tree::partition::{resolver::mock::MockPartitionProvider, SortKeyState}, buffer_tree::partition::resolver::mock::MockPartitionProvider,
test_util::{ test_util::{
defer_namespace_name_1_sec, defer_table_metadata_1_sec, PartitionDataBuilder, defer_namespace_name_1_sec, defer_table_metadata_1_sec, PartitionDataBuilder,
ARBITRARY_NAMESPACE_ID, ARBITRARY_PARTITION_KEY, ARBITRARY_TABLE_ID, ARBITRARY_NAMESPACE_ID, ARBITRARY_PARTITION_KEY, ARBITRARY_TABLE_ID,
@ -2,8 +2,6 @@
//! //!
//! [`PartitionData`]: crate::buffer_tree::partition::PartitionData //! [`PartitionData`]: crate::buffer_tree::partition::PartitionData
#![allow(unused_imports)] // Transition time only.
mod cache; mod cache;
pub(crate) use cache::*; pub(crate) use cache::*;
@ -49,11 +49,11 @@ where
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::{sync::Arc, time::Duration}; use std::sync::Arc;
use super::*; use super::*;
use crate::{ use crate::{
buffer_tree::partition::{resolver::mock::MockPartitionProvider, SortKeyState}, buffer_tree::partition::resolver::mock::MockPartitionProvider,
test_util::{ test_util::{
defer_namespace_name_1_sec, defer_table_metadata_1_sec, PartitionDataBuilder, defer_namespace_name_1_sec, defer_table_metadata_1_sec, PartitionDataBuilder,
ARBITRARY_NAMESPACE_ID, ARBITRARY_PARTITION_ID, ARBITRARY_PARTITION_KEY, ARBITRARY_NAMESPACE_ID, ARBITRARY_PARTITION_ID, ARBITRARY_PARTITION_KEY,
@ -998,12 +998,8 @@ mod tests {
assert_eq!(m, 1, "tables counter mismatch"); assert_eq!(m, 1, "tables counter mismatch");
} }
/// Assert that multiple writes to a single namespace/table results in a
/// single namespace being created, and matching metrics.
#[tokio::test] #[tokio::test]
async fn test_partition_iter() { async fn test_partition_iter() {
// Configure the mock partition provider to return a single partition, named
// p1.
let partition_provider = Arc::new( let partition_provider = Arc::new(
MockPartitionProvider::default() MockPartitionProvider::default()
.with_partition( .with_partition(
@ -27,7 +27,7 @@ object_store = { workspace = true }
observability_deps = { version = "0.1.0", path = "../observability_deps" } observability_deps = { version = "0.1.0", path = "../observability_deps" }
parquet_file = { version = "0.1.0", path = "../parquet_file" } parquet_file = { version = "0.1.0", path = "../parquet_file" }
prost = { version = "0.11.9", default-features = false, features = ["std"] } prost = { version = "0.11.9", default-features = false, features = ["std"] }
tempfile = { version = "3.6.0" } tempfile = { version = "3.7.0" }
test_helpers = { path = "../test_helpers", features = ["future_timeout"] } test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] } tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] }
tokio-util = "0.7.8" tokio-util = "0.7.8"
@ -18,7 +18,7 @@ parking_lot = { version = "0.12" }
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
siphasher = "0.3" siphasher = "0.3"
snafu = "0.7" snafu = "0.7"
sqlx = { version = "0.6", features = [ "runtime-tokio-rustls" , "postgres", "uuid", "sqlite" ] } sqlx = { version = "0.7.1", features = [ "runtime-tokio-rustls" , "postgres", "uuid", "sqlite" ] }
sqlx-hotswap-pool = { path = "../sqlx-hotswap-pool" } sqlx-hotswap-pool = { path = "../sqlx-hotswap-pool" }
thiserror = "1.0.43" thiserror = "1.0.43"
tokio = { version = "1.29", features = ["io-util", "macros", "parking_lot", "rt-multi-thread", "time"] } tokio = { version = "1.29", features = ["io-util", "macros", "parking_lot", "rt-multi-thread", "time"] }
@ -0,0 +1,11 @@
-- Drop the foreign key constraints referencing the various
-- placeholder kafka columns
ALTER TABLE IF EXISTS namespace DROP CONSTRAINT IF EXISTS namespace_kafka_topic_id_fkey, DROP CONSTRAINT IF EXISTS namespace_query_pool_id_fkey;
ALTER TABLE IF EXISTS parquet_file DROP CONSTRAINT IF EXISTS parquet_file_sequencer_id_fkey;
ALTER TABLE IF EXISTS partition DROP CONSTRAINT IF EXISTS partition_sequencer_id_fkey;
ALTER TABLE IF EXISTS tombstone DROP CONSTRAINT IF EXISTS tombstone_sequencer_id_fkey;
-- Allow the ID columns in these tables to be nullable
ALTER TABLE IF EXISTS namespace ALTER COLUMN topic_id DROP NOT NULL, ALTER COLUMN query_pool_id DROP NOT NULL;
ALTER TABLE IF EXISTS parquet_file ALTER COLUMN shard_id DROP NOT NULL;
ALTER TABLE IF EXISTS partition ALTER COLUMN shard_id DROP NOT NULL;
ALTER TABLE IF EXISTS tombstone ALTER COLUMN shard_id DROP NOT NULL;
@ -0,0 +1,13 @@
-- FUNCTION that updates the new_file_at field in the partition table when the update_partition trigger is fired
-- The field new_file_at signals when the last file was added to the partition for compaction.
CREATE OR REPLACE FUNCTION update_partition_on_new_file_at()
RETURNS TRIGGER
LANGUAGE PLPGSQL
AS $$
BEGIN
UPDATE partition SET new_file_at = NEW.created_at WHERE id = NEW.partition_id;
RETURN NEW;
END;
$$;
@ -0,0 +1,9 @@
-- update new_file_at for all compactions, not just L0 & L1
drop trigger update_partition;
create trigger if not exists update_partition
after insert
on parquet_file
for each row
begin
UPDATE partition set new_file_at = NEW.created_at WHERE id = NEW.partition_id;
end;
@ -372,12 +372,25 @@ pub trait PartitionRepo: Send + Sync {
/// get partition by ID /// get partition by ID
async fn get_by_id(&mut self, partition_id: PartitionId) -> Result<Option<Partition>>; async fn get_by_id(&mut self, partition_id: PartitionId) -> Result<Option<Partition>>;
/// get multiple partitions by ID.
///
/// the output order is undefined; non-existing partitions are not part of the output.
async fn get_by_id_batch(&mut self, partition_ids: Vec<PartitionId>) -> Result<Vec<Partition>>;
/// get partition by deterministic hash ID /// get partition by deterministic hash ID
async fn get_by_hash_id( async fn get_by_hash_id(
&mut self, &mut self,
partition_hash_id: &PartitionHashId, partition_hash_id: &PartitionHashId,
) -> Result<Option<Partition>>; ) -> Result<Option<Partition>>;
/// get multiple partitions by deterministic hash ID
///
/// the output order is undefined; non-existing partitions are not part of the output.
async fn get_by_hash_id_batch(
&mut self,
partition_hash_ids: &[&PartitionHashId],
) -> Result<Vec<Partition>>;
/// return the partitions by table id /// return the partitions by table id
async fn list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>>; async fn list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>>;
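To illustrate how the new batch lookups might be consumed, here is a minimal sketch, assuming the `iox_catalog`/`data_types` paths below and a generic `RepoCollection` handle as used elsewhere in the crate; `load_partitions` is a hypothetical helper, not part of this change:
use std::collections::HashMap;
use data_types::{Partition, PartitionId};
use iox_catalog::interface::{Error, PartitionRepo, RepoCollection};
/// Hypothetical caller: fetch many partitions in one catalog round-trip
/// instead of N `get_by_id` calls, then index them by ID. IDs that no
/// longer exist are simply absent from the returned map.
async fn load_partitions<R>(
    repos: &mut R,
    ids: Vec<PartitionId>,
) -> Result<HashMap<PartitionId, Partition>, Error>
where
    R: RepoCollection + ?Sized,
{
    // The batch call returns only the partitions that exist, in undefined order.
    let partitions = repos.partitions().get_by_id_batch(ids).await?;
    Ok(partitions.into_iter().map(|p| (p.id, p)).collect())
}
The same pattern applies to `get_by_hash_id_batch`, which takes `&[&PartitionHashId]` instead of a `Vec<PartitionId>`.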
@ -1487,6 +1500,8 @@ pub(crate) mod test_helpers {
.unwrap(); .unwrap();
// partitions can be retrieved easily // partitions can be retrieved easily
let mut created_sorted = created.values().cloned().collect::<Vec<_>>();
created_sorted.sort_by_key(|p| p.id);
assert_eq!( assert_eq!(
other_partition, other_partition,
repos repos
@ -1505,21 +1520,47 @@ pub(crate) mod test_helpers {
.unwrap() .unwrap()
.unwrap() .unwrap()
); );
let non_existing_partition_id = PartitionId::new(i64::MAX);
let non_existing_partition_hash_id =
PartitionHashId::new(TableId::new(i64::MAX), &PartitionKey::from("arbitrary"));
assert!(repos assert!(repos
.partitions() .partitions()
.get_by_id(PartitionId::new(i64::MAX)) .get_by_id(non_existing_partition_id)
.await .await
.unwrap() .unwrap()
.is_none()); .is_none());
assert!(repos assert!(repos
.partitions() .partitions()
.get_by_hash_id(&PartitionHashId::new( .get_by_hash_id(&non_existing_partition_hash_id)
TableId::new(i64::MAX),
&PartitionKey::from("arbitrary")
))
.await .await
.unwrap() .unwrap()
.is_none()); .is_none());
let mut batch = repos
.partitions()
.get_by_id_batch(
created
.keys()
.cloned()
.chain([non_existing_partition_id])
.collect(),
)
.await
.unwrap();
batch.sort_by_key(|p| p.id);
assert_eq!(created_sorted, batch);
let mut batch = repos
.partitions()
.get_by_hash_id_batch(
&created
.values()
.map(|p| p.hash_id().unwrap())
.chain([&non_existing_partition_hash_id])
.collect::<Vec<_>>(),
)
.await
.unwrap();
batch.sort_by_key(|p| p.id);
assert_eq!(created_sorted, batch);
let listed = repos let listed = repos
.partitions() .partitions()
@ -2534,7 +2575,6 @@ pub(crate) mod test_helpers {
assert!(partitions.is_empty()); assert!(partitions.is_empty());
// Add an L2 file created just now for partition three // Add an L2 file created just now for partition three
// Since the file is L2, the partition won't get updated
let l2_file_params = ParquetFileParams { let l2_file_params = ParquetFileParams {
object_store_id: Uuid::new_v4(), object_store_id: Uuid::new_v4(),
created_at: time_now, created_at: time_now,
@ -2547,16 +2587,17 @@ pub(crate) mod test_helpers {
.create(l2_file_params.clone()) .create(l2_file_params.clone())
.await .await
.unwrap(); .unwrap();
// still should return partition one and two only // now should return partitions one, two and three
let mut partitions = repos let mut partitions = repos
.partitions() .partitions()
.partitions_new_file_between(time_two_hour_ago, None) .partitions_new_file_between(time_two_hour_ago, None)
.await .await
.unwrap(); .unwrap();
assert_eq!(partitions.len(), 2); assert_eq!(partitions.len(), 3);
partitions.sort(); partitions.sort();
assert_eq!(partitions[0], partition1.id); assert_eq!(partitions[0], partition1.id);
assert_eq!(partitions[1], partition2.id); assert_eq!(partitions[1], partition2.id);
assert_eq!(partitions[2], partition3.id);
// Only return partition1: the creation time must be strictly less than the maximum time, // Only return partition1: the creation time must be strictly less than the maximum time,
// not equal // not equal
let partitions = repos let partitions = repos

View File

@ -88,6 +88,48 @@ where
} }
} }
/// Look up multiple partitions in the catalog by either database-assigned ID or deterministic hash ID.
///
/// The output only contains existing partitions; the order is undefined.
///
/// The existence of this function should be temporary; it can be removed once all partition lookup
/// is happening with only the deterministic hash ID.
pub async fn partition_lookup_batch<R>(
repos: &mut R,
ids: &[&TransitionPartitionId],
) -> Result<Vec<Partition>, Error>
where
R: RepoCollection + ?Sized,
{
let mut partition_ids = Vec::with_capacity(ids.len());
let mut partition_hash_ids = Vec::with_capacity(ids.len());
for id in ids {
match id {
TransitionPartitionId::Deprecated(partition_id) => {
partition_ids.push(*partition_id);
}
TransitionPartitionId::Deterministic(partition_hash_id) => {
partition_hash_ids.push(partition_hash_id);
}
}
}
let mut out = Vec::with_capacity(partition_ids.len() + partition_hash_ids.len());
if !partition_ids.is_empty() {
let mut partitions = repos.partitions().get_by_id_batch(partition_ids).await?;
out.append(&mut partitions);
}
if !partition_hash_ids.is_empty() {
let mut partitions = repos
.partitions()
.get_by_hash_id_batch(&partition_hash_ids)
.await?;
out.append(&mut partitions);
}
Ok(out)
}
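For reference, a minimal sketch of how a caller might use the new batch lookup, assuming a `RepoCollection` handle named `repos` and two existing `TransitionPartitionId` values (the function and variable names here are illustrative, not part of this change):

    async fn lookup_example<R: RepoCollection + ?Sized>(
        repos: &mut R,
        id_a: TransitionPartitionId,
        id_b: TransitionPartitionId,
    ) -> Result<(), Error> {
        // Mixed deprecated/deterministic IDs are accepted; the helper splits
        // them internally and issues at most one query per ID kind.
        let ids = vec![&id_a, &id_b];
        let partitions = partition_lookup_batch(repos, &ids).await?;
        // Non-existing IDs are simply absent from the result, and the order
        // of the returned partitions is undefined.
        assert!(partitions.len() <= ids.len());
        Ok(())
    }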
/// Given an iterator of `(table_name, batch)` to validate, this function /// Given an iterator of `(table_name, batch)` to validate, this function
/// ensures all the columns within `batch` match the existing schema for /// ensures all the columns within `batch` match the existing schema for
/// `table_name` in `schema`. If the column does not already exist in `schema`, /// `table_name` in `schema`. If the column does not already exist in `schema`,

View File

@ -586,6 +586,19 @@ impl PartitionRepo for MemTxn {
.cloned()) .cloned())
} }
async fn get_by_id_batch(&mut self, partition_ids: Vec<PartitionId>) -> Result<Vec<Partition>> {
let lookup = partition_ids.into_iter().collect::<HashSet<_>>();
let stage = self.stage();
Ok(stage
.partitions
.iter()
.filter(|p| lookup.contains(&p.id))
.cloned()
.collect())
}
async fn get_by_hash_id( async fn get_by_hash_id(
&mut self, &mut self,
partition_hash_id: &PartitionHashId, partition_hash_id: &PartitionHashId,
@ -603,6 +616,26 @@ impl PartitionRepo for MemTxn {
.cloned()) .cloned())
} }
async fn get_by_hash_id_batch(
&mut self,
partition_hash_ids: &[&PartitionHashId],
) -> Result<Vec<Partition>> {
let lookup = partition_hash_ids.iter().copied().collect::<HashSet<_>>();
let stage = self.stage();
Ok(stage
.partitions
.iter()
.filter(|p| {
p.hash_id()
.map(|hash_id| lookup.contains(hash_id))
.unwrap_or_default()
})
.cloned()
.collect())
}
async fn list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>> { async fn list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>> {
let stage = self.stage(); let stage = self.stage();
@ -962,23 +995,19 @@ async fn create_parquet_file(
parquet_file_params, parquet_file_params,
ParquetFileId::new(stage.parquet_files.len() as i64 + 1), ParquetFileId::new(stage.parquet_files.len() as i64 + 1),
); );
let compaction_level = parquet_file.compaction_level;
let created_at = parquet_file.created_at; let created_at = parquet_file.created_at;
let partition_id = parquet_file.partition_id; let partition_id = parquet_file.partition_id;
stage.parquet_files.push(parquet_file); stage.parquet_files.push(parquet_file);
// Update the new_file_at field of its partition to the time of created_at // Update the new_file_at field of its partition to the time of created_at
// Only update if the compaction level is not Final which signal more compaction needed let partition = stage
if compaction_level < CompactionLevel::Final { .partitions
let partition = stage .iter_mut()
.partitions .find(|p| p.id == partition_id)
.iter_mut() .ok_or(Error::PartitionNotFound {
.find(|p| p.id == partition_id) id: TransitionPartitionId::Deprecated(partition_id),
.ok_or(Error::PartitionNotFound { })?;
id: TransitionPartitionId::Deprecated(partition_id), partition.new_file_at = Some(created_at);
})?;
partition.new_file_at = Some(created_at);
}
Ok(stage.parquet_files.last().unwrap().clone()) Ok(stage.parquet_files.last().unwrap().clone())
} }

View File

@ -171,7 +171,9 @@ decorate!(
methods = [ methods = [
"partition_create_or_get" = create_or_get(&mut self, key: PartitionKey, table_id: TableId) -> Result<Partition>; "partition_create_or_get" = create_or_get(&mut self, key: PartitionKey, table_id: TableId) -> Result<Partition>;
"partition_get_by_id" = get_by_id(&mut self, partition_id: PartitionId) -> Result<Option<Partition>>; "partition_get_by_id" = get_by_id(&mut self, partition_id: PartitionId) -> Result<Option<Partition>>;
"partition_get_by_id_batch" = get_by_id_batch(&mut self, partition_ids: Vec<PartitionId>) -> Result<Vec<Partition>>;
"partition_get_by_hash_id" = get_by_hash_id(&mut self, partition_hash_id: &PartitionHashId) -> Result<Option<Partition>>; "partition_get_by_hash_id" = get_by_hash_id(&mut self, partition_hash_id: &PartitionHashId) -> Result<Option<Partition>>;
"partition_get_by_hash_id_batch" = get_by_hash_id_batch(&mut self, partition_hash_ids: &[&PartitionHashId]) -> Result<Vec<Partition>>;
"partition_list_by_table_id" = list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>>; "partition_list_by_table_id" = list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>>;
"partition_list_ids" = list_ids(&mut self) -> Result<Vec<PartitionId>>; "partition_list_ids" = list_ids(&mut self) -> Result<Vec<PartitionId>>;
"partition_update_sort_key" = cas_sort_key(&mut self, partition_id: &TransitionPartitionId, old_sort_key: Option<Vec<String>>, new_sort_key: &[&str]) -> Result<Partition, CasFailure<Vec<String>>>; "partition_update_sort_key" = cas_sort_key(&mut self, partition_id: &TransitionPartitionId, old_sort_key: Option<Vec<String>>, new_sort_key: &[&str]) -> Result<Partition, CasFailure<Vec<String>>>;

View File

@ -329,9 +329,9 @@ async fn new_raw_pool(
parsed_dsn: &str, parsed_dsn: &str,
) -> Result<sqlx::Pool<Postgres>, sqlx::Error> { ) -> Result<sqlx::Pool<Postgres>, sqlx::Error> {
// sqlx exposes some options as pool options, while other options are available as connection options. // sqlx exposes some options as pool options, while other options are available as connection options.
let mut connect_options = PgConnectOptions::from_str(parsed_dsn)?; let connect_options = PgConnectOptions::from_str(parsed_dsn)?
// the default is INFO, which is frankly surprising. // the default is INFO, which is frankly surprising.
connect_options.log_statements(log::LevelFilter::Trace); .log_statements(log::LevelFilter::Trace);
let app_name = options.app_name.clone(); let app_name = options.app_name.clone();
let app_name2 = options.app_name.clone(); // just to log below let app_name2 = options.app_name.clone(); // just to log below
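The hunk above reflects the newer sqlx builder-style API, where `log_statements` consumes the options and returns them instead of mutating in place. A minimal sketch of the same pattern in isolation (the DSN and function name are placeholders):

    fn build_connect_options(dsn: &str) -> Result<sqlx::postgres::PgConnectOptions, sqlx::Error> {
        use sqlx::ConnectOptions;
        use std::str::FromStr;

        // Parse the DSN, then lower statement logging from the default of
        // INFO down to TRACE, exactly as done above.
        Ok(sqlx::postgres::PgConnectOptions::from_str(dsn)?
            .log_statements(log::LevelFilter::Trace))
    }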
@ -816,7 +816,7 @@ RETURNING *;
.bind(name) // $1 .bind(name) // $1
.bind(partition_template) // $2 .bind(partition_template) // $2
.bind(namespace_id) // $3 .bind(namespace_id) // $3
.fetch_one(&mut tx) .fetch_one(&mut *tx)
.await .await
.map_err(|e| match e { .map_err(|e| match e {
sqlx::Error::RowNotFound => Error::TableCreateLimitError { sqlx::Error::RowNotFound => Error::TableCreateLimitError {
@ -843,7 +843,8 @@ RETURNING *;
// columns with an unsupported type. // columns with an unsupported type.
for template_part in table.partition_template.parts() { for template_part in table.partition_template.parts() {
if let TemplatePart::TagValue(tag_name) = template_part { if let TemplatePart::TagValue(tag_name) = template_part {
insert_column_with_connection(&mut tx, tag_name, table.id, ColumnType::Tag).await?; insert_column_with_connection(&mut *tx, tag_name, table.id, ColumnType::Tag)
.await?;
} }
} }
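The repeated `&mut tx` to `&mut *tx` changes in this file follow from the updated sqlx API: a `Transaction` no longer acts as an `Executor` itself, but it derefs to the underlying connection, which does. A minimal sketch of the pattern, assuming a Postgres pool and placeholder statements:

    async fn two_statements(pool: &sqlx::PgPool) -> Result<(), sqlx::Error> {
        let mut tx = pool.begin().await?;
        // `&mut *tx` reborrows through Deref to a `&mut PgConnection`,
        // which implements `Executor`; the transaction itself stays usable
        // for the next statement and for the final commit.
        sqlx::query("SELECT 1").execute(&mut *tx).await?;
        sqlx::query("SELECT 2").execute(&mut *tx).await?;
        tx.commit().await
    }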
@ -1095,6 +1096,22 @@ WHERE id = $1;
Ok(Some(partition)) Ok(Some(partition))
} }
async fn get_by_id_batch(&mut self, partition_ids: Vec<PartitionId>) -> Result<Vec<Partition>> {
let ids: Vec<_> = partition_ids.iter().map(|p| p.get()).collect();
sqlx::query_as::<_, Partition>(
r#"
SELECT id, hash_id, table_id, partition_key, sort_key, new_file_at
FROM partition
WHERE id = ANY($1);
"#,
)
.bind(&ids[..]) // $1
.fetch_all(&mut self.inner)
.await
.map_err(|e| Error::SqlxError { source: e })
}
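The `= ANY($1)` binding used by `get_by_id_batch` above is the Postgres-side batch technique recommended in the sqlx FAQ. A standalone sketch against a hypothetical `example` table with a BIGINT `id` column:

    async fn existing_ids(pool: &sqlx::PgPool, candidates: &[i64]) -> Result<Vec<i64>, sqlx::Error> {
        // The whole slice is bound as a single array parameter, so one
        // round trip covers an arbitrary number of IDs.
        let found: Vec<i64> = sqlx::query_scalar("SELECT id FROM example WHERE id = ANY($1)")
            .bind(candidates)
            .fetch_all(pool)
            .await?;
        Ok(found)
    }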
async fn get_by_hash_id( async fn get_by_hash_id(
&mut self, &mut self,
partition_hash_id: &PartitionHashId, partition_hash_id: &PartitionHashId,
@ -1119,6 +1136,25 @@ WHERE hash_id = $1;
Ok(Some(partition)) Ok(Some(partition))
} }
async fn get_by_hash_id_batch(
&mut self,
partition_ids: &[&PartitionHashId],
) -> Result<Vec<Partition>> {
let ids: Vec<_> = partition_ids.iter().map(|p| p.as_bytes()).collect();
sqlx::query_as::<_, Partition>(
r#"
SELECT id, hash_id, table_id, partition_key, sort_key, new_file_at
FROM partition
WHERE hash_id = ANY($1);
"#,
)
.bind(&ids[..]) // $1
.fetch_all(&mut self.inner)
.await
.map_err(|e| Error::SqlxError { source: e })
}
async fn list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>> { async fn list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>> {
sqlx::query_as::<_, Partition>( sqlx::query_as::<_, Partition>(
r#" r#"
@ -1538,15 +1574,14 @@ WHERE object_store_id = $1;
) -> Result<Vec<Uuid>> { ) -> Result<Vec<Uuid>> {
sqlx::query( sqlx::query(
// sqlx's readme suggests using PG's ANY operator instead of IN; see link below. // sqlx's readme suggests using PG's ANY operator instead of IN; see link below.
// https://github.com/launchbadge/sqlx/blob/main/FAQ.md#how-can-i-do-a-select--where-foo-in--query
r#" r#"
SELECT object_store_id SELECT object_store_id
FROM parquet_file FROM parquet_file
WHERE object_store_id = ANY($1); WHERE object_store_id = ANY($1);
"#, "#,
) )
// from https://github.com/launchbadge/sqlx/blob/main/FAQ.md#how-can-i-do-a-select--where-foo-in--query .bind(object_store_ids) // $1
// a bug of the parameter typechecking code requires all array parameters to be slices
.bind(&object_store_ids[..]) // $1
.map(|pgr| pgr.get::<Uuid, _>("object_store_id")) .map(|pgr| pgr.get::<Uuid, _>("object_store_id"))
.fetch_all(&mut self.inner) .fetch_all(&mut self.inner)
.await .await
@ -1576,13 +1611,13 @@ WHERE object_store_id = ANY($1);
.map_err(|e| Error::StartTransaction { source: e })?; .map_err(|e| Error::StartTransaction { source: e })?;
let marked_at = Timestamp::from(self.time_provider.now()); let marked_at = Timestamp::from(self.time_provider.now());
flag_for_delete(&mut tx, delete, marked_at).await?; flag_for_delete(&mut *tx, delete, marked_at).await?;
update_compaction_level(&mut tx, upgrade, target_level).await?; update_compaction_level(&mut *tx, upgrade, target_level).await?;
let mut ids = Vec::with_capacity(create.len()); let mut ids = Vec::with_capacity(create.len());
for file in create { for file in create {
let id = create_parquet_file(&mut tx, file).await?; let id = create_parquet_file(&mut *tx, file).await?;
ids.push(id); ids.push(id);
} }
@ -1667,12 +1702,9 @@ async fn flag_for_delete<'q, E>(
where where
E: Executor<'q, Database = Postgres>, E: Executor<'q, Database = Postgres>,
{ {
// If I try to do `.bind(parquet_file_ids)` directly, I get a compile error from sqlx.
// See https://github.com/launchbadge/sqlx/issues/1744
let ids: Vec<_> = ids.iter().map(|p| p.get()).collect();
let query = sqlx::query(r#"UPDATE parquet_file SET to_delete = $1 WHERE id = ANY($2);"#) let query = sqlx::query(r#"UPDATE parquet_file SET to_delete = $1 WHERE id = ANY($2);"#)
.bind(marked_at) // $1 .bind(marked_at) // $1
.bind(&ids[..]); // $2 .bind(ids); // $2
query query
.execute(executor) .execute(executor)
.await .await
@ -1689,9 +1721,6 @@ async fn update_compaction_level<'q, E>(
where where
E: Executor<'q, Database = Postgres>, E: Executor<'q, Database = Postgres>,
{ {
// If I try to do `.bind(parquet_file_ids)` directly, I get a compile error from sqlx.
// See https://github.com/launchbadge/sqlx/issues/1744
let ids: Vec<_> = parquet_file_ids.iter().map(|p| p.get()).collect();
let query = sqlx::query( let query = sqlx::query(
r#" r#"
UPDATE parquet_file UPDATE parquet_file
@ -1700,7 +1729,7 @@ WHERE id = ANY($2);
"#, "#,
) )
.bind(compaction_level) // $1 .bind(compaction_level) // $1
.bind(&ids[..]); // $2 .bind(parquet_file_ids); // $2
query query
.execute(executor) .execute(executor)
.await .await

View File

@ -24,8 +24,8 @@ use data_types::{
Table, TableId, Timestamp, TransitionPartitionId, Table, TableId, Timestamp, TransitionPartitionId,
}; };
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::{collections::HashMap, fmt::Display}; use std::{collections::HashMap, fmt::Display};
use std::{collections::HashSet, fmt::Write};
use crate::interface::MAX_PARQUET_FILES_SELECTED_ONCE_FOR_DELETE; use crate::interface::MAX_PARQUET_FILES_SELECTED_ONCE_FOR_DELETE;
use iox_time::{SystemProvider, TimeProvider}; use iox_time::{SystemProvider, TimeProvider};
@ -577,7 +577,7 @@ RETURNING *;
.bind(name) // $1 .bind(name) // $1
.bind(partition_template) // $2 .bind(partition_template) // $2
.bind(namespace_id) // $3 .bind(namespace_id) // $3
.fetch_one(&mut tx) .fetch_one(&mut *tx)
.await .await
.map_err(|e| match e { .map_err(|e| match e {
sqlx::Error::RowNotFound => Error::TableCreateLimitError { sqlx::Error::RowNotFound => Error::TableCreateLimitError {
@ -604,7 +604,8 @@ RETURNING *;
// columns with an unsupported type. // columns with an unsupported type.
for template_part in table.partition_template.parts() { for template_part in table.partition_template.parts() {
if let TemplatePart::TagValue(tag_name) = template_part { if let TemplatePart::TagValue(tag_name) = template_part {
insert_column_with_connection(&mut tx, tag_name, table.id, ColumnType::Tag).await?; insert_column_with_connection(&mut *tx, tag_name, table.id, ColumnType::Tag)
.await?;
} }
} }
@ -891,6 +892,24 @@ WHERE id = $1;
Ok(Some(partition.into())) Ok(Some(partition.into()))
} }
async fn get_by_id_batch(&mut self, partition_ids: Vec<PartitionId>) -> Result<Vec<Partition>> {
// We use a JSON-based "IS IN" check.
let ids: Vec<_> = partition_ids.iter().map(|p| p.get()).collect();
sqlx::query_as::<_, PartitionPod>(
r#"
SELECT id, hash_id, table_id, partition_key, sort_key, new_file_at
FROM partition
WHERE id IN (SELECT value FROM json_each($1));
"#,
)
.bind(Json(&ids[..])) // $1
.fetch_all(self.inner.get_mut())
.await
.map(|vals| vals.into_iter().map(Partition::from).collect())
.map_err(|e| Error::SqlxError { source: e })
}
async fn get_by_hash_id( async fn get_by_hash_id(
&mut self, &mut self,
partition_hash_id: &PartitionHashId, partition_hash_id: &PartitionHashId,
@ -915,6 +934,38 @@ WHERE hash_id = $1;
Ok(Some(partition.into())) Ok(Some(partition.into()))
} }
async fn get_by_hash_id_batch(
&mut self,
partition_hash_ids: &[&PartitionHashId],
) -> Result<Vec<Partition>> {
// We use a JSON-based "IS IN" check.
let ids: Vec<_> = partition_hash_ids
.iter()
.map(|id| {
// convert partition hash ID to uppercase hex string
let bytes = id.as_bytes();
let mut s = String::with_capacity(bytes.len() * 2);
for b in bytes {
write!(&mut s, "{:02X}", b).expect("never fails");
}
s
})
.collect();
sqlx::query_as::<_, PartitionPod>(
r#"
SELECT id, hash_id, table_id, partition_key, sort_key, new_file_at
FROM partition
WHERE hex(hash_id) IN (SELECT value FROM json_each($1));
"#,
)
.bind(Json(&ids[..])) // $1
.fetch_all(self.inner.get_mut())
.await
.map(|vals| vals.into_iter().map(Partition::from).collect())
.map_err(|e| Error::SqlxError { source: e })
}
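SQLite has no array bind like Postgres' ANY, so the implementations above serialize the ID list to JSON, bind it as one parameter, and expand it with `json_each` (hex-encoding the BLOB hash IDs so they can be compared as text). A reduced sketch of the technique, with placeholder table and column names:

    async fn rows_in_id_set(pool: &sqlx::SqlitePool, ids: &[i64]) -> Result<Vec<i64>, sqlx::Error> {
        // One JSON parameter stands in for the whole set; `json_each`
        // turns it back into rows on the SQLite side.
        sqlx::query_scalar("SELECT id FROM example WHERE id IN (SELECT value FROM json_each($1))")
            .bind(sqlx::types::Json(ids))
            .fetch_all(pool)
            .await
    }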
async fn list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>> { async fn list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>> {
Ok(sqlx::query_as::<_, PartitionPod>( Ok(sqlx::query_as::<_, PartitionPod>(
r#" r#"
@ -1451,14 +1502,14 @@ WHERE object_store_id IN ({v});",
for id in delete { for id in delete {
let marked_at = Timestamp::from(self.time_provider.now()); let marked_at = Timestamp::from(self.time_provider.now());
flag_for_delete(&mut tx, *id, marked_at).await?; flag_for_delete(&mut *tx, *id, marked_at).await?;
} }
update_compaction_level(&mut tx, upgrade, target_level).await?; update_compaction_level(&mut *tx, upgrade, target_level).await?;
let mut ids = Vec::with_capacity(create.len()); let mut ids = Vec::with_capacity(create.len());
for file in create { for file in create {
let res = create_parquet_file(&mut tx, file.clone()).await?; let res = create_parquet_file(&mut *tx, file.clone()).await?;
ids.push(res.id); ids.push(res.id);
} }
tx.commit() tx.commit()
@ -1562,8 +1613,7 @@ async fn update_compaction_level<'q, E>(
where where
E: Executor<'q, Database = Sqlite>, E: Executor<'q, Database = Sqlite>,
{ {
// If I try to do `.bind(parquet_file_ids)` directly, I get a compile error from sqlx. // We use a JSON-based "IS IN" check.
// See https://github.com/launchbadge/sqlx/issues/1744
let ids: Vec<_> = parquet_file_ids.iter().map(|p| p.get()).collect(); let ids: Vec<_> = parquet_file_ids.iter().map(|p| p.get()).collect();
let query = sqlx::query( let query = sqlx::query(
r#" r#"

View File

@ -6,7 +6,7 @@ pub mod field;
pub mod fieldlist; pub mod fieldlist;
pub mod gapfill; pub mod gapfill;
mod non_null_checker; mod non_null_checker;
mod query_tracing; pub mod query_tracing;
mod schema_pivot; mod schema_pivot;
pub mod seriesset; pub mod seriesset;
pub(crate) mod split; pub(crate) mod split;

View File

@ -648,7 +648,7 @@ impl IOxSessionContext {
exec.spawn(fut).await.unwrap_or_else(|e| { exec.spawn(fut).await.unwrap_or_else(|e| {
Err(Error::Context( Err(Error::Context(
"Join Error".to_string(), "Join Error".to_string(),
Box::new(Error::External(e.into())), Box::new(Error::External(Box::new(e))),
)) ))
}) })
} }

View File

@ -74,11 +74,11 @@ where
/// Create new stream based on an existing stream that transports [`Result`]s. /// Create new stream based on an existing stream that transports [`Result`]s.
/// ///
/// Also receives an executor that actually executes the underlying stream as well as a converter that converts /// Also receives an executor that actually executes the underlying stream as well as a converter that converts
/// [`executor::Error`] to the error type of the stream (so we can send potential crashes/panics). /// [`executor::JobError`] to the error type of the stream (so we can send potential crashes/panics).
fn new_with_error_stream<S, C>(stream: S, exec: DedicatedExecutor, converter: C) -> Self fn new_with_error_stream<S, C>(stream: S, exec: DedicatedExecutor, converter: C) -> Self
where where
S: Stream<Item = Result<X, E>> + Send + 'static, S: Stream<Item = Result<X, E>> + Send + 'static,
C: Fn(executor::Error) -> E + Send + 'static, C: Fn(executor::JobError) -> E + Send + 'static,
{ {
Self::new_with_tx(|tx| { Self::new_with_tx(|tx| {
// future to be run in the other runtime // future to be run in the other runtime
@ -177,7 +177,7 @@ mod tests {
let barrier1_captured = Arc::clone(&barrier1); let barrier1_captured = Arc::clone(&barrier1);
let barrier2 = Arc::new(tokio::sync::Barrier::new(2)); let barrier2 = Arc::new(tokio::sync::Barrier::new(2));
let barrier2_captured = Arc::clone(&barrier2); let barrier2_captured = Arc::clone(&barrier2);
let mut stream = CrossRtStream::<Result<u8, executor::Error>>::new_with_error_stream( let mut stream = CrossRtStream::<Result<u8, executor::JobError>>::new_with_error_stream(
futures::stream::once(async move { futures::stream::once(async move {
barrier1_captured.wait().await; barrier1_captured.wait().await;
barrier2_captured.wait().await; barrier2_captured.wait().await;
@ -195,7 +195,7 @@ mod tests {
barrier2.wait().await; barrier2.wait().await;
let res = f.await.expect("streamed data"); let res = f.await.expect("streamed data");
assert_eq!(res, Ok(1)); assert_eq!(res.unwrap(), 1);
} }
#[tokio::test] #[tokio::test]
@ -212,7 +212,7 @@ mod tests {
let barrier1_captured = Arc::clone(&barrier1); let barrier1_captured = Arc::clone(&barrier1);
let barrier2 = Arc::new(std::sync::Barrier::new(2)); let barrier2 = Arc::new(std::sync::Barrier::new(2));
let barrier2_captured = Arc::clone(&barrier2); let barrier2_captured = Arc::clone(&barrier2);
let mut stream = CrossRtStream::<Result<u8, executor::Error>>::new_with_error_stream( let mut stream = CrossRtStream::<Result<u8, executor::JobError>>::new_with_error_stream(
futures::stream::once(async move { futures::stream::once(async move {
barrier1_captured.wait(); barrier1_captured.wait();
barrier2_captured.wait(); barrier2_captured.wait();
@ -230,13 +230,13 @@ mod tests {
barrier2.wait(); barrier2.wait();
let res = f.await.expect("streamed data"); let res = f.await.expect("streamed data");
assert_eq!(res, Ok(1)); assert_eq!(res.unwrap(), 1);
} }
#[tokio::test] #[tokio::test]
async fn test_panic() { async fn test_panic() {
let exec = DedicatedExecutor::new_testing(); let exec = DedicatedExecutor::new_testing();
let mut stream = CrossRtStream::<Result<(), executor::Error>>::new_with_error_stream( let mut stream = CrossRtStream::<Result<(), executor::JobError>>::new_with_error_stream(
futures::stream::once(async { panic!("foo") }), futures::stream::once(async { panic!("foo") }),
exec, exec,
std::convert::identity, std::convert::identity,
@ -247,7 +247,7 @@ mod tests {
.await .await
.expect("stream not finished") .expect("stream not finished")
.unwrap_err(); .unwrap_err();
assert_eq!(e.to_string(), "foo"); assert_eq!(e.to_string(), "Panic: foo");
let none = stream.next().await; let none = stream.next().await;
assert!(none.is_none()); assert!(none.is_none());
@ -260,7 +260,7 @@ mod tests {
let barrier1_captured = Arc::clone(&barrier1); let barrier1_captured = Arc::clone(&barrier1);
let barrier2 = Arc::new(tokio::sync::Barrier::new(2)); let barrier2 = Arc::new(tokio::sync::Barrier::new(2));
let barrier2_captured = Arc::clone(&barrier2); let barrier2_captured = Arc::clone(&barrier2);
let mut stream = CrossRtStream::<Result<u8, executor::Error>>::new_with_error_stream( let mut stream = CrossRtStream::<Result<u8, executor::JobError>>::new_with_error_stream(
futures::stream::once(async move { futures::stream::once(async move {
barrier1_captured.wait().await; barrier1_captured.wait().await;
barrier2_captured.wait().await; barrier2_captured.wait().await;
@ -281,7 +281,7 @@ mod tests {
barrier2.wait().await; barrier2.wait().await;
let res = stream.next().await.expect("streamed data"); let res = stream.next().await.expect("streamed data");
assert_eq!(res, Ok(1)); assert_eq!(res.unwrap(), 1);
} }
#[tokio::test] #[tokio::test]
@ -289,7 +289,7 @@ mod tests {
let exec = DedicatedExecutor::new_testing(); let exec = DedicatedExecutor::new_testing();
let barrier = Arc::new(tokio::sync::Barrier::new(2)); let barrier = Arc::new(tokio::sync::Barrier::new(2));
let barrier_captured = Arc::clone(&barrier); let barrier_captured = Arc::clone(&barrier);
let mut stream = CrossRtStream::<Result<u8, executor::Error>>::new_with_error_stream( let mut stream = CrossRtStream::<Result<u8, executor::JobError>>::new_with_error_stream(
futures::stream::once(async move { futures::stream::once(async move {
barrier_captured.wait().await; barrier_captured.wait().await;

View File

@ -109,7 +109,7 @@ impl Drop for TracedStream {
/// 1. If the ExecutionPlan had no metrics /// 1. If the ExecutionPlan had no metrics
/// 2. The total number of rows produced by the ExecutionPlan (if available) /// 2. The total number of rows produced by the ExecutionPlan (if available)
/// 3. The elapsed compute time taken by the ExecutionPlan /// 3. The elapsed compute time taken by the ExecutionPlan
fn send_metrics_to_tracing( pub fn send_metrics_to_tracing(
default_end_time: DateTime<Utc>, default_end_time: DateTime<Utc>,
parent_span: &Span, parent_span: &Span,
physical_plan: &dyn ExecutionPlan, physical_plan: &dyn ExecutionPlan,

View File

@ -8,7 +8,7 @@ use datafusion::{
common::tree_node::{RewriteRecursion, TreeNode, TreeNodeRewriter, VisitRecursion}, common::tree_node::{RewriteRecursion, TreeNode, TreeNodeRewriter, VisitRecursion},
error::{DataFusionError, Result}, error::{DataFusionError, Result},
logical_expr::{ logical_expr::{
expr::{ScalarFunction, ScalarUDF}, expr::{Alias, ScalarFunction, ScalarUDF},
utils::expr_to_columns, utils::expr_to_columns,
Aggregate, BuiltinScalarFunction, Extension, LogicalPlan, Projection, Aggregate, BuiltinScalarFunction, Extension, LogicalPlan, Projection,
}, },
@ -293,13 +293,26 @@ fn replace_date_bin_gapfill(group_expr: &[Expr]) -> Result<Option<RewriteInfo>>
})?; })?;
match date_bin_gapfill_count { match date_bin_gapfill_count {
0 => return Ok(None), 0 => return Ok(None),
2.. => { 1 => {
// Make sure that the call to DATE_BIN_GAPFILL is root expression
// excluding aliases.
let dbg_idx = dbg_idx.expect("should have found exactly one call");
if !matches_udf(
unwrap_alias(&group_expr[dbg_idx]),
DATE_BIN_GAPFILL_UDF_NAME,
) {
return Err(DataFusionError::Plan(
"DATE_BIN_GAPFILL must a top-level expression in the GROUP BY clause when gap filling. It cannot be part of another expression or cast".to_string(),
));
}
}
_ => {
return Err(DataFusionError::Plan( return Err(DataFusionError::Plan(
"DATE_BIN_GAPFILL specified more than once".to_string(), "DATE_BIN_GAPFILL specified more than once".to_string(),
)) ))
} }
_ => (),
} }
let date_bin_gapfill_index = dbg_idx.expect("should be found exactly one call"); let date_bin_gapfill_index = dbg_idx.expect("should be found exactly one call");
let mut rewriter = DateBinGapfillRewriter { args: None }; let mut rewriter = DateBinGapfillRewriter { args: None };
@ -323,6 +336,15 @@ fn replace_date_bin_gapfill(group_expr: &[Expr]) -> Result<Option<RewriteInfo>>
})) }))
} }
fn unwrap_alias(mut e: &Expr) -> &Expr {
loop {
match e {
Expr::Alias(Alias { expr, .. }) => e = expr.as_ref(),
e => break e,
}
}
}
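A small, self-contained illustration of what the relocated `unwrap_alias` does, using DataFusion's expression builders (the helper is repeated here verbatim so the snippet stands alone; the test name is illustrative):

    use datafusion::logical_expr::expr::Alias;
    use datafusion::prelude::{col, Expr};

    fn unwrap_alias(mut e: &Expr) -> &Expr {
        loop {
            match e {
                Expr::Alias(Alias { expr, .. }) => e = expr.as_ref(),
                e => break e,
            }
        }
    }

    #[test]
    fn unwrap_alias_peels_nested_aliases() {
        // `time AS t AS t2` unwraps back to the bare column reference.
        let nested = col("time").alias("t").alias("t2");
        assert_eq!(unwrap_alias(&nested), &col("time"));
    }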
struct DateBinGapfillRewriter { struct DateBinGapfillRewriter {
args: Option<Vec<Expr>>, args: Option<Vec<Expr>>,
} }
@ -486,17 +508,21 @@ impl FillFnRewriter {
fn count_udf(e: &Expr, name: &str) -> Result<usize> { fn count_udf(e: &Expr, name: &str) -> Result<usize> {
let mut count = 0; let mut count = 0;
e.apply(&mut |expr| { e.apply(&mut |expr| {
match expr { if matches_udf(expr, name) {
Expr::ScalarUDF(ScalarUDF { fun, .. }) if fun.name == name => { count += 1;
count += 1; }
}
_ => (),
};
Ok(VisitRecursion::Continue) Ok(VisitRecursion::Continue)
})?; })?;
Ok(count) Ok(count)
} }
fn matches_udf(e: &Expr, name: &str) -> bool {
matches!(
e,
Expr::ScalarUDF(ScalarUDF { fun, .. }) if fun.name == name
)
}
fn check_node(node: &LogicalPlan) -> Result<()> { fn check_node(node: &LogicalPlan) -> Result<()> {
node.expressions().iter().try_for_each(|expr| { node.expressions().iter().try_for_each(|expr| {
let dbg_count = count_udf(expr, DATE_BIN_GAPFILL_UDF_NAME)?; let dbg_count = count_udf(expr, DATE_BIN_GAPFILL_UDF_NAME)?;

View File

@ -7,11 +7,13 @@ use datafusion::{
DFSchema, DFSchema,
}, },
error::Result, error::Result,
logical_expr::{expr::Alias, Between, BinaryExpr, LogicalPlan, Operator}, logical_expr::{Between, BinaryExpr, LogicalPlan, Operator},
optimizer::utils::split_conjunction, optimizer::utils::split_conjunction,
prelude::{Column, Expr}, prelude::{Column, Expr},
}; };
use super::unwrap_alias;
/// Given a plan and a column, finds the predicates that use that column /// Given a plan and a column, finds the predicates that use that column
/// and return a range with expressions for upper and lower bounds. /// and return a range with expressions for upper and lower bounds.
pub fn find_time_range(plan: &LogicalPlan, time_col: &Column) -> Result<Range<Bound<Expr>>> { pub fn find_time_range(plan: &LogicalPlan, time_col: &Column) -> Result<Range<Bound<Expr>>> {
@ -65,6 +67,12 @@ impl TreeNodeVisitor for TimeRangeVisitor {
self.range = range; self.range = range;
Ok(VisitRecursion::Continue) Ok(VisitRecursion::Continue)
} }
LogicalPlan::SubqueryAlias(_) => {
// The nodes below this one refer to the column with a different table name,
// just unset the relation so we match on the column name.
self.col.relation = None;
Ok(VisitRecursion::Continue)
}
// These nodes do not alter their schema, so we can recurse through them // These nodes do not alter their schema, so we can recurse through them
LogicalPlan::Sort(_) LogicalPlan::Sort(_)
| LogicalPlan::Repartition(_) | LogicalPlan::Repartition(_)
@ -76,15 +84,6 @@ impl TreeNodeVisitor for TimeRangeVisitor {
} }
} }
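To make the SubqueryAlias handling above concrete: "unset the relation" just clears the table qualifier on the tracked column, so the visitor keeps matching by bare column name below the alias. A tiny sketch using DataFusion's `Column` type (the qualifier string and test name are placeholders):

    use datafusion::prelude::Column;

    #[test]
    fn unqualified_column_matches_by_name() {
        // A column qualified by a subquery alias...
        let mut time_col = Column::new(Some("aliased_cpu"), "time");
        assert_eq!(time_col.flat_name(), "aliased_cpu.time");
        // ...is matched purely by name once the relation is cleared, which is
        // what the SubqueryAlias arm above relies on.
        time_col.relation = None;
        assert_eq!(time_col.flat_name(), "time");
    }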
fn unwrap_alias(mut e: &Expr) -> &Expr {
loop {
match e {
Expr::Alias(Alias { expr, .. }) => e = expr.as_ref(),
e => break e,
}
}
}
/// Encapsulates the upper and lower bounds of a time column /// Encapsulates the upper and lower bounds of a time column
/// in a logical plan. /// in a logical plan.
#[derive(Clone)] #[derive(Clone)]

View File

@ -39,7 +39,7 @@ pub(super) fn accumulator(dt: &DataType) -> Result<Box<dyn Accumulator>> {
/// Calculate the intermediate merge state for the aggregator. /// Calculate the intermediate merge state for the aggregator.
pub(super) fn state_type(dt: &DataType) -> Result<Arc<Vec<DataType>>> { pub(super) fn state_type(dt: &DataType) -> Result<Arc<Vec<DataType>>> {
Ok(Arc::new(vec![ Ok(Arc::new(vec![
DataType::List(Arc::new(Field::new("state", dt.clone(), false))), DataType::List(Arc::new(Field::new("item", dt.clone(), true))),
DataType::Float64, DataType::Float64,
])) ]))
} }

View File

@ -9,18 +9,18 @@ use crate::plan::planner::select::{
}; };
use crate::plan::planner_time_range_expression::time_range_to_df_expr; use crate::plan::planner_time_range_expression::time_range_to_df_expr;
use crate::plan::rewriter::{find_table_names, rewrite_statement, ProjectionType}; use crate::plan::rewriter::{find_table_names, rewrite_statement, ProjectionType};
use crate::plan::udaf::{ use crate::plan::udaf::MOVING_AVERAGE;
derivative_udf, non_negative_derivative_udf, DIFFERENCE, MOVING_AVERAGE,
NON_NEGATIVE_DIFFERENCE,
};
use crate::plan::udf::{ use crate::plan::udf::{
derivative, difference, find_window_udfs, moving_average, non_negative_derivative, cumulative_sum, derivative, difference, find_window_udfs, moving_average,
non_negative_difference, non_negative_derivative, non_negative_difference,
}; };
use crate::plan::util::{binary_operator_to_df_operator, rebase_expr, Schemas}; use crate::plan::util::{binary_operator_to_df_operator, rebase_expr, IQLSchema};
use crate::plan::var_ref::var_ref_data_type_to_data_type; use crate::plan::var_ref::var_ref_data_type_to_data_type;
use crate::plan::{planner_rewrite_expression, udf, util_copy}; use crate::plan::{planner_rewrite_expression, udf, util_copy};
use crate::window::PERCENT_ROW_NUMBER; use crate::window::{
CUMULATIVE_SUM, DERIVATIVE, DIFFERENCE, NON_NEGATIVE_DERIVATIVE, NON_NEGATIVE_DIFFERENCE,
PERCENT_ROW_NUMBER,
};
use arrow::array::{StringBuilder, StringDictionaryBuilder}; use arrow::array::{StringBuilder, StringDictionaryBuilder};
use arrow::datatypes::{DataType, Field as ArrowField, Int32Type, Schema as ArrowSchema}; use arrow::datatypes::{DataType, Field as ArrowField, Int32Type, Schema as ArrowSchema};
use arrow::record_batch::RecordBatch; use arrow::record_batch::RecordBatch;
@ -94,7 +94,6 @@ use std::ops::{Bound, ControlFlow, Deref, Not, Range};
use std::str::FromStr; use std::str::FromStr;
use std::sync::Arc; use std::sync::Arc;
use super::ir::DataSourceSchema;
use super::parse_regex; use super::parse_regex;
use super::util::contains_expr; use super::util::contains_expr;
use super::util_copy::clone_with_replacement; use super::util_copy::clone_with_replacement;
@ -712,16 +711,14 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
continue; continue;
}; };
let schemas = Schemas::new(plan.schema())?; let schema = IQLSchema::new_from_ds_schema(plan.schema(), ds.schema(self.s)?)?;
let ds_schema = ds.schema(self.s)?;
let plan = self.plan_condition_time_range( let plan = self.plan_condition_time_range(
ctx.condition, ctx.condition,
ctx.extended_time_range(), ctx.extended_time_range(),
plan, plan,
&schemas, &schema,
&ds_schema,
)?; )?;
plans.push((plan, ds_schema)); plans.push((plan, schema));
} }
Ok(match plans.len() { Ok(match plans.len() {
@ -797,10 +794,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
/// Plan "Raw" SELECT queriers, These are queries that have no grouping /// Plan "Raw" SELECT queriers, These are queries that have no grouping
/// and call only scalar functions. /// and call only scalar functions.
fn project_select_raw(&self, input: LogicalPlan, fields: &[Field]) -> Result<LogicalPlan> { fn project_select_raw(&self, input: LogicalPlan, fields: &[Field]) -> Result<LogicalPlan> {
let schemas = Schemas::new(input.schema())?; let schema = IQLSchema::new_from_fields(input.schema(), fields)?;
// Transform InfluxQL AST field expressions to a list of DataFusion expressions. // Transform InfluxQL AST field expressions to a list of DataFusion expressions.
let select_exprs = self.field_list_to_exprs(&input, fields, &schemas)?; let select_exprs = self.field_list_to_exprs(&input, fields, &schema)?;
// Wrap the plan in a `LogicalPlan::Projection` from the select expressions // Wrap the plan in a `LogicalPlan::Projection` from the select expressions
project(input, select_exprs) project(input, select_exprs)
@ -813,10 +810,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
input: LogicalPlan, input: LogicalPlan,
fields: &[Field], fields: &[Field],
) -> Result<LogicalPlan> { ) -> Result<LogicalPlan> {
let schemas = Schemas::new(input.schema())?; let schema = IQLSchema::new_from_fields(input.schema(), fields)?;
// Transform InfluxQL AST field expressions to a list of DataFusion expressions. // Transform InfluxQL AST field expressions to a list of DataFusion expressions.
let mut select_exprs = self.field_list_to_exprs(&input, fields, &schemas)?; let mut select_exprs = self.field_list_to_exprs(&input, fields, &schema)?;
// This is a special case, where exactly one column can be projected with a `DISTINCT` // This is a special case, where exactly one column can be projected with a `DISTINCT`
// clause or the `distinct` function. // clause or the `distinct` function.
@ -850,10 +847,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
fields: &[Field], fields: &[Field],
group_by_tag_set: &[&str], group_by_tag_set: &[&str],
) -> Result<LogicalPlan> { ) -> Result<LogicalPlan> {
let schemas = Schemas::new(input.schema())?; let schema = IQLSchema::new_from_fields(input.schema(), fields)?;
// Transform InfluxQL AST field expressions to a list of DataFusion expressions. // Transform InfluxQL AST field expressions to a list of DataFusion expressions.
let select_exprs = self.field_list_to_exprs(&input, fields, &schemas)?; let select_exprs = self.field_list_to_exprs(&input, fields, &schema)?;
let (plan, select_exprs) = let (plan, select_exprs) =
self.select_aggregate(ctx, input, fields, select_exprs, group_by_tag_set)?; self.select_aggregate(ctx, input, fields, select_exprs, group_by_tag_set)?;
@ -871,10 +868,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
fields: &[Field], fields: &[Field],
group_by_tag_set: &[&str], group_by_tag_set: &[&str],
) -> Result<LogicalPlan> { ) -> Result<LogicalPlan> {
let schemas = Schemas::new(input.schema())?; let schema = IQLSchema::new_from_fields(input.schema(), fields)?;
// Transform InfluxQL AST field expressions to a list of DataFusion expressions. // Transform InfluxQL AST field expressions to a list of DataFusion expressions.
let select_exprs = self.field_list_to_exprs(&input, fields, &schemas)?; let select_exprs = self.field_list_to_exprs(&input, fields, &schema)?;
let (plan, select_exprs) = let (plan, select_exprs) =
self.select_window(ctx, input, select_exprs, group_by_tag_set)?; self.select_window(ctx, input, select_exprs, group_by_tag_set)?;
@ -909,10 +906,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
fields: &[Field], fields: &[Field],
group_by_tag_set: &[&str], group_by_tag_set: &[&str],
) -> Result<LogicalPlan> { ) -> Result<LogicalPlan> {
let schemas = Schemas::new(input.schema())?; let schema = IQLSchema::new_from_fields(input.schema(), fields)?;
// Transform InfluxQL AST field expressions to a list of DataFusion expressions. // Transform InfluxQL AST field expressions to a list of DataFusion expressions.
let select_exprs = self.field_list_to_exprs(&input, fields, &schemas)?; let select_exprs = self.field_list_to_exprs(&input, fields, &schema)?;
let (plan, select_exprs) = let (plan, select_exprs) =
self.select_aggregate(ctx, input, fields, select_exprs, group_by_tag_set)?; self.select_aggregate(ctx, input, fields, select_exprs, group_by_tag_set)?;
@ -953,7 +950,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
fields: &[Field], fields: &[Field],
group_by_tag_set: &[&str], group_by_tag_set: &[&str],
) -> Result<LogicalPlan> { ) -> Result<LogicalPlan> {
let schemas = Schemas::new(input.schema())?; let schema = IQLSchema::new_from_fields(input.schema(), fields)?;
let (selector_index, field_key, plan) = match Selector::find_enumerated(fields)? { let (selector_index, field_key, plan) = match Selector::find_enumerated(fields)? {
(_, Selector::First { .. }) (_, Selector::First { .. })
@ -1027,7 +1024,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
}); });
// Transform InfluxQL AST field expressions to a list of DataFusion expressions. // Transform InfluxQL AST field expressions to a list of DataFusion expressions.
let select_exprs = self.field_list_to_exprs(&plan, fields_vec.as_slice(), &schemas)?; let select_exprs = self.field_list_to_exprs(&plan, fields_vec.as_slice(), &schema)?;
// Wrap the plan in a `LogicalPlan::Projection` from the select expressions // Wrap the plan in a `LogicalPlan::Projection` from the select expressions
project(plan, select_exprs) project(plan, select_exprs)
@ -1043,7 +1040,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
fields: &[Field], fields: &[Field],
group_by_tag_set: &[&str], group_by_tag_set: &[&str],
) -> Result<LogicalPlan> { ) -> Result<LogicalPlan> {
let schemas = Schemas::new(input.schema())?; let schema = IQLSchema::new_from_fields(input.schema(), fields)?;
let (selector_index, is_bottom, field_key, tag_keys, narg) = let (selector_index, is_bottom, field_key, tag_keys, narg) =
match Selector::find_enumerated(fields)? { match Selector::find_enumerated(fields)? {
@ -1098,7 +1095,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
} }
// Transform InfluxQL AST field expressions to a list of DataFusion expressions. // Transform InfluxQL AST field expressions to a list of DataFusion expressions.
let select_exprs = self.field_list_to_exprs(&input, fields_vec.as_slice(), &schemas)?; let select_exprs = self.field_list_to_exprs(&input, fields_vec.as_slice(), &schema)?;
let plan = if !tag_keys.is_empty() { let plan = if !tag_keys.is_empty() {
self.select_first(ctx, input, order_by, internal_group_by.as_slice(), 1)? self.select_first(ctx, input, order_by, internal_group_by.as_slice(), 1)?
@ -1326,18 +1323,25 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
_ => None, _ => None,
}; };
// Some aggregates, such as COUNT, should be filled with zero by default
// rather than NULL.
let should_zero_fill_expr = fields
.iter()
.map(is_zero_filled_aggregate_field)
.collect::<Vec<_>>();
// Rewrite the aggregate columns from the projection, so that the expressions // Rewrite the aggregate columns from the projection, so that the expressions
// refer to the columns from the aggregate projection // refer to the columns from the aggregate projection
let select_exprs_post_aggr = select_exprs let select_exprs_post_aggr = select_exprs
.iter() .iter()
.zip(should_fill_expr) .zip(should_fill_expr.iter().zip(should_zero_fill_expr))
.map(|(expr, should_fill)| { .map(|(expr, (should_fill, should_zero_fill))| {
// This implements the `FILL(<value>)` strategy, by coalescing any aggregate // This implements the `FILL(<value>)` strategy, by coalescing any aggregate
// expressions to `<value>` when they are `NULL`. // expressions to `<value>` when they are `NULL`.
let fill_if_null = if fill_if_null.is_some() && should_fill { let fill_if_null = match (fill_if_null, should_fill, should_zero_fill) {
fill_if_null (Some(_), true, _) => fill_if_null,
} else { (None, true, true) => Some(0.into()),
None _ => None,
}; };
rebase_expr(expr, &aggr_projection_exprs, &fill_if_null, &plan) rebase_expr(expr, &aggr_projection_exprs, &fill_if_null, &plan)
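The zipped fill logic above can be hard to read inline; restated as a free function over a simplified value type (this helper is illustrative only, not part of the planner):

    /// Decide what an aggregate expression should be coalesced to when it is
    /// NULL: an explicit FILL(<value>) wins, zero-fillable aggregates such as
    /// COUNT default to 0, and everything else stays NULL.
    fn effective_fill(
        fill_if_null: Option<f64>,
        should_fill: bool,
        should_zero_fill: bool,
    ) -> Option<f64> {
        match (fill_if_null, should_fill, should_zero_fill) {
            (Some(v), true, _) => Some(v),
            (None, true, true) => Some(0.0),
            _ => None,
        }
    }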
@ -1450,17 +1454,17 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
return error::internal(format!("udf_to_expr: unexpected expression: {e}")) return error::internal(format!("udf_to_expr: unexpected expression: {e}"))
}; };
fn derivative_unit(ctx: &Context<'_>, args: &Vec<Expr>) -> Result<i64> { fn derivative_unit(ctx: &Context<'_>, args: &Vec<Expr>) -> Result<ScalarValue> {
if args.len() > 1 { if args.len() > 1 {
if let Expr::Literal(ScalarValue::IntervalMonthDayNano(Some(v))) = args[1] { if let Expr::Literal(v) = &args[1] {
Ok(v as i64) Ok(v.clone())
} else { } else {
error::internal(format!("udf_to_expr: unexpected expression: {}", args[1])) error::internal(format!("udf_to_expr: unexpected expression: {}", args[1]))
} }
} else if let Some(interval) = ctx.interval { } else if let Some(interval) = ctx.interval {
Ok(interval.duration) Ok(ScalarValue::new_interval_mdn(0, 0, interval.duration))
} else { } else {
Ok(1000000000) // 1s Ok(ScalarValue::new_interval_mdn(0, 0, 1_000_000_000)) // 1s
} }
} }
@ -1478,63 +1482,77 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
}) })
.alias(alias)), .alias(alias)),
Some(udf::WindowFunction::Difference) => Ok(Expr::WindowFunction(WindowFunction { Some(udf::WindowFunction::Difference) => Ok(Expr::WindowFunction(WindowFunction {
fun: window_function::WindowFunction::AggregateUDF(DIFFERENCE.clone()), fun: DIFFERENCE.clone(),
args, args,
partition_by, partition_by,
order_by, order_by,
window_frame: WindowFrame { window_frame: WindowFrame {
units: WindowFrameUnits::Rows, units: WindowFrameUnits::Rows,
start_bound: WindowFrameBound::Preceding(ScalarValue::Null), start_bound: WindowFrameBound::Preceding(ScalarValue::Null),
end_bound: WindowFrameBound::CurrentRow, end_bound: WindowFrameBound::Following(ScalarValue::Null),
}, },
}) })
.alias(alias)), .alias(alias)),
Some(udf::WindowFunction::NonNegativeDifference) => { Some(udf::WindowFunction::NonNegativeDifference) => {
Ok(Expr::WindowFunction(WindowFunction { Ok(Expr::WindowFunction(WindowFunction {
fun: window_function::WindowFunction::AggregateUDF( fun: NON_NEGATIVE_DIFFERENCE.clone(),
NON_NEGATIVE_DIFFERENCE.clone(),
),
args, args,
partition_by, partition_by,
order_by, order_by,
window_frame: WindowFrame { window_frame: WindowFrame {
units: WindowFrameUnits::Rows, units: WindowFrameUnits::Rows,
start_bound: WindowFrameBound::Preceding(ScalarValue::Null), start_bound: WindowFrameBound::Preceding(ScalarValue::Null),
end_bound: WindowFrameBound::CurrentRow, end_bound: WindowFrameBound::Following(ScalarValue::Null),
}, },
}) })
.alias(alias)) .alias(alias))
} }
Some(udf::WindowFunction::Derivative) => Ok(Expr::WindowFunction(WindowFunction { Some(udf::WindowFunction::Derivative) => Ok(Expr::WindowFunction(WindowFunction {
fun: window_function::WindowFunction::AggregateUDF( fun: DERIVATIVE.clone(),
derivative_udf(derivative_unit(ctx, &args)?).into(), args: vec![
), args[0].clone(),
args: vec!["time".as_expr(), args[0].clone()], lit(derivative_unit(ctx, &args)?),
"time".as_expr(),
],
partition_by, partition_by,
order_by, order_by,
window_frame: WindowFrame { window_frame: WindowFrame {
units: WindowFrameUnits::Rows, units: WindowFrameUnits::Rows,
start_bound: WindowFrameBound::Preceding(ScalarValue::Null), start_bound: WindowFrameBound::Preceding(ScalarValue::Null),
end_bound: WindowFrameBound::CurrentRow, end_bound: WindowFrameBound::Following(ScalarValue::Null),
}, },
}) })
.alias(alias)), .alias(alias)),
Some(udf::WindowFunction::NonNegativeDerivative) => { Some(udf::WindowFunction::NonNegativeDerivative) => {
Ok(Expr::WindowFunction(WindowFunction { Ok(Expr::WindowFunction(WindowFunction {
fun: window_function::WindowFunction::AggregateUDF( fun: NON_NEGATIVE_DERIVATIVE.clone(),
non_negative_derivative_udf(derivative_unit(ctx, &args)?).into(), args: vec![
), args[0].clone(),
args: vec!["time".as_expr(), args[0].clone()], lit(derivative_unit(ctx, &args)?),
"time".as_expr(),
],
partition_by, partition_by,
order_by, order_by,
window_frame: WindowFrame { window_frame: WindowFrame {
units: WindowFrameUnits::Rows, units: WindowFrameUnits::Rows,
start_bound: WindowFrameBound::Preceding(ScalarValue::Null), start_bound: WindowFrameBound::Preceding(ScalarValue::Null),
end_bound: WindowFrameBound::CurrentRow, end_bound: WindowFrameBound::Following(ScalarValue::Null),
}, },
}) })
.alias(alias)) .alias(alias))
} }
Some(udf::WindowFunction::CumulativeSum) => Ok(Expr::WindowFunction(WindowFunction {
fun: CUMULATIVE_SUM.clone(),
args,
partition_by,
order_by,
window_frame: WindowFrame {
units: WindowFrameUnits::Rows,
start_bound: WindowFrameBound::Preceding(ScalarValue::Null),
end_bound: WindowFrameBound::Following(ScalarValue::Null),
},
})
.alias(alias)),
None => error::internal(format!( None => error::internal(format!(
"unexpected user-defined window function: {}", "unexpected user-defined window function: {}",
fun.name fun.name
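One detail worth noting in the window-function arms above: the end bound changed from `CurrentRow` to `Following(ScalarValue::Null)`. In DataFusion a null bound value encodes UNBOUNDED, so these UDFs now see the whole partition rather than only the rows up to the current one. A sketch of the frame they construct, with the imports spelled out:

    use datafusion::logical_expr::{WindowFrame, WindowFrameBound, WindowFrameUnits};
    use datafusion::scalar::ScalarValue;

    fn whole_partition_frame() -> WindowFrame {
        // ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: a null
        // bound value is DataFusion's encoding of UNBOUNDED.
        WindowFrame {
            units: WindowFrameUnits::Rows,
            start_bound: WindowFrameBound::Preceding(ScalarValue::Null),
            end_bound: WindowFrameBound::Following(ScalarValue::Null),
        }
    }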
@ -1688,7 +1706,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
&self, &self,
plan: &LogicalPlan, plan: &LogicalPlan,
fields: &[Field], fields: &[Field],
schemas: &Schemas, schema: &IQLSchema<'_>,
) -> Result<Vec<Expr>> { ) -> Result<Vec<Expr>> {
let mut names: HashMap<&str, usize> = HashMap::new(); let mut names: HashMap<&str, usize> = HashMap::new();
fields fields
@ -1708,7 +1726,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
}; };
new_field new_field
}) })
.map(|field| self.field_to_df_expr(&field, plan, schemas)) .map(|field| self.field_to_df_expr(&field, plan, schema))
.collect() .collect()
} }
@ -1719,10 +1737,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
&self, &self,
field: &Field, field: &Field,
plan: &LogicalPlan, plan: &LogicalPlan,
schemas: &Schemas, schema: &IQLSchema<'_>,
) -> Result<Expr> { ) -> Result<Expr> {
let expr = self.expr_to_df_expr(ExprScope::Projection, &field.expr, schemas)?; let expr = self.expr_to_df_expr(ExprScope::Projection, &field.expr, schema)?;
let expr = planner_rewrite_expression::rewrite_field_expr(expr, schemas)?; let expr = planner_rewrite_expression::rewrite_field_expr(expr, schema)?;
normalize_col(expr.alias(&field.name), plan) normalize_col(expr.alias(&field.name), plan)
} }
@ -1730,16 +1748,14 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
fn conditional_to_df_expr( fn conditional_to_df_expr(
&self, &self,
iql: &ConditionalExpression, iql: &ConditionalExpression,
schemas: &Schemas, schema: &IQLSchema<'_>,
) -> Result<Expr> { ) -> Result<Expr> {
match iql { match iql {
ConditionalExpression::Expr(expr) => { ConditionalExpression::Expr(expr) => {
self.expr_to_df_expr(ExprScope::Where, expr, schemas) self.expr_to_df_expr(ExprScope::Where, expr, schema)
} }
ConditionalExpression::Binary(expr) => { ConditionalExpression::Binary(expr) => self.binary_conditional_to_df_expr(expr, schema),
self.binary_conditional_to_df_expr(expr, schemas) ConditionalExpression::Grouped(e) => self.conditional_to_df_expr(e, schema),
}
ConditionalExpression::Grouped(e) => self.conditional_to_df_expr(e, schemas),
} }
} }
@ -1747,20 +1763,25 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
fn binary_conditional_to_df_expr( fn binary_conditional_to_df_expr(
&self, &self,
expr: &ConditionalBinary, expr: &ConditionalBinary,
schemas: &Schemas, schema: &IQLSchema<'_>,
) -> Result<Expr> { ) -> Result<Expr> {
let ConditionalBinary { lhs, op, rhs } = expr; let ConditionalBinary { lhs, op, rhs } = expr;
Ok(binary_expr( Ok(binary_expr(
self.conditional_to_df_expr(lhs, schemas)?, self.conditional_to_df_expr(lhs, schema)?,
conditional_op_to_operator(*op)?, conditional_op_to_operator(*op)?,
self.conditional_to_df_expr(rhs, schemas)?, self.conditional_to_df_expr(rhs, schema)?,
)) ))
} }
/// Map an InfluxQL [`IQLExpr`] to a DataFusion [`Expr`]. /// Map an InfluxQL [`IQLExpr`] to a DataFusion [`Expr`].
fn expr_to_df_expr(&self, scope: ExprScope, iql: &IQLExpr, schemas: &Schemas) -> Result<Expr> { fn expr_to_df_expr(
let schema = &schemas.df_schema; &self,
scope: ExprScope,
iql: &IQLExpr,
schema: &IQLSchema<'_>,
) -> Result<Expr> {
let df_schema = &schema.df_schema;
match iql { match iql {
// rewriter is expected to expand wildcard expressions // rewriter is expected to expand wildcard expressions
IQLExpr::Wildcard(_) => error::internal("unexpected wildcard in projection"), IQLExpr::Wildcard(_) => error::internal("unexpected wildcard in projection"),
@ -1777,7 +1798,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
"time".as_expr() "time".as_expr()
} }
(ExprScope::Projection, "time") => "time".as_expr(), (ExprScope::Projection, "time") => "time".as_expr(),
(_, name) => match schema (_, name) => match df_schema
.fields_with_unqualified_name(name) .fields_with_unqualified_name(name)
.first() .first()
.map(|f| f.data_type().clone()) .map(|f| f.data_type().clone())
@ -1801,7 +1822,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
// and it is safe to unconditionally unwrap, as the // and it is safe to unconditionally unwrap, as the
// `is_numeric_type` call guarantees it can be mapped to // `is_numeric_type` call guarantees it can be mapped to
// an Arrow DataType // an Arrow DataType
column.cast_to(&dst_type, &schemas.df_schema)? column.cast_to(&dst_type, &schema.df_schema)?
} else { } else {
// If the cast is incompatible, evaluates to NULL // If the cast is incompatible, evaluates to NULL
Expr::Literal(ScalarValue::Null) Expr::Literal(ScalarValue::Null)
@ -1839,9 +1860,9 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
}, },
// A DISTINCT <ident> clause should have been replaced by `rewrite_statement`. // A DISTINCT <ident> clause should have been replaced by `rewrite_statement`.
IQLExpr::Distinct(_) => error::internal("distinct expression"), IQLExpr::Distinct(_) => error::internal("distinct expression"),
IQLExpr::Call(call) => self.call_to_df_expr(scope, call, schemas), IQLExpr::Call(call) => self.call_to_df_expr(scope, call, schema),
IQLExpr::Binary(expr) => self.arithmetic_expr_to_df_expr(scope, expr, schemas), IQLExpr::Binary(expr) => self.arithmetic_expr_to_df_expr(scope, expr, schema),
IQLExpr::Nested(e) => self.expr_to_df_expr(scope, e, schemas), IQLExpr::Nested(e) => self.expr_to_df_expr(scope, e, schema),
} }
} }
@ -1861,9 +1882,14 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
/// > * <https://github.com/influxdata/influxdb_iox/issues/6939> /// > * <https://github.com/influxdata/influxdb_iox/issues/6939>
/// ///
/// [docs]: https://docs.influxdata.com/influxdb/v1.8/query_language/functions/ /// [docs]: https://docs.influxdata.com/influxdb/v1.8/query_language/functions/
fn call_to_df_expr(&self, scope: ExprScope, call: &Call, schemas: &Schemas) -> Result<Expr> { fn call_to_df_expr(
&self,
scope: ExprScope,
call: &Call,
schema: &IQLSchema<'_>,
) -> Result<Expr> {
if is_scalar_math_function(call.name.as_str()) { if is_scalar_math_function(call.name.as_str()) {
return self.scalar_math_func_to_df_expr(scope, call, schemas); return self.scalar_math_func_to_df_expr(scope, call, schema);
} }
match scope { match scope {
@ -1875,7 +1901,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
error::query(format!("invalid function call in condition: {name}")) error::query(format!("invalid function call in condition: {name}"))
} }
} }
ExprScope::Projection => self.function_to_df_expr(scope, call, schemas), ExprScope::Projection => self.function_to_df_expr(scope, call, schema),
} }
} }
@ -1883,7 +1909,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
&self, &self,
scope: ExprScope, scope: ExprScope,
call: &Call, call: &Call,
schemas: &Schemas, schema: &IQLSchema<'_>,
) -> Result<Expr> { ) -> Result<Expr> {
fn check_arg_count(name: &str, args: &[IQLExpr], count: usize) -> Result<()> { fn check_arg_count(name: &str, args: &[IQLExpr], count: usize) -> Result<()> {
let got = args.len(); let got = args.len();
@ -1918,13 +1944,13 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
// The DISTINCT function is handled as a `ProjectionType::RawDistinct` // The DISTINCT function is handled as a `ProjectionType::RawDistinct`
// query, so the planner only needs to project the single column // query, so the planner only needs to project the single column
// argument. // argument.
"distinct" => self.expr_to_df_expr(scope, &args[0], schemas), "distinct" => self.expr_to_df_expr(scope, &args[0], schema),
"count" => { "count" => {
let (expr, distinct) = match &args[0] { let (expr, distinct) = match &args[0] {
IQLExpr::Call(c) if c.name == "distinct" => { IQLExpr::Call(c) if c.name == "distinct" => {
(self.expr_to_df_expr(scope, &c.args[0], schemas)?, true) (self.expr_to_df_expr(scope, &c.args[0], schema)?, true)
} }
expr => (self.expr_to_df_expr(scope, expr, schemas)?, false), expr => (self.expr_to_df_expr(scope, expr, schema)?, false),
}; };
if let Expr::Literal(ScalarValue::Null) = expr { if let Expr::Literal(ScalarValue::Null) = expr {
return Ok(expr); return Ok(expr);
@ -1940,7 +1966,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
))) )))
} }
"sum" | "stddev" | "mean" | "median" => { "sum" | "stddev" | "mean" | "median" => {
let expr = self.expr_to_df_expr(scope, &args[0], schemas)?; let expr = self.expr_to_df_expr(scope, &args[0], schema)?;
if let Expr::Literal(ScalarValue::Null) = expr { if let Expr::Literal(ScalarValue::Null) = expr {
return Ok(expr); return Ok(expr);
} }
@ -1955,13 +1981,13 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
))) )))
} }
"percentile" => { "percentile" => {
let expr = self.expr_to_df_expr(scope, &args[0], schemas)?; let expr = self.expr_to_df_expr(scope, &args[0], schema)?;
if let Expr::Literal(ScalarValue::Null) = expr { if let Expr::Literal(ScalarValue::Null) = expr {
return Ok(expr); return Ok(expr);
} }
check_arg_count(name, args, 2)?; check_arg_count(name, args, 2)?;
let nexpr = self.expr_to_df_expr(scope, &args[1], schemas)?; let nexpr = self.expr_to_df_expr(scope, &args[1], schema)?;
Ok(Expr::AggregateUDF(expr::AggregateUDF::new( Ok(Expr::AggregateUDF(expr::AggregateUDF::new(
PERCENTILE.clone(), PERCENTILE.clone(),
vec![expr, nexpr], vec![expr, nexpr],
@ -1970,7 +1996,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
))) )))
} }
name @ ("first" | "last" | "min" | "max") => { name @ ("first" | "last" | "min" | "max") => {
let expr = self.expr_to_df_expr(scope, &args[0], schemas)?; let expr = self.expr_to_df_expr(scope, &args[0], schema)?;
if let Expr::Literal(ScalarValue::Null) = expr { if let Expr::Literal(ScalarValue::Null) = expr {
return Ok(expr); return Ok(expr);
} }
@ -1993,7 +2019,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
check_arg_count(name, args, 1)?; check_arg_count(name, args, 1)?;
// arg0 should be a column or function // arg0 should be a column or function
let arg0 = self.expr_to_df_expr(scope, &args[0], schemas)?; let arg0 = self.expr_to_df_expr(scope, &args[0], schema)?;
if let Expr::Literal(ScalarValue::Null) = arg0 { if let Expr::Literal(ScalarValue::Null) = arg0 {
return Ok(arg0); return Ok(arg0);
} }
@ -2004,7 +2030,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
check_arg_count(name, args, 1)?; check_arg_count(name, args, 1)?;
// arg0 should be a column or function // arg0 should be a column or function
let arg0 = self.expr_to_df_expr(scope, &args[0], schemas)?; let arg0 = self.expr_to_df_expr(scope, &args[0], schema)?;
if let Expr::Literal(ScalarValue::Null) = arg0 { if let Expr::Literal(ScalarValue::Null) = arg0 {
return Ok(arg0); return Ok(arg0);
} }
@ -2015,14 +2041,14 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
check_arg_count(name, args, 2)?; check_arg_count(name, args, 2)?;
// arg0 should be a column or function // arg0 should be a column or function
let arg0 = self.expr_to_df_expr(scope, &args[0], schemas)?; let arg0 = self.expr_to_df_expr(scope, &args[0], schema)?;
if let Expr::Literal(ScalarValue::Null) = arg0 { if let Expr::Literal(ScalarValue::Null) = arg0 {
return Ok(arg0); return Ok(arg0);
} }
// arg1 should be an integer. // arg1 should be an integer.
let arg1 = ScalarValue::Int64(Some( let arg1 = ScalarValue::Int64(Some(
match self.expr_to_df_expr(scope, &args[1], schemas)? { match self.expr_to_df_expr(scope, &args[1], schema)? {
Expr::Literal(ScalarValue::Int64(Some(v))) => v, Expr::Literal(ScalarValue::Int64(Some(v))) => v,
Expr::Literal(ScalarValue::UInt64(Some(v))) => v as i64, Expr::Literal(ScalarValue::UInt64(Some(v))) => v as i64,
_ => { _ => {
@ -2039,13 +2065,13 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
check_arg_count_range(name, args, 1, 2)?; check_arg_count_range(name, args, 1, 2)?;
// arg0 should be a column or function // arg0 should be a column or function
let arg0 = self.expr_to_df_expr(scope, &args[0], schemas)?; let arg0 = self.expr_to_df_expr(scope, &args[0], schema)?;
if let Expr::Literal(ScalarValue::Null) = arg0 { if let Expr::Literal(ScalarValue::Null) = arg0 {
return Ok(arg0); return Ok(arg0);
} }
let mut eargs = vec![arg0]; let mut eargs = vec![arg0];
if args.len() > 1 { if args.len() > 1 {
let arg1 = self.expr_to_df_expr(scope, &args[1], schemas)?; let arg1 = self.expr_to_df_expr(scope, &args[1], schema)?;
eargs.push(arg1); eargs.push(arg1);
} }
@ -2055,22 +2081,33 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
check_arg_count_range(name, args, 1, 2)?; check_arg_count_range(name, args, 1, 2)?;
// arg0 should be a column or function // arg0 should be a column or function
let arg0 = self.expr_to_df_expr(scope, &args[0], schemas)?; let arg0 = self.expr_to_df_expr(scope, &args[0], schema)?;
if let Expr::Literal(ScalarValue::Null) = arg0 { if let Expr::Literal(ScalarValue::Null) = arg0 {
return Ok(arg0); return Ok(arg0);
} }
let mut eargs = vec![arg0]; let mut eargs = vec![arg0];
if args.len() > 1 { if args.len() > 1 {
let arg1 = self.expr_to_df_expr(scope, &args[1], schemas)?; let arg1 = self.expr_to_df_expr(scope, &args[1], schema)?;
eargs.push(arg1); eargs.push(arg1);
} }
Ok(non_negative_derivative(eargs)) Ok(non_negative_derivative(eargs))
} }
"cumulative_sum" => {
check_arg_count(name, args, 1)?;
// arg0 should be a column or function
let arg0 = self.expr_to_df_expr(scope, &args[0], schema)?;
if let Expr::Literal(ScalarValue::Null) = arg0 {
return Ok(arg0);
}
Ok(cumulative_sum(vec![arg0]))
}
// The TOP/BOTTOM function is handled as a `ProjectionType::TopBottomSelector` // The TOP/BOTTOM function is handled as a `ProjectionType::TopBottomSelector`
// query, so the planner only needs to project the single column // query, so the planner only needs to project the single column
// argument. // argument.
"top" | "bottom" => self.expr_to_df_expr(scope, &args[0], schemas), "top" | "bottom" => self.expr_to_df_expr(scope, &args[0], schema),
_ => error::query(format!("Invalid function '{name}'")), _ => error::query(format!("Invalid function '{name}'")),
} }
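All of the window-style arms above, including the new `cumulative_sum` one, share the same shape: validate the argument count, lower the first argument, pass a NULL literal straight through (as elsewhere in this planner, a reference to a missing field lowers to NULL), and otherwise wrap the lowered argument in the corresponding window expression. A minimal sketch of that shared shape; `plan_window_arg` and the `make_call` closure are illustrative names, not part of this change:

    use datafusion_common::ScalarValue;
    use datafusion_expr::Expr;

    // Illustrative only: mirrors the arm pattern used above.
    fn plan_window_arg(arg0: Expr, make_call: impl FnOnce(Vec<Expr>) -> Expr) -> Expr {
        if let Expr::Literal(ScalarValue::Null) = arg0 {
            // Keep the NULL literal untouched so the projection simply
            // yields NULL rather than erroring.
            return arg0;
        }
        make_call(vec![arg0])
    }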
@ -2081,12 +2118,12 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
&self, &self,
scope: ExprScope, scope: ExprScope,
call: &Call, call: &Call,
schemas: &Schemas, schema: &IQLSchema<'a>,
) -> Result<Expr> { ) -> Result<Expr> {
let args = call let args = call
.args .args
.iter() .iter()
.map(|e| self.expr_to_df_expr(scope, e, schemas)) .map(|e| self.expr_to_df_expr(scope, e, schema))
.collect::<Result<Vec<Expr>>>()?; .collect::<Result<Vec<Expr>>>()?;
match BuiltinScalarFunction::from_str(call.name.as_str())? { match BuiltinScalarFunction::from_str(call.name.as_str())? {
@ -2109,12 +2146,12 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
&self, &self,
scope: ExprScope, scope: ExprScope,
expr: &Binary, expr: &Binary,
schemas: &Schemas, schema: &IQLSchema<'_>,
) -> Result<Expr> { ) -> Result<Expr> {
Ok(binary_expr( Ok(binary_expr(
self.expr_to_df_expr(scope, &expr.lhs, schemas)?, self.expr_to_df_expr(scope, &expr.lhs, schema)?,
binary_operator_to_df_operator(expr.op), binary_operator_to_df_operator(expr.op),
self.expr_to_df_expr(scope, &expr.rhs, schemas)?, self.expr_to_df_expr(scope, &expr.rhs, schema)?,
)) ))
} }
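For reference, the output of this helper is an ordinary DataFusion binary expression built over the already-lowered operands. A self-contained sketch using the public `datafusion_expr` builders; the column and literal here are arbitrary examples, not taken from this change:

    use datafusion_expr::{binary_expr, col, lit, Expr, Operator};

    // e.g. an InfluxQL expression like `usage_idle + 5` lowers to roughly:
    fn lowered_example() -> Expr {
        binary_expr(col("usage_idle"), Operator::Plus, lit(5_i64))
    }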
@ -2123,17 +2160,15 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
condition: Option<&ConditionalExpression>, condition: Option<&ConditionalExpression>,
time_range: TimeRange, time_range: TimeRange,
plan: LogicalPlan, plan: LogicalPlan,
schemas: &Schemas, schema: &IQLSchema<'a>,
ds_schema: &DataSourceSchema<'_>,
) -> Result<LogicalPlan> { ) -> Result<LogicalPlan> {
let filter_expr = condition let filter_expr = condition
.map(|condition| { .map(|condition| {
let filter_expr = self.conditional_to_df_expr(condition, schemas)?; let filter_expr = self.conditional_to_df_expr(condition, schema)?;
planner_rewrite_expression::rewrite_conditional_expr( planner_rewrite_expression::rewrite_conditional_expr(
self.s.execution_props(), self.s.execution_props(),
filter_expr, filter_expr,
schemas, schema,
ds_schema,
) )
}) })
.transpose()?; .transpose()?;
@ -2156,8 +2191,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
plan: LogicalPlan, plan: LogicalPlan,
condition: &Option<WhereClause>, condition: &Option<WhereClause>,
cutoff: MetadataCutoff, cutoff: MetadataCutoff,
schemas: &Schemas, schema: &IQLSchema<'_>,
ds_schema: &DataSourceSchema<'_>,
) -> Result<LogicalPlan> { ) -> Result<LogicalPlan> {
let start_time = Timestamp::from(self.s.execution_props().query_execution_start_time); let start_time = Timestamp::from(self.s.execution_props().query_execution_start_time);
@ -2189,7 +2223,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
time_range time_range
}; };
self.plan_condition_time_range(cond.as_ref(), time_range, plan, schemas, ds_schema) self.plan_condition_time_range(cond.as_ref(), time_range, plan, schema)
} }
/// Generate a logical plan for the specified `DataSource`. /// Generate a logical plan for the specified `DataSource`.
@ -2363,16 +2397,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
let Some(table_schema) = self.s.table_schema(&table) else {continue}; let Some(table_schema) = self.s.table_schema(&table) else {continue};
let Some((plan, measurement_expr)) = self.create_table_ref(&table)? else {continue;}; let Some((plan, measurement_expr)) = self.create_table_ref(&table)? else {continue;};
let schemas = Schemas::new(plan.schema())?;
let ds = DataSource::Table(table.clone()); let ds = DataSource::Table(table.clone());
let ds_schema = ds.schema(self.s)?; let schema = IQLSchema::new_from_ds_schema(plan.schema(), ds.schema(self.s)?)?;
let plan = self.plan_where_clause( let plan =
plan, self.plan_where_clause(plan, &condition, metadata_cutoff, &schema)?;
&condition,
metadata_cutoff,
&schemas,
&ds_schema,
)?;
let tags = table_schema let tags = table_schema
.iter() .iter()
@ -2616,16 +2644,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
let Some((plan, measurement_expr)) = self.create_table_ref(&table)? else {continue;}; let Some((plan, measurement_expr)) = self.create_table_ref(&table)? else {continue;};
let schemas = Schemas::new(plan.schema())?;
let ds = DataSource::Table(table.clone()); let ds = DataSource::Table(table.clone());
let ds_schema = ds.schema(self.s)?; let schema = IQLSchema::new_from_ds_schema(plan.schema(), ds.schema(self.s)?)?;
let plan = self.plan_where_clause( let plan =
plan, self.plan_where_clause(plan, &show_tag_values.condition, metadata_cutoff, &schema)?;
&show_tag_values.condition,
metadata_cutoff,
&schemas,
&ds_schema,
)?;
for key in keys { for key in keys {
let idx = plan let idx = plan
@ -2722,16 +2744,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
for table in tables { for table in tables {
let Some((plan, _measurement_expr)) = self.create_table_ref(&table)? else {continue;}; let Some((plan, _measurement_expr)) = self.create_table_ref(&table)? else {continue;};
let schemas = Schemas::new(plan.schema())?;
let ds = DataSource::Table(table.clone()); let ds = DataSource::Table(table.clone());
let ds_schema = ds.schema(self.s)?; let schema = IQLSchema::new_from_ds_schema(plan.schema(), ds.schema(self.s)?)?;
let plan = self.plan_where_clause( let plan =
plan, self.plan_where_clause(plan, &condition, metadata_cutoff, &schema)?;
&condition,
metadata_cutoff,
&schemas,
&ds_schema,
)?;
let plan = LogicalPlanBuilder::from(plan) let plan = LogicalPlanBuilder::from(plan)
.limit(0, Some(1))? .limit(0, Some(1))?
@ -3072,6 +3088,16 @@ fn is_aggregate_field(f: &Field) -> bool {
.is_break() .is_break()
} }
/// A utility function that checks whether `f` is an aggregate field
/// that should be filled with 0 rather than NULL.
fn is_zero_filled_aggregate_field(f: &Field) -> bool {
walk_expr(&f.expr, &mut |e| match e {
IQLExpr::Call(Call { name, .. }) if name == "count" => ControlFlow::Break(()),
_ => ControlFlow::Continue(()),
})
.is_break()
}
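The effect of this check is visible in the snapshots further down, where COUNT projections become `coalesce_struct(COUNT(...), Int64(0))` so that an empty group produces 0 instead of NULL. The detection itself uses the `walk_expr`/`ControlFlow` idiom: break out of the walk as soon as a `count` call is seen, then turn the walk result into a boolean with `.is_break()`. A small self-contained illustration of that idiom; the helper and inputs are made up for the example:

    use std::ops::ControlFlow;

    // Break as soon as a "count" call name is seen; `.is_break()` then
    // converts the walk result into the boolean answer.
    fn contains_count(call_names: &[&str]) -> bool {
        call_names
            .iter()
            .try_for_each(|name| {
                if *name == "count" {
                    ControlFlow::Break(())
                } else {
                    ControlFlow::Continue(())
                }
            })
            .is_break()
    }

    fn main() {
        assert!(contains_count(&["floor", "count"]));
        assert!(!contains_count(&["mean"]));
    }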
fn conditional_op_to_operator(op: ConditionalOperator) -> Result<Operator> { fn conditional_op_to_operator(op: ConditionalOperator) -> Result<Operator> {
match op { match op {
ConditionalOperator::Eq => Ok(Operator::Eq), ConditionalOperator::Eq => Ok(Operator::Eq),
@ -3886,7 +3912,7 @@ mod test {
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, difference [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), difference:Float64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, difference [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), difference:Float64;N]
Filter: NOT difference IS NULL [time:Timestamp(Nanosecond, None), difference:Float64;N] Filter: NOT difference IS NULL [time:Timestamp(Nanosecond, None), difference:Float64;N]
Projection: cpu.time AS time, difference(cpu.usage_idle) AS difference [time:Timestamp(Nanosecond, None), difference:Float64;N] Projection: cpu.time AS time, difference(cpu.usage_idle) AS difference [time:Timestamp(Nanosecond, None), difference:Float64;N]
WindowAggr: windowExpr=[[AggregateUDF { name: "difference", signature: Signature { type_signature: OneOf([Exact([Int64]), Exact([UInt64]), Exact([Float64])]), volatility: Immutable }, fun: "<FUNC>" }(cpu.usage_idle) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS difference(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, difference(cpu.usage_idle):Float64;N] WindowAggr: windowExpr=[[difference(cpu.usage_idle) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS difference(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, difference(cpu.usage_idle):Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###); "###);
@ -3896,7 +3922,7 @@ mod test {
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, difference [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, difference:Float64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, difference [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, difference:Float64;N]
Filter: NOT difference IS NULL [time:Timestamp(Nanosecond, None);N, difference:Float64;N] Filter: NOT difference IS NULL [time:Timestamp(Nanosecond, None);N, difference:Float64;N]
Projection: time, difference(AVG(cpu.usage_idle)) AS difference [time:Timestamp(Nanosecond, None);N, difference:Float64;N] Projection: time, difference(AVG(cpu.usage_idle)) AS difference [time:Timestamp(Nanosecond, None);N, difference:Float64;N]
WindowAggr: windowExpr=[[AggregateUDF { name: "difference", signature: Signature { type_signature: OneOf([Exact([Int64]), Exact([UInt64]), Exact([Float64])]), volatility: Immutable }, fun: "<FUNC>" }(AVG(cpu.usage_idle)) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS difference(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, difference(AVG(cpu.usage_idle)):Float64;N] WindowAggr: windowExpr=[[difference(AVG(cpu.usage_idle)) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS difference(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, difference(AVG(cpu.usage_idle)):Float64;N]
GapFill: groupBy=[time], aggr=[[AVG(cpu.usage_idle)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N] GapFill: groupBy=[time], aggr=[[AVG(cpu.usage_idle)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
@ -3912,7 +3938,7 @@ mod test {
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, non_negative_difference [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_negative_difference:Float64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, non_negative_difference [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_negative_difference:Float64;N]
Filter: NOT non_negative_difference IS NULL [time:Timestamp(Nanosecond, None), non_negative_difference:Float64;N] Filter: NOT non_negative_difference IS NULL [time:Timestamp(Nanosecond, None), non_negative_difference:Float64;N]
Projection: cpu.time AS time, non_negative_difference(cpu.usage_idle) AS non_negative_difference [time:Timestamp(Nanosecond, None), non_negative_difference:Float64;N] Projection: cpu.time AS time, non_negative_difference(cpu.usage_idle) AS non_negative_difference [time:Timestamp(Nanosecond, None), non_negative_difference:Float64;N]
WindowAggr: windowExpr=[[AggregateUDF { name: "non_negative_difference", signature: Signature { type_signature: OneOf([Exact([Int64]), Exact([UInt64]), Exact([Float64])]), volatility: Immutable }, fun: "<FUNC>" }(cpu.usage_idle) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS non_negative_difference(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, non_negative_difference(cpu.usage_idle):Float64;N] WindowAggr: windowExpr=[[non_negative_difference(cpu.usage_idle) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS non_negative_difference(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, non_negative_difference(cpu.usage_idle):Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###); "###);
@ -3922,7 +3948,7 @@ mod test {
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, non_negative_difference [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, non_negative_difference:Float64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, non_negative_difference [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, non_negative_difference:Float64;N]
Filter: NOT non_negative_difference IS NULL [time:Timestamp(Nanosecond, None);N, non_negative_difference:Float64;N] Filter: NOT non_negative_difference IS NULL [time:Timestamp(Nanosecond, None);N, non_negative_difference:Float64;N]
Projection: time, non_negative_difference(AVG(cpu.usage_idle)) AS non_negative_difference [time:Timestamp(Nanosecond, None);N, non_negative_difference:Float64;N] Projection: time, non_negative_difference(AVG(cpu.usage_idle)) AS non_negative_difference [time:Timestamp(Nanosecond, None);N, non_negative_difference:Float64;N]
WindowAggr: windowExpr=[[AggregateUDF { name: "non_negative_difference", signature: Signature { type_signature: OneOf([Exact([Int64]), Exact([UInt64]), Exact([Float64])]), volatility: Immutable }, fun: "<FUNC>" }(AVG(cpu.usage_idle)) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS non_negative_difference(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, non_negative_difference(AVG(cpu.usage_idle)):Float64;N] WindowAggr: windowExpr=[[non_negative_difference(AVG(cpu.usage_idle)) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS non_negative_difference(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, non_negative_difference(AVG(cpu.usage_idle)):Float64;N]
GapFill: groupBy=[time], aggr=[[AVG(cpu.usage_idle)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N] GapFill: groupBy=[time], aggr=[[AVG(cpu.usage_idle)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
@ -3967,7 +3993,7 @@ mod test {
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, derivative [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), derivative:Float64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, derivative [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), derivative:Float64;N]
Filter: NOT derivative IS NULL [time:Timestamp(Nanosecond, None), derivative:Float64;N] Filter: NOT derivative IS NULL [time:Timestamp(Nanosecond, None), derivative:Float64;N]
Projection: cpu.time AS time, derivative(cpu.usage_idle) AS derivative [time:Timestamp(Nanosecond, None), derivative:Float64;N] Projection: cpu.time AS time, derivative(cpu.usage_idle) AS derivative [time:Timestamp(Nanosecond, None), derivative:Float64;N]
WindowAggr: windowExpr=[[AggregateUDF { name: "derivative(unit: 1000000000)", signature: Signature { type_signature: OneOf([Exact([Timestamp(Nanosecond, None), Int64]), Exact([Timestamp(Nanosecond, None), UInt64]), Exact([Timestamp(Nanosecond, None), Float64])]), volatility: Immutable }, fun: "<FUNC>" }(cpu.time, cpu.usage_idle) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS derivative(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, derivative(cpu.usage_idle):Float64;N] WindowAggr: windowExpr=[[derivative(cpu.usage_idle, IntervalMonthDayNano("1000000000"), cpu.time) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS derivative(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, derivative(cpu.usage_idle):Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###); "###);
@ -3977,7 +4003,7 @@ mod test {
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, derivative [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, derivative:Float64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, derivative [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, derivative:Float64;N]
Filter: NOT derivative IS NULL [time:Timestamp(Nanosecond, None);N, derivative:Float64;N] Filter: NOT derivative IS NULL [time:Timestamp(Nanosecond, None);N, derivative:Float64;N]
Projection: time, derivative(AVG(cpu.usage_idle)) AS derivative [time:Timestamp(Nanosecond, None);N, derivative:Float64;N] Projection: time, derivative(AVG(cpu.usage_idle)) AS derivative [time:Timestamp(Nanosecond, None);N, derivative:Float64;N]
WindowAggr: windowExpr=[[AggregateUDF { name: "derivative(unit: 10000000000)", signature: Signature { type_signature: OneOf([Exact([Timestamp(Nanosecond, None), Int64]), Exact([Timestamp(Nanosecond, None), UInt64]), Exact([Timestamp(Nanosecond, None), Float64])]), volatility: Immutable }, fun: "<FUNC>" }(time, AVG(cpu.usage_idle)) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS derivative(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, derivative(AVG(cpu.usage_idle)):Float64;N] WindowAggr: windowExpr=[[derivative(AVG(cpu.usage_idle), IntervalMonthDayNano("10000000000"), time) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS derivative(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, derivative(AVG(cpu.usage_idle)):Float64;N]
GapFill: groupBy=[time], aggr=[[AVG(cpu.usage_idle)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N] GapFill: groupBy=[time], aggr=[[AVG(cpu.usage_idle)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
@ -3993,7 +4019,7 @@ mod test {
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, non_negative_derivative [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_negative_derivative:Float64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, non_negative_derivative [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_negative_derivative:Float64;N]
Filter: NOT non_negative_derivative IS NULL [time:Timestamp(Nanosecond, None), non_negative_derivative:Float64;N] Filter: NOT non_negative_derivative IS NULL [time:Timestamp(Nanosecond, None), non_negative_derivative:Float64;N]
Projection: cpu.time AS time, non_negative_derivative(cpu.usage_idle) AS non_negative_derivative [time:Timestamp(Nanosecond, None), non_negative_derivative:Float64;N] Projection: cpu.time AS time, non_negative_derivative(cpu.usage_idle) AS non_negative_derivative [time:Timestamp(Nanosecond, None), non_negative_derivative:Float64;N]
WindowAggr: windowExpr=[[AggregateUDF { name: "non_negative_derivative(unit: 1000000000)", signature: Signature { type_signature: OneOf([Exact([Timestamp(Nanosecond, None), Int64]), Exact([Timestamp(Nanosecond, None), UInt64]), Exact([Timestamp(Nanosecond, None), Float64])]), volatility: Immutable }, fun: "<FUNC>" }(cpu.time, cpu.usage_idle) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS non_negative_derivative(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, non_negative_derivative(cpu.usage_idle):Float64;N] WindowAggr: windowExpr=[[non_negative_derivative(cpu.usage_idle, IntervalMonthDayNano("1000000000"), cpu.time) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS non_negative_derivative(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, non_negative_derivative(cpu.usage_idle):Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###); "###);
@ -4003,7 +4029,46 @@ mod test {
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, non_negative_derivative [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, non_negative_derivative [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N]
Filter: NOT non_negative_derivative IS NULL [time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N] Filter: NOT non_negative_derivative IS NULL [time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N]
Projection: time, non_negative_derivative(AVG(cpu.usage_idle)) AS non_negative_derivative [time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N] Projection: time, non_negative_derivative(AVG(cpu.usage_idle)) AS non_negative_derivative [time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N]
WindowAggr: windowExpr=[[AggregateUDF { name: "non_negative_derivative(unit: 10000000000)", signature: Signature { type_signature: OneOf([Exact([Timestamp(Nanosecond, None), Int64]), Exact([Timestamp(Nanosecond, None), UInt64]), Exact([Timestamp(Nanosecond, None), Float64])]), volatility: Immutable }, fun: "<FUNC>" }(time, AVG(cpu.usage_idle)) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS non_negative_derivative(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, non_negative_derivative(AVG(cpu.usage_idle)):Float64;N] WindowAggr: windowExpr=[[non_negative_derivative(AVG(cpu.usage_idle), IntervalMonthDayNano("10000000000"), time) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS non_negative_derivative(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, non_negative_derivative(AVG(cpu.usage_idle)):Float64;N]
GapFill: groupBy=[time], aggr=[[AVG(cpu.usage_idle)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###);
// selector
assert_snapshot!(plan("SELECT NON_NEGATIVE_DERIVATIVE(LAST(usage_idle)) FROM cpu GROUP BY TIME(10s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, non_negative_derivative [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N]
Filter: NOT non_negative_derivative IS NULL [time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N]
Projection: time, non_negative_derivative(selector_last(cpu.usage_idle,cpu.time)[value]) AS non_negative_derivative [time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N]
WindowAggr: windowExpr=[[non_negative_derivative((selector_last(cpu.usage_idle,cpu.time))[value], IntervalMonthDayNano("10000000000"), time) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS non_negative_derivative(selector_last(cpu.usage_idle,cpu.time)[value])]] [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N, non_negative_derivative(selector_last(cpu.usage_idle,cpu.time)[value]):Float64;N]
GapFill: groupBy=[time], aggr=[[selector_last(cpu.usage_idle,cpu.time)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[selector_last(cpu.usage_idle, cpu.time)]] [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###);
}
#[test]
fn test_cumulative_sum() {
// no aggregates
assert_snapshot!(plan("SELECT CUMULATIVE_SUM(usage_idle) FROM cpu"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cumulative_sum:Float64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, cumulative_sum [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cumulative_sum:Float64;N]
Filter: NOT cumulative_sum IS NULL [time:Timestamp(Nanosecond, None), cumulative_sum:Float64;N]
Projection: cpu.time AS time, cumulative_sum(cpu.usage_idle) AS cumulative_sum [time:Timestamp(Nanosecond, None), cumulative_sum:Float64;N]
WindowAggr: windowExpr=[[cumumlative_sum(cpu.usage_idle) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS cumulative_sum(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, cumulative_sum(cpu.usage_idle):Float64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###);
// aggregate
assert_snapshot!(plan("SELECT CUMULATIVE_SUM(MEAN(usage_idle)) FROM cpu GROUP BY TIME(10s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, cumulative_sum:Float64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, cumulative_sum [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, cumulative_sum:Float64;N]
Filter: NOT cumulative_sum IS NULL [time:Timestamp(Nanosecond, None);N, cumulative_sum:Float64;N]
Projection: time, cumulative_sum(AVG(cpu.usage_idle)) AS cumulative_sum [time:Timestamp(Nanosecond, None);N, cumulative_sum:Float64;N]
WindowAggr: windowExpr=[[cumumlative_sum(AVG(cpu.usage_idle)) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS cumulative_sum(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, cumulative_sum(AVG(cpu.usage_idle)):Float64;N]
GapFill: groupBy=[time], aggr=[[AVG(cpu.usage_idle)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N] GapFill: groupBy=[time], aggr=[[AVG(cpu.usage_idle)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
@ -4043,7 +4108,7 @@ mod test {
"###); "###);
assert_snapshot!(plan("SELECT COUNT(DISTINCT usage_idle) FROM cpu"), @r###" assert_snapshot!(plan("SELECT COUNT(DISTINCT usage_idle) FROM cpu"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, COUNT(DISTINCT cpu.usage_idle) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, coalesce_struct(COUNT(DISTINCT cpu.usage_idle), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N]
Aggregate: groupBy=[[]], aggr=[[COUNT(DISTINCT cpu.usage_idle)]] [COUNT(DISTINCT cpu.usage_idle):Int64;N] Aggregate: groupBy=[[]], aggr=[[COUNT(DISTINCT cpu.usage_idle)]] [COUNT(DISTINCT cpu.usage_idle):Int64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###); "###);
@ -4114,7 +4179,7 @@ mod test {
fn test_selectors_and_aggregate() { fn test_selectors_and_aggregate() {
assert_snapshot!(plan("SELECT LAST(usage_idle), COUNT(usage_idle) FROM cpu"), @r###" assert_snapshot!(plan("SELECT LAST(usage_idle), COUNT(usage_idle) FROM cpu"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), last:Float64;N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), last:Float64;N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, (selector_last(cpu.usage_idle,cpu.time))[value] AS last, COUNT(cpu.usage_idle) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), last:Float64;N, count:Int64;N] Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, (selector_last(cpu.usage_idle,cpu.time))[value] AS last, coalesce_struct(COUNT(cpu.usage_idle), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), last:Float64;N, count:Int64;N]
Aggregate: groupBy=[[]], aggr=[[selector_last(cpu.usage_idle, cpu.time), COUNT(cpu.usage_idle)]] [selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N, COUNT(cpu.usage_idle):Int64;N] Aggregate: groupBy=[[]], aggr=[[selector_last(cpu.usage_idle, cpu.time), COUNT(cpu.usage_idle)]] [selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N, COUNT(cpu.usage_idle):Int64;N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###); "###);
@ -4793,20 +4858,20 @@ mod test {
fn no_group_by() { fn no_group_by() {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N]
Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###); "###);
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY non_existent"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY non_existent"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_existent:Null;N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_existent:Null;N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, NULL AS non_existent, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_existent:Null;N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, NULL AS non_existent, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_existent:Null;N, count:Int64;N]
Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###); "###);
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY foo"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY foo"), @r###"
Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###); "###);
@ -4814,7 +4879,7 @@ mod test {
// The `COUNT(f64_field)` aggregate is only projected once in the Aggregate and reused in the projection // The `COUNT(f64_field)` aggregate is only projected once in the Aggregate and reused in the projection
assert_snapshot!(plan("SELECT COUNT(f64_field), COUNT(f64_field) + COUNT(f64_field), COUNT(f64_field) * 3 FROM data"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field), COUNT(f64_field) + COUNT(f64_field), COUNT(f64_field) * 3 FROM data"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_count:Int64;N, count_1:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_count:Int64;N, count_1:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, COUNT(data.f64_field) AS count, COUNT(data.f64_field) + COUNT(data.f64_field) AS count_count, COUNT(data.f64_field) * Int64(3) AS count_1 [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_count:Int64;N, count_1:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count, coalesce_struct(COUNT(data.f64_field), Int64(0)) + coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count_count, coalesce_struct(COUNT(data.f64_field), Int64(0)) * Int64(3) AS count_1 [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_count:Int64;N, count_1:Int64;N]
Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###); "###);
@ -4822,7 +4887,7 @@ mod test {
// non-existent tags are excluded from the Aggregate groupBy and Sort operators // non-existent tags are excluded from the Aggregate groupBy and Sort operators
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY foo, non_existent"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY foo, non_existent"), @r###"
Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, non_existent:Null;N, count:Int64;N] Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, non_existent:Null;N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, NULL AS non_existent, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, non_existent:Null;N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, NULL AS non_existent, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, non_existent:Null;N, count:Int64;N]
Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###); "###);
@ -4830,7 +4895,7 @@ mod test {
// Aggregate expression is projected once and reused in final projection // Aggregate expression is projected once and reused in final projection
assert_snapshot!(plan("SELECT COUNT(f64_field), COUNT(f64_field) * 2 FROM data"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field), COUNT(f64_field) * 2 FROM data"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_1:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_1:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, COUNT(data.f64_field) AS count, COUNT(data.f64_field) * Int64(2) AS count_1 [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_1:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count, coalesce_struct(COUNT(data.f64_field), Int64(0)) * Int64(2) AS count_1 [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_1:Int64;N]
Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###); "###);
@ -4869,7 +4934,7 @@ mod test {
fn group_by_time() { fn group_by_time() {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(none)"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(none)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
@ -4878,7 +4943,7 @@ mod test {
// supports offset parameter // supports offset parameter
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s, 5s) FILL(none)"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s, 5s) FILL(none)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(5000000000, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(5000000000, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
@ -4890,7 +4955,7 @@ mod test {
// No time bounds // No time bounds
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s)"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
@ -4903,7 +4968,7 @@ mod test {
// No lower time bounds // No lower time bounds
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time < '2022-10-31T02:02:00Z' GROUP BY TIME(10s)"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time < '2022-10-31T02:02:00Z' GROUP BY TIME(10s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1667181719999999999, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1667181719999999999, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time <= TimestampNanosecond(1667181719999999999, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Filter: data.time <= TimestampNanosecond(1667181719999999999, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
@ -4916,7 +4981,7 @@ mod test {
// No upper time bounds // No upper time bounds
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time >= '2022-10-31T02:00:00Z' GROUP BY TIME(10s)"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time >= '2022-10-31T02:00:00Z' GROUP BY TIME(10s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Included(Literal(TimestampNanosecond(1667181600000000000, None)))..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Included(Literal(TimestampNanosecond(1667181600000000000, None)))..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time >= TimestampNanosecond(1667181600000000000, None) AND data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Filter: data.time >= TimestampNanosecond(1667181600000000000, None) AND data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
@ -4929,7 +4994,7 @@ mod test {
// Default is FILL(null) // Default is FILL(null)
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(10s)"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(10s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Included(Literal(TimestampNanosecond(1667181600000000000, None)))..Included(Literal(TimestampNanosecond(1667181719999999999, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Included(Literal(TimestampNanosecond(1667181600000000000, None)))..Included(Literal(TimestampNanosecond(1667181719999999999, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time >= TimestampNanosecond(1667181600000000000, None) AND data.time <= TimestampNanosecond(1667181719999999999, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Filter: data.time >= TimestampNanosecond(1667181600000000000, None) AND data.time <= TimestampNanosecond(1667181719999999999, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
@ -4941,7 +5006,7 @@ mod test {
fn group_by_time_gapfill_default_is_fill_null1() { fn group_by_time_gapfill_default_is_fill_null1() {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s)"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
@ -4953,7 +5018,7 @@ mod test {
fn group_by_time_gapfill_default_is_fill_null2() { fn group_by_time_gapfill_default_is_fill_null2() {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(null)"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(null)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
@ -4965,7 +5030,7 @@ mod test {
fn group_by_time_gapfill_default_is_fill_null3() { fn group_by_time_gapfill_default_is_fill_null3() {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(previous)"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(previous)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[time], aggr=[[LOCF(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] GapFill: groupBy=[time], aggr=[[LOCF(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
@ -4989,7 +5054,7 @@ mod test {
fn group_by_time_gapfill_default_is_fill_null5() { fn group_by_time_gapfill_default_is_fill_null5() {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(linear)"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(linear)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
GapFill: groupBy=[time], aggr=[[INTERPOLATE(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] GapFill: groupBy=[time], aggr=[[INTERPOLATE(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
@ -5031,7 +5096,7 @@ mod test {
Filter: iox::row <= Int64(1) [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N] Filter: iox::row <= Int64(1) [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N]
WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [foo] ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS iox::row]] [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N] WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [foo] ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS iox::row]] [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N]
Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###); "###);
@ -5045,7 +5110,7 @@ mod test {
Filter: iox::row > Int64(1) [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N] Filter: iox::row > Int64(1) [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N]
WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [foo] ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS iox::row]] [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N] WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [foo] ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS iox::row]] [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N]
Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###); "###);
@ -5059,7 +5124,7 @@ mod test {
Filter: iox::row BETWEEN Int64(4) AND Int64(5) [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N] Filter: iox::row BETWEEN Int64(4) AND Int64(5) [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N]
WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [foo] ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS iox::row]] [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N] WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [foo] ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS iox::row]] [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N]
Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
"###); "###);
@ -5085,7 +5150,7 @@ mod test {
fn group_by_time_precision() { fn group_by_time_precision() {
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10u) FILL(none)"), @r###" assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10u) FILL(none)"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
@ -5333,6 +5398,22 @@ mod test {
"###); "###);
} }
#[test]
fn test_select_function_tag_column() {
assert_snapshot!(plan("SELECT last(foo) as foo, first(usage_idle) from cpu group by foo"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Null;N, first:Float64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, NULL AS foo, (selector_first(cpu.usage_idle,cpu.time))[value] AS first [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Null;N, first:Float64;N]
Aggregate: groupBy=[[]], aggr=[[selector_first(cpu.usage_idle, cpu.time)]] [selector_first(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###);
assert_snapshot!(plan("SELECT count(foo) as foo, first(usage_idle) from cpu group by foo"), @r###"
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Null;N, foo_1:Null;N, first:Float64;N]
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, NULL AS foo, (coalesce_struct(selector_first(cpu.usage_idle,cpu.time,NULL), Struct({value:Float64(0),time:TimestampNanosecond(0, None),other_1:NULL})))[other_1] AS foo_1, (selector_first(cpu.usage_idle,cpu.time,NULL))[value] AS first [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Null;N, foo_1:Null;N, first:Float64;N]
Aggregate: groupBy=[[]], aggr=[[selector_first(cpu.usage_idle, cpu.time, NULL)]] [selector_first(cpu.usage_idle,cpu.time,NULL):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "other_1", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
"###);
}
// The following is an outline of additional scenarios to develop // The following is an outline of additional scenarios to develop
// as the planner learns more features. // as the planner learns more features.
// This is not an exhaustive list and is expected to grow as the // This is not an exhaustive list and is expected to grow as the


@ -27,11 +27,8 @@ pub(super) fn make_tag_key_column_meta(
let index_map = fields let index_map = fields
.iter() .iter()
.enumerate() .enumerate()
.filter_map(|(index, f)| match &f.expr { .filter_map(|(index, f)| match &f.data_type {
IQLExpr::VarRef(VarRef { Some(InfluxColumnType::Tag) | None => Some((f.name.as_str(), index + START_INDEX)),
name,
data_type: Some(VarRefDataType::Tag) | None,
}) => Some((name.as_str(), index + START_INDEX)),
_ => None, _ => None,
}) })
.collect::<HashMap<_, _>>(); .collect::<HashMap<_, _>>();


@ -123,44 +123,42 @@
//! [`Eval`]: https://github.com/influxdata/influxql/blob/1ba470371ec093d57a726b143fe6ccbacf1b452b/ast.go#L4137 //! [`Eval`]: https://github.com/influxdata/influxql/blob/1ba470371ec093d57a726b143fe6ccbacf1b452b/ast.go#L4137
use std::sync::Arc; use std::sync::Arc;
use crate::plan::util::Schemas; use crate::plan::util::IQLSchema;
use arrow::datatypes::DataType; use arrow::datatypes::DataType;
use datafusion::common::tree_node::{Transformed, TreeNode, TreeNodeRewriter}; use datafusion::common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
use datafusion::common::{Result, ScalarValue}; use datafusion::common::{Result, ScalarValue};
use datafusion::logical_expr::expr::{AggregateFunction, AggregateUDF, WindowFunction};
use datafusion::logical_expr::{ use datafusion::logical_expr::{
binary_expr, cast, coalesce, lit, BinaryExpr, Expr, ExprSchemable, Operator, binary_expr, cast, coalesce, lit, BinaryExpr, Expr, ExprSchemable, GetIndexedField, Operator,
}; };
use datafusion::optimizer::simplify_expressions::{ExprSimplifier, SimplifyContext}; use datafusion::optimizer::simplify_expressions::{ExprSimplifier, SimplifyContext};
use datafusion::physical_expr::execution_props::ExecutionProps; use datafusion::physical_expr::execution_props::ExecutionProps;
use datafusion::prelude::when; use datafusion::prelude::{when, Column};
use observability_deps::tracing::trace; use observability_deps::tracing::trace;
use predicate::rpc_predicate::{iox_expr_rewrite, simplify_predicate}; use predicate::rpc_predicate::{iox_expr_rewrite, simplify_predicate};
use super::ir::DataSourceSchema;
/// Perform a series of passes to rewrite `expr` in compliance with InfluxQL behavior /// Perform a series of passes to rewrite `expr` in compliance with InfluxQL behavior
/// in an effort to ensure the query executes without error. /// in an effort to ensure the query executes without error.
pub(super) fn rewrite_conditional_expr( pub(super) fn rewrite_conditional_expr(
exec_props: &ExecutionProps, exec_props: &ExecutionProps,
expr: Expr, expr: Expr,
schemas: &Schemas, schema: &IQLSchema<'_>,
ds_schema: &DataSourceSchema<'_>,
) -> Result<Expr> { ) -> Result<Expr> {
let simplify_context = let simplify_context =
SimplifyContext::new(exec_props).with_schema(Arc::clone(&schemas.df_schema)); SimplifyContext::new(exec_props).with_schema(Arc::clone(&schema.df_schema));
let simplifier = ExprSimplifier::new(simplify_context); let simplifier = ExprSimplifier::new(simplify_context);
Ok(expr) Ok(expr)
.map(|expr| log_rewrite(expr, "original")) .map(|expr| log_rewrite(expr, "original"))
// make regex matching with invalid types produce false // make regex matching with invalid types produce false
.and_then(|expr| expr.rewrite(&mut FixRegularExpressions { schemas })) .and_then(|expr| expr.rewrite(&mut FixRegularExpressions { schema }))
.map(|expr| log_rewrite(expr, "after fix_regular_expressions")) .map(|expr| log_rewrite(expr, "after fix_regular_expressions"))
// rewrite exprs with incompatible operands to NULL or FALSE // rewrite exprs with incompatible operands to NULL or FALSE
// (seems like FixRegularExpressions could be combined into this pass) // (seems like FixRegularExpressions could be combined into this pass)
.and_then(|expr| rewrite_expr(expr, schemas)) .and_then(|expr| rewrite_expr(expr, schema))
.map(|expr| log_rewrite(expr, "after rewrite_expr")) .map(|expr| log_rewrite(expr, "after rewrite_expr"))
// Convert tag column references to CASE WHEN <tag> IS NULL THEN '' ELSE <tag> END // Convert tag column references to CASE WHEN <tag> IS NULL THEN '' ELSE <tag> END
.and_then(|expr| rewrite_tag_columns(expr, schemas, ds_schema)) .and_then(|expr| rewrite_tag_columns(expr, schema))
.map(|expr| log_rewrite(expr, "after rewrite_tag_columns")) .map(|expr| log_rewrite(expr, "after rewrite_tag_columns"))
// Push comparison operators into CASE exprs: // Push comparison operators into CASE exprs:
// CASE WHEN tag0 IS NULL THEN '' ELSE tag0 END = 'foo' // CASE WHEN tag0 IS NULL THEN '' ELSE tag0 END = 'foo'
@ -172,7 +170,7 @@ pub(super) fn rewrite_conditional_expr(
// - convert numeric types so that operands agree // - convert numeric types so that operands agree
// - convert Utf8 to Dictionary as needed // - convert Utf8 to Dictionary as needed
// The next step will fail with type errors if we don't do this. // The next step will fail with type errors if we don't do this.
.and_then(|expr| simplifier.coerce(expr, Arc::clone(&schemas.df_schema))) .and_then(|expr| simplifier.coerce(expr, Arc::clone(&schema.df_schema)))
.map(|expr| log_rewrite(expr, "after coerce")) .map(|expr| log_rewrite(expr, "after coerce"))
// DataFusion expression simplification. This is important here because: // DataFusion expression simplification. This is important here because:
// CASE WHEN tag0 IS NULL THEN '' = 'foo' ELSE tag0 = 'foo' END // CASE WHEN tag0 IS NULL THEN '' = 'foo' ELSE tag0 = 'foo' END
@ -206,8 +204,8 @@ fn log_rewrite(expr: Expr, description: &str) -> Expr {
/// Perform a series of passes to rewrite `expr`, used as a column projection, /// Perform a series of passes to rewrite `expr`, used as a column projection,
/// to match the behavior of InfluxQL. /// to match the behavior of InfluxQL.
pub(super) fn rewrite_field_expr(expr: Expr, schemas: &Schemas) -> Result<Expr> { pub(super) fn rewrite_field_expr(expr: Expr, schema: &IQLSchema<'_>) -> Result<Expr> {
rewrite_expr(expr, schemas) rewrite_expr(expr, schema)
} }
/// The expression was rewritten /// The expression was rewritten
@ -225,7 +223,7 @@ fn no(expr: Expr) -> Result<Transformed<Expr>> {
/// ///
/// Rewrite and coerce the expression tree to model the behavior /// Rewrite and coerce the expression tree to model the behavior
/// of an InfluxQL query. /// of an InfluxQL query.
fn rewrite_expr(expr: Expr, schemas: &Schemas) -> Result<Expr> { fn rewrite_expr(expr: Expr, schema: &IQLSchema<'_>) -> Result<Expr> {
expr.transform(&|expr| { expr.transform(&|expr| {
match expr { match expr {
Expr::BinaryExpr(BinaryExpr { Expr::BinaryExpr(BinaryExpr {
@ -233,8 +231,8 @@ fn rewrite_expr(expr: Expr, schemas: &Schemas) -> Result<Expr> {
op, op,
ref right, ref right,
}) => { }) => {
let lhs_type = left.get_type(&schemas.df_schema)?; let lhs_type = left.get_type(&schema.df_schema)?;
let rhs_type = right.get_type(&schemas.df_schema)?; let rhs_type = right.get_type(&schema.df_schema)?;
match (lhs_type, op, rhs_type) { match (lhs_type, op, rhs_type) {
// //
@ -422,6 +420,23 @@ fn rewrite_expr(expr: Expr, schemas: &Schemas) -> Result<Expr> {
_ => yes(lit(ScalarValue::Null)), _ => yes(lit(ScalarValue::Null)),
} }
} }
// Invoking an aggregate or window function on a tag column should return `NULL`
// to be consistent with OG.
Expr::AggregateFunction(AggregateFunction { ref args, .. } )
| Expr::AggregateUDF(AggregateUDF { ref args, .. } )
| Expr::WindowFunction(WindowFunction { ref args, .. } ) => match &args[0] {
Expr::Column(Column { ref name, .. }) if schema.is_tag_field(name) => yes(lit(ScalarValue::Null)),
_ => no(expr),
}
// If the InfluxQL query used a selector on a tag column, like `last(tag_col)`
// then there will be an indexed field. Convert this to `NULL` as well.
Expr::GetIndexedField(GetIndexedField { expr: ref e, .. }) => match e.as_ref() {
Expr::Literal(ScalarValue::Null) => yes(lit(ScalarValue::Null)),
_ => no(expr),
}
// //
// Literals and other expressions are passed through to DataFusion, // Literals and other expressions are passed through to DataFusion,
// as it will handle evaluating function calls, etc // as it will handle evaluating function calls, etc
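
The two new arms above make any aggregate, aggregate UDF, or window call whose first argument is a tag column collapse to a NULL literal, and the GetIndexedField arm then propagates that NULL when a selector such as last(tag_col) is indexed into; this is what the new test_select_function_tag_column snapshots earlier in the diff exercise. A reduced, self-contained sketch of the same transform, with a plain slice of tag names standing in for IQLSchema::is_tag_field:

    use datafusion::common::tree_node::{Transformed, TreeNode};
    use datafusion::common::{Result, ScalarValue};
    use datafusion::logical_expr::expr::AggregateFunction;
    use datafusion::logical_expr::{lit, Expr};
    use datafusion::prelude::Column;

    // Replace `agg(tag_col)` with NULL and leave every other expression untouched.
    fn null_out_tag_aggregates(expr: Expr, tag_names: &[&str]) -> Result<Expr> {
        expr.transform(&|e| {
            let is_tag_agg = match &e {
                Expr::AggregateFunction(AggregateFunction { args, .. }) => matches!(
                    args.first(),
                    Some(Expr::Column(Column { name, .. })) if tag_names.contains(&name.as_str())
                ),
                _ => false,
            };
            Ok(if is_tag_agg {
                Transformed::Yes(lit(ScalarValue::Null))
            } else {
                Transformed::No(e)
            })
        })
    }
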
@ -467,7 +482,7 @@ fn rewrite_boolean(lhs: Expr, op: Operator, rhs: Expr) -> Expr {
/// Rewrite regex conditional expressions to match InfluxQL behaviour. /// Rewrite regex conditional expressions to match InfluxQL behaviour.
struct FixRegularExpressions<'a> { struct FixRegularExpressions<'a> {
schemas: &'a Schemas, schema: &'a IQLSchema<'a>,
} }
impl<'a> TreeNodeRewriter for FixRegularExpressions<'a> { impl<'a> TreeNodeRewriter for FixRegularExpressions<'a> {
@ -483,7 +498,7 @@ impl<'a> TreeNodeRewriter for FixRegularExpressions<'a> {
right, right,
}) => { }) => {
Ok(if let Expr::Column(ref col) = *left { Ok(if let Expr::Column(ref col) = *left {
match self.schemas.df_schema.field_from_column(col)?.data_type() { match self.schema.df_schema.field_from_column(col)?.data_type() {
DataType::Dictionary(..) | DataType::Utf8 => { DataType::Dictionary(..) | DataType::Utf8 => {
Expr::BinaryExpr(BinaryExpr { left, op, right }) Expr::BinaryExpr(BinaryExpr { left, op, right })
} }
@ -517,13 +532,9 @@ impl<'a> TreeNodeRewriter for FixRegularExpressions<'a> {
/// case when tag0 is null then "" else tag0 end /// case when tag0 is null then "" else tag0 end
/// ``` /// ```
/// This ensures that we treat tags with the same semantics as OG InfluxQL. /// This ensures that we treat tags with the same semantics as OG InfluxQL.
fn rewrite_tag_columns( fn rewrite_tag_columns(expr: Expr, schema: &IQLSchema<'_>) -> Result<Expr> {
expr: Expr,
_schemas: &Schemas,
ds_schema: &DataSourceSchema<'_>,
) -> Result<Expr> {
expr.transform(&|expr| match expr { expr.transform(&|expr| match expr {
Expr::Column(ref c) if ds_schema.is_tag_field(&c.name) => { Expr::Column(ref c) if schema.is_tag_field(&c.name) => {
yes(when(expr.clone().is_null(), lit("")).otherwise(expr)?) yes(when(expr.clone().is_null(), lit("")).otherwise(expr)?)
} }
e => no(e), e => no(e),
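
Seen from the query's side, the rewrite above turns every bare tag reference into CASE WHEN tag IS NULL THEN '' ELSE tag END, so rows where the tag is absent compare like an empty string, as OG InfluxQL does. A small sketch of building that expression directly with DataFusion's when/otherwise helpers (the function name is illustrative, not part of the crate):

    use datafusion::common::Result;
    use datafusion::logical_expr::{col, lit, Expr};
    use datafusion::prelude::when;

    // CASE WHEN <tag> IS NULL THEN '' ELSE <tag> END
    fn tag_with_influxql_semantics(tag: &str) -> Result<Expr> {
        when(col(tag).is_null(), lit("")).otherwise(col(tag))
    }
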
@ -532,6 +543,8 @@ fn rewrite_tag_columns(
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use crate::plan::ir::DataSourceSchema;
use super::*; use super::*;
use datafusion::logical_expr::lit_timestamp_nano; use datafusion::logical_expr::lit_timestamp_nano;
use datafusion::prelude::col; use datafusion::prelude::col;
@ -542,7 +555,7 @@ mod test {
use schema::{InfluxFieldType, SchemaBuilder}; use schema::{InfluxFieldType, SchemaBuilder};
use std::sync::Arc; use std::sync::Arc;
fn new_schemas() -> (Schemas, DataSourceSchema<'static>) { fn new_schema() -> IQLSchema<'static> {
let iox_schema = SchemaBuilder::new() let iox_schema = SchemaBuilder::new()
.measurement("m0") .measurement("m0")
.timestamp() .timestamp()
@ -556,7 +569,8 @@ mod test {
.build() .build()
.expect("schema failed"); .expect("schema failed");
let df_schema: DFSchemaRef = Arc::clone(iox_schema.inner()).to_dfschema_ref().unwrap(); let df_schema: DFSchemaRef = Arc::clone(iox_schema.inner()).to_dfschema_ref().unwrap();
(Schemas { df_schema }, DataSourceSchema::Table(iox_schema)) let ds_schema = DataSourceSchema::Table(iox_schema);
IQLSchema::new_from_ds_schema(&df_schema, ds_schema).unwrap()
} }
/// Tests which validate that division is coalesced to `0`, to handle division by zero, /// Tests which validate that division is coalesced to `0`, to handle division by zero,
@ -566,7 +580,7 @@ mod test {
/// binary expression to a scalar value, `0`. /// binary expression to a scalar value, `0`.
#[test] #[test]
fn test_division() { fn test_division() {
let (schemas, _) = new_schemas(); let schemas = new_schema();
let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string(); let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string();
// Float64 // Float64
@ -627,7 +641,7 @@ mod test {
#[test] #[test]
fn test_pass_thru() { fn test_pass_thru() {
test_helpers::maybe_start_logging(); test_helpers::maybe_start_logging();
let (schemas, _) = new_schemas(); let schemas = new_schema();
let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string(); let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string();
let expr = lit(5.5).gt(lit(1_i64)); let expr = lit(5.5).gt(lit(1_i64));
@ -664,9 +678,9 @@ mod test {
#[test] #[test]
fn test_string_operations() { fn test_string_operations() {
let props = execution_props(); let props = execution_props();
let (schemas, ds_schema) = new_schemas(); let schemas = new_schema();
let rewrite = |expr| { let rewrite = |expr| {
rewrite_conditional_expr(&props, expr, &schemas, &ds_schema) rewrite_conditional_expr(&props, expr, &schemas)
.unwrap() .unwrap()
.to_string() .to_string()
}; };
@ -688,7 +702,7 @@ mod test {
/// to the supported bitwise operators. /// to the supported bitwise operators.
#[test] #[test]
fn test_boolean_operations() { fn test_boolean_operations() {
let (schemas, _) = new_schemas(); let schemas = new_schema();
let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string(); let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string();
let expr = "boolean_field".as_expr().and(lit(true)); let expr = "boolean_field".as_expr().and(lit(true));
@ -743,7 +757,7 @@ mod test {
/// Tests cases to validate Boolean and NULL data types /// Tests cases to validate Boolean and NULL data types
#[test] #[test]
fn test_rewrite_conditional_null() { fn test_rewrite_conditional_null() {
let (schemas, _) = new_schemas(); let schemas = new_schema();
let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string(); let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string();
// NULL on either side and boolean on the other of a binary expression // NULL on either side and boolean on the other of a binary expression
@ -779,7 +793,7 @@ mod test {
#[test] #[test]
fn test_time_range() { fn test_time_range() {
let (schemas, _) = new_schemas(); let schemas = new_schema();
let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string(); let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string();
let expr = "time".as_expr().gt_eq(lit_timestamp_nano(1000)); let expr = "time".as_expr().gt_eq(lit_timestamp_nano(1000));
@ -811,7 +825,7 @@ mod test {
/// valid operation for the given the operands. These are used when projecting columns. /// valid operation for the given the operands. These are used when projecting columns.
#[test] #[test]
fn test_rewrite_expr_coercion_reduce_to_null() { fn test_rewrite_expr_coercion_reduce_to_null() {
let (schemas, _) = new_schemas(); let schemas = new_schema();
let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string(); let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string();
// //
@ -851,9 +865,9 @@ mod test {
fn test_rewrite_tag_columns_eq() { fn test_rewrite_tag_columns_eq() {
test_helpers::maybe_start_logging(); test_helpers::maybe_start_logging();
let props = execution_props(); let props = execution_props();
let (schemas, ds_schema) = new_schemas(); let schemas = new_schema();
let rewrite = |expr| { let rewrite = |expr| {
rewrite_conditional_expr(&props, expr, &schemas, &ds_schema) rewrite_conditional_expr(&props, expr, &schemas)
.unwrap() .unwrap()
.to_string() .to_string()
}; };
@ -904,9 +918,9 @@ mod test {
fn test_rewrite_tag_columns_regex() { fn test_rewrite_tag_columns_regex() {
let props = execution_props(); let props = execution_props();
test_helpers::maybe_start_logging(); test_helpers::maybe_start_logging();
let (schemas, ds_schema) = new_schemas(); let schemas = new_schema();
let rewrite = |expr| { let rewrite = |expr| {
rewrite_conditional_expr(&props, expr, &schemas, &ds_schema) rewrite_conditional_expr(&props, expr, &schemas)
.unwrap() .unwrap()
.to_string() .to_string()
}; };
@ -931,9 +945,9 @@ mod test {
fn test_fields_pass_thru() { fn test_fields_pass_thru() {
test_helpers::maybe_start_logging(); test_helpers::maybe_start_logging();
let props = execution_props(); let props = execution_props();
let (schemas, ds_schema) = new_schemas(); let schemas = new_schema();
let rewrite = |expr| { let rewrite = |expr| {
rewrite_conditional_expr(&props, expr, &schemas, &ds_schema) rewrite_conditional_expr(&props, expr, &schemas)
.unwrap() .unwrap()
.to_string() .to_string()
}; };


@ -1029,7 +1029,7 @@ impl FieldChecker {
ProjectionType::TopBottomSelector ProjectionType::TopBottomSelector
} else if self.has_group_by_time { } else if self.has_group_by_time {
if self.window_count > 0 { if self.window_count > 0 {
if self.window_count == self.aggregate_count { if self.window_count == self.aggregate_count + self.selector_count {
ProjectionType::WindowAggregate ProjectionType::WindowAggregate
} else { } else {
ProjectionType::WindowAggregateMixed ProjectionType::WindowAggregateMixed
@ -1338,11 +1338,8 @@ impl FieldChecker {
} }
fn check_cumulative_sum(&mut self, args: &[Expr]) -> Result<()> { fn check_cumulative_sum(&mut self, args: &[Expr]) -> Result<()> {
self.inc_aggregate_count(); self.inc_window_count();
check_exp_args!("cumulative_sum", 1, args); check_exp_args!("cumulative_sum", 1, args);
set_extra_intervals!(self, 1);
self.check_nested_symbol("cumulative_sum", &args[0]) self.check_nested_symbol("cumulative_sum", &args[0])
} }
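
Together, the two hunks above stop counting cumulative_sum as an aggregate (it is now tracked as a window call, and its extra-interval bookkeeping is dropped) and relax the projection-type check so selector calls also offset the window count. The adjusted rule, reduced to a free function for illustration (the names are not the FieldChecker API):

    // With GROUP BY TIME(..), the query is a plain window-aggregate projection
    // only if every window call is matched by an aggregate or selector call;
    // otherwise it is classified as the mixed window/aggregate case.
    fn is_pure_window_aggregate(window_count: usize, aggregate_count: usize, selector_count: usize) -> bool {
        window_count == aggregate_count + selector_count
    }
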


@ -1,13 +1,12 @@
use crate::{error, NUMERICS}; use crate::{error, NUMERICS};
use arrow::array::{Array, ArrayRef, Int64Array}; use arrow::array::{Array, ArrayRef, Int64Array};
use arrow::datatypes::{DataType, TimeUnit}; use arrow::datatypes::DataType;
use datafusion::common::{downcast_value, DataFusionError, Result, ScalarValue}; use datafusion::common::{downcast_value, DataFusionError, Result, ScalarValue};
use datafusion::logical_expr::{ use datafusion::logical_expr::{
Accumulator, AccumulatorFactoryFunction, AggregateUDF, ReturnTypeFunction, Signature, Accumulator, AccumulatorFactoryFunction, AggregateUDF, ReturnTypeFunction, Signature,
StateTypeFunction, TypeSignature, Volatility, StateTypeFunction, TypeSignature, Volatility,
}; };
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use std::mem::replace;
use std::sync::Arc; use std::sync::Arc;
/// Name of the `MOVING_AVERAGE` user-defined aggregate function. /// Name of the `MOVING_AVERAGE` user-defined aggregate function.
@ -148,339 +147,3 @@ impl Accumulator for AvgNAccumulator {
- std::mem::size_of_val(&self.data_type) - std::mem::size_of_val(&self.data_type)
} }
} }
/// Name of the `DIFFERENCE` user-defined aggregate function.
pub(crate) const DIFFERENCE_NAME: &str = "difference";
/// Definition of the `DIFFERENCE` user-defined aggregate function.
pub(crate) static DIFFERENCE: Lazy<Arc<AggregateUDF>> = Lazy::new(|| {
let return_type: ReturnTypeFunction = Arc::new(|dt| Ok(Arc::new(dt[0].clone())));
let accumulator: AccumulatorFactoryFunction =
Arc::new(|dt| Ok(Box::new(DifferenceAccumulator::new(dt))));
let state_type: StateTypeFunction = Arc::new(|_| Ok(Arc::new(vec![])));
Arc::new(AggregateUDF::new(
DIFFERENCE_NAME,
&Signature::one_of(
NUMERICS
.iter()
.map(|dt| TypeSignature::Exact(vec![dt.clone()]))
.collect(),
Volatility::Immutable,
),
&return_type,
&accumulator,
// State shouldn't be called, so no schema to report
&state_type,
))
});
#[derive(Debug)]
struct DifferenceAccumulator {
data_type: DataType,
last: ScalarValue,
diff: ScalarValue,
}
impl DifferenceAccumulator {
fn new(data_type: &DataType) -> Self {
let last: ScalarValue = data_type.try_into().expect("data_type → ScalarValue");
let diff = last.clone();
Self {
data_type: data_type.clone(),
last,
diff,
}
}
}
impl Accumulator for DifferenceAccumulator {
/// `state` is only called when used as an aggregate function. It can be
/// safely left unimplemented, as this accumulator is only used as a window aggregate.
///
/// See: <https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.Accumulator.html#tymethod.state>
fn state(&self) -> Result<Vec<ScalarValue>> {
error::internal("unexpected call to DifferenceAccumulator::state")
}
fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
if values.is_empty() {
return Ok(());
}
let arr = &values[0];
for index in 0..arr.len() {
let scalar = ScalarValue::try_from_array(arr, index)?;
if !scalar.is_null() {
if !self.last.is_null() {
self.diff = scalar.sub(self.last.clone())?
}
self.last = scalar;
} else {
self.diff = ScalarValue::try_from(&self.data_type).unwrap()
}
}
Ok(())
}
/// `merge_batch` is only called when used as an aggregate function. It can
/// safely be left unimplemented, as this accumulator is only used as a window aggregate.
///
/// See: <https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.Accumulator.html#tymethod.state>
fn merge_batch(&mut self, _states: &[ArrayRef]) -> Result<()> {
error::internal("unexpected call to DifferenceAccumulator::merge_batch")
}
fn evaluate(&self) -> Result<ScalarValue> {
Ok(self.diff.clone())
}
fn size(&self) -> usize {
std::mem::size_of_val(self)
}
}
/// Name of the `NON_NEGATIVE_DIFFERENCE` user-defined aggregate function.
pub(crate) const NON_NEGATIVE_DIFFERENCE_NAME: &str = "non_negative_difference";
/// Definition of the `NON_NEGATIVE_DIFFERENCE` user-defined aggregate function.
pub(crate) static NON_NEGATIVE_DIFFERENCE: Lazy<Arc<AggregateUDF>> = Lazy::new(|| {
let return_type: ReturnTypeFunction = Arc::new(|dt| Ok(Arc::new(dt[0].clone())));
let accumulator: AccumulatorFactoryFunction = Arc::new(|dt| {
Ok(Box::new(NonNegative::<_>::new(DifferenceAccumulator::new(
dt,
))))
});
let state_type: StateTypeFunction = Arc::new(|_| Ok(Arc::new(vec![])));
Arc::new(AggregateUDF::new(
NON_NEGATIVE_DIFFERENCE_NAME,
&Signature::one_of(
NUMERICS
.iter()
.map(|dt| TypeSignature::Exact(vec![dt.clone()]))
.collect(),
Volatility::Immutable,
),
&return_type,
&accumulator,
// State shouldn't be called, so no schema to report
&state_type,
))
});
/// NonNegative is a wrapper around an Accumulator that replaces
/// negative values with NULL.
#[derive(Debug)]
struct NonNegative<T> {
acc: T,
}
impl<T> NonNegative<T> {
fn new(acc: T) -> Self {
Self { acc }
}
}
impl<T: Accumulator> Accumulator for NonNegative<T> {
fn state(&self) -> Result<Vec<ScalarValue>> {
self.acc.state()
}
fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
self.acc.update_batch(values)
}
fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
self.acc.merge_batch(states)
}
fn evaluate(&self) -> Result<ScalarValue> {
Ok(match self.acc.evaluate()? {
ScalarValue::Float64(Some(v)) if v < 0.0 => ScalarValue::Float64(None),
ScalarValue::Int64(Some(v)) if v < 0 => ScalarValue::Int64(None),
v => v,
})
}
fn size(&self) -> usize {
self.acc.size()
}
}
/// Name of the `DERIVATIVE` user-defined aggregate function.
pub(crate) const DERIVATIVE_NAME: &str = "derivative";
pub(crate) fn derivative_udf(unit: i64) -> AggregateUDF {
let return_type: ReturnTypeFunction = Arc::new(|_| Ok(Arc::new(DataType::Float64)));
let accumulator: AccumulatorFactoryFunction =
Arc::new(move |_| Ok(Box::new(DerivativeAccumulator::new(unit))));
let state_type: StateTypeFunction = Arc::new(|_| Ok(Arc::new(vec![])));
let sig = Signature::one_of(
NUMERICS
.iter()
.map(|dt| {
TypeSignature::Exact(vec![
DataType::Timestamp(TimeUnit::Nanosecond, None),
dt.clone(),
])
})
.collect(),
Volatility::Immutable,
);
AggregateUDF::new(
format!("{DERIVATIVE_NAME}(unit: {unit})").as_str(),
&sig,
&return_type,
&accumulator,
// State shouldn't be called, so no schema to report
&state_type,
)
}
/// Name of the `NON_NEGATIVE_DERIVATIVE` user-defined aggregate function.
pub(crate) const NON_NEGATIVE_DERIVATIVE_NAME: &str = "non_negative_derivative";
pub(crate) fn non_negative_derivative_udf(unit: i64) -> AggregateUDF {
let return_type: ReturnTypeFunction = Arc::new(|_| Ok(Arc::new(DataType::Float64)));
let accumulator: AccumulatorFactoryFunction = Arc::new(move |_| {
Ok(Box::new(NonNegative::<_>::new(DerivativeAccumulator::new(
unit,
))))
});
let state_type: StateTypeFunction = Arc::new(|_| Ok(Arc::new(vec![])));
let sig = Signature::one_of(
NUMERICS
.iter()
.map(|dt| {
TypeSignature::Exact(vec![
DataType::Timestamp(TimeUnit::Nanosecond, None),
dt.clone(),
])
})
.collect(),
Volatility::Immutable,
);
AggregateUDF::new(
format!("{NON_NEGATIVE_DERIVATIVE_NAME}(unit: {unit})").as_str(),
&sig,
&return_type,
&accumulator,
// State shouldn't be called, so no schema to report
&state_type,
)
}
#[derive(Debug)]
struct DerivativeAccumulator {
unit: i64,
prev: Option<Point>,
curr: Option<Point>,
}
impl DerivativeAccumulator {
fn new(unit: i64) -> Self {
Self {
unit,
prev: None,
curr: None,
}
}
}
impl Accumulator for DerivativeAccumulator {
/// `state` is only called when used as an aggregate function. It can
/// safely be left unimplemented, as this accumulator is only used as a window aggregate.
///
/// See: <https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.Accumulator.html#tymethod.state>
fn state(&self) -> Result<Vec<ScalarValue>> {
error::internal("unexpected call to DerivativeAccumulator::state")
}
fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
if values.is_empty() {
return Ok(());
}
let times = &values[0];
let arr = &values[1];
for index in 0..arr.len() {
let time = match ScalarValue::try_from_array(times, index)? {
ScalarValue::TimestampNanosecond(Some(ts), _) => ts,
v => {
return Err(DataFusionError::Internal(format!(
"invalid time value: {}",
v
)))
}
};
let curr = Point::new(time, ScalarValue::try_from_array(arr, index)?);
let prev = replace(&mut self.curr, curr);
// don't replace the previous value if the current value has the same timestamp.
if self.prev.is_none()
|| prev
.as_ref()
.is_some_and(|prev| prev.time > self.prev.as_ref().unwrap().time)
{
self.prev = prev
}
}
Ok(())
}
/// `merge_batch` is only called when used as an aggregate function. It can
/// safely be left unimplemented, as this accumulator is only used as a window aggregate.
///
/// See: <https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.Accumulator.html#tymethod.state>
fn merge_batch(&mut self, _states: &[ArrayRef]) -> Result<()> {
error::internal("unexpected call to DerivativeAccumulator::merge_batch")
}
fn evaluate(&self) -> Result<ScalarValue> {
Ok(ScalarValue::Float64(
self.curr
.as_ref()
.and_then(|c| c.derivative(self.prev.as_ref(), self.unit)),
))
}
fn size(&self) -> usize {
std::mem::size_of_val(self)
}
}
#[derive(Debug)]
struct Point {
time: i64,
value: ScalarValue,
}
impl Point {
fn new(time: i64, value: ScalarValue) -> Option<Self> {
if value.is_null() {
None
} else {
Some(Self { time, value })
}
}
fn value_as_f64(&self) -> f64 {
match self.value {
ScalarValue::Int64(Some(v)) => v as f64,
ScalarValue::Float64(Some(v)) => v,
ScalarValue::UInt64(Some(v)) => v as f64,
_ => panic!("invalid point {:?}", self),
}
}
fn derivative(&self, prev: Option<&Self>, unit: i64) -> Option<f64> {
prev.and_then(|prev| {
let diff = self.value_as_f64() - prev.value_as_f64();
let elapsed = match self.time - prev.time {
// if the time hasn't changed then the result is NULL.
0 => return None,
0 => return None,
v => v,
} as f64;
let divisor = elapsed / (unit as f64);
Some(diff / divisor)
})
}
}

View File

@ -7,7 +7,7 @@
use crate::plan::util_copy::find_exprs_in_exprs; use crate::plan::util_copy::find_exprs_in_exprs;
use crate::{error, NUMERICS}; use crate::{error, NUMERICS};
use arrow::datatypes::DataType; use arrow::datatypes::{DataType, TimeUnit};
use datafusion::logical_expr::{ use datafusion::logical_expr::{
Expr, ReturnTypeFunction, ScalarFunctionImplementation, ScalarUDF, Signature, TypeSignature, Expr, ReturnTypeFunction, ScalarFunctionImplementation, ScalarUDF, Signature, TypeSignature,
Volatility, Volatility,
@ -21,6 +21,7 @@ pub(super) enum WindowFunction {
NonNegativeDifference, NonNegativeDifference,
Derivative, Derivative,
NonNegativeDerivative, NonNegativeDerivative,
CumulativeSum,
} }
impl WindowFunction { impl WindowFunction {
@ -32,6 +33,7 @@ impl WindowFunction {
NON_NEGATIVE_DIFFERENCE_UDF_NAME => Some(Self::NonNegativeDifference), NON_NEGATIVE_DIFFERENCE_UDF_NAME => Some(Self::NonNegativeDifference),
DERIVATIVE_UDF_NAME => Some(Self::Derivative), DERIVATIVE_UDF_NAME => Some(Self::Derivative),
NON_NEGATIVE_DERIVATIVE_UDF_NAME => Some(Self::NonNegativeDerivative), NON_NEGATIVE_DERIVATIVE_UDF_NAME => Some(Self::NonNegativeDerivative),
CUMULATIVE_SUM_UDF_NAME => Some(Self::CumulativeSum),
_ => None, _ => None,
} }
} }
@ -129,13 +131,21 @@ pub(crate) fn derivative(args: Vec<Expr>) -> Expr {
/// Definition of the `DERIVATIVE` function. /// Definition of the `DERIVATIVE` function.
static DERIVATIVE: Lazy<Arc<ScalarUDF>> = Lazy::new(|| { static DERIVATIVE: Lazy<Arc<ScalarUDF>> = Lazy::new(|| {
let return_type_fn: ReturnTypeFunction = Arc::new(|args| Ok(Arc::new(args[0].clone()))); let return_type_fn: ReturnTypeFunction = Arc::new(|_| Ok(Arc::new(DataType::Float64)));
Arc::new(ScalarUDF::new( Arc::new(ScalarUDF::new(
DERIVATIVE_UDF_NAME, DERIVATIVE_UDF_NAME,
&Signature::one_of( &Signature::one_of(
NUMERICS NUMERICS
.iter() .iter()
.map(|dt| TypeSignature::Exact(vec![dt.clone()])) .flat_map(|dt| {
vec![
TypeSignature::Exact(vec![dt.clone()]),
TypeSignature::Exact(vec![
dt.clone(),
DataType::Duration(TimeUnit::Nanosecond),
]),
]
})
.collect(), .collect(),
Volatility::Immutable, Volatility::Immutable,
), ),
@ -153,13 +163,21 @@ pub(crate) fn non_negative_derivative(args: Vec<Expr>) -> Expr {
/// Definition of the `NON_NEGATIVE_DERIVATIVE` function. /// Definition of the `NON_NEGATIVE_DERIVATIVE` function.
static NON_NEGATIVE_DERIVATIVE: Lazy<Arc<ScalarUDF>> = Lazy::new(|| { static NON_NEGATIVE_DERIVATIVE: Lazy<Arc<ScalarUDF>> = Lazy::new(|| {
let return_type_fn: ReturnTypeFunction = Arc::new(|args| Ok(Arc::new(args[0].clone()))); let return_type_fn: ReturnTypeFunction = Arc::new(|_| Ok(Arc::new(DataType::Float64)));
Arc::new(ScalarUDF::new( Arc::new(ScalarUDF::new(
NON_NEGATIVE_DERIVATIVE_UDF_NAME, NON_NEGATIVE_DERIVATIVE_UDF_NAME,
&Signature::one_of( &Signature::one_of(
NUMERICS NUMERICS
.iter() .iter()
.map(|dt| TypeSignature::Exact(vec![dt.clone()])) .flat_map(|dt| {
vec![
TypeSignature::Exact(vec![dt.clone()]),
TypeSignature::Exact(vec![
dt.clone(),
DataType::Duration(TimeUnit::Nanosecond),
]),
]
})
.collect(), .collect(),
Volatility::Immutable, Volatility::Immutable,
), ),
@ -168,6 +186,29 @@ static NON_NEGATIVE_DERIVATIVE: Lazy<Arc<ScalarUDF>> = Lazy::new(|| {
)) ))
}); });
const CUMULATIVE_SUM_UDF_NAME: &str = "cumulative_sum";
/// Create an expression to represent the `CUMULATIVE_SUM` function.
pub(crate) fn cumulative_sum(args: Vec<Expr>) -> Expr {
CUMULATIVE_SUM.call(args)
}
/// Definition of the `CUMULATIVE_SUM` function.
static CUMULATIVE_SUM: Lazy<Arc<ScalarUDF>> = Lazy::new(|| {
let return_type_fn: ReturnTypeFunction = Arc::new(|args| Ok(Arc::new(args[0].clone())));
Arc::new(ScalarUDF::new(
CUMULATIVE_SUM_UDF_NAME,
&Signature::one_of(
NUMERICS
.iter()
.map(|dt| TypeSignature::Exact(vec![dt.clone()]))
.collect(),
Volatility::Immutable,
),
&return_type_fn,
&stand_in_impl(CUMULATIVE_SUM_UDF_NAME),
))
});
/// Returns an implementation that always returns an error. /// Returns an implementation that always returns an error.
fn stand_in_impl(name: &'static str) -> ScalarFunctionImplementation { fn stand_in_impl(name: &'static str) -> ScalarFunctionImplementation {
Arc::new(move |_| error::internal(format!("{name} should not exist in the final logical plan"))) Arc::new(move |_| error::internal(format!("{name} should not exist in the final logical plan")))

View File

@ -11,8 +11,11 @@ use influxdb_influxql_parser::literal::Number;
use influxdb_influxql_parser::string::Regex; use influxdb_influxql_parser::string::Regex;
use query_functions::clean_non_meta_escapes; use query_functions::clean_non_meta_escapes;
use query_functions::coalesce_struct::coalesce_struct; use query_functions::coalesce_struct::coalesce_struct;
use schema::InfluxColumnType;
use std::sync::Arc; use std::sync::Arc;
use super::ir::{DataSourceSchema, Field};
pub(in crate::plan) fn binary_operator_to_df_operator(op: BinaryOperator) -> Operator { pub(in crate::plan) fn binary_operator_to_df_operator(op: BinaryOperator) -> Operator {
match op { match op {
BinaryOperator::Add => Operator::Plus, BinaryOperator::Add => Operator::Plus,
@ -26,17 +29,62 @@ pub(in crate::plan) fn binary_operator_to_df_operator(op: BinaryOperator) -> Ope
} }
} }
/// Container for both the DataFusion and equivalent IOx schema. /// Container for the DataFusion schema as well as
pub(in crate::plan) struct Schemas { /// info on which columns are tags.
pub(in crate::plan) struct IQLSchema<'a> {
pub(in crate::plan) df_schema: DFSchemaRef, pub(in crate::plan) df_schema: DFSchemaRef,
tag_info: TagInfo<'a>,
} }
impl Schemas { impl<'a> IQLSchema<'a> {
pub(in crate::plan) fn new(df_schema: &DFSchemaRef) -> Result<Self> { /// Create a new IQLSchema from a [`DataSourceSchema`] from the
/// FROM clause of a query or subquery.
pub(in crate::plan) fn new_from_ds_schema(
df_schema: &DFSchemaRef,
ds_schema: DataSourceSchema<'a>,
) -> Result<Self> {
Ok(Self { Ok(Self {
df_schema: Arc::clone(df_schema), df_schema: Arc::clone(df_schema),
tag_info: TagInfo::DataSourceSchema(ds_schema),
}) })
} }
/// Create a new IQLSchema from a list of [`Field`]s on the SELECT list
/// of a subquery.
pub(in crate::plan) fn new_from_fields(
df_schema: &DFSchemaRef,
fields: &'a [Field],
) -> Result<Self> {
Ok(Self {
df_schema: Arc::clone(df_schema),
tag_info: TagInfo::FieldList(fields),
})
}
/// Returns `true` if the schema contains a tag column with the specified name.
pub fn is_tag_field(&self, name: &str) -> bool {
match self.tag_info {
TagInfo::DataSourceSchema(ref ds_schema) => ds_schema.is_tag_field(name),
TagInfo::FieldList(fields) => fields
.iter()
.any(|f| f.name == name && f.data_type == Some(InfluxColumnType::Tag)),
}
}
/// Returns `true` if the schema contains a tag column with the specified name.
/// If the underlying data source is a subquery, it will apply any aliases in the
/// projection that represents the SELECT list.
pub fn is_projected_tag_field(&self, name: &str) -> bool {
match self.tag_info {
TagInfo::DataSourceSchema(ref ds_schema) => ds_schema.is_projected_tag_field(name),
_ => self.is_tag_field(name),
}
}
}
pub(in crate::plan) enum TagInfo<'a> {
DataSourceSchema(DataSourceSchema<'a>),
FieldList(&'a [Field]),
} }
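To make the tag lookup above concrete, here is a minimal, self-contained sketch of the `FieldList` case. The `Field` and `InfluxColumnType` definitions below are hypothetical simplifications of the planner's types, used only to show how a column name is matched against the SELECT-list fields of a subquery.

// Hypothetical, simplified stand-ins for the planner's `Field` and
// `InfluxColumnType` types; only the parts needed for the lookup are modeled.
#[derive(Clone, Copy, PartialEq, Debug)]
enum InfluxColumnType {
    Tag,
    Field,
}

struct Field {
    name: String,
    data_type: Option<InfluxColumnType>,
}

// Mirrors the `TagInfo::FieldList` arm of `is_tag_field`: a column is a tag
// if a SELECT-list field with that name is typed as a tag.
fn is_tag_field(fields: &[Field], name: &str) -> bool {
    fields
        .iter()
        .any(|f| f.name == name && f.data_type == Some(InfluxColumnType::Tag))
}

fn main() {
    let fields = vec![
        Field { name: "region".into(), data_type: Some(InfluxColumnType::Tag) },
        Field { name: "usage".into(), data_type: Some(InfluxColumnType::Field) },
    ];
    assert!(is_tag_field(&fields, "region"));
    assert!(!is_tag_field(&fields, "usage"));
}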
/// Sanitize an InfluxQL regular expression and create a compiled [`regex::Regex`]. /// Sanitize an InfluxQL regular expression and create a compiled [`regex::Regex`].
@ -70,6 +118,7 @@ fn number_to_scalar(n: &Number, data_type: &DataType) -> Result<ScalarValue> {
), ),
fields.clone(), fields.clone(),
), ),
(_, DataType::Null) => ScalarValue::Null,
(n, data_type) => { (n, data_type) => {
// The only output data types expected are Int64, Float64 or UInt64 // The only output data types expected are Int64, Float64 or UInt64
return error::internal(format!("no conversion from {n} to {data_type}")); return error::internal(format!("no conversion from {n} to {data_type}"));

View File

@ -145,34 +145,27 @@ where
negated, negated,
expr, expr,
pattern, pattern,
case_insensitive,
escape_char, escape_char,
}) => Ok(Expr::Like(Like::new( }) => Ok(Expr::Like(Like::new(
*negated, *negated,
Box::new(clone_with_replacement(expr, replacement_fn)?), Box::new(clone_with_replacement(expr, replacement_fn)?),
Box::new(clone_with_replacement(pattern, replacement_fn)?), Box::new(clone_with_replacement(pattern, replacement_fn)?),
*escape_char, *escape_char,
))), *case_insensitive,
Expr::ILike(Like {
negated,
expr,
pattern,
escape_char,
}) => Ok(Expr::ILike(Like::new(
*negated,
Box::new(clone_with_replacement(expr, replacement_fn)?),
Box::new(clone_with_replacement(pattern, replacement_fn)?),
*escape_char,
))), ))),
Expr::SimilarTo(Like { Expr::SimilarTo(Like {
negated, negated,
expr, expr,
pattern, pattern,
case_insensitive,
escape_char, escape_char,
}) => Ok(Expr::SimilarTo(Like::new( }) => Ok(Expr::SimilarTo(Like::new(
*negated, *negated,
Box::new(clone_with_replacement(expr, replacement_fn)?), Box::new(clone_with_replacement(expr, replacement_fn)?),
Box::new(clone_with_replacement(pattern, replacement_fn)?), Box::new(clone_with_replacement(pattern, replacement_fn)?),
*escape_char, *escape_char,
*case_insensitive,
))), ))),
Expr::Case(case) => Ok(Expr::Case(Case::new( Expr::Case(case) => Ok(Expr::Case(Case::new(
match &case.expr { match &case.expr {

View File

@ -6,8 +6,92 @@ use datafusion::logical_expr::{
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use std::sync::Arc; use std::sync::Arc;
mod cumulative_sum;
mod derivative;
mod difference;
mod non_negative;
mod percent_row_number; mod percent_row_number;
/// Definition of the `CUMULATIVE_SUM` user-defined window function.
pub(crate) static CUMULATIVE_SUM: Lazy<WindowFunction> = Lazy::new(|| {
let return_type: ReturnTypeFunction = Arc::new(cumulative_sum::return_type);
let partition_evaluator_factory: PartitionEvaluatorFactory =
Arc::new(cumulative_sum::partition_evaluator_factory);
WindowFunction::WindowUDF(Arc::new(WindowUDF::new(
cumulative_sum::NAME,
&cumulative_sum::SIGNATURE,
&return_type,
&partition_evaluator_factory,
)))
});
/// Definition of the `DERIVATIVE` user-defined window function.
pub(crate) static DERIVATIVE: Lazy<WindowFunction> = Lazy::new(|| {
let return_type: ReturnTypeFunction = Arc::new(derivative::return_type);
let partition_evaluator_factory: PartitionEvaluatorFactory =
Arc::new(derivative::partition_evaluator_factory);
WindowFunction::WindowUDF(Arc::new(WindowUDF::new(
derivative::NAME,
&derivative::SIGNATURE,
&return_type,
&partition_evaluator_factory,
)))
});
/// Definition of the `DIFFERENCE` user-defined window function.
pub(crate) static DIFFERENCE: Lazy<WindowFunction> = Lazy::new(|| {
let return_type: ReturnTypeFunction = Arc::new(difference::return_type);
let partition_evaluator_factory: PartitionEvaluatorFactory =
Arc::new(difference::partition_evaluator_factory);
WindowFunction::WindowUDF(Arc::new(WindowUDF::new(
difference::NAME,
&difference::SIGNATURE,
&return_type,
&partition_evaluator_factory,
)))
});
const NON_NEGATIVE_DERIVATIVE_NAME: &str = "non_negative_derivative";
/// Definition of the `NON_NEGATIVE_DERIVATIVE` user-defined window function.
pub(crate) static NON_NEGATIVE_DERIVATIVE: Lazy<WindowFunction> = Lazy::new(|| {
let return_type: ReturnTypeFunction = Arc::new(derivative::return_type);
let partition_evaluator_factory: PartitionEvaluatorFactory = Arc::new(|| {
Ok(non_negative::wrapper(
derivative::partition_evaluator_factory()?,
))
});
WindowFunction::WindowUDF(Arc::new(WindowUDF::new(
NON_NEGATIVE_DERIVATIVE_NAME,
&derivative::SIGNATURE,
&return_type,
&partition_evaluator_factory,
)))
});
const NON_NEGATIVE_DIFFERENCE_NAME: &str = "non_negative_difference";
/// Definition of the `NON_NEGATIVE_DIFFERENCE` user-defined window function.
pub(crate) static NON_NEGATIVE_DIFFERENCE: Lazy<WindowFunction> = Lazy::new(|| {
let return_type: ReturnTypeFunction = Arc::new(difference::return_type);
let partition_evaluator_factory: PartitionEvaluatorFactory = Arc::new(|| {
Ok(non_negative::wrapper(
difference::partition_evaluator_factory()?,
))
});
WindowFunction::WindowUDF(Arc::new(WindowUDF::new(
NON_NEGATIVE_DIFFERENCE_NAME,
&difference::SIGNATURE,
&return_type,
&partition_evaluator_factory,
)))
});
/// Definition of the `PERCENT_ROW_NUMBER` user-defined window function. /// Definition of the `PERCENT_ROW_NUMBER` user-defined window function.
pub(crate) static PERCENT_ROW_NUMBER: Lazy<WindowFunction> = Lazy::new(|| { pub(crate) static PERCENT_ROW_NUMBER: Lazy<WindowFunction> = Lazy::new(|| {
let return_type: ReturnTypeFunction = Arc::new(percent_row_number::return_type); let return_type: ReturnTypeFunction = Arc::new(percent_row_number::return_type);

View File

@ -0,0 +1,64 @@
use crate::NUMERICS;
use arrow::array::{Array, ArrayRef};
use arrow::datatypes::DataType;
use datafusion::common::{Result, ScalarValue};
use datafusion::logical_expr::{PartitionEvaluator, Signature, TypeSignature, Volatility};
use once_cell::sync::Lazy;
use std::sync::Arc;
/// The name of the cumulative_sum window function.
pub(super) const NAME: &str = "cumulative_sum";
/// Valid signatures for the cumulative_sum window function.
pub(super) static SIGNATURE: Lazy<Signature> = Lazy::new(|| {
Signature::one_of(
NUMERICS
.iter()
.map(|dt| TypeSignature::Exact(vec![dt.clone()]))
.collect(),
Volatility::Immutable,
)
});
/// Calculate the return type given the function signature.
pub(super) fn return_type(sig: &[DataType]) -> Result<Arc<DataType>> {
Ok(Arc::new(sig[0].clone()))
}
/// Create a new partition_evaluator_factory.
pub(super) fn partition_evaluator_factory() -> Result<Box<dyn PartitionEvaluator>> {
Ok(Box::new(CumulativeSumPartitionEvaluator {}))
}
/// PartitionEvaluator which returns the cumulative sum of the input.
#[derive(Debug)]
struct CumulativeSumPartitionEvaluator {}
impl PartitionEvaluator for CumulativeSumPartitionEvaluator {
fn evaluate_all(&mut self, values: &[ArrayRef], num_rows: usize) -> Result<Arc<dyn Array>> {
assert_eq!(values.len(), 1);
let array = Arc::clone(&values[0]);
let mut sum = ScalarValue::new_zero(array.data_type())?;
let mut cumulative: Vec<ScalarValue> = vec![];
for idx in 0..num_rows {
let v = ScalarValue::try_from_array(&array, idx)?;
let res = if v.is_null() {
v
} else {
sum = sum.add(&v)?;
sum.clone()
};
cumulative.push(res);
}
Ok(Arc::new(ScalarValue::iter_to_array(cumulative)?))
}
fn uses_window_frame(&self) -> bool {
false
}
fn include_rank(&self) -> bool {
false
}
}
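As a quick illustration of the evaluator above, independent of DataFusion and Arrow: the running sum carries across rows, and a NULL input produces a NULL output without resetting the sum. The sketch below models values as plain `Option<i64>` and only demonstrates the intended semantics, not the `ScalarValue`-based implementation.

// Minimal sketch of the cumulative_sum semantics over nullable values:
// NULL inputs yield NULL outputs, but the running sum is preserved.
fn cumulative_sum(values: &[Option<i64>]) -> Vec<Option<i64>> {
    let mut sum = 0;
    values
        .iter()
        .map(|v| {
            v.map(|v| {
                sum += v;
                sum
            })
        })
        .collect()
}

fn main() {
    let input = [Some(1), None, Some(2), Some(3)];
    assert_eq!(cumulative_sum(&input), vec![Some(1), None, Some(3), Some(6)]);
}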

View File

@ -0,0 +1,125 @@
use crate::{error, NUMERICS};
use arrow::array::{Array, ArrayRef};
use arrow::datatypes::{DataType, TimeUnit};
use datafusion::common::{Result, ScalarValue};
use datafusion::logical_expr::{PartitionEvaluator, Signature, TypeSignature, Volatility};
use once_cell::sync::Lazy;
use std::borrow::Borrow;
use std::sync::Arc;
/// The name of the derivative window function.
pub(super) const NAME: &str = "derivative";
/// Valid signatures for the derivative window function.
pub(super) static SIGNATURE: Lazy<Signature> = Lazy::new(|| {
Signature::one_of(
NUMERICS
.iter()
.map(|dt| {
TypeSignature::Exact(vec![
dt.clone(),
DataType::Duration(TimeUnit::Nanosecond),
DataType::Timestamp(TimeUnit::Nanosecond, None),
])
})
.collect(),
Volatility::Immutable,
)
});
/// Calculate the return type given the function signature.
pub(super) fn return_type(_: &[DataType]) -> Result<Arc<DataType>> {
Ok(Arc::new(DataType::Float64))
}
/// Create a new partition_evaluator_factory.
pub(super) fn partition_evaluator_factory() -> Result<Box<dyn PartitionEvaluator>> {
Ok(Box::new(DerivativePartitionEvaluator {}))
}
/// PartitionEvaluator which returns the derivative between input values,
/// in the provided units.
#[derive(Debug)]
struct DerivativePartitionEvaluator {}
impl PartitionEvaluator for DerivativePartitionEvaluator {
fn evaluate_all(&mut self, values: &[ArrayRef], _num_rows: usize) -> Result<Arc<dyn Array>> {
assert_eq!(values.len(), 3);
let array = Arc::clone(&values[0]);
let times = Arc::clone(&values[2]);
// The second element of the values array is the second argument to
// the 'derivative' function. It specifies the unit duration to use
// when computing the derivative.
//
// INVARIANT:
// The planner guarantees that the second argument is always a duration
// literal.
let unit = ScalarValue::try_from_array(&values[1], 0)?;
let mut idx: usize = 0;
let mut last: ScalarValue = array.data_type().try_into()?;
let mut last_time: ScalarValue = times.data_type().try_into()?;
let mut derivative: Vec<ScalarValue> = vec![];
while idx < array.len() {
last = ScalarValue::try_from_array(&array, idx)?;
last_time = ScalarValue::try_from_array(&times, idx)?;
derivative.push(ScalarValue::Float64(None));
idx += 1;
if !last.is_null() {
break;
}
}
while idx < array.len() {
let v = ScalarValue::try_from_array(&array, idx)?;
let t = ScalarValue::try_from_array(&times, idx)?;
if v.is_null() {
derivative.push(ScalarValue::Float64(None));
} else {
derivative.push(ScalarValue::Float64(Some(
delta(&v, &last)? / delta_time(&t, &last_time, &unit)?,
)));
last = v.clone();
last_time = t.clone();
}
idx += 1;
}
Ok(Arc::new(ScalarValue::iter_to_array(derivative)?))
}
fn uses_window_frame(&self) -> bool {
false
}
fn include_rank(&self) -> bool {
false
}
}
fn delta(curr: &ScalarValue, prev: &ScalarValue) -> Result<f64> {
match (curr.borrow(), prev.borrow()) {
(ScalarValue::Float64(Some(curr)), ScalarValue::Float64(Some(prev))) => Ok(*curr - *prev),
(ScalarValue::Int64(Some(curr)), ScalarValue::Int64(Some(prev))) => {
Ok(*curr as f64 - *prev as f64)
}
(ScalarValue::UInt64(Some(curr)), ScalarValue::UInt64(Some(prev))) => {
Ok(*curr as f64 - *prev as f64)
}
_ => error::internal("derivative attempted on unsupported values"),
}
}
fn delta_time(curr: &ScalarValue, prev: &ScalarValue, unit: &ScalarValue) -> Result<f64> {
if let (
ScalarValue::TimestampNanosecond(Some(curr), _),
ScalarValue::TimestampNanosecond(Some(prev), _),
ScalarValue::IntervalMonthDayNano(Some(unit)),
) = (curr, prev, unit)
{
Ok((*curr as f64 - *prev as f64) / *unit as f64)
} else {
error::internal("derivative attempted on unsupported values")
}
}
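For reference, the per-row arithmetic performed by `delta` and `delta_time` reduces to `(v_curr - v_prev) / ((t_curr - t_prev) / unit)`, i.e. the value delta per `unit` of elapsed time. The sketch below illustrates just that formula over plain `(i64, f64)` points, assuming nanosecond timestamps and a nanosecond `unit`; it is not the Arrow-based implementation.

// Minimal sketch of the per-row derivative arithmetic: the value delta is
// normalised by the elapsed time expressed in multiples of `unit_nanos`.
fn derivative(prev: (i64, f64), curr: (i64, f64), unit_nanos: i64) -> f64 {
    let dv = curr.1 - prev.1;
    let dt_units = (curr.0 - prev.0) as f64 / unit_nanos as f64;
    dv / dt_units
}

fn main() {
    // Two points 2 seconds apart, value rising by 10, unit = 1 second:
    // the derivative is 5 (per second).
    let per_second = 1_000_000_000;
    assert_eq!(derivative((0, 0.0), (2 * per_second, 10.0), per_second), 5.0);
}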

View File

@ -0,0 +1,79 @@
use crate::NUMERICS;
use arrow::array::{Array, ArrayRef};
use arrow::compute::{shift, subtract_dyn};
use arrow::datatypes::DataType;
use datafusion::common::{Result, ScalarValue};
use datafusion::logical_expr::{PartitionEvaluator, Signature, TypeSignature, Volatility};
use once_cell::sync::Lazy;
use std::sync::Arc;
/// The name of the difference window function.
pub(super) const NAME: &str = "difference";
/// Valid signatures for the difference window function.
pub(super) static SIGNATURE: Lazy<Signature> = Lazy::new(|| {
Signature::one_of(
NUMERICS
.iter()
.map(|dt| TypeSignature::Exact(vec![dt.clone()]))
.collect(),
Volatility::Immutable,
)
});
/// Calculate the return type given the function signature.
pub(super) fn return_type(sig: &[DataType]) -> Result<Arc<DataType>> {
Ok(Arc::new(sig[0].clone()))
}
/// Create a new partition_evaluator_factory.
pub(super) fn partition_evaluator_factory() -> Result<Box<dyn PartitionEvaluator>> {
Ok(Box::new(DifferencePartitionEvaluator {}))
}
/// PartitionEvaluator which returns the difference between input values.
#[derive(Debug)]
struct DifferencePartitionEvaluator {}
impl PartitionEvaluator for DifferencePartitionEvaluator {
fn evaluate_all(&mut self, values: &[ArrayRef], _num_rows: usize) -> Result<Arc<dyn Array>> {
assert_eq!(values.len(), 1);
let array = Arc::clone(&values[0]);
if array.null_count() == 0 {
// If there are no gaps then use arrow kernels.
Ok(subtract_dyn(&array, &shift(&array, 1)?)?)
} else {
let mut idx: usize = 0;
let mut last: ScalarValue = array.data_type().try_into()?;
let mut difference: Vec<ScalarValue> = vec![];
while idx < array.len() {
last = ScalarValue::try_from_array(&array, idx)?;
difference.push(array.data_type().try_into()?);
idx += 1;
if !last.is_null() {
break;
}
}
while idx < array.len() {
let v = ScalarValue::try_from_array(&array, idx)?;
if v.is_null() {
difference.push(array.data_type().try_into()?);
} else {
difference.push(v.sub(last)?);
last = v;
}
idx += 1;
}
Ok(Arc::new(ScalarValue::iter_to_array(difference)?))
}
}
fn uses_window_frame(&self) -> bool {
false
}
fn include_rank(&self) -> bool {
false
}
}
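The null-handling branch above can be summarised as: every NULL input and the first non-NULL value map to NULL, and each later non-NULL value is subtracted from the most recent non-NULL value. Below is a small sketch of just those semantics over `Option<i64>`; the fast path in the real code uses the Arrow `subtract_dyn` and `shift` kernels instead.

// Minimal sketch of the difference semantics with NULLs: output NULL until a
// previous non-NULL value exists, then emit `current - last_non_null`.
fn difference(values: &[Option<i64>]) -> Vec<Option<i64>> {
    let mut last: Option<i64> = None;
    values
        .iter()
        .map(|v| match (*v, last) {
            (Some(curr), Some(prev)) => {
                last = Some(curr);
                Some(curr - prev)
            }
            (Some(curr), None) => {
                last = Some(curr);
                None
            }
            (None, _) => None,
        })
        .collect()
}

fn main() {
    let input = [None, Some(1), None, Some(4), Some(6)];
    assert_eq!(difference(&input), vec![None, None, None, Some(3), Some(2)]);
}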

View File

@ -0,0 +1,74 @@
use arrow::array::Array;
use arrow::compute::{lt_dyn_scalar, nullif};
use datafusion::common::{Result, ScalarValue};
use datafusion::logical_expr::window_state::WindowAggState;
use datafusion::logical_expr::PartitionEvaluator;
use std::ops::Range;
use std::sync::Arc;
/// Wrap a PartitionEvaluator in a non-negative filter.
pub(super) fn wrapper(
partition_evaluator: Box<dyn PartitionEvaluator>,
) -> Box<dyn PartitionEvaluator> {
Box::new(NonNegative {
partition_evaluator,
})
}
#[derive(Debug)]
struct NonNegative {
partition_evaluator: Box<dyn PartitionEvaluator>,
}
impl PartitionEvaluator for NonNegative {
fn memoize(&mut self, state: &mut WindowAggState) -> Result<()> {
self.partition_evaluator.memoize(state)
}
fn get_range(&self, idx: usize, n_rows: usize) -> Result<Range<usize>> {
self.partition_evaluator.get_range(idx, n_rows)
}
fn evaluate_all(
&mut self,
values: &[Arc<dyn Array>],
num_rows: usize,
) -> Result<Arc<dyn Array>> {
let array = self.partition_evaluator.evaluate_all(values, num_rows)?;
let predicate = lt_dyn_scalar(&array, 0)?;
Ok(nullif(&array, &predicate)?)
}
fn evaluate(&mut self, values: &[Arc<dyn Array>], range: &Range<usize>) -> Result<ScalarValue> {
let value = self.partition_evaluator.evaluate(values, range)?;
Ok(match value {
ScalarValue::Float64(Some(v)) if v < 0.0 => ScalarValue::Float64(None),
ScalarValue::Int64(Some(v)) if v < 0 => ScalarValue::Int64(None),
v => v,
})
}
fn evaluate_all_with_rank(
&self,
num_rows: usize,
ranks_in_partition: &[Range<usize>],
) -> Result<Arc<dyn Array>> {
let array = self
.partition_evaluator
.evaluate_all_with_rank(num_rows, ranks_in_partition)?;
let predicate = lt_dyn_scalar(&array, 0)?;
Ok(nullif(&array, &predicate)?)
}
fn supports_bounded_execution(&self) -> bool {
self.partition_evaluator.supports_bounded_execution()
}
fn uses_window_frame(&self) -> bool {
self.partition_evaluator.uses_window_frame()
}
fn include_rank(&self) -> bool {
self.partition_evaluator.include_rank()
}
}
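The wrapper's effect is purely a post-filter on the wrapped evaluator's output: any negative result becomes NULL, and everything else passes through unchanged. A tiny sketch of that mapping over `Option<f64>` (the real implementation does this vectorised via the `lt_dyn_scalar` and `nullif` kernels):

// Minimal sketch of the non-negative post-filter: negative results are
// replaced with NULL, non-negative results and NULLs pass through.
fn non_negative(values: &[Option<f64>]) -> Vec<Option<f64>> {
    values.iter().map(|v| v.filter(|v| *v >= 0.0)).collect()
}

fn main() {
    let diffs = [Some(2.0), Some(-1.5), None, Some(0.0)];
    assert_eq!(non_negative(&diffs), vec![Some(2.0), None, None, Some(0.0)]);
}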

View File

@ -6,10 +6,10 @@ use datafusion::logical_expr::{PartitionEvaluator, Signature, TypeSignature, Vol
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use std::sync::Arc; use std::sync::Arc;
/// The name of the percent_row_number aggregate function. /// The name of the percent_row_number window function.
pub(super) const NAME: &str = "percent_row_number"; pub(super) const NAME: &str = "percent_row_number";
/// Valid signatures for the percent_row_number aggregate function. /// Valid signatures for the percent_row_number window function.
pub(super) static SIGNATURE: Lazy<Signature> = Lazy::new(|| { pub(super) static SIGNATURE: Lazy<Signature> = Lazy::new(|| {
Signature::one_of( Signature::one_of(
vec![ vec![

View File

@ -16,7 +16,7 @@ observability_deps = { path = "../observability_deps" }
query_functions = { path = "../query_functions"} query_functions = { path = "../query_functions"}
schema = { path = "../schema" } schema = { path = "../schema" }
snafu = "0.7" snafu = "0.7"
sqlparser = "0.35.0" sqlparser = "0.36.0"
workspace-hack = { version = "0.1", path = "../workspace-hack" } workspace-hack = { version = "0.1", path = "../workspace-hack" }
[dev-dependencies] [dev-dependencies]

View File

@ -496,7 +496,6 @@ impl TreeNodeVisitor for RowBasedVisitor {
| Expr::Column(_) | Expr::Column(_)
| Expr::Exists { .. } | Expr::Exists { .. }
| Expr::GetIndexedField { .. } | Expr::GetIndexedField { .. }
| Expr::ILike { .. }
| Expr::InList { .. } | Expr::InList { .. }
| Expr::InSubquery { .. } | Expr::InSubquery { .. }
| Expr::IsFalse(_) | Expr::IsFalse(_)

View File

@ -515,6 +515,7 @@ mod tests {
expr, expr,
pattern, pattern,
escape_char: None, escape_char: None,
case_insensitive: false,
}) })
} }

View File

@ -64,9 +64,7 @@ impl From<&SingleTenantExtractError> for hyper::StatusCode {
SingleTenantExtractError::NoBucketSpecified => Self::BAD_REQUEST, SingleTenantExtractError::NoBucketSpecified => Self::BAD_REQUEST,
SingleTenantExtractError::InvalidNamespace(_) => Self::BAD_REQUEST, SingleTenantExtractError::InvalidNamespace(_) => Self::BAD_REQUEST,
SingleTenantExtractError::ParseV1Request( SingleTenantExtractError::ParseV1Request(
V1WriteParseError::NoQueryParams V1WriteParseError::NoQueryParams | V1WriteParseError::DecodeFail(_),
| V1WriteParseError::DecodeFail(_)
| V1WriteParseError::ContainsRpSeparator,
) => Self::BAD_REQUEST, ) => Self::BAD_REQUEST,
SingleTenantExtractError::ParseV2Request( SingleTenantExtractError::ParseV2Request(
V2WriteParseError::NoQueryParams | V2WriteParseError::DecodeFail(_), V2WriteParseError::NoQueryParams | V2WriteParseError::DecodeFail(_),
@ -125,10 +123,6 @@ async fn parse_v1(
// Extract the write parameters. // Extract the write parameters.
let write_params = WriteParamsV1::try_from(req)?; let write_params = WriteParamsV1::try_from(req)?;
// Extracting the write parameters validates the db field never contains the
// '/' separator to avoid ambiguity with the "namespace/rp" construction.
debug_assert!(!write_params.db.contains(V1_NAMESPACE_RP_SEPARATOR));
// Extract or construct the namespace name string from the write parameters // Extract or construct the namespace name string from the write parameters
let namespace = NamespaceName::new(match write_params.rp { let namespace = NamespaceName::new(match write_params.rp {
RetentionPolicy::Unspecified | RetentionPolicy::Autogen => write_params.db, RetentionPolicy::Unspecified | RetentionPolicy::Autogen => write_params.db,
@ -316,22 +310,65 @@ mod tests {
} }
); );
// Prevent ambiguity by denying the `/` character in the DB // Permit `/` character in the DB
test_parse_v1!( test_parse_v1!(
no_rp_db_with_rp_separator, no_rp_db_with_rp_separator,
query_string = "?db=bananas/are/great", query_string = "?db=bananas/are/great",
want = Err(Error::SingleTenantError( want = Ok(WriteParams{ namespace, precision }) => {
SingleTenantExtractError::ParseV1Request(V1WriteParseError::ContainsRpSeparator) assert_eq!(namespace.as_str(), "bananas/are/great");
)) assert_matches!(precision, Precision::Nanoseconds);
}
); );
// Prevent ambiguity by denying the `/` character in the RP // Permit the `/` character in the RP
test_parse_v1!( test_parse_v1!(
rp_with_rp_separator, rp_with_rp_separator,
query_string = "?db=bananas&rp=are/great", query_string = "?db=bananas&rp=are/great",
want = Err(Error::SingleTenantError( want = Ok(WriteParams{ namespace, precision }) => {
SingleTenantExtractError::ParseV1Request(V1WriteParseError::ContainsRpSeparator) assert_eq!(namespace.as_str(), "bananas/are/great");
)) assert_matches!(precision, Precision::Nanoseconds);
}
);
// `/` character is allowed in the DB, if a named RP is specified
test_parse_v1!(
db_with_rp_separator_and_rp,
query_string = "?db=foo/bar&rp=my_rp",
want = Ok(WriteParams{ namespace, precision }) => {
assert_eq!(namespace.as_str(), "foo/bar/my_rp");
assert_matches!(precision, Precision::Nanoseconds);
}
);
// Always concat, even if this results in duplicating the rp within the namespace.
// ** this matches the query API behavior **
test_parse_v1!(
db_with_rp_separator_and_duplicate_rp,
query_string = "?db=foo/my_rp&rp=my_rp",
want = Ok(WriteParams{ namespace, precision }) => {
assert_eq!(namespace.as_str(), "foo/my_rp/my_rp");
assert_matches!(precision, Precision::Nanoseconds);
}
);
// `/` character is allowed in the DB, if an autogen RP is specified
test_parse_v1!(
db_with_rp_separator_and_rp_autogen,
query_string = "?db=foo/bar&rp=autogen",
want = Ok(WriteParams{ namespace, precision }) => {
assert_eq!(namespace.as_str(), "foo/bar");
assert_matches!(precision, Precision::Nanoseconds);
}
);
// `/` character is allowed in the DB, if a default RP is specified
test_parse_v1!(
db_with_rp_separator_and_rp_default,
query_string = "?db=foo/bar&rp=default",
want = Ok(WriteParams{ namespace, precision }) => {
assert_eq!(namespace.as_str(), "foo/bar");
assert_matches!(precision, Precision::Nanoseconds);
}
); );
test_parse_v1!( test_parse_v1!(

View File

@ -29,12 +29,6 @@ pub enum V1WriteParseError {
/// The request contains invalid parameters. /// The request contains invalid parameters.
#[error("failed to deserialize db/rp/precision in request: {0}")] #[error("failed to deserialize db/rp/precision in request: {0}")]
DecodeFail(#[from] serde::de::value::Error), DecodeFail(#[from] serde::de::value::Error),
/// The provided "db" or "rp" value contains the reserved `/` character.
///
/// See [`V1_NAMESPACE_RP_SEPARATOR`].
#[error("db cannot contain the reserved character '/'")]
ContainsRpSeparator,
} }
/// May be empty string, explicit rp name, or `autogen`. As provided at the /// May be empty string, explicit rp name, or `autogen`. As provided at the
@ -61,7 +55,7 @@ impl<'de> Deserialize<'de> for RetentionPolicy {
Ok(match s.as_str() { Ok(match s.as_str() {
"" => RetentionPolicy::Unspecified, "" => RetentionPolicy::Unspecified,
"''" => RetentionPolicy::Unspecified, "''" => RetentionPolicy::Unspecified,
"autogen" => RetentionPolicy::Autogen, "autogen" | "default" => RetentionPolicy::Autogen,
_ => RetentionPolicy::Named(s), _ => RetentionPolicy::Named(s),
}) })
} }
@ -90,20 +84,6 @@ impl<T> TryFrom<&Request<T>> for WriteParamsV1 {
let query = req.uri().query().ok_or(V1WriteParseError::NoQueryParams)?; let query = req.uri().query().ok_or(V1WriteParseError::NoQueryParams)?;
let params: WriteParamsV1 = serde_urlencoded::from_str(query)?; let params: WriteParamsV1 = serde_urlencoded::from_str(query)?;
// No namespace (db) is ever allowed to contain a `/` to prevent
// ambiguity with the namespace/rp NamespaceName construction.
if params.db.contains(V1_NAMESPACE_RP_SEPARATOR) {
return Err(V1WriteParseError::ContainsRpSeparator);
}
// Likewise the "rp" field itself cannot contain the `/` character if
// specified.
if let RetentionPolicy::Named(s) = &params.rp {
if s.contains(V1_NAMESPACE_RP_SEPARATOR) {
return Err(V1WriteParseError::ContainsRpSeparator);
}
}
Ok(params) Ok(params)
} }
} }
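Based on the test expectations above, the v1 `db`/`rp` to namespace mapping now permits the `/` character in both values: an unspecified rp, `autogen`, and `default` all resolve to the database name alone, while any other rp is concatenated as `db/rp`. The function below is a hypothetical sketch of that rule only; the production handler builds a validated `NamespaceName` rather than a plain string.

// Hypothetical sketch of the v1 namespace construction rule implied by the
// tests above; the production code validates the result as a NamespaceName.
fn v1_namespace(db: &str, rp: Option<&str>) -> String {
    match rp {
        // Unspecified, autogen, and default all resolve to the db alone.
        None | Some("") | Some("autogen") | Some("default") => db.to_string(),
        // Any other retention policy is appended with the '/' separator,
        // even if that duplicates a suffix already present in db.
        Some(named) => format!("{db}/{named}"),
    }
}

fn main() {
    assert_eq!(v1_namespace("bananas/are/great", None), "bananas/are/great");
    assert_eq!(v1_namespace("foo/bar", Some("my_rp")), "foo/bar/my_rp");
    assert_eq!(v1_namespace("foo/my_rp", Some("my_rp")), "foo/my_rp/my_rp");
    assert_eq!(v1_namespace("foo/bar", Some("autogen")), "foo/bar");
    assert_eq!(v1_namespace("foo/bar", Some("default")), "foo/bar");
}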

View File

@ -9,6 +9,7 @@ license.workspace = true
async-trait = "0.1.71" async-trait = "0.1.71"
bytes = "1.4" bytes = "1.4"
datafusion = { workspace = true } datafusion = { workspace = true }
executor = { path = "../executor" }
iox_query = { path = "../iox_query" } iox_query = { path = "../iox_query" }
iox_query_influxql = { path = "../iox_query_influxql" } iox_query_influxql = { path = "../iox_query_influxql" }
iox_query_influxrpc = { path = "../iox_query_influxrpc" } iox_query_influxrpc = { path = "../iox_query_influxrpc" }

View File

@ -53,16 +53,25 @@ pub fn datafusion_error_to_tonic_code(e: &DataFusionError) -> tonic::Code {
| DataFusionError::NotImplemented(_) | DataFusionError::NotImplemented(_)
| DataFusionError::Plan(_) => tonic::Code::InvalidArgument, | DataFusionError::Plan(_) => tonic::Code::InvalidArgument,
DataFusionError::Context(_,_) => unreachable!("handled in chain traversal above"), DataFusionError::Context(_,_) => unreachable!("handled in chain traversal above"),
// External errors are mostly already traversed by DataFusion, except for some IOx errors
DataFusionError::External(e) => {
if let Some(e) = e.downcast_ref::<executor::JobError>() {
match e {
executor::JobError::WorkerGone => tonic::Code::Unavailable,
executor::JobError::Panic { .. } => tonic::Code::Internal,
}
} else {
// All other, unclassified cases are signalled as "internal error" to the user since they cannot do
// anything about it (except for reporting a bug). Note that DataFusion "external" error is only from
// DataFusion's PoV, not from a user's PoV.
tonic::Code::Internal
}
}
// Map as many as possible back into user visible // Map as many as possible back into user visible
// (non internal) errors and only treat the ones // (non internal) errors and only treat the ones
// the user likely can't do anything about as internal // the user likely can't do anything about as internal
DataFusionError::ObjectStore(_) DataFusionError::ObjectStore(_)
| DataFusionError::IoError(_) | DataFusionError::IoError(_)
// External originate from outside DataFusions core codebase.
// As of 2022-10-17, these always come external object store
// errors (e.g. misconfiguration or bad path) which would be
// an internal error and thus we classify them as such.
| DataFusionError::External(_)
// Substrait errors come from internal code and are unused // Substrait errors come from internal code and are unused
// with DataFusion at the moment // with DataFusion at the moment
| DataFusionError::Substrait(_) | DataFusionError::Substrait(_)
@ -100,7 +109,7 @@ mod test {
tonic::Code::InvalidArgument, tonic::Code::InvalidArgument,
); );
do_transl_test(DataFusionError::Internal(s), tonic::Code::Internal); do_transl_test(DataFusionError::Internal(s.clone()), tonic::Code::Internal);
// traversal // traversal
do_transl_test( do_transl_test(
@ -110,6 +119,29 @@ mod test {
), ),
tonic::Code::ResourceExhausted, tonic::Code::ResourceExhausted,
); );
// inspect "external" errors
do_transl_test(
DataFusionError::External(s.clone().into()),
tonic::Code::Internal,
);
do_transl_test(
DataFusionError::External(Box::new(executor::JobError::Panic { msg: s })),
tonic::Code::Internal,
);
do_transl_test(
DataFusionError::External(Box::new(executor::JobError::WorkerGone)),
tonic::Code::Unavailable,
);
do_transl_test(
DataFusionError::Context(
"ctx".into(),
Box::new(DataFusionError::External(Box::new(
executor::JobError::WorkerGone,
))),
),
tonic::Code::Unavailable,
);
} }
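The new `DataFusionError::External` arm works by downcasting the boxed error to the IOx `executor::JobError` type and mapping `WorkerGone` to `Unavailable`; everything else remains `Internal`. Below is a self-contained sketch of that downcast pattern using hypothetical stand-ins for `executor::JobError` and `tonic::Code`.

// Hypothetical stand-ins for executor::JobError and tonic::Code, used only to
// illustrate the downcast-based classification of DataFusion "external" errors.
use std::error::Error;
use std::fmt;

#[derive(Debug)]
enum JobError {
    WorkerGone,
    Panic { msg: String },
}

impl fmt::Display for JobError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::WorkerGone => write!(f, "worker gone"),
            Self::Panic { msg } => write!(f, "panic: {msg}"),
        }
    }
}

impl Error for JobError {}

#[derive(Debug, PartialEq)]
enum Code {
    Internal,
    Unavailable,
}

// Downcast the boxed external error; only a vanished executor worker is
// surfaced as an unavailable (retryable) condition, everything else is internal.
fn classify_external(e: &(dyn Error + 'static)) -> Code {
    match e.downcast_ref::<JobError>() {
        Some(JobError::WorkerGone) => Code::Unavailable,
        _ => Code::Internal,
    }
}

fn main() {
    let gone: Box<dyn Error + Send + Sync> = Box::new(JobError::WorkerGone);
    let other: Box<dyn Error + Send + Sync> = "misconfigured object store".into();
    assert_eq!(classify_external(gone.as_ref()), Code::Unavailable);
    assert_eq!(classify_external(other.as_ref()), Code::Internal);
}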
fn do_transl_test(e: DataFusionError, code: tonic::Code) { fn do_transl_test(e: DataFusionError, code: tonic::Code) {

View File

@ -10,7 +10,7 @@ license.workspace = true
publish = false publish = false
[dependencies] [dependencies]
sqlx = { version = "0.6.3", features = ["runtime-tokio-rustls", "postgres", "json", "tls"] } sqlx = { version = "0.7.1", features = ["runtime-tokio-rustls", "postgres", "json", "tls-rustls"] }
either = "1.8.1" either = "1.8.1"
futures = "0.3" futures = "0.3"
workspace-hack = { version = "0.1", path = "../workspace-hack" } workspace-hack = { version = "0.1", path = "../workspace-hack" }

View File

@ -8,7 +8,7 @@ license.workspace = true
[dependencies] # In alphabetical order [dependencies] # In alphabetical order
dotenvy = "0.15.7" dotenvy = "0.15.7"
parking_lot = "0.12" parking_lot = "0.12"
tempfile = "3.6.0" tempfile = "3.7.0"
tracing-log = "0.1" tracing-log = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] } tracing-subscriber = { version = "0.3", features = ["env-filter"] }
observability_deps = { path = "../observability_deps" } observability_deps = { path = "../observability_deps" }

View File

@ -31,8 +31,8 @@ rand = "0.8.3"
regex = "1.9" regex = "1.9"
reqwest = { version = "0.11", default-features = false, features = ["json", "rustls-tls"] } reqwest = { version = "0.11", default-features = false, features = ["json", "rustls-tls"] }
snafu = "0.7" snafu = "0.7"
sqlx = { version = "0.6", features = [ "runtime-tokio-rustls" , "postgres", "uuid" ] } sqlx = { version = "0.7.1", features = [ "runtime-tokio-rustls" , "postgres", "uuid" ] }
tempfile = "3.6.0" tempfile = "3.7.0"
test_helpers = { path = "../test_helpers", features = ["future_timeout"] } test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
tokio = { version = "1.29", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] } tokio = { version = "1.29", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] }
tokio-util = "0.7" tokio-util = "0.7"

View File

@ -22,6 +22,6 @@ workspace-hack = { version = "0.1", path = "../workspace-hack" }
sysinfo = "0.29.5" sysinfo = "0.29.5"
[dev-dependencies] [dev-dependencies]
tempfile = "3.6.0" tempfile = "3.7.0"
# Need the multi-threaded executor for testing # Need the multi-threaded executor for testing
tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-thread", "time"] } tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-thread", "time"] }

View File

@ -16,25 +16,23 @@ license.workspace = true
### BEGIN HAKARI SECTION ### BEGIN HAKARI SECTION
[dependencies] [dependencies]
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] } ahash = { version = "0.8" }
arrow = { version = "43", features = ["dyn_cmp_dict", "prettyprint"] } arrow = { version = "43", features = ["dyn_cmp_dict", "prettyprint"] }
arrow-array = { version = "43", default-features = false, features = ["chrono-tz"] } arrow-array = { version = "43", default-features = false, features = ["chrono-tz"] }
arrow-flight = { version = "43", features = ["flight-sql-experimental"] } arrow-flight = { version = "43", features = ["flight-sql-experimental"] }
arrow-ord = { version = "43", default-features = false, features = ["dyn_cmp_dict"] } arrow-ord = { version = "43", default-features = false, features = ["dyn_cmp_dict"] }
arrow-string = { version = "43", default-features = false, features = ["dyn_cmp_dict"] } arrow-string = { version = "43", default-features = false, features = ["dyn_cmp_dict"] }
base64-594e8ee84c453af0 = { package = "base64", version = "0.13" } base64 = { version = "0.21" }
base64-647d43efb71741da = { package = "base64", version = "0.21" }
bitflags = { version = "1" }
byteorder = { version = "1" } byteorder = { version = "1" }
bytes = { version = "1" } bytes = { version = "1" }
chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "serde"] } chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "serde"] }
crossbeam-utils = { version = "0.8" } crossbeam-utils = { version = "0.8" }
crypto-common = { version = "0.1", default-features = false, features = ["std"] } crypto-common = { version = "0.1", default-features = false, features = ["std"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "46182c894e5106adba7fb53e9848ce666fb6129b" } datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "44008d71180f2d03e9d21944788e61cb8845abc7" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "46182c894e5106adba7fb53e9848ce666fb6129b", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "44008d71180f2d03e9d21944788e61cb8845abc7", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "46182c894e5106adba7fb53e9848ce666fb6129b", default-features = false, features = ["crypto_expressions", "encoding_expressions", "regex_expressions", "unicode_expressions"] } datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "44008d71180f2d03e9d21944788e61cb8845abc7", default-features = false, features = ["crypto_expressions", "encoding_expressions", "regex_expressions", "unicode_expressions"] }
digest = { version = "0.10", features = ["mac", "std"] } digest = { version = "0.10", features = ["mac", "std"] }
either = { version = "1" } either = { version = "1", features = ["serde"] }
fixedbitset = { version = "0.4" } fixedbitset = { version = "0.4" }
flatbuffers = { version = "23" } flatbuffers = { version = "23" }
flate2 = { version = "1" } flate2 = { version = "1" }
@ -47,8 +45,7 @@ futures-task = { version = "0.3", default-features = false, features = ["std"] }
futures-util = { version = "0.3", features = ["channel", "io", "sink"] } futures-util = { version = "0.3", features = ["channel", "io", "sink"] }
getrandom = { version = "0.2", default-features = false, features = ["std"] } getrandom = { version = "0.2", default-features = false, features = ["std"] }
hashbrown = { version = "0.14", features = ["raw"] } hashbrown = { version = "0.14", features = ["raw"] }
indexmap-dff4ba8e3ae991db = { package = "indexmap", version = "1", default-features = false, features = ["std"] } indexmap = { version = "2" }
indexmap-f595c2ba2a3f28df = { package = "indexmap", version = "2" }
itertools = { version = "0.10" } itertools = { version = "0.10" }
libc = { version = "0.2", features = ["extra_traits"] } libc = { version = "0.2", features = ["extra_traits"] }
lock_api = { version = "0.4", features = ["arc_lock"] } lock_api = { version = "0.4", features = ["arc_lock"] }
@ -74,14 +71,16 @@ regex-automata = { version = "0.3", default-features = false, features = ["dfa-o
regex-syntax = { version = "0.7" } regex-syntax = { version = "0.7" }
reqwest = { version = "0.11", default-features = false, features = ["json", "rustls-tls", "stream"] } reqwest = { version = "0.11", default-features = false, features = ["json", "rustls-tls", "stream"] }
ring = { version = "0.16", features = ["std"] } ring = { version = "0.16", features = ["std"] }
rustls = { version = "0.21", default-features = false, features = ["dangerous_configuration", "logging", "tls12"] }
serde = { version = "1", features = ["derive", "rc"] } serde = { version = "1", features = ["derive", "rc"] }
serde_json = { version = "1", features = ["raw_value"] } serde_json = { version = "1", features = ["raw_value"] }
sha2 = { version = "0.10" } sha2 = { version = "0.10" }
similar = { version = "2", features = ["inline"] } similar = { version = "2", features = ["inline"] }
smallvec = { version = "1", default-features = false, features = ["union"] } smallvec = { version = "1", default-features = false, features = ["union"] }
sqlparser = { version = "0.35", features = ["visitor"] } sqlx = { version = "0.7", features = ["postgres", "runtime-tokio-rustls", "sqlite", "uuid"] }
sqlx = { version = "0.6", features = ["json", "postgres", "runtime-tokio-rustls", "sqlite", "tls", "uuid"] } sqlx-core = { version = "0.7", features = ["_rt-tokio", "_tls-rustls", "any", "json", "migrate", "offline", "uuid"] }
sqlx-core = { version = "0.6", default-features = false, features = ["any", "migrate", "postgres", "runtime-tokio-rustls", "sqlite", "uuid"] } sqlx-postgres = { version = "0.7", default-features = false, features = ["any", "json", "migrate", "offline", "uuid"] }
sqlx-sqlite = { version = "0.7", default-features = false, features = ["any", "json", "migrate", "offline", "uuid"] }
thrift = { version = "0.17" } thrift = { version = "0.17" }
tokio = { version = "1", features = ["full", "test-util", "tracing"] } tokio = { version = "1", features = ["full", "test-util", "tracing"] }
tokio-stream = { version = "0.1", features = ["fs", "net"] } tokio-stream = { version = "0.1", features = ["fs", "net"] }
@ -101,17 +100,15 @@ zstd-safe = { version = "6", default-features = false, features = ["arrays", "le
zstd-sys = { version = "2", default-features = false, features = ["legacy", "std", "zdict_builder"] } zstd-sys = { version = "2", default-features = false, features = ["legacy", "std", "zdict_builder"] }
[build-dependencies] [build-dependencies]
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] } ahash = { version = "0.8" }
base64-594e8ee84c453af0 = { package = "base64", version = "0.13" } base64 = { version = "0.21" }
base64-647d43efb71741da = { package = "base64", version = "0.21" }
bitflags = { version = "1" }
byteorder = { version = "1" } byteorder = { version = "1" }
bytes = { version = "1" } bytes = { version = "1" }
cc = { version = "1", default-features = false, features = ["parallel"] } cc = { version = "1", default-features = false, features = ["parallel"] }
crossbeam-utils = { version = "0.8" } crossbeam-utils = { version = "0.8" }
crypto-common = { version = "0.1", default-features = false, features = ["std"] } crypto-common = { version = "0.1", default-features = false, features = ["std"] }
digest = { version = "0.10", features = ["mac", "std"] } digest = { version = "0.10", features = ["mac", "std"] }
either = { version = "1" } either = { version = "1", features = ["serde"] }
fixedbitset = { version = "0.4" } fixedbitset = { version = "0.4" }
futures-channel = { version = "0.3", features = ["sink"] } futures-channel = { version = "0.3", features = ["sink"] }
futures-core = { version = "0.3" } futures-core = { version = "0.3" }
@ -123,7 +120,7 @@ futures-util = { version = "0.3", features = ["channel", "io", "sink"] }
getrandom = { version = "0.2", default-features = false, features = ["std"] } getrandom = { version = "0.2", default-features = false, features = ["std"] }
hashbrown = { version = "0.14", features = ["raw"] } hashbrown = { version = "0.14", features = ["raw"] }
heck = { version = "0.4", features = ["unicode"] } heck = { version = "0.4", features = ["unicode"] }
indexmap-dff4ba8e3ae991db = { package = "indexmap", version = "1", default-features = false, features = ["std"] } indexmap = { version = "2" }
itertools = { version = "0.10" } itertools = { version = "0.10" }
libc = { version = "0.2", features = ["extra_traits"] } libc = { version = "0.2", features = ["extra_traits"] }
lock_api = { version = "0.4", features = ["arc_lock"] } lock_api = { version = "0.4", features = ["arc_lock"] }
@ -144,67 +141,78 @@ regex = { version = "1" }
regex-automata = { version = "0.3", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] } regex-automata = { version = "0.3", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] }
regex-syntax = { version = "0.7" } regex-syntax = { version = "0.7" }
ring = { version = "0.16", features = ["std"] } ring = { version = "0.16", features = ["std"] }
rustls = { version = "0.21", default-features = false, features = ["dangerous_configuration", "logging", "tls12"] }
serde = { version = "1", features = ["derive", "rc"] } serde = { version = "1", features = ["derive", "rc"] }
serde_json = { version = "1", features = ["raw_value"] } serde_json = { version = "1", features = ["raw_value"] }
sha2 = { version = "0.10" } sha2 = { version = "0.10" }
smallvec = { version = "1", default-features = false, features = ["union"] } smallvec = { version = "1", default-features = false, features = ["union"] }
sqlx-core = { version = "0.6", default-features = false, features = ["any", "migrate", "postgres", "runtime-tokio-rustls", "sqlite", "uuid"] } sqlx-core = { version = "0.7", features = ["_rt-tokio", "_tls-rustls", "any", "json", "migrate", "offline", "uuid"] }
sqlx-macros = { version = "0.6", default-features = false, features = ["json", "migrate", "postgres", "runtime-tokio-rustls", "sqlite", "uuid"] } sqlx-macros = { version = "0.7", features = ["_rt-tokio", "_tls-rustls", "json", "migrate", "postgres", "sqlite", "uuid"] }
sqlx-macros-core = { version = "0.7", features = ["_rt-tokio", "_tls-rustls", "json", "migrate", "postgres", "sqlite", "uuid"] }
sqlx-postgres = { version = "0.7", default-features = false, features = ["any", "json", "migrate", "offline", "uuid"] }
sqlx-sqlite = { version = "0.7", default-features = false, features = ["any", "json", "migrate", "offline", "uuid"] }
syn-dff4ba8e3ae991db = { package = "syn", version = "1", features = ["extra-traits", "full"] } syn-dff4ba8e3ae991db = { package = "syn", version = "1", features = ["extra-traits", "full"] }
syn-f595c2ba2a3f28df = { package = "syn", version = "2", features = ["extra-traits", "full", "visit-mut"] } syn-f595c2ba2a3f28df = { package = "syn", version = "2", features = ["extra-traits", "full", "visit-mut"] }
tokio = { version = "1", features = ["full", "test-util", "tracing"] } tokio = { version = "1", features = ["full", "test-util", "tracing"] }
tokio-stream = { version = "0.1", features = ["fs", "net"] } tokio-stream = { version = "0.1", features = ["fs", "net"] }
tracing = { version = "0.1", features = ["log", "max_level_trace", "release_max_level_trace"] }
tracing-core = { version = "0.1" }
unicode-bidi = { version = "0.3" } unicode-bidi = { version = "0.3" }
unicode-normalization = { version = "0.1" } unicode-normalization = { version = "0.1" }
url = { version = "2" } url = { version = "2" }
uuid = { version = "1", features = ["v4"] } uuid = { version = "1", features = ["v4"] }
[target.x86_64-unknown-linux-gnu.dependencies] [target.x86_64-unknown-linux-gnu.dependencies]
bitflags = { version = "2", default-features = false, features = ["std"] }
nix = { version = "0.26" } nix = { version = "0.26" }
once_cell = { version = "1", default-features = false, features = ["unstable"] } once_cell = { version = "1", default-features = false, features = ["unstable"] }
rustix = { version = "0.38", features = ["fs", "termios"] } rustix = { version = "0.38", features = ["fs", "termios"] }
rustls = { version = "0.21", features = ["dangerous_configuration"] } rustls = { version = "0.21" }
webpki = { version = "0.22", default-features = false, features = ["std"] }
[target.x86_64-unknown-linux-gnu.build-dependencies] [target.x86_64-unknown-linux-gnu.build-dependencies]
bitflags = { version = "2", default-features = false, features = ["std"] }
once_cell = { version = "1", default-features = false, features = ["unstable"] } once_cell = { version = "1", default-features = false, features = ["unstable"] }
webpki = { version = "0.22", default-features = false, features = ["std"] } rustix = { version = "0.38", features = ["fs", "termios"] }
rustls = { version = "0.21" }
[target.x86_64-apple-darwin.dependencies] [target.x86_64-apple-darwin.dependencies]
bitflags = { version = "2", default-features = false, features = ["std"] }
nix = { version = "0.26" }
once_cell = { version = "1", default-features = false, features = ["unstable"] }
rustix = { version = "0.38", features = ["fs", "termios"] }
rustls = { version = "0.21", features = ["dangerous_configuration"] }
rustls = { version = "0.21" }
webpki = { version = "0.22", default-features = false, features = ["std"] }
[target.x86_64-apple-darwin.build-dependencies]
bitflags = { version = "2", default-features = false, features = ["std"] }
once_cell = { version = "1", default-features = false, features = ["unstable"] }
webpki = { version = "0.22", default-features = false, features = ["std"] }
rustix = { version = "0.38", features = ["fs", "termios"] }
rustls = { version = "0.21" }
[target.aarch64-apple-darwin.dependencies]
bitflags = { version = "2", default-features = false, features = ["std"] }
nix = { version = "0.26" }
once_cell = { version = "1", default-features = false, features = ["unstable"] }
rustix = { version = "0.38", features = ["fs", "termios"] }
rustls = { version = "0.21", features = ["dangerous_configuration"] }
rustls = { version = "0.21" }
webpki = { version = "0.22", default-features = false, features = ["std"] }
[target.aarch64-apple-darwin.build-dependencies]
bitflags = { version = "2", default-features = false, features = ["std"] }
once_cell = { version = "1", default-features = false, features = ["unstable"] }
webpki = { version = "0.22", default-features = false, features = ["std"] }
rustix = { version = "0.38", features = ["fs", "termios"] }
rustls = { version = "0.21" }
[target.x86_64-pc-windows-msvc.dependencies]
once_cell = { version = "1", default-features = false, features = ["unstable"] }
rustls = { version = "0.21", features = ["dangerous_configuration"] }
rustls = { version = "0.21" }
scopeguard = { version = "1" }
webpki = { version = "0.22", default-features = false, features = ["std"] }
winapi = { version = "0.3", default-features = false, features = ["basetsd", "cfg", "combaseapi", "consoleapi", "errhandlingapi", "evntrace", "fileapi", "handleapi", "heapapi", "ifdef", "impl-debug", "impl-default", "in6addr", "inaddr", "ioapiset", "iphlpapi", "lmaccess", "lmapibuf", "lmcons", "memoryapi", "minwinbase", "minwindef", "netioapi", "ntlsa", "ntsecapi", "objidl", "oleauto", "pdh", "powerbase", "processenv", "psapi", "rpcdce", "sddl", "securitybaseapi", "shellapi", "std", "stringapiset", "synchapi", "sysinfoapi", "timezoneapi", "wbemcli", "winbase", "wincon", "windef", "winerror", "winioctl", "winnt", "winreg", "winsock2", "winuser", "ws2ipdef", "ws2tcpip", "wtypesbase"] }
winapi = { version = "0.3", default-features = false, features = ["basetsd", "cfg", "combaseapi", "consoleapi", "errhandlingapi", "evntrace", "fileapi", "handleapi", "heapapi", "ifdef", "impl-debug", "impl-default", "in6addr", "inaddr", "ioapiset", "iphlpapi", "knownfolders", "lmaccess", "lmapibuf", "lmcons", "memoryapi", "minwinbase", "minwindef", "netioapi", "ntlsa", "ntsecapi", "ntstatus", "objbase", "objidl", "oleauto", "pdh", "powerbase", "processenv", "psapi", "rpcdce", "sddl", "securitybaseapi", "shellapi", "shlobj", "std", "stringapiset", "synchapi", "sysinfoapi", "timezoneapi", "wbemcli", "winbase", "wincon", "windef", "winerror", "winioctl", "winnt", "winreg", "winsock2", "winuser", "ws2ipdef", "ws2tcpip", "wtypesbase"] }
windows-sys = { version = "0.48", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_Threading", "Win32_System_WindowsProgramming", "Win32_UI_Shell"] }
[target.x86_64-pc-windows-msvc.build-dependencies]
once_cell = { version = "1", default-features = false, features = ["unstable"] }
rustls = { version = "0.21" }
scopeguard = { version = "1" }
webpki = { version = "0.22", default-features = false, features = ["std"] }
winapi = { version = "0.3", default-features = false, features = ["basetsd", "cfg", "combaseapi", "consoleapi", "errhandlingapi", "evntrace", "fileapi", "handleapi", "heapapi", "ifdef", "impl-debug", "impl-default", "in6addr", "inaddr", "ioapiset", "iphlpapi", "lmaccess", "lmapibuf", "lmcons", "memoryapi", "minwinbase", "minwindef", "netioapi", "ntlsa", "ntsecapi", "objidl", "oleauto", "pdh", "powerbase", "processenv", "psapi", "rpcdce", "sddl", "securitybaseapi", "shellapi", "std", "stringapiset", "synchapi", "sysinfoapi", "timezoneapi", "wbemcli", "winbase", "wincon", "windef", "winerror", "winioctl", "winnt", "winreg", "winsock2", "winuser", "ws2ipdef", "ws2tcpip", "wtypesbase"] }
winapi = { version = "0.3", default-features = false, features = ["basetsd", "cfg", "combaseapi", "consoleapi", "errhandlingapi", "evntrace", "fileapi", "handleapi", "heapapi", "ifdef", "impl-debug", "impl-default", "in6addr", "inaddr", "ioapiset", "iphlpapi", "knownfolders", "lmaccess", "lmapibuf", "lmcons", "memoryapi", "minwinbase", "minwindef", "netioapi", "ntlsa", "ntsecapi", "ntstatus", "objbase", "objidl", "oleauto", "pdh", "powerbase", "processenv", "psapi", "rpcdce", "sddl", "securitybaseapi", "shellapi", "shlobj", "std", "stringapiset", "synchapi", "sysinfoapi", "timezoneapi", "wbemcli", "winbase", "wincon", "windef", "winerror", "winioctl", "winnt", "winreg", "winsock2", "winuser", "ws2ipdef", "ws2tcpip", "wtypesbase"] }
windows-sys = { version = "0.48", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_Threading", "Win32_System_WindowsProgramming", "Win32_UI_Shell"] }
### END HAKARI SECTION
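The sqlx entries above are the visible part of the 0.6 → 0.7 bump this merge brings in from main: the Postgres and SQLite drivers move into the separate sqlx-postgres and sqlx-sqlite crates, sqlx-macros-core appears as the new proc-macro backend, and the Any driver now has to install its compiled-in backends explicitly at startup. A minimal sketch of that startup call, assuming a crate that depends on sqlx 0.7 with the any, sqlite, and Tokio runtime features enabled; the pool settings and connection URL below are illustrative, not taken from this repository:

use sqlx::any::{install_default_drivers, AnyPoolOptions};

#[tokio::main]
async fn main() -> Result<(), sqlx::Error> {
    // Sketch only: with sqlx 0.7 the Any driver does not know about the
    // compiled-in backends until they are installed explicitly.
    install_default_drivers();

    // Illustrative in-memory SQLite pool; a postgres:// URL works the same way.
    let pool = AnyPoolOptions::new()
        .max_connections(5)
        .connect("sqlite::memory:")
        .await?;

    // Smoke query; nothing is decoded, so it runs against any backend.
    sqlx::query("SELECT 1").execute(&pool).await?;
    Ok(())
}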