Merge branch 'main' into ntran/table_cli
commit
144778430e
File diff suppressed because it is too large
Load Diff
|
@ -121,8 +121,8 @@ license = "MIT OR Apache-2.0"
|
|||
[workspace.dependencies]
|
||||
arrow = { version = "43.0.0" }
|
||||
arrow-flight = { version = "43.0.0" }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "46182c894e5106adba7fb53e9848ce666fb6129b", default-features = false }
|
||||
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "46182c894e5106adba7fb53e9848ce666fb6129b" }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "44008d71180f2d03e9d21944788e61cb8845abc7", default-features = false }
|
||||
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "44008d71180f2d03e9d21944788e61cb8845abc7" }
|
||||
|
||||
hashbrown = { version = "0.14.0" }
|
||||
object_store = { version = "0.6.0" }
|
||||
|
|
|
@ -315,7 +315,7 @@ struct TestStateTtlAndRefresh {
|
|||
ttl_provider: Arc<TestTtlProvider>,
|
||||
refresh_duration_provider: Arc<TestRefreshDurationProvider>,
|
||||
time_provider: Arc<MockProvider>,
|
||||
loader: Arc<TestLoader<u8, String, ()>>,
|
||||
loader: Arc<TestLoader<u8, (), String>>,
|
||||
notify_idle: Arc<Notify>,
|
||||
}
|
||||
|
||||
|
@ -365,7 +365,7 @@ struct TestStateLRUAndRefresh {
|
|||
size_estimator: Arc<TestSizeEstimator>,
|
||||
refresh_duration_provider: Arc<TestRefreshDurationProvider>,
|
||||
time_provider: Arc<MockProvider>,
|
||||
loader: Arc<TestLoader<u8, String, ()>>,
|
||||
loader: Arc<TestLoader<u8, (), String>>,
|
||||
pool: Arc<ResourcePool<TestSize>>,
|
||||
notify_idle: Arc<Notify>,
|
||||
}
|
||||
|
@ -505,7 +505,7 @@ struct TestStateLruAndRefresh {
|
|||
size_estimator: Arc<TestSizeEstimator>,
|
||||
refresh_duration_provider: Arc<TestRefreshDurationProvider>,
|
||||
time_provider: Arc<MockProvider>,
|
||||
loader: Arc<TestLoader<u8, String, ()>>,
|
||||
loader: Arc<TestLoader<u8, (), String>>,
|
||||
notify_idle: Arc<Notify>,
|
||||
}
|
||||
|
||||
|
|
|
@ -963,7 +963,7 @@ mod tests {
|
|||
metric_registry: metric::Registry,
|
||||
refresh_duration_provider: Arc<TestRefreshDurationProvider>,
|
||||
time_provider: Arc<MockProvider>,
|
||||
loader: Arc<TestLoader<u8, String, ()>>,
|
||||
loader: Arc<TestLoader<u8, (), String>>,
|
||||
notify_idle: Arc<Notify>,
|
||||
}
|
||||
|
||||
|
|
|
@ -254,9 +254,10 @@ mod tests {
|
|||
use crate::{
|
||||
cache::{
|
||||
driver::CacheDriver,
|
||||
test_util::{run_test_generic, AbortAndWaitExt, EnsurePendingExt, TestAdapter},
|
||||
test_util::{run_test_generic, TestAdapter},
|
||||
},
|
||||
loader::test_util::TestLoader,
|
||||
test_util::{AbortAndWaitExt, EnsurePendingExt},
|
||||
};
|
||||
|
||||
use super::*;
|
||||
|
|
|
@ -1,12 +1,11 @@
|
|||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use futures::{Future, FutureExt};
|
||||
use tokio::{sync::Barrier, task::JoinHandle};
|
||||
use tokio::sync::Barrier;
|
||||
|
||||
use crate::{
|
||||
cache::{CacheGetStatus, CachePeekStatus},
|
||||
loader::test_util::TestLoader,
|
||||
test_util::{AbortAndWaitExt, EnsurePendingExt},
|
||||
};
|
||||
|
||||
use super::Cache;
|
||||
|
@ -461,60 +460,3 @@ where
|
|||
assert_eq!(res, String::from("foo"));
|
||||
assert_eq!(loader.loaded(), vec![(1, true)]);
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait EnsurePendingExt {
|
||||
type Out;
|
||||
|
||||
/// Ensure that the future is pending. In the pending case, try to pass the given barrier. Afterwards await the future again.
|
||||
///
|
||||
/// This is helpful to ensure a future is in a pending state before continuing with the test setup.
|
||||
async fn ensure_pending(self, barrier: Arc<Barrier>) -> Self::Out;
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<F> EnsurePendingExt for F
|
||||
where
|
||||
F: Future + Send + Unpin,
|
||||
{
|
||||
type Out = F::Output;
|
||||
|
||||
async fn ensure_pending(self, barrier: Arc<Barrier>) -> Self::Out {
|
||||
let mut fut = self.fuse();
|
||||
futures::select_biased! {
|
||||
_ = fut => panic!("fut should be pending"),
|
||||
_ = barrier.wait().fuse() => (),
|
||||
}
|
||||
|
||||
fut.await
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait AbortAndWaitExt {
|
||||
/// Abort handle and wait for completion.
|
||||
///
|
||||
/// Note that this is NOT just a "wait with timeout or panic". This extension is specific to [`JoinHandle`] and will:
|
||||
///
|
||||
/// 1. Call [`JoinHandle::abort`].
|
||||
/// 2. Await the [`JoinHandle`] with a timeout (or panic if the timeout is reached).
|
||||
/// 3. Check that the handle returned a [`JoinError`] that signals that the tracked task was indeed cancelled and
|
||||
/// didn't exit otherwise (either by finishing or by panicking).
|
||||
async fn abort_and_wait(self);
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T> AbortAndWaitExt for JoinHandle<T>
|
||||
where
|
||||
T: std::fmt::Debug + Send,
|
||||
{
|
||||
async fn abort_and_wait(mut self) {
|
||||
self.abort();
|
||||
|
||||
let join_err = tokio::time::timeout(Duration::from_secs(1), self)
|
||||
.await
|
||||
.expect("no timeout")
|
||||
.expect_err("handle was aborted and therefore MUST fail");
|
||||
assert!(join_err.is_cancelled());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,3 +24,5 @@ pub mod cache;
|
|||
mod cancellation_safe_future;
|
||||
pub mod loader;
|
||||
pub mod resource_consumption;
|
||||
#[cfg(test)]
|
||||
mod test_util;
|
||||
|
|
|
@ -0,0 +1,485 @@
|
|||
//! Batching of loader requests.
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
fmt::Debug,
|
||||
future::Future,
|
||||
hash::Hash,
|
||||
sync::{
|
||||
atomic::{AtomicU64, Ordering},
|
||||
Arc,
|
||||
},
|
||||
task::Poll,
|
||||
};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use futures::FutureExt;
|
||||
use observability_deps::tracing::trace;
|
||||
use parking_lot::Mutex;
|
||||
use tokio::sync::oneshot::{channel, Sender};
|
||||
|
||||
use crate::cancellation_safe_future::{CancellationSafeFuture, CancellationSafeFutureReceiver};
|
||||
|
||||
use super::Loader;
|
||||
|
||||
/// Batch [load](Loader::load) requests.
|
||||
///
|
||||
/// Requests against this loader will be [pending](std::task::Poll::Pending) until [flush](BatchLoaderFlusher::flush) is
|
||||
/// called. To simplify the usage -- esp. in combination with [`Cache::get`] -- use [`BatchLoaderFlusherExt`].
|
||||
///
|
||||
///
|
||||
/// [`Cache::get`]: crate::cache::Cache::get
|
||||
#[derive(Debug)]
|
||||
pub struct BatchLoader<K, Extra, V, L>
|
||||
where
|
||||
K: Debug + Hash + Send + 'static,
|
||||
Extra: Debug + Send + 'static,
|
||||
V: Debug + Send + 'static,
|
||||
L: Loader<K = Vec<K>, Extra = Vec<Extra>, V = Vec<V>>,
|
||||
{
|
||||
inner: Arc<BatchLoaderInner<K, Extra, V, L>>,
|
||||
}
|
||||
|
||||
impl<K, Extra, V, L> BatchLoader<K, Extra, V, L>
|
||||
where
|
||||
K: Debug + Hash + Send + 'static,
|
||||
Extra: Debug + Send + 'static,
|
||||
V: Debug + Send + 'static,
|
||||
L: Loader<K = Vec<K>, Extra = Vec<Extra>, V = Vec<V>>,
|
||||
{
|
||||
/// Create new batch loader based on a non-batched, vector-based one.
|
||||
pub fn new(inner: L) -> Self {
|
||||
Self {
|
||||
inner: Arc::new(BatchLoaderInner {
|
||||
inner,
|
||||
pending: Default::default(),
|
||||
job_id_counter: Default::default(),
|
||||
job_handles: Default::default(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// State of [`BatchLoader`].
|
||||
///
|
||||
/// This is an extra struct so it can be wrapped into an [`Arc`] and shared with the futures that are spawned into
|
||||
/// [`CancellationSafeFuture`]
|
||||
#[derive(Debug)]
|
||||
struct BatchLoaderInner<K, Extra, V, L>
|
||||
where
|
||||
K: Debug + Hash + Send + 'static,
|
||||
Extra: Debug + Send + 'static,
|
||||
V: Debug + Send + 'static,
|
||||
L: Loader<K = Vec<K>, Extra = Vec<Extra>, V = Vec<V>>,
|
||||
{
|
||||
inner: L,
|
||||
pending: Mutex<Vec<(K, Extra, Sender<V>)>>,
|
||||
job_id_counter: AtomicU64,
|
||||
job_handles: Mutex<HashMap<u64, CancellationSafeFutureReceiver<()>>>,
|
||||
}
|
||||
|
||||
/// Flush interface for [`BatchLoader`].
|
||||
///
|
||||
/// This is a trait so you can [type-erase](https://en.wikipedia.org/wiki/Type_erasure) it by putting it into an
|
||||
/// [`Arc`],
|
||||
///
|
||||
/// This trait is object-safe.
|
||||
#[async_trait]
|
||||
pub trait BatchLoaderFlusher: Debug + Send + Sync + 'static {
|
||||
/// Flush all batched requests.
|
||||
async fn flush(&self);
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl BatchLoaderFlusher for Arc<dyn BatchLoaderFlusher> {
|
||||
async fn flush(&self) {
|
||||
self.as_ref().flush().await;
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<K, Extra, V, L> BatchLoaderFlusher for BatchLoader<K, Extra, V, L>
|
||||
where
|
||||
K: Debug + Hash + Send + 'static,
|
||||
Extra: Debug + Send + 'static,
|
||||
V: Debug + Send + 'static,
|
||||
L: Loader<K = Vec<K>, Extra = Vec<Extra>, V = Vec<V>>,
|
||||
{
|
||||
async fn flush(&self) {
|
||||
trace!("flushing batch loader");
|
||||
|
||||
let pending: Vec<_> = {
|
||||
let mut pending = self.inner.pending.lock();
|
||||
std::mem::take(pending.as_mut())
|
||||
};
|
||||
|
||||
if pending.is_empty() {
|
||||
return;
|
||||
}
|
||||
let job_id = self.inner.job_id_counter.fetch_add(1, Ordering::SeqCst);
|
||||
let handle_recv = CancellationSafeFutureReceiver::default();
|
||||
|
||||
{
|
||||
let mut job_handles = self.inner.job_handles.lock();
|
||||
job_handles.insert(job_id, handle_recv.clone());
|
||||
}
|
||||
|
||||
let inner = Arc::clone(&self.inner);
|
||||
let fut = CancellationSafeFuture::new(
|
||||
async move {
|
||||
let mut keys = Vec::with_capacity(pending.len());
|
||||
let mut extras = Vec::with_capacity(pending.len());
|
||||
let mut senders = Vec::with_capacity(pending.len());
|
||||
|
||||
for (k, extra, sender) in pending {
|
||||
keys.push(k);
|
||||
extras.push(extra);
|
||||
senders.push(sender);
|
||||
}
|
||||
|
||||
let values = inner.inner.load(keys, extras).await;
|
||||
assert_eq!(values.len(), senders.len());
|
||||
|
||||
for (value, sender) in values.into_iter().zip(senders) {
|
||||
sender.send(value).unwrap();
|
||||
}
|
||||
|
||||
let mut job_handles = inner.job_handles.lock();
|
||||
job_handles.remove(&job_id);
|
||||
},
|
||||
handle_recv,
|
||||
);
|
||||
fut.await;
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<K, Extra, V, L> Loader for BatchLoader<K, Extra, V, L>
|
||||
where
|
||||
K: Debug + Hash + Send + 'static,
|
||||
Extra: Debug + Send + 'static,
|
||||
V: Debug + Send + 'static,
|
||||
L: Loader<K = Vec<K>, Extra = Vec<Extra>, V = Vec<V>>,
|
||||
{
|
||||
type K = K;
|
||||
type Extra = Extra;
|
||||
type V = V;
|
||||
|
||||
async fn load(&self, k: Self::K, extra: Self::Extra) -> Self::V {
|
||||
let (tx, rx) = channel();
|
||||
|
||||
{
|
||||
let mut pending = self.inner.pending.lock();
|
||||
pending.push((k, extra, tx));
|
||||
}
|
||||
|
||||
rx.await.unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
/// Extension trait for [`BatchLoaderFlusher`] because the methods on this extension trait are not object safe.
|
||||
#[async_trait]
|
||||
pub trait BatchLoaderFlusherExt {
|
||||
/// Try to poll all given futures and automatically [flush](BatchLoaderFlusher) if any of them end up in a pending state.
|
||||
///
|
||||
/// This guarantees that the order of the results is identical to the order of the futures.
|
||||
async fn auto_flush<F>(&self, futures: Vec<F>) -> Vec<F::Output>
|
||||
where
|
||||
F: Future + Send,
|
||||
F::Output: Send;
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<B> BatchLoaderFlusherExt for B
|
||||
where
|
||||
B: BatchLoaderFlusher,
|
||||
{
|
||||
async fn auto_flush<F>(&self, futures: Vec<F>) -> Vec<F::Output>
|
||||
where
|
||||
F: Future + Send,
|
||||
F::Output: Send,
|
||||
{
|
||||
let mut futures = futures
|
||||
.into_iter()
|
||||
.map(|f| f.boxed())
|
||||
.enumerate()
|
||||
.collect::<Vec<_>>();
|
||||
let mut output: Vec<Option<F::Output>> = (0..futures.len()).map(|_| None).collect();
|
||||
|
||||
while !futures.is_empty() {
|
||||
let mut pending = Vec::with_capacity(futures.len());
|
||||
|
||||
for (idx, mut f) in futures.into_iter() {
|
||||
match futures::poll!(&mut f) {
|
||||
Poll::Ready(res) => {
|
||||
output[idx] = Some(res);
|
||||
}
|
||||
Poll::Pending => {
|
||||
pending.push((idx, f));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !pending.is_empty() {
|
||||
self.flush().await;
|
||||
}
|
||||
|
||||
futures = pending;
|
||||
}
|
||||
|
||||
output
|
||||
.into_iter()
|
||||
.map(|o| o.expect("all futures finished"))
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use tokio::sync::Barrier;
|
||||
|
||||
use crate::{
|
||||
cache::{driver::CacheDriver, Cache},
|
||||
loader::test_util::TestLoader,
|
||||
test_util::EnsurePendingExt,
|
||||
};
|
||||
|
||||
use super::*;
|
||||
|
||||
type TestLoaderT = Arc<TestLoader<Vec<u8>, Vec<bool>, Vec<String>>>;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_flush_empty() {
|
||||
let (inner, batch) = setup();
|
||||
batch.flush().await;
|
||||
assert_eq!(inner.loaded(), vec![],);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_flush_manual() {
|
||||
let (inner, batch) = setup();
|
||||
|
||||
let pending_barrier_1 = Arc::new(Barrier::new(2));
|
||||
let pending_barrier_1_captured = Arc::clone(&pending_barrier_1);
|
||||
let batch_captured = Arc::clone(&batch);
|
||||
let handle_1 = tokio::spawn(async move {
|
||||
batch_captured
|
||||
.load(1, true)
|
||||
.ensure_pending(pending_barrier_1_captured)
|
||||
.await
|
||||
});
|
||||
pending_barrier_1.wait().await;
|
||||
|
||||
let pending_barrier_2 = Arc::new(Barrier::new(2));
|
||||
let pending_barrier_2_captured = Arc::clone(&pending_barrier_2);
|
||||
let batch_captured = Arc::clone(&batch);
|
||||
let handle_2 = tokio::spawn(async move {
|
||||
batch_captured
|
||||
.load(2, false)
|
||||
.ensure_pending(pending_barrier_2_captured)
|
||||
.await
|
||||
});
|
||||
pending_barrier_2.wait().await;
|
||||
|
||||
inner.mock_next(vec![1, 2], vec![String::from("foo"), String::from("bar")]);
|
||||
|
||||
batch.flush().await;
|
||||
assert_eq!(inner.loaded(), vec![(vec![1, 2], vec![true, false])],);
|
||||
|
||||
assert_eq!(handle_1.await.unwrap(), String::from("foo"));
|
||||
assert_eq!(handle_2.await.unwrap(), String::from("bar"));
|
||||
}
|
||||
|
||||
/// Simulate the following scenario:
|
||||
///
|
||||
/// 1. load `1`, flush it, inner load starts processing `[1]`
|
||||
/// 2. load `2`, flush it, inner load starts processing `[2]`
|
||||
/// 3. inner loader returns result for `[2]`, batch loader returns that result as well
|
||||
/// 4. inner loader returns result for `[1]`, batch loader returns that result as well
|
||||
#[tokio::test]
|
||||
async fn test_concurrent_load() {
|
||||
let (inner, batch) = setup();
|
||||
|
||||
let load_barrier_1 = inner.block_next(vec![1], vec![String::from("foo")]);
|
||||
inner.mock_next(vec![2], vec![String::from("bar")]);
|
||||
|
||||
// set up first load
|
||||
let pending_barrier_1 = Arc::new(Barrier::new(2));
|
||||
let pending_barrier_1_captured = Arc::clone(&pending_barrier_1);
|
||||
let batch_captured = Arc::clone(&batch);
|
||||
let handle_1 = tokio::spawn(async move {
|
||||
batch_captured
|
||||
.load(1, true)
|
||||
.ensure_pending(pending_barrier_1_captured)
|
||||
.await
|
||||
});
|
||||
pending_barrier_1.wait().await;
|
||||
|
||||
// flush first load, this is blocked by the load barrier
|
||||
let pending_barrier_2 = Arc::new(Barrier::new(2));
|
||||
let pending_barrier_2_captured = Arc::clone(&pending_barrier_2);
|
||||
let batch_captured = Arc::clone(&batch);
|
||||
let handle_2 = tokio::spawn(async move {
|
||||
batch_captured
|
||||
.flush()
|
||||
.ensure_pending(pending_barrier_2_captured)
|
||||
.await;
|
||||
});
|
||||
pending_barrier_2.wait().await;
|
||||
|
||||
// set up second load
|
||||
let pending_barrier_3 = Arc::new(Barrier::new(2));
|
||||
let pending_barrier_3_captured = Arc::clone(&pending_barrier_3);
|
||||
let batch_captured = Arc::clone(&batch);
|
||||
let handle_3 = tokio::spawn(async move {
|
||||
batch_captured
|
||||
.load(2, false)
|
||||
.ensure_pending(pending_barrier_3_captured)
|
||||
.await
|
||||
});
|
||||
pending_barrier_3.wait().await;
|
||||
|
||||
// flush 2nd load and get result
|
||||
batch.flush().await;
|
||||
assert_eq!(handle_3.await.unwrap(), String::from("bar"));
|
||||
|
||||
// flush 1st load and get result
|
||||
load_barrier_1.wait().await;
|
||||
handle_2.await.unwrap();
|
||||
assert_eq!(handle_1.await.unwrap(), String::from("foo"));
|
||||
|
||||
assert_eq!(
|
||||
inner.loaded(),
|
||||
vec![(vec![1], vec![true]), (vec![2], vec![false])],
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_cancel_flush() {
|
||||
let (inner, batch) = setup();
|
||||
|
||||
let load_barrier_1 = inner.block_next(vec![1], vec![String::from("foo")]);
|
||||
|
||||
// set up load
|
||||
let pending_barrier_1 = Arc::new(Barrier::new(2));
|
||||
let pending_barrier_1_captured = Arc::clone(&pending_barrier_1);
|
||||
let batch_captured = Arc::clone(&batch);
|
||||
let handle_1 = tokio::spawn(async move {
|
||||
batch_captured
|
||||
.load(1, true)
|
||||
.ensure_pending(pending_barrier_1_captured)
|
||||
.await
|
||||
});
|
||||
pending_barrier_1.wait().await;
|
||||
|
||||
// flush load, this is blocked by the load barrier
|
||||
let pending_barrier_2 = Arc::new(Barrier::new(2));
|
||||
let pending_barrier_2_captured = Arc::clone(&pending_barrier_2);
|
||||
let batch_captured = Arc::clone(&batch);
|
||||
let handle_2 = tokio::spawn(async move {
|
||||
batch_captured
|
||||
.flush()
|
||||
.ensure_pending(pending_barrier_2_captured)
|
||||
.await;
|
||||
});
|
||||
pending_barrier_2.wait().await;
|
||||
|
||||
// abort flush
|
||||
handle_2.abort();
|
||||
|
||||
// flush load and get result
|
||||
load_barrier_1.wait().await;
|
||||
assert_eq!(handle_1.await.unwrap(), String::from("foo"));
|
||||
|
||||
assert_eq!(inner.loaded(), vec![(vec![1], vec![true])],);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_cancel_load_and_flush() {
|
||||
let (inner, batch) = setup();
|
||||
|
||||
let load_barrier_1 = inner.block_next(vec![1], vec![String::from("foo")]);
|
||||
|
||||
// set up load
|
||||
let pending_barrier_1 = Arc::new(Barrier::new(2));
|
||||
let pending_barrier_1_captured = Arc::clone(&pending_barrier_1);
|
||||
let batch_captured = Arc::clone(&batch);
|
||||
let handle_1 = tokio::spawn(async move {
|
||||
batch_captured
|
||||
.load(1, true)
|
||||
.ensure_pending(pending_barrier_1_captured)
|
||||
.await
|
||||
});
|
||||
pending_barrier_1.wait().await;
|
||||
|
||||
// flush load, this is blocked by the load barrier
|
||||
let pending_barrier_2 = Arc::new(Barrier::new(2));
|
||||
let pending_barrier_2_captured = Arc::clone(&pending_barrier_2);
|
||||
let batch_captured = Arc::clone(&batch);
|
||||
let handle_2 = tokio::spawn(async move {
|
||||
batch_captured
|
||||
.flush()
|
||||
.ensure_pending(pending_barrier_2_captured)
|
||||
.await;
|
||||
});
|
||||
pending_barrier_2.wait().await;
|
||||
|
||||
// abort load and flush
|
||||
handle_1.abort();
|
||||
handle_2.abort();
|
||||
|
||||
// unblock
|
||||
load_barrier_1.wait().await;
|
||||
|
||||
// load was still driven to completion
|
||||
assert_eq!(inner.loaded(), vec![(vec![1], vec![true])],);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_auto_flush_with_loader() {
|
||||
let (inner, batch) = setup();
|
||||
|
||||
inner.mock_next(vec![1, 2], vec![String::from("foo"), String::from("bar")]);
|
||||
|
||||
assert_eq!(
|
||||
batch
|
||||
.auto_flush(vec![batch.load(1, true), batch.load(2, false)])
|
||||
.await,
|
||||
vec![String::from("foo"), String::from("bar")],
|
||||
);
|
||||
|
||||
assert_eq!(inner.loaded(), vec![(vec![1, 2], vec![true, false])],);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_auto_flush_integration_with_cache_driver() {
|
||||
let (inner, batch) = setup();
|
||||
let cache = CacheDriver::new(Arc::clone(&batch), HashMap::new());
|
||||
|
||||
inner.mock_next(vec![1, 2], vec![String::from("foo"), String::from("bar")]);
|
||||
inner.mock_next(vec![3], vec![String::from("baz")]);
|
||||
|
||||
assert_eq!(
|
||||
batch
|
||||
.auto_flush(vec![cache.get(1, true), cache.get(2, false)])
|
||||
.await,
|
||||
vec![String::from("foo"), String::from("bar")],
|
||||
);
|
||||
assert_eq!(
|
||||
batch
|
||||
.auto_flush(vec![cache.get(2, true), cache.get(3, true)])
|
||||
.await,
|
||||
vec![String::from("bar"), String::from("baz")],
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
inner.loaded(),
|
||||
vec![(vec![1, 2], vec![true, false]), (vec![3], vec![true])],
|
||||
);
|
||||
}
|
||||
|
||||
fn setup() -> (TestLoaderT, Arc<BatchLoader<u8, bool, String, TestLoaderT>>) {
|
||||
let inner = TestLoaderT::default();
|
||||
let batch = Arc::new(BatchLoader::new(Arc::clone(&inner)));
|
||||
(inner, batch)
|
||||
}
|
||||
}
|
|
@ -2,6 +2,7 @@
|
|||
use async_trait::async_trait;
|
||||
use std::{fmt::Debug, future::Future, hash::Hash, marker::PhantomData, sync::Arc};
|
||||
|
||||
pub mod batch;
|
||||
pub mod metrics;
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
@ -14,7 +14,7 @@ enum TestLoaderResponse<V> {
|
|||
|
||||
/// An easy-to-mock [`Loader`].
|
||||
#[derive(Debug, Default)]
|
||||
pub struct TestLoader<K = u8, V = String, Extra = bool>
|
||||
pub struct TestLoader<K = u8, Extra = bool, V = String>
|
||||
where
|
||||
K: Clone + Debug + Eq + Hash + Send + 'static,
|
||||
Extra: Clone + Debug + Send + 'static,
|
||||
|
@ -25,7 +25,7 @@ where
|
|||
loaded: Mutex<Vec<(K, Extra)>>,
|
||||
}
|
||||
|
||||
impl<K, V, Extra> TestLoader<K, V, Extra>
|
||||
impl<K, V, Extra> TestLoader<K, Extra, V>
|
||||
where
|
||||
K: Clone + Debug + Eq + Hash + Send + 'static,
|
||||
Extra: Clone + Debug + Send + 'static,
|
||||
|
@ -93,7 +93,7 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
impl<K, V, Extra> Drop for TestLoader<K, V, Extra>
|
||||
impl<K, Extra, V> Drop for TestLoader<K, Extra, V>
|
||||
where
|
||||
K: Clone + Debug + Eq + Hash + Send + 'static,
|
||||
Extra: Clone + Debug + Send + 'static,
|
||||
|
@ -110,15 +110,15 @@ where
|
|||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<K, V, Extra> Loader for TestLoader<K, V, Extra>
|
||||
impl<K, V, Extra> Loader for TestLoader<K, Extra, V>
|
||||
where
|
||||
K: Clone + Debug + Eq + Hash + Send + 'static,
|
||||
Extra: Clone + Debug + Send + 'static,
|
||||
V: Clone + Debug + Send + 'static,
|
||||
{
|
||||
type K = K;
|
||||
type V = V;
|
||||
type Extra = Extra;
|
||||
type V = V;
|
||||
|
||||
async fn load(&self, k: Self::K, extra: Self::Extra) -> Self::V {
|
||||
self.loaded.lock().push((k.clone(), extra));
|
||||
|
@ -163,7 +163,7 @@ mod tests {
|
|||
#[tokio::test]
|
||||
#[should_panic(expected = "entry not mocked")]
|
||||
async fn test_loader_panic_entry_unknown() {
|
||||
let loader = TestLoader::<u8, String, ()>::default();
|
||||
let loader = TestLoader::<u8, (), String>::default();
|
||||
loader.load(1, ()).await;
|
||||
}
|
||||
|
||||
|
@ -179,14 +179,14 @@ mod tests {
|
|||
#[test]
|
||||
#[should_panic(expected = "mocked response left")]
|
||||
fn test_loader_panic_requests_left() {
|
||||
let loader = TestLoader::<u8, String, ()>::default();
|
||||
let loader = TestLoader::<u8, (), String>::default();
|
||||
loader.mock_next(1, String::from("foo"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(expected = "panic-by-choice")]
|
||||
fn test_loader_no_double_panic() {
|
||||
let loader = TestLoader::<u8, String, ()>::default();
|
||||
let loader = TestLoader::<u8, (), String>::default();
|
||||
loader.mock_next(1, String::from("foo"));
|
||||
panic!("panic-by-choice");
|
||||
}
|
||||
|
|
|
@ -0,0 +1,62 @@
|
|||
use std::{future::Future, sync::Arc, time::Duration};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use futures::FutureExt;
|
||||
use tokio::{sync::Barrier, task::JoinHandle};
|
||||
|
||||
#[async_trait]
|
||||
pub trait EnsurePendingExt {
|
||||
type Out;
|
||||
|
||||
/// Ensure that the future is pending. In the pending case, try to pass the given barrier. Afterwards await the future again.
|
||||
///
|
||||
/// This is helpful to ensure a future is in a pending state before continuing with the test setup.
|
||||
async fn ensure_pending(self, barrier: Arc<Barrier>) -> Self::Out;
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<F> EnsurePendingExt for F
|
||||
where
|
||||
F: Future + Send + Unpin,
|
||||
{
|
||||
type Out = F::Output;
|
||||
|
||||
async fn ensure_pending(self, barrier: Arc<Barrier>) -> Self::Out {
|
||||
let mut fut = self.fuse();
|
||||
futures::select_biased! {
|
||||
_ = fut => panic!("fut should be pending"),
|
||||
_ = barrier.wait().fuse() => (),
|
||||
}
|
||||
|
||||
fut.await
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait AbortAndWaitExt {
|
||||
/// Abort handle and wait for completion.
|
||||
///
|
||||
/// Note that this is NOT just a "wait with timeout or panic". This extension is specific to [`JoinHandle`] and will:
|
||||
///
|
||||
/// 1. Call [`JoinHandle::abort`].
|
||||
/// 2. Await the [`JoinHandle`] with a timeout (or panic if the timeout is reached).
|
||||
/// 3. Check that the handle returned a [`JoinError`] that signals that the tracked task was indeed cancelled and
|
||||
/// didn't exit otherwise (either by finishing or by panicking).
|
||||
async fn abort_and_wait(self);
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T> AbortAndWaitExt for JoinHandle<T>
|
||||
where
|
||||
T: std::fmt::Debug + Send,
|
||||
{
|
||||
async fn abort_and_wait(mut self) {
|
||||
self.abort();
|
||||
|
||||
let join_err = tokio::time::timeout(Duration::from_secs(1), self)
|
||||
.await
|
||||
.expect("no timeout")
|
||||
.expect_err("handle was aborted and therefore MUST fail");
|
||||
assert!(join_err.is_cancelled());
|
||||
}
|
||||
}
|
|
@ -21,7 +21,7 @@ uuid = { version = "1", features = ["v4"] }
|
|||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3.6.0"
|
||||
tempfile = "3.7.0"
|
||||
test_helpers = { path = "../test_helpers" }
|
||||
|
||||
[features]
|
||||
|
|
|
@ -9,6 +9,7 @@ license.workspace = true
|
|||
async-trait = "0.1.71"
|
||||
backoff = { path = "../backoff" }
|
||||
bytes = "1.4"
|
||||
chrono = { version = "0.4", default-features = false }
|
||||
compactor_scheduler = { path = "../compactor_scheduler" }
|
||||
datafusion = { workspace = true }
|
||||
data_types = { path = "../data_types" }
|
||||
|
|
|
@ -69,7 +69,7 @@ mod tests {
|
|||
let err = stream.try_collect::<Vec<_>>().await.unwrap_err();
|
||||
assert_eq!(
|
||||
err.to_string(),
|
||||
"Join Error (panic)\ncaused by\nExternal error: foo"
|
||||
"Join Error (panic)\ncaused by\nExternal error: Panic: foo"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,6 +2,7 @@ use std::{fmt::Display, sync::Arc};
|
|||
|
||||
use data_types::{CompactionLevel, ParquetFile};
|
||||
use observability_deps::tracing::info;
|
||||
use parquet_file::ParquetFilePath;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{
|
||||
|
@ -48,14 +49,21 @@ where
|
|||
target_level: CompactionLevel,
|
||||
split_or_compact: FilesToSplitOrCompact,
|
||||
object_store_ids: Vec<Uuid>,
|
||||
object_store_paths: Vec<ParquetFilePath>,
|
||||
) -> Vec<PlanIR> {
|
||||
self.inner
|
||||
.create_plans(partition, target_level, split_or_compact, object_store_ids)
|
||||
self.inner.create_plans(
|
||||
partition,
|
||||
target_level,
|
||||
split_or_compact,
|
||||
object_store_ids,
|
||||
object_store_paths,
|
||||
)
|
||||
}
|
||||
|
||||
fn compact_plan(
|
||||
&self,
|
||||
files: Vec<ParquetFile>,
|
||||
object_store_paths: Vec<ParquetFilePath>,
|
||||
object_store_ids: Vec<Uuid>,
|
||||
reason: CompactReason,
|
||||
partition: Arc<PartitionInfo>,
|
||||
|
@ -65,9 +73,14 @@ where
|
|||
let n_input_files = files.len();
|
||||
let column_count = partition.column_count();
|
||||
let input_file_size_bytes = files.iter().map(|f| f.file_size_bytes).sum::<i64>();
|
||||
let plan =
|
||||
self.inner
|
||||
.compact_plan(files, object_store_ids, reason, partition, compaction_level);
|
||||
let plan = self.inner.compact_plan(
|
||||
files,
|
||||
object_store_paths,
|
||||
object_store_ids,
|
||||
reason,
|
||||
partition,
|
||||
compaction_level,
|
||||
);
|
||||
|
||||
info!(
|
||||
partition_id = partition_id.get(),
|
||||
|
@ -87,6 +100,7 @@ where
|
|||
fn split_plan(
|
||||
&self,
|
||||
file_to_split: FileToSplit,
|
||||
object_store_path: ParquetFilePath,
|
||||
object_store_id: Uuid,
|
||||
reason: SplitReason,
|
||||
partition: Arc<PartitionInfo>,
|
||||
|
@ -98,6 +112,7 @@ where
|
|||
let input_file_size_bytes = file_to_split.file.file_size_bytes;
|
||||
let plan = self.inner.split_plan(
|
||||
file_to_split,
|
||||
object_store_path,
|
||||
object_store_id,
|
||||
reason,
|
||||
partition,
|
||||
|
|
|
@ -4,6 +4,7 @@ use std::{
|
|||
};
|
||||
|
||||
use data_types::{CompactionLevel, ParquetFile};
|
||||
use parquet_file::ParquetFilePath;
|
||||
use uuid::Uuid;
|
||||
|
||||
pub mod logging;
|
||||
|
@ -24,12 +25,14 @@ pub trait IRPlanner: Debug + Display + Send + Sync {
|
|||
target_level: CompactionLevel,
|
||||
split_or_compact: FilesToSplitOrCompact,
|
||||
object_store_ids: Vec<Uuid>,
|
||||
object_store_paths: Vec<ParquetFilePath>,
|
||||
) -> Vec<PlanIR>;
|
||||
|
||||
/// Build a plan to compact the given files
|
||||
fn compact_plan(
|
||||
&self,
|
||||
files: Vec<ParquetFile>,
|
||||
paths: Vec<ParquetFilePath>,
|
||||
object_store_ids: Vec<Uuid>,
|
||||
reason: CompactReason,
|
||||
partition: Arc<PartitionInfo>,
|
||||
|
@ -40,6 +43,7 @@ pub trait IRPlanner: Debug + Display + Send + Sync {
|
|||
fn split_plan(
|
||||
&self,
|
||||
file_to_split: FileToSplit,
|
||||
path: ParquetFilePath,
|
||||
object_store_id: Uuid,
|
||||
reason: SplitReason,
|
||||
partition: Arc<PartitionInfo>,
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
use std::{fmt::Display, sync::Arc};
|
||||
|
||||
use data_types::{ChunkOrder, CompactionLevel, ParquetFile, Timestamp, TimestampMinMax};
|
||||
use parquet_file::ParquetFilePath;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{
|
||||
|
@ -125,21 +126,31 @@ impl IRPlanner for V1IRPlanner {
|
|||
target_level: CompactionLevel,
|
||||
split_or_compact: FilesToSplitOrCompact,
|
||||
object_store_ids: Vec<Uuid>,
|
||||
object_store_paths: Vec<ParquetFilePath>,
|
||||
) -> Vec<PlanIR> {
|
||||
match split_or_compact {
|
||||
FilesToSplitOrCompact::Compact(files, reason) => {
|
||||
vec![self.compact_plan(files, object_store_ids, reason, partition, target_level)]
|
||||
vec![self.compact_plan(
|
||||
files,
|
||||
object_store_paths,
|
||||
object_store_ids,
|
||||
reason,
|
||||
partition,
|
||||
target_level,
|
||||
)]
|
||||
}
|
||||
FilesToSplitOrCompact::Split(files, reason) => {
|
||||
files
|
||||
.into_iter()
|
||||
.zip(object_store_ids)
|
||||
.map(|(file_to_split, object_store_id)| {
|
||||
.zip(object_store_paths)
|
||||
.map(|((file_to_split, object_store_id), object_store_path)| {
|
||||
// target level of a split file is the same as its level
|
||||
let target_level = file_to_split.file.compaction_level;
|
||||
|
||||
self.split_plan(
|
||||
file_to_split,
|
||||
object_store_path,
|
||||
object_store_id,
|
||||
reason,
|
||||
Arc::clone(&partition),
|
||||
|
@ -157,6 +168,7 @@ impl IRPlanner for V1IRPlanner {
|
|||
fn compact_plan(
|
||||
&self,
|
||||
files: Vec<ParquetFile>,
|
||||
paths: Vec<ParquetFilePath>,
|
||||
object_store_ids: Vec<Uuid>,
|
||||
reason: CompactReason,
|
||||
_partition: Arc<PartitionInfo>,
|
||||
|
@ -188,13 +200,15 @@ impl IRPlanner for V1IRPlanner {
|
|||
let files = files
|
||||
.into_iter()
|
||||
.zip(object_store_ids)
|
||||
.map(|(file, object_store_id)| {
|
||||
.zip(paths)
|
||||
.map(|((file, object_store_id), path)| {
|
||||
let order = order(file.compaction_level, target_level, file.max_l0_created_at);
|
||||
FileIR {
|
||||
file: ParquetFile {
|
||||
object_store_id,
|
||||
..file
|
||||
},
|
||||
path,
|
||||
order,
|
||||
}
|
||||
})
|
||||
|
@ -248,6 +262,7 @@ impl IRPlanner for V1IRPlanner {
|
|||
fn split_plan(
|
||||
&self,
|
||||
file_to_split: FileToSplit,
|
||||
path: ParquetFilePath,
|
||||
object_store_id: Uuid,
|
||||
reason: SplitReason,
|
||||
_partition: Arc<PartitionInfo>,
|
||||
|
@ -261,6 +276,7 @@ impl IRPlanner for V1IRPlanner {
|
|||
object_store_id,
|
||||
..file
|
||||
},
|
||||
path,
|
||||
order,
|
||||
};
|
||||
|
||||
|
|
|
@ -102,6 +102,6 @@ mod tests {
|
|||
.store(stream, partition, level, max_l0_created_at)
|
||||
.await
|
||||
.unwrap_err();
|
||||
assert_eq!(err.to_string(), "External error: foo",);
|
||||
assert_eq!(err.to_string(), "External error: Panic: foo",);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -49,6 +49,7 @@ pub trait ScratchpadGen: Debug + Display + Send + Sync {
|
|||
/// SMALLER than the uncompressed Arrow data during compaction itself.
|
||||
#[async_trait]
|
||||
pub trait Scratchpad: Debug + Send + Sync + 'static {
|
||||
fn uuids(&self, files: &[ParquetFilePath]) -> Vec<Uuid>;
|
||||
async fn load_to_scratchpad(&self, files: &[ParquetFilePath]) -> Vec<Uuid>;
|
||||
async fn make_public(&self, files: &[ParquetFilePath]) -> Vec<Uuid>;
|
||||
async fn clean_from_scratchpad(&self, files: &[ParquetFilePath]);
|
||||
|
|
|
@ -33,6 +33,10 @@ struct NoopScratchpad;
|
|||
|
||||
#[async_trait]
|
||||
impl Scratchpad for NoopScratchpad {
|
||||
// Returns one UUID per input path, in input order, without doing any I/O.
// This implementation hands back each path's own object store id unchanged,
// which is the same expression `load_to_scratchpad` below evaluates.
// NOTE(review): `objest_store_id` looks like a misspelling of `object_store_id`;
// it is the accessor's name as called here, so any rename belongs at its definition.
fn uuids(&self, files: &[ParquetFilePath]) -> Vec<Uuid> {
|
||||
files.iter().map(|f| f.objest_store_id()).collect()
|
||||
}
|
||||
|
||||
async fn load_to_scratchpad(&self, files: &[ParquetFilePath]) -> Vec<Uuid> {
|
||||
files.iter().map(|f| f.objest_store_id()).collect()
|
||||
}
|
||||
|
|
|
@ -178,6 +178,11 @@ impl Drop for ProdScratchpad {
|
|||
|
||||
#[async_trait]
|
||||
impl Scratchpad for ProdScratchpad {
|
||||
// Returns the UUIDs that `apply_mask` assigns to `files`, discarding the
// masked paths it also produces. `load_to_scratchpad` calls `apply_mask` on
// its input as well, and the test in this file asserts that both calls yield
// equal UUID lists for the same input.
fn uuids(&self, files: &[ParquetFilePath]) -> Vec<Uuid> {
|
||||
let (_, uuids) = self.apply_mask(files);
|
||||
uuids
|
||||
}
|
||||
|
||||
async fn load_to_scratchpad(&self, files: &[ParquetFilePath]) -> Vec<Uuid> {
|
||||
let (files_to, uuids) = self.apply_mask(files);
|
||||
let (files_from, files_to) = self.check_known(files, &files_to, false);
|
||||
|
@ -323,8 +328,11 @@ mod tests {
|
|||
assert_content(&store_scratchpad, []).await;
|
||||
assert_content(&store_output, []).await;
|
||||
|
||||
let early_get_uuids = pad.uuids(&[f1.clone(), f2.clone()]);
|
||||
|
||||
let uuids = pad.load_to_scratchpad(&[f1.clone(), f2.clone()]).await;
|
||||
assert_eq!(uuids.len(), 2);
|
||||
assert_eq!(early_get_uuids, uuids);
|
||||
let f1_masked = f1.clone().with_object_store_id(uuids[0]);
|
||||
let f2_masked = f2.clone().with_object_store_id(uuids[1]);
|
||||
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
use std::{num::NonZeroUsize, sync::Arc, time::Duration};
|
||||
|
||||
use chrono::Utc;
|
||||
use data_types::{CompactionLevel, ParquetFile, ParquetFileParams, PartitionId};
|
||||
use futures::{stream, StreamExt, TryStreamExt};
|
||||
use iox_query::exec::query_tracing::send_metrics_to_tracing;
|
||||
use observability_deps::tracing::info;
|
||||
use parquet_file::ParquetFilePath;
|
||||
use tokio::sync::watch::Sender;
|
||||
|
@ -17,7 +19,7 @@ use crate::{
|
|||
Components,
|
||||
},
|
||||
error::{DynError, ErrorKind, SimpleError},
|
||||
file_classification::{FileClassification, FilesForProgress, FilesToSplitOrCompact},
|
||||
file_classification::{FileClassification, FilesForProgress},
|
||||
partition_info::PartitionInfo,
|
||||
PlanIR, RoundInfo,
|
||||
};
|
||||
|
@ -301,8 +303,6 @@ async fn execute_branch(
|
|||
// throw away the compaction work we've done.
|
||||
let saved_parquet_file_state = SavedParquetFileState::from(&branch);
|
||||
|
||||
let input_paths: Vec<ParquetFilePath> = branch.iter().map(ParquetFilePath::from).collect();
|
||||
|
||||
// Identify the target level and files that should be
|
||||
// compacted together, upgraded, and kept for next round of
|
||||
// compaction
|
||||
|
@ -329,105 +329,128 @@ async fn execute_branch(
|
|||
}
|
||||
|
||||
let FilesForProgress {
|
||||
upgrade,
|
||||
mut upgrade,
|
||||
split_or_compact,
|
||||
} = files_to_make_progress_on;
|
||||
|
||||
// Compact & Split
|
||||
let created_file_params = run_plans(
|
||||
span.child("run_plans"),
|
||||
split_or_compact.clone(),
|
||||
&partition_info,
|
||||
&components,
|
||||
let paths = split_or_compact.file_input_paths();
|
||||
let object_store_ids = scratchpad_ctx.uuids(&paths);
|
||||
let plans = components.ir_planner.create_plans(
|
||||
Arc::clone(&partition_info),
|
||||
target_level,
|
||||
Arc::clone(&df_semaphore),
|
||||
Arc::<dyn Scratchpad>::clone(&scratchpad_ctx),
|
||||
)
|
||||
.await?;
|
||||
split_or_compact.clone(),
|
||||
object_store_ids,
|
||||
paths,
|
||||
);
|
||||
|
||||
// inputs can be removed from the scratchpad as soon as we're done with compaction.
|
||||
scratchpad_ctx.clean_from_scratchpad(&input_paths).await;
|
||||
let mut files_next: Vec<ParquetFile> = Vec::new();
|
||||
|
||||
// upload files to real object store
|
||||
let upload_span = span.child("upload_objects");
|
||||
let created_file_params = upload_files_to_object_store(
|
||||
created_file_params,
|
||||
Arc::<dyn Scratchpad>::clone(&scratchpad_ctx),
|
||||
)
|
||||
.await;
|
||||
drop(upload_span);
|
||||
// The number of plans is often small (1), but can be thousands, especially in vertical splitting
|
||||
// scenarios when the partition is highly backlogged. So we chunk the plans into groups to control
|
||||
// memory usage (all files for all plans in a chunk are loaded to the scratchpad at once), and to
|
||||
// allow incremental catalog & progress updates. But the chunk size should still be large enough
|
||||
// to facilitate concurrency in plan execution, which can be accomplished with a small multiple on
|
||||
// the concurrency limit.
|
||||
let mut chunks = plans.into_iter().peekable();
|
||||
while chunks.peek().is_some() {
|
||||
// 4x run_plans' concurrency limit will allow adequate concurrency.
|
||||
let chunk: Vec<PlanIR> = chunks
|
||||
.by_ref()
|
||||
.take(df_semaphore.total_permits() * 4)
|
||||
.collect();
|
||||
|
||||
for file_param in &created_file_params {
|
||||
info!(
|
||||
partition_id = partition_info.partition_id.get(),
|
||||
uuid = file_param.object_store_id.to_string(),
|
||||
bytes = file_param.file_size_bytes,
|
||||
"uploaded file to objectstore",
|
||||
);
|
||||
}
|
||||
let files_to_delete = chunk
|
||||
.iter()
|
||||
.flat_map(|plan| plan.input_parquet_files())
|
||||
.collect();
|
||||
|
||||
let created_file_paths: Vec<ParquetFilePath> = created_file_params
|
||||
.iter()
|
||||
.map(ParquetFilePath::from)
|
||||
.collect();
|
||||
// Compact & Split
|
||||
let created_file_params = run_plans(
|
||||
span.child("run_plans"),
|
||||
chunk,
|
||||
&partition_info,
|
||||
&components,
|
||||
Arc::clone(&df_semaphore),
|
||||
Arc::<dyn Scratchpad>::clone(&scratchpad_ctx),
|
||||
)
|
||||
.await?;
|
||||
|
||||
// conditionally (if not shadow mode) remove the newly created files from the scratchpad.
|
||||
scratchpad_ctx
|
||||
.clean_written_from_scratchpad(&created_file_paths)
|
||||
// upload files to real object store
|
||||
let upload_span = span.child("upload_objects");
|
||||
let created_file_params = upload_files_to_object_store(
|
||||
created_file_params,
|
||||
Arc::<dyn Scratchpad>::clone(&scratchpad_ctx),
|
||||
)
|
||||
.await;
|
||||
drop(upload_span);
|
||||
|
||||
for file_param in &created_file_params {
|
||||
info!(
|
||||
partition_id = partition_info.partition_id.get(),
|
||||
uuid = file_param.object_store_id.to_string(),
|
||||
bytes = file_param.file_size_bytes,
|
||||
"uploaded file to objectstore",
|
||||
);
|
||||
}
|
||||
|
||||
let created_file_paths: Vec<ParquetFilePath> = created_file_params
|
||||
.iter()
|
||||
.map(ParquetFilePath::from)
|
||||
.collect();
|
||||
|
||||
// conditionally (if not shadow mode) remove the newly created files from the scratchpad.
|
||||
scratchpad_ctx
|
||||
.clean_written_from_scratchpad(&created_file_paths)
|
||||
.await;
|
||||
|
||||
// Update the catalog to reflect the newly created files, soft delete the compacted
|
||||
// files and update the upgraded files
|
||||
let (created_files, upgraded_files) = update_catalog(
|
||||
Arc::clone(&components),
|
||||
partition_id,
|
||||
&saved_parquet_file_state,
|
||||
files_to_delete,
|
||||
upgrade,
|
||||
created_file_params,
|
||||
target_level,
|
||||
)
|
||||
.await;
|
||||
|
||||
// Update the catalog to reflect the newly created files, soft delete the compacted
|
||||
// files and update the upgraded files
|
||||
let files_to_delete = split_or_compact.into_files();
|
||||
let (created_files, upgraded_files) = update_catalog(
|
||||
Arc::clone(&components),
|
||||
partition_id,
|
||||
saved_parquet_file_state,
|
||||
files_to_delete,
|
||||
upgrade,
|
||||
created_file_params,
|
||||
target_level,
|
||||
)
|
||||
.await;
|
||||
// we only need to upgrade files on the first iteration, so empty the upgrade list for next loop.
|
||||
upgrade = Vec::new();
|
||||
|
||||
// Report to `timeout_with_progress_checking` that some progress has been made; stop
|
||||
// if sending this signal fails because something has gone terribly wrong for the other
|
||||
// end of the channel to not be listening anymore.
|
||||
if let Err(e) = transmit_progress_signal.send(true) {
|
||||
return Err(Box::new(e));
|
||||
// Report to `timeout_with_progress_checking` that some progress has been made; stop
|
||||
// if sending this signal fails because something has gone terribly wrong for the other
|
||||
// end of the channel to not be listening anymore.
|
||||
if let Err(e) = transmit_progress_signal.send(true) {
|
||||
return Err(Box::new(e));
|
||||
}
|
||||
|
||||
// track this chunk's files to return later
|
||||
files_next.extend(created_files);
|
||||
files_next.extend(upgraded_files);
|
||||
}
|
||||
|
||||
// Extend created files, upgraded files and files_to_keep to files_next
|
||||
let mut files_next = created_files;
|
||||
files_next.extend(upgraded_files);
|
||||
files_next.extend(files_to_keep);
|
||||
|
||||
Ok(files_next)
|
||||
}
|
||||
|
||||
/// Compact or split given files
|
||||
async fn run_plans(
|
||||
span: SpanRecorder,
|
||||
split_or_compact: FilesToSplitOrCompact,
|
||||
plans: Vec<PlanIR>,
|
||||
partition_info: &Arc<PartitionInfo>,
|
||||
components: &Arc<Components>,
|
||||
target_level: CompactionLevel,
|
||||
df_semaphore: Arc<InstrumentedAsyncSemaphore>,
|
||||
scratchpad_ctx: Arc<dyn Scratchpad>,
|
||||
) -> Result<Vec<ParquetFileParams>, DynError> {
|
||||
// stage files
|
||||
let download_span = span.child("download_objects");
|
||||
let input_uuids_inpad = scratchpad_ctx
|
||||
.load_to_scratchpad(&split_or_compact.file_input_paths())
|
||||
.await;
|
||||
drop(download_span);
|
||||
let paths: Vec<ParquetFilePath> = plans.iter().flat_map(|plan| plan.input_paths()).collect();
|
||||
|
||||
let plans = components.ir_planner.create_plans(
|
||||
Arc::clone(partition_info),
|
||||
target_level,
|
||||
split_or_compact,
|
||||
input_uuids_inpad,
|
||||
);
|
||||
// stage files. This could move to execute_plan to reduce peak scratchpad memory use, but that would
|
||||
// cost some concurrency in object downloads.
|
||||
let download_span = span.child("download_objects");
|
||||
let _ = scratchpad_ctx.load_to_scratchpad(&paths).await;
|
||||
drop(download_span);
|
||||
|
||||
info!(
|
||||
partition_id = partition_info.partition_id.get(),
|
||||
|
@ -448,6 +471,7 @@ async fn run_plans(
|
|||
partition_info,
|
||||
components,
|
||||
Arc::clone(&df_semaphore),
|
||||
Arc::<dyn Scratchpad>::clone(&scratchpad_ctx),
|
||||
)
|
||||
})
|
||||
.buffer_unordered(df_semaphore.total_permits())
|
||||
|
@ -463,6 +487,7 @@ async fn execute_plan(
|
|||
partition_info: &Arc<PartitionInfo>,
|
||||
components: &Arc<Components>,
|
||||
df_semaphore: Arc<InstrumentedAsyncSemaphore>,
|
||||
scratchpad_ctx: Arc<dyn Scratchpad>,
|
||||
) -> Result<Vec<ParquetFileParams>, DynError> {
|
||||
span.set_metadata("input_files", plan_ir.input_files().len().to_string());
|
||||
span.set_metadata("input_bytes", plan_ir.input_bytes().to_string());
|
||||
|
@ -508,12 +533,14 @@ async fn execute_plan(
|
|||
"job semaphore acquired",
|
||||
);
|
||||
|
||||
let df_span = span.child("data_fusion");
|
||||
let df_span = span.child_span("data_fusion");
|
||||
let plan = components
|
||||
.df_planner
|
||||
.plan(&plan_ir, Arc::clone(partition_info))
|
||||
.await?;
|
||||
let streams = components.df_plan_exec.exec(plan);
|
||||
let streams = components.df_plan_exec.exec(Arc::<
|
||||
dyn datafusion::physical_plan::ExecutionPlan,
|
||||
>::clone(&plan));
|
||||
let job = components.parquet_files_sink.stream_into_file_sink(
|
||||
streams,
|
||||
Arc::clone(partition_info),
|
||||
|
@ -524,8 +551,18 @@ async fn execute_plan(
|
|||
// TODO: react to OOM and try to divide branch
|
||||
let res = job.await;
|
||||
|
||||
if let Some(span) = &df_span {
|
||||
send_metrics_to_tracing(Utc::now(), span, plan.as_ref(), true);
|
||||
};
|
||||
|
||||
drop(permit);
|
||||
drop(df_span);
|
||||
|
||||
// inputs can be removed from the scratchpad as soon as we're done with compaction.
|
||||
scratchpad_ctx
|
||||
.clean_from_scratchpad(&plan_ir.input_paths())
|
||||
.await;
|
||||
|
||||
info!(
|
||||
partition_id = partition_info.partition_id.get(),
|
||||
plan_id, "job semaphore released",
|
||||
|
@ -580,7 +617,7 @@ async fn fetch_and_save_parquet_file_state(
|
|||
async fn update_catalog(
|
||||
components: Arc<Components>,
|
||||
partition_id: PartitionId,
|
||||
saved_parquet_file_state: SavedParquetFileState,
|
||||
saved_parquet_file_state: &SavedParquetFileState,
|
||||
files_to_delete: Vec<ParquetFile>,
|
||||
files_to_upgrade: Vec<ParquetFile>,
|
||||
file_params_to_create: Vec<ParquetFileParams>,
|
||||
|
@ -592,7 +629,7 @@ async fn update_catalog(
|
|||
// Right now this only logs; in the future we might decide not to commit these changes
|
||||
let _ignore = components
|
||||
.changed_files_filter
|
||||
.apply(&saved_parquet_file_state, &current_parquet_file_state);
|
||||
.apply(saved_parquet_file_state, &current_parquet_file_state);
|
||||
|
||||
let created_ids = components
|
||||
.commit
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
use std::fmt::Display;
|
||||
|
||||
use data_types::{ChunkOrder, CompactionLevel, ParquetFile};
|
||||
use parquet_file::ParquetFilePath;
|
||||
|
||||
use crate::file_classification::{CompactReason, NoneReason, SplitReason};
|
||||
|
||||
|
@ -78,6 +79,22 @@ impl PlanIR {
|
|||
}
|
||||
}
|
||||
|
||||
/// return the ParquetFiles that will be compacted together
///
/// Each returned value is a clone of the `file` field of one entry of
/// `input_files()`, in the same order.
|
||||
pub fn input_parquet_files(&self) -> Vec<ParquetFile> {
|
||||
self.input_files()
|
||||
.iter()
|
||||
.map(|ir| ir.file.clone())
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
/// return the paths of the input files that will be compacted together
///
/// Each returned value is a clone of the `path` field of one entry of
/// `input_files()`, in the same order.
|
||||
pub fn input_paths(&self) -> Vec<ParquetFilePath> {
|
||||
self.input_files()
|
||||
.iter()
|
||||
.map(|ir| ir.path.clone())
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
/// return the total bytes of the input files that will be compacted together
|
||||
pub fn input_bytes(&self) -> i64 {
|
||||
self.input_files()
|
||||
|
@ -109,5 +126,6 @@ impl Display for PlanIR {
|
|||
// One input file of a plan, together with the data the planner attached to it.
// NOTE(review): presumably the element type returned by `PlanIR::input_files()`,
// whose entries are read through `.file` and `.path` — confirm.
#[derive(Debug)]
|
||||
pub struct FileIR {
|
||||
// The parquet file record for this input.
pub file: ParquetFile,
|
||||
// The parquet file path associated with `file`.
pub path: ParquetFilePath,
|
||||
// The chunk order assigned to this file within the plan.
pub order: ChunkOrder,
|
||||
}
|
||||
|
|
|
@ -460,7 +460,7 @@ async fn test_partition_fail() {
|
|||
&setup,
|
||||
[(
|
||||
setup.partition_info.partition_id,
|
||||
"serialize\ncaused by\nJoin Error (panic)\ncaused by\nExternal error: foo",
|
||||
"serialize\ncaused by\nJoin Error (panic)\ncaused by\nExternal error: Panic: foo",
|
||||
)],
|
||||
)
|
||||
.await;
|
||||
|
|
|
@ -17,7 +17,7 @@ once_cell = "1"
|
|||
ordered-float = "3"
|
||||
schema = { path = "../schema" }
|
||||
sha2 = "0.10"
|
||||
sqlx = { version = "0.6", features = ["runtime-tokio-rustls", "postgres", "uuid"] }
|
||||
sqlx = { version = "0.7.1", features = ["runtime-tokio-rustls", "postgres", "uuid"] }
|
||||
thiserror = "1.0.43"
|
||||
uuid = { version = "1", features = ["v4"] }
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
|
|
|
@ -4,7 +4,6 @@ use super::TableId;
|
|||
use generated_types::influxdata::iox::schema::v1 as proto;
|
||||
use influxdb_line_protocol::FieldValue;
|
||||
use schema::{builder::SchemaBuilder, InfluxColumnType, InfluxFieldType, Schema};
|
||||
use sqlx::postgres::PgHasArrayType;
|
||||
use std::{
|
||||
collections::{BTreeMap, BTreeSet, HashMap},
|
||||
convert::TryFrom,
|
||||
|
@ -26,12 +25,6 @@ impl ColumnId {
|
|||
}
|
||||
}
|
||||
|
||||
impl PgHasArrayType for ColumnId {
|
||||
fn array_type_info() -> sqlx::postgres::PgTypeInfo {
|
||||
<i64 as PgHasArrayType>::array_type_info()
|
||||
}
|
||||
}
|
||||
|
||||
/// Column definitions for a table indexed by their name
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
pub struct ColumnsByName(BTreeMap<String, ColumnSchema>);
|
||||
|
@ -328,7 +321,7 @@ impl TryFrom<proto::column_schema::ColumnType> for ColumnType {
|
|||
|
||||
/// Set of columns.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, sqlx::Type)]
|
||||
#[sqlx(transparent)]
|
||||
#[sqlx(transparent, no_pg_array)]
|
||||
pub struct ColumnSet(Vec<ColumnId>);
|
||||
|
||||
impl ColumnSet {
|
||||
|
|
|
@ -244,7 +244,7 @@ pub static PARTITION_BY_DAY_PROTO: Lazy<Arc<proto::PartitionTemplate>> = Lazy::n
|
|||
|
||||
/// A partition template specified by a namespace record.
|
||||
#[derive(Debug, PartialEq, Clone, Default, sqlx::Type)]
|
||||
#[sqlx(transparent)]
|
||||
#[sqlx(transparent, no_pg_array)]
|
||||
pub struct NamespacePartitionTemplateOverride(Option<serialization::Wrapper>);
|
||||
|
||||
impl TryFrom<proto::PartitionTemplate> for NamespacePartitionTemplateOverride {
|
||||
|
@ -259,7 +259,7 @@ impl TryFrom<proto::PartitionTemplate> for NamespacePartitionTemplateOverride {
|
|||
|
||||
/// A partition template specified by a table record.
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Default, sqlx::Type)]
|
||||
#[sqlx(transparent)]
|
||||
#[sqlx(transparent, no_pg_array)]
|
||||
pub struct TablePartitionTemplateOverride(Option<serialization::Wrapper>);
|
||||
|
||||
impl TablePartitionTemplateOverride {
|
||||
|
|
|
@ -7,14 +7,6 @@ yanked = "deny"
|
|||
unmaintained = "warn"
|
||||
notice = "warn"
|
||||
ignore = [
|
||||
# "It was sometimes possible for SQLite versions >= 1.0.12, < 3.39.2 to allow an array-bounds overflow when large
|
||||
# string were input into SQLite's printf function."
|
||||
#
|
||||
# We are not using `printf` with untrusted inputs.
|
||||
#
|
||||
# This is currently blocked by upstream:
|
||||
# https://github.com/launchbadge/sqlx/issues/2346
|
||||
"RUSTSEC-2022-0090",
|
||||
]
|
||||
git-fetch-with-cli = true
|
||||
|
||||
|
|
|
@ -51,3 +51,4 @@ We hold monthly Tech Talks that explain the project's technical underpinnings. Y
|
|||
* [Querier <> Ingester Query Protocol](ingester_querier_protocol.md)
|
||||
* [Underground Guide to Running IOx Locally](underground_guide.md)
|
||||
* [Query Processing](query_processing.md)
|
||||
* [How to Reproduce and Debug Production Data Locally](debug.md)
|
||||
|
|
|
@ -0,0 +1,105 @@
|
|||
# How to Reproduce and Debug Production Data Locally
|
||||
|
||||
Here is a way to reproduce issues using production data locally with all-in-one mode.
|
||||
|
||||
## Summary of steps
|
||||
|
||||
Reproduce the error locally by building a local catalog from the output of `influxdb_iox remote store get-table`:
|
||||
|
||||
1. Download contents of table_name into a directory named 'table_name'
|
||||
|
||||
```
|
||||
influxdb_iox remote store get-table <namespace> <table_name>
|
||||
```
|
||||
|
||||
1. Create a catalog and object_store in /tmp/data_dir
|
||||
|
||||
```
|
||||
influxdb_iox debug build-catalog <table_dir> /tmp/data_dir
|
||||
```
|
||||
|
||||
1. Start iox using this data directory (you can now query `table_name` locally):
|
||||
```
|
||||
influxdb_iox --data-dir /tmp/data_dir
|
||||
```
|
||||
|
||||
## Demonstration
|
||||
|
||||
## Setup
|
||||
|
||||
Running `influxdb_iox` and getting local Telegraf data
|
||||
|
||||
```shell
|
||||
$ influxdb_iox namespace list
|
||||
[
|
||||
{
|
||||
"id": "1",
|
||||
"name": "26f7e5a4b7be365b_917b97a92e883afc",
|
||||
"maxTables": 500,
|
||||
"maxColumnsPerTable": 200
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
## Export `cpu` table:
|
||||
|
||||
```shell
|
||||
$ influxdb_iox remote store get-table 26f7e5a4b7be365b_917b97a92e883afc cpu
|
||||
found 11 Parquet files, exporting...
|
||||
downloading file 1 of 11 (1b8eb36a-7a34-4635-9156-251efcb1c024.4.parquet)...
|
||||
downloading file 2 of 11 (1819137f-7cb5-4dc8-8051-6fa0b42990cb.4.parquet)...
|
||||
downloading file 3 of 11 (4931cad7-7aaf-4b41-8f46-2d3be85c492b.4.parquet)...
|
||||
downloading file 4 of 11 (be75f5fb-a8bc-4646-893a-70d496b13f3d.4.parquet)...
|
||||
downloading file 5 of 11 (5235b87d-19ee-48ae-830f-b19d81bfe915.4.parquet)...
|
||||
downloading file 6 of 11 (a8f7be33-42b6-4353-8735-51b245196d39.4.parquet)...
|
||||
downloading file 7 of 11 (3b43c4ee-7500-47f9-9c0f-76d4f80b480e.4.parquet)...
|
||||
downloading file 8 of 11 (081da5be-e0f9-4b42-8cd2-45bfebbd934c.4.parquet)...
|
||||
downloading file 9 of 11 (f29ba3b4-53b1-4c68-9287-4bcea7c4e86b.4.parquet)...
|
||||
downloading file 10 of 11 (1ce94ce2-1200-4516-950a-64828a7cebba.4.parquet)...
|
||||
downloading file 11 of 11 (3a2b5525-3be5-41ef-b082-b279edc32acb.4.parquet)...
|
||||
Done.
|
||||
$ ls cpu/
|
||||
081da5be-e0f9-4b42-8cd2-45bfebbd934c.4.parquet 1ce94ce2-1200-4516-950a-64828a7cebba.4.parquet 4931cad7-7aaf-4b41-8f46-2d3be85c492b.4.parquet be75f5fb-a8bc-4646-893a-70d496b13f3d.4.parquet
|
||||
081da5be-e0f9-4b42-8cd2-45bfebbd934c.4.parquet.json 1ce94ce2-1200-4516-950a-64828a7cebba.4.parquet.json 4931cad7-7aaf-4b41-8f46-2d3be85c492b.4.parquet.json be75f5fb-a8bc-4646-893a-70d496b13f3d.4.parquet.json
|
||||
1819137f-7cb5-4dc8-8051-6fa0b42990cb.4.parquet 3a2b5525-3be5-41ef-b082-b279edc32acb.4.parquet 5235b87d-19ee-48ae-830f-b19d81bfe915.4.parquet f29ba3b4-53b1-4c68-9287-4bcea7c4e86b.4.parquet
|
||||
1819137f-7cb5-4dc8-8051-6fa0b42990cb.4.parquet.json 3a2b5525-3be5-41ef-b082-b279edc32acb.4.parquet.json 5235b87d-19ee-48ae-830f-b19d81bfe915.4.parquet.json f29ba3b4-53b1-4c68-9287-4bcea7c4e86b.4.parquet.json
|
||||
1b8eb36a-7a34-4635-9156-251efcb1c024.4.parquet 3b43c4ee-7500-47f9-9c0f-76d4f80b480e.4.parquet a8f7be33-42b6-4353-8735-51b245196d39.4.parquet partition.4.json
|
||||
1b8eb36a-7a34-4635-9156-251efcb1c024.4.parquet.json 3b43c4ee-7500-47f9-9c0f-76d4f80b480e.4.parquet.json a8f7be33-42b6-4353-8735-51b245196d39.4.parquet.json table.1.json
|
||||
```
|
||||
|
||||
## Build a new `new_data_dir` from export:
|
||||
|
||||
```shell
|
||||
$ influxdb_iox debug build-catalog cpu new_data_dir
|
||||
Beginning catalog / object_store build from "cpu" in "new_data_dir"....
|
||||
Done
|
||||
|
||||
$ ls new_data_dir/
|
||||
catalog.sqlite object_store/
|
||||
```
|
||||
|
||||
## Run `influxdb_iox` with `new_data_dir`:
|
||||
|
||||
```shell
|
||||
$ influxdb_iox --data-dir new_data_dir/
|
||||
```
|
||||
|
||||
And in a separate shell, you can query the data and see it is present:
|
||||
|
||||
```shell
|
||||
$ influxdb_iox query 26f7e5a4b7be365b_917b97a92e883afc 'select * from cpu limit 10';
|
||||
+-----------+---------------------+----------------------+-------------+------------------+-------------------+--------------+-----------+------------+---------------+-------------+--------------------+--------------------+
|
||||
| cpu | host | time | usage_guest | usage_guest_nice | usage_idle | usage_iowait | usage_irq | usage_nice | usage_softirq | usage_steal | usage_system | usage_user |
|
||||
+-----------+---------------------+----------------------+-------------+------------------+-------------------+--------------+-----------+------------+---------------+-------------+--------------------+--------------------+
|
||||
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:13:40Z | 0.0 | 0.0 | 95.6668753914105 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.4902943018170824 | 2.8428303068453085 |
|
||||
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:13:50Z | 0.0 | 0.0 | 95.9551687433697 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.4213261536472683 | 2.6235051029648098 |
|
||||
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:14:00Z | 0.0 | 0.0 | 96.52108622167991 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.37029157802418 | 2.108622199968126 |
|
||||
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:14:10Z | 0.0 | 0.0 | 95.26819803491809 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.752519246414341 | 2.979282718922596 |
|
||||
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:14:20Z | 0.0 | 0.0 | 95.28402329791422 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.6408843239063593 | 3.0750923780335997 |
|
||||
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:14:30Z | 0.0 | 0.0 | 93.97484827633119 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0271538509716924 | 3.9979978727699588 |
|
||||
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:14:40Z | 0.0 | 0.0 | 95.69219209824692 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.458894245831095 | 2.848913656031324 |
|
||||
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:14:50Z | 0.0 | 0.0 | 94.78402607970591 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.9685286188771443 | 3.2474453011797517 |
|
||||
| cpu-total | MacBook-Pro-8.local | 2023-07-06T17:15:00Z | 0.0 | 0.0 | 95.85132344665212 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.5706151054475623 | 2.5780614479731607 |
|
||||
| cpu0 | MacBook-Pro-8.local | 2023-07-06T17:13:40Z | 0.0 | 0.0 | 78.65055387717186 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 7.452165156077374 | 13.897280966824042 |
|
||||
+-----------+---------------------+----------------------+-------------+------------------+-------------------+--------------+-----------+------------+---------------+-------------+--------------------+--------------------+
|
||||
```
|
|
@ -327,7 +327,7 @@ Each querier process has a set of in-memory caches. These are:
|
|||
| ---- | ---- | -------------- | --- | ----- | ------------------------------ | ----- |
|
||||
| Namespace | Metadata | Catalog | Namespace Name | `CachedNamespace` | refresh policy, TTL, invalidation by unknown table/columns | Unknown entries NOT cached (assumes upstream DDoS protection) |
|
||||
| Object Store | Data | Object Store | Path | Raw object store bytes for the entire object | -- | |
|
||||
| Parquet File | Metadata | Catalog | Table ID | Parquet files (all the data that the catalog has, i.e. the entire row) for all files that are NOT marked for deletion. | No refresh yet (see #5718), can be invalidated by ingester watermark. | |
|
||||
| Parquet File | Metadata | Catalog | Table ID | Parquet files (all the data that the catalog has, i.e. the entire row) for all files that are NOT marked for deletion. | TTL, but no refresh yet (see #5718), can be invalidated by ingester watermark. | |
|
||||
| Partition | Metadata | Catalog | Partition ID | `CachedPartition` | Invalidated if ingester data or any parquet file has columns that are NOT covered by the sort key. | Needs `CachedTable` for access |
|
||||
| Projected Schema | Metadata | Querier | Table ID, Column IDs | `ProjectedSchema` | -- | Needs `CachedTable` for access |
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@ observability_deps = { path = "../observability_deps" }
|
|||
once_cell = { version = "1.18", features = ["parking_lot"] }
|
||||
parking_lot = "0.12"
|
||||
pin-project = "1.1"
|
||||
snafu = "0.7"
|
||||
tokio = { version = "1.29" }
|
||||
tokio-util = { version = "0.7.8" }
|
||||
tokio_metrics_bridge = { path = "../tokio_metrics_bridge" }
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
)]
|
||||
|
||||
use metric::Registry;
|
||||
use snafu::Snafu;
|
||||
#[cfg(tokio_unstable)]
|
||||
use tokio_metrics_bridge::setup_tokio_metrics;
|
||||
// Workaround for "unused crate" lint false positives.
|
||||
|
@ -68,8 +69,16 @@ impl Task {
|
|||
}
|
||||
}
|
||||
|
||||
/// The type of error that is returned from tasks in this module
|
||||
pub type Error = String;
|
||||
/// Errors occurring when polling [`Job`].
///
/// This is the error half of the `Result<T, JobError>` that awaiting a [`Job`] yields.
|
||||
#[derive(Debug, Snafu)]
|
||||
#[allow(missing_docs)]
|
||||
pub enum JobError {
|
||||
/// Polling the job's result receiver failed, i.e. no result was delivered.
#[snafu(display("Worker thread gone, executor was likely shut down"))]
|
||||
WorkerGone,
|
||||

||||
/// The task panicked. `msg` is the panic payload when it is a `String` or
/// `&str`, and `"unknown internal error"` otherwise.
#[snafu(display("Panic: {msg}"))]
|
||||
Panic { msg: String },
|
||||
}
|
||||
|
||||
/// Job within the executor.
|
||||
///
|
||||
|
@ -80,7 +89,7 @@ pub struct Job<T> {
|
|||
cancel: CancellationToken,
|
||||
detached: bool,
|
||||
#[pin]
|
||||
rx: Receiver<Result<T, String>>,
|
||||
rx: Receiver<Result<T, JobError>>,
|
||||
}
|
||||
|
||||
impl<T> Job<T> {
|
||||
|
@ -94,7 +103,7 @@ impl<T> Job<T> {
|
|||
}
|
||||
|
||||
impl<T> Future for Job<T> {
|
||||
type Output = Result<T, Error>;
|
||||
type Output = Result<T, JobError>;
|
||||
|
||||
fn poll(
|
||||
self: Pin<&mut Self>,
|
||||
|
@ -103,9 +112,7 @@ impl<T> Future for Job<T> {
|
|||
let this = self.project();
|
||||
match ready!(this.rx.poll(cx)) {
|
||||
Ok(res) => std::task::Poll::Ready(res),
|
||||
Err(_) => std::task::Poll::Ready(Err(String::from(
|
||||
"Worker thread gone, executor was likely shut down",
|
||||
))),
|
||||
Err(_) => std::task::Poll::Ready(Err(JobError::WorkerGone)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -315,13 +322,15 @@ impl DedicatedExecutor {
|
|||
|
||||
let fut = Box::pin(async move {
|
||||
let task_output = AssertUnwindSafe(task).catch_unwind().await.map_err(|e| {
|
||||
if let Some(s) = e.downcast_ref::<String>() {
|
||||
let s = if let Some(s) = e.downcast_ref::<String>() {
|
||||
s.clone()
|
||||
} else if let Some(s) = e.downcast_ref::<&str>() {
|
||||
s.to_string()
|
||||
} else {
|
||||
"unknown internal error".to_string()
|
||||
}
|
||||
};
|
||||
|
||||
JobError::Panic { msg: s }
|
||||
});
|
||||
|
||||
if tx.send(task_output).is_err() {
|
||||
|
@ -571,7 +580,7 @@ mod tests {
|
|||
let err = dedicated_task.await.unwrap_err();
|
||||
assert_eq!(
|
||||
err.to_string(),
|
||||
"At the disco, on the dedicated task scheduler",
|
||||
"Panic: At the disco, on the dedicated task scheduler",
|
||||
);
|
||||
|
||||
exec.join().await;
|
||||
|
@ -590,7 +599,7 @@ mod tests {
|
|||
|
||||
// should not be able to get the result
|
||||
let err = dedicated_task.await.unwrap_err();
|
||||
assert_eq!(err.to_string(), "1 2",);
|
||||
assert_eq!(err.to_string(), "Panic: 1 2",);
|
||||
|
||||
exec.join().await;
|
||||
}
|
||||
|
@ -608,7 +617,7 @@ mod tests {
|
|||
|
||||
// should not be able to get the result
|
||||
let err = dedicated_task.await.unwrap_err();
|
||||
assert_eq!(err.to_string(), "unknown internal error",);
|
||||
assert_eq!(err.to_string(), "Panic: unknown internal error",);
|
||||
|
||||
exec.join().await;
|
||||
}
|
||||
|
|
|
@ -32,5 +32,5 @@ metric = { path = "../metric" }
|
|||
once_cell = { version = "1.18", features = ["parking_lot"] }
|
||||
parquet_file = { path = "../parquet_file" }
|
||||
tempfile = "3"
|
||||
sqlx = { version = "0.6", features = [ "runtime-tokio-rustls" ] }
|
||||
sqlx = { version = "0.7.1", features = [ "runtime-tokio-rustls" ] }
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@ prost = "0.11.9"
|
|||
thiserror = "1.0.40"
|
||||
tokio = { version = "1.28.2", features = ["net", "io-util", "time", "rt", "sync", "macros"] }
|
||||
tracing = "0.1.37"
|
||||
uuid = { version = "1.3.3", features = ["v4"] }
|
||||
uuid = { version = "1.4.1", features = ["v4"] }
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
|
||||
[build-dependencies]
|
||||
|
|
|
@ -69,7 +69,7 @@ once_cell = { version = "1.18", features = ["parking_lot"] }
|
|||
rustyline = { version = "12.0", default-features = false, features = ["with-file-history"]}
|
||||
serde_json = "1.0.103"
|
||||
snafu = "0.7"
|
||||
tempfile = "3.6.0"
|
||||
tempfile = "3.7.0"
|
||||
thiserror = "1.0.43"
|
||||
tikv-jemalloc-ctl = { version = "0.5.0", optional = true }
|
||||
tokio = { version = "1.29", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time", "io-std"] }
|
||||
|
@ -93,7 +93,7 @@ predicate = { path = "../predicate" }
|
|||
predicates = "3.0.3"
|
||||
pretty_assertions = "1.4.0"
|
||||
proptest = { version = "1.2.0", default-features = false }
|
||||
serde = "1.0.171"
|
||||
serde = "1.0.173"
|
||||
test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
|
||||
test_helpers_end_to_end = { path = "../test_helpers_end_to_end" }
|
||||
insta = { version = "1", features = ["yaml"] }
|
||||
|
|
|
@ -50,12 +50,12 @@ enum Command {
|
|||
Schema(schema::Config),
|
||||
|
||||
// NB: The example formatting below is weird so that Clap makes a nice help text
|
||||
/// Build a local catalog from the output of `remote get-table`.
|
||||
/// Build a local catalog from the output of `remote store get-table`.
|
||||
///
|
||||
/// For example:
|
||||
/// ```text
|
||||
/// # download contents of table_name into a directory named 'table_name'
|
||||
/// influxdb_iox remote get-table <namespace> <table_name>
|
||||
/// influxdb_iox remote store get-table <namespace> <table_name>
|
||||
///
|
||||
/// # Create a catalog and object_store in /tmp/data_dir
|
||||
/// influxdb_iox debug build-catalog <table_dir> /tmp/data_dir
|
||||
|
|
|
@ -957,7 +957,7 @@ async fn query_ingester() {
|
|||
test_helpers::maybe_start_logging();
|
||||
let database_url = maybe_skip_integration!();
|
||||
|
||||
let mut cluster = MiniCluster::create_shared(database_url).await;
|
||||
let mut cluster = MiniCluster::create_shared_never_persist(database_url).await;
|
||||
|
||||
StepTest::new(
|
||||
&mut cluster,
|
||||
|
|
|
@ -1,10 +1,5 @@
|
|||
//! Tests the `influxdb_iox debug` commands
|
||||
use std::{
|
||||
collections::VecDeque,
|
||||
io::Write,
|
||||
path::{Path, PathBuf},
|
||||
time::Duration,
|
||||
};
|
||||
use std::path::Path;
|
||||
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use arrow_util::assert_batches_sorted_eq;
|
||||
|
@ -12,7 +7,6 @@ use assert_cmd::Command;
|
|||
use futures::FutureExt;
|
||||
use predicates::prelude::*;
|
||||
use tempfile::TempDir;
|
||||
use test_helpers::timeout::FutureTimeout;
|
||||
use test_helpers_end_to_end::{
|
||||
maybe_skip_integration, run_sql, MiniCluster, ServerFixture, Step, StepTest, StepTestState,
|
||||
TestConfig,
|
||||
|
@ -52,8 +46,6 @@ async fn test_print_cpu() {
|
|||
/// 3. Start an all-in-one instance from that rebuilt catalog
|
||||
/// 4. Can run a query successfully
|
||||
#[tokio::test]
|
||||
// Ignore due to https://github.com/influxdata/influxdb_iox/issues/8203
|
||||
#[ignore]
|
||||
async fn build_catalog() {
|
||||
test_helpers::maybe_start_logging();
|
||||
let database_url = maybe_skip_integration!();
|
||||
|
@ -111,20 +103,11 @@ async fn build_catalog() {
|
|||
let table_dir = export_dir.path().join(table_name);
|
||||
|
||||
// We can build a catalog and start up the server and run a query
|
||||
let restarted = RestartedServer::build_catalog_and_start(&table_dir).await;
|
||||
let batches = restarted
|
||||
.run_sql_until_non_empty(sql, namespace.as_str())
|
||||
.await;
|
||||
assert_batches_sorted_eq!(&expected, &batches);
|
||||
rebuild_and_query(&table_dir, &namespace, sql, &expected).await;
|
||||
|
||||
// We can also rebuild a catalog from just the parquet files
|
||||
let only_parquet_dir = copy_only_parquet_files(&table_dir);
|
||||
let restarted =
|
||||
RestartedServer::build_catalog_and_start(only_parquet_dir.path()).await;
|
||||
let batches = restarted
|
||||
.run_sql_until_non_empty(sql, namespace.as_str())
|
||||
.await;
|
||||
assert_batches_sorted_eq!(&expected, &batches);
|
||||
rebuild_and_query(only_parquet_dir.path(), &namespace, sql, &expected).await;
|
||||
}
|
||||
.boxed()
|
||||
})),
|
||||
|
@ -134,6 +117,30 @@ async fn build_catalog() {
|
|||
.await
|
||||
}
|
||||
|
||||
/// Rebuilds a catalog from an export directory, starts up a server
|
||||
/// and verifies the running `sql` in `namespace` produces `expected`
|
||||
async fn rebuild_and_query(table_dir: &Path, namespace: &str, sql: &str, expected: &[&str]) {
|
||||
// Very occassionally, something goes wrong with the sqlite based
|
||||
// catalog and it doesn't get the new files. Thus try a few times
|
||||
//
|
||||
// See https://github.com/influxdata/influxdb_iox/issues/8287
|
||||
let mut retries = 5;
|
||||
|
||||
while retries > 0 {
|
||||
println!("** Retries remaining: {retries}");
|
||||
let restarted = RestartedServer::build_catalog_and_start(table_dir).await;
|
||||
let batches = restarted.run_sql(sql, namespace).await;
|
||||
|
||||
// if we got results, great, otherwise try again
|
||||
if !batches.is_empty() {
|
||||
assert_batches_sorted_eq!(expected, &batches);
|
||||
return;
|
||||
}
|
||||
|
||||
retries -= 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// An all in one instance, with data directory of `data_dir`
|
||||
struct RestartedServer {
|
||||
all_in_one: ServerFixture,
|
||||
|
@ -171,7 +178,7 @@ impl RestartedServer {
|
|||
println!("target_directory: {data_dir:?}");
|
||||
|
||||
// call `influxdb_iox debug build-catalog <table_dir> <new_data_dir>`
|
||||
let cmd = Command::cargo_bin("influxdb_iox")
|
||||
Command::cargo_bin("influxdb_iox")
|
||||
.unwrap()
|
||||
// use -v to enable logging so we can check the status messages
|
||||
.arg("-vv")
|
||||
|
@ -180,31 +187,18 @@ impl RestartedServer {
|
|||
.arg(exported_table_dir.as_os_str().to_str().unwrap())
|
||||
.arg(data_dir.path().as_os_str().to_str().unwrap())
|
||||
.assert()
|
||||
.success();
|
||||
|
||||
// debug information to track down https://github.com/influxdata/influxdb_iox/issues/8203
|
||||
println!("***** Begin build-catalog STDOUT ****");
|
||||
std::io::stdout()
|
||||
.write_all(&cmd.get_output().stdout)
|
||||
.unwrap();
|
||||
println!("***** Begin build-catalog STDERR ****");
|
||||
std::io::stdout()
|
||||
.write_all(&cmd.get_output().stderr)
|
||||
.unwrap();
|
||||
println!("***** DONE ****");
|
||||
|
||||
cmd.stdout(
|
||||
predicate::str::contains("Beginning catalog / object_store build")
|
||||
.and(predicate::str::contains(
|
||||
"Begin importing files total_files=1",
|
||||
))
|
||||
.and(predicate::str::contains(
|
||||
"Completed importing files total_files=1",
|
||||
)),
|
||||
);
|
||||
.success()
|
||||
.stdout(
|
||||
predicate::str::contains("Beginning catalog / object_store build")
|
||||
.and(predicate::str::contains(
|
||||
"Begin importing files total_files=1",
|
||||
))
|
||||
.and(predicate::str::contains(
|
||||
"Completed importing files total_files=1",
|
||||
)),
|
||||
);
|
||||
|
||||
println!("Completed rebuild in {data_dir:?}");
|
||||
RecursiveDirPrinter::new().print(data_dir.path());
|
||||
|
||||
// now, start up a new server in all-in-one mode
|
||||
// using the newly built data directory
|
||||
|
@ -216,27 +210,6 @@ impl RestartedServer {
|
|||
data_dir,
|
||||
}
|
||||
}
|
||||
|
||||
/// Runs the SQL query against this server, in a loop until
|
||||
/// results are returned. Panics if the results are not produced
|
||||
/// within a 5 seconds
|
||||
async fn run_sql_until_non_empty(&self, sql: &str, namespace: &str) -> Vec<RecordBatch> {
|
||||
let timeout = Duration::from_secs(5);
|
||||
let loop_sleep = Duration::from_millis(500);
|
||||
let fut = async {
|
||||
loop {
|
||||
let batches = self.run_sql(sql, namespace).await;
|
||||
if !batches.is_empty() {
|
||||
return batches;
|
||||
}
|
||||
tokio::time::sleep(loop_sleep).await;
|
||||
}
|
||||
};
|
||||
|
||||
fut.with_timeout(timeout)
|
||||
.await
|
||||
.expect("timed out waiting for non-empty batches in result")
|
||||
}
|
||||
}
|
||||
|
||||
/// Copies only parquet files from the source directory to a new
|
||||
|
@ -262,43 +235,3 @@ fn copy_only_parquet_files(src: &Path) -> TempDir {
|
|||
}
|
||||
target_dir
|
||||
}
|
||||
|
||||
/// Debugging aid that prints every path found under a root
/// directory, descending into subdirectories.
///
/// Example output:
///
/// ```text
/// RecursiveDirPrinter All files rooted at "/tmp/.tmpvf16r0"
/// "/tmp/.tmpvf16r0"
/// "/tmp/.tmpvf16r0/catalog.sqlite"
/// "/tmp/.tmpvf16r0/object_store"
/// "/tmp/.tmpvf16r0/object_store/1"
/// "/tmp/.tmpvf16r0/object_store/1/1"
/// "/tmp/.tmpvf16r0/object_store/1/1/b862a7e9b329ee6a418cde191198eaeb1512753f19b87a81def2ae6c3d0ed237"
/// "/tmp/.tmpvf16r0/object_store/1/1/b862a7e9b329ee6a418cde191198eaeb1512753f19b87a81def2ae6c3d0ed237/d78abef6-6859-48eb-aa62-3518097fbb9b.parquet"
/// ```
struct RecursiveDirPrinter {
    /// Paths that have been discovered but not yet printed
    paths: VecDeque<PathBuf>,
}

impl RecursiveDirPrinter {
    fn new() -> Self {
        let paths = VecDeque::new();
        Self { paths }
    }

    /// Prints `root` followed by everything found beneath it.
    ///
    /// Panics if a directory or one of its entries cannot be read.
    fn print(mut self, root: &Path) {
        println!("RecursiveDirPrinter All files rooted at {root:?}");
        self.paths.push_back(root.to_path_buf());

        while let Some(current) = self.paths.pop_front() {
            println!("{current:?}");
            if !current.is_dir() {
                continue;
            }

            // Newly found entries go to the front of the queue, so
            // they are printed before any path queued earlier.
            std::fs::read_dir(current)
                .unwrap()
                .map(|entry| entry.unwrap().path())
                .for_each(|child| self.paths.push_front(child));
        }
    }
}
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
use std::{collections::HashMap, path::PathBuf, sync::Arc};
|
||||
use std::path::PathBuf;
|
||||
|
||||
use arrow::{
|
||||
array::as_generic_binary_array,
|
||||
datatypes::{DataType, Fields, Schema, SchemaRef, TimeUnit},
|
||||
datatypes::{DataType, Schema, TimeUnit},
|
||||
record_batch::RecordBatch,
|
||||
};
|
||||
use arrow_flight::{
|
||||
|
@ -1592,10 +1592,7 @@ async fn assert_schema(client: &mut FlightClient, cmd: Any) {
|
|||
let mut saw_data = false;
|
||||
while let Some(batch) = result_stream.try_next().await.unwrap() {
|
||||
saw_data = true;
|
||||
// strip metadata (GetFlightInfo doesn't include metadata for
|
||||
// some reason) before comparison
|
||||
// https://github.com/influxdata/influxdb_iox/issues/7282
|
||||
let batch_schema = strip_metadata(&batch.schema());
|
||||
let batch_schema = batch.schema();
|
||||
assert_eq!(
|
||||
batch_schema.as_ref(),
|
||||
&flight_info_schema,
|
||||
|
@ -1603,10 +1600,6 @@ async fn assert_schema(client: &mut FlightClient, cmd: Any) {
|
|||
);
|
||||
// The stream itself also may report a schema
|
||||
if let Some(stream_schema) = result_stream.schema() {
|
||||
// strip metadata (GetFlightInfo doesn't include metadata for
|
||||
// some reason) before comparison
|
||||
// https://github.com/influxdata/influxdb_iox/issues/7282
|
||||
let stream_schema = strip_metadata(stream_schema);
|
||||
assert_eq!(stream_schema.as_ref(), &flight_info_schema);
|
||||
}
|
||||
}
|
||||
|
@ -1615,16 +1608,6 @@ async fn assert_schema(client: &mut FlightClient, cmd: Any) {
|
|||
assert!(saw_data);
|
||||
}
|
||||
|
||||
fn strip_metadata(schema: &Schema) -> SchemaRef {
|
||||
let stripped_fields: Fields = schema
|
||||
.fields()
|
||||
.iter()
|
||||
.map(|f| f.as_ref().clone().with_metadata(HashMap::new()))
|
||||
.collect();
|
||||
|
||||
Arc::new(Schema::new(stripped_fields))
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn authz() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
|
|
@ -235,9 +235,9 @@ async fn test_tracing_create_compactor_trace() {
|
|||
|
||||
// "shallow" packet inspection and verify the UDP server got something that had some expected
|
||||
// results. We could look for any text of any of the compaction spans. The name of the span
|
||||
// for data fusion execution is arbitrarily chosen.
|
||||
// for acquiring permit is arbitrarily chosen.
|
||||
udp_capture
|
||||
.wait_for(|m| m.to_string().contains("data_fusion"))
|
||||
.wait_for(|m| m.to_string().contains("acquire_permit"))
|
||||
.await;
|
||||
|
||||
// debugging assistance
|
||||
|
|
|
@ -135,3 +135,28 @@ SELECT
|
|||
from cpu
|
||||
where time between timestamp '2000-05-05T12:00:00Z' and timestamp '2000-05-05T12:59:00Z'
|
||||
group by region, minute;
|
||||
|
||||
-- With a VALUES clause, which affects how the range is found
|
||||
-- Fix for https://github.com/influxdata/idpe/issues/17880
|
||||
SELECT
|
||||
date_bin_gapfill(INTERVAL '1 minute', time) as _time,
|
||||
pod,
|
||||
locf(selector_last(image, time))
|
||||
FROM
|
||||
(VALUES ('2023-06-10T12:00:00Z'::timestamp, 'pod1', 'imageA'),
|
||||
('2023-06-10T12:00:00Z'::timestamp, 'pod2', 'imageA'),
|
||||
('2023-06-10T12:00:01Z'::timestamp, 'pod1', 'imageB'),
|
||||
('2023-06-10T12:00:02Z'::timestamp, 'pod1', 'imageB'),
|
||||
('2023-06-10T12:00:02Z'::timestamp, 'pod2', 'imageB')
|
||||
) AS data(time, pod, image)
|
||||
WHERE time >= timestamp '2023-06-10T11:55:00Z' AND time < timestamp '2023-06-10T12:05:00Z'
|
||||
GROUP BY _time, pod;
|
||||
|
||||
-- This is not supported since the grouping is not on the values produced by
|
||||
-- date_bin_gapfill. The query should fail with a reasonable message.
|
||||
select
|
||||
date_bin_gapfill('60 seconds'::interval, time)::bigint as time,
|
||||
sum(idle)
|
||||
from cpu
|
||||
WHERE time >= '2020-06-11T16:52:00Z' AND time < '2020-06-11T16:54:00Z'
|
||||
group by 1;
|
||||
|
|
|
@ -222,4 +222,33 @@ Error during planning: gap-filling query is missing lower time bound
|
|||
| b | 2000-05-05T12:30:00Z | 27.049999999999997 |
|
||||
| b | 2000-05-05T12:40:00Z | 27.049999999999997 |
|
||||
| b | 2000-05-05T12:50:00Z | 27.049999999999997 |
|
||||
+--------+----------------------+--------------------+
|
||||
+--------+----------------------+--------------------+
|
||||
-- SQL: SELECT date_bin_gapfill(INTERVAL '1 minute', time) as _time, pod, locf(selector_last(image, time)) FROM (VALUES ('2023-06-10T12:00:00Z'::timestamp, 'pod1', 'imageA'), ('2023-06-10T12:00:00Z'::timestamp, 'pod2', 'imageA'), ('2023-06-10T12:00:01Z'::timestamp, 'pod1', 'imageB'), ('2023-06-10T12:00:02Z'::timestamp, 'pod1', 'imageB'), ('2023-06-10T12:00:02Z'::timestamp, 'pod2', 'imageB') ) AS data(time, pod, image) WHERE time >= timestamp '2023-06-10T11:55:00Z' AND time < timestamp '2023-06-10T12:05:00Z' GROUP BY _time, pod;
|
||||
+----------------------+------+--------------------------------------------+
|
||||
| _time | pod | locf(selector_last(image,time)) |
|
||||
+----------------------+------+--------------------------------------------+
|
||||
| 2023-06-10T11:55:00Z | pod1 | |
|
||||
| 2023-06-10T11:56:00Z | pod1 | |
|
||||
| 2023-06-10T11:57:00Z | pod1 | |
|
||||
| 2023-06-10T11:58:00Z | pod1 | |
|
||||
| 2023-06-10T11:59:00Z | pod1 | |
|
||||
| 2023-06-10T12:00:00Z | pod1 | {value: imageB, time: 2023-06-10T12:00:02} |
|
||||
| 2023-06-10T12:01:00Z | pod1 | {value: imageB, time: 2023-06-10T12:00:02} |
|
||||
| 2023-06-10T12:02:00Z | pod1 | {value: imageB, time: 2023-06-10T12:00:02} |
|
||||
| 2023-06-10T12:03:00Z | pod1 | {value: imageB, time: 2023-06-10T12:00:02} |
|
||||
| 2023-06-10T12:04:00Z | pod1 | {value: imageB, time: 2023-06-10T12:00:02} |
|
||||
| 2023-06-10T11:55:00Z | pod2 | |
|
||||
| 2023-06-10T11:56:00Z | pod2 | |
|
||||
| 2023-06-10T11:57:00Z | pod2 | |
|
||||
| 2023-06-10T11:58:00Z | pod2 | |
|
||||
| 2023-06-10T11:59:00Z | pod2 | |
|
||||
| 2023-06-10T12:00:00Z | pod2 | {value: imageB, time: 2023-06-10T12:00:02} |
|
||||
| 2023-06-10T12:01:00Z | pod2 | {value: imageB, time: 2023-06-10T12:00:02} |
|
||||
| 2023-06-10T12:02:00Z | pod2 | {value: imageB, time: 2023-06-10T12:00:02} |
|
||||
| 2023-06-10T12:03:00Z | pod2 | {value: imageB, time: 2023-06-10T12:00:02} |
|
||||
| 2023-06-10T12:04:00Z | pod2 | {value: imageB, time: 2023-06-10T12:00:02} |
|
||||
+----------------------+------+--------------------------------------------+
|
||||
-- SQL: select date_bin_gapfill('60 seconds'::interval, time)::bigint as time, sum(idle) from cpu WHERE time >= '2020-06-11T16:52:00Z' AND time < '2020-06-11T16:54:00Z' group by 1;
|
||||
Error while planning query: Optimizer rule 'handle_gap_fill' failed
|
||||
caused by
|
||||
Error during planning: DATE_BIN_GAPFILL must a top-level expression in the GROUP BY clause when gap filling. It cannot be part of another expression or cast
|
|
@ -339,6 +339,12 @@ SELECT COUNT(f64), SUM(f64) FROM m0 GROUP BY TIME(30s) FILL(none);
|
|||
-- supports offset parameter
|
||||
SELECT COUNT(f64), SUM(f64) FROM m0 GROUP BY TIME(30s, 1s) FILL(none);
|
||||
|
||||
-- N.B. The gap filling of the COUNT(usage_idle) and COUNT(bytes_free)
|
||||
-- columns happens before the two measurements are UNIONed together
|
||||
-- when producing the output table. This means that a COUNT column for
|
||||
-- a field that is not present for a measurement will contain NULLs,
|
||||
-- rather than being filled with 0s. This is consistent with older
|
||||
-- versions of influxdb.
|
||||
SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk;
|
||||
SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk GROUP BY TIME(1s) FILL(none);
|
||||
SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk GROUP BY cpu;
|
||||
|
@ -360,7 +366,9 @@ SELECT COUNT(usage_idle), usage_idle FROM cpu;
|
|||
|
||||
-- Default FILL(null) when FILL is omitted
|
||||
SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s);
|
||||
SELECT COUNT(usage_idle)+2 FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s);
|
||||
SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s);
|
||||
SELECT COUNT(usage_idle)+1, COUNT(bytes_free)+2 FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s);
|
||||
SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(null);
|
||||
SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(null);
|
||||
SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(previous);
|
||||
|
@ -655,3 +663,6 @@ SELECT SUM(bytes_free) / SUM(bytes_used) AS result FROM disk WHERE time >= '2022
|
|||
|
||||
-- Unsupported: host is a field in one subquery and a tag in the other
|
||||
SELECT count(host) FROM (SELECT usage_idle AS host FROM cpu), (SELECT host, usage_idle FROM cpu);
|
||||
|
||||
-- Using a selector or an aggregate function on a tag column returns NULL
|
||||
SELECT last(host) AS host, first(usage_idle) AS usage_idle FROM cpu GROUP BY host;
|
||||
|
|
|
@ -919,10 +919,10 @@ name: logical_plan
|
|||
plan
|
||||
Sort: iox::measurement ASC NULLS LAST, tag0 ASC NULLS LAST, time ASC NULLS LAST
|
||||
Union
|
||||
Projection: Dictionary(Int32, Utf8("m0")) AS iox::measurement, TimestampNanosecond(0, None) AS time, m0.tag0 AS tag0, COUNT(m0.f64) AS count, SUM(m0.f64) AS sum, STDDEV(m0.f64) AS stddev
|
||||
Projection: Dictionary(Int32, Utf8("m0")) AS iox::measurement, TimestampNanosecond(0, None) AS time, m0.tag0 AS tag0, coalesce_struct(COUNT(m0.f64), Int64(0)) AS count, SUM(m0.f64) AS sum, STDDEV(m0.f64) AS stddev
|
||||
Aggregate: groupBy=[[m0.tag0]], aggr=[[COUNT(m0.f64), SUM(m0.f64), STDDEV(m0.f64)]]
|
||||
TableScan: m0 projection=[f64, tag0]
|
||||
Projection: Dictionary(Int32, Utf8("m1")) AS iox::measurement, TimestampNanosecond(0, None) AS time, m1.tag0 AS tag0, COUNT(m1.f64) AS count, SUM(m1.f64) AS sum, STDDEV(m1.f64) AS stddev
|
||||
Projection: Dictionary(Int32, Utf8("m1")) AS iox::measurement, TimestampNanosecond(0, None) AS time, m1.tag0 AS tag0, coalesce_struct(COUNT(m1.f64), Int64(0)) AS count, SUM(m1.f64) AS sum, STDDEV(m1.f64) AS stddev
|
||||
Aggregate: groupBy=[[m1.tag0]], aggr=[[COUNT(m1.f64), SUM(m1.f64), STDDEV(m1.f64)]]
|
||||
TableScan: m1 projection=[f64, tag0]
|
||||
name: physical_plan
|
||||
|
@ -930,7 +930,7 @@ name: physical_plan
|
|||
SortPreservingMergeExec: [iox::measurement@0 ASC NULLS LAST,tag0@2 ASC NULLS LAST,time@1 ASC NULLS LAST]
|
||||
UnionExec
|
||||
SortExec: expr=[iox::measurement@0 ASC NULLS LAST,tag0@2 ASC NULLS LAST,time@1 ASC NULLS LAST]
|
||||
ProjectionExec: expr=[m0 as iox::measurement, 0 as time, tag0@0 as tag0, COUNT(m0.f64)@1 as count, SUM(m0.f64)@2 as sum, STDDEV(m0.f64)@3 as stddev]
|
||||
ProjectionExec: expr=[m0 as iox::measurement, 0 as time, tag0@0 as tag0, coalesce_struct(COUNT(m0.f64)@1, 0) as count, SUM(m0.f64)@2 as sum, STDDEV(m0.f64)@3 as stddev]
|
||||
AggregateExec: mode=FinalPartitioned, gby=[tag0@0 as tag0], aggr=[COUNT(m0.f64), SUM(m0.f64), STDDEV(m0.f64)]
|
||||
CoalesceBatchesExec: target_batch_size=8192
|
||||
RepartitionExec: partitioning=Hash([tag0@0], 4), input_partitions=4
|
||||
|
@ -938,7 +938,7 @@ name: physical_plan
|
|||
AggregateExec: mode=Partial, gby=[tag0@1 as tag0], aggr=[COUNT(m0.f64), SUM(m0.f64), STDDEV(m0.f64)]
|
||||
ParquetExec: file_groups={1 group: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, projection=[f64, tag0]
|
||||
SortExec: expr=[iox::measurement@0 ASC NULLS LAST,tag0@2 ASC NULLS LAST,time@1 ASC NULLS LAST]
|
||||
ProjectionExec: expr=[m1 as iox::measurement, 0 as time, tag0@0 as tag0, COUNT(m1.f64)@1 as count, SUM(m1.f64)@2 as sum, STDDEV(m1.f64)@3 as stddev]
|
||||
ProjectionExec: expr=[m1 as iox::measurement, 0 as time, tag0@0 as tag0, coalesce_struct(COUNT(m1.f64)@1, 0) as count, SUM(m1.f64)@2 as sum, STDDEV(m1.f64)@3 as stddev]
|
||||
RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=4
|
||||
AggregateExec: mode=FinalPartitioned, gby=[tag0@0 as tag0], aggr=[COUNT(m1.f64), SUM(m1.f64), STDDEV(m1.f64)], ordering_mode=FullyOrdered
|
||||
CoalesceBatchesExec: target_batch_size=8192
|
||||
|
@ -1267,9 +1267,19 @@ name: cpu
|
|||
| time | count |
|
||||
+---------------------+-------+
|
||||
| 2022-10-31T02:00:00 | 6 |
|
||||
| 2022-10-31T02:00:30 | |
|
||||
| 2022-10-31T02:01:00 | |
|
||||
| 2022-10-31T02:01:30 | |
|
||||
| 2022-10-31T02:00:30 | 0 |
|
||||
| 2022-10-31T02:01:00 | 0 |
|
||||
| 2022-10-31T02:01:30 | 0 |
|
||||
+---------------------+-------+
|
||||
-- InfluxQL: SELECT COUNT(usage_idle)+2 FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s);
|
||||
name: cpu
|
||||
+---------------------+-------+
|
||||
| time | count |
|
||||
+---------------------+-------+
|
||||
| 2022-10-31T02:00:00 | 8 |
|
||||
| 2022-10-31T02:00:30 | 2 |
|
||||
| 2022-10-31T02:01:00 | 2 |
|
||||
| 2022-10-31T02:01:30 | 2 |
|
||||
+---------------------+-------+
|
||||
-- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s);
|
||||
name: cpu
|
||||
|
@ -1277,18 +1287,37 @@ name: cpu
|
|||
| time | count | count_1 |
|
||||
+---------------------+-------+---------+
|
||||
| 2022-10-31T02:00:00 | 6 | |
|
||||
| 2022-10-31T02:00:30 | | |
|
||||
| 2022-10-31T02:01:00 | | |
|
||||
| 2022-10-31T02:01:30 | | |
|
||||
| 2022-10-31T02:00:30 | 0 | |
|
||||
| 2022-10-31T02:01:00 | 0 | |
|
||||
| 2022-10-31T02:01:30 | 0 | |
|
||||
+---------------------+-------+---------+
|
||||
name: disk
|
||||
+---------------------+-------+---------+
|
||||
| time | count | count_1 |
|
||||
+---------------------+-------+---------+
|
||||
| 2022-10-31T02:00:00 | | 6 |
|
||||
| 2022-10-31T02:00:30 | | |
|
||||
| 2022-10-31T02:01:00 | | |
|
||||
| 2022-10-31T02:01:30 | | |
|
||||
| 2022-10-31T02:00:30 | | 0 |
|
||||
| 2022-10-31T02:01:00 | | 0 |
|
||||
| 2022-10-31T02:01:30 | | 0 |
|
||||
+---------------------+-------+---------+
|
||||
-- InfluxQL: SELECT COUNT(usage_idle)+1, COUNT(bytes_free)+2 FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s);
|
||||
name: cpu
|
||||
+---------------------+-------+---------+
|
||||
| time | count | count_1 |
|
||||
+---------------------+-------+---------+
|
||||
| 2022-10-31T02:00:00 | 7 | |
|
||||
| 2022-10-31T02:00:30 | 1 | |
|
||||
| 2022-10-31T02:01:00 | 1 | |
|
||||
| 2022-10-31T02:01:30 | 1 | |
|
||||
+---------------------+-------+---------+
|
||||
name: disk
|
||||
+---------------------+-------+---------+
|
||||
| time | count | count_1 |
|
||||
+---------------------+-------+---------+
|
||||
| 2022-10-31T02:00:00 | | 8 |
|
||||
| 2022-10-31T02:00:30 | | 2 |
|
||||
| 2022-10-31T02:01:00 | | 2 |
|
||||
| 2022-10-31T02:01:30 | | 2 |
|
||||
+---------------------+-------+---------+
|
||||
-- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(null);
|
||||
name: cpu
|
||||
|
@ -1296,9 +1325,9 @@ name: cpu
|
|||
| time | count |
|
||||
+---------------------+-------+
|
||||
| 2022-10-31T02:00:00 | 6 |
|
||||
| 2022-10-31T02:00:30 | |
|
||||
| 2022-10-31T02:01:00 | |
|
||||
| 2022-10-31T02:01:30 | |
|
||||
| 2022-10-31T02:00:30 | 0 |
|
||||
| 2022-10-31T02:01:00 | 0 |
|
||||
| 2022-10-31T02:01:30 | 0 |
|
||||
+---------------------+-------+
|
||||
-- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(null);
|
||||
name: cpu
|
||||
|
@ -1306,18 +1335,18 @@ name: cpu
|
|||
| time | count | count_1 |
|
||||
+---------------------+-------+---------+
|
||||
| 2022-10-31T02:00:00 | 6 | |
|
||||
| 2022-10-31T02:00:30 | | |
|
||||
| 2022-10-31T02:01:00 | | |
|
||||
| 2022-10-31T02:01:30 | | |
|
||||
| 2022-10-31T02:00:30 | 0 | |
|
||||
| 2022-10-31T02:01:00 | 0 | |
|
||||
| 2022-10-31T02:01:30 | 0 | |
|
||||
+---------------------+-------+---------+
|
||||
name: disk
|
||||
+---------------------+-------+---------+
|
||||
| time | count | count_1 |
|
||||
+---------------------+-------+---------+
|
||||
| 2022-10-31T02:00:00 | | 6 |
|
||||
| 2022-10-31T02:00:30 | | |
|
||||
| 2022-10-31T02:01:00 | | |
|
||||
| 2022-10-31T02:01:30 | | |
|
||||
| 2022-10-31T02:00:30 | | 0 |
|
||||
| 2022-10-31T02:01:00 | | 0 |
|
||||
| 2022-10-31T02:01:30 | | 0 |
|
||||
+---------------------+-------+---------+
|
||||
-- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) FILL(previous);
|
||||
name: cpu
|
||||
|
@ -1507,9 +1536,9 @@ tags: cpu=cpu-total
|
|||
| time | count |
|
||||
+---------------------+-------+
|
||||
| 2022-10-31T02:00:00 | 2 |
|
||||
| 2022-10-31T02:00:30 | |
|
||||
| 2022-10-31T02:01:00 | |
|
||||
| 2022-10-31T02:01:30 | |
|
||||
| 2022-10-31T02:00:30 | 0 |
|
||||
| 2022-10-31T02:01:00 | 0 |
|
||||
| 2022-10-31T02:01:30 | 0 |
|
||||
+---------------------+-------+
|
||||
name: cpu
|
||||
tags: cpu=cpu0
|
||||
|
@ -1517,9 +1546,9 @@ tags: cpu=cpu0
|
|||
| time | count |
|
||||
+---------------------+-------+
|
||||
| 2022-10-31T02:00:00 | 2 |
|
||||
| 2022-10-31T02:00:30 | |
|
||||
| 2022-10-31T02:01:00 | |
|
||||
| 2022-10-31T02:01:30 | |
|
||||
| 2022-10-31T02:00:30 | 0 |
|
||||
| 2022-10-31T02:01:00 | 0 |
|
||||
| 2022-10-31T02:01:30 | 0 |
|
||||
+---------------------+-------+
|
||||
name: cpu
|
||||
tags: cpu=cpu1
|
||||
|
@ -1527,9 +1556,9 @@ tags: cpu=cpu1
|
|||
| time | count |
|
||||
+---------------------+-------+
|
||||
| 2022-10-31T02:00:00 | 2 |
|
||||
| 2022-10-31T02:00:30 | |
|
||||
| 2022-10-31T02:01:00 | |
|
||||
| 2022-10-31T02:01:30 | |
|
||||
| 2022-10-31T02:00:30 | 0 |
|
||||
| 2022-10-31T02:01:00 | 0 |
|
||||
| 2022-10-31T02:01:30 | 0 |
|
||||
+---------------------+-------+
|
||||
-- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s), cpu FILL(null);
|
||||
name: cpu
|
||||
|
@ -1538,9 +1567,9 @@ tags: cpu=cpu-total
|
|||
| time | count |
|
||||
+---------------------+-------+
|
||||
| 2022-10-31T02:00:00 | 2 |
|
||||
| 2022-10-31T02:00:30 | |
|
||||
| 2022-10-31T02:01:00 | |
|
||||
| 2022-10-31T02:01:30 | |
|
||||
| 2022-10-31T02:00:30 | 0 |
|
||||
| 2022-10-31T02:01:00 | 0 |
|
||||
| 2022-10-31T02:01:30 | 0 |
|
||||
+---------------------+-------+
|
||||
name: cpu
|
||||
tags: cpu=cpu0
|
||||
|
@ -1548,9 +1577,9 @@ tags: cpu=cpu0
|
|||
| time | count |
|
||||
+---------------------+-------+
|
||||
| 2022-10-31T02:00:00 | 2 |
|
||||
| 2022-10-31T02:00:30 | |
|
||||
| 2022-10-31T02:01:00 | |
|
||||
| 2022-10-31T02:01:30 | |
|
||||
| 2022-10-31T02:00:30 | 0 |
|
||||
| 2022-10-31T02:01:00 | 0 |
|
||||
| 2022-10-31T02:01:30 | 0 |
|
||||
+---------------------+-------+
|
||||
name: cpu
|
||||
tags: cpu=cpu1
|
||||
|
@ -1558,9 +1587,9 @@ tags: cpu=cpu1
|
|||
| time | count |
|
||||
+---------------------+-------+
|
||||
| 2022-10-31T02:00:00 | 2 |
|
||||
| 2022-10-31T02:00:30 | |
|
||||
| 2022-10-31T02:01:00 | |
|
||||
| 2022-10-31T02:01:30 | |
|
||||
| 2022-10-31T02:00:30 | 0 |
|
||||
| 2022-10-31T02:01:00 | 0 |
|
||||
| 2022-10-31T02:01:30 | 0 |
|
||||
+---------------------+-------+
|
||||
-- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s), cpu, device FILL(null);
|
||||
name: cpu
|
||||
|
@ -1569,9 +1598,9 @@ tags: cpu=cpu-total, device=
|
|||
| time | count | count_1 |
|
||||
+---------------------+-------+---------+
|
||||
| 2022-10-31T02:00:00 | 2 | |
|
||||
| 2022-10-31T02:00:30 | | |
|
||||
| 2022-10-31T02:01:00 | | |
|
||||
| 2022-10-31T02:01:30 | | |
|
||||
| 2022-10-31T02:00:30 | 0 | |
|
||||
| 2022-10-31T02:01:00 | 0 | |
|
||||
| 2022-10-31T02:01:30 | 0 | |
|
||||
+---------------------+-------+---------+
|
||||
name: cpu
|
||||
tags: cpu=cpu0, device=
|
||||
|
@ -1579,9 +1608,9 @@ tags: cpu=cpu0, device=
|
|||
| time | count | count_1 |
|
||||
+---------------------+-------+---------+
|
||||
| 2022-10-31T02:00:00 | 2 | |
|
||||
| 2022-10-31T02:00:30 | | |
|
||||
| 2022-10-31T02:01:00 | | |
|
||||
| 2022-10-31T02:01:30 | | |
|
||||
| 2022-10-31T02:00:30 | 0 | |
|
||||
| 2022-10-31T02:01:00 | 0 | |
|
||||
| 2022-10-31T02:01:30 | 0 | |
|
||||
+---------------------+-------+---------+
|
||||
name: cpu
|
||||
tags: cpu=cpu1, device=
|
||||
|
@ -1589,9 +1618,9 @@ tags: cpu=cpu1, device=
|
|||
| time | count | count_1 |
|
||||
+---------------------+-------+---------+
|
||||
| 2022-10-31T02:00:00 | 2 | |
|
||||
| 2022-10-31T02:00:30 | | |
|
||||
| 2022-10-31T02:01:00 | | |
|
||||
| 2022-10-31T02:01:30 | | |
|
||||
| 2022-10-31T02:00:30 | 0 | |
|
||||
| 2022-10-31T02:01:00 | 0 | |
|
||||
| 2022-10-31T02:01:30 | 0 | |
|
||||
+---------------------+-------+---------+
|
||||
name: disk
|
||||
tags: cpu=, device=disk1s1
|
||||
|
@ -1599,9 +1628,9 @@ tags: cpu=, device=disk1s1
|
|||
| time | count | count_1 |
|
||||
+---------------------+-------+---------+
|
||||
| 2022-10-31T02:00:00 | | 2 |
|
||||
| 2022-10-31T02:00:30 | | |
|
||||
| 2022-10-31T02:01:00 | | |
|
||||
| 2022-10-31T02:01:30 | | |
|
||||
| 2022-10-31T02:00:30 | | 0 |
|
||||
| 2022-10-31T02:01:00 | | 0 |
|
||||
| 2022-10-31T02:01:30 | | 0 |
|
||||
+---------------------+-------+---------+
|
||||
name: disk
|
||||
tags: cpu=, device=disk1s2
|
||||
|
@ -1609,9 +1638,9 @@ tags: cpu=, device=disk1s2
|
|||
| time | count | count_1 |
|
||||
+---------------------+-------+---------+
|
||||
| 2022-10-31T02:00:00 | | 2 |
|
||||
| 2022-10-31T02:00:30 | | |
|
||||
| 2022-10-31T02:01:00 | | |
|
||||
| 2022-10-31T02:01:30 | | |
|
||||
| 2022-10-31T02:00:30 | | 0 |
|
||||
| 2022-10-31T02:01:00 | | 0 |
|
||||
| 2022-10-31T02:01:30 | | 0 |
|
||||
+---------------------+-------+---------+
|
||||
name: disk
|
||||
tags: cpu=, device=disk1s5
|
||||
|
@ -1619,9 +1648,9 @@ tags: cpu=, device=disk1s5
|
|||
| time | count | count_1 |
|
||||
+---------------------+-------+---------+
|
||||
| 2022-10-31T02:00:00 | | 2 |
|
||||
| 2022-10-31T02:00:30 | | |
|
||||
| 2022-10-31T02:01:00 | | |
|
||||
| 2022-10-31T02:01:30 | | |
|
||||
| 2022-10-31T02:00:30 | | 0 |
|
||||
| 2022-10-31T02:01:00 | | 0 |
|
||||
| 2022-10-31T02:01:30 | | 0 |
|
||||
+---------------------+-------+---------+
|
||||
-- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s), cpu FILL(previous);
|
||||
name: cpu
|
||||
|
@ -2202,15 +2231,15 @@ name: cpu
|
|||
| time | count |
|
||||
+---------------------+-------+
|
||||
| 2022-10-31T02:00:00 | 6 |
|
||||
| 2022-10-31T02:00:30 | |
|
||||
| 2022-10-31T02:00:30 | 0 |
|
||||
+---------------------+-------+
|
||||
-- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:05:00Z' GROUP BY TIME(30s) LIMIT 2 OFFSET 2;
|
||||
name: cpu
|
||||
+---------------------+-------+
|
||||
| time | count |
|
||||
+---------------------+-------+
|
||||
| 2022-10-31T02:01:00 | |
|
||||
| 2022-10-31T02:01:30 | |
|
||||
| 2022-10-31T02:01:00 | 0 |
|
||||
| 2022-10-31T02:01:30 | 0 |
|
||||
+---------------------+-------+
|
||||
-- InfluxQL: SELECT COUNT(usage_idle) FROM cpu WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:05:00Z' GROUP BY TIME(30s), cpu LIMIT 2;
|
||||
name: cpu
|
||||
|
@ -2219,7 +2248,7 @@ tags: cpu=cpu-total
|
|||
| time | count |
|
||||
+---------------------+-------+
|
||||
| 2022-10-31T02:00:00 | 2 |
|
||||
| 2022-10-31T02:00:30 | |
|
||||
| 2022-10-31T02:00:30 | 0 |
|
||||
+---------------------+-------+
|
||||
name: cpu
|
||||
tags: cpu=cpu0
|
||||
|
@ -2227,7 +2256,7 @@ tags: cpu=cpu0
|
|||
| time | count |
|
||||
+---------------------+-------+
|
||||
| 2022-10-31T02:00:00 | 2 |
|
||||
| 2022-10-31T02:00:30 | |
|
||||
| 2022-10-31T02:00:30 | 0 |
|
||||
+---------------------+-------+
|
||||
name: cpu
|
||||
tags: cpu=cpu1
|
||||
|
@ -2235,7 +2264,7 @@ tags: cpu=cpu1
|
|||
| time | count |
|
||||
+---------------------+-------+
|
||||
| 2022-10-31T02:00:00 | 2 |
|
||||
| 2022-10-31T02:00:30 | |
|
||||
| 2022-10-31T02:00:30 | 0 |
|
||||
+---------------------+-------+
|
||||
-- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s) LIMIT 1;
|
||||
name: cpu
|
||||
|
@ -2268,13 +2297,13 @@ name: cpu
|
|||
+---------------------+-------+---------+
|
||||
| time | count | count_1 |
|
||||
+---------------------+-------+---------+
|
||||
| 2022-10-31T02:01:30 | | |
|
||||
| 2022-10-31T02:01:30 | 0 | |
|
||||
+---------------------+-------+---------+
|
||||
name: disk
|
||||
+---------------------+-------+---------+
|
||||
| time | count | count_1 |
|
||||
+---------------------+-------+---------+
|
||||
| 2022-10-31T02:01:30 | | |
|
||||
| 2022-10-31T02:01:30 | | 0 |
|
||||
+---------------------+-------+---------+
|
||||
-- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(30s), cpu, device LIMIT 1;
|
||||
name: cpu
|
||||
|
@ -3087,4 +3116,12 @@ name: disk
|
|||
| 2022-10-31T02:00:10 | 0.007000528400759146 |
|
||||
+---------------------+----------------------+
|
||||
-- InfluxQL: SELECT count(host) FROM (SELECT usage_idle AS host FROM cpu), (SELECT host, usage_idle FROM cpu);
|
||||
Error while planning query: This feature is not implemented: cannot mix tag and field columns with the same name: host
|
||||
Error while planning query: This feature is not implemented: cannot mix tag and field columns with the same name: host
|
||||
-- InfluxQL: SELECT last(host) AS host, first(usage_idle) AS usage_idle FROM cpu GROUP BY host;
|
||||
name: cpu
|
||||
tags: host=
|
||||
+---------------------+------+------------+
|
||||
| time | host | usage_idle |
|
||||
+---------------------+------+------------+
|
||||
| 1970-01-01T00:00:00 | | 2.98 |
|
||||
+---------------------+------+------------+
|
|
@ -21,6 +21,19 @@ SELECT difference(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AN
|
|||
-- group by time and a tag
|
||||
SELECT difference(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
|
||||
|
||||
--
|
||||
-- difference + selector
|
||||
--
|
||||
SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
|
||||
SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
|
||||
-- the input data is regular data at 10s intervals, so 7s windows ensure the `first` generates windows with NULL values to test NULL handling of difference
|
||||
SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
|
||||
SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
|
||||
-- linear filling of selector functions produces an execution error
|
||||
-- (see https://github.com/influxdata/influxdb_iox/issues/8302).
|
||||
-- SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
|
||||
-- group by time and a tag
|
||||
SELECT difference(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
|
||||
|
||||
--
|
||||
-- non_negative_difference
|
||||
|
@ -35,6 +48,11 @@ SELECT non_negative_difference(usage_idle) FROM cpu WHERE time >= 00000001300000
|
|||
--
|
||||
SELECT non_negative_difference(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
|
||||
|
||||
--
|
||||
-- non_negative_difference + selector
|
||||
--
|
||||
SELECT non_negative_difference(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
|
||||
|
||||
--
|
||||
-- moving_average
|
||||
--
|
||||
|
@ -61,6 +79,17 @@ SELECT moving_average(mean(writes), 3) FROM diskio WHERE time >= 000000013000000
|
|||
SELECT moving_average(mean(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
|
||||
SELECT moving_average(mean(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
|
||||
|
||||
--
|
||||
-- moving_average + selector
|
||||
--
|
||||
-- the input data is regular data at 10s intervals, so 7s windows ensure the `first` generates windows with NULL values to test NULL handling of moving_average
|
||||
SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
|
||||
SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
|
||||
SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
|
||||
-- linear filling of selector functions produces an execution error
|
||||
-- (see https://github.com/influxdata/influxdb_iox/issues/8302).
|
||||
-- SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
|
||||
|
||||
--
|
||||
-- combining window functions
|
||||
--
|
||||
|
@ -109,7 +138,7 @@ SELECT derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AN
|
|||
SELECT derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
|
||||
SELECT derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
|
||||
SELECT derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
|
||||
-- the input data is regular data at 10s intervals, so 7s windows ensure the `mean` generates windows with NULL values to test NULL handling of difference
|
||||
-- the input data is regular data at 10s intervals, so 7s windows ensure the `mean` generates windows with NULL values to test NULL handling of derivative
|
||||
SELECT derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
|
||||
SELECT derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
|
||||
SELECT derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
|
||||
|
@ -120,6 +149,26 @@ SELECT derivative(mean(writes), 500ms) FROM diskio WHERE time >= 000000013000000
|
|||
SELECT derivative(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
|
||||
SELECT derivative(mean(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
|
||||
|
||||
--
|
||||
-- derivative + selector
|
||||
--
|
||||
SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
|
||||
SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
|
||||
SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
|
||||
SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
|
||||
-- the input data is regular data at 10s intervals, so 7s windows ensure the `first` generates windows with NULL values to test NULL handling of derivative
|
||||
SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
|
||||
SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
|
||||
SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
|
||||
SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
|
||||
-- linear filling of selector functions produces an execution error
|
||||
-- (see https://github.com/influxdata/influxdb_iox/issues/8302).
|
||||
-- SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
|
||||
-- SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
|
||||
-- group by time and a tag
|
||||
SELECT derivative(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
|
||||
SELECT derivative(first(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
|
||||
|
||||
--
|
||||
-- non_negative_derivative
|
||||
--
|
||||
|
@ -138,7 +187,7 @@ SELECT non_negative_derivative(mean(writes)) FROM diskio WHERE time >= 000000013
|
|||
SELECT non_negative_derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
|
||||
SELECT non_negative_derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
|
||||
SELECT non_negative_derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
|
||||
-- the input data is regular data at 10s intervals, so 7s windows ensure the `mean` generates windows with NULL values to test NULL handling of difference
|
||||
-- the input data is regular data at 10s intervals, so 7s windows ensure the `mean` generates windows with NULL values to test NULL handling of non_negative_derivative
|
||||
SELECT non_negative_derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
|
||||
SELECT non_negative_derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
|
||||
SELECT non_negative_derivative(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
|
||||
|
@ -147,4 +196,58 @@ SELECT non_negative_derivative(mean(writes)) FROM diskio WHERE time >= 000000013
|
|||
SELECT non_negative_derivative(mean(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
|
||||
-- group by time and a tag
|
||||
SELECT non_negative_derivative(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
|
||||
SELECT non_negative_derivative(mean(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
|
||||
SELECT non_negative_derivative(mean(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
|
||||
|
||||
--
|
||||
-- non_negative_derivative + selector
|
||||
--
|
||||
SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
|
||||
SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
|
||||
SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
|
||||
SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
|
||||
-- the input data is regular data at 10s intervals, so 7s windows ensure the `first` generates windows with NULL values to test NULL handling of non_negative_derivative
|
||||
SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
|
||||
SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
|
||||
SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
|
||||
SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
|
||||
-- linear filling of selector functions produces an execution error
|
||||
-- (see https://github.com/influxdata/influxdb_iox/issues/8302).
|
||||
-- SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
|
||||
-- SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
|
||||
-- group by time and a tag
|
||||
SELECT non_negative_derivative(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
|
||||
SELECT non_negative_derivative(first(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
|
||||
|
||||
--
|
||||
-- cumulative_sum
|
||||
--
|
||||
SELECT cumulative_sum(writes) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001;
|
||||
SELECT cumulative_sum(usage_system) FROM cpu WHERE time >= 0000000060000000000 AND time < 0000000210000000001 AND cpu = 'cpu0';
|
||||
SELECT cumulative_sum(usage_idle), cumulative_sum(usage_system) FROM cpu WHERE time >= 0000000060000000000 AND time < 0000000210000000001 AND cpu = 'cpu0';
|
||||
SELECT cumulative_sum(usage_idle) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY cpu;
|
||||
|
||||
--
|
||||
-- cumulative_sum + aggregate
|
||||
--
|
||||
SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
|
||||
SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
|
||||
-- the input data is regular data at 10s intervals, so 7s windows ensure the `mean` generates windows with NULL values to test NULL handling of cumulative_sum
|
||||
SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
|
||||
SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
|
||||
SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
|
||||
-- group by time and a tag
|
||||
SELECT cumulative_sum(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
|
||||
|
||||
--
|
||||
-- cumulative_sum + selector
|
||||
--
|
||||
SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
|
||||
SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
|
||||
-- the input data is regular data at 10s intervals, so 7s windows ensure the `first` generates windows with NULL values to test NULL handling of cumulative_sum
|
||||
SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
|
||||
SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
|
||||
-- linear filling of selector functions produces an execution error
|
||||
-- (see https://github.com/influxdata/influxdb_iox/issues/8302).
|
||||
-- SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
|
||||
-- group by time and a tag
|
||||
SELECT cumulative_sum(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
|
|
@ -148,6 +148,86 @@ tags: cpu=cpu1
|
|||
| 1970-01-01T00:02:30 | -0.03333333333334565 |
|
||||
| 1970-01-01T00:03:00 | -0.03333333333333144 |
|
||||
+---------------------+----------------------+
|
||||
-- InfluxQL: SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
|
||||
name: diskio
|
||||
+---------------------+------------+
|
||||
| time | difference |
|
||||
+---------------------+------------+
|
||||
| 1970-01-01T00:02:20 | 164 |
|
||||
| 1970-01-01T00:02:27 | 187 |
|
||||
| 1970-01-01T00:02:34 | 112 |
|
||||
| 1970-01-01T00:02:48 | 110 |
|
||||
| 1970-01-01T00:02:55 | 219 |
|
||||
| 1970-01-01T00:03:09 | 75 |
|
||||
| 1970-01-01T00:03:16 | 76 |
|
||||
| 1970-01-01T00:03:30 | 146 |
|
||||
+---------------------+------------+
|
||||
-- InfluxQL: SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
|
||||
name: diskio
|
||||
+---------------------+------------+
|
||||
| time | difference |
|
||||
+---------------------+------------+
|
||||
| 1970-01-01T00:02:00 | 366 |
|
||||
| 1970-01-01T00:02:30 | 421 |
|
||||
| 1970-01-01T00:03:00 | 441 |
|
||||
| 1970-01-01T00:03:30 | 297 |
|
||||
+---------------------+------------+
|
||||
-- InfluxQL: SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
|
||||
name: diskio
|
||||
+---------------------+------------+
|
||||
| time | difference |
|
||||
+---------------------+------------+
|
||||
| 1970-01-01T00:02:06 | 5592646 |
|
||||
| 1970-01-01T00:02:13 | -5592646 |
|
||||
| 1970-01-01T00:02:20 | 5592810 |
|
||||
| 1970-01-01T00:02:27 | 187 |
|
||||
| 1970-01-01T00:02:34 | 112 |
|
||||
| 1970-01-01T00:02:41 | -5593109 |
|
||||
| 1970-01-01T00:02:48 | 5593219 |
|
||||
| 1970-01-01T00:02:55 | 219 |
|
||||
| 1970-01-01T00:03:02 | -5593438 |
|
||||
| 1970-01-01T00:03:09 | 5593513 |
|
||||
| 1970-01-01T00:03:16 | 76 |
|
||||
| 1970-01-01T00:03:23 | -5593589 |
|
||||
| 1970-01-01T00:03:30 | 5593735 |
|
||||
+---------------------+------------+
|
||||
-- InfluxQL: SELECT difference(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
|
||||
name: diskio
|
||||
+---------------------+------------+
|
||||
| time | difference |
|
||||
+---------------------+------------+
|
||||
| 1970-01-01T00:02:13 | 0 |
|
||||
| 1970-01-01T00:02:20 | 164 |
|
||||
| 1970-01-01T00:02:27 | 187 |
|
||||
| 1970-01-01T00:02:34 | 112 |
|
||||
| 1970-01-01T00:02:41 | 0 |
|
||||
| 1970-01-01T00:02:48 | 110 |
|
||||
| 1970-01-01T00:02:55 | 219 |
|
||||
| 1970-01-01T00:03:02 | 0 |
|
||||
| 1970-01-01T00:03:09 | 75 |
|
||||
| 1970-01-01T00:03:16 | 76 |
|
||||
| 1970-01-01T00:03:23 | 0 |
|
||||
| 1970-01-01T00:03:30 | 146 |
|
||||
+---------------------+------------+
|
||||
-- InfluxQL: SELECT difference(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
|
||||
name: cpu
|
||||
tags: cpu=cpu0
|
||||
+---------------------+---------------------+
|
||||
| time | difference |
|
||||
+---------------------+---------------------+
|
||||
| 1970-01-01T00:02:00 | -0.7999999999999972 |
|
||||
| 1970-01-01T00:02:30 | 3.5 |
|
||||
| 1970-01-01T00:03:00 | -0.4000000000000057 |
|
||||
+---------------------+---------------------+
|
||||
name: cpu
|
||||
tags: cpu=cpu1
|
||||
+---------------------+----------------------+
|
||||
| time | difference |
|
||||
+---------------------+----------------------+
|
||||
| 1970-01-01T00:02:00 | 0.20000000000000284 |
|
||||
| 1970-01-01T00:02:30 | 0.0 |
|
||||
| 1970-01-01T00:03:00 | -0.10000000000000853 |
|
||||
+---------------------+----------------------+
|
||||
-- InfluxQL: SELECT non_negative_difference(usage_system) FROM cpu WHERE time >= 0000000060000000000 AND time < 0000000210000000001 AND cpu = 'cpu0';
|
||||
name: cpu
|
||||
+---------------------+-------------------------+
|
||||
|
@ -202,6 +282,22 @@ tags: cpu=cpu1
|
|||
+---------------------+-------------------------+
|
||||
| 1970-01-01T00:02:00 | 0.36666666666667425 |
|
||||
+---------------------+-------------------------+
|
||||
-- InfluxQL: SELECT non_negative_difference(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
|
||||
name: cpu
|
||||
tags: cpu=cpu0
|
||||
+---------------------+-------------------------+
|
||||
| time | non_negative_difference |
|
||||
+---------------------+-------------------------+
|
||||
| 1970-01-01T00:02:30 | 3.5 |
|
||||
+---------------------+-------------------------+
|
||||
name: cpu
|
||||
tags: cpu=cpu1
|
||||
+---------------------+-------------------------+
|
||||
| time | non_negative_difference |
|
||||
+---------------------+-------------------------+
|
||||
| 1970-01-01T00:02:00 | 0.20000000000000284 |
|
||||
| 1970-01-01T00:02:30 | 0.0 |
|
||||
+---------------------+-------------------------+
|
||||
-- InfluxQL: SELECT moving_average(writes, 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001;
|
||||
name: diskio
|
||||
+---------------------+-------------------+
|
||||
|
@ -307,6 +403,54 @@ name: diskio
|
|||
| 1970-01-01T00:03:23 | 5593588.0 |
|
||||
| 1970-01-01T00:03:30 | 5593662.0 |
|
||||
+---------------------+-------------------+
|
||||
-- InfluxQL: SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
|
||||
name: diskio
|
||||
+---------------------+-------------------+
|
||||
| time | moving_average |
|
||||
+---------------------+-------------------+
|
||||
| 1970-01-01T00:02:27 | 5592817.666666667 |
|
||||
| 1970-01-01T00:02:34 | 5592972.0 |
|
||||
| 1970-01-01T00:02:48 | 5593108.333333333 |
|
||||
| 1970-01-01T00:02:55 | 5593255.333333333 |
|
||||
| 1970-01-01T00:03:09 | 5593390.0 |
|
||||
| 1970-01-01T00:03:16 | 5593513.333333333 |
|
||||
| 1970-01-01T00:03:30 | 5593612.333333333 |
|
||||
+---------------------+-------------------+
|
||||
-- InfluxQL: SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
|
||||
name: diskio
|
||||
+---------------------+--------------------+
|
||||
| time | moving_average |
|
||||
+---------------------+--------------------+
|
||||
| 1970-01-01T00:02:13 | 1864215.3333333333 |
|
||||
| 1970-01-01T00:02:20 | 3728485.3333333335 |
|
||||
| 1970-01-01T00:02:27 | 3728602.3333333335 |
|
||||
| 1970-01-01T00:02:34 | 5592972.0 |
|
||||
| 1970-01-01T00:02:41 | 3728702.0 |
|
||||
| 1970-01-01T00:02:48 | 3728776.0 |
|
||||
| 1970-01-01T00:02:55 | 3728885.6666666665 |
|
||||
| 1970-01-01T00:03:02 | 3728885.6666666665 |
|
||||
| 1970-01-01T00:03:09 | 3728983.6666666665 |
|
||||
| 1970-01-01T00:03:16 | 3729034.0 |
|
||||
| 1970-01-01T00:03:23 | 3729034.0 |
|
||||
| 1970-01-01T00:03:30 | 3729108.0 |
|
||||
+---------------------+--------------------+
|
||||
-- InfluxQL: SELECT moving_average(first(writes), 3) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
|
||||
name: diskio
|
||||
+---------------------+-------------------+
|
||||
| time | moving_average |
|
||||
+---------------------+-------------------+
|
||||
| 1970-01-01T00:02:20 | 5592700.666666667 |
|
||||
| 1970-01-01T00:02:27 | 5592817.666666667 |
|
||||
| 1970-01-01T00:02:34 | 5592972.0 |
|
||||
| 1970-01-01T00:02:41 | 5593071.666666667 |
|
||||
| 1970-01-01T00:02:48 | 5593145.666666667 |
|
||||
| 1970-01-01T00:02:55 | 5593255.333333333 |
|
||||
| 1970-01-01T00:03:02 | 5593365.0 |
|
||||
| 1970-01-01T00:03:09 | 5593463.0 |
|
||||
| 1970-01-01T00:03:16 | 5593513.333333333 |
|
||||
| 1970-01-01T00:03:23 | 5593563.666666667 |
|
||||
| 1970-01-01T00:03:30 | 5593637.666666667 |
|
||||
+---------------------+-------------------+
|
||||
-- InfluxQL: SELECT difference(usage_idle), non_negative_difference(usage_idle), moving_average(usage_idle, 4) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY cpu;
|
||||
name: cpu
|
||||
tags: cpu=cpu0
|
||||
|
@ -649,6 +793,166 @@ tags: cpu=cpu1
|
|||
| 1970-01-01T00:02:30 | -0.0005555555555557608 |
|
||||
| 1970-01-01T00:03:00 | -0.000555555555555524 |
|
||||
+---------------------+------------------------+
|
||||
-- InfluxQL: SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
|
||||
name: diskio
|
||||
+---------------------+------------+
|
||||
| time | derivative |
|
||||
+---------------------+------------+
|
||||
| 1970-01-01T00:02:20 | 82.0 |
|
||||
| 1970-01-01T00:02:27 | 187.0 |
|
||||
| 1970-01-01T00:02:34 | 112.0 |
|
||||
| 1970-01-01T00:02:48 | 55.0 |
|
||||
| 1970-01-01T00:02:55 | 219.0 |
|
||||
| 1970-01-01T00:03:09 | 37.5 |
|
||||
| 1970-01-01T00:03:16 | 76.0 |
|
||||
| 1970-01-01T00:03:30 | 73.0 |
|
||||
+---------------------+------------+
|
||||
-- InfluxQL: SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
|
||||
name: diskio
|
||||
+---------------------+--------------------+
|
||||
| time | derivative |
|
||||
+---------------------+--------------------+
|
||||
| 1970-01-01T00:02:20 | 5.857142857142857 |
|
||||
| 1970-01-01T00:02:27 | 13.357142857142858 |
|
||||
| 1970-01-01T00:02:34 | 8.0 |
|
||||
| 1970-01-01T00:02:48 | 3.9285714285714284 |
|
||||
| 1970-01-01T00:02:55 | 15.642857142857142 |
|
||||
| 1970-01-01T00:03:09 | 2.6785714285714284 |
|
||||
| 1970-01-01T00:03:16 | 5.428571428571429 |
|
||||
| 1970-01-01T00:03:30 | 5.214285714285714 |
|
||||
+---------------------+--------------------+
|
||||
-- InfluxQL: SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
|
||||
name: diskio
|
||||
+---------------------+------------+
|
||||
| time | derivative |
|
||||
+---------------------+------------+
|
||||
| 1970-01-01T00:02:00 | 366.0 |
|
||||
| 1970-01-01T00:02:30 | 421.0 |
|
||||
| 1970-01-01T00:03:00 | 441.0 |
|
||||
| 1970-01-01T00:03:30 | 297.0 |
|
||||
+---------------------+------------+
|
||||
-- InfluxQL: SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
|
||||
name: diskio
|
||||
+---------------------+-------------------+
|
||||
| time | derivative |
|
||||
+---------------------+-------------------+
|
||||
| 1970-01-01T00:02:00 | 6.1 |
|
||||
| 1970-01-01T00:02:30 | 7.016666666666667 |
|
||||
| 1970-01-01T00:03:00 | 7.35 |
|
||||
| 1970-01-01T00:03:30 | 4.95 |
|
||||
+---------------------+-------------------+
|
||||
-- InfluxQL: SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
|
||||
name: diskio
|
||||
+---------------------+------------+
|
||||
| time | derivative |
|
||||
+---------------------+------------+
|
||||
| 1970-01-01T00:02:06 | 5592646.0 |
|
||||
| 1970-01-01T00:02:13 | -5592646.0 |
|
||||
| 1970-01-01T00:02:20 | 5592810.0 |
|
||||
| 1970-01-01T00:02:27 | 187.0 |
|
||||
| 1970-01-01T00:02:34 | 112.0 |
|
||||
| 1970-01-01T00:02:41 | -5593109.0 |
|
||||
| 1970-01-01T00:02:48 | 5593219.0 |
|
||||
| 1970-01-01T00:02:55 | 219.0 |
|
||||
| 1970-01-01T00:03:02 | -5593438.0 |
|
||||
| 1970-01-01T00:03:09 | 5593513.0 |
|
||||
| 1970-01-01T00:03:16 | 76.0 |
|
||||
| 1970-01-01T00:03:23 | -5593589.0 |
|
||||
| 1970-01-01T00:03:30 | 5593735.0 |
|
||||
+---------------------+------------+
|
||||
-- InfluxQL: SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
|
||||
name: diskio
|
||||
+---------------------+---------------------+
|
||||
| time | derivative |
|
||||
+---------------------+---------------------+
|
||||
| 1970-01-01T00:02:06 | 399474.71428571426 |
|
||||
| 1970-01-01T00:02:13 | -399474.71428571426 |
|
||||
| 1970-01-01T00:02:20 | 399486.4285714286 |
|
||||
| 1970-01-01T00:02:27 | 13.357142857142858 |
|
||||
| 1970-01-01T00:02:34 | 8.0 |
|
||||
| 1970-01-01T00:02:41 | -399507.78571428574 |
|
||||
| 1970-01-01T00:02:48 | 399515.64285714284 |
|
||||
| 1970-01-01T00:02:55 | 15.642857142857142 |
|
||||
| 1970-01-01T00:03:02 | -399531.28571428574 |
|
||||
| 1970-01-01T00:03:09 | 399536.64285714284 |
|
||||
| 1970-01-01T00:03:16 | 5.428571428571429 |
|
||||
| 1970-01-01T00:03:23 | -399542.0714285714 |
|
||||
| 1970-01-01T00:03:30 | 399552.5 |
|
||||
+---------------------+---------------------+
|
||||
-- InfluxQL: SELECT derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
|
||||
name: diskio
|
||||
+---------------------+------------+
|
||||
| time | derivative |
|
||||
+---------------------+------------+
|
||||
| 1970-01-01T00:02:13 | 0.0 |
|
||||
| 1970-01-01T00:02:20 | 164.0 |
|
||||
| 1970-01-01T00:02:27 | 187.0 |
|
||||
| 1970-01-01T00:02:34 | 112.0 |
|
||||
| 1970-01-01T00:02:41 | 0.0 |
|
||||
| 1970-01-01T00:02:48 | 110.0 |
|
||||
| 1970-01-01T00:02:55 | 219.0 |
|
||||
| 1970-01-01T00:03:02 | 0.0 |
|
||||
| 1970-01-01T00:03:09 | 75.0 |
|
||||
| 1970-01-01T00:03:16 | 76.0 |
|
||||
| 1970-01-01T00:03:23 | 0.0 |
|
||||
| 1970-01-01T00:03:30 | 146.0 |
|
||||
+---------------------+------------+
|
||||
-- InfluxQL: SELECT derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
|
||||
name: diskio
|
||||
+---------------------+--------------------+
|
||||
| time | derivative |
|
||||
+---------------------+--------------------+
|
||||
| 1970-01-01T00:02:13 | 0.0 |
|
||||
| 1970-01-01T00:02:20 | 11.714285714285714 |
|
||||
| 1970-01-01T00:02:27 | 13.357142857142858 |
|
||||
| 1970-01-01T00:02:34 | 8.0 |
|
||||
| 1970-01-01T00:02:41 | 0.0 |
|
||||
| 1970-01-01T00:02:48 | 7.857142857142857 |
|
||||
| 1970-01-01T00:02:55 | 15.642857142857142 |
|
||||
| 1970-01-01T00:03:02 | 0.0 |
|
||||
| 1970-01-01T00:03:09 | 5.357142857142857 |
|
||||
| 1970-01-01T00:03:16 | 5.428571428571429 |
|
||||
| 1970-01-01T00:03:23 | 0.0 |
|
||||
| 1970-01-01T00:03:30 | 10.428571428571429 |
|
||||
+---------------------+--------------------+
|
||||
-- InfluxQL: SELECT derivative(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
|
||||
name: cpu
|
||||
tags: cpu=cpu0
|
||||
+---------------------+---------------------+
|
||||
| time | derivative |
|
||||
+---------------------+---------------------+
|
||||
| 1970-01-01T00:02:00 | -0.7999999999999972 |
|
||||
| 1970-01-01T00:02:30 | 3.5 |
|
||||
| 1970-01-01T00:03:00 | -0.4000000000000057 |
|
||||
+---------------------+---------------------+
|
||||
name: cpu
|
||||
tags: cpu=cpu1
|
||||
+---------------------+----------------------+
|
||||
| time | derivative |
|
||||
+---------------------+----------------------+
|
||||
| 1970-01-01T00:02:00 | 0.20000000000000284 |
|
||||
| 1970-01-01T00:02:30 | 0.0 |
|
||||
| 1970-01-01T00:03:00 | -0.10000000000000853 |
|
||||
+---------------------+----------------------+
|
||||
-- InfluxQL: SELECT derivative(first(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
|
||||
name: cpu
|
||||
tags: cpu=cpu0
|
||||
+---------------------+-----------------------+
|
||||
| time | derivative |
|
||||
+---------------------+-----------------------+
|
||||
| 1970-01-01T00:02:00 | -0.013333333333333286 |
|
||||
| 1970-01-01T00:02:30 | 0.058333333333333334 |
|
||||
| 1970-01-01T00:03:00 | -0.006666666666666762 |
|
||||
+---------------------+-----------------------+
|
||||
name: cpu
|
||||
tags: cpu=cpu1
|
||||
+---------------------+------------------------+
|
||||
| time | derivative |
|
||||
+---------------------+------------------------+
|
||||
| 1970-01-01T00:02:00 | 0.003333333333333381 |
|
||||
| 1970-01-01T00:02:30 | 0.0 |
|
||||
| 1970-01-01T00:03:00 | -0.0016666666666668088 |
|
||||
+---------------------+------------------------+
|
||||
-- InfluxQL: SELECT non_negative_derivative(writes) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001;
|
||||
name: diskio
|
||||
+---------------------+-------------------------+
|
||||
|
@ -917,4 +1221,408 @@ tags: cpu=cpu1
|
|||
| time | non_negative_derivative |
|
||||
+---------------------+-------------------------+
|
||||
| 1970-01-01T00:02:00 | 0.006111111111111237 |
|
||||
+---------------------+-------------------------+
|
||||
+---------------------+-------------------------+
|
||||
-- InfluxQL: SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
|
||||
name: diskio
|
||||
+---------------------+-------------------------+
|
||||
| time | non_negative_derivative |
|
||||
+---------------------+-------------------------+
|
||||
| 1970-01-01T00:02:20 | 82.0 |
|
||||
| 1970-01-01T00:02:27 | 187.0 |
|
||||
| 1970-01-01T00:02:34 | 112.0 |
|
||||
| 1970-01-01T00:02:48 | 55.0 |
|
||||
| 1970-01-01T00:02:55 | 219.0 |
|
||||
| 1970-01-01T00:03:09 | 37.5 |
|
||||
| 1970-01-01T00:03:16 | 76.0 |
|
||||
| 1970-01-01T00:03:30 | 73.0 |
|
||||
+---------------------+-------------------------+
|
||||
-- InfluxQL: SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
|
||||
name: diskio
|
||||
+---------------------+-------------------------+
|
||||
| time | non_negative_derivative |
|
||||
+---------------------+-------------------------+
|
||||
| 1970-01-01T00:02:20 | 5.857142857142857 |
|
||||
| 1970-01-01T00:02:27 | 13.357142857142858 |
|
||||
| 1970-01-01T00:02:34 | 8.0 |
|
||||
| 1970-01-01T00:02:48 | 3.9285714285714284 |
|
||||
| 1970-01-01T00:02:55 | 15.642857142857142 |
|
||||
| 1970-01-01T00:03:09 | 2.6785714285714284 |
|
||||
| 1970-01-01T00:03:16 | 5.428571428571429 |
|
||||
| 1970-01-01T00:03:30 | 5.214285714285714 |
|
||||
+---------------------+-------------------------+
|
||||
-- InfluxQL: SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
|
||||
name: diskio
|
||||
+---------------------+-------------------------+
|
||||
| time | non_negative_derivative |
|
||||
+---------------------+-------------------------+
|
||||
| 1970-01-01T00:02:00 | 366.0 |
|
||||
| 1970-01-01T00:02:30 | 421.0 |
|
||||
| 1970-01-01T00:03:00 | 441.0 |
|
||||
| 1970-01-01T00:03:30 | 297.0 |
|
||||
+---------------------+-------------------------+
|
||||
-- InfluxQL: SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
|
||||
name: diskio
|
||||
+---------------------+-------------------------+
|
||||
| time | non_negative_derivative |
|
||||
+---------------------+-------------------------+
|
||||
| 1970-01-01T00:02:00 | 6.1 |
|
||||
| 1970-01-01T00:02:30 | 7.016666666666667 |
|
||||
| 1970-01-01T00:03:00 | 7.35 |
|
||||
| 1970-01-01T00:03:30 | 4.95 |
|
||||
+---------------------+-------------------------+
|
||||
-- InfluxQL: SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
|
||||
name: diskio
|
||||
+---------------------+-------------------------+
|
||||
| time | non_negative_derivative |
|
||||
+---------------------+-------------------------+
|
||||
| 1970-01-01T00:02:06 | 5592646.0 |
|
||||
| 1970-01-01T00:02:20 | 5592810.0 |
|
||||
| 1970-01-01T00:02:27 | 187.0 |
|
||||
| 1970-01-01T00:02:34 | 112.0 |
|
||||
| 1970-01-01T00:02:48 | 5593219.0 |
|
||||
| 1970-01-01T00:02:55 | 219.0 |
|
||||
| 1970-01-01T00:03:09 | 5593513.0 |
|
||||
| 1970-01-01T00:03:16 | 76.0 |
|
||||
| 1970-01-01T00:03:30 | 5593735.0 |
|
||||
+---------------------+-------------------------+
|
||||
-- InfluxQL: SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
|
||||
name: diskio
|
||||
+---------------------+-------------------------+
|
||||
| time | non_negative_derivative |
|
||||
+---------------------+-------------------------+
|
||||
| 1970-01-01T00:02:06 | 399474.71428571426 |
|
||||
| 1970-01-01T00:02:20 | 399486.4285714286 |
|
||||
| 1970-01-01T00:02:27 | 13.357142857142858 |
|
||||
| 1970-01-01T00:02:34 | 8.0 |
|
||||
| 1970-01-01T00:02:48 | 399515.64285714284 |
|
||||
| 1970-01-01T00:02:55 | 15.642857142857142 |
|
||||
| 1970-01-01T00:03:09 | 399536.64285714284 |
|
||||
| 1970-01-01T00:03:16 | 5.428571428571429 |
|
||||
| 1970-01-01T00:03:30 | 399552.5 |
|
||||
+---------------------+-------------------------+
|
||||
-- InfluxQL: SELECT non_negative_derivative(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
|
||||
name: diskio
|
||||
+---------------------+-------------------------+
|
||||
| time | non_negative_derivative |
|
||||
+---------------------+-------------------------+
|
||||
| 1970-01-01T00:02:13 | 0.0 |
|
||||
| 1970-01-01T00:02:20 | 164.0 |
|
||||
| 1970-01-01T00:02:27 | 187.0 |
|
||||
| 1970-01-01T00:02:34 | 112.0 |
|
||||
| 1970-01-01T00:02:41 | 0.0 |
|
||||
| 1970-01-01T00:02:48 | 110.0 |
|
||||
| 1970-01-01T00:02:55 | 219.0 |
|
||||
| 1970-01-01T00:03:02 | 0.0 |
|
||||
| 1970-01-01T00:03:09 | 75.0 |
|
||||
| 1970-01-01T00:03:16 | 76.0 |
|
||||
| 1970-01-01T00:03:23 | 0.0 |
|
||||
| 1970-01-01T00:03:30 | 146.0 |
|
||||
+---------------------+-------------------------+
|
||||
-- InfluxQL: SELECT non_negative_derivative(first(writes), 500ms) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
|
||||
name: diskio
|
||||
+---------------------+-------------------------+
|
||||
| time | non_negative_derivative |
|
||||
+---------------------+-------------------------+
|
||||
| 1970-01-01T00:02:13 | 0.0 |
|
||||
| 1970-01-01T00:02:20 | 11.714285714285714 |
|
||||
| 1970-01-01T00:02:27 | 13.357142857142858 |
|
||||
| 1970-01-01T00:02:34 | 8.0 |
|
||||
| 1970-01-01T00:02:41 | 0.0 |
|
||||
| 1970-01-01T00:02:48 | 7.857142857142857 |
|
||||
| 1970-01-01T00:02:55 | 15.642857142857142 |
|
||||
| 1970-01-01T00:03:02 | 0.0 |
|
||||
| 1970-01-01T00:03:09 | 5.357142857142857 |
|
||||
| 1970-01-01T00:03:16 | 5.428571428571429 |
|
||||
| 1970-01-01T00:03:23 | 0.0 |
|
||||
| 1970-01-01T00:03:30 | 10.428571428571429 |
|
||||
+---------------------+-------------------------+
|
||||
-- InfluxQL: SELECT non_negative_derivative(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
|
||||
name: cpu
|
||||
tags: cpu=cpu0
|
||||
+---------------------+-------------------------+
|
||||
| time | non_negative_derivative |
|
||||
+---------------------+-------------------------+
|
||||
| 1970-01-01T00:02:30 | 3.5 |
|
||||
+---------------------+-------------------------+
|
||||
name: cpu
|
||||
tags: cpu=cpu1
|
||||
+---------------------+-------------------------+
|
||||
| time | non_negative_derivative |
|
||||
+---------------------+-------------------------+
|
||||
| 1970-01-01T00:02:00 | 0.20000000000000284 |
|
||||
| 1970-01-01T00:02:30 | 0.0 |
|
||||
+---------------------+-------------------------+
|
||||
-- InfluxQL: SELECT non_negative_derivative(first(usage_idle), 500ms) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
|
||||
name: cpu
|
||||
tags: cpu=cpu0
|
||||
+---------------------+-------------------------+
|
||||
| time | non_negative_derivative |
|
||||
+---------------------+-------------------------+
|
||||
| 1970-01-01T00:02:30 | 0.058333333333333334 |
|
||||
+---------------------+-------------------------+
|
||||
name: cpu
|
||||
tags: cpu=cpu1
|
||||
+---------------------+-------------------------+
|
||||
| time | non_negative_derivative |
|
||||
+---------------------+-------------------------+
|
||||
| 1970-01-01T00:02:00 | 0.003333333333333381 |
|
||||
| 1970-01-01T00:02:30 | 0.0 |
|
||||
+---------------------+-------------------------+
|
||||
-- InfluxQL: SELECT cumulative_sum(writes) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001;
|
||||
name: diskio
|
||||
+---------------------+----------------+
|
||||
| time | cumulative_sum |
|
||||
+---------------------+----------------+
|
||||
| 1970-01-01T00:02:10 | 5592646 |
|
||||
| 1970-01-01T00:02:20 | 11185456 |
|
||||
| 1970-01-01T00:02:30 | 16778453 |
|
||||
| 1970-01-01T00:02:40 | 22371562 |
|
||||
| 1970-01-01T00:02:50 | 27964781 |
|
||||
| 1970-01-01T00:03:00 | 33558219 |
|
||||
| 1970-01-01T00:03:10 | 39151732 |
|
||||
| 1970-01-01T00:03:20 | 44745321 |
|
||||
| 1970-01-01T00:03:30 | 50339056 |
|
||||
+---------------------+----------------+
|
||||
-- InfluxQL: SELECT cumulative_sum(usage_system) FROM cpu WHERE time >= 0000000060000000000 AND time < 0000000210000000001 AND cpu = 'cpu0';
|
||||
name: cpu
|
||||
+---------------------+----------------+
|
||||
| time | cumulative_sum |
|
||||
+---------------------+----------------+
|
||||
| 1970-01-01T00:01:00 | 89.5 |
|
||||
| 1970-01-01T00:01:10 | 178.1 |
|
||||
| 1970-01-01T00:01:30 | 261.5 |
|
||||
| 1970-01-01T00:01:40 | 349.2 |
|
||||
| 1970-01-01T00:02:10 | 439.0 |
|
||||
| 1970-01-01T00:02:50 | 528.8 |
|
||||
| 1970-01-01T00:03:00 | 618.8 |
|
||||
+---------------------+----------------+
|
||||
-- InfluxQL: SELECT cumulative_sum(usage_idle), cumulative_sum(usage_system) FROM cpu WHERE time >= 0000000060000000000 AND time < 0000000210000000001 AND cpu = 'cpu0';
|
||||
name: cpu
|
||||
+---------------------+--------------------+------------------+
|
||||
| time | cumulative_sum | cumulative_sum_1 |
|
||||
+---------------------+--------------------+------------------+
|
||||
| 1970-01-01T00:01:00 | 89.5 | 89.5 |
|
||||
| 1970-01-01T00:01:10 | 178.1 | 178.1 |
|
||||
| 1970-01-01T00:01:20 | 266.7 | |
|
||||
| 1970-01-01T00:01:30 | 350.1 | 261.5 |
|
||||
| 1970-01-01T00:01:40 | 437.8 | 349.2 |
|
||||
| 1970-01-01T00:01:50 | 526.5 | |
|
||||
| 1970-01-01T00:02:00 | 613.4 | |
|
||||
| 1970-01-01T00:02:10 | 703.1999999999999 | 439.0 |
|
||||
| 1970-01-01T00:02:20 | 792.1999999999999 | |
|
||||
| 1970-01-01T00:02:30 | 882.5999999999999 | |
|
||||
| 1970-01-01T00:02:40 | 972.8 | |
|
||||
| 1970-01-01T00:02:50 | 1062.6 | 528.8 |
|
||||
| 1970-01-01T00:03:00 | 1152.6 | 618.8 |
|
||||
| 1970-01-01T00:03:10 | 1241.3999999999999 | |
|
||||
+---------------------+--------------------+------------------+
|
||||
-- InfluxQL: SELECT cumulative_sum(usage_idle) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY cpu;
|
||||
name: cpu
|
||||
tags: cpu=cpu0
|
||||
+---------------------+--------------------+
|
||||
| time | cumulative_sum |
|
||||
+---------------------+--------------------+
|
||||
| 1970-01-01T00:02:10 | 89.8 |
|
||||
| 1970-01-01T00:02:20 | 178.8 |
|
||||
| 1970-01-01T00:02:30 | 269.20000000000005 |
|
||||
| 1970-01-01T00:02:40 | 359.40000000000003 |
|
||||
| 1970-01-01T00:02:50 | 449.20000000000005 |
|
||||
| 1970-01-01T00:03:00 | 539.2 |
|
||||
| 1970-01-01T00:03:10 | 628.0 |
|
||||
+---------------------+--------------------+
|
||||
name: cpu
|
||||
tags: cpu=cpu1
|
||||
+---------------------+--------------------+
|
||||
| time | cumulative_sum |
|
||||
+---------------------+--------------------+
|
||||
| 1970-01-01T00:02:10 | 99.8 |
|
||||
| 1970-01-01T00:02:20 | 199.7 |
|
||||
| 1970-01-01T00:02:30 | 299.6 |
|
||||
| 1970-01-01T00:02:40 | 399.40000000000003 |
|
||||
| 1970-01-01T00:02:50 | 499.20000000000005 |
|
||||
| 1970-01-01T00:03:00 | 599.0 |
|
||||
| 1970-01-01T00:03:10 | 698.8 |
|
||||
+---------------------+--------------------+
|
||||
-- InfluxQL: SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
|
||||
name: diskio
|
||||
+---------------------+----------------+
|
||||
| time | cumulative_sum |
|
||||
+---------------------+----------------+
|
||||
| 1970-01-01T00:02:06 | 5592646.0 |
|
||||
| 1970-01-01T00:02:20 | 11185456.0 |
|
||||
| 1970-01-01T00:02:27 | 16778453.0 |
|
||||
| 1970-01-01T00:02:34 | 22371562.0 |
|
||||
| 1970-01-01T00:02:48 | 27964781.0 |
|
||||
| 1970-01-01T00:02:55 | 33558219.0 |
|
||||
| 1970-01-01T00:03:09 | 39151732.0 |
|
||||
| 1970-01-01T00:03:16 | 44745321.0 |
|
||||
| 1970-01-01T00:03:30 | 50339056.0 |
|
||||
+---------------------+----------------+
|
||||
-- InfluxQL: SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
|
||||
name: diskio
|
||||
+---------------------+--------------------+
|
||||
| time | cumulative_sum |
|
||||
+---------------------+--------------------+
|
||||
| 1970-01-01T00:02:00 | 5592728.0 |
|
||||
| 1970-01-01T00:02:30 | 11185836.333333332 |
|
||||
| 1970-01-01T00:03:00 | 16779349.666666664 |
|
||||
| 1970-01-01T00:03:30 | 22373084.666666664 |
|
||||
+---------------------+--------------------+
|
||||
-- InfluxQL: SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
|
||||
name: diskio
|
||||
+---------------------+----------------+
|
||||
| time | cumulative_sum |
|
||||
+---------------------+----------------+
|
||||
| 1970-01-01T00:02:06 | 5592646.0 |
|
||||
| 1970-01-01T00:02:13 | 5592646.0 |
|
||||
| 1970-01-01T00:02:20 | 11185456.0 |
|
||||
| 1970-01-01T00:02:27 | 16778453.0 |
|
||||
| 1970-01-01T00:02:34 | 22371562.0 |
|
||||
| 1970-01-01T00:02:41 | 22371562.0 |
|
||||
| 1970-01-01T00:02:48 | 27964781.0 |
|
||||
| 1970-01-01T00:02:55 | 33558219.0 |
|
||||
| 1970-01-01T00:03:02 | 33558219.0 |
|
||||
| 1970-01-01T00:03:09 | 39151732.0 |
|
||||
| 1970-01-01T00:03:16 | 44745321.0 |
|
||||
| 1970-01-01T00:03:23 | 44745321.0 |
|
||||
| 1970-01-01T00:03:30 | 50339056.0 |
|
||||
+---------------------+----------------+
|
||||
-- InfluxQL: SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
|
||||
name: diskio
|
||||
+---------------------+----------------+
|
||||
| time | cumulative_sum |
|
||||
+---------------------+----------------+
|
||||
| 1970-01-01T00:02:06 | 5592646.0 |
|
||||
| 1970-01-01T00:02:13 | 11185292.0 |
|
||||
| 1970-01-01T00:02:20 | 16778102.0 |
|
||||
| 1970-01-01T00:02:27 | 22371099.0 |
|
||||
| 1970-01-01T00:02:34 | 27964208.0 |
|
||||
| 1970-01-01T00:02:41 | 33557317.0 |
|
||||
| 1970-01-01T00:02:48 | 39150536.0 |
|
||||
| 1970-01-01T00:02:55 | 44743974.0 |
|
||||
| 1970-01-01T00:03:02 | 50337412.0 |
|
||||
| 1970-01-01T00:03:09 | 55930925.0 |
|
||||
| 1970-01-01T00:03:16 | 61524514.0 |
|
||||
| 1970-01-01T00:03:23 | 67118103.0 |
|
||||
| 1970-01-01T00:03:30 | 72711838.0 |
|
||||
+---------------------+----------------+
|
||||
-- InfluxQL: SELECT cumulative_sum(mean(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(linear);
|
||||
name: diskio
|
||||
+---------------------+----------------+
|
||||
| time | cumulative_sum |
|
||||
+---------------------+----------------+
|
||||
| 1970-01-01T00:02:06 | 5592646.0 |
|
||||
| 1970-01-01T00:02:13 | 11185374.0 |
|
||||
| 1970-01-01T00:02:20 | 16778184.0 |
|
||||
| 1970-01-01T00:02:27 | 22371181.0 |
|
||||
| 1970-01-01T00:02:34 | 27964290.0 |
|
||||
| 1970-01-01T00:02:41 | 33557454.0 |
|
||||
| 1970-01-01T00:02:48 | 39150673.0 |
|
||||
| 1970-01-01T00:02:55 | 44744111.0 |
|
||||
| 1970-01-01T00:03:02 | 50337586.5 |
|
||||
| 1970-01-01T00:03:09 | 55931099.5 |
|
||||
| 1970-01-01T00:03:16 | 61524688.5 |
|
||||
| 1970-01-01T00:03:23 | 67118350.5 |
|
||||
| 1970-01-01T00:03:30 | 72712085.5 |
|
||||
+---------------------+----------------+
|
||||
-- InfluxQL: SELECT cumulative_sum(mean(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
|
||||
name: cpu
|
||||
tags: cpu=cpu0
|
||||
+---------------------+--------------------+
|
||||
| time | cumulative_sum |
|
||||
+---------------------+--------------------+
|
||||
| 1970-01-01T00:02:00 | 89.4 |
|
||||
| 1970-01-01T00:02:30 | 179.53333333333336 |
|
||||
| 1970-01-01T00:03:00 | 268.9333333333334 |
|
||||
+---------------------+--------------------+
|
||||
name: cpu
|
||||
tags: cpu=cpu1
|
||||
+---------------------+--------------------+
|
||||
| time | cumulative_sum |
|
||||
+---------------------+--------------------+
|
||||
| 1970-01-01T00:02:00 | 99.85 |
|
||||
| 1970-01-01T00:02:30 | 199.68333333333334 |
|
||||
| 1970-01-01T00:03:00 | 299.48333333333335 |
|
||||
+---------------------+--------------------+
|
||||
-- InfluxQL: SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s);
|
||||
name: diskio
|
||||
+---------------------+----------------+
|
||||
| time | cumulative_sum |
|
||||
+---------------------+----------------+
|
||||
| 1970-01-01T00:02:06 | 5592646 |
|
||||
| 1970-01-01T00:02:20 | 11185456 |
|
||||
| 1970-01-01T00:02:27 | 16778453 |
|
||||
| 1970-01-01T00:02:34 | 22371562 |
|
||||
| 1970-01-01T00:02:48 | 27964781 |
|
||||
| 1970-01-01T00:02:55 | 33558219 |
|
||||
| 1970-01-01T00:03:09 | 39151732 |
|
||||
| 1970-01-01T00:03:16 | 44745321 |
|
||||
| 1970-01-01T00:03:30 | 50339056 |
|
||||
+---------------------+----------------+
|
||||
-- InfluxQL: SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(30s);
|
||||
name: diskio
|
||||
+---------------------+----------------+
|
||||
| time | cumulative_sum |
|
||||
+---------------------+----------------+
|
||||
| 1970-01-01T00:02:00 | 5592646 |
|
||||
| 1970-01-01T00:02:30 | 11185643 |
|
||||
| 1970-01-01T00:03:00 | 16779081 |
|
||||
| 1970-01-01T00:03:30 | 22372816 |
|
||||
+---------------------+----------------+
|
||||
-- InfluxQL: SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(0);
|
||||
name: diskio
|
||||
+---------------------+----------------+
|
||||
| time | cumulative_sum |
|
||||
+---------------------+----------------+
|
||||
| 1970-01-01T00:02:06 | 5592646 |
|
||||
| 1970-01-01T00:02:13 | 5592646 |
|
||||
| 1970-01-01T00:02:20 | 11185456 |
|
||||
| 1970-01-01T00:02:27 | 16778453 |
|
||||
| 1970-01-01T00:02:34 | 22371562 |
|
||||
| 1970-01-01T00:02:41 | 22371562 |
|
||||
| 1970-01-01T00:02:48 | 27964781 |
|
||||
| 1970-01-01T00:02:55 | 33558219 |
|
||||
| 1970-01-01T00:03:02 | 33558219 |
|
||||
| 1970-01-01T00:03:09 | 39151732 |
|
||||
| 1970-01-01T00:03:16 | 44745321 |
|
||||
| 1970-01-01T00:03:23 | 44745321 |
|
||||
| 1970-01-01T00:03:30 | 50339056 |
|
||||
+---------------------+----------------+
|
||||
-- InfluxQL: SELECT cumulative_sum(first(writes)) FROM diskio WHERE time >= 0000000130000000000 AND time < 0000000210000000001 GROUP BY time(7s) fill(previous);
|
||||
name: diskio
|
||||
+---------------------+----------------+
|
||||
| time | cumulative_sum |
|
||||
+---------------------+----------------+
|
||||
| 1970-01-01T00:02:06 | 5592646 |
|
||||
| 1970-01-01T00:02:13 | 11185292 |
|
||||
| 1970-01-01T00:02:20 | 16778102 |
|
||||
| 1970-01-01T00:02:27 | 22371099 |
|
||||
| 1970-01-01T00:02:34 | 27964208 |
|
||||
| 1970-01-01T00:02:41 | 33557317 |
|
||||
| 1970-01-01T00:02:48 | 39150536 |
|
||||
| 1970-01-01T00:02:55 | 44743974 |
|
||||
| 1970-01-01T00:03:02 | 50337412 |
|
||||
| 1970-01-01T00:03:09 | 55930925 |
|
||||
| 1970-01-01T00:03:16 | 61524514 |
|
||||
| 1970-01-01T00:03:23 | 67118103 |
|
||||
| 1970-01-01T00:03:30 | 72711838 |
|
||||
+---------------------+----------------+
|
||||
-- InfluxQL: SELECT cumulative_sum(first(usage_idle)) FROM cpu WHERE time >= 0000000130000000000 AND time < 0000000210000000001 AND cpu =~ /^cpu(0|1)$/ GROUP BY TIME(30s), cpu;
|
||||
name: cpu
|
||||
tags: cpu=cpu0
|
||||
+---------------------+----------------+
|
||||
| time | cumulative_sum |
|
||||
+---------------------+----------------+
|
||||
| 1970-01-01T00:02:00 | 89.8 |
|
||||
| 1970-01-01T00:02:30 | 180.2 |
|
||||
| 1970-01-01T00:03:00 | 270.2 |
|
||||
+---------------------+----------------+
|
||||
name: cpu
|
||||
tags: cpu=cpu1
|
||||
+---------------------+----------------+
|
||||
| time | cumulative_sum |
|
||||
+---------------------+----------------+
|
||||
| 1970-01-01T00:02:00 | 99.8 |
|
||||
| 1970-01-01T00:02:30 | 199.7 |
|
||||
| 1970-01-01T00:03:00 | 299.5 |
|
||||
+---------------------+----------------+
|
|
@ -6,7 +6,7 @@ edition.workspace = true
|
|||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
sqlparser = "0.35.0"
|
||||
sqlparser = "0.36.0"
|
||||
snafu = "0.7.5"
|
||||
|
||||
generated_types = { path = "../generated_types" }
|
||||
|
|
|
@ -45,7 +45,7 @@ tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-threa
|
|||
tokio-util = "0.7.8"
|
||||
tonic = { workspace = true }
|
||||
trace = { version = "0.1.0", path = "../trace" }
|
||||
uuid = "1.4.0"
|
||||
uuid = "1.4.1"
|
||||
wal = { version = "0.1.0", path = "../wal" }
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
|
||||
|
@ -60,7 +60,7 @@ lazy_static = "1.4.0"
|
|||
mutable_batch_lp = { path = "../mutable_batch_lp" }
|
||||
object_store = { workspace = true }
|
||||
paste = "1.0.14"
|
||||
tempfile = "3.6.0"
|
||||
tempfile = "3.7.0"
|
||||
test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
|
||||
tokio = { version = "1.29", features = ["macros", "time", "test-util"] }
|
||||
|
||||
|
|
|
@ -2,9 +2,7 @@ use std::{collections::HashMap, sync::Arc, time::Duration};
|
|||
|
||||
use async_trait::async_trait;
|
||||
use backoff::BackoffConfig;
|
||||
use data_types::{
|
||||
NamespaceId, Partition, PartitionHashId, PartitionId, PartitionKey, SequenceNumber, TableId,
|
||||
};
|
||||
use data_types::{NamespaceId, Partition, PartitionHashId, PartitionId, PartitionKey, TableId};
|
||||
use iox_catalog::interface::Catalog;
|
||||
use observability_deps::tracing::debug;
|
||||
use parking_lot::Mutex;
|
||||
|
@ -222,6 +220,7 @@ mod tests {
|
|||
// Harmless in tests - saves a bunch of extra vars.
|
||||
#![allow(clippy::await_holding_lock)]
|
||||
|
||||
use data_types::PartitionId;
|
||||
use iox_catalog::mem::MemCatalog;
|
||||
|
||||
use super::*;
|
||||
|
|
|
@ -6,7 +6,6 @@ use std::{
|
|||
},
|
||||
};
|
||||
|
||||
use arrow::compute::kernels::partition;
|
||||
use async_trait::async_trait;
|
||||
use data_types::{NamespaceId, PartitionKey, TableId};
|
||||
use futures::{future::Shared, FutureExt};
|
||||
|
@ -25,11 +24,10 @@ use super::PartitionProvider;
|
|||
type BoxedResolveFuture =
|
||||
Pin<Box<dyn std::future::Future<Output = Arc<Mutex<PartitionData>>> + Send>>;
|
||||
|
||||
/// A compound key of `(namespace, table, partition_key)` which uniquely
|
||||
/// A compound key of `(table, partition_key)` which uniquely
|
||||
/// identifies a single partition.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
struct Key {
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
partition_key: PartitionKey,
|
||||
}
|
||||
|
@ -149,7 +147,6 @@ where
|
|||
table: Arc<DeferredLoad<TableMetadata>>,
|
||||
) -> Arc<Mutex<PartitionData>> {
|
||||
let key = Key {
|
||||
namespace_id,
|
||||
table_id,
|
||||
partition_key: partition_key.clone(), // Ref-counted anyway!
|
||||
};
|
||||
|
@ -267,12 +264,11 @@ mod tests {
|
|||
use assert_matches::assert_matches;
|
||||
use futures::Future;
|
||||
use futures::{stream::FuturesUnordered, StreamExt};
|
||||
use lazy_static::lazy_static;
|
||||
use test_helpers::timeout::FutureTimeout;
|
||||
use tokio::sync::{Notify, Semaphore};
|
||||
|
||||
use crate::{
|
||||
buffer_tree::partition::{resolver::mock::MockPartitionProvider, SortKeyState},
|
||||
buffer_tree::partition::resolver::mock::MockPartitionProvider,
|
||||
test_util::{
|
||||
defer_namespace_name_1_sec, defer_table_metadata_1_sec, PartitionDataBuilder,
|
||||
ARBITRARY_NAMESPACE_ID, ARBITRARY_PARTITION_KEY, ARBITRARY_TABLE_ID,
|
||||
|
|
|
@ -2,8 +2,6 @@
|
|||
//!
|
||||
//! [`PartitionData`]: crate::buffer_tree::partition::PartitionData
|
||||
|
||||
#![allow(unused_imports)] // Transition time only.
|
||||
|
||||
mod cache;
|
||||
pub(crate) use cache::*;
|
||||
|
||||
|
|
|
@ -49,11 +49,11 @@ where
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::{sync::Arc, time::Duration};
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::*;
|
||||
use crate::{
|
||||
buffer_tree::partition::{resolver::mock::MockPartitionProvider, SortKeyState},
|
||||
buffer_tree::partition::resolver::mock::MockPartitionProvider,
|
||||
test_util::{
|
||||
defer_namespace_name_1_sec, defer_table_metadata_1_sec, PartitionDataBuilder,
|
||||
ARBITRARY_NAMESPACE_ID, ARBITRARY_PARTITION_ID, ARBITRARY_PARTITION_KEY,
|
||||
|
|
|
@ -998,12 +998,8 @@ mod tests {
|
|||
assert_eq!(m, 1, "tables counter mismatch");
|
||||
}
|
||||
|
||||
/// Assert that multiple writes to a single namespace/table results in a
|
||||
/// single namespace being created, and matching metrics.
|
||||
#[tokio::test]
|
||||
async fn test_partition_iter() {
|
||||
// Configure the mock partition provider to return a single partition, named
|
||||
// p1.
|
||||
let partition_provider = Arc::new(
|
||||
MockPartitionProvider::default()
|
||||
.with_partition(
|
||||
|
|
|
@ -27,7 +27,7 @@ object_store = { workspace = true }
|
|||
observability_deps = { version = "0.1.0", path = "../observability_deps" }
|
||||
parquet_file = { version = "0.1.0", path = "../parquet_file" }
|
||||
prost = { version = "0.11.9", default-features = false, features = ["std"] }
|
||||
tempfile = { version = "3.6.0" }
|
||||
tempfile = { version = "3.7.0" }
|
||||
test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
|
||||
tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] }
|
||||
tokio-util = "0.7.8"
|
||||
|
|
|
@ -18,7 +18,7 @@ parking_lot = { version = "0.12" }
|
|||
serde = { version = "1.0", features = ["derive"] }
|
||||
siphasher = "0.3"
|
||||
snafu = "0.7"
|
||||
sqlx = { version = "0.6", features = [ "runtime-tokio-rustls" , "postgres", "uuid", "sqlite" ] }
|
||||
sqlx = { version = "0.7.1", features = [ "runtime-tokio-rustls" , "postgres", "uuid", "sqlite" ] }
|
||||
sqlx-hotswap-pool = { path = "../sqlx-hotswap-pool" }
|
||||
thiserror = "1.0.43"
|
||||
tokio = { version = "1.29", features = ["io-util", "macros", "parking_lot", "rt-multi-thread", "time"] }
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
-- Drop the foreign key constraints referencing the various
|
||||
-- placeholder kafka columns
|
||||
ALTER TABLE IF EXISTS namespace DROP CONSTRAINT IF EXISTS namespace_kafka_topic_id_fkey, DROP CONSTRAINT IF EXISTS namespace_query_pool_id_fkey;
|
||||
ALTER TABLE IF EXISTS parquet_file DROP CONSTRAINT IF EXISTS parquet_file_sequencer_id_fkey;
|
||||
ALTER TABLE IF EXISTS partition DROP CONSTRAINT IF EXISTS partition_sequencer_id_fkey;
|
||||
ALTER TABLE IF EXISTS tombstone DROP CONSTRAINT IF EXISTS tombstone_sequencer_id_fkey;
|
||||
-- Allow the ID columns in these tables to be nullable
|
||||
ALTER TABLE IF EXISTS namespace ALTER COLUMN topic_id DROP NOT NULL, ALTER COLUMN query_pool_id DROP NOT NULL;
|
||||
ALTER TABLE IF EXISTS parquet_file ALTER COLUMN shard_id DROP NOT NULL;
|
||||
ALTER TABLE IF EXISTS partition ALTER COLUMN shard_id DROP NOT NULL;
|
||||
ALTER TABLE IF EXISTS tombstone ALTER COLUMN shard_id DROP NOT NULL;
|
|
@ -0,0 +1,13 @@
|
|||
-- FUNCTION that updates the new_file_at field in the partition table when the update_partition trigger is fired
|
||||
-- The field new_file_at signals when the last file was added to the partition for compaction.
|
||||
|
||||
CREATE OR REPLACE FUNCTION update_partition_on_new_file_at()
|
||||
RETURNS TRIGGER
|
||||
LANGUAGE PLPGSQL
|
||||
AS $$
|
||||
BEGIN
|
||||
UPDATE partition SET new_file_at = NEW.created_at WHERE id = NEW.partition_id;
|
||||
|
||||
RETURN NEW;
|
||||
END;
|
||||
$$;
|
|
@ -0,0 +1,9 @@
|
|||
-- update new_file_at for all compactions, not just L0 & L1
|
||||
drop trigger update_partition;
|
||||
create trigger if not exists update_partition
|
||||
after insert
|
||||
on parquet_file
|
||||
for each row
|
||||
begin
|
||||
UPDATE partition set new_file_at = NEW.created_at WHERE id = NEW.partition_id;
|
||||
end;
|
|
@ -372,12 +372,25 @@ pub trait PartitionRepo: Send + Sync {
|
|||
/// get partition by ID
|
||||
async fn get_by_id(&mut self, partition_id: PartitionId) -> Result<Option<Partition>>;
|
||||
|
||||
/// get multiple partitions by ID.
|
||||
///
|
||||
/// the output order is undefined, non-existing partitions are not part of the output.
|
||||
async fn get_by_id_batch(&mut self, partition_ids: Vec<PartitionId>) -> Result<Vec<Partition>>;
|
||||
|
||||
/// get partition by deterministic hash ID
|
||||
async fn get_by_hash_id(
|
||||
&mut self,
|
||||
partition_hash_id: &PartitionHashId,
|
||||
) -> Result<Option<Partition>>;
|
||||
|
||||
/// get multiple partitions by deterministic hash ID.
|
||||
///
|
||||
/// the output order is undefined, non-existing partitions are not part of the output.
|
||||
async fn get_by_hash_id_batch(
|
||||
&mut self,
|
||||
partition_hash_ids: &[&PartitionHashId],
|
||||
) -> Result<Vec<Partition>>;
|
||||
|
||||
/// return the partitions by table id
|
||||
async fn list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>>;
|
||||
|
||||
|
@ -1487,6 +1500,8 @@ pub(crate) mod test_helpers {
|
|||
.unwrap();
|
||||
|
||||
// partitions can be retrieved easily
|
||||
let mut created_sorted = created.values().cloned().collect::<Vec<_>>();
|
||||
created_sorted.sort_by_key(|p| p.id);
|
||||
assert_eq!(
|
||||
other_partition,
|
||||
repos
|
||||
|
@ -1505,21 +1520,47 @@ pub(crate) mod test_helpers {
|
|||
.unwrap()
|
||||
.unwrap()
|
||||
);
|
||||
let non_existing_partition_id = PartitionId::new(i64::MAX);
|
||||
let non_existing_partition_hash_id =
|
||||
PartitionHashId::new(TableId::new(i64::MAX), &PartitionKey::from("arbitrary"));
|
||||
assert!(repos
|
||||
.partitions()
|
||||
.get_by_id(PartitionId::new(i64::MAX))
|
||||
.get_by_id(non_existing_partition_id)
|
||||
.await
|
||||
.unwrap()
|
||||
.is_none());
|
||||
assert!(repos
|
||||
.partitions()
|
||||
.get_by_hash_id(&PartitionHashId::new(
|
||||
TableId::new(i64::MAX),
|
||||
&PartitionKey::from("arbitrary")
|
||||
))
|
||||
.get_by_hash_id(&non_existing_partition_hash_id)
|
||||
.await
|
||||
.unwrap()
|
||||
.is_none());
|
||||
let mut batch = repos
|
||||
.partitions()
|
||||
.get_by_id_batch(
|
||||
created
|
||||
.keys()
|
||||
.cloned()
|
||||
.chain([non_existing_partition_id])
|
||||
.collect(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
batch.sort_by_key(|p| p.id);
|
||||
assert_eq!(created_sorted, batch);
|
||||
let mut batch = repos
|
||||
.partitions()
|
||||
.get_by_hash_id_batch(
|
||||
&created
|
||||
.values()
|
||||
.map(|p| p.hash_id().unwrap())
|
||||
.chain([&non_existing_partition_hash_id])
|
||||
.collect::<Vec<_>>(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
batch.sort_by_key(|p| p.id);
|
||||
assert_eq!(created_sorted, batch);
|
||||
|
||||
let listed = repos
|
||||
.partitions()
|
||||
|
@ -2534,7 +2575,6 @@ pub(crate) mod test_helpers {
|
|||
assert!(partitions.is_empty());
|
||||
|
||||
// Add an L2 file created just now for partition three
|
||||
// Since the file is L2, the partition won't get updated
|
||||
let l2_file_params = ParquetFileParams {
|
||||
object_store_id: Uuid::new_v4(),
|
||||
created_at: time_now,
|
||||
|
@ -2547,16 +2587,17 @@ pub(crate) mod test_helpers {
|
|||
.create(l2_file_params.clone())
|
||||
.await
|
||||
.unwrap();
|
||||
// still should return partition one and two only
|
||||
// now should return partitions one, two and three
|
||||
let mut partitions = repos
|
||||
.partitions()
|
||||
.partitions_new_file_between(time_two_hour_ago, None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(partitions.len(), 2);
|
||||
assert_eq!(partitions.len(), 3);
|
||||
partitions.sort();
|
||||
assert_eq!(partitions[0], partition1.id);
|
||||
assert_eq!(partitions[1], partition2.id);
|
||||
assert_eq!(partitions[2], partition3.id);
|
||||
// Only return partition1: the creation time must be strictly less than the maximum time,
|
||||
// not equal
|
||||
let partitions = repos
|
||||
|
|
|
@ -88,6 +88,48 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
/// Look up multiple partitions in the catalog by either database-assigned ID or deterministic hash ID.
|
||||
///
|
||||
/// The output only contains existing partitions, the order is undefined.
|
||||
///
|
||||
/// The existence of this function should be temporary; it can be removed once all partition lookup
|
||||
/// is happening with only the deterministic hash ID.
|
||||
pub async fn partition_lookup_batch<R>(
|
||||
repos: &mut R,
|
||||
ids: &[&TransitionPartitionId],
|
||||
) -> Result<Vec<Partition>, Error>
|
||||
where
|
||||
R: RepoCollection + ?Sized,
|
||||
{
|
||||
let mut partition_ids = Vec::with_capacity(ids.len());
|
||||
let mut partition_hash_ids = Vec::with_capacity(ids.len());
|
||||
|
||||
for id in ids {
|
||||
match id {
|
||||
TransitionPartitionId::Deprecated(partition_id) => {
|
||||
partition_ids.push(*partition_id);
|
||||
}
|
||||
TransitionPartitionId::Deterministic(partition_hash_id) => {
|
||||
partition_hash_ids.push(partition_hash_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut out = Vec::with_capacity(partition_ids.len() + partition_hash_ids.len());
|
||||
if !partition_ids.is_empty() {
|
||||
let mut partitions = repos.partitions().get_by_id_batch(partition_ids).await?;
|
||||
out.append(&mut partitions);
|
||||
}
|
||||
if !partition_hash_ids.is_empty() {
|
||||
let mut partitions = repos
|
||||
.partitions()
|
||||
.get_by_hash_id_batch(&partition_hash_ids)
|
||||
.await?;
|
||||
out.append(&mut partitions);
|
||||
}
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
/// Given an iterator of `(table_name, batch)` to validate, this function
|
||||
/// ensures all the columns within `batch` match the existing schema for
|
||||
/// `table_name` in `schema`. If the column does not already exist in `schema`,
|
||||
|
|
|
@ -586,6 +586,19 @@ impl PartitionRepo for MemTxn {
|
|||
.cloned())
|
||||
}
|
||||
|
||||
async fn get_by_id_batch(&mut self, partition_ids: Vec<PartitionId>) -> Result<Vec<Partition>> {
|
||||
let lookup = partition_ids.into_iter().collect::<HashSet<_>>();
|
||||
|
||||
let stage = self.stage();
|
||||
|
||||
Ok(stage
|
||||
.partitions
|
||||
.iter()
|
||||
.filter(|p| lookup.contains(&p.id))
|
||||
.cloned()
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn get_by_hash_id(
|
||||
&mut self,
|
||||
partition_hash_id: &PartitionHashId,
|
||||
|
@ -603,6 +616,26 @@ impl PartitionRepo for MemTxn {
|
|||
.cloned())
|
||||
}
|
||||
|
||||
async fn get_by_hash_id_batch(
|
||||
&mut self,
|
||||
partition_hash_ids: &[&PartitionHashId],
|
||||
) -> Result<Vec<Partition>> {
|
||||
let lookup = partition_hash_ids.iter().copied().collect::<HashSet<_>>();
|
||||
|
||||
let stage = self.stage();
|
||||
|
||||
Ok(stage
|
||||
.partitions
|
||||
.iter()
|
||||
.filter(|p| {
|
||||
p.hash_id()
|
||||
.map(|hash_id| lookup.contains(hash_id))
|
||||
.unwrap_or_default()
|
||||
})
|
||||
.cloned()
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>> {
|
||||
let stage = self.stage();
|
||||
|
||||
|
@ -962,23 +995,19 @@ async fn create_parquet_file(
|
|||
parquet_file_params,
|
||||
ParquetFileId::new(stage.parquet_files.len() as i64 + 1),
|
||||
);
|
||||
let compaction_level = parquet_file.compaction_level;
|
||||
let created_at = parquet_file.created_at;
|
||||
let partition_id = parquet_file.partition_id;
|
||||
stage.parquet_files.push(parquet_file);
|
||||
|
||||
// Update the new_file_at field of its partition to the time of created_at
|
||||
// Only update if the compaction level is not Final which signal more compaction needed
|
||||
if compaction_level < CompactionLevel::Final {
|
||||
let partition = stage
|
||||
.partitions
|
||||
.iter_mut()
|
||||
.find(|p| p.id == partition_id)
|
||||
.ok_or(Error::PartitionNotFound {
|
||||
id: TransitionPartitionId::Deprecated(partition_id),
|
||||
})?;
|
||||
partition.new_file_at = Some(created_at);
|
||||
}
|
||||
let partition = stage
|
||||
.partitions
|
||||
.iter_mut()
|
||||
.find(|p| p.id == partition_id)
|
||||
.ok_or(Error::PartitionNotFound {
|
||||
id: TransitionPartitionId::Deprecated(partition_id),
|
||||
})?;
|
||||
partition.new_file_at = Some(created_at);
|
||||
|
||||
Ok(stage.parquet_files.last().unwrap().clone())
|
||||
}
|
||||
|
|
|
@ -171,7 +171,9 @@ decorate!(
|
|||
methods = [
|
||||
"partition_create_or_get" = create_or_get(&mut self, key: PartitionKey, table_id: TableId) -> Result<Partition>;
|
||||
"partition_get_by_id" = get_by_id(&mut self, partition_id: PartitionId) -> Result<Option<Partition>>;
|
||||
"partition_get_by_id_batch" = get_by_id_batch(&mut self, partition_ids: Vec<PartitionId>) -> Result<Vec<Partition>>;
|
||||
"partition_get_by_hash_id" = get_by_hash_id(&mut self, partition_hash_id: &PartitionHashId) -> Result<Option<Partition>>;
|
||||
"partition_get_by_hash_id_batch" = get_by_hash_id_batch(&mut self, partition_hash_ids: &[&PartitionHashId]) -> Result<Vec<Partition>>;
|
||||
"partition_list_by_table_id" = list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>>;
|
||||
"partition_list_ids" = list_ids(&mut self) -> Result<Vec<PartitionId>>;
|
||||
"partition_update_sort_key" = cas_sort_key(&mut self, partition_id: &TransitionPartitionId, old_sort_key: Option<Vec<String>>, new_sort_key: &[&str]) -> Result<Partition, CasFailure<Vec<String>>>;
|
||||
|
|
|
@ -329,9 +329,9 @@ async fn new_raw_pool(
|
|||
parsed_dsn: &str,
|
||||
) -> Result<sqlx::Pool<Postgres>, sqlx::Error> {
|
||||
// sqlx exposes some options as pool options, while other options are available as connection options.
|
||||
let mut connect_options = PgConnectOptions::from_str(parsed_dsn)?;
|
||||
// the default is INFO, which is frankly surprising.
|
||||
connect_options.log_statements(log::LevelFilter::Trace);
|
||||
let connect_options = PgConnectOptions::from_str(parsed_dsn)?
|
||||
// the default is INFO, which is frankly surprising.
|
||||
.log_statements(log::LevelFilter::Trace);
|
||||
|
||||
let app_name = options.app_name.clone();
|
||||
let app_name2 = options.app_name.clone(); // just to log below
|
||||
|
@ -816,7 +816,7 @@ RETURNING *;
|
|||
.bind(name) // $1
|
||||
.bind(partition_template) // $2
|
||||
.bind(namespace_id) // $3
|
||||
.fetch_one(&mut tx)
|
||||
.fetch_one(&mut *tx)
|
||||
.await
|
||||
.map_err(|e| match e {
|
||||
sqlx::Error::RowNotFound => Error::TableCreateLimitError {
|
||||
|
@ -843,7 +843,8 @@ RETURNING *;
|
|||
// columns with an unsupported type.
|
||||
for template_part in table.partition_template.parts() {
|
||||
if let TemplatePart::TagValue(tag_name) = template_part {
|
||||
insert_column_with_connection(&mut tx, tag_name, table.id, ColumnType::Tag).await?;
|
||||
insert_column_with_connection(&mut *tx, tag_name, table.id, ColumnType::Tag)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1095,6 +1096,22 @@ WHERE id = $1;
|
|||
Ok(Some(partition))
|
||||
}
|
||||
|
||||
async fn get_by_id_batch(&mut self, partition_ids: Vec<PartitionId>) -> Result<Vec<Partition>> {
|
||||
let ids: Vec<_> = partition_ids.iter().map(|p| p.get()).collect();
|
||||
|
||||
sqlx::query_as::<_, Partition>(
|
||||
r#"
|
||||
SELECT id, hash_id, table_id, partition_key, sort_key, new_file_at
|
||||
FROM partition
|
||||
WHERE id = ANY($1);
|
||||
"#,
|
||||
)
|
||||
.bind(&ids[..]) // $1
|
||||
.fetch_all(&mut self.inner)
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })
|
||||
}
|
||||
|
||||
async fn get_by_hash_id(
|
||||
&mut self,
|
||||
partition_hash_id: &PartitionHashId,
|
||||
|
@ -1119,6 +1136,25 @@ WHERE hash_id = $1;
|
|||
Ok(Some(partition))
|
||||
}
|
||||
|
||||
async fn get_by_hash_id_batch(
|
||||
&mut self,
|
||||
partition_ids: &[&PartitionHashId],
|
||||
) -> Result<Vec<Partition>> {
|
||||
let ids: Vec<_> = partition_ids.iter().map(|p| p.as_bytes()).collect();
|
||||
|
||||
sqlx::query_as::<_, Partition>(
|
||||
r#"
|
||||
SELECT id, hash_id, table_id, partition_key, sort_key, new_file_at
|
||||
FROM partition
|
||||
WHERE hash_id = ANY($1);
|
||||
"#,
|
||||
)
|
||||
.bind(&ids[..]) // $1
|
||||
.fetch_all(&mut self.inner)
|
||||
.await
|
||||
.map_err(|e| Error::SqlxError { source: e })
|
||||
}
|
||||
|
||||
async fn list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>> {
|
||||
sqlx::query_as::<_, Partition>(
|
||||
r#"
|
||||
|
@ -1538,15 +1574,14 @@ WHERE object_store_id = $1;
|
|||
) -> Result<Vec<Uuid>> {
|
||||
sqlx::query(
|
||||
// sqlx's readme suggests using PG's ANY operator instead of IN; see link below.
|
||||
// https://github.com/launchbadge/sqlx/blob/main/FAQ.md#how-can-i-do-a-select--where-foo-in--query
|
||||
r#"
|
||||
SELECT object_store_id
|
||||
FROM parquet_file
|
||||
WHERE object_store_id = ANY($1);
|
||||
"#,
|
||||
)
|
||||
// from https://github.com/launchbadge/sqlx/blob/main/FAQ.md#how-can-i-do-a-select--where-foo-in--query
|
||||
// a bug of the parameter typechecking code requires all array parameters to be slices
|
||||
.bind(&object_store_ids[..]) // $1
|
||||
.bind(object_store_ids) // $1
|
||||
.map(|pgr| pgr.get::<Uuid, _>("object_store_id"))
|
||||
.fetch_all(&mut self.inner)
|
||||
.await
|
||||
|
@ -1576,13 +1611,13 @@ WHERE object_store_id = ANY($1);
|
|||
.map_err(|e| Error::StartTransaction { source: e })?;
|
||||
|
||||
let marked_at = Timestamp::from(self.time_provider.now());
|
||||
flag_for_delete(&mut tx, delete, marked_at).await?;
|
||||
flag_for_delete(&mut *tx, delete, marked_at).await?;
|
||||
|
||||
update_compaction_level(&mut tx, upgrade, target_level).await?;
|
||||
update_compaction_level(&mut *tx, upgrade, target_level).await?;
|
||||
|
||||
let mut ids = Vec::with_capacity(create.len());
|
||||
for file in create {
|
||||
let id = create_parquet_file(&mut tx, file).await?;
|
||||
let id = create_parquet_file(&mut *tx, file).await?;
|
||||
ids.push(id);
|
||||
}
|
||||
|
||||
|
@ -1667,12 +1702,9 @@ async fn flag_for_delete<'q, E>(
|
|||
where
|
||||
E: Executor<'q, Database = Postgres>,
|
||||
{
|
||||
// If I try to do `.bind(parquet_file_ids)` directly, I get a compile error from sqlx.
|
||||
// See https://github.com/launchbadge/sqlx/issues/1744
|
||||
let ids: Vec<_> = ids.iter().map(|p| p.get()).collect();
|
||||
let query = sqlx::query(r#"UPDATE parquet_file SET to_delete = $1 WHERE id = ANY($2);"#)
|
||||
.bind(marked_at) // $1
|
||||
.bind(&ids[..]); // $2
|
||||
.bind(ids); // $2
|
||||
query
|
||||
.execute(executor)
|
||||
.await
|
||||
|
@ -1689,9 +1721,6 @@ async fn update_compaction_level<'q, E>(
|
|||
where
|
||||
E: Executor<'q, Database = Postgres>,
|
||||
{
|
||||
// If I try to do `.bind(parquet_file_ids)` directly, I get a compile error from sqlx.
|
||||
// See https://github.com/launchbadge/sqlx/issues/1744
|
||||
let ids: Vec<_> = parquet_file_ids.iter().map(|p| p.get()).collect();
|
||||
let query = sqlx::query(
|
||||
r#"
|
||||
UPDATE parquet_file
|
||||
|
@ -1700,7 +1729,7 @@ WHERE id = ANY($2);
|
|||
"#,
|
||||
)
|
||||
.bind(compaction_level) // $1
|
||||
.bind(&ids[..]); // $2
|
||||
.bind(parquet_file_ids); // $2
|
||||
query
|
||||
.execute(executor)
|
||||
.await
|
||||
|
|
|
@ -24,8 +24,8 @@ use data_types::{
|
|||
Table, TableId, Timestamp, TransitionPartitionId,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashSet;
|
||||
use std::{collections::HashMap, fmt::Display};
|
||||
use std::{collections::HashSet, fmt::Write};
|
||||
|
||||
use crate::interface::MAX_PARQUET_FILES_SELECTED_ONCE_FOR_DELETE;
|
||||
use iox_time::{SystemProvider, TimeProvider};
|
||||
|
@ -577,7 +577,7 @@ RETURNING *;
|
|||
.bind(name) // $1
|
||||
.bind(partition_template) // $2
|
||||
.bind(namespace_id) // $3
|
||||
.fetch_one(&mut tx)
|
||||
.fetch_one(&mut *tx)
|
||||
.await
|
||||
.map_err(|e| match e {
|
||||
sqlx::Error::RowNotFound => Error::TableCreateLimitError {
|
||||
|
@ -604,7 +604,8 @@ RETURNING *;
|
|||
// columns with an unsupported type.
|
||||
for template_part in table.partition_template.parts() {
|
||||
if let TemplatePart::TagValue(tag_name) = template_part {
|
||||
insert_column_with_connection(&mut tx, tag_name, table.id, ColumnType::Tag).await?;
|
||||
insert_column_with_connection(&mut *tx, tag_name, table.id, ColumnType::Tag)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -891,6 +892,24 @@ WHERE id = $1;
|
|||
Ok(Some(partition.into()))
|
||||
}
|
||||
|
||||
async fn get_by_id_batch(&mut self, partition_ids: Vec<PartitionId>) -> Result<Vec<Partition>> {
|
||||
// We use a JSON-based "IS IN" check.
|
||||
let ids: Vec<_> = partition_ids.iter().map(|p| p.get()).collect();
|
||||
|
||||
sqlx::query_as::<_, PartitionPod>(
|
||||
r#"
|
||||
SELECT id, hash_id, table_id, partition_key, sort_key, new_file_at
|
||||
FROM partition
|
||||
WHERE id IN (SELECT value FROM json_each($1));
|
||||
"#,
|
||||
)
|
||||
.bind(Json(&ids[..])) // $1
|
||||
.fetch_all(self.inner.get_mut())
|
||||
.await
|
||||
.map(|vals| vals.into_iter().map(Partition::from).collect())
|
||||
.map_err(|e| Error::SqlxError { source: e })
|
||||
}
|
||||
|
||||
async fn get_by_hash_id(
|
||||
&mut self,
|
||||
partition_hash_id: &PartitionHashId,
|
||||
|
@ -915,6 +934,38 @@ WHERE hash_id = $1;
|
|||
Ok(Some(partition.into()))
|
||||
}
|
||||
|
||||
async fn get_by_hash_id_batch(
|
||||
&mut self,
|
||||
partition_hash_ids: &[&PartitionHashId],
|
||||
) -> Result<Vec<Partition>> {
|
||||
// We use a JSON-based "IS IN" check.
|
||||
let ids: Vec<_> = partition_hash_ids
|
||||
.iter()
|
||||
.map(|id| {
|
||||
// convert partition hash ID to uppercase hex string
|
||||
let bytes = id.as_bytes();
|
||||
let mut s = String::with_capacity(bytes.len() * 2);
|
||||
for b in bytes {
|
||||
write!(&mut s, "{:02X}", b).expect("never fails");
|
||||
}
|
||||
s
|
||||
})
|
||||
.collect();
|
||||
|
||||
sqlx::query_as::<_, PartitionPod>(
|
||||
r#"
|
||||
SELECT id, hash_id, table_id, partition_key, sort_key, new_file_at
|
||||
FROM partition
|
||||
WHERE hex(hash_id) IN (SELECT value FROM json_each($1));
|
||||
"#,
|
||||
)
|
||||
.bind(Json(&ids[..])) // $1
|
||||
.fetch_all(self.inner.get_mut())
|
||||
.await
|
||||
.map(|vals| vals.into_iter().map(Partition::from).collect())
|
||||
.map_err(|e| Error::SqlxError { source: e })
|
||||
}
|
||||
|
||||
async fn list_by_table_id(&mut self, table_id: TableId) -> Result<Vec<Partition>> {
|
||||
Ok(sqlx::query_as::<_, PartitionPod>(
|
||||
r#"
|
||||
|
@ -1451,14 +1502,14 @@ WHERE object_store_id IN ({v});",
|
|||
|
||||
for id in delete {
|
||||
let marked_at = Timestamp::from(self.time_provider.now());
|
||||
flag_for_delete(&mut tx, *id, marked_at).await?;
|
||||
flag_for_delete(&mut *tx, *id, marked_at).await?;
|
||||
}
|
||||
|
||||
update_compaction_level(&mut tx, upgrade, target_level).await?;
|
||||
update_compaction_level(&mut *tx, upgrade, target_level).await?;
|
||||
|
||||
let mut ids = Vec::with_capacity(create.len());
|
||||
for file in create {
|
||||
let res = create_parquet_file(&mut tx, file.clone()).await?;
|
||||
let res = create_parquet_file(&mut *tx, file.clone()).await?;
|
||||
ids.push(res.id);
|
||||
}
|
||||
tx.commit()
|
||||
|
@ -1562,8 +1613,7 @@ async fn update_compaction_level<'q, E>(
|
|||
where
|
||||
E: Executor<'q, Database = Sqlite>,
|
||||
{
|
||||
// If I try to do `.bind(parquet_file_ids)` directly, I get a compile error from sqlx.
|
||||
// See https://github.com/launchbadge/sqlx/issues/1744
|
||||
// We use a JSON-based "IS IN" check.
|
||||
let ids: Vec<_> = parquet_file_ids.iter().map(|p| p.get()).collect();
|
||||
let query = sqlx::query(
|
||||
r#"
|
||||
|
|
|
@ -6,7 +6,7 @@ pub mod field;
|
|||
pub mod fieldlist;
|
||||
pub mod gapfill;
|
||||
mod non_null_checker;
|
||||
mod query_tracing;
|
||||
pub mod query_tracing;
|
||||
mod schema_pivot;
|
||||
pub mod seriesset;
|
||||
pub(crate) mod split;
|
||||
|
|
|
@ -648,7 +648,7 @@ impl IOxSessionContext {
|
|||
exec.spawn(fut).await.unwrap_or_else(|e| {
|
||||
Err(Error::Context(
|
||||
"Join Error".to_string(),
|
||||
Box::new(Error::External(e.into())),
|
||||
Box::new(Error::External(Box::new(e))),
|
||||
))
|
||||
})
|
||||
}
|
||||
|
|
|
@ -74,11 +74,11 @@ where
|
|||
/// Create new stream based on an existing stream that transports [`Result`]s.
|
||||
///
|
||||
/// Also receives an executor that actually executes the underlying stream as well as a converter that converts
|
||||
/// [`executor::Error`] to the error type of the stream (so we can send potential crashes/panics).
|
||||
/// [`executor::JobError`] to the error type of the stream (so we can send potential crashes/panics).
|
||||
fn new_with_error_stream<S, C>(stream: S, exec: DedicatedExecutor, converter: C) -> Self
|
||||
where
|
||||
S: Stream<Item = Result<X, E>> + Send + 'static,
|
||||
C: Fn(executor::Error) -> E + Send + 'static,
|
||||
C: Fn(executor::JobError) -> E + Send + 'static,
|
||||
{
|
||||
Self::new_with_tx(|tx| {
|
||||
// future to be run in the other runtime
|
||||
|
@ -177,7 +177,7 @@ mod tests {
|
|||
let barrier1_captured = Arc::clone(&barrier1);
|
||||
let barrier2 = Arc::new(tokio::sync::Barrier::new(2));
|
||||
let barrier2_captured = Arc::clone(&barrier2);
|
||||
let mut stream = CrossRtStream::<Result<u8, executor::Error>>::new_with_error_stream(
|
||||
let mut stream = CrossRtStream::<Result<u8, executor::JobError>>::new_with_error_stream(
|
||||
futures::stream::once(async move {
|
||||
barrier1_captured.wait().await;
|
||||
barrier2_captured.wait().await;
|
||||
|
@ -195,7 +195,7 @@ mod tests {
|
|||
barrier2.wait().await;
|
||||
|
||||
let res = f.await.expect("streamed data");
|
||||
assert_eq!(res, Ok(1));
|
||||
assert_eq!(res.unwrap(), 1);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
@ -212,7 +212,7 @@ mod tests {
|
|||
let barrier1_captured = Arc::clone(&barrier1);
|
||||
let barrier2 = Arc::new(std::sync::Barrier::new(2));
|
||||
let barrier2_captured = Arc::clone(&barrier2);
|
||||
let mut stream = CrossRtStream::<Result<u8, executor::Error>>::new_with_error_stream(
|
||||
let mut stream = CrossRtStream::<Result<u8, executor::JobError>>::new_with_error_stream(
|
||||
futures::stream::once(async move {
|
||||
barrier1_captured.wait();
|
||||
barrier2_captured.wait();
|
||||
|
@ -230,13 +230,13 @@ mod tests {
|
|||
barrier2.wait();
|
||||
|
||||
let res = f.await.expect("streamed data");
|
||||
assert_eq!(res, Ok(1));
|
||||
assert_eq!(res.unwrap(), 1);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_panic() {
|
||||
let exec = DedicatedExecutor::new_testing();
|
||||
let mut stream = CrossRtStream::<Result<(), executor::Error>>::new_with_error_stream(
|
||||
let mut stream = CrossRtStream::<Result<(), executor::JobError>>::new_with_error_stream(
|
||||
futures::stream::once(async { panic!("foo") }),
|
||||
exec,
|
||||
std::convert::identity,
|
||||
|
@ -247,7 +247,7 @@ mod tests {
|
|||
.await
|
||||
.expect("stream not finished")
|
||||
.unwrap_err();
|
||||
assert_eq!(e.to_string(), "foo");
|
||||
assert_eq!(e.to_string(), "Panic: foo");
|
||||
|
||||
let none = stream.next().await;
|
||||
assert!(none.is_none());
|
||||
|
@ -260,7 +260,7 @@ mod tests {
|
|||
let barrier1_captured = Arc::clone(&barrier1);
|
||||
let barrier2 = Arc::new(tokio::sync::Barrier::new(2));
|
||||
let barrier2_captured = Arc::clone(&barrier2);
|
||||
let mut stream = CrossRtStream::<Result<u8, executor::Error>>::new_with_error_stream(
|
||||
let mut stream = CrossRtStream::<Result<u8, executor::JobError>>::new_with_error_stream(
|
||||
futures::stream::once(async move {
|
||||
barrier1_captured.wait().await;
|
||||
barrier2_captured.wait().await;
|
||||
|
@ -281,7 +281,7 @@ mod tests {
|
|||
|
||||
barrier2.wait().await;
|
||||
let res = stream.next().await.expect("streamed data");
|
||||
assert_eq!(res, Ok(1));
|
||||
assert_eq!(res.unwrap(), 1);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
@ -289,7 +289,7 @@ mod tests {
|
|||
let exec = DedicatedExecutor::new_testing();
|
||||
let barrier = Arc::new(tokio::sync::Barrier::new(2));
|
||||
let barrier_captured = Arc::clone(&barrier);
|
||||
let mut stream = CrossRtStream::<Result<u8, executor::Error>>::new_with_error_stream(
|
||||
let mut stream = CrossRtStream::<Result<u8, executor::JobError>>::new_with_error_stream(
|
||||
futures::stream::once(async move {
|
||||
barrier_captured.wait().await;
|
||||
|
||||
|
|
|
@ -109,7 +109,7 @@ impl Drop for TracedStream {
|
|||
/// 1. If the ExecutionPlan had no metrics
|
||||
/// 2. The total number of rows produced by the ExecutionPlan (if available)
|
||||
/// 3. The elapsed compute time taken by the ExecutionPlan
|
||||
fn send_metrics_to_tracing(
|
||||
pub fn send_metrics_to_tracing(
|
||||
default_end_time: DateTime<Utc>,
|
||||
parent_span: &Span,
|
||||
physical_plan: &dyn ExecutionPlan,
|
||||
|
|
|
@ -8,7 +8,7 @@ use datafusion::{
|
|||
common::tree_node::{RewriteRecursion, TreeNode, TreeNodeRewriter, VisitRecursion},
|
||||
error::{DataFusionError, Result},
|
||||
logical_expr::{
|
||||
expr::{ScalarFunction, ScalarUDF},
|
||||
expr::{Alias, ScalarFunction, ScalarUDF},
|
||||
utils::expr_to_columns,
|
||||
Aggregate, BuiltinScalarFunction, Extension, LogicalPlan, Projection,
|
||||
},
|
||||
|
@ -293,13 +293,26 @@ fn replace_date_bin_gapfill(group_expr: &[Expr]) -> Result<Option<RewriteInfo>>
|
|||
})?;
|
||||
match date_bin_gapfill_count {
|
||||
0 => return Ok(None),
|
||||
2.. => {
|
||||
1 => {
|
||||
// Make sure that the call to DATE_BIN_GAPFILL is the root expression
|
||||
// excluding aliases.
|
||||
let dbg_idx = dbg_idx.expect("should have found exactly one call");
|
||||
if !matches_udf(
|
||||
unwrap_alias(&group_expr[dbg_idx]),
|
||||
DATE_BIN_GAPFILL_UDF_NAME,
|
||||
) {
|
||||
return Err(DataFusionError::Plan(
|
||||
"DATE_BIN_GAPFILL must a top-level expression in the GROUP BY clause when gap filling. It cannot be part of another expression or cast".to_string(),
|
||||
));
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return Err(DataFusionError::Plan(
|
||||
"DATE_BIN_GAPFILL specified more than once".to_string(),
|
||||
))
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
let date_bin_gapfill_index = dbg_idx.expect("should be found exactly one call");
|
||||
|
||||
let mut rewriter = DateBinGapfillRewriter { args: None };
|
||||
|
@ -323,6 +336,15 @@ fn replace_date_bin_gapfill(group_expr: &[Expr]) -> Result<Option<RewriteInfo>>
|
|||
}))
|
||||
}
|
||||
|
||||
fn unwrap_alias(mut e: &Expr) -> &Expr {
|
||||
loop {
|
||||
match e {
|
||||
Expr::Alias(Alias { expr, .. }) => e = expr.as_ref(),
|
||||
e => break e,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct DateBinGapfillRewriter {
|
||||
args: Option<Vec<Expr>>,
|
||||
}
|
||||
|
@ -486,17 +508,21 @@ impl FillFnRewriter {
|
|||
fn count_udf(e: &Expr, name: &str) -> Result<usize> {
|
||||
let mut count = 0;
|
||||
e.apply(&mut |expr| {
|
||||
match expr {
|
||||
Expr::ScalarUDF(ScalarUDF { fun, .. }) if fun.name == name => {
|
||||
count += 1;
|
||||
}
|
||||
_ => (),
|
||||
};
|
||||
if matches_udf(expr, name) {
|
||||
count += 1;
|
||||
}
|
||||
Ok(VisitRecursion::Continue)
|
||||
})?;
|
||||
Ok(count)
|
||||
}
|
||||
|
||||
fn matches_udf(e: &Expr, name: &str) -> bool {
|
||||
matches!(
|
||||
e,
|
||||
Expr::ScalarUDF(ScalarUDF { fun, .. }) if fun.name == name
|
||||
)
|
||||
}
|
||||
|
||||
fn check_node(node: &LogicalPlan) -> Result<()> {
|
||||
node.expressions().iter().try_for_each(|expr| {
|
||||
let dbg_count = count_udf(expr, DATE_BIN_GAPFILL_UDF_NAME)?;
|
||||
|
|
|
@ -7,11 +7,13 @@ use datafusion::{
|
|||
DFSchema,
|
||||
},
|
||||
error::Result,
|
||||
logical_expr::{expr::Alias, Between, BinaryExpr, LogicalPlan, Operator},
|
||||
logical_expr::{Between, BinaryExpr, LogicalPlan, Operator},
|
||||
optimizer::utils::split_conjunction,
|
||||
prelude::{Column, Expr},
|
||||
};
|
||||
|
||||
use super::unwrap_alias;
|
||||
|
||||
/// Given a plan and a column, finds the predicates that use that column
|
||||
/// and returns a range with expressions for upper and lower bounds.
|
||||
pub fn find_time_range(plan: &LogicalPlan, time_col: &Column) -> Result<Range<Bound<Expr>>> {
|
||||
|
@ -65,6 +67,12 @@ impl TreeNodeVisitor for TimeRangeVisitor {
|
|||
self.range = range;
|
||||
Ok(VisitRecursion::Continue)
|
||||
}
|
||||
LogicalPlan::SubqueryAlias(_) => {
|
||||
// The nodes below this one refer to the column with a different table name,
|
||||
// just unset the relation so we match on the column name.
|
||||
self.col.relation = None;
|
||||
Ok(VisitRecursion::Continue)
|
||||
}
|
||||
// These nodes do not alter their schema, so we can recurse through them
|
||||
LogicalPlan::Sort(_)
|
||||
| LogicalPlan::Repartition(_)
|
||||
|
@ -76,15 +84,6 @@ impl TreeNodeVisitor for TimeRangeVisitor {
|
|||
}
|
||||
}
|
||||
|
||||
fn unwrap_alias(mut e: &Expr) -> &Expr {
|
||||
loop {
|
||||
match e {
|
||||
Expr::Alias(Alias { expr, .. }) => e = expr.as_ref(),
|
||||
e => break e,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Encapsulates the upper and lower bounds of a time column
|
||||
/// in a logical plan.
|
||||
#[derive(Clone)]
|
||||
|
|
|
@ -39,7 +39,7 @@ pub(super) fn accumulator(dt: &DataType) -> Result<Box<dyn Accumulator>> {
|
|||
/// Calculate the intermediate merge state for the aggregator.
|
||||
pub(super) fn state_type(dt: &DataType) -> Result<Arc<Vec<DataType>>> {
|
||||
Ok(Arc::new(vec![
|
||||
DataType::List(Arc::new(Field::new("state", dt.clone(), false))),
|
||||
DataType::List(Arc::new(Field::new("item", dt.clone(), true))),
|
||||
DataType::Float64,
|
||||
]))
|
||||
}
|
||||
|
|
|
@ -9,18 +9,18 @@ use crate::plan::planner::select::{
|
|||
};
|
||||
use crate::plan::planner_time_range_expression::time_range_to_df_expr;
|
||||
use crate::plan::rewriter::{find_table_names, rewrite_statement, ProjectionType};
|
||||
use crate::plan::udaf::{
|
||||
derivative_udf, non_negative_derivative_udf, DIFFERENCE, MOVING_AVERAGE,
|
||||
NON_NEGATIVE_DIFFERENCE,
|
||||
};
|
||||
use crate::plan::udaf::MOVING_AVERAGE;
|
||||
use crate::plan::udf::{
|
||||
derivative, difference, find_window_udfs, moving_average, non_negative_derivative,
|
||||
non_negative_difference,
|
||||
cumulative_sum, derivative, difference, find_window_udfs, moving_average,
|
||||
non_negative_derivative, non_negative_difference,
|
||||
};
|
||||
use crate::plan::util::{binary_operator_to_df_operator, rebase_expr, Schemas};
|
||||
use crate::plan::util::{binary_operator_to_df_operator, rebase_expr, IQLSchema};
|
||||
use crate::plan::var_ref::var_ref_data_type_to_data_type;
|
||||
use crate::plan::{planner_rewrite_expression, udf, util_copy};
|
||||
use crate::window::PERCENT_ROW_NUMBER;
|
||||
use crate::window::{
|
||||
CUMULATIVE_SUM, DERIVATIVE, DIFFERENCE, NON_NEGATIVE_DERIVATIVE, NON_NEGATIVE_DIFFERENCE,
|
||||
PERCENT_ROW_NUMBER,
|
||||
};
|
||||
use arrow::array::{StringBuilder, StringDictionaryBuilder};
|
||||
use arrow::datatypes::{DataType, Field as ArrowField, Int32Type, Schema as ArrowSchema};
|
||||
use arrow::record_batch::RecordBatch;
|
||||
|
@ -94,7 +94,6 @@ use std::ops::{Bound, ControlFlow, Deref, Not, Range};
|
|||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::ir::DataSourceSchema;
|
||||
use super::parse_regex;
|
||||
use super::util::contains_expr;
|
||||
use super::util_copy::clone_with_replacement;
|
||||
|
@ -712,16 +711,14 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
continue;
|
||||
};
|
||||
|
||||
let schemas = Schemas::new(plan.schema())?;
|
||||
let ds_schema = ds.schema(self.s)?;
|
||||
let schema = IQLSchema::new_from_ds_schema(plan.schema(), ds.schema(self.s)?)?;
|
||||
let plan = self.plan_condition_time_range(
|
||||
ctx.condition,
|
||||
ctx.extended_time_range(),
|
||||
plan,
|
||||
&schemas,
|
||||
&ds_schema,
|
||||
&schema,
|
||||
)?;
|
||||
plans.push((plan, ds_schema));
|
||||
plans.push((plan, schema));
|
||||
}
|
||||
|
||||
Ok(match plans.len() {
|
||||
|
@ -797,10 +794,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
/// Plan "Raw" SELECT queries. These are queries that have no grouping
|
||||
/// and call only scalar functions.
|
||||
fn project_select_raw(&self, input: LogicalPlan, fields: &[Field]) -> Result<LogicalPlan> {
|
||||
let schemas = Schemas::new(input.schema())?;
|
||||
let schema = IQLSchema::new_from_fields(input.schema(), fields)?;
|
||||
|
||||
// Transform InfluxQL AST field expressions to a list of DataFusion expressions.
|
||||
let select_exprs = self.field_list_to_exprs(&input, fields, &schemas)?;
|
||||
let select_exprs = self.field_list_to_exprs(&input, fields, &schema)?;
|
||||
|
||||
// Wrap the plan in a `LogicalPlan::Projection` from the select expressions
|
||||
project(input, select_exprs)
|
||||
|
@ -813,10 +810,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
input: LogicalPlan,
|
||||
fields: &[Field],
|
||||
) -> Result<LogicalPlan> {
|
||||
let schemas = Schemas::new(input.schema())?;
|
||||
let schema = IQLSchema::new_from_fields(input.schema(), fields)?;
|
||||
|
||||
// Transform InfluxQL AST field expressions to a list of DataFusion expressions.
|
||||
let mut select_exprs = self.field_list_to_exprs(&input, fields, &schemas)?;
|
||||
let mut select_exprs = self.field_list_to_exprs(&input, fields, &schema)?;
|
||||
|
||||
// This is a special case, where exactly one column can be projected with a `DISTINCT`
|
||||
// clause or the `distinct` function.
|
||||
|
@ -850,10 +847,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
fields: &[Field],
|
||||
group_by_tag_set: &[&str],
|
||||
) -> Result<LogicalPlan> {
|
||||
let schemas = Schemas::new(input.schema())?;
|
||||
let schema = IQLSchema::new_from_fields(input.schema(), fields)?;
|
||||
|
||||
// Transform InfluxQL AST field expressions to a list of DataFusion expressions.
|
||||
let select_exprs = self.field_list_to_exprs(&input, fields, &schemas)?;
|
||||
let select_exprs = self.field_list_to_exprs(&input, fields, &schema)?;
|
||||
|
||||
let (plan, select_exprs) =
|
||||
self.select_aggregate(ctx, input, fields, select_exprs, group_by_tag_set)?;
|
||||
|
@ -871,10 +868,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
fields: &[Field],
|
||||
group_by_tag_set: &[&str],
|
||||
) -> Result<LogicalPlan> {
|
||||
let schemas = Schemas::new(input.schema())?;
|
||||
let schema = IQLSchema::new_from_fields(input.schema(), fields)?;
|
||||
|
||||
// Transform InfluxQL AST field expressions to a list of DataFusion expressions.
|
||||
let select_exprs = self.field_list_to_exprs(&input, fields, &schemas)?;
|
||||
let select_exprs = self.field_list_to_exprs(&input, fields, &schema)?;
|
||||
|
||||
let (plan, select_exprs) =
|
||||
self.select_window(ctx, input, select_exprs, group_by_tag_set)?;
|
||||
|
@ -909,10 +906,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
fields: &[Field],
|
||||
group_by_tag_set: &[&str],
|
||||
) -> Result<LogicalPlan> {
|
||||
let schemas = Schemas::new(input.schema())?;
|
||||
let schema = IQLSchema::new_from_fields(input.schema(), fields)?;
|
||||
|
||||
// Transform InfluxQL AST field expressions to a list of DataFusion expressions.
|
||||
let select_exprs = self.field_list_to_exprs(&input, fields, &schemas)?;
|
||||
let select_exprs = self.field_list_to_exprs(&input, fields, &schema)?;
|
||||
|
||||
let (plan, select_exprs) =
|
||||
self.select_aggregate(ctx, input, fields, select_exprs, group_by_tag_set)?;
|
||||
|
@ -953,7 +950,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
fields: &[Field],
|
||||
group_by_tag_set: &[&str],
|
||||
) -> Result<LogicalPlan> {
|
||||
let schemas = Schemas::new(input.schema())?;
|
||||
let schema = IQLSchema::new_from_fields(input.schema(), fields)?;
|
||||
|
||||
let (selector_index, field_key, plan) = match Selector::find_enumerated(fields)? {
|
||||
(_, Selector::First { .. })
|
||||
|
@ -1027,7 +1024,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
});
|
||||
|
||||
// Transform InfluxQL AST field expressions to a list of DataFusion expressions.
|
||||
let select_exprs = self.field_list_to_exprs(&plan, fields_vec.as_slice(), &schemas)?;
|
||||
let select_exprs = self.field_list_to_exprs(&plan, fields_vec.as_slice(), &schema)?;
|
||||
|
||||
// Wrap the plan in a `LogicalPlan::Projection` from the select expressions
|
||||
project(plan, select_exprs)
|
||||
|
@ -1043,7 +1040,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
fields: &[Field],
|
||||
group_by_tag_set: &[&str],
|
||||
) -> Result<LogicalPlan> {
|
||||
let schemas = Schemas::new(input.schema())?;
|
||||
let schema = IQLSchema::new_from_fields(input.schema(), fields)?;
|
||||
|
||||
let (selector_index, is_bottom, field_key, tag_keys, narg) =
|
||||
match Selector::find_enumerated(fields)? {
|
||||
|
@ -1098,7 +1095,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
}
|
||||
|
||||
// Transform InfluxQL AST field expressions to a list of DataFusion expressions.
|
||||
let select_exprs = self.field_list_to_exprs(&input, fields_vec.as_slice(), &schemas)?;
|
||||
let select_exprs = self.field_list_to_exprs(&input, fields_vec.as_slice(), &schema)?;
|
||||
|
||||
let plan = if !tag_keys.is_empty() {
|
||||
self.select_first(ctx, input, order_by, internal_group_by.as_slice(), 1)?
|
||||
|
@ -1326,18 +1323,25 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
_ => None,
|
||||
};
|
||||
|
||||
// Some aggregates, such as COUNT, should be filled with zero by default
|
||||
// rather than NULL.
|
||||
let should_zero_fill_expr = fields
|
||||
.iter()
|
||||
.map(is_zero_filled_aggregate_field)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// Rewrite the aggregate columns from the projection, so that the expressions
|
||||
// refer to the columns from the aggregate projection
|
||||
let select_exprs_post_aggr = select_exprs
|
||||
.iter()
|
||||
.zip(should_fill_expr)
|
||||
.map(|(expr, should_fill)| {
|
||||
.zip(should_fill_expr.iter().zip(should_zero_fill_expr))
|
||||
.map(|(expr, (should_fill, should_zero_fill))| {
|
||||
// This implements the `FILL(<value>)` strategy, by coalescing any aggregate
|
||||
// expressions to `<value>` when they are `NULL`.
|
||||
let fill_if_null = if fill_if_null.is_some() && should_fill {
|
||||
fill_if_null
|
||||
} else {
|
||||
None
|
||||
let fill_if_null = match (fill_if_null, should_fill, should_zero_fill) {
|
||||
(Some(_), true, _) => fill_if_null,
|
||||
(None, true, true) => Some(0.into()),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
rebase_expr(expr, &aggr_projection_exprs, &fill_if_null, &plan)
|
||||
|
@ -1450,17 +1454,17 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
return error::internal(format!("udf_to_expr: unexpected expression: {e}"))
|
||||
};
|
||||
|
||||
fn derivative_unit(ctx: &Context<'_>, args: &Vec<Expr>) -> Result<i64> {
|
||||
fn derivative_unit(ctx: &Context<'_>, args: &Vec<Expr>) -> Result<ScalarValue> {
|
||||
if args.len() > 1 {
|
||||
if let Expr::Literal(ScalarValue::IntervalMonthDayNano(Some(v))) = args[1] {
|
||||
Ok(v as i64)
|
||||
if let Expr::Literal(v) = &args[1] {
|
||||
Ok(v.clone())
|
||||
} else {
|
||||
error::internal(format!("udf_to_expr: unexpected expression: {}", args[1]))
|
||||
}
|
||||
} else if let Some(interval) = ctx.interval {
|
||||
Ok(interval.duration)
|
||||
Ok(ScalarValue::new_interval_mdn(0, 0, interval.duration))
|
||||
} else {
|
||||
Ok(1000000000) // 1s
|
||||
Ok(ScalarValue::new_interval_mdn(0, 0, 1_000_000_000)) // 1s
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1478,63 +1482,77 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
})
|
||||
.alias(alias)),
|
||||
Some(udf::WindowFunction::Difference) => Ok(Expr::WindowFunction(WindowFunction {
|
||||
fun: window_function::WindowFunction::AggregateUDF(DIFFERENCE.clone()),
|
||||
fun: DIFFERENCE.clone(),
|
||||
args,
|
||||
partition_by,
|
||||
order_by,
|
||||
window_frame: WindowFrame {
|
||||
units: WindowFrameUnits::Rows,
|
||||
start_bound: WindowFrameBound::Preceding(ScalarValue::Null),
|
||||
end_bound: WindowFrameBound::CurrentRow,
|
||||
end_bound: WindowFrameBound::Following(ScalarValue::Null),
|
||||
},
|
||||
})
|
||||
.alias(alias)),
|
||||
Some(udf::WindowFunction::NonNegativeDifference) => {
|
||||
Ok(Expr::WindowFunction(WindowFunction {
|
||||
fun: window_function::WindowFunction::AggregateUDF(
|
||||
NON_NEGATIVE_DIFFERENCE.clone(),
|
||||
),
|
||||
fun: NON_NEGATIVE_DIFFERENCE.clone(),
|
||||
args,
|
||||
partition_by,
|
||||
order_by,
|
||||
window_frame: WindowFrame {
|
||||
units: WindowFrameUnits::Rows,
|
||||
start_bound: WindowFrameBound::Preceding(ScalarValue::Null),
|
||||
end_bound: WindowFrameBound::CurrentRow,
|
||||
end_bound: WindowFrameBound::Following(ScalarValue::Null),
|
||||
},
|
||||
})
|
||||
.alias(alias))
|
||||
}
|
||||
Some(udf::WindowFunction::Derivative) => Ok(Expr::WindowFunction(WindowFunction {
|
||||
fun: window_function::WindowFunction::AggregateUDF(
|
||||
derivative_udf(derivative_unit(ctx, &args)?).into(),
|
||||
),
|
||||
args: vec!["time".as_expr(), args[0].clone()],
|
||||
fun: DERIVATIVE.clone(),
|
||||
args: vec![
|
||||
args[0].clone(),
|
||||
lit(derivative_unit(ctx, &args)?),
|
||||
"time".as_expr(),
|
||||
],
|
||||
partition_by,
|
||||
order_by,
|
||||
window_frame: WindowFrame {
|
||||
units: WindowFrameUnits::Rows,
|
||||
start_bound: WindowFrameBound::Preceding(ScalarValue::Null),
|
||||
end_bound: WindowFrameBound::CurrentRow,
|
||||
end_bound: WindowFrameBound::Following(ScalarValue::Null),
|
||||
},
|
||||
})
|
||||
.alias(alias)),
|
||||
Some(udf::WindowFunction::NonNegativeDerivative) => {
|
||||
Ok(Expr::WindowFunction(WindowFunction {
|
||||
fun: window_function::WindowFunction::AggregateUDF(
|
||||
non_negative_derivative_udf(derivative_unit(ctx, &args)?).into(),
|
||||
),
|
||||
args: vec!["time".as_expr(), args[0].clone()],
|
||||
fun: NON_NEGATIVE_DERIVATIVE.clone(),
|
||||
args: vec![
|
||||
args[0].clone(),
|
||||
lit(derivative_unit(ctx, &args)?),
|
||||
"time".as_expr(),
|
||||
],
|
||||
partition_by,
|
||||
order_by,
|
||||
window_frame: WindowFrame {
|
||||
units: WindowFrameUnits::Rows,
|
||||
start_bound: WindowFrameBound::Preceding(ScalarValue::Null),
|
||||
end_bound: WindowFrameBound::CurrentRow,
|
||||
end_bound: WindowFrameBound::Following(ScalarValue::Null),
|
||||
},
|
||||
})
|
||||
.alias(alias))
|
||||
}
|
||||
Some(udf::WindowFunction::CumulativeSum) => Ok(Expr::WindowFunction(WindowFunction {
|
||||
fun: CUMULATIVE_SUM.clone(),
|
||||
args,
|
||||
partition_by,
|
||||
order_by,
|
||||
window_frame: WindowFrame {
|
||||
units: WindowFrameUnits::Rows,
|
||||
start_bound: WindowFrameBound::Preceding(ScalarValue::Null),
|
||||
end_bound: WindowFrameBound::Following(ScalarValue::Null),
|
||||
},
|
||||
})
|
||||
.alias(alias)),
|
||||
None => error::internal(format!(
|
||||
"unexpected user-defined window function: {}",
|
||||
fun.name
|
||||
|
@ -1688,7 +1706,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
&self,
|
||||
plan: &LogicalPlan,
|
||||
fields: &[Field],
|
||||
schemas: &Schemas,
|
||||
schema: &IQLSchema<'_>,
|
||||
) -> Result<Vec<Expr>> {
|
||||
let mut names: HashMap<&str, usize> = HashMap::new();
|
||||
fields
|
||||
|
@ -1708,7 +1726,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
};
|
||||
new_field
|
||||
})
|
||||
.map(|field| self.field_to_df_expr(&field, plan, schemas))
|
||||
.map(|field| self.field_to_df_expr(&field, plan, schema))
|
||||
.collect()
|
||||
}
|
||||
|
||||
|
@ -1719,10 +1737,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
&self,
|
||||
field: &Field,
|
||||
plan: &LogicalPlan,
|
||||
schemas: &Schemas,
|
||||
schema: &IQLSchema<'_>,
|
||||
) -> Result<Expr> {
|
||||
let expr = self.expr_to_df_expr(ExprScope::Projection, &field.expr, schemas)?;
|
||||
let expr = planner_rewrite_expression::rewrite_field_expr(expr, schemas)?;
|
||||
let expr = self.expr_to_df_expr(ExprScope::Projection, &field.expr, schema)?;
|
||||
let expr = planner_rewrite_expression::rewrite_field_expr(expr, schema)?;
|
||||
normalize_col(expr.alias(&field.name), plan)
|
||||
}
|
||||
|
||||
|
@ -1730,16 +1748,14 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
fn conditional_to_df_expr(
|
||||
&self,
|
||||
iql: &ConditionalExpression,
|
||||
schemas: &Schemas,
|
||||
schema: &IQLSchema<'_>,
|
||||
) -> Result<Expr> {
|
||||
match iql {
|
||||
ConditionalExpression::Expr(expr) => {
|
||||
self.expr_to_df_expr(ExprScope::Where, expr, schemas)
|
||||
self.expr_to_df_expr(ExprScope::Where, expr, schema)
|
||||
}
|
||||
ConditionalExpression::Binary(expr) => {
|
||||
self.binary_conditional_to_df_expr(expr, schemas)
|
||||
}
|
||||
ConditionalExpression::Grouped(e) => self.conditional_to_df_expr(e, schemas),
|
||||
ConditionalExpression::Binary(expr) => self.binary_conditional_to_df_expr(expr, schema),
|
||||
ConditionalExpression::Grouped(e) => self.conditional_to_df_expr(e, schema),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1747,20 +1763,25 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
fn binary_conditional_to_df_expr(
|
||||
&self,
|
||||
expr: &ConditionalBinary,
|
||||
schemas: &Schemas,
|
||||
schema: &IQLSchema<'_>,
|
||||
) -> Result<Expr> {
|
||||
let ConditionalBinary { lhs, op, rhs } = expr;
|
||||
|
||||
Ok(binary_expr(
|
||||
self.conditional_to_df_expr(lhs, schemas)?,
|
||||
self.conditional_to_df_expr(lhs, schema)?,
|
||||
conditional_op_to_operator(*op)?,
|
||||
self.conditional_to_df_expr(rhs, schemas)?,
|
||||
self.conditional_to_df_expr(rhs, schema)?,
|
||||
))
|
||||
}
|
||||
|
||||
/// Map an InfluxQL [`IQLExpr`] to a DataFusion [`Expr`].
|
||||
fn expr_to_df_expr(&self, scope: ExprScope, iql: &IQLExpr, schemas: &Schemas) -> Result<Expr> {
|
||||
let schema = &schemas.df_schema;
|
||||
fn expr_to_df_expr(
|
||||
&self,
|
||||
scope: ExprScope,
|
||||
iql: &IQLExpr,
|
||||
schema: &IQLSchema<'_>,
|
||||
) -> Result<Expr> {
|
||||
let df_schema = &schema.df_schema;
|
||||
match iql {
|
||||
// rewriter is expected to expand wildcard expressions
|
||||
IQLExpr::Wildcard(_) => error::internal("unexpected wildcard in projection"),
|
||||
|
@ -1777,7 +1798,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
"time".as_expr()
|
||||
}
|
||||
(ExprScope::Projection, "time") => "time".as_expr(),
|
||||
(_, name) => match schema
|
||||
(_, name) => match df_schema
|
||||
.fields_with_unqualified_name(name)
|
||||
.first()
|
||||
.map(|f| f.data_type().clone())
|
||||
|
@ -1801,7 +1822,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
// and it is safe to unconditionally unwrap, as the
|
||||
// `is_numeric_type` call guarantees it can be mapped to
|
||||
// an Arrow DataType
|
||||
column.cast_to(&dst_type, &schemas.df_schema)?
|
||||
column.cast_to(&dst_type, &schema.df_schema)?
|
||||
} else {
|
||||
// If the cast is incompatible, evaluates to NULL
|
||||
Expr::Literal(ScalarValue::Null)
|
||||
|
@ -1839,9 +1860,9 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
},
|
||||
// A DISTINCT <ident> clause should have been replaced by `rewrite_statement`.
|
||||
IQLExpr::Distinct(_) => error::internal("distinct expression"),
|
||||
IQLExpr::Call(call) => self.call_to_df_expr(scope, call, schemas),
|
||||
IQLExpr::Binary(expr) => self.arithmetic_expr_to_df_expr(scope, expr, schemas),
|
||||
IQLExpr::Nested(e) => self.expr_to_df_expr(scope, e, schemas),
|
||||
IQLExpr::Call(call) => self.call_to_df_expr(scope, call, schema),
|
||||
IQLExpr::Binary(expr) => self.arithmetic_expr_to_df_expr(scope, expr, schema),
|
||||
IQLExpr::Nested(e) => self.expr_to_df_expr(scope, e, schema),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1861,9 +1882,14 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
/// > * <https://github.com/influxdata/influxdb_iox/issues/6939>
|
||||
///
|
||||
/// [docs]: https://docs.influxdata.com/influxdb/v1.8/query_language/functions/
|
||||
fn call_to_df_expr(&self, scope: ExprScope, call: &Call, schemas: &Schemas) -> Result<Expr> {
|
||||
fn call_to_df_expr(
|
||||
&self,
|
||||
scope: ExprScope,
|
||||
call: &Call,
|
||||
schema: &IQLSchema<'_>,
|
||||
) -> Result<Expr> {
|
||||
if is_scalar_math_function(call.name.as_str()) {
|
||||
return self.scalar_math_func_to_df_expr(scope, call, schemas);
|
||||
return self.scalar_math_func_to_df_expr(scope, call, schema);
|
||||
}
|
||||
|
||||
match scope {
|
||||
|
@ -1875,7 +1901,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
error::query(format!("invalid function call in condition: {name}"))
|
||||
}
|
||||
}
|
||||
ExprScope::Projection => self.function_to_df_expr(scope, call, schemas),
|
||||
ExprScope::Projection => self.function_to_df_expr(scope, call, schema),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1883,7 +1909,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
&self,
|
||||
scope: ExprScope,
|
||||
call: &Call,
|
||||
schemas: &Schemas,
|
||||
schema: &IQLSchema<'_>,
|
||||
) -> Result<Expr> {
|
||||
fn check_arg_count(name: &str, args: &[IQLExpr], count: usize) -> Result<()> {
|
||||
let got = args.len();
|
||||
|
@ -1918,13 +1944,13 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
// The DISTINCT function is handled as a `ProjectionType::RawDistinct`
|
||||
// query, so the planner only needs to project the single column
|
||||
// argument.
|
||||
"distinct" => self.expr_to_df_expr(scope, &args[0], schemas),
|
||||
"distinct" => self.expr_to_df_expr(scope, &args[0], schema),
|
||||
"count" => {
|
||||
let (expr, distinct) = match &args[0] {
|
||||
IQLExpr::Call(c) if c.name == "distinct" => {
|
||||
(self.expr_to_df_expr(scope, &c.args[0], schemas)?, true)
|
||||
(self.expr_to_df_expr(scope, &c.args[0], schema)?, true)
|
||||
}
|
||||
expr => (self.expr_to_df_expr(scope, expr, schemas)?, false),
|
||||
expr => (self.expr_to_df_expr(scope, expr, schema)?, false),
|
||||
};
|
||||
if let Expr::Literal(ScalarValue::Null) = expr {
|
||||
return Ok(expr);
|
||||
|
@ -1940,7 +1966,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
)))
|
||||
}
|
||||
"sum" | "stddev" | "mean" | "median" => {
|
||||
let expr = self.expr_to_df_expr(scope, &args[0], schemas)?;
|
||||
let expr = self.expr_to_df_expr(scope, &args[0], schema)?;
|
||||
if let Expr::Literal(ScalarValue::Null) = expr {
|
||||
return Ok(expr);
|
||||
}
|
||||
|
@ -1955,13 +1981,13 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
)))
|
||||
}
|
||||
"percentile" => {
|
||||
let expr = self.expr_to_df_expr(scope, &args[0], schemas)?;
|
||||
let expr = self.expr_to_df_expr(scope, &args[0], schema)?;
|
||||
if let Expr::Literal(ScalarValue::Null) = expr {
|
||||
return Ok(expr);
|
||||
}
|
||||
|
||||
check_arg_count(name, args, 2)?;
|
||||
let nexpr = self.expr_to_df_expr(scope, &args[1], schemas)?;
|
||||
let nexpr = self.expr_to_df_expr(scope, &args[1], schema)?;
|
||||
Ok(Expr::AggregateUDF(expr::AggregateUDF::new(
|
||||
PERCENTILE.clone(),
|
||||
vec![expr, nexpr],
|
||||
|
@ -1970,7 +1996,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
)))
|
||||
}
|
||||
name @ ("first" | "last" | "min" | "max") => {
|
||||
let expr = self.expr_to_df_expr(scope, &args[0], schemas)?;
|
||||
let expr = self.expr_to_df_expr(scope, &args[0], schema)?;
|
||||
if let Expr::Literal(ScalarValue::Null) = expr {
|
||||
return Ok(expr);
|
||||
}
|
||||
|
@ -1993,7 +2019,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
check_arg_count(name, args, 1)?;
|
||||
|
||||
// arg0 should be a column or function
|
||||
let arg0 = self.expr_to_df_expr(scope, &args[0], schemas)?;
|
||||
let arg0 = self.expr_to_df_expr(scope, &args[0], schema)?;
|
||||
if let Expr::Literal(ScalarValue::Null) = arg0 {
|
||||
return Ok(arg0);
|
||||
}
|
||||
|
@ -2004,7 +2030,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
check_arg_count(name, args, 1)?;
|
||||
|
||||
// arg0 should be a column or function
|
||||
let arg0 = self.expr_to_df_expr(scope, &args[0], schemas)?;
|
||||
let arg0 = self.expr_to_df_expr(scope, &args[0], schema)?;
|
||||
if let Expr::Literal(ScalarValue::Null) = arg0 {
|
||||
return Ok(arg0);
|
||||
}
|
||||
|
@ -2015,14 +2041,14 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
check_arg_count(name, args, 2)?;
|
||||
|
||||
// arg0 should be a column or function
|
||||
let arg0 = self.expr_to_df_expr(scope, &args[0], schemas)?;
|
||||
let arg0 = self.expr_to_df_expr(scope, &args[0], schema)?;
|
||||
if let Expr::Literal(ScalarValue::Null) = arg0 {
|
||||
return Ok(arg0);
|
||||
}
|
||||
|
||||
// arg1 should be an integer.
|
||||
let arg1 = ScalarValue::Int64(Some(
|
||||
match self.expr_to_df_expr(scope, &args[1], schemas)? {
|
||||
match self.expr_to_df_expr(scope, &args[1], schema)? {
|
||||
Expr::Literal(ScalarValue::Int64(Some(v))) => v,
|
||||
Expr::Literal(ScalarValue::UInt64(Some(v))) => v as i64,
|
||||
_ => {
|
||||
|
@ -2039,13 +2065,13 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
check_arg_count_range(name, args, 1, 2)?;
|
||||
|
||||
// arg0 should be a column or function
|
||||
let arg0 = self.expr_to_df_expr(scope, &args[0], schemas)?;
|
||||
let arg0 = self.expr_to_df_expr(scope, &args[0], schema)?;
|
||||
if let Expr::Literal(ScalarValue::Null) = arg0 {
|
||||
return Ok(arg0);
|
||||
}
|
||||
let mut eargs = vec![arg0];
|
||||
if args.len() > 1 {
|
||||
let arg1 = self.expr_to_df_expr(scope, &args[1], schemas)?;
|
||||
let arg1 = self.expr_to_df_expr(scope, &args[1], schema)?;
|
||||
eargs.push(arg1);
|
||||
}
|
||||
|
||||
|
@ -2055,22 +2081,33 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
check_arg_count_range(name, args, 1, 2)?;
|
||||
|
||||
// arg0 should be a column or function
|
||||
let arg0 = self.expr_to_df_expr(scope, &args[0], schemas)?;
|
||||
let arg0 = self.expr_to_df_expr(scope, &args[0], schema)?;
|
||||
if let Expr::Literal(ScalarValue::Null) = arg0 {
|
||||
return Ok(arg0);
|
||||
}
|
||||
let mut eargs = vec![arg0];
|
||||
if args.len() > 1 {
|
||||
let arg1 = self.expr_to_df_expr(scope, &args[1], schemas)?;
|
||||
let arg1 = self.expr_to_df_expr(scope, &args[1], schema)?;
|
||||
eargs.push(arg1);
|
||||
}
|
||||
|
||||
Ok(non_negative_derivative(eargs))
|
||||
}
|
||||
"cumulative_sum" => {
|
||||
check_arg_count(name, args, 1)?;
|
||||
|
||||
// arg0 should be a column or function
|
||||
let arg0 = self.expr_to_df_expr(scope, &args[0], schema)?;
|
||||
if let Expr::Literal(ScalarValue::Null) = arg0 {
|
||||
return Ok(arg0);
|
||||
}
|
||||
|
||||
Ok(cumulative_sum(vec![arg0]))
|
||||
}
|
||||
// The TOP/BOTTOM function is handled as a `ProjectionType::TopBottomSelector`
|
||||
// query, so the planner only needs to project the single column
|
||||
// argument.
|
||||
"top" | "bottom" => self.expr_to_df_expr(scope, &args[0], schemas),
|
||||
"top" | "bottom" => self.expr_to_df_expr(scope, &args[0], schema),
|
||||
|
||||
_ => error::query(format!("Invalid function '{name}'")),
|
||||
}
|
||||
|
@ -2081,12 +2118,12 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
&self,
|
||||
scope: ExprScope,
|
||||
call: &Call,
|
||||
schemas: &Schemas,
|
||||
schema: &IQLSchema<'a>,
|
||||
) -> Result<Expr> {
|
||||
let args = call
|
||||
.args
|
||||
.iter()
|
||||
.map(|e| self.expr_to_df_expr(scope, e, schemas))
|
||||
.map(|e| self.expr_to_df_expr(scope, e, schema))
|
||||
.collect::<Result<Vec<Expr>>>()?;
|
||||
|
||||
match BuiltinScalarFunction::from_str(call.name.as_str())? {
|
||||
|
@ -2109,12 +2146,12 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
&self,
|
||||
scope: ExprScope,
|
||||
expr: &Binary,
|
||||
schemas: &Schemas,
|
||||
schema: &IQLSchema<'_>,
|
||||
) -> Result<Expr> {
|
||||
Ok(binary_expr(
|
||||
self.expr_to_df_expr(scope, &expr.lhs, schemas)?,
|
||||
self.expr_to_df_expr(scope, &expr.lhs, schema)?,
|
||||
binary_operator_to_df_operator(expr.op),
|
||||
self.expr_to_df_expr(scope, &expr.rhs, schemas)?,
|
||||
self.expr_to_df_expr(scope, &expr.rhs, schema)?,
|
||||
))
|
||||
}
|
||||
|
||||
|
@ -2123,17 +2160,15 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
condition: Option<&ConditionalExpression>,
|
||||
time_range: TimeRange,
|
||||
plan: LogicalPlan,
|
||||
schemas: &Schemas,
|
||||
ds_schema: &DataSourceSchema<'_>,
|
||||
schema: &IQLSchema<'a>,
|
||||
) -> Result<LogicalPlan> {
|
||||
let filter_expr = condition
|
||||
.map(|condition| {
|
||||
let filter_expr = self.conditional_to_df_expr(condition, schemas)?;
|
||||
let filter_expr = self.conditional_to_df_expr(condition, schema)?;
|
||||
planner_rewrite_expression::rewrite_conditional_expr(
|
||||
self.s.execution_props(),
|
||||
filter_expr,
|
||||
schemas,
|
||||
ds_schema,
|
||||
schema,
|
||||
)
|
||||
})
|
||||
.transpose()?;
|
||||
|
@ -2156,8 +2191,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
plan: LogicalPlan,
|
||||
condition: &Option<WhereClause>,
|
||||
cutoff: MetadataCutoff,
|
||||
schemas: &Schemas,
|
||||
ds_schema: &DataSourceSchema<'_>,
|
||||
schema: &IQLSchema<'_>,
|
||||
) -> Result<LogicalPlan> {
|
||||
let start_time = Timestamp::from(self.s.execution_props().query_execution_start_time);
|
||||
|
||||
|
@ -2189,7 +2223,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
time_range
|
||||
};
|
||||
|
||||
self.plan_condition_time_range(cond.as_ref(), time_range, plan, schemas, ds_schema)
|
||||
self.plan_condition_time_range(cond.as_ref(), time_range, plan, schema)
|
||||
}
|
||||
|
||||
/// Generate a logical plan for the specified `DataSource`.
|
||||
|
@ -2363,16 +2397,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
let Some(table_schema) = self.s.table_schema(&table) else {continue};
|
||||
let Some((plan, measurement_expr)) = self.create_table_ref(&table)? else {continue;};
|
||||
|
||||
let schemas = Schemas::new(plan.schema())?;
|
||||
let ds = DataSource::Table(table.clone());
|
||||
let ds_schema = ds.schema(self.s)?;
|
||||
let plan = self.plan_where_clause(
|
||||
plan,
|
||||
&condition,
|
||||
metadata_cutoff,
|
||||
&schemas,
|
||||
&ds_schema,
|
||||
)?;
|
||||
let schema = IQLSchema::new_from_ds_schema(plan.schema(), ds.schema(self.s)?)?;
|
||||
let plan =
|
||||
self.plan_where_clause(plan, &condition, metadata_cutoff, &schema)?;
|
||||
|
||||
let tags = table_schema
|
||||
.iter()
|
||||
|
@ -2616,16 +2644,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
|
||||
let Some((plan, measurement_expr)) = self.create_table_ref(&table)? else {continue;};
|
||||
|
||||
let schemas = Schemas::new(plan.schema())?;
|
||||
let ds = DataSource::Table(table.clone());
|
||||
let ds_schema = ds.schema(self.s)?;
|
||||
let plan = self.plan_where_clause(
|
||||
plan,
|
||||
&show_tag_values.condition,
|
||||
metadata_cutoff,
|
||||
&schemas,
|
||||
&ds_schema,
|
||||
)?;
|
||||
let schema = IQLSchema::new_from_ds_schema(plan.schema(), ds.schema(self.s)?)?;
|
||||
let plan =
|
||||
self.plan_where_clause(plan, &show_tag_values.condition, metadata_cutoff, &schema)?;
|
||||
|
||||
for key in keys {
|
||||
let idx = plan
|
||||
|
@ -2722,16 +2744,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
|||
for table in tables {
|
||||
let Some((plan, _measurement_expr)) = self.create_table_ref(&table)? else {continue;};
|
||||
|
||||
let schemas = Schemas::new(plan.schema())?;
|
||||
let ds = DataSource::Table(table.clone());
|
||||
let ds_schema = ds.schema(self.s)?;
|
||||
let plan = self.plan_where_clause(
|
||||
plan,
|
||||
&condition,
|
||||
metadata_cutoff,
|
||||
&schemas,
|
||||
&ds_schema,
|
||||
)?;
|
||||
let schema = IQLSchema::new_from_ds_schema(plan.schema(), ds.schema(self.s)?)?;
|
||||
let plan =
|
||||
self.plan_where_clause(plan, &condition, metadata_cutoff, &schema)?;
|
||||
|
||||
let plan = LogicalPlanBuilder::from(plan)
|
||||
.limit(0, Some(1))?
|
||||
|
@ -3072,6 +3088,16 @@ fn is_aggregate_field(f: &Field) -> bool {
|
|||
.is_break()
|
||||
}
|
||||
|
||||
/// A utility function that checks whether `f` is an aggregate field
|
||||
/// that should be filled with a 0 rather than a NULL.
///
/// Returns `true` when the walk over `f.expr` stops early on a call named
/// `count`; that is the only function name matched here.
/// NOTE(review): presumably `walk_expr` also visits nested expressions, so a
/// `count` call wrapped in other expressions is detected too — confirm
/// against `walk_expr`.
|
||||
fn is_zero_filled_aggregate_field(f: &Field) -> bool {
|
||||
walk_expr(&f.expr, &mut |e| match e {
|
||||
// Break out of the walk at the first call named `count`.
IQLExpr::Call(Call { name, .. }) if name == "count" => ControlFlow::Break(()),
|
||||
_ => ControlFlow::Continue(()),
|
||||
})
|
||||
.is_break()
|
||||
}
|
||||
|
||||
fn conditional_op_to_operator(op: ConditionalOperator) -> Result<Operator> {
|
||||
match op {
|
||||
ConditionalOperator::Eq => Ok(Operator::Eq),
|
||||
|
@ -3886,7 +3912,7 @@ mod test {
|
|||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, difference [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), difference:Float64;N]
|
||||
Filter: NOT difference IS NULL [time:Timestamp(Nanosecond, None), difference:Float64;N]
|
||||
Projection: cpu.time AS time, difference(cpu.usage_idle) AS difference [time:Timestamp(Nanosecond, None), difference:Float64;N]
|
||||
WindowAggr: windowExpr=[[AggregateUDF { name: "difference", signature: Signature { type_signature: OneOf([Exact([Int64]), Exact([UInt64]), Exact([Float64])]), volatility: Immutable }, fun: "<FUNC>" }(cpu.usage_idle) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS difference(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, difference(cpu.usage_idle):Float64;N]
|
||||
WindowAggr: windowExpr=[[difference(cpu.usage_idle) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS difference(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, difference(cpu.usage_idle):Float64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
|
||||
|
@ -3896,7 +3922,7 @@ mod test {
|
|||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, difference [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, difference:Float64;N]
|
||||
Filter: NOT difference IS NULL [time:Timestamp(Nanosecond, None);N, difference:Float64;N]
|
||||
Projection: time, difference(AVG(cpu.usage_idle)) AS difference [time:Timestamp(Nanosecond, None);N, difference:Float64;N]
|
||||
WindowAggr: windowExpr=[[AggregateUDF { name: "difference", signature: Signature { type_signature: OneOf([Exact([Int64]), Exact([UInt64]), Exact([Float64])]), volatility: Immutable }, fun: "<FUNC>" }(AVG(cpu.usage_idle)) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS difference(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, difference(AVG(cpu.usage_idle)):Float64;N]
|
||||
WindowAggr: windowExpr=[[difference(AVG(cpu.usage_idle)) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS difference(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, difference(AVG(cpu.usage_idle)):Float64;N]
|
||||
GapFill: groupBy=[time], aggr=[[AVG(cpu.usage_idle)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
|
||||
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
|
@ -3912,7 +3938,7 @@ mod test {
|
|||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, non_negative_difference [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_negative_difference:Float64;N]
|
||||
Filter: NOT non_negative_difference IS NULL [time:Timestamp(Nanosecond, None), non_negative_difference:Float64;N]
|
||||
Projection: cpu.time AS time, non_negative_difference(cpu.usage_idle) AS non_negative_difference [time:Timestamp(Nanosecond, None), non_negative_difference:Float64;N]
|
||||
WindowAggr: windowExpr=[[AggregateUDF { name: "non_negative_difference", signature: Signature { type_signature: OneOf([Exact([Int64]), Exact([UInt64]), Exact([Float64])]), volatility: Immutable }, fun: "<FUNC>" }(cpu.usage_idle) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS non_negative_difference(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, non_negative_difference(cpu.usage_idle):Float64;N]
|
||||
WindowAggr: windowExpr=[[non_negative_difference(cpu.usage_idle) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS non_negative_difference(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, non_negative_difference(cpu.usage_idle):Float64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
|
||||
|
@ -3922,7 +3948,7 @@ mod test {
|
|||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, non_negative_difference [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, non_negative_difference:Float64;N]
|
||||
Filter: NOT non_negative_difference IS NULL [time:Timestamp(Nanosecond, None);N, non_negative_difference:Float64;N]
|
||||
Projection: time, non_negative_difference(AVG(cpu.usage_idle)) AS non_negative_difference [time:Timestamp(Nanosecond, None);N, non_negative_difference:Float64;N]
|
||||
WindowAggr: windowExpr=[[AggregateUDF { name: "non_negative_difference", signature: Signature { type_signature: OneOf([Exact([Int64]), Exact([UInt64]), Exact([Float64])]), volatility: Immutable }, fun: "<FUNC>" }(AVG(cpu.usage_idle)) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS non_negative_difference(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, non_negative_difference(AVG(cpu.usage_idle)):Float64;N]
|
||||
WindowAggr: windowExpr=[[non_negative_difference(AVG(cpu.usage_idle)) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS non_negative_difference(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, non_negative_difference(AVG(cpu.usage_idle)):Float64;N]
|
||||
GapFill: groupBy=[time], aggr=[[AVG(cpu.usage_idle)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
|
||||
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
|
@ -3967,7 +3993,7 @@ mod test {
|
|||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, derivative [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), derivative:Float64;N]
|
||||
Filter: NOT derivative IS NULL [time:Timestamp(Nanosecond, None), derivative:Float64;N]
|
||||
Projection: cpu.time AS time, derivative(cpu.usage_idle) AS derivative [time:Timestamp(Nanosecond, None), derivative:Float64;N]
|
||||
WindowAggr: windowExpr=[[AggregateUDF { name: "derivative(unit: 1000000000)", signature: Signature { type_signature: OneOf([Exact([Timestamp(Nanosecond, None), Int64]), Exact([Timestamp(Nanosecond, None), UInt64]), Exact([Timestamp(Nanosecond, None), Float64])]), volatility: Immutable }, fun: "<FUNC>" }(cpu.time, cpu.usage_idle) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS derivative(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, derivative(cpu.usage_idle):Float64;N]
|
||||
WindowAggr: windowExpr=[[derivative(cpu.usage_idle, IntervalMonthDayNano("1000000000"), cpu.time) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS derivative(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, derivative(cpu.usage_idle):Float64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
|
||||
|
@ -3977,7 +4003,7 @@ mod test {
|
|||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, derivative [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, derivative:Float64;N]
|
||||
Filter: NOT derivative IS NULL [time:Timestamp(Nanosecond, None);N, derivative:Float64;N]
|
||||
Projection: time, derivative(AVG(cpu.usage_idle)) AS derivative [time:Timestamp(Nanosecond, None);N, derivative:Float64;N]
|
||||
WindowAggr: windowExpr=[[AggregateUDF { name: "derivative(unit: 10000000000)", signature: Signature { type_signature: OneOf([Exact([Timestamp(Nanosecond, None), Int64]), Exact([Timestamp(Nanosecond, None), UInt64]), Exact([Timestamp(Nanosecond, None), Float64])]), volatility: Immutable }, fun: "<FUNC>" }(time, AVG(cpu.usage_idle)) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS derivative(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, derivative(AVG(cpu.usage_idle)):Float64;N]
|
||||
WindowAggr: windowExpr=[[derivative(AVG(cpu.usage_idle), IntervalMonthDayNano("10000000000"), time) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS derivative(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, derivative(AVG(cpu.usage_idle)):Float64;N]
|
||||
GapFill: groupBy=[time], aggr=[[AVG(cpu.usage_idle)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
|
||||
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
|
@ -3993,7 +4019,7 @@ mod test {
|
|||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, non_negative_derivative [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_negative_derivative:Float64;N]
|
||||
Filter: NOT non_negative_derivative IS NULL [time:Timestamp(Nanosecond, None), non_negative_derivative:Float64;N]
|
||||
Projection: cpu.time AS time, non_negative_derivative(cpu.usage_idle) AS non_negative_derivative [time:Timestamp(Nanosecond, None), non_negative_derivative:Float64;N]
|
||||
WindowAggr: windowExpr=[[AggregateUDF { name: "non_negative_derivative(unit: 1000000000)", signature: Signature { type_signature: OneOf([Exact([Timestamp(Nanosecond, None), Int64]), Exact([Timestamp(Nanosecond, None), UInt64]), Exact([Timestamp(Nanosecond, None), Float64])]), volatility: Immutable }, fun: "<FUNC>" }(cpu.time, cpu.usage_idle) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS non_negative_derivative(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, non_negative_derivative(cpu.usage_idle):Float64;N]
|
||||
WindowAggr: windowExpr=[[non_negative_derivative(cpu.usage_idle, IntervalMonthDayNano("1000000000"), cpu.time) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS non_negative_derivative(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, non_negative_derivative(cpu.usage_idle):Float64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
|
||||
|
@ -4003,7 +4029,46 @@ mod test {
|
|||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, non_negative_derivative [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N]
|
||||
Filter: NOT non_negative_derivative IS NULL [time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N]
|
||||
Projection: time, non_negative_derivative(AVG(cpu.usage_idle)) AS non_negative_derivative [time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N]
|
||||
WindowAggr: windowExpr=[[AggregateUDF { name: "non_negative_derivative(unit: 10000000000)", signature: Signature { type_signature: OneOf([Exact([Timestamp(Nanosecond, None), Int64]), Exact([Timestamp(Nanosecond, None), UInt64]), Exact([Timestamp(Nanosecond, None), Float64])]), volatility: Immutable }, fun: "<FUNC>" }(time, AVG(cpu.usage_idle)) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS non_negative_derivative(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, non_negative_derivative(AVG(cpu.usage_idle)):Float64;N]
|
||||
WindowAggr: windowExpr=[[non_negative_derivative(AVG(cpu.usage_idle), IntervalMonthDayNano("10000000000"), time) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS non_negative_derivative(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, non_negative_derivative(AVG(cpu.usage_idle)):Float64;N]
|
||||
GapFill: groupBy=[time], aggr=[[AVG(cpu.usage_idle)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
|
||||
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
|
||||
// selector
|
||||
assert_snapshot!(plan("SELECT NON_NEGATIVE_DERIVATIVE(LAST(usage_idle)) FROM cpu GROUP BY TIME(10s)"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, non_negative_derivative [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N]
|
||||
Filter: NOT non_negative_derivative IS NULL [time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N]
|
||||
Projection: time, non_negative_derivative(selector_last(cpu.usage_idle,cpu.time)[value]) AS non_negative_derivative [time:Timestamp(Nanosecond, None);N, non_negative_derivative:Float64;N]
|
||||
WindowAggr: windowExpr=[[non_negative_derivative((selector_last(cpu.usage_idle,cpu.time))[value], IntervalMonthDayNano("10000000000"), time) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS non_negative_derivative(selector_last(cpu.usage_idle,cpu.time)[value])]] [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N, non_negative_derivative(selector_last(cpu.usage_idle,cpu.time)[value]):Float64;N]
|
||||
GapFill: groupBy=[time], aggr=[[selector_last(cpu.usage_idle,cpu.time)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[selector_last(cpu.usage_idle, cpu.time)]] [time:Timestamp(Nanosecond, None);N, selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
|
||||
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cumulative_sum() {
|
||||
// no aggregates
|
||||
assert_snapshot!(plan("SELECT CUMULATIVE_SUM(usage_idle) FROM cpu"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cumulative_sum:Float64;N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, cumulative_sum [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cumulative_sum:Float64;N]
|
||||
Filter: NOT cumulative_sum IS NULL [time:Timestamp(Nanosecond, None), cumulative_sum:Float64;N]
|
||||
Projection: cpu.time AS time, cumulative_sum(cpu.usage_idle) AS cumulative_sum [time:Timestamp(Nanosecond, None), cumulative_sum:Float64;N]
|
||||
WindowAggr: windowExpr=[[cumumlative_sum(cpu.usage_idle) ORDER BY [cpu.time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS cumulative_sum(cpu.usage_idle)]] [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N, cumulative_sum(cpu.usage_idle):Float64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
|
||||
// aggregate
|
||||
assert_snapshot!(plan("SELECT CUMULATIVE_SUM(MEAN(usage_idle)) FROM cpu GROUP BY TIME(10s)"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, cumulative_sum:Float64;N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, time, cumulative_sum [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, cumulative_sum:Float64;N]
|
||||
Filter: NOT cumulative_sum IS NULL [time:Timestamp(Nanosecond, None);N, cumulative_sum:Float64;N]
|
||||
Projection: time, cumulative_sum(AVG(cpu.usage_idle)) AS cumulative_sum [time:Timestamp(Nanosecond, None);N, cumulative_sum:Float64;N]
|
||||
WindowAggr: windowExpr=[[cumumlative_sum(AVG(cpu.usage_idle)) ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS cumulative_sum(AVG(cpu.usage_idle))]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N, cumulative_sum(AVG(cpu.usage_idle)):Float64;N]
|
||||
GapFill: groupBy=[time], aggr=[[AVG(cpu.usage_idle)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), cpu.time, TimestampNanosecond(0, None)) AS time]], aggr=[[AVG(cpu.usage_idle)]] [time:Timestamp(Nanosecond, None);N, AVG(cpu.usage_idle):Float64;N]
|
||||
Filter: cpu.time <= TimestampNanosecond(1672531200000000000, None) [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
|
@ -4043,7 +4108,7 @@ mod test {
|
|||
"###);
|
||||
assert_snapshot!(plan("SELECT COUNT(DISTINCT usage_idle) FROM cpu"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, COUNT(DISTINCT cpu.usage_idle) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, coalesce_struct(COUNT(DISTINCT cpu.usage_idle), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N]
|
||||
Aggregate: groupBy=[[]], aggr=[[COUNT(DISTINCT cpu.usage_idle)]] [COUNT(DISTINCT cpu.usage_idle):Int64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
|
@ -4114,7 +4179,7 @@ mod test {
|
|||
fn test_selectors_and_aggregate() {
|
||||
assert_snapshot!(plan("SELECT LAST(usage_idle), COUNT(usage_idle) FROM cpu"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), last:Float64;N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, (selector_last(cpu.usage_idle,cpu.time))[value] AS last, COUNT(cpu.usage_idle) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), last:Float64;N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, (selector_last(cpu.usage_idle,cpu.time))[value] AS last, coalesce_struct(COUNT(cpu.usage_idle), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), last:Float64;N, count:Int64;N]
|
||||
Aggregate: groupBy=[[]], aggr=[[selector_last(cpu.usage_idle, cpu.time), COUNT(cpu.usage_idle)]] [selector_last(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N, COUNT(cpu.usage_idle):Int64;N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
|
@ -4793,20 +4858,20 @@ mod test {
|
|||
fn no_group_by() {
|
||||
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N]
|
||||
Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
"###);
|
||||
|
||||
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY non_existent"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_existent:Null;N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, NULL AS non_existent, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_existent:Null;N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, NULL AS non_existent, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_existent:Null;N, count:Int64;N]
|
||||
Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
"###);
|
||||
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY foo"), @r###"
|
||||
Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
|
||||
Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
"###);
|
||||
|
@ -4814,7 +4879,7 @@ mod test {
|
|||
// The `COUNT(f64_field)` aggregate is only projected once in the Aggregate and reused in the projection
|
||||
assert_snapshot!(plan("SELECT COUNT(f64_field), COUNT(f64_field) + COUNT(f64_field), COUNT(f64_field) * 3 FROM data"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_count:Int64;N, count_1:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, COUNT(data.f64_field) AS count, COUNT(data.f64_field) + COUNT(data.f64_field) AS count_count, COUNT(data.f64_field) * Int64(3) AS count_1 [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_count:Int64;N, count_1:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count, coalesce_struct(COUNT(data.f64_field), Int64(0)) + coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count_count, coalesce_struct(COUNT(data.f64_field), Int64(0)) * Int64(3) AS count_1 [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_count:Int64;N, count_1:Int64;N]
|
||||
Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
"###);
|
||||
|
@ -4822,7 +4887,7 @@ mod test {
|
|||
// non-existent tags are excluded from the Aggregate groupBy and Sort operators
|
||||
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY foo, non_existent"), @r###"
|
||||
Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, non_existent:Null;N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, NULL AS non_existent, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, non_existent:Null;N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, NULL AS non_existent, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, non_existent:Null;N, count:Int64;N]
|
||||
Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
"###);
|
||||
|
@ -4830,7 +4895,7 @@ mod test {
|
|||
// Aggregate expression is projected once and reused in final projection
|
||||
assert_snapshot!(plan("SELECT COUNT(f64_field), COUNT(f64_field) * 2 FROM data"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_1:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, COUNT(data.f64_field) AS count, COUNT(data.f64_field) * Int64(2) AS count_1 [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_1:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count, coalesce_struct(COUNT(data.f64_field), Int64(0)) * Int64(2) AS count_1 [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_1:Int64;N]
|
||||
Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
"###);
|
||||
|
@ -4869,7 +4934,7 @@ mod test {
|
|||
fn group_by_time() {
|
||||
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(none)"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
|
@ -4878,7 +4943,7 @@ mod test {
|
|||
// supports offset parameter
|
||||
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s, 5s) FILL(none)"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(5000000000, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
|
@ -4890,7 +4955,7 @@ mod test {
|
|||
// No time bounds
|
||||
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s)"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
|
@ -4903,7 +4968,7 @@ mod test {
|
|||
// No lower time bounds
|
||||
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time < '2022-10-31T02:02:00Z' GROUP BY TIME(10s)"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1667181719999999999, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Filter: data.time <= TimestampNanosecond(1667181719999999999, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
|
@ -4916,7 +4981,7 @@ mod test {
|
|||
// No upper time bounds
|
||||
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time >= '2022-10-31T02:00:00Z' GROUP BY TIME(10s)"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Included(Literal(TimestampNanosecond(1667181600000000000, None)))..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Filter: data.time >= TimestampNanosecond(1667181600000000000, None) AND data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
|
@ -4929,7 +4994,7 @@ mod test {
|
|||
// Default is FILL(null)
|
||||
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data WHERE time >= '2022-10-31T02:00:00Z' AND time < '2022-10-31T02:02:00Z' GROUP BY TIME(10s)"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Included(Literal(TimestampNanosecond(1667181600000000000, None)))..Included(Literal(TimestampNanosecond(1667181719999999999, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Filter: data.time >= TimestampNanosecond(1667181600000000000, None) AND data.time <= TimestampNanosecond(1667181719999999999, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
|
@ -4941,7 +5006,7 @@ mod test {
|
|||
fn group_by_time_gapfill_default_is_fill_null1() {
|
||||
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s)"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
|
@ -4953,7 +5018,7 @@ mod test {
|
|||
fn group_by_time_gapfill_default_is_fill_null2() {
|
||||
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(null)"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
GapFill: groupBy=[time], aggr=[[COUNT(data.f64_field)]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
|
@ -4965,7 +5030,7 @@ mod test {
|
|||
fn group_by_time_gapfill_default_is_fill_null3() {
|
||||
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(previous)"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
GapFill: groupBy=[time], aggr=[[LOCF(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
|
@ -4989,7 +5054,7 @@ mod test {
|
|||
fn group_by_time_gapfill_default_is_fill_null5() {
|
||||
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(linear)"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
GapFill: groupBy=[time], aggr=[[INTERPOLATE(COUNT(data.f64_field))]], time_column=time, stride=IntervalMonthDayNano("10000000000"), range=Unbounded..Included(Literal(TimestampNanosecond(1672531200000000000, None))) [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000000000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
|
@ -5031,7 +5096,7 @@ mod test {
|
|||
Filter: iox::row <= Int64(1) [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N]
|
||||
WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [foo] ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS iox::row]] [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N]
|
||||
Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
|
||||
Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
"###);
|
||||
|
@ -5045,7 +5110,7 @@ mod test {
|
|||
Filter: iox::row > Int64(1) [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N]
|
||||
WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [foo] ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS iox::row]] [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N]
|
||||
Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
|
||||
Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
"###);
|
||||
|
@ -5059,7 +5124,7 @@ mod test {
|
|||
Filter: iox::row BETWEEN Int64(4) AND Int64(5) [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N]
|
||||
WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [foo] ORDER BY [time ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS iox::row]] [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N, iox::row:UInt64;N]
|
||||
Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N]
|
||||
Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
"###);
|
||||
|
@ -5085,7 +5150,7 @@ mod test {
|
|||
fn group_by_time_precision() {
|
||||
assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10u) FILL(none)"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, coalesce_struct(COUNT(data.f64_field), Int64(0)) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N]
|
||||
Aggregate: groupBy=[[date_bin(IntervalMonthDayNano("10000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N]
|
||||
Filter: data.time <= TimestampNanosecond(1672531200000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N]
|
||||
|
@ -5333,6 +5398,22 @@ mod test {
|
|||
"###);
|
||||
}
|
||||
|
||||
/// Snapshot test: plans queries that call a function on `foo` while also
/// aliasing the result to `foo` and grouping by `foo`.
/// NOTE(review): `foo` does not appear in the `cpu` TableScan schema shown in
/// the snapshots below; presumably this exercises the "function on a tag
/// column yields NULL" rule — confirm against the planner.
#[test]
|
||||
fn test_select_function_tag_column() {
|
||||
// `last(foo)`: the expected plan projects `NULL AS foo` and the Aggregate has an empty groupBy.
assert_snapshot!(plan("SELECT last(foo) as foo, first(usage_idle) from cpu group by foo"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Null;N, first:Float64;N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, NULL AS foo, (selector_first(cpu.usage_idle,cpu.time))[value] AS first [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Null;N, first:Float64;N]
|
||||
Aggregate: groupBy=[[]], aggr=[[selector_first(cpu.usage_idle, cpu.time)]] [selector_first(cpu.usage_idle,cpu.time):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
// `count(foo)`: the expected plan yields two Null-typed output columns, `foo` and `foo_1`.
assert_snapshot!(plan("SELECT count(foo) as foo, first(usage_idle) from cpu group by foo"), @r###"
|
||||
Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Null;N, foo_1:Null;N, first:Float64;N]
|
||||
Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, TimestampNanosecond(0, None) AS time, NULL AS foo, (coalesce_struct(selector_first(cpu.usage_idle,cpu.time,NULL), Struct({value:Float64(0),time:TimestampNanosecond(0, None),other_1:NULL})))[other_1] AS foo_1, (selector_first(cpu.usage_idle,cpu.time,NULL))[value] AS first [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Null;N, foo_1:Null;N, first:Float64;N]
|
||||
Aggregate: groupBy=[[]], aggr=[[selector_first(cpu.usage_idle, cpu.time, NULL)]] [selector_first(cpu.usage_idle,cpu.time,NULL):Struct([Field { name: "value", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "time", data_type: Timestamp(Nanosecond, None), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "other_1", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]);N]
|
||||
TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N]
|
||||
"###);
|
||||
}
|
||||
|
||||
// The following is an outline of additional scenarios to develop
|
||||
// as the planner learns more features.
|
||||
// This is not an exhaustive list and is expected to grow as the
|
||||
|
|
|
@ -27,11 +27,8 @@ pub(super) fn make_tag_key_column_meta(
|
|||
let index_map = fields
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(index, f)| match &f.expr {
|
||||
IQLExpr::VarRef(VarRef {
|
||||
name,
|
||||
data_type: Some(VarRefDataType::Tag) | None,
|
||||
}) => Some((name.as_str(), index + START_INDEX)),
|
||||
.filter_map(|(index, f)| match &f.data_type {
|
||||
Some(InfluxColumnType::Tag) | None => Some((f.name.as_str(), index + START_INDEX)),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
|
|
@ -123,44 +123,42 @@
|
|||
//! [`Eval`]: https://github.com/influxdata/influxql/blob/1ba470371ec093d57a726b143fe6ccbacf1b452b/ast.go#L4137
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::plan::util::Schemas;
|
||||
use crate::plan::util::IQLSchema;
|
||||
use arrow::datatypes::DataType;
|
||||
use datafusion::common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
|
||||
use datafusion::common::{Result, ScalarValue};
|
||||
use datafusion::logical_expr::expr::{AggregateFunction, AggregateUDF, WindowFunction};
|
||||
use datafusion::logical_expr::{
|
||||
binary_expr, cast, coalesce, lit, BinaryExpr, Expr, ExprSchemable, Operator,
|
||||
binary_expr, cast, coalesce, lit, BinaryExpr, Expr, ExprSchemable, GetIndexedField, Operator,
|
||||
};
|
||||
use datafusion::optimizer::simplify_expressions::{ExprSimplifier, SimplifyContext};
|
||||
use datafusion::physical_expr::execution_props::ExecutionProps;
|
||||
use datafusion::prelude::when;
|
||||
use datafusion::prelude::{when, Column};
|
||||
use observability_deps::tracing::trace;
|
||||
use predicate::rpc_predicate::{iox_expr_rewrite, simplify_predicate};
|
||||
|
||||
use super::ir::DataSourceSchema;
|
||||
|
||||
/// Perform a series of passes to rewrite `expr` in compliance with InfluxQL behavior
|
||||
/// in an effort to ensure the query executes without error.
|
||||
pub(super) fn rewrite_conditional_expr(
|
||||
exec_props: &ExecutionProps,
|
||||
expr: Expr,
|
||||
schemas: &Schemas,
|
||||
ds_schema: &DataSourceSchema<'_>,
|
||||
schema: &IQLSchema<'_>,
|
||||
) -> Result<Expr> {
|
||||
let simplify_context =
|
||||
SimplifyContext::new(exec_props).with_schema(Arc::clone(&schemas.df_schema));
|
||||
SimplifyContext::new(exec_props).with_schema(Arc::clone(&schema.df_schema));
|
||||
let simplifier = ExprSimplifier::new(simplify_context);
|
||||
|
||||
Ok(expr)
|
||||
.map(|expr| log_rewrite(expr, "original"))
|
||||
// make regex matching with invalid types produce false
|
||||
.and_then(|expr| expr.rewrite(&mut FixRegularExpressions { schemas }))
|
||||
.and_then(|expr| expr.rewrite(&mut FixRegularExpressions { schema }))
|
||||
.map(|expr| log_rewrite(expr, "after fix_regular_expressions"))
|
||||
// rewrite exprs with incompatible operands to NULL or FALSE
|
||||
// (seems like FixRegularExpressions could be combined into this pass)
|
||||
.and_then(|expr| rewrite_expr(expr, schemas))
|
||||
.and_then(|expr| rewrite_expr(expr, schema))
|
||||
.map(|expr| log_rewrite(expr, "after rewrite_expr"))
|
||||
// Convert tag column references to CASE WHEN <tag> IS NULL THEN '' ELSE <tag> END
|
||||
.and_then(|expr| rewrite_tag_columns(expr, schemas, ds_schema))
|
||||
.and_then(|expr| rewrite_tag_columns(expr, schema))
|
||||
.map(|expr| log_rewrite(expr, "after rewrite_tag_columns"))
|
||||
// Push comparison operators into CASE exprs:
|
||||
// CASE WHEN tag0 IS NULL THEN '' ELSE tag0 END = 'foo'
|
||||
|
@ -172,7 +170,7 @@ pub(super) fn rewrite_conditional_expr(
|
|||
// - convert numeric types so that operands agree
|
||||
// - convert Utf8 to Dictionary as needed
|
||||
// The next step will fail with type errors if we don't do this.
|
||||
.and_then(|expr| simplifier.coerce(expr, Arc::clone(&schemas.df_schema)))
|
||||
.and_then(|expr| simplifier.coerce(expr, Arc::clone(&schema.df_schema)))
|
||||
.map(|expr| log_rewrite(expr, "after coerce"))
|
||||
// DataFusion expression simplification. This is important here because:
|
||||
// CASE WHEN tag0 IS NULL THEN '' = 'foo' ELSE tag0 = 'foo' END
|
||||
|
@ -206,8 +204,8 @@ fn log_rewrite(expr: Expr, description: &str) -> Expr {
|
|||
|
||||
/// Perform a series of passes to rewrite `expr`, used as a column projection,
|
||||
/// to match the behavior of InfluxQL.
|
||||
pub(super) fn rewrite_field_expr(expr: Expr, schemas: &Schemas) -> Result<Expr> {
|
||||
rewrite_expr(expr, schemas)
|
||||
pub(super) fn rewrite_field_expr(expr: Expr, schema: &IQLSchema<'_>) -> Result<Expr> {
|
||||
rewrite_expr(expr, schema)
|
||||
}
|
||||
|
||||
/// The expression was rewritten
|
||||
|
@ -225,7 +223,7 @@ fn no(expr: Expr) -> Result<Transformed<Expr>> {
|
|||
///
|
||||
/// Rewrite and coerce the expression tree to model the behavior
|
||||
/// of an InfluxQL query.
|
||||
fn rewrite_expr(expr: Expr, schemas: &Schemas) -> Result<Expr> {
|
||||
fn rewrite_expr(expr: Expr, schema: &IQLSchema<'_>) -> Result<Expr> {
|
||||
expr.transform(&|expr| {
|
||||
match expr {
|
||||
Expr::BinaryExpr(BinaryExpr {
|
||||
|
@ -233,8 +231,8 @@ fn rewrite_expr(expr: Expr, schemas: &Schemas) -> Result<Expr> {
|
|||
op,
|
||||
ref right,
|
||||
}) => {
|
||||
let lhs_type = left.get_type(&schemas.df_schema)?;
|
||||
let rhs_type = right.get_type(&schemas.df_schema)?;
|
||||
let lhs_type = left.get_type(&schema.df_schema)?;
|
||||
let rhs_type = right.get_type(&schema.df_schema)?;
|
||||
|
||||
match (lhs_type, op, rhs_type) {
|
||||
//
|
||||
|
@ -422,6 +420,23 @@ fn rewrite_expr(expr: Expr, schemas: &Schemas) -> Result<Expr> {
|
|||
_ => yes(lit(ScalarValue::Null)),
|
||||
}
|
||||
}
|
||||
|
||||
// Invoking an aggregate or window function on a tag column should return `NULL`
|
||||
// to be consistent with OG.
|
||||
Expr::AggregateFunction(AggregateFunction { ref args, .. } )
|
||||
| Expr::AggregateUDF(AggregateUDF { ref args, .. } )
|
||||
| Expr::WindowFunction(WindowFunction { ref args, .. } ) => match &args[0] {
|
||||
Expr::Column(Column { ref name, .. }) if schema.is_tag_field(name) => yes(lit(ScalarValue::Null)),
|
||||
_ => no(expr),
|
||||
}
|
||||
|
||||
// If the InfluxQL query used a selector on a tag column, like `last(tag_col)`
|
||||
// then there will be an indexed field. Convert this to `NULL` as well.
|
||||
Expr::GetIndexedField(GetIndexedField { expr: ref e, .. }) => match e.as_ref() {
|
||||
Expr::Literal(ScalarValue::Null) => yes(lit(ScalarValue::Null)),
|
||||
_ => no(expr),
|
||||
}
|
||||
|
||||
//
|
||||
// Literals and other expressions are passed through to DataFusion,
|
||||
// as it will handle evaluating function calls, etc
|
||||
|
@ -467,7 +482,7 @@ fn rewrite_boolean(lhs: Expr, op: Operator, rhs: Expr) -> Expr {
|
|||
|
||||
/// Rewrite regex conditional expressions to match InfluxQL behaviour.
|
||||
struct FixRegularExpressions<'a> {
|
||||
schemas: &'a Schemas,
|
||||
schema: &'a IQLSchema<'a>,
|
||||
}
|
||||
|
||||
impl<'a> TreeNodeRewriter for FixRegularExpressions<'a> {
|
||||
|
@ -483,7 +498,7 @@ impl<'a> TreeNodeRewriter for FixRegularExpressions<'a> {
|
|||
right,
|
||||
}) => {
|
||||
Ok(if let Expr::Column(ref col) = *left {
|
||||
match self.schemas.df_schema.field_from_column(col)?.data_type() {
|
||||
match self.schema.df_schema.field_from_column(col)?.data_type() {
|
||||
DataType::Dictionary(..) | DataType::Utf8 => {
|
||||
Expr::BinaryExpr(BinaryExpr { left, op, right })
|
||||
}
|
||||
|
@ -517,13 +532,9 @@ impl<'a> TreeNodeRewriter for FixRegularExpressions<'a> {
|
|||
/// case when tag0 is null then "" else tag0 end
|
||||
/// ```
|
||||
/// This ensures that we treat tags with the same semantics as OG InfluxQL.
|
||||
fn rewrite_tag_columns(
|
||||
expr: Expr,
|
||||
_schemas: &Schemas,
|
||||
ds_schema: &DataSourceSchema<'_>,
|
||||
) -> Result<Expr> {
|
||||
fn rewrite_tag_columns(expr: Expr, schema: &IQLSchema<'_>) -> Result<Expr> {
|
||||
expr.transform(&|expr| match expr {
|
||||
Expr::Column(ref c) if ds_schema.is_tag_field(&c.name) => {
|
||||
Expr::Column(ref c) if schema.is_tag_field(&c.name) => {
|
||||
yes(when(expr.clone().is_null(), lit("")).otherwise(expr)?)
|
||||
}
|
||||
e => no(e),
|
||||
|
@ -532,6 +543,8 @@ fn rewrite_tag_columns(
|
|||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::plan::ir::DataSourceSchema;
|
||||
|
||||
use super::*;
|
||||
use datafusion::logical_expr::lit_timestamp_nano;
|
||||
use datafusion::prelude::col;
|
||||
|
@ -542,7 +555,7 @@ mod test {
|
|||
use schema::{InfluxFieldType, SchemaBuilder};
|
||||
use std::sync::Arc;
|
||||
|
||||
fn new_schemas() -> (Schemas, DataSourceSchema<'static>) {
|
||||
fn new_schema() -> IQLSchema<'static> {
|
||||
let iox_schema = SchemaBuilder::new()
|
||||
.measurement("m0")
|
||||
.timestamp()
|
||||
|
@ -556,7 +569,8 @@ mod test {
|
|||
.build()
|
||||
.expect("schema failed");
|
||||
let df_schema: DFSchemaRef = Arc::clone(iox_schema.inner()).to_dfschema_ref().unwrap();
|
||||
(Schemas { df_schema }, DataSourceSchema::Table(iox_schema))
|
||||
let ds_schema = DataSourceSchema::Table(iox_schema);
|
||||
IQLSchema::new_from_ds_schema(&df_schema, ds_schema).unwrap()
|
||||
}
|
||||
|
||||
/// Tests which validate that division is coalesced to `0`, to handle division by zero,
|
||||
|
@ -566,7 +580,7 @@ mod test {
|
|||
/// binary expression to a scalar value, `0`.
|
||||
#[test]
|
||||
fn test_division() {
|
||||
let (schemas, _) = new_schemas();
|
||||
let schemas = new_schema();
|
||||
let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string();
|
||||
|
||||
// Float64
|
||||
|
@ -627,7 +641,7 @@ mod test {
|
|||
#[test]
|
||||
fn test_pass_thru() {
|
||||
test_helpers::maybe_start_logging();
|
||||
let (schemas, _) = new_schemas();
|
||||
let schemas = new_schema();
|
||||
let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string();
|
||||
|
||||
let expr = lit(5.5).gt(lit(1_i64));
|
||||
|
@ -664,9 +678,9 @@ mod test {
|
|||
#[test]
|
||||
fn test_string_operations() {
|
||||
let props = execution_props();
|
||||
let (schemas, ds_schema) = new_schemas();
|
||||
let schemas = new_schema();
|
||||
let rewrite = |expr| {
|
||||
rewrite_conditional_expr(&props, expr, &schemas, &ds_schema)
|
||||
rewrite_conditional_expr(&props, expr, &schemas)
|
||||
.unwrap()
|
||||
.to_string()
|
||||
};
|
||||
|
@ -688,7 +702,7 @@ mod test {
|
|||
/// to the supported bitwise operators.
|
||||
#[test]
|
||||
fn test_boolean_operations() {
|
||||
let (schemas, _) = new_schemas();
|
||||
let schemas = new_schema();
|
||||
let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string();
|
||||
|
||||
let expr = "boolean_field".as_expr().and(lit(true));
|
||||
|
@ -743,7 +757,7 @@ mod test {
|
|||
/// Tests cases to validate Boolean and NULL data types
|
||||
#[test]
|
||||
fn test_rewrite_conditional_null() {
|
||||
let (schemas, _) = new_schemas();
|
||||
let schemas = new_schema();
|
||||
let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string();
|
||||
|
||||
// NULL on either side and boolean on the other of a binary expression
|
||||
|
@ -779,7 +793,7 @@ mod test {
|
|||
|
||||
#[test]
|
||||
fn test_time_range() {
|
||||
let (schemas, _) = new_schemas();
|
||||
let schemas = new_schema();
|
||||
let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string();
|
||||
|
||||
let expr = "time".as_expr().gt_eq(lit_timestamp_nano(1000));
|
||||
|
@ -811,7 +825,7 @@ mod test {
|
|||
/// valid operation for the given operands. These are used when projecting columns.
|
||||
#[test]
|
||||
fn test_rewrite_expr_coercion_reduce_to_null() {
|
||||
let (schemas, _) = new_schemas();
|
||||
let schemas = new_schema();
|
||||
let rewrite = |expr| rewrite_expr(expr, &schemas).unwrap().to_string();
|
||||
|
||||
//
|
||||
|
@ -851,9 +865,9 @@ mod test {
|
|||
fn test_rewrite_tag_columns_eq() {
|
||||
test_helpers::maybe_start_logging();
|
||||
let props = execution_props();
|
||||
let (schemas, ds_schema) = new_schemas();
|
||||
let schemas = new_schema();
|
||||
let rewrite = |expr| {
|
||||
rewrite_conditional_expr(&props, expr, &schemas, &ds_schema)
|
||||
rewrite_conditional_expr(&props, expr, &schemas)
|
||||
.unwrap()
|
||||
.to_string()
|
||||
};
|
||||
|
@ -904,9 +918,9 @@ mod test {
|
|||
fn test_rewrite_tag_columns_regex() {
|
||||
let props = execution_props();
|
||||
test_helpers::maybe_start_logging();
|
||||
let (schemas, ds_schema) = new_schemas();
|
||||
let schemas = new_schema();
|
||||
let rewrite = |expr| {
|
||||
rewrite_conditional_expr(&props, expr, &schemas, &ds_schema)
|
||||
rewrite_conditional_expr(&props, expr, &schemas)
|
||||
.unwrap()
|
||||
.to_string()
|
||||
};
|
||||
|
@ -931,9 +945,9 @@ mod test {
|
|||
fn test_fields_pass_thru() {
|
||||
test_helpers::maybe_start_logging();
|
||||
let props = execution_props();
|
||||
let (schemas, ds_schema) = new_schemas();
|
||||
let schemas = new_schema();
|
||||
let rewrite = |expr| {
|
||||
rewrite_conditional_expr(&props, expr, &schemas, &ds_schema)
|
||||
rewrite_conditional_expr(&props, expr, &schemas)
|
||||
.unwrap()
|
||||
.to_string()
|
||||
};
|
||||
|
|
|
@ -1029,7 +1029,7 @@ impl FieldChecker {
|
|||
ProjectionType::TopBottomSelector
|
||||
} else if self.has_group_by_time {
|
||||
if self.window_count > 0 {
|
||||
if self.window_count == self.aggregate_count {
|
||||
if self.window_count == self.aggregate_count + self.selector_count {
|
||||
ProjectionType::WindowAggregate
|
||||
} else {
|
||||
ProjectionType::WindowAggregateMixed
|
||||
|
@ -1338,11 +1338,8 @@ impl FieldChecker {
|
|||
}
|
||||
|
||||
fn check_cumulative_sum(&mut self, args: &[Expr]) -> Result<()> {
|
||||
self.inc_aggregate_count();
|
||||
self.inc_window_count();
|
||||
check_exp_args!("cumulative_sum", 1, args);
|
||||
|
||||
set_extra_intervals!(self, 1);
|
||||
|
||||
self.check_nested_symbol("cumulative_sum", &args[0])
|
||||
}
|
||||
|
||||
|
|
|
@ -1,13 +1,12 @@
|
|||
use crate::{error, NUMERICS};
|
||||
use arrow::array::{Array, ArrayRef, Int64Array};
|
||||
use arrow::datatypes::{DataType, TimeUnit};
|
||||
use arrow::datatypes::DataType;
|
||||
use datafusion::common::{downcast_value, DataFusionError, Result, ScalarValue};
|
||||
use datafusion::logical_expr::{
|
||||
Accumulator, AccumulatorFactoryFunction, AggregateUDF, ReturnTypeFunction, Signature,
|
||||
StateTypeFunction, TypeSignature, Volatility,
|
||||
};
|
||||
use once_cell::sync::Lazy;
|
||||
use std::mem::replace;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Name of the `MOVING_AVERAGE` user-defined aggregate function.
|
||||
|
@ -148,339 +147,3 @@ impl Accumulator for AvgNAccumulator {
|
|||
- std::mem::size_of_val(&self.data_type)
|
||||
}
|
||||
}
|
||||
|
||||
/// Name of the `DIFFERENCE` user-defined aggregate function.
|
||||
pub(crate) const DIFFERENCE_NAME: &str = "difference";
|
||||
|
||||
/// Definition of the `DIFFERENCE` user-defined aggregate function.
|
||||
pub(crate) static DIFFERENCE: Lazy<Arc<AggregateUDF>> = Lazy::new(|| {
|
||||
let return_type: ReturnTypeFunction = Arc::new(|dt| Ok(Arc::new(dt[0].clone())));
|
||||
let accumulator: AccumulatorFactoryFunction =
|
||||
Arc::new(|dt| Ok(Box::new(DifferenceAccumulator::new(dt))));
|
||||
let state_type: StateTypeFunction = Arc::new(|_| Ok(Arc::new(vec![])));
|
||||
Arc::new(AggregateUDF::new(
|
||||
DIFFERENCE_NAME,
|
||||
&Signature::one_of(
|
||||
NUMERICS
|
||||
.iter()
|
||||
.map(|dt| TypeSignature::Exact(vec![dt.clone()]))
|
||||
.collect(),
|
||||
Volatility::Immutable,
|
||||
),
|
||||
&return_type,
|
||||
&accumulator,
|
||||
// State shouldn't be called, so no schema to report
|
||||
&state_type,
|
||||
))
|
||||
});
|
||||
|
||||
#[derive(Debug)]
|
||||
struct DifferenceAccumulator {
|
||||
data_type: DataType,
|
||||
last: ScalarValue,
|
||||
diff: ScalarValue,
|
||||
}
|
||||
|
||||
impl DifferenceAccumulator {
|
||||
fn new(data_type: &DataType) -> Self {
|
||||
let last: ScalarValue = data_type.try_into().expect("data_type → ScalarValue");
|
||||
let diff = last.clone();
|
||||
Self {
|
||||
data_type: data_type.clone(),
|
||||
last,
|
||||
diff,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Accumulator for DifferenceAccumulator {
|
||||
/// `state` is only called when used as an aggregate function. It can be
|
||||
/// can safely left unimplemented, as this accumulator is only used as a window aggregate.
|
||||
///
|
||||
/// See: <https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.Accumulator.html#tymethod.state>
|
||||
fn state(&self) -> Result<Vec<ScalarValue>> {
|
||||
error::internal("unexpected call to DifferenceAccumulator::state")
|
||||
}
|
||||
|
||||
fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
|
||||
if values.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
let arr = &values[0];
|
||||
for index in 0..arr.len() {
|
||||
let scalar = ScalarValue::try_from_array(arr, index)?;
|
||||
if !scalar.is_null() {
|
||||
if !self.last.is_null() {
|
||||
self.diff = scalar.sub(self.last.clone())?
|
||||
}
|
||||
self.last = scalar;
|
||||
} else {
|
||||
self.diff = ScalarValue::try_from(&self.data_type).unwrap()
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// `merge_batch` is only called when used as an aggregate function. It can be
|
||||
/// can safely left unimplemented, as this accumulator is only used as a window aggregate.
|
||||
///
|
||||
/// See: <https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.Accumulator.html#tymethod.state>
|
||||
fn merge_batch(&mut self, _states: &[ArrayRef]) -> Result<()> {
|
||||
error::internal("unexpected call to DifferenceAccumulator::merge_batch")
|
||||
}
|
||||
|
||||
fn evaluate(&self) -> Result<ScalarValue> {
|
||||
Ok(self.diff.clone())
|
||||
}
|
||||
|
||||
fn size(&self) -> usize {
|
||||
std::mem::size_of_val(self)
|
||||
}
|
||||
}
|
||||
|
||||
/// Name of the `NON_NEGATIVE_DIFFERENCE` user-defined aggregate function.
|
||||
pub(crate) const NON_NEGATIVE_DIFFERENCE_NAME: &str = "non_negative_difference";
|
||||
|
||||
/// Definition of the `NON_NEGATIVE_DIFFERENCE` user-defined aggregate function.
|
||||
pub(crate) static NON_NEGATIVE_DIFFERENCE: Lazy<Arc<AggregateUDF>> = Lazy::new(|| {
|
||||
let return_type: ReturnTypeFunction = Arc::new(|dt| Ok(Arc::new(dt[0].clone())));
|
||||
let accumulator: AccumulatorFactoryFunction = Arc::new(|dt| {
|
||||
Ok(Box::new(NonNegative::<_>::new(DifferenceAccumulator::new(
|
||||
dt,
|
||||
))))
|
||||
});
|
||||
let state_type: StateTypeFunction = Arc::new(|_| Ok(Arc::new(vec![])));
|
||||
Arc::new(AggregateUDF::new(
|
||||
NON_NEGATIVE_DIFFERENCE_NAME,
|
||||
&Signature::one_of(
|
||||
NUMERICS
|
||||
.iter()
|
||||
.map(|dt| TypeSignature::Exact(vec![dt.clone()]))
|
||||
.collect(),
|
||||
Volatility::Immutable,
|
||||
),
|
||||
&return_type,
|
||||
&accumulator,
|
||||
// State shouldn't be called, so no schema to report
|
||||
&state_type,
|
||||
))
|
||||
});
|
||||
|
||||
/// Wrapper around an accumulator whose negative results are
/// reported as NULL instead.
#[derive(Debug)]
struct NonNegative<T> {
    /// The wrapped accumulator.
    acc: T,
}

impl<T> NonNegative<T> {
    /// Wraps `acc`.
    fn new(acc: T) -> Self {
        NonNegative { acc }
    }
}
|
||||
|
||||
impl<T: Accumulator> Accumulator for NonNegative<T> {
|
||||
fn state(&self) -> Result<Vec<ScalarValue>> {
|
||||
self.acc.state()
|
||||
}
|
||||
|
||||
fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
|
||||
self.acc.update_batch(values)
|
||||
}
|
||||
|
||||
fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
|
||||
self.acc.merge_batch(states)
|
||||
}
|
||||
|
||||
fn evaluate(&self) -> Result<ScalarValue> {
|
||||
Ok(match self.acc.evaluate()? {
|
||||
ScalarValue::Float64(Some(v)) if v < 0.0 => ScalarValue::Float64(None),
|
||||
ScalarValue::Int64(Some(v)) if v < 0 => ScalarValue::Int64(None),
|
||||
v => v,
|
||||
})
|
||||
}
|
||||
|
||||
fn size(&self) -> usize {
|
||||
self.acc.size()
|
||||
}
|
||||
}
|
||||
|
||||
/// Name of the `DERIVATIVE` user-defined aggregate function.
|
||||
pub(crate) const DERIVATIVE_NAME: &str = "derivative";
|
||||
|
||||
pub(crate) fn derivative_udf(unit: i64) -> AggregateUDF {
|
||||
let return_type: ReturnTypeFunction = Arc::new(|_| Ok(Arc::new(DataType::Float64)));
|
||||
let accumulator: AccumulatorFactoryFunction =
|
||||
Arc::new(move |_| Ok(Box::new(DerivativeAccumulator::new(unit))));
|
||||
let state_type: StateTypeFunction = Arc::new(|_| Ok(Arc::new(vec![])));
|
||||
let sig = Signature::one_of(
|
||||
NUMERICS
|
||||
.iter()
|
||||
.map(|dt| {
|
||||
TypeSignature::Exact(vec![
|
||||
DataType::Timestamp(TimeUnit::Nanosecond, None),
|
||||
dt.clone(),
|
||||
])
|
||||
})
|
||||
.collect(),
|
||||
Volatility::Immutable,
|
||||
);
|
||||
AggregateUDF::new(
|
||||
format!("{DERIVATIVE_NAME}(unit: {unit})").as_str(),
|
||||
&sig,
|
||||
&return_type,
|
||||
&accumulator,
|
||||
// State shouldn't be called, so no schema to report
|
||||
&state_type,
|
||||
)
|
||||
}
|
||||
|
||||
/// Name of the `NON_NEGATIVE_DERIVATIVE` user-defined aggregate function.
|
||||
pub(crate) const NON_NEGATIVE_DERIVATIVE_NAME: &str = "non_negative_derivative";
|
||||
|
||||
pub(crate) fn non_negative_derivative_udf(unit: i64) -> AggregateUDF {
|
||||
let return_type: ReturnTypeFunction = Arc::new(|_| Ok(Arc::new(DataType::Float64)));
|
||||
let accumulator: AccumulatorFactoryFunction = Arc::new(move |_| {
|
||||
Ok(Box::new(NonNegative::<_>::new(DerivativeAccumulator::new(
|
||||
unit,
|
||||
))))
|
||||
});
|
||||
let state_type: StateTypeFunction = Arc::new(|_| Ok(Arc::new(vec![])));
|
||||
let sig = Signature::one_of(
|
||||
NUMERICS
|
||||
.iter()
|
||||
.map(|dt| {
|
||||
TypeSignature::Exact(vec![
|
||||
DataType::Timestamp(TimeUnit::Nanosecond, None),
|
||||
dt.clone(),
|
||||
])
|
||||
})
|
||||
.collect(),
|
||||
Volatility::Immutable,
|
||||
);
|
||||
AggregateUDF::new(
|
||||
format!("{NON_NEGATIVE_DERIVATIVE_NAME}(unit: {unit})").as_str(),
|
||||
&sig,
|
||||
&return_type,
|
||||
&accumulator,
|
||||
// State shouldn't be called, so no schema to report
|
||||
&state_type,
|
||||
)
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct DerivativeAccumulator {
|
||||
unit: i64,
|
||||
prev: Option<Point>,
|
||||
curr: Option<Point>,
|
||||
}
|
||||
|
||||
impl DerivativeAccumulator {
|
||||
fn new(unit: i64) -> Self {
|
||||
Self {
|
||||
unit,
|
||||
prev: None,
|
||||
curr: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Accumulator for DerivativeAccumulator {
|
||||
/// `state` is only called when used as an aggregate function. It can be
|
||||
/// can safely left unimplemented, as this accumulator is only used as a window aggregate.
|
||||
///
|
||||
/// See: <https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.Accumulator.html#tymethod.state>
|
||||
fn state(&self) -> Result<Vec<ScalarValue>> {
|
||||
error::internal("unexpected call to DerivativeAccumulator::state")
|
||||
}
|
||||
|
||||
fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
|
||||
if values.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let times = &values[0];
|
||||
let arr = &values[1];
|
||||
for index in 0..arr.len() {
|
||||
let time = match ScalarValue::try_from_array(times, index)? {
|
||||
ScalarValue::TimestampNanosecond(Some(ts), _) => ts,
|
||||
v => {
|
||||
return Err(DataFusionError::Internal(format!(
|
||||
"invalid time value: {}",
|
||||
v
|
||||
)))
|
||||
}
|
||||
};
|
||||
let curr = Point::new(time, ScalarValue::try_from_array(arr, index)?);
|
||||
let prev = replace(&mut self.curr, curr);
|
||||
|
||||
// don't replace the previous value if the current value has the same timestamp.
|
||||
if self.prev.is_none()
|
||||
|| prev
|
||||
.as_ref()
|
||||
.is_some_and(|prev| prev.time > self.prev.as_ref().unwrap().time)
|
||||
{
|
||||
self.prev = prev
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// `merge_batch` is only called when used as an aggregate function. It can be
|
||||
/// can safely left unimplemented, as this accumulator is only used as a window aggregate.
|
||||
///
|
||||
/// See: <https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.Accumulator.html#tymethod.state>
|
||||
fn merge_batch(&mut self, _states: &[ArrayRef]) -> Result<()> {
|
||||
error::internal("unexpected call to DerivativeAccumulator::merge_batch")
|
||||
}
|
||||
|
||||
fn evaluate(&self) -> Result<ScalarValue> {
|
||||
Ok(ScalarValue::Float64(
|
||||
self.curr
|
||||
.as_ref()
|
||||
.and_then(|c| c.derivative(self.prev.as_ref(), self.unit)),
|
||||
))
|
||||
}
|
||||
|
||||
fn size(&self) -> usize {
|
||||
std::mem::size_of_val(self)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct Point {
|
||||
time: i64,
|
||||
value: ScalarValue,
|
||||
}
|
||||
|
||||
impl Point {
|
||||
fn new(time: i64, value: ScalarValue) -> Option<Self> {
|
||||
if value.is_null() {
|
||||
None
|
||||
} else {
|
||||
Some(Self { time, value })
|
||||
}
|
||||
}
|
||||
|
||||
fn value_as_f64(&self) -> f64 {
|
||||
match self.value {
|
||||
ScalarValue::Int64(Some(v)) => v as f64,
|
||||
ScalarValue::Float64(Some(v)) => v,
|
||||
ScalarValue::UInt64(Some(v)) => v as f64,
|
||||
_ => panic!("invalid point {:?}", self),
|
||||
}
|
||||
}
|
||||
|
||||
fn derivative(&self, prev: Option<&Self>, unit: i64) -> Option<f64> {
|
||||
prev.and_then(|prev| {
|
||||
let diff = self.value_as_f64() - prev.value_as_f64();
|
||||
let elapsed = match self.time - prev.time {
|
||||
// if the time hasn't changed then it is a NULL.
|
||||
0 => return None,
|
||||
v => v,
|
||||
} as f64;
|
||||
let devisor = elapsed / (unit as f64);
|
||||
Some(diff / devisor)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
use crate::plan::util_copy::find_exprs_in_exprs;
|
||||
use crate::{error, NUMERICS};
|
||||
use arrow::datatypes::DataType;
|
||||
use arrow::datatypes::{DataType, TimeUnit};
|
||||
use datafusion::logical_expr::{
|
||||
Expr, ReturnTypeFunction, ScalarFunctionImplementation, ScalarUDF, Signature, TypeSignature,
|
||||
Volatility,
|
||||
|
@ -21,6 +21,7 @@ pub(super) enum WindowFunction {
|
|||
NonNegativeDifference,
|
||||
Derivative,
|
||||
NonNegativeDerivative,
|
||||
CumulativeSum,
|
||||
}
|
||||
|
||||
impl WindowFunction {
|
||||
|
@ -32,6 +33,7 @@ impl WindowFunction {
|
|||
NON_NEGATIVE_DIFFERENCE_UDF_NAME => Some(Self::NonNegativeDifference),
|
||||
DERIVATIVE_UDF_NAME => Some(Self::Derivative),
|
||||
NON_NEGATIVE_DERIVATIVE_UDF_NAME => Some(Self::NonNegativeDerivative),
|
||||
CUMULATIVE_SUM_UDF_NAME => Some(Self::CumulativeSum),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
@ -129,13 +131,21 @@ pub(crate) fn derivative(args: Vec<Expr>) -> Expr {
|
|||
|
||||
/// Definition of the `DERIVATIVE` function.
|
||||
static DERIVATIVE: Lazy<Arc<ScalarUDF>> = Lazy::new(|| {
|
||||
let return_type_fn: ReturnTypeFunction = Arc::new(|args| Ok(Arc::new(args[0].clone())));
|
||||
let return_type_fn: ReturnTypeFunction = Arc::new(|_| Ok(Arc::new(DataType::Float64)));
|
||||
Arc::new(ScalarUDF::new(
|
||||
DERIVATIVE_UDF_NAME,
|
||||
&Signature::one_of(
|
||||
NUMERICS
|
||||
.iter()
|
||||
.map(|dt| TypeSignature::Exact(vec![dt.clone()]))
|
||||
.flat_map(|dt| {
|
||||
vec![
|
||||
TypeSignature::Exact(vec![dt.clone()]),
|
||||
TypeSignature::Exact(vec![
|
||||
dt.clone(),
|
||||
DataType::Duration(TimeUnit::Nanosecond),
|
||||
]),
|
||||
]
|
||||
})
|
||||
.collect(),
|
||||
Volatility::Immutable,
|
||||
),
|
||||
|
@ -153,13 +163,21 @@ pub(crate) fn non_negative_derivative(args: Vec<Expr>) -> Expr {
|
|||
|
||||
/// Definition of the `NON_NEGATIVE_DERIVATIVE` function.
|
||||
static NON_NEGATIVE_DERIVATIVE: Lazy<Arc<ScalarUDF>> = Lazy::new(|| {
|
||||
let return_type_fn: ReturnTypeFunction = Arc::new(|args| Ok(Arc::new(args[0].clone())));
|
||||
let return_type_fn: ReturnTypeFunction = Arc::new(|_| Ok(Arc::new(DataType::Float64)));
|
||||
Arc::new(ScalarUDF::new(
|
||||
NON_NEGATIVE_DERIVATIVE_UDF_NAME,
|
||||
&Signature::one_of(
|
||||
NUMERICS
|
||||
.iter()
|
||||
.map(|dt| TypeSignature::Exact(vec![dt.clone()]))
|
||||
.flat_map(|dt| {
|
||||
vec![
|
||||
TypeSignature::Exact(vec![dt.clone()]),
|
||||
TypeSignature::Exact(vec![
|
||||
dt.clone(),
|
||||
DataType::Duration(TimeUnit::Nanosecond),
|
||||
]),
|
||||
]
|
||||
})
|
||||
.collect(),
|
||||
Volatility::Immutable,
|
||||
),
|
||||
|
@ -168,6 +186,29 @@ static NON_NEGATIVE_DERIVATIVE: Lazy<Arc<ScalarUDF>> = Lazy::new(|| {
|
|||
))
|
||||
});
|
||||
|
||||
const CUMULATIVE_SUM_UDF_NAME: &str = "cumulative_sum";
|
||||
|
||||
/// Create an expression to represent the `CUMULATIVE_SUM` function.
|
||||
pub(crate) fn cumulative_sum(args: Vec<Expr>) -> Expr {
|
||||
CUMULATIVE_SUM.call(args)
|
||||
}
|
||||
/// Definition of the `CUMULATIVE_SUM` function.
|
||||
static CUMULATIVE_SUM: Lazy<Arc<ScalarUDF>> = Lazy::new(|| {
|
||||
let return_type_fn: ReturnTypeFunction = Arc::new(|args| Ok(Arc::new(args[0].clone())));
|
||||
Arc::new(ScalarUDF::new(
|
||||
CUMULATIVE_SUM_UDF_NAME,
|
||||
&Signature::one_of(
|
||||
NUMERICS
|
||||
.iter()
|
||||
.map(|dt| TypeSignature::Exact(vec![dt.clone()]))
|
||||
.collect(),
|
||||
Volatility::Immutable,
|
||||
),
|
||||
&return_type_fn,
|
||||
&stand_in_impl(CUMULATIVE_SUM_UDF_NAME),
|
||||
))
|
||||
});
|
||||
|
||||
/// Returns an implementation that always returns an error.
|
||||
fn stand_in_impl(name: &'static str) -> ScalarFunctionImplementation {
|
||||
Arc::new(move |_| error::internal(format!("{name} should not exist in the final logical plan")))
|
||||
|
|
|
@ -11,8 +11,11 @@ use influxdb_influxql_parser::literal::Number;
|
|||
use influxdb_influxql_parser::string::Regex;
|
||||
use query_functions::clean_non_meta_escapes;
|
||||
use query_functions::coalesce_struct::coalesce_struct;
|
||||
use schema::InfluxColumnType;
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::ir::{DataSourceSchema, Field};
|
||||
|
||||
pub(in crate::plan) fn binary_operator_to_df_operator(op: BinaryOperator) -> Operator {
|
||||
match op {
|
||||
BinaryOperator::Add => Operator::Plus,
|
||||
|
@ -26,17 +29,62 @@ pub(in crate::plan) fn binary_operator_to_df_operator(op: BinaryOperator) -> Ope
|
|||
}
|
||||
}
|
||||
|
||||
/// Container for both the DataFusion and equivalent IOx schema.
|
||||
pub(in crate::plan) struct Schemas {
|
||||
/// Container for the DataFusion schema as well as
|
||||
/// info on which columns are tags.
|
||||
pub(in crate::plan) struct IQLSchema<'a> {
|
||||
pub(in crate::plan) df_schema: DFSchemaRef,
|
||||
tag_info: TagInfo<'a>,
|
||||
}
|
||||
|
||||
impl Schemas {
|
||||
pub(in crate::plan) fn new(df_schema: &DFSchemaRef) -> Result<Self> {
|
||||
impl<'a> IQLSchema<'a> {
|
||||
/// Create a new IQLSchema from a [`DataSourceSchema`] from the
|
||||
/// FROM clause of a query or subquery.
|
||||
pub(in crate::plan) fn new_from_ds_schema(
|
||||
df_schema: &DFSchemaRef,
|
||||
ds_schema: DataSourceSchema<'a>,
|
||||
) -> Result<Self> {
|
||||
Ok(Self {
|
||||
df_schema: Arc::clone(df_schema),
|
||||
tag_info: TagInfo::DataSourceSchema(ds_schema),
|
||||
})
|
||||
}
|
||||
|
||||
/// Create a new IQLSchema from a list of [`Field`]s on the SELECT list
|
||||
/// of a subquery.
|
||||
pub(in crate::plan) fn new_from_fields(
|
||||
df_schema: &DFSchemaRef,
|
||||
fields: &'a [Field],
|
||||
) -> Result<Self> {
|
||||
Ok(Self {
|
||||
df_schema: Arc::clone(df_schema),
|
||||
tag_info: TagInfo::FieldList(fields),
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns `true` if the schema contains a tag column with the specified name.
|
||||
pub fn is_tag_field(&self, name: &str) -> bool {
|
||||
match self.tag_info {
|
||||
TagInfo::DataSourceSchema(ref ds_schema) => ds_schema.is_tag_field(name),
|
||||
TagInfo::FieldList(fields) => fields
|
||||
.iter()
|
||||
.any(|f| f.name == name && f.data_type == Some(InfluxColumnType::Tag)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` if the schema contains a tag column with the specified name.
|
||||
/// If the underlying data source is a subquery, it will apply any aliases in the
|
||||
/// projection that represents the SELECT list.
|
||||
pub fn is_projected_tag_field(&self, name: &str) -> bool {
|
||||
match self.tag_info {
|
||||
TagInfo::DataSourceSchema(ref ds_schema) => ds_schema.is_projected_tag_field(name),
|
||||
_ => self.is_tag_field(name),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(in crate::plan) enum TagInfo<'a> {
|
||||
DataSourceSchema(DataSourceSchema<'a>),
|
||||
FieldList(&'a [Field]),
|
||||
}
|
||||
|
||||
/// Sanitize an InfluxQL regular expression and create a compiled [`regex::Regex`].
|
||||
|
@ -70,6 +118,7 @@ fn number_to_scalar(n: &Number, data_type: &DataType) -> Result<ScalarValue> {
|
|||
),
|
||||
fields.clone(),
|
||||
),
|
||||
(_, DataType::Null) => ScalarValue::Null,
|
||||
(n, data_type) => {
|
||||
// The only output data types expected are Int64, Float64 or UInt64
|
||||
return error::internal(format!("no conversion from {n} to {data_type}"));
|
||||
|
|
|
@ -145,34 +145,27 @@ where
|
|||
negated,
|
||||
expr,
|
||||
pattern,
|
||||
case_insensitive,
|
||||
escape_char,
|
||||
}) => Ok(Expr::Like(Like::new(
|
||||
*negated,
|
||||
Box::new(clone_with_replacement(expr, replacement_fn)?),
|
||||
Box::new(clone_with_replacement(pattern, replacement_fn)?),
|
||||
*escape_char,
|
||||
))),
|
||||
Expr::ILike(Like {
|
||||
negated,
|
||||
expr,
|
||||
pattern,
|
||||
escape_char,
|
||||
}) => Ok(Expr::ILike(Like::new(
|
||||
*negated,
|
||||
Box::new(clone_with_replacement(expr, replacement_fn)?),
|
||||
Box::new(clone_with_replacement(pattern, replacement_fn)?),
|
||||
*escape_char,
|
||||
*case_insensitive,
|
||||
))),
|
||||
Expr::SimilarTo(Like {
|
||||
negated,
|
||||
expr,
|
||||
pattern,
|
||||
case_insensitive,
|
||||
escape_char,
|
||||
}) => Ok(Expr::SimilarTo(Like::new(
|
||||
*negated,
|
||||
Box::new(clone_with_replacement(expr, replacement_fn)?),
|
||||
Box::new(clone_with_replacement(pattern, replacement_fn)?),
|
||||
*escape_char,
|
||||
*case_insensitive,
|
||||
))),
|
||||
Expr::Case(case) => Ok(Expr::Case(Case::new(
|
||||
match &case.expr {
|
||||
|
|
|
@ -6,8 +6,92 @@ use datafusion::logical_expr::{
|
|||
use once_cell::sync::Lazy;
|
||||
use std::sync::Arc;
|
||||
|
||||
mod cumulative_sum;
|
||||
mod derivative;
|
||||
mod difference;
|
||||
mod non_negative;
|
||||
mod percent_row_number;
|
||||
|
||||
/// Definition of the `CUMULATIVE_SUM` user-defined window function.
|
||||
pub(crate) static CUMULATIVE_SUM: Lazy<WindowFunction> = Lazy::new(|| {
|
||||
let return_type: ReturnTypeFunction = Arc::new(cumulative_sum::return_type);
|
||||
let partition_evaluator_factory: PartitionEvaluatorFactory =
|
||||
Arc::new(cumulative_sum::partition_evaluator_factory);
|
||||
|
||||
WindowFunction::WindowUDF(Arc::new(WindowUDF::new(
|
||||
cumulative_sum::NAME,
|
||||
&cumulative_sum::SIGNATURE,
|
||||
&return_type,
|
||||
&partition_evaluator_factory,
|
||||
)))
|
||||
});
|
||||
|
||||
/// Definition of the `DERIVATIVE` user-defined window function.
|
||||
pub(crate) static DERIVATIVE: Lazy<WindowFunction> = Lazy::new(|| {
|
||||
let return_type: ReturnTypeFunction = Arc::new(derivative::return_type);
|
||||
let partition_evaluator_factory: PartitionEvaluatorFactory =
|
||||
Arc::new(derivative::partition_evaluator_factory);
|
||||
|
||||
WindowFunction::WindowUDF(Arc::new(WindowUDF::new(
|
||||
derivative::NAME,
|
||||
&derivative::SIGNATURE,
|
||||
&return_type,
|
||||
&partition_evaluator_factory,
|
||||
)))
|
||||
});
|
||||
|
||||
/// Definition of the `DIFFERENCE` user-defined window function.
|
||||
pub(crate) static DIFFERENCE: Lazy<WindowFunction> = Lazy::new(|| {
|
||||
let return_type: ReturnTypeFunction = Arc::new(difference::return_type);
|
||||
let partition_evaluator_factory: PartitionEvaluatorFactory =
|
||||
Arc::new(difference::partition_evaluator_factory);
|
||||
|
||||
WindowFunction::WindowUDF(Arc::new(WindowUDF::new(
|
||||
difference::NAME,
|
||||
&difference::SIGNATURE,
|
||||
&return_type,
|
||||
&partition_evaluator_factory,
|
||||
)))
|
||||
});
|
||||
|
||||
const NON_NEGATIVE_DERIVATIVE_NAME: &str = "non_negative_derivative";
|
||||
|
||||
/// Definition of the `NON_NEGATIVE_DERIVATIVE` user-defined window function.
|
||||
pub(crate) static NON_NEGATIVE_DERIVATIVE: Lazy<WindowFunction> = Lazy::new(|| {
|
||||
let return_type: ReturnTypeFunction = Arc::new(derivative::return_type);
|
||||
let partition_evaluator_factory: PartitionEvaluatorFactory = Arc::new(|| {
|
||||
Ok(non_negative::wrapper(
|
||||
derivative::partition_evaluator_factory()?,
|
||||
))
|
||||
});
|
||||
|
||||
WindowFunction::WindowUDF(Arc::new(WindowUDF::new(
|
||||
NON_NEGATIVE_DERIVATIVE_NAME,
|
||||
&derivative::SIGNATURE,
|
||||
&return_type,
|
||||
&partition_evaluator_factory,
|
||||
)))
|
||||
});
|
||||
|
||||
const NON_NEGATIVE_DIFFERENCE_NAME: &str = "non_negative_difference";
|
||||
|
||||
/// Definition of the `NON_NEGATIVE_DIFFERENCE` user-defined window function.
|
||||
pub(crate) static NON_NEGATIVE_DIFFERENCE: Lazy<WindowFunction> = Lazy::new(|| {
|
||||
let return_type: ReturnTypeFunction = Arc::new(difference::return_type);
|
||||
let partition_evaluator_factory: PartitionEvaluatorFactory = Arc::new(|| {
|
||||
Ok(non_negative::wrapper(
|
||||
difference::partition_evaluator_factory()?,
|
||||
))
|
||||
});
|
||||
|
||||
WindowFunction::WindowUDF(Arc::new(WindowUDF::new(
|
||||
NON_NEGATIVE_DIFFERENCE_NAME,
|
||||
&difference::SIGNATURE,
|
||||
&return_type,
|
||||
&partition_evaluator_factory,
|
||||
)))
|
||||
});
|
||||
|
||||
/// Definition of the `PERCENT_ROW_NUMBER` user-defined window function.
|
||||
pub(crate) static PERCENT_ROW_NUMBER: Lazy<WindowFunction> = Lazy::new(|| {
|
||||
let return_type: ReturnTypeFunction = Arc::new(percent_row_number::return_type);
|
||||
|
|
|
@ -0,0 +1,64 @@
|
|||
use crate::NUMERICS;
|
||||
use arrow::array::{Array, ArrayRef};
|
||||
use arrow::datatypes::DataType;
|
||||
use datafusion::common::{Result, ScalarValue};
|
||||
use datafusion::logical_expr::{PartitionEvaluator, Signature, TypeSignature, Volatility};
|
||||
use once_cell::sync::Lazy;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// The name of the cumulative_sum window function.
|
||||
pub(super) const NAME: &str = "cumumlative_sum";
|
||||
|
||||
/// Valid signatures for the cumulative_sum window function.
|
||||
pub(super) static SIGNATURE: Lazy<Signature> = Lazy::new(|| {
|
||||
Signature::one_of(
|
||||
NUMERICS
|
||||
.iter()
|
||||
.map(|dt| TypeSignature::Exact(vec![dt.clone()]))
|
||||
.collect(),
|
||||
Volatility::Immutable,
|
||||
)
|
||||
});
|
||||
|
||||
/// Calculate the return type given the function signature.
|
||||
pub(super) fn return_type(sig: &[DataType]) -> Result<Arc<DataType>> {
|
||||
Ok(Arc::new(sig[0].clone()))
|
||||
}
|
||||
|
||||
/// Create a new partition_evaluator_factory.
|
||||
pub(super) fn partition_evaluator_factory() -> Result<Box<dyn PartitionEvaluator>> {
|
||||
Ok(Box::new(CumulativeSumPartitionEvaluator {}))
|
||||
}
|
||||
|
||||
/// PartitionEvaluator which returns the cumulative sum of the input.
|
||||
#[derive(Debug)]
|
||||
struct CumulativeSumPartitionEvaluator {}
|
||||
|
||||
impl PartitionEvaluator for CumulativeSumPartitionEvaluator {
|
||||
fn evaluate_all(&mut self, values: &[ArrayRef], num_rows: usize) -> Result<Arc<dyn Array>> {
|
||||
assert_eq!(values.len(), 1);
|
||||
|
||||
let array = Arc::clone(&values[0]);
|
||||
let mut sum = ScalarValue::new_zero(array.data_type())?;
|
||||
let mut cumulative: Vec<ScalarValue> = vec![];
|
||||
for idx in 0..num_rows {
|
||||
let v = ScalarValue::try_from_array(&array, idx)?;
|
||||
let res = if v.is_null() {
|
||||
v
|
||||
} else {
|
||||
sum = sum.add(&v)?;
|
||||
sum.clone()
|
||||
};
|
||||
cumulative.push(res);
|
||||
}
|
||||
Ok(Arc::new(ScalarValue::iter_to_array(cumulative)?))
|
||||
}
|
||||
|
||||
fn uses_window_frame(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn include_rank(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
|
@ -0,0 +1,125 @@
|
|||
use crate::{error, NUMERICS};
|
||||
use arrow::array::{Array, ArrayRef};
|
||||
use arrow::datatypes::{DataType, TimeUnit};
|
||||
use datafusion::common::{Result, ScalarValue};
|
||||
use datafusion::logical_expr::{PartitionEvaluator, Signature, TypeSignature, Volatility};
|
||||
use once_cell::sync::Lazy;
|
||||
use std::borrow::Borrow;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// The name of the derivative window function.
|
||||
pub(super) const NAME: &str = "derivative";
|
||||
|
||||
/// Valid signatures for the derivative window function.
|
||||
pub(super) static SIGNATURE: Lazy<Signature> = Lazy::new(|| {
|
||||
Signature::one_of(
|
||||
NUMERICS
|
||||
.iter()
|
||||
.map(|dt| {
|
||||
TypeSignature::Exact(vec![
|
||||
dt.clone(),
|
||||
DataType::Duration(TimeUnit::Nanosecond),
|
||||
DataType::Timestamp(TimeUnit::Nanosecond, None),
|
||||
])
|
||||
})
|
||||
.collect(),
|
||||
Volatility::Immutable,
|
||||
)
|
||||
});
|
||||
|
||||
/// Calculate the return type given the function signature.
|
||||
pub(super) fn return_type(_: &[DataType]) -> Result<Arc<DataType>> {
|
||||
Ok(Arc::new(DataType::Float64))
|
||||
}
|
||||
|
||||
/// Create a new partition_evaluator_factory.
|
||||
pub(super) fn partition_evaluator_factory() -> Result<Box<dyn PartitionEvaluator>> {
|
||||
Ok(Box::new(DifferencePartitionEvaluator {}))
|
||||
}
|
||||
|
||||
/// PartitionEvaluator which returns the derivative between input values,
|
||||
/// in the provided units.
|
||||
#[derive(Debug)]
|
||||
struct DifferencePartitionEvaluator {}
|
||||
|
||||
impl PartitionEvaluator for DifferencePartitionEvaluator {
|
||||
fn evaluate_all(&mut self, values: &[ArrayRef], _num_rows: usize) -> Result<Arc<dyn Array>> {
|
||||
assert_eq!(values.len(), 3);
|
||||
|
||||
let array = Arc::clone(&values[0]);
|
||||
let times = Arc::clone(&values[2]);
|
||||
|
||||
// The second element of the values array is the second argument to
|
||||
// the 'derivative' function. This specifies the unit duration for the
|
||||
// derivation to use.
|
||||
//
|
||||
// INVARIANT:
|
||||
// The planner guarantees that the second argument is always a duration
|
||||
// literal.
|
||||
let unit = ScalarValue::try_from_array(&values[1], 0)?;
|
||||
|
||||
let mut idx: usize = 0;
|
||||
let mut last: ScalarValue = array.data_type().try_into()?;
|
||||
let mut last_time: ScalarValue = times.data_type().try_into()?;
|
||||
let mut derivative: Vec<ScalarValue> = vec![];
|
||||
|
||||
while idx < array.len() {
|
||||
last = ScalarValue::try_from_array(&array, idx)?;
|
||||
last_time = ScalarValue::try_from_array(×, idx)?;
|
||||
derivative.push(ScalarValue::Float64(None));
|
||||
idx += 1;
|
||||
if !last.is_null() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
while idx < array.len() {
|
||||
let v = ScalarValue::try_from_array(&array, idx)?;
|
||||
let t = ScalarValue::try_from_array(×, idx)?;
|
||||
if v.is_null() {
|
||||
derivative.push(ScalarValue::Float64(None));
|
||||
} else {
|
||||
derivative.push(ScalarValue::Float64(Some(
|
||||
delta(&v, &last)? / delta_time(&t, &last_time, &unit)?,
|
||||
)));
|
||||
last = v.clone();
|
||||
last_time = t.clone();
|
||||
}
|
||||
idx += 1;
|
||||
}
|
||||
Ok(Arc::new(ScalarValue::iter_to_array(derivative)?))
|
||||
}
|
||||
|
||||
fn uses_window_frame(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn include_rank(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
fn delta(curr: &ScalarValue, prev: &ScalarValue) -> Result<f64> {
|
||||
match (curr.borrow(), prev.borrow()) {
|
||||
(ScalarValue::Float64(Some(curr)), ScalarValue::Float64(Some(prev))) => Ok(*curr - *prev),
|
||||
(ScalarValue::Int64(Some(curr)), ScalarValue::Int64(Some(prev))) => {
|
||||
Ok(*curr as f64 - *prev as f64)
|
||||
}
|
||||
(ScalarValue::UInt64(Some(curr)), ScalarValue::UInt64(Some(prev))) => {
|
||||
Ok(*curr as f64 - *prev as f64)
|
||||
}
|
||||
_ => error::internal("derivative attempted on unsupported values"),
|
||||
}
|
||||
}
|
||||
|
||||
fn delta_time(curr: &ScalarValue, prev: &ScalarValue, unit: &ScalarValue) -> Result<f64> {
|
||||
if let (
|
||||
ScalarValue::TimestampNanosecond(Some(curr), _),
|
||||
ScalarValue::TimestampNanosecond(Some(prev), _),
|
||||
ScalarValue::IntervalMonthDayNano(Some(unit)),
|
||||
) = (curr, prev, unit)
|
||||
{
|
||||
Ok((*curr as f64 - *prev as f64) / *unit as f64)
|
||||
} else {
|
||||
error::internal("derivative attempted on unsupported values")
|
||||
}
|
||||
}
|
|
@ -0,0 +1,79 @@
|
|||
use crate::NUMERICS;
|
||||
use arrow::array::{Array, ArrayRef};
|
||||
use arrow::compute::{shift, subtract_dyn};
|
||||
use arrow::datatypes::DataType;
|
||||
use datafusion::common::{Result, ScalarValue};
|
||||
use datafusion::logical_expr::{PartitionEvaluator, Signature, TypeSignature, Volatility};
|
||||
use once_cell::sync::Lazy;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// The name of the difference window function.
|
||||
pub(super) const NAME: &str = "difference";
|
||||
|
||||
/// Valid signatures for the difference window function.
|
||||
pub(super) static SIGNATURE: Lazy<Signature> = Lazy::new(|| {
|
||||
Signature::one_of(
|
||||
NUMERICS
|
||||
.iter()
|
||||
.map(|dt| TypeSignature::Exact(vec![dt.clone()]))
|
||||
.collect(),
|
||||
Volatility::Immutable,
|
||||
)
|
||||
});
|
||||
|
||||
/// Calculate the return type given the function signature.
|
||||
pub(super) fn return_type(sig: &[DataType]) -> Result<Arc<DataType>> {
|
||||
Ok(Arc::new(sig[0].clone()))
|
||||
}
|
||||
|
||||
/// Create a new partition_evaluator_factory.
|
||||
pub(super) fn partition_evaluator_factory() -> Result<Box<dyn PartitionEvaluator>> {
|
||||
Ok(Box::new(DifferencePartitionEvaluator {}))
|
||||
}
|
||||
|
||||
/// PartitionEvaluator which returns the difference between input values.
|
||||
#[derive(Debug)]
|
||||
struct DifferencePartitionEvaluator {}
|
||||
|
||||
impl PartitionEvaluator for DifferencePartitionEvaluator {
|
||||
fn evaluate_all(&mut self, values: &[ArrayRef], _num_rows: usize) -> Result<Arc<dyn Array>> {
|
||||
assert_eq!(values.len(), 1);
|
||||
|
||||
let array = Arc::clone(&values[0]);
|
||||
if array.null_count() == 0 {
|
||||
// If there are no gaps then use arrow kernels.
|
||||
Ok(subtract_dyn(&array, &shift(&array, 1)?)?)
|
||||
} else {
|
||||
let mut idx: usize = 0;
|
||||
let mut last: ScalarValue = array.data_type().try_into()?;
|
||||
let mut difference: Vec<ScalarValue> = vec![];
|
||||
while idx < array.len() {
|
||||
last = ScalarValue::try_from_array(&array, idx)?;
|
||||
difference.push(array.data_type().try_into()?);
|
||||
idx += 1;
|
||||
if !last.is_null() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
while idx < array.len() {
|
||||
let v = ScalarValue::try_from_array(&array, idx)?;
|
||||
if v.is_null() {
|
||||
difference.push(array.data_type().try_into()?);
|
||||
} else {
|
||||
difference.push(v.sub(last)?);
|
||||
last = v;
|
||||
}
|
||||
idx += 1;
|
||||
}
|
||||
Ok(Arc::new(ScalarValue::iter_to_array(difference)?))
|
||||
}
|
||||
}
|
||||
|
||||
fn uses_window_frame(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn include_rank(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
|
@ -0,0 +1,74 @@
|
|||
use arrow::array::Array;
|
||||
use arrow::compute::{lt_dyn_scalar, nullif};
|
||||
use datafusion::common::{Result, ScalarValue};
|
||||
use datafusion::logical_expr::window_state::WindowAggState;
|
||||
use datafusion::logical_expr::PartitionEvaluator;
|
||||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Wrap a PartitionEvaluator in a non-negative filter.
|
||||
pub(super) fn wrapper(
|
||||
partition_evaluator: Box<dyn PartitionEvaluator>,
|
||||
) -> Box<dyn PartitionEvaluator> {
|
||||
Box::new(NonNegative {
|
||||
partition_evaluator,
|
||||
})
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct NonNegative {
|
||||
partition_evaluator: Box<dyn PartitionEvaluator>,
|
||||
}
|
||||
|
||||
impl PartitionEvaluator for NonNegative {
|
||||
fn memoize(&mut self, state: &mut WindowAggState) -> Result<()> {
|
||||
self.partition_evaluator.memoize(state)
|
||||
}
|
||||
|
||||
fn get_range(&self, idx: usize, n_rows: usize) -> Result<Range<usize>> {
|
||||
self.partition_evaluator.get_range(idx, n_rows)
|
||||
}
|
||||
|
||||
fn evaluate_all(
|
||||
&mut self,
|
||||
values: &[Arc<dyn Array>],
|
||||
num_rows: usize,
|
||||
) -> Result<Arc<dyn Array>> {
|
||||
let array = self.partition_evaluator.evaluate_all(values, num_rows)?;
|
||||
let predicate = lt_dyn_scalar(&array, 0)?;
|
||||
Ok(nullif(&array, &predicate)?)
|
||||
}
|
||||
|
||||
fn evaluate(&mut self, values: &[Arc<dyn Array>], range: &Range<usize>) -> Result<ScalarValue> {
|
||||
let value = self.partition_evaluator.evaluate(values, range)?;
|
||||
Ok(match value {
|
||||
ScalarValue::Float64(Some(v)) if v < 0.0 => ScalarValue::Float64(None),
|
||||
ScalarValue::Int64(Some(v)) if v < 0 => ScalarValue::Int64(None),
|
||||
v => v,
|
||||
})
|
||||
}
|
||||
|
||||
fn evaluate_all_with_rank(
|
||||
&self,
|
||||
num_rows: usize,
|
||||
ranks_in_partition: &[Range<usize>],
|
||||
) -> Result<Arc<dyn Array>> {
|
||||
let array = self
|
||||
.partition_evaluator
|
||||
.evaluate_all_with_rank(num_rows, ranks_in_partition)?;
|
||||
let predicate = lt_dyn_scalar(&array, 0)?;
|
||||
Ok(nullif(&array, &predicate)?)
|
||||
}
|
||||
|
||||
fn supports_bounded_execution(&self) -> bool {
|
||||
self.partition_evaluator.supports_bounded_execution()
|
||||
}
|
||||
|
||||
fn uses_window_frame(&self) -> bool {
|
||||
self.partition_evaluator.uses_window_frame()
|
||||
}
|
||||
|
||||
fn include_rank(&self) -> bool {
|
||||
self.partition_evaluator.include_rank()
|
||||
}
|
||||
}
|
|
@ -6,10 +6,10 @@ use datafusion::logical_expr::{PartitionEvaluator, Signature, TypeSignature, Vol
|
|||
use once_cell::sync::Lazy;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// The name of the percent_row_number aggregate function.
|
||||
/// The name of the percent_row_number window function.
|
||||
pub(super) const NAME: &str = "percent_row_number";
|
||||
|
||||
/// Valid signatures for the percent_row_number aggregate function.
|
||||
/// Valid signatures for the percent_row_number window function.
|
||||
pub(super) static SIGNATURE: Lazy<Signature> = Lazy::new(|| {
|
||||
Signature::one_of(
|
||||
vec![
|
||||
|
|
|
@ -16,7 +16,7 @@ observability_deps = { path = "../observability_deps" }
|
|||
query_functions = { path = "../query_functions"}
|
||||
schema = { path = "../schema" }
|
||||
snafu = "0.7"
|
||||
sqlparser = "0.35.0"
|
||||
sqlparser = "0.36.0"
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
|
||||
[dev-dependencies]
|
||||
|
|
|
@ -496,7 +496,6 @@ impl TreeNodeVisitor for RowBasedVisitor {
|
|||
| Expr::Column(_)
|
||||
| Expr::Exists { .. }
|
||||
| Expr::GetIndexedField { .. }
|
||||
| Expr::ILike { .. }
|
||||
| Expr::InList { .. }
|
||||
| Expr::InSubquery { .. }
|
||||
| Expr::IsFalse(_)
|
||||
|
|
|
@ -515,6 +515,7 @@ mod tests {
|
|||
expr,
|
||||
pattern,
|
||||
escape_char: None,
|
||||
case_insensitive: false,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
@ -64,9 +64,7 @@ impl From<&SingleTenantExtractError> for hyper::StatusCode {
|
|||
SingleTenantExtractError::NoBucketSpecified => Self::BAD_REQUEST,
|
||||
SingleTenantExtractError::InvalidNamespace(_) => Self::BAD_REQUEST,
|
||||
SingleTenantExtractError::ParseV1Request(
|
||||
V1WriteParseError::NoQueryParams
|
||||
| V1WriteParseError::DecodeFail(_)
|
||||
| V1WriteParseError::ContainsRpSeparator,
|
||||
V1WriteParseError::NoQueryParams | V1WriteParseError::DecodeFail(_),
|
||||
) => Self::BAD_REQUEST,
|
||||
SingleTenantExtractError::ParseV2Request(
|
||||
V2WriteParseError::NoQueryParams | V2WriteParseError::DecodeFail(_),
|
||||
|
@ -125,10 +123,6 @@ async fn parse_v1(
|
|||
// Extract the write parameters.
|
||||
let write_params = WriteParamsV1::try_from(req)?;
|
||||
|
||||
// Extracting the write parameters validates the db field never contains the
|
||||
// '/' separator to avoid ambiguity with the "namespace/rp" construction.
|
||||
debug_assert!(!write_params.db.contains(V1_NAMESPACE_RP_SEPARATOR));
|
||||
|
||||
// Extract or construct the namespace name string from the write parameters
|
||||
let namespace = NamespaceName::new(match write_params.rp {
|
||||
RetentionPolicy::Unspecified | RetentionPolicy::Autogen => write_params.db,
|
||||
|
@ -316,22 +310,65 @@ mod tests {
|
|||
}
|
||||
);
|
||||
|
||||
// Prevent ambiguity by denying the `/` character in the DB
|
||||
// Permit `/` character in the DB
|
||||
test_parse_v1!(
|
||||
no_rp_db_with_rp_separator,
|
||||
query_string = "?db=bananas/are/great",
|
||||
want = Err(Error::SingleTenantError(
|
||||
SingleTenantExtractError::ParseV1Request(V1WriteParseError::ContainsRpSeparator)
|
||||
))
|
||||
want = Ok(WriteParams{ namespace, precision }) => {
|
||||
assert_eq!(namespace.as_str(), "bananas/are/great");
|
||||
assert_matches!(precision, Precision::Nanoseconds);
|
||||
}
|
||||
);
|
||||
|
||||
// Prevent ambiguity by denying the `/` character in the RP
|
||||
// Permit the `/` character in the RP
|
||||
test_parse_v1!(
|
||||
rp_with_rp_separator,
|
||||
query_string = "?db=bananas&rp=are/great",
|
||||
want = Err(Error::SingleTenantError(
|
||||
SingleTenantExtractError::ParseV1Request(V1WriteParseError::ContainsRpSeparator)
|
||||
))
|
||||
want = Ok(WriteParams{ namespace, precision }) => {
|
||||
assert_eq!(namespace.as_str(), "bananas/are/great");
|
||||
assert_matches!(precision, Precision::Nanoseconds);
|
||||
}
|
||||
);
|
||||
|
||||
// `/` character is allowed in the DB, if a named RP is specified
|
||||
test_parse_v1!(
|
||||
db_with_rp_separator_and_rp,
|
||||
query_string = "?db=foo/bar&rp=my_rp",
|
||||
want = Ok(WriteParams{ namespace, precision }) => {
|
||||
assert_eq!(namespace.as_str(), "foo/bar/my_rp");
|
||||
assert_matches!(precision, Precision::Nanoseconds);
|
||||
}
|
||||
);
|
||||
|
||||
// Always concat, even if this results in duplication rp within the namespace.
|
||||
// ** this matches the query API behavior **
|
||||
test_parse_v1!(
|
||||
db_with_rp_separator_and_duplicate_rp,
|
||||
query_string = "?db=foo/my_rp&rp=my_rp",
|
||||
want = Ok(WriteParams{ namespace, precision }) => {
|
||||
assert_eq!(namespace.as_str(), "foo/my_rp/my_rp");
|
||||
assert_matches!(precision, Precision::Nanoseconds);
|
||||
}
|
||||
);
|
||||
|
||||
// `/` character is allowed in the DB, if an autogen RP is specified
|
||||
test_parse_v1!(
|
||||
db_with_rp_separator_and_rp_autogen,
|
||||
query_string = "?db=foo/bar&rp=autogen",
|
||||
want = Ok(WriteParams{ namespace, precision }) => {
|
||||
assert_eq!(namespace.as_str(), "foo/bar");
|
||||
assert_matches!(precision, Precision::Nanoseconds);
|
||||
}
|
||||
);
|
||||
|
||||
// `/` character is allowed in the DB, if a default RP is specified
|
||||
test_parse_v1!(
|
||||
db_with_rp_separator_and_rp_default,
|
||||
query_string = "?db=foo/bar&rp=default",
|
||||
want = Ok(WriteParams{ namespace, precision }) => {
|
||||
assert_eq!(namespace.as_str(), "foo/bar");
|
||||
assert_matches!(precision, Precision::Nanoseconds);
|
||||
}
|
||||
);
|
||||
|
||||
test_parse_v1!(
|
||||
|
|
|
@ -29,12 +29,6 @@ pub enum V1WriteParseError {
|
|||
/// The request contains invalid parameters.
|
||||
#[error("failed to deserialize db/rp/precision in request: {0}")]
|
||||
DecodeFail(#[from] serde::de::value::Error),
|
||||
|
||||
/// The provided "db" or "rp" value contains the reserved `/` character.
|
||||
///
|
||||
/// See [`V1_NAMESPACE_RP_SEPARATOR`].
|
||||
#[error("db cannot contain the reserved character '/'")]
|
||||
ContainsRpSeparator,
|
||||
}
|
||||
|
||||
/// May be empty string, explicit rp name, or `autogen`. As provided at the
|
||||
|
@ -61,7 +55,7 @@ impl<'de> Deserialize<'de> for RetentionPolicy {
|
|||
Ok(match s.as_str() {
|
||||
"" => RetentionPolicy::Unspecified,
|
||||
"''" => RetentionPolicy::Unspecified,
|
||||
"autogen" => RetentionPolicy::Autogen,
|
||||
"autogen" | "default" => RetentionPolicy::Autogen,
|
||||
_ => RetentionPolicy::Named(s),
|
||||
})
|
||||
}
|
||||
|
@ -90,20 +84,6 @@ impl<T> TryFrom<&Request<T>> for WriteParamsV1 {
|
|||
let query = req.uri().query().ok_or(V1WriteParseError::NoQueryParams)?;
|
||||
let params: WriteParamsV1 = serde_urlencoded::from_str(query)?;
|
||||
|
||||
// No namespace (db) is ever allowed to contain a `/` to prevent
|
||||
// ambiguity with the namespace/rp NamespaceName construction.
|
||||
if params.db.contains(V1_NAMESPACE_RP_SEPARATOR) {
|
||||
return Err(V1WriteParseError::ContainsRpSeparator);
|
||||
}
|
||||
|
||||
// Likewise the "rp" field itself cannot contain the `/` character if
|
||||
// specified.
|
||||
if let RetentionPolicy::Named(s) = &params.rp {
|
||||
if s.contains(V1_NAMESPACE_RP_SEPARATOR) {
|
||||
return Err(V1WriteParseError::ContainsRpSeparator);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(params)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -9,6 +9,7 @@ license.workspace = true
|
|||
async-trait = "0.1.71"
|
||||
bytes = "1.4"
|
||||
datafusion = { workspace = true }
|
||||
executor = { path = "../executor" }
|
||||
iox_query = { path = "../iox_query" }
|
||||
iox_query_influxql = { path = "../iox_query_influxql" }
|
||||
iox_query_influxrpc = { path = "../iox_query_influxrpc" }
|
||||
|
|
|
@ -53,16 +53,25 @@ pub fn datafusion_error_to_tonic_code(e: &DataFusionError) -> tonic::Code {
|
|||
| DataFusionError::NotImplemented(_)
|
||||
| DataFusionError::Plan(_) => tonic::Code::InvalidArgument,
|
||||
DataFusionError::Context(_,_) => unreachable!("handled in chain traversal above"),
|
||||
// External errors are mostly traversed by the DataFusion already except for some IOx errors
|
||||
DataFusionError::External(e) => {
|
||||
if let Some(e) = e.downcast_ref::<executor::JobError>() {
|
||||
match e {
|
||||
executor::JobError::WorkerGone => tonic::Code::Unavailable,
|
||||
executor::JobError::Panic { .. } => tonic::Code::Internal,
|
||||
}
|
||||
} else {
|
||||
// All other, unclassified cases are signalled as "internal error" to the user since they cannot do
|
||||
// anything about it (except for reporting a bug). Note that DataFusion "external" error is only from
|
||||
// DataFusion's PoV, not from a users PoV.
|
||||
tonic::Code::Internal
|
||||
}
|
||||
}
|
||||
// Map as many as possible back into user visible
|
||||
// (non internal) errors and only treat the ones
|
||||
// the user likely can't do anything about as internal
|
||||
DataFusionError::ObjectStore(_)
|
||||
| DataFusionError::IoError(_)
|
||||
// External originate from outside DataFusion’s core codebase.
|
||||
// As of 2022-10-17, these always come external object store
|
||||
// errors (e.g. misconfiguration or bad path) which would be
|
||||
// an internal error and thus we classify them as such.
|
||||
| DataFusionError::External(_)
|
||||
// Substrait errors come from internal code and are unused
|
||||
// with DataFusion at the moment
|
||||
| DataFusionError::Substrait(_)
|
||||
|
@ -100,7 +109,7 @@ mod test {
|
|||
tonic::Code::InvalidArgument,
|
||||
);
|
||||
|
||||
do_transl_test(DataFusionError::Internal(s), tonic::Code::Internal);
|
||||
do_transl_test(DataFusionError::Internal(s.clone()), tonic::Code::Internal);
|
||||
|
||||
// traversal
|
||||
do_transl_test(
|
||||
|
@ -110,6 +119,29 @@ mod test {
|
|||
),
|
||||
tonic::Code::ResourceExhausted,
|
||||
);
|
||||
|
||||
// inspect "external" errors
|
||||
do_transl_test(
|
||||
DataFusionError::External(s.clone().into()),
|
||||
tonic::Code::Internal,
|
||||
);
|
||||
do_transl_test(
|
||||
DataFusionError::External(Box::new(executor::JobError::Panic { msg: s })),
|
||||
tonic::Code::Internal,
|
||||
);
|
||||
do_transl_test(
|
||||
DataFusionError::External(Box::new(executor::JobError::WorkerGone)),
|
||||
tonic::Code::Unavailable,
|
||||
);
|
||||
do_transl_test(
|
||||
DataFusionError::Context(
|
||||
"ctx".into(),
|
||||
Box::new(DataFusionError::External(Box::new(
|
||||
executor::JobError::WorkerGone,
|
||||
))),
|
||||
),
|
||||
tonic::Code::Unavailable,
|
||||
);
|
||||
}
|
||||
|
||||
fn do_transl_test(e: DataFusionError, code: tonic::Code) {
|
||||
|
|
|
@ -10,7 +10,7 @@ license.workspace = true
|
|||
publish = false
|
||||
|
||||
[dependencies]
|
||||
sqlx = { version = "0.6.3", features = ["runtime-tokio-rustls", "postgres", "json", "tls"] }
|
||||
sqlx = { version = "0.7.1", features = ["runtime-tokio-rustls", "postgres", "json", "tls-rustls"] }
|
||||
either = "1.8.1"
|
||||
futures = "0.3"
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
|
|
|
@ -8,7 +8,7 @@ license.workspace = true
|
|||
[dependencies] # In alphabetical order
|
||||
dotenvy = "0.15.7"
|
||||
parking_lot = "0.12"
|
||||
tempfile = "3.6.0"
|
||||
tempfile = "3.7.0"
|
||||
tracing-log = "0.1"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
|
|
|
@ -31,8 +31,8 @@ rand = "0.8.3"
|
|||
regex = "1.9"
|
||||
reqwest = { version = "0.11", default-features = false, features = ["json", "rustls-tls"] }
|
||||
snafu = "0.7"
|
||||
sqlx = { version = "0.6", features = [ "runtime-tokio-rustls" , "postgres", "uuid" ] }
|
||||
tempfile = "3.6.0"
|
||||
sqlx = { version = "0.7.1", features = [ "runtime-tokio-rustls" , "postgres", "uuid" ] }
|
||||
tempfile = "3.7.0"
|
||||
test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
|
||||
tokio = { version = "1.29", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] }
|
||||
tokio-util = "0.7"
|
||||
|
|
|
@ -22,6 +22,6 @@ workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
|||
sysinfo = "0.29.5"
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3.6.0"
|
||||
tempfile = "3.7.0"
|
||||
# Need the multi-threaded executor for testing
|
||||
tokio = { version = "1.29", features = ["macros", "parking_lot", "rt-multi-thread", "time"] }
|
||||
|
|
|
@ -16,25 +16,23 @@ license.workspace = true
|
|||
|
||||
### BEGIN HAKARI SECTION
|
||||
[dependencies]
|
||||
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
|
||||
ahash = { version = "0.8" }
|
||||
arrow = { version = "43", features = ["dyn_cmp_dict", "prettyprint"] }
|
||||
arrow-array = { version = "43", default-features = false, features = ["chrono-tz"] }
|
||||
arrow-flight = { version = "43", features = ["flight-sql-experimental"] }
|
||||
arrow-ord = { version = "43", default-features = false, features = ["dyn_cmp_dict"] }
|
||||
arrow-string = { version = "43", default-features = false, features = ["dyn_cmp_dict"] }
|
||||
base64-594e8ee84c453af0 = { package = "base64", version = "0.13" }
|
||||
base64-647d43efb71741da = { package = "base64", version = "0.21" }
|
||||
bitflags = { version = "1" }
|
||||
base64 = { version = "0.21" }
|
||||
byteorder = { version = "1" }
|
||||
bytes = { version = "1" }
|
||||
chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "serde"] }
|
||||
crossbeam-utils = { version = "0.8" }
|
||||
crypto-common = { version = "0.1", default-features = false, features = ["std"] }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "46182c894e5106adba7fb53e9848ce666fb6129b" }
|
||||
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "46182c894e5106adba7fb53e9848ce666fb6129b", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
|
||||
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "46182c894e5106adba7fb53e9848ce666fb6129b", default-features = false, features = ["crypto_expressions", "encoding_expressions", "regex_expressions", "unicode_expressions"] }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "44008d71180f2d03e9d21944788e61cb8845abc7" }
|
||||
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "44008d71180f2d03e9d21944788e61cb8845abc7", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] }
|
||||
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "44008d71180f2d03e9d21944788e61cb8845abc7", default-features = false, features = ["crypto_expressions", "encoding_expressions", "regex_expressions", "unicode_expressions"] }
|
||||
digest = { version = "0.10", features = ["mac", "std"] }
|
||||
either = { version = "1" }
|
||||
either = { version = "1", features = ["serde"] }
|
||||
fixedbitset = { version = "0.4" }
|
||||
flatbuffers = { version = "23" }
|
||||
flate2 = { version = "1" }
|
||||
|
@ -47,8 +45,7 @@ futures-task = { version = "0.3", default-features = false, features = ["std"] }
|
|||
futures-util = { version = "0.3", features = ["channel", "io", "sink"] }
|
||||
getrandom = { version = "0.2", default-features = false, features = ["std"] }
|
||||
hashbrown = { version = "0.14", features = ["raw"] }
|
||||
indexmap-dff4ba8e3ae991db = { package = "indexmap", version = "1", default-features = false, features = ["std"] }
|
||||
indexmap-f595c2ba2a3f28df = { package = "indexmap", version = "2" }
|
||||
indexmap = { version = "2" }
|
||||
itertools = { version = "0.10" }
|
||||
libc = { version = "0.2", features = ["extra_traits"] }
|
||||
lock_api = { version = "0.4", features = ["arc_lock"] }
|
||||
|
@ -74,14 +71,16 @@ regex-automata = { version = "0.3", default-features = false, features = ["dfa-o
|
|||
regex-syntax = { version = "0.7" }
|
||||
reqwest = { version = "0.11", default-features = false, features = ["json", "rustls-tls", "stream"] }
|
||||
ring = { version = "0.16", features = ["std"] }
|
||||
rustls = { version = "0.21", default-features = false, features = ["dangerous_configuration", "logging", "tls12"] }
|
||||
serde = { version = "1", features = ["derive", "rc"] }
|
||||
serde_json = { version = "1", features = ["raw_value"] }
|
||||
sha2 = { version = "0.10" }
|
||||
similar = { version = "2", features = ["inline"] }
|
||||
smallvec = { version = "1", default-features = false, features = ["union"] }
|
||||
sqlparser = { version = "0.35", features = ["visitor"] }
|
||||
sqlx = { version = "0.6", features = ["json", "postgres", "runtime-tokio-rustls", "sqlite", "tls", "uuid"] }
|
||||
sqlx-core = { version = "0.6", default-features = false, features = ["any", "migrate", "postgres", "runtime-tokio-rustls", "sqlite", "uuid"] }
|
||||
sqlx = { version = "0.7", features = ["postgres", "runtime-tokio-rustls", "sqlite", "uuid"] }
|
||||
sqlx-core = { version = "0.7", features = ["_rt-tokio", "_tls-rustls", "any", "json", "migrate", "offline", "uuid"] }
|
||||
sqlx-postgres = { version = "0.7", default-features = false, features = ["any", "json", "migrate", "offline", "uuid"] }
|
||||
sqlx-sqlite = { version = "0.7", default-features = false, features = ["any", "json", "migrate", "offline", "uuid"] }
|
||||
thrift = { version = "0.17" }
|
||||
tokio = { version = "1", features = ["full", "test-util", "tracing"] }
|
||||
tokio-stream = { version = "0.1", features = ["fs", "net"] }
|
||||
|
@ -101,17 +100,15 @@ zstd-safe = { version = "6", default-features = false, features = ["arrays", "le
|
|||
zstd-sys = { version = "2", default-features = false, features = ["legacy", "std", "zdict_builder"] }
|
||||
|
||||
[build-dependencies]
|
||||
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
|
||||
base64-594e8ee84c453af0 = { package = "base64", version = "0.13" }
|
||||
base64-647d43efb71741da = { package = "base64", version = "0.21" }
|
||||
bitflags = { version = "1" }
|
||||
ahash = { version = "0.8" }
|
||||
base64 = { version = "0.21" }
|
||||
byteorder = { version = "1" }
|
||||
bytes = { version = "1" }
|
||||
cc = { version = "1", default-features = false, features = ["parallel"] }
|
||||
crossbeam-utils = { version = "0.8" }
|
||||
crypto-common = { version = "0.1", default-features = false, features = ["std"] }
|
||||
digest = { version = "0.10", features = ["mac", "std"] }
|
||||
either = { version = "1" }
|
||||
either = { version = "1", features = ["serde"] }
|
||||
fixedbitset = { version = "0.4" }
|
||||
futures-channel = { version = "0.3", features = ["sink"] }
|
||||
futures-core = { version = "0.3" }
|
||||
|
@ -123,7 +120,7 @@ futures-util = { version = "0.3", features = ["channel", "io", "sink"] }
|
|||
getrandom = { version = "0.2", default-features = false, features = ["std"] }
|
||||
hashbrown = { version = "0.14", features = ["raw"] }
|
||||
heck = { version = "0.4", features = ["unicode"] }
|
||||
indexmap-dff4ba8e3ae991db = { package = "indexmap", version = "1", default-features = false, features = ["std"] }
|
||||
indexmap = { version = "2" }
|
||||
itertools = { version = "0.10" }
|
||||
libc = { version = "0.2", features = ["extra_traits"] }
|
||||
lock_api = { version = "0.4", features = ["arc_lock"] }
|
||||
|
@ -144,67 +141,78 @@ regex = { version = "1" }
|
|||
regex-automata = { version = "0.3", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] }
|
||||
regex-syntax = { version = "0.7" }
|
||||
ring = { version = "0.16", features = ["std"] }
|
||||
rustls = { version = "0.21", default-features = false, features = ["dangerous_configuration", "logging", "tls12"] }
|
||||
serde = { version = "1", features = ["derive", "rc"] }
|
||||
serde_json = { version = "1", features = ["raw_value"] }
|
||||
sha2 = { version = "0.10" }
|
||||
smallvec = { version = "1", default-features = false, features = ["union"] }
|
||||
sqlx-core = { version = "0.6", default-features = false, features = ["any", "migrate", "postgres", "runtime-tokio-rustls", "sqlite", "uuid"] }
|
||||
sqlx-macros = { version = "0.6", default-features = false, features = ["json", "migrate", "postgres", "runtime-tokio-rustls", "sqlite", "uuid"] }
|
||||
sqlx-core = { version = "0.7", features = ["_rt-tokio", "_tls-rustls", "any", "json", "migrate", "offline", "uuid"] }
|
||||
sqlx-macros = { version = "0.7", features = ["_rt-tokio", "_tls-rustls", "json", "migrate", "postgres", "sqlite", "uuid"] }
|
||||
sqlx-macros-core = { version = "0.7", features = ["_rt-tokio", "_tls-rustls", "json", "migrate", "postgres", "sqlite", "uuid"] }
|
||||
sqlx-postgres = { version = "0.7", default-features = false, features = ["any", "json", "migrate", "offline", "uuid"] }
|
||||
sqlx-sqlite = { version = "0.7", default-features = false, features = ["any", "json", "migrate", "offline", "uuid"] }
|
||||
syn-dff4ba8e3ae991db = { package = "syn", version = "1", features = ["extra-traits", "full"] }
|
||||
syn-f595c2ba2a3f28df = { package = "syn", version = "2", features = ["extra-traits", "full", "visit-mut"] }
|
||||
tokio = { version = "1", features = ["full", "test-util", "tracing"] }
|
||||
tokio-stream = { version = "0.1", features = ["fs", "net"] }
|
||||
tracing = { version = "0.1", features = ["log", "max_level_trace", "release_max_level_trace"] }
|
||||
tracing-core = { version = "0.1" }
|
||||
unicode-bidi = { version = "0.3" }
|
||||
unicode-normalization = { version = "0.1" }
|
||||
url = { version = "2" }
|
||||
uuid = { version = "1", features = ["v4"] }
|
||||
|
||||
[target.x86_64-unknown-linux-gnu.dependencies]
|
||||
bitflags = { version = "2", default-features = false, features = ["std"] }
|
||||
nix = { version = "0.26" }
|
||||
once_cell = { version = "1", default-features = false, features = ["unstable"] }
|
||||
rustix = { version = "0.38", features = ["fs", "termios"] }
|
||||
rustls = { version = "0.21", features = ["dangerous_configuration"] }
|
||||
webpki = { version = "0.22", default-features = false, features = ["std"] }
|
||||
rustls = { version = "0.21" }
|
||||
|
||||
[target.x86_64-unknown-linux-gnu.build-dependencies]
|
||||
bitflags = { version = "2", default-features = false, features = ["std"] }
|
||||
once_cell = { version = "1", default-features = false, features = ["unstable"] }
|
||||
webpki = { version = "0.22", default-features = false, features = ["std"] }
|
||||
rustix = { version = "0.38", features = ["fs", "termios"] }
|
||||
rustls = { version = "0.21" }
|
||||
|
||||
[target.x86_64-apple-darwin.dependencies]
|
||||
bitflags = { version = "2", default-features = false, features = ["std"] }
|
||||
nix = { version = "0.26" }
|
||||
once_cell = { version = "1", default-features = false, features = ["unstable"] }
|
||||
rustix = { version = "0.38", features = ["fs", "termios"] }
|
||||
rustls = { version = "0.21", features = ["dangerous_configuration"] }
|
||||
webpki = { version = "0.22", default-features = false, features = ["std"] }
|
||||
rustls = { version = "0.21" }
|
||||
|
||||
[target.x86_64-apple-darwin.build-dependencies]
|
||||
bitflags = { version = "2", default-features = false, features = ["std"] }
|
||||
once_cell = { version = "1", default-features = false, features = ["unstable"] }
|
||||
webpki = { version = "0.22", default-features = false, features = ["std"] }
|
||||
rustix = { version = "0.38", features = ["fs", "termios"] }
|
||||
rustls = { version = "0.21" }
|
||||
|
||||
[target.aarch64-apple-darwin.dependencies]
|
||||
bitflags = { version = "2", default-features = false, features = ["std"] }
|
||||
nix = { version = "0.26" }
|
||||
once_cell = { version = "1", default-features = false, features = ["unstable"] }
|
||||
rustix = { version = "0.38", features = ["fs", "termios"] }
|
||||
rustls = { version = "0.21", features = ["dangerous_configuration"] }
|
||||
webpki = { version = "0.22", default-features = false, features = ["std"] }
|
||||
rustls = { version = "0.21" }
|
||||
|
||||
[target.aarch64-apple-darwin.build-dependencies]
|
||||
bitflags = { version = "2", default-features = false, features = ["std"] }
|
||||
once_cell = { version = "1", default-features = false, features = ["unstable"] }
|
||||
webpki = { version = "0.22", default-features = false, features = ["std"] }
|
||||
rustix = { version = "0.38", features = ["fs", "termios"] }
|
||||
rustls = { version = "0.21" }
|
||||
|
||||
[target.x86_64-pc-windows-msvc.dependencies]
|
||||
once_cell = { version = "1", default-features = false, features = ["unstable"] }
|
||||
rustls = { version = "0.21", features = ["dangerous_configuration"] }
|
||||
rustls = { version = "0.21" }
|
||||
scopeguard = { version = "1" }
|
||||
webpki = { version = "0.22", default-features = false, features = ["std"] }
|
||||
winapi = { version = "0.3", default-features = false, features = ["basetsd", "cfg", "combaseapi", "consoleapi", "errhandlingapi", "evntrace", "fileapi", "handleapi", "heapapi", "ifdef", "impl-debug", "impl-default", "in6addr", "inaddr", "ioapiset", "iphlpapi", "knownfolders", "lmaccess", "lmapibuf", "lmcons", "memoryapi", "minwinbase", "minwindef", "netioapi", "ntlsa", "ntsecapi", "ntstatus", "objbase", "objidl", "oleauto", "pdh", "powerbase", "processenv", "psapi", "rpcdce", "sddl", "securitybaseapi", "shellapi", "shlobj", "std", "stringapiset", "synchapi", "sysinfoapi", "timezoneapi", "wbemcli", "winbase", "wincon", "windef", "winerror", "winioctl", "winnt", "winreg", "winsock2", "winuser", "ws2ipdef", "ws2tcpip", "wtypesbase"] }
|
||||
winapi = { version = "0.3", default-features = false, features = ["basetsd", "cfg", "combaseapi", "consoleapi", "errhandlingapi", "evntrace", "fileapi", "handleapi", "heapapi", "ifdef", "impl-debug", "impl-default", "in6addr", "inaddr", "ioapiset", "iphlpapi", "lmaccess", "lmapibuf", "lmcons", "memoryapi", "minwinbase", "minwindef", "netioapi", "ntlsa", "ntsecapi", "objidl", "oleauto", "pdh", "powerbase", "processenv", "psapi", "rpcdce", "sddl", "securitybaseapi", "shellapi", "std", "stringapiset", "synchapi", "sysinfoapi", "timezoneapi", "wbemcli", "winbase", "wincon", "windef", "winerror", "winioctl", "winnt", "winreg", "winsock2", "winuser", "ws2ipdef", "ws2tcpip", "wtypesbase"] }
|
||||
windows-sys = { version = "0.48", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_Threading", "Win32_System_WindowsProgramming", "Win32_UI_Shell"] }
|
||||
|
||||
[target.x86_64-pc-windows-msvc.build-dependencies]
|
||||
once_cell = { version = "1", default-features = false, features = ["unstable"] }
|
||||
rustls = { version = "0.21" }
|
||||
scopeguard = { version = "1" }
|
||||
webpki = { version = "0.22", default-features = false, features = ["std"] }
|
||||
winapi = { version = "0.3", default-features = false, features = ["basetsd", "cfg", "combaseapi", "consoleapi", "errhandlingapi", "evntrace", "fileapi", "handleapi", "heapapi", "ifdef", "impl-debug", "impl-default", "in6addr", "inaddr", "ioapiset", "iphlpapi", "knownfolders", "lmaccess", "lmapibuf", "lmcons", "memoryapi", "minwinbase", "minwindef", "netioapi", "ntlsa", "ntsecapi", "ntstatus", "objbase", "objidl", "oleauto", "pdh", "powerbase", "processenv", "psapi", "rpcdce", "sddl", "securitybaseapi", "shellapi", "shlobj", "std", "stringapiset", "synchapi", "sysinfoapi", "timezoneapi", "wbemcli", "winbase", "wincon", "windef", "winerror", "winioctl", "winnt", "winreg", "winsock2", "winuser", "ws2ipdef", "ws2tcpip", "wtypesbase"] }
|
||||
winapi = { version = "0.3", default-features = false, features = ["basetsd", "cfg", "combaseapi", "consoleapi", "errhandlingapi", "evntrace", "fileapi", "handleapi", "heapapi", "ifdef", "impl-debug", "impl-default", "in6addr", "inaddr", "ioapiset", "iphlpapi", "lmaccess", "lmapibuf", "lmcons", "memoryapi", "minwinbase", "minwindef", "netioapi", "ntlsa", "ntsecapi", "objidl", "oleauto", "pdh", "powerbase", "processenv", "psapi", "rpcdce", "sddl", "securitybaseapi", "shellapi", "std", "stringapiset", "synchapi", "sysinfoapi", "timezoneapi", "wbemcli", "winbase", "wincon", "windef", "winerror", "winioctl", "winnt", "winreg", "winsock2", "winuser", "ws2ipdef", "ws2tcpip", "wtypesbase"] }
|
||||
windows-sys = { version = "0.48", features = ["Win32_Foundation", "Win32_Networking_WinSock", "Win32_Security", "Win32_Storage_FileSystem", "Win32_System_Console", "Win32_System_IO", "Win32_System_Pipes", "Win32_System_SystemServices", "Win32_System_Threading", "Win32_System_WindowsProgramming", "Win32_UI_Shell"] }
|
||||
|
||||
### END HAKARI SECTION
|
||||
|
|
Loading…
Reference in New Issue