Merge branch 'main' into savage/additive-namespace-schema-caching

pull/24376/head
Fraser Savage 2023-04-26 12:30:52 +01:00 committed by GitHub
commit d9111e2a1a
102 changed files with 2135 additions and 1842 deletions

Cargo.lock generated

@ -149,9 +149,9 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
[[package]]
name = "arrow"
version = "37.0.0"
version = "38.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1aea9fcb25bbb70f7f922f95b99ca29c1013dab47f6df61a6f24861842dd7f2e"
checksum = "c107a57b5913d852da9d5a40e280e4695f2258b5b87733c13b770c63a7117287"
dependencies = [
"ahash 0.8.3",
"arrow-arith",
@ -171,9 +171,9 @@ dependencies = [
[[package]]
name = "arrow-arith"
version = "37.0.0"
version = "38.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d967b42f7b12c91fd78acd396b20c2973b184c8866846674abbb00c963e93ab"
checksum = "ace6aa3d5617c5d03041a05e01c6819428a8ddf49dd0b055df9b40fef9d96094"
dependencies = [
"arrow-array",
"arrow-buffer",
@ -186,9 +186,9 @@ dependencies = [
[[package]]
name = "arrow-array"
version = "37.0.0"
version = "38.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3190f208ee7aa0f3596fa0098d42911dec5e123ca88c002a08b24877ad14c71e"
checksum = "104a04520692cc674e6afd7682f213ca41f9b13ff1873f63a5a2857a590b87b3"
dependencies = [
"ahash 0.8.3",
"arrow-buffer",
@ -203,9 +203,9 @@ dependencies = [
[[package]]
name = "arrow-buffer"
version = "37.0.0"
version = "38.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d33c733c5b6c44a0fc526f29c09546e04eb56772a7a21e48e602f368be381f6"
checksum = "72c875bcb9530ec403998fb0b2dc6d180a7c64563ca4bc22b90eafb84b113143"
dependencies = [
"half 2.2.1",
"num",
@ -213,9 +213,9 @@ dependencies = [
[[package]]
name = "arrow-cast"
version = "37.0.0"
version = "38.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abd349520b6a1ed4924ae2afc9d23330a3044319e4ec3d5b124c09e4d440ae87"
checksum = "d6d6e18281636c8fc0b93be59834da6bf9a72bb70fd0c98ddfdaf124da466c28"
dependencies = [
"arrow-array",
"arrow-buffer",
@ -230,9 +230,9 @@ dependencies = [
[[package]]
name = "arrow-csv"
version = "37.0.0"
version = "38.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c80af3c3e290a2a7e1cc518f1471dff331878cb4af9a5b088bf030b89debf649"
checksum = "3197dab0963a236ff8e7c82e2272535745955ac1321eb740c29f2f88b353f54e"
dependencies = [
"arrow-array",
"arrow-buffer",
@ -249,9 +249,9 @@ dependencies = [
[[package]]
name = "arrow-data"
version = "37.0.0"
version = "38.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1c8361947aaa96d331da9df3f7a08bdd8ab805a449994c97f5c4d24c4b7e2cf"
checksum = "eb68113d6ecdbe8bba48b2c4042c151bf9e1c61244e45072a50250a6fc59bafe"
dependencies = [
"arrow-buffer",
"arrow-schema",
@ -261,9 +261,9 @@ dependencies = [
[[package]]
name = "arrow-flight"
version = "37.0.0"
version = "38.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd1fc687f3e4ffe91ccb7f2ffb06143ff97029448d427a9641006242bcbd0c24"
checksum = "52045fe4f34dc1529dfb6bb19542cd76d093e4d2f00ac58822755ec59fc14160"
dependencies = [
"arrow-array",
"arrow-buffer",
@ -281,9 +281,9 @@ dependencies = [
[[package]]
name = "arrow-ipc"
version = "37.0.0"
version = "38.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a46ee000b9fbd1e8db6e8b26acb8c760838512b39d8c9f9d73892cb55351d50"
checksum = "eab4bbf2dd3078facb5ce0a9641316a64f42bfd8cf357e6775c8a5e6708e3a8d"
dependencies = [
"arrow-array",
"arrow-buffer",
@ -295,9 +295,9 @@ dependencies = [
[[package]]
name = "arrow-json"
version = "37.0.0"
version = "38.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4bf2366607be867ced681ad7f272371a5cf1fc2941328eef7b4fee14565166fb"
checksum = "48c5b650d23746a494665d914a7fa3d21d939153cff9d53bdebe39bffa88f263"
dependencies = [
"arrow-array",
"arrow-buffer",
@ -315,9 +315,9 @@ dependencies = [
[[package]]
name = "arrow-ord"
version = "37.0.0"
version = "38.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "304069901c867200e21ec868ae7521165875470ef2f1f6d58f979a443d63997e"
checksum = "68c6fce28e5011e30acc7466b5efcb8ed0197c396240bd2b10e167f275a3c208"
dependencies = [
"arrow-array",
"arrow-buffer",
@ -330,9 +330,9 @@ dependencies = [
[[package]]
name = "arrow-row"
version = "37.0.0"
version = "38.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d57fe8ceef3392fdd493269d8a2d589de17bafce151aacbffbddac7a57f441a"
checksum = "f20a421f19799d8b93eb8edde5217e910fa1e2d6ceb3c529f000e57b6db144c0"
dependencies = [
"ahash 0.8.3",
"arrow-array",
@ -345,15 +345,15 @@ dependencies = [
[[package]]
name = "arrow-schema"
version = "37.0.0"
version = "38.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a16b88a93ac8350f0200b1cd336a1f887315925b8dd7aa145a37b8bdbd8497a4"
checksum = "bc85923d8d6662cc66ac6602c7d1876872e671002d60993dfdf492a6badeae92"
[[package]]
name = "arrow-select"
version = "37.0.0"
version = "38.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "98e8a4d6ca37d5212439b24caad4d80743fcbb706706200dd174bb98e68fe9d8"
checksum = "f6ab6613ce65b61d85a3410241744e84e48fbab0fe06e1251b4429d21b3470fd"
dependencies = [
"arrow-array",
"arrow-buffer",
@ -364,9 +364,9 @@ dependencies = [
[[package]]
name = "arrow-string"
version = "37.0.0"
version = "38.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbb594efa397eb6a546f42b1f8df3d242ea84dbfda5232e06035dc2b2e2c8459"
checksum = "f3008641239e884aefba66d8b8532da6af40d14296349fcc85935de4ba67b89e"
dependencies = [
"arrow-array",
"arrow-buffer",
@ -1441,7 +1441,7 @@ dependencies = [
[[package]]
name = "datafusion"
version = "23.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=181e5ccf2816ccaa05d8aaef0b375d4b7bbceece#181e5ccf2816ccaa05d8aaef0b375d4b7bbceece"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=451e81eafe5e404de50f6ede1bebf25ed90f5eb0#451e81eafe5e404de50f6ede1bebf25ed90f5eb0"
dependencies = [
"ahash 0.8.3",
"arrow",
@ -1490,7 +1490,7 @@ dependencies = [
[[package]]
name = "datafusion-common"
version = "23.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=181e5ccf2816ccaa05d8aaef0b375d4b7bbceece#181e5ccf2816ccaa05d8aaef0b375d4b7bbceece"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=451e81eafe5e404de50f6ede1bebf25ed90f5eb0#451e81eafe5e404de50f6ede1bebf25ed90f5eb0"
dependencies = [
"arrow",
"arrow-array",
@ -1504,7 +1504,7 @@ dependencies = [
[[package]]
name = "datafusion-execution"
version = "23.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=181e5ccf2816ccaa05d8aaef0b375d4b7bbceece#181e5ccf2816ccaa05d8aaef0b375d4b7bbceece"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=451e81eafe5e404de50f6ede1bebf25ed90f5eb0#451e81eafe5e404de50f6ede1bebf25ed90f5eb0"
dependencies = [
"dashmap",
"datafusion-common",
@ -1521,7 +1521,7 @@ dependencies = [
[[package]]
name = "datafusion-expr"
version = "23.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=181e5ccf2816ccaa05d8aaef0b375d4b7bbceece#181e5ccf2816ccaa05d8aaef0b375d4b7bbceece"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=451e81eafe5e404de50f6ede1bebf25ed90f5eb0#451e81eafe5e404de50f6ede1bebf25ed90f5eb0"
dependencies = [
"ahash 0.8.3",
"arrow",
@ -1532,7 +1532,7 @@ dependencies = [
[[package]]
name = "datafusion-optimizer"
version = "23.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=181e5ccf2816ccaa05d8aaef0b375d4b7bbceece#181e5ccf2816ccaa05d8aaef0b375d4b7bbceece"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=451e81eafe5e404de50f6ede1bebf25ed90f5eb0#451e81eafe5e404de50f6ede1bebf25ed90f5eb0"
dependencies = [
"arrow",
"async-trait",
@ -1549,7 +1549,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
version = "23.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=181e5ccf2816ccaa05d8aaef0b375d4b7bbceece#181e5ccf2816ccaa05d8aaef0b375d4b7bbceece"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=451e81eafe5e404de50f6ede1bebf25ed90f5eb0#451e81eafe5e404de50f6ede1bebf25ed90f5eb0"
dependencies = [
"ahash 0.8.3",
"arrow",
@ -1581,7 +1581,7 @@ dependencies = [
[[package]]
name = "datafusion-proto"
version = "23.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=181e5ccf2816ccaa05d8aaef0b375d4b7bbceece#181e5ccf2816ccaa05d8aaef0b375d4b7bbceece"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=451e81eafe5e404de50f6ede1bebf25ed90f5eb0#451e81eafe5e404de50f6ede1bebf25ed90f5eb0"
dependencies = [
"arrow",
"chrono",
@ -1595,7 +1595,7 @@ dependencies = [
[[package]]
name = "datafusion-row"
version = "23.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=181e5ccf2816ccaa05d8aaef0b375d4b7bbceece#181e5ccf2816ccaa05d8aaef0b375d4b7bbceece"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=451e81eafe5e404de50f6ede1bebf25ed90f5eb0#451e81eafe5e404de50f6ede1bebf25ed90f5eb0"
dependencies = [
"arrow",
"datafusion-common",
@ -1606,7 +1606,7 @@ dependencies = [
[[package]]
name = "datafusion-sql"
version = "23.0.0"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=181e5ccf2816ccaa05d8aaef0b375d4b7bbceece#181e5ccf2816ccaa05d8aaef0b375d4b7bbceece"
source = "git+https://github.com/apache/arrow-datafusion.git?rev=451e81eafe5e404de50f6ede1bebf25ed90f5eb0#451e81eafe5e404de50f6ede1bebf25ed90f5eb0"
dependencies = [
"arrow",
"arrow-schema",
@ -3930,9 +3930,9 @@ dependencies = [
[[package]]
name = "parquet"
version = "37.0.0"
version = "38.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5022d98333271f4ca3e87bab760498e61726bf5a6ca919123c80517e20ded29"
checksum = "4cbd51311f8d9ff3d2697b1522b18a588782e097d313a1a278b0faf2ccf2d3f6"
dependencies = [
"ahash 0.8.3",
"arrow-array",
@ -5826,9 +5826,9 @@ dependencies = [
[[package]]
name = "tokio-stream"
version = "0.1.12"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fb52b74f05dbf495a8fba459fdc331812b96aa086d9eb78101fa0d4569c3313"
checksum = "76cd2598a37719e3cd4c28af93f978506a97a2920ef4d96e4b12e38b8cbc8940"
dependencies = [
"futures-core",
"pin-project-lite",
@ -5837,9 +5837,9 @@ dependencies = [
[[package]]
name = "tokio-util"
version = "0.7.7"
version = "0.7.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5427d89453009325de0d8f342c9490009f76e999cb7672d77e46267448f7e6b2"
checksum = "806fe8c2c87eccc8b3267cbae29ed3ab2d0bd37fca70ab622e46aaa9375ddb7d"
dependencies = [
"bytes",
"futures-core",
@ -6087,11 +6087,10 @@ dependencies = [
[[package]]
name = "tracing"
version = "0.1.37"
version = "0.1.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8"
checksum = "cf9cf6a813d3f40c88b0b6b6f29a5c95c6cdbf97c1f9cc53fb820200f5ad814d"
dependencies = [
"cfg-if",
"log",
"pin-project-lite",
"tracing-attributes",
@ -6100,13 +6099,13 @@ dependencies = [
[[package]]
name = "tracing-attributes"
version = "0.1.23"
version = "0.1.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4017f8f45139870ca7e672686113917c71c7a6e02d4924eda67186083c03081a"
checksum = "0f57e3ca2a01450b1a921183a9c9cbfda207fd822cef4ccb00a65402cbba7a74"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
"syn 2.0.15",
]
[[package]]


@ -113,13 +113,13 @@ edition = "2021"
license = "MIT OR Apache-2.0"
[workspace.dependencies]
arrow = { version = "37.0.0" }
arrow-flight = { version = "37.0.0" }
arrow = { version = "38.0.0" }
arrow-flight = { version = "38.0.0" }
chrono-english = { git = "https://github.com/stevedonovan/chrono-english.git", rev = "def5941ebee24b55e1174eb18ab33d91603f907a" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="181e5ccf2816ccaa05d8aaef0b375d4b7bbceece", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="181e5ccf2816ccaa05d8aaef0b375d4b7bbceece" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="451e81eafe5e404de50f6ede1bebf25ed90f5eb0", default-features = false }
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="451e81eafe5e404de50f6ede1bebf25ed90f5eb0" }
hashbrown = { version = "0.13.2" }
parquet = { version = "37.0.0" }
parquet = { version = "38.0.0" }
tonic = { version = "0.9.2", features = ["tls", "tls-webpki-roots"] }
tonic-build = { version = "0.9.2" }
tonic-health = { version = "0.9.2" }


@ -16,7 +16,7 @@ parking_lot = { version = "0.12", features = ["arc_lock"] }
pdatastructs = { version = "0.7", default-features = false, features = ["fixedbitset"] }
rand = "0.8.3"
tokio = { version = "1.27", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] }
tokio-util = { version = "0.7.7" }
tokio-util = { version = "0.7.8" }
trace = { path = "../trace"}
workspace-hack = { version = "0.1", path = "../workspace-hack" }


@ -1,42 +0,0 @@
//! CLI config for request authorization.
use ::authz::{Authorizer, IoxAuthorizer};
use snafu::Snafu;
use std::{boxed::Box, sync::Arc};
#[derive(Debug, Snafu)]
#[allow(missing_docs)]
pub enum Error {
#[snafu(display("Invalid authz service address {addr}: {source}"))]
BadServiceAddress {
addr: String,
source: Box<dyn std::error::Error>,
},
}
/// Configuration for optional request authorization.
#[derive(Clone, Debug, Default, clap::Parser)]
pub struct AuthzConfig {
#[clap(long = "authz-addr", env = "INFLUXDB_IOX_AUTHZ_ADDR")]
pub(crate) authz_addr: Option<String>,
}
impl AuthzConfig {
/// Authorizer from the configuration.
///
/// An authorizer is optional so will only be created if configured.
/// An error will only occur when the authorizer configuration is
/// invalid.
pub fn authorizer(&self) -> Result<Option<Arc<dyn Authorizer>>, Error> {
if let Some(s) = &self.authz_addr {
IoxAuthorizer::connect_lazy(s.clone())
.map(|c| Some(Arc::new(c) as Arc<dyn Authorizer>))
.map_err(|e| Error::BadServiceAddress {
addr: s.clone(),
source: e,
})
} else {
Ok(None)
}
}
}
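
For context on what this deletion removes: callers obtained an optional authorizer from the parsed config, with an error only for a malformed address. A hedged caller sketch follows; the build_authz wrapper and surrounding setup are assumptions, not code from this PR — only AuthzConfig::authorizer comes from the file above.

use authz::Authorizer;
use clap::Parser;
use std::sync::Arc;

// Hypothetical caller of the (now removed) AuthzConfig.
fn build_authz() -> Option<Arc<dyn Authorizer>> {
    // Reads --authz-addr / INFLUXDB_IOX_AUTHZ_ADDR from the command line or environment.
    let config = AuthzConfig::parse();
    // Err is returned only when an address was given but is invalid.
    config
        .authorizer()
        .expect("authz address, if given, must be valid")
}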


@ -12,7 +12,6 @@
clippy::todo,
clippy::dbg_macro
)]
pub mod authz;
pub mod catalog_dsn;
pub mod compactor2;
pub mod garbage_collector;
@ -22,4 +21,5 @@ pub mod object_store;
pub mod querier;
pub mod router2;
pub mod run_config;
pub mod single_tenant;
pub mod socket_addr;


@ -1,11 +1,18 @@
//! Querier-related configs.
use crate::ingester_address::IngesterAddress;
use crate::{
ingester_address::IngesterAddress,
single_tenant::{CONFIG_AUTHZ_ENV_NAME, CONFIG_AUTHZ_FLAG},
};
use std::{collections::HashMap, num::NonZeroUsize};
/// CLI config for querier configuration
#[derive(Debug, Clone, PartialEq, Eq, clap::Parser)]
pub struct QuerierConfig {
/// Addr for connection to authz
#[clap(long = CONFIG_AUTHZ_FLAG, env = CONFIG_AUTHZ_ENV_NAME)]
pub authz_address: Option<String>,
/// The number of threads to use for queries.
///
/// If not specified, defaults to the number of cores on the system


@ -1,6 +1,11 @@
//! CLI config for the router using the RPC write path
use crate::ingester_address::IngesterAddress;
use crate::{
ingester_address::IngesterAddress,
single_tenant::{
CONFIG_AUTHZ_ENV_NAME, CONFIG_AUTHZ_FLAG, CONFIG_CST_ENV_NAME, CONFIG_CST_FLAG,
},
};
use std::{
num::{NonZeroUsize, ParseIntError},
time::Duration,
@ -10,14 +15,23 @@ use std::{
#[derive(Debug, Clone, clap::Parser)]
#[allow(missing_copy_implementations)]
pub struct Router2Config {
/// Addr for connection to authz
#[clap(
long = CONFIG_AUTHZ_FLAG,
env = CONFIG_AUTHZ_ENV_NAME,
requires("single_tenant_deployment"),
)]
pub authz_address: Option<String>,
/// Differential handling based upon deployment to CST vs MT.
///
/// At minimum, differs in support of the v1 endpoint. But also includes
/// differences in namespace handling, etc.
#[clap(
long = "single-tenancy",
env = "INFLUXDB_IOX_SINGLE_TENANCY",
default_value = "false"
long = CONFIG_CST_FLAG,
env = CONFIG_CST_ENV_NAME,
default_value = "false",
requires_if("true", "authz_address")
)]
pub single_tenant_deployment: bool,


@ -0,0 +1,11 @@
//! CLI config for request authorization.
/// Env var providing authz address
pub const CONFIG_AUTHZ_ENV_NAME: &str = "INFLUXDB_IOX_AUTHZ_ADDR";
/// CLI flag for authz address
pub const CONFIG_AUTHZ_FLAG: &str = "authz-addr";
/// Env var for single tenancy deployments
pub const CONFIG_CST_ENV_NAME: &str = "INFLUXDB_IOX_SINGLE_TENANCY";
/// CLI flag for single tenancy deployments
pub const CONFIG_CST_FLAG: &str = "single-tenancy";


@ -25,7 +25,7 @@ rand = "0.8.3"
schema = { path = "../schema" }
sharder = { path = "../sharder" }
tokio = { version = "1", features = ["macros", "rt", "sync"] }
tokio-util = { version = "0.7.7" }
tokio-util = { version = "0.7.8" }
tracker = { path = "../tracker" }
uuid = { version = "1", features = ["v4"] }
workspace-hack = { version = "0.1", path = "../workspace-hack" }


@ -1,7 +1,7 @@
use std::{num::NonZeroUsize, sync::Arc, time::Duration};
use data_types::{CompactionLevel, ParquetFile, ParquetFileParams, PartitionId};
use futures::StreamExt;
use futures::{stream, StreamExt, TryStreamExt};
use observability_deps::tracing::info;
use parquet_file::ParquetFilePath;
use tokio::sync::watch::Sender;
@ -327,25 +327,32 @@ async fn run_plans(
split_or_compact,
input_uuids_inpad,
);
let capacity = plans.iter().map(|p| p.n_output_files()).sum();
let mut created_file_params = Vec::with_capacity(capacity);
for plan_ir in plans
.into_iter()
.filter(|plan| !matches!(plan, PlanIR::None { .. }))
{
created_file_params.extend(
execute_plan(
plan_ir,
partition_info,
components,
Arc::clone(&job_semaphore),
)
.await?,
info!(
partition_id = partition_info.partition_id.get(),
plan_count = plans.len(),
concurrency_limit = job_semaphore.total_permits(),
"compacting plans concurrently",
);
let created_file_params: Vec<Vec<_>> = stream::iter(
plans
.into_iter()
.filter(|plan| !matches!(plan, PlanIR::None { .. })),
)
.map(|plan_ir| {
execute_plan(
plan_ir,
partition_info,
components,
Arc::clone(&job_semaphore),
)
}
})
.buffer_unordered(job_semaphore.total_permits())
.try_collect()
.await?;
Ok(created_file_params)
Ok(created_file_params.into_iter().flatten().collect())
}
async fn execute_plan(
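
The hunk above swaps the sequential loop, which awaited each execute_plan call in turn, for a bounded concurrent fan-out: the plans become a stream, each is mapped to an execute_plan future, at most job_semaphore.total_permits() futures run at once via buffer_unordered, and the per-plan file lists are flattened at the end. A minimal, self-contained sketch of the same pattern (the tokio runtime and the toy work function are assumptions for illustration):

use futures::{stream, StreamExt, TryStreamExt};

// Stand-in for execute_plan: each input produces a small batch of outputs.
async fn work(n: u32) -> Result<Vec<u32>, String> {
    Ok(vec![n, n * 10])
}

#[tokio::main]
async fn main() -> Result<(), String> {
    let concurrency_limit = 4; // in the PR this is job_semaphore.total_permits()
    let nested: Vec<Vec<u32>> = stream::iter(1..=8)
        .map(work)                           // one future per input
        .buffer_unordered(concurrency_limit) // run at most N futures at a time
        .try_collect()                       // stop at the first error
        .await?;
    // Flatten the per-plan batches into one list, as run_plans now does.
    let all: Vec<u32> = nested.into_iter().flatten().collect();
    println!("{all:?}");
    Ok(())
}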


@ -759,76 +759,76 @@ async fn random_backfill_empty_partition() {
- "L0 "
- "L0.?[76,329] 1.04us 2.96mb|-------------------------------------L0.?--------------------------------------| "
- "L0.?[330,356] 1.04us 322.99kb |-L0.?-| "
- "**** Simulation run 71, type=split(ReduceOverlap)(split_times=[658]). 1 Input Files, 3.66mb total:"
- "L0, all files 3.66mb "
- "L0.163[357,670] 1.04us |-----------------------------------------L0.163-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 3.66mb total:"
- "L0 "
- "L0.?[357,658] 1.04us 3.52mb|----------------------------------------L0.?----------------------------------------| "
- "L0.?[659,670] 1.04us 143.55kb |L0.?|"
- "**** Simulation run 72, type=split(ReduceOverlap)(split_times=[329]). 1 Input Files, 3.33mb total:"
- "L0, all files 3.33mb "
- "L0.165[42,356] 1.04us |-----------------------------------------L0.165-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 3.33mb total:"
- "L0 "
- "L0.?[42,329] 1.04us 3.04mb|--------------------------------------L0.?--------------------------------------| "
- "L0.?[330,356] 1.04us 292.88kb |L0.?-| "
- "**** Simulation run 73, type=split(ReduceOverlap)(split_times=[658]). 1 Input Files, 3.32mb total:"
- "**** Simulation run 71, type=split(ReduceOverlap)(split_times=[658]). 1 Input Files, 3.32mb total:"
- "L0, all files 3.32mb "
- "L0.166[357,670] 1.04us |-----------------------------------------L0.166-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 3.32mb total:"
- "L0 "
- "L0.?[357,658] 1.04us 3.19mb|----------------------------------------L0.?----------------------------------------| "
- "L0.?[659,670] 1.04us 130.17kb |L0.?|"
- "**** Simulation run 74, type=split(ReduceOverlap)(split_times=[329]). 1 Input Files, 2.36mb total:"
- "**** Simulation run 72, type=split(ReduceOverlap)(split_times=[329]). 1 Input Files, 2.36mb total:"
- "L0, all files 2.36mb "
- "L0.168[173,356] 1.04us |-----------------------------------------L0.168-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 2.36mb total:"
- "L0 "
- "L0.?[173,329] 1.04us 2.01mb|-----------------------------------L0.?-----------------------------------| "
- "L0.?[330,356] 1.04us 355.83kb |---L0.?---| "
- "**** Simulation run 75, type=split(ReduceOverlap)(split_times=[658]). 1 Input Files, 4.03mb total:"
- "**** Simulation run 73, type=split(ReduceOverlap)(split_times=[658]). 1 Input Files, 4.03mb total:"
- "L0, all files 4.03mb "
- "L0.169[357,670] 1.04us |-----------------------------------------L0.169-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 4.03mb total:"
- "L0 "
- "L0.?[357,658] 1.04us 3.87mb|----------------------------------------L0.?----------------------------------------| "
- "L0.?[659,670] 1.04us 158.15kb |L0.?|"
- "**** Simulation run 76, type=split(ReduceOverlap)(split_times=[329]). 1 Input Files, 5.28mb total:"
- "**** Simulation run 74, type=split(ReduceOverlap)(split_times=[329]). 1 Input Files, 5.28mb total:"
- "L0, all files 5.28mb "
- "L0.171[50,356] 1.04us |-----------------------------------------L0.171-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 5.28mb total:"
- "L0 "
- "L0.?[50,329] 1.04us 4.82mb|--------------------------------------L0.?--------------------------------------| "
- "L0.?[330,356] 1.04us 477.51kb |L0.?-| "
- "**** Simulation run 77, type=split(ReduceOverlap)(split_times=[329]). 1 Input Files, 3.27mb total:"
- "**** Simulation run 75, type=split(ReduceOverlap)(split_times=[329]). 1 Input Files, 3.27mb total:"
- "L0, all files 3.27mb "
- "L0.173[76,356] 1.04us |-----------------------------------------L0.173-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 3.27mb total:"
- "L0 "
- "L0.?[76,329] 1.04us 2.96mb|-------------------------------------L0.?--------------------------------------| "
- "L0.?[330,356] 1.04us 322.99kb |-L0.?-| "
- "**** Simulation run 78, type=split(ReduceOverlap)(split_times=[658]). 1 Input Files, 3.66mb total:"
- "**** Simulation run 76, type=split(ReduceOverlap)(split_times=[658]). 1 Input Files, 3.66mb total:"
- "L0, all files 3.66mb "
- "L0.174[357,670] 1.04us |-----------------------------------------L0.174-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 3.66mb total:"
- "L0 "
- "L0.?[357,658] 1.04us 3.52mb|----------------------------------------L0.?----------------------------------------| "
- "L0.?[659,670] 1.04us 143.55kb |L0.?|"
- "**** Simulation run 79, type=split(ReduceOverlap)(split_times=[329]). 1 Input Files, 3.33mb total:"
- "**** Simulation run 77, type=split(ReduceOverlap)(split_times=[329]). 1 Input Files, 3.33mb total:"
- "L0, all files 3.33mb "
- "L0.176[42,356] 1.05us |-----------------------------------------L0.176-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 3.33mb total:"
- "L0 "
- "L0.?[42,329] 1.05us 3.04mb|--------------------------------------L0.?--------------------------------------| "
- "L0.?[330,356] 1.05us 292.88kb |L0.?-| "
- "**** Simulation run 80, type=split(ReduceOverlap)(split_times=[658]). 1 Input Files, 3.32mb total:"
- "**** Simulation run 78, type=split(ReduceOverlap)(split_times=[658]). 1 Input Files, 3.32mb total:"
- "L0, all files 3.32mb "
- "L0.177[357,670] 1.05us |-----------------------------------------L0.177-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 3.32mb total:"
- "L0 "
- "L0.?[357,658] 1.05us 3.19mb|----------------------------------------L0.?----------------------------------------| "
- "L0.?[659,670] 1.05us 130.17kb |L0.?|"
- "**** Simulation run 79, type=split(ReduceOverlap)(split_times=[658]). 1 Input Files, 3.66mb total:"
- "L0, all files 3.66mb "
- "L0.163[357,670] 1.04us |-----------------------------------------L0.163-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 3.66mb total:"
- "L0 "
- "L0.?[357,658] 1.04us 3.52mb|----------------------------------------L0.?----------------------------------------| "
- "L0.?[659,670] 1.04us 143.55kb |L0.?|"
- "**** Simulation run 80, type=split(ReduceOverlap)(split_times=[329]). 1 Input Files, 3.33mb total:"
- "L0, all files 3.33mb "
- "L0.165[42,356] 1.04us |-----------------------------------------L0.165-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 3.33mb total:"
- "L0 "
- "L0.?[42,329] 1.04us 3.04mb|--------------------------------------L0.?--------------------------------------| "
- "L0.?[330,356] 1.04us 292.88kb |L0.?-| "
- "**** Simulation run 81, type=split(ReduceOverlap)(split_times=[329]). 1 Input Files, 2.36mb total:"
- "L0, all files 2.36mb "
- "L0.179[173,356] 1.05us |-----------------------------------------L0.179-----------------------------------------|"
@ -963,14 +963,14 @@ async fn random_backfill_empty_partition() {
- "L0.?[649,658] 1.04us 131.79kb |L0.?|"
- "**** Simulation run 98, type=split(ReduceOverlap)(split_times=[648]). 1 Input Files, 3.52mb total:"
- "L0, all files 3.52mb "
- "L0.233[357,658] 1.04us |-----------------------------------------L0.233-----------------------------------------|"
- "L0.249[357,658] 1.04us |-----------------------------------------L0.249-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 3.52mb total:"
- "L0 "
- "L0.?[357,648] 1.04us 3.4mb|----------------------------------------L0.?-----------------------------------------| "
- "L0.?[649,658] 1.04us 119.63kb |L0.?|"
- "**** Simulation run 99, type=split(ReduceOverlap)(split_times=[648]). 1 Input Files, 3.19mb total:"
- "L0, all files 3.19mb "
- "L0.237[357,658] 1.04us |-----------------------------------------L0.237-----------------------------------------|"
- "L0.233[357,658] 1.04us |-----------------------------------------L0.233-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 3.19mb total:"
- "L0 "
- "L0.?[357,648] 1.04us 3.08mb|----------------------------------------L0.?-----------------------------------------| "
@ -984,21 +984,21 @@ async fn random_backfill_empty_partition() {
- "L0.?[967,986] 1.04us 218.33kb |L0.?|"
- "**** Simulation run 101, type=split(ReduceOverlap)(split_times=[648]). 1 Input Files, 3.87mb total:"
- "L0, all files 3.87mb "
- "L0.241[357,658] 1.04us |-----------------------------------------L0.241-----------------------------------------|"
- "L0.237[357,658] 1.04us |-----------------------------------------L0.237-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 3.87mb total:"
- "L0 "
- "L0.?[357,648] 1.04us 3.75mb|----------------------------------------L0.?-----------------------------------------| "
- "L0.?[649,658] 1.04us 131.79kb |L0.?|"
- "**** Simulation run 102, type=split(ReduceOverlap)(split_times=[648]). 1 Input Files, 3.52mb total:"
- "L0, all files 3.52mb "
- "L0.247[357,658] 1.04us |-----------------------------------------L0.247-----------------------------------------|"
- "L0.243[357,658] 1.04us |-----------------------------------------L0.243-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 3.52mb total:"
- "L0 "
- "L0.?[357,648] 1.04us 3.4mb|----------------------------------------L0.?-----------------------------------------| "
- "L0.?[649,658] 1.04us 119.63kb |L0.?|"
- "**** Simulation run 103, type=split(ReduceOverlap)(split_times=[648]). 1 Input Files, 3.19mb total:"
- "L0, all files 3.19mb "
- "L0.251[357,658] 1.05us |-----------------------------------------L0.251-----------------------------------------|"
- "L0.247[357,658] 1.05us |-----------------------------------------L0.247-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 3.19mb total:"
- "L0 "
- "L0.?[357,648] 1.05us 3.08mb|----------------------------------------L0.?-----------------------------------------| "
@ -1039,7 +1039,7 @@ async fn random_backfill_empty_partition() {
- "L0.?[671,966] 1.05us 3.14mb|---------------------------------------L0.?---------------------------------------| "
- "L0.?[967,986] 1.05us 218.33kb |L0.?|"
- "Committing partition 1:"
- " Soft Deleting 20 files: L0.145, L0.156, L0.167, L0.178, L0.189, L0.199, L0.205, L0.209, L0.213, L0.219, L0.223, L0.227, L0.233, L0.237, L0.241, L0.247, L0.251, L0.255, L0.261, L0.265"
- " Soft Deleting 20 files: L0.145, L0.156, L0.167, L0.178, L0.189, L0.199, L0.205, L0.209, L0.213, L0.219, L0.223, L0.227, L0.233, L0.237, L0.243, L0.247, L0.249, L0.255, L0.261, L0.265"
- " Creating 40 files"
- "**** Simulation run 109, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[263]). 2 Input Files, 103.14mb total:"
- "L0 "
@ -1132,35 +1132,35 @@ async fn random_backfill_empty_partition() {
- "L0.?[264,329] 1.04us 789.53kb |--------L0.?---------| "
- "**** Simulation run 121, type=split(ReduceOverlap)(split_times=[263]). 1 Input Files, 3.04mb total:"
- "L0, all files 3.04mb "
- "L0.235[42,329] 1.04us |-----------------------------------------L0.235-----------------------------------------|"
- "L0.251[42,329] 1.04us |-----------------------------------------L0.251-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 3.04mb total:"
- "L0 "
- "L0.?[42,263] 1.04us 2.34mb|-------------------------------L0.?--------------------------------| "
- "L0.?[264,329] 1.04us 715.93kb |-------L0.?-------| "
- "**** Simulation run 122, type=split(ReduceOverlap)(split_times=[263]). 1 Input Files, 2.01mb total:"
- "L0, all files 2.01mb "
- "L0.239[173,329] 1.04us |-----------------------------------------L0.239-----------------------------------------|"
- "L0.235[173,329] 1.04us |-----------------------------------------L0.235-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 2.01mb total:"
- "L0 "
- "L0.?[173,263] 1.04us 1.16mb|----------------------L0.?-----------------------| "
- "L0.?[264,329] 1.04us 869.81kb |---------------L0.?----------------| "
- "**** Simulation run 123, type=split(ReduceOverlap)(split_times=[263]). 1 Input Files, 4.82mb total:"
- "L0, all files 4.82mb "
- "L0.243[50,329] 1.04us |-----------------------------------------L0.243-----------------------------------------|"
- "L0.239[50,329] 1.04us |-----------------------------------------L0.239-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 4.82mb total:"
- "L0 "
- "L0.?[50,263] 1.04us 3.68mb|-------------------------------L0.?-------------------------------| "
- "L0.?[264,329] 1.04us 1.14mb |-------L0.?-------| "
- "**** Simulation run 124, type=split(ReduceOverlap)(split_times=[263]). 1 Input Files, 2.96mb total:"
- "L0, all files 2.96mb "
- "L0.245[76,329] 1.04us |-----------------------------------------L0.245-----------------------------------------|"
- "L0.241[76,329] 1.04us |-----------------------------------------L0.241-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 2.96mb total:"
- "L0 "
- "L0.?[76,263] 1.04us 2.18mb|------------------------------L0.?------------------------------| "
- "L0.?[264,329] 1.04us 789.53kb |--------L0.?---------| "
- "**** Simulation run 125, type=split(ReduceOverlap)(split_times=[263]). 1 Input Files, 3.04mb total:"
- "L0, all files 3.04mb "
- "L0.249[42,329] 1.05us |-----------------------------------------L0.249-----------------------------------------|"
- "L0.245[42,329] 1.05us |-----------------------------------------L0.245-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 3.04mb total:"
- "L0 "
- "L0.?[42,263] 1.05us 2.34mb|-------------------------------L0.?--------------------------------| "
@ -1194,7 +1194,7 @@ async fn random_backfill_empty_partition() {
- "L0.?[42,263] 1.05us 2.34mb|-------------------------------L0.?--------------------------------| "
- "L0.?[264,329] 1.05us 715.93kb |-------L0.?-------| "
- "Committing partition 1:"
- " Soft Deleting 20 files: L0.197, L0.201, L0.203, L0.207, L0.211, L0.215, L0.217, L0.221, L0.225, L0.229, L0.231, L0.235, L0.239, L0.243, L0.245, L0.249, L0.253, L0.257, L0.259, L0.263"
- " Soft Deleting 20 files: L0.197, L0.201, L0.203, L0.207, L0.211, L0.215, L0.217, L0.221, L0.225, L0.229, L0.231, L0.235, L0.239, L0.241, L0.245, L0.251, L0.253, L0.257, L0.259, L0.263"
- " Creating 40 files"
- "**** Simulation run 130, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[570, 876]). 9 Input Files, 229.77mb total:"
- "L0 "
@ -2050,20 +2050,20 @@ async fn random_backfill_empty_partition() {
- "L0.522[584,590] 1.04us 84.83kb |L0.522| "
- "L0.455[591,648] 1.04us 702.84kb |L0.455| "
- "L0.289[649,658] 1.04us 119.63kb |L0.289| "
- "L0.234[659,670] 1.04us 143.55kb |L0.234| "
- "L0.250[659,670] 1.04us 143.55kb |L0.250| "
- "L0.523[671,870] 1.04us 2.34mb |-----L0.523-----| "
- "L0.524[871,876] 1.04us 72.33kb |L0.524| "
- "L0.388[877,932] 1.04us 675.04kb |L0.388| "
- "L0.334[42,263] 1.04us 2.34mb|------L0.334-------| "
- "L0.460[264,295] 1.04us 341.44kb |L0.460| "
- "L0.461[296,329] 1.04us 374.49kb |L0.461| "
- "L0.236[330,356] 1.04us 292.88kb |L0.236| "
- "L0.252[330,356] 1.04us 292.88kb |L0.252| "
- "L0.389[357,570] 1.04us 2.26mb |------L0.389------| "
- "L0.525[571,583] 1.04us 131.86kb |L0.525| "
- "L0.526[584,590] 1.04us 76.92kb |L0.526| "
- "L0.459[591,648] 1.04us 637.32kb |L0.459| "
- "L0.291[649,658] 1.04us 108.47kb |L0.291| "
- "L0.238[659,670] 1.04us 130.17kb |L0.238| "
- "L0.234[659,670] 1.04us 130.17kb |L0.234| "
- "L0.527[671,870] 1.04us 2.12mb |-----L0.527-----| "
- "L0.528[871,876] 1.04us 65.5kb |L0.528| "
- "L0.392[877,966] 1.04us 982.47kb |L0.392| "
@ -2071,20 +2071,20 @@ async fn random_backfill_empty_partition() {
- "L0.336[173,263] 1.04us 1.16mb |L0.336| "
- "L0.464[264,295] 1.04us 414.83kb |L0.464| "
- "L0.465[296,329] 1.04us 454.98kb |L0.465| "
- "L0.240[330,356] 1.04us 355.83kb |L0.240| "
- "L0.236[330,356] 1.04us 355.83kb |L0.236| "
- "L0.393[357,570] 1.04us 2.74mb |------L0.393------| "
- "L0.529[571,583] 1.04us 160.2kb |L0.529| "
- "L0.530[584,590] 1.04us 93.45kb |L0.530| "
- "L0.463[591,648] 1.04us 774.3kb |L0.463| "
- "L0.295[649,658] 1.04us 131.79kb |L0.295| "
- "L0.242[659,670] 1.04us 158.15kb |L0.242| "
- "L0.238[659,670] 1.04us 158.15kb |L0.238| "
- "L0.531[671,870] 1.04us 2.58mb |-----L0.531-----| "
- "L0.532[871,876] 1.04us 79.64kb |L0.532| "
- "L0.396[877,950] 1.04us 982.23kb |L0.396| "
- "L0.338[50,263] 1.04us 3.68mb|------L0.338------| "
- "L0.468[264,295] 1.04us 556.69kb |L0.468| "
- "L0.469[296,329] 1.04us 610.56kb |L0.469| "
- "L0.244[330,356] 1.04us 477.51kb |L0.244| "
- "L0.240[330,356] 1.04us 477.51kb |L0.240| "
- "L0.397[357,570] 1.04us 3.69mb |------L0.397------| "
- "L0.533[571,583] 1.04us 216.68kb |L0.533| "
- "L0.534[584,590] 1.04us 126.4kb |L0.534| "
@ -2092,26 +2092,26 @@ async fn random_backfill_empty_partition() {
- "L0.340[76,263] 1.04us 2.18mb |----L0.340-----| "
- "L0.472[264,295] 1.04us 376.55kb |L0.472| "
- "L0.473[296,329] 1.04us 412.99kb |L0.473| "
- "L0.246[330,356] 1.04us 322.99kb |L0.246| "
- "L0.242[330,356] 1.04us 322.99kb |L0.242| "
- "L0.399[357,570] 1.04us 2.49mb |------L0.399------| "
- "L0.535[571,583] 1.04us 145.42kb |L0.535| "
- "L0.536[584,590] 1.04us 84.83kb |L0.536| "
- "L0.471[591,648] 1.04us 702.84kb |L0.471| "
- "L0.297[649,658] 1.04us 119.63kb |L0.297| "
- "L0.248[659,670] 1.04us 143.55kb |L0.248| "
- "L0.244[659,670] 1.04us 143.55kb |L0.244| "
- "L0.537[671,870] 1.04us 2.34mb |-----L0.537-----| "
- "L0.538[871,876] 1.04us 72.33kb |L0.538| "
- "L0.402[877,932] 1.04us 675.04kb |L0.402| "
- "L0.342[42,263] 1.05us 2.34mb|------L0.342-------| "
- "L0.476[264,295] 1.05us 341.44kb |L0.476| "
- "L0.477[296,329] 1.05us 374.49kb |L0.477| "
- "L0.250[330,356] 1.05us 292.88kb |L0.250| "
- "L0.246[330,356] 1.05us 292.88kb |L0.246| "
- "L0.403[357,570] 1.05us 2.26mb |------L0.403------| "
- "L0.539[571,583] 1.05us 131.86kb |L0.539| "
- "L0.540[584,590] 1.05us 76.92kb |L0.540| "
- "L0.475[591,648] 1.05us 637.32kb |L0.475| "
- "L0.299[649,658] 1.05us 108.47kb |L0.299| "
- "L0.252[659,670] 1.05us 130.17kb |L0.252| "
- "L0.248[659,670] 1.05us 130.17kb |L0.248| "
- "L0.541[671,870] 1.05us 2.12mb |-----L0.541-----| "
- "L0.542[871,876] 1.05us 65.5kb |L0.542| "
- "L0.406[877,966] 1.05us 982.47kb |L0.406| "
@ -3639,76 +3639,76 @@ async fn random_backfill_over_l2s() {
- "L0 "
- "L0.?[592,626] 1.03us 374.74kb|----------------L0.?-----------------| "
- "L0.?[627,670] 1.03us 484.96kb |---------------------L0.?----------------------| "
- "**** Simulation run 142, type=split(ReduceOverlap)(split_times=[334]). 1 Input Files, 672.54kb total:"
- "L0, all files 672.54kb "
- "L0.279[295,356] 1.03us |-----------------------------------------L0.279-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 672.54kb total:"
- "L0 "
- "L0.?[295,334] 1.03us 429.99kb|-------------------------L0.?--------------------------| "
- "L0.?[335,356] 1.03us 242.56kb |------------L0.?------------| "
- "**** Simulation run 143, type=split(ReduceOverlap)(split_times=[626]). 1 Input Files, 1.02mb total:"
- "L0, all files 1.02mb "
- "L0.324[592,670] 1.03us |-----------------------------------------L0.324-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.02mb total:"
- "L0 "
- "L0.?[592,626] 1.03us 455.28kb|----------------L0.?-----------------| "
- "L0.?[627,670] 1.03us 589.19kb |---------------------L0.?----------------------| "
- "**** Simulation run 144, type=split(ReduceOverlap)(split_times=[334]). 1 Input Files, 817.09kb total:"
- "**** Simulation run 142, type=split(ReduceOverlap)(split_times=[334]). 1 Input Files, 817.09kb total:"
- "L0, all files 817.09kb "
- "L0.281[295,356] 1.03us |-----------------------------------------L0.281-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 817.09kb total:"
- "L0 "
- "L0.?[295,334] 1.03us 522.4kb|-------------------------L0.?--------------------------| "
- "L0.?[335,356] 1.03us 294.69kb |------------L0.?------------| "
- "**** Simulation run 145, type=split(ReduceOverlap)(split_times=[626]). 1 Input Files, 677.02kb total:"
- "**** Simulation run 143, type=split(ReduceOverlap)(split_times=[626]). 1 Input Files, 677.02kb total:"
- "L0, all files 677.02kb "
- "L0.328[592,629] 1.03us |-----------------------------------------L0.328-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 677.02kb total:"
- "L0 "
- "L0.?[592,626] 1.03us 622.12kb|--------------------------------------L0.?--------------------------------------| "
- "L0.?[627,629] 1.03us 54.89kb |L0.?|"
- "**** Simulation run 146, type=split(ReduceOverlap)(split_times=[334]). 1 Input Files, 1.07mb total:"
- "**** Simulation run 144, type=split(ReduceOverlap)(split_times=[334]). 1 Input Files, 1.07mb total:"
- "L0, all files 1.07mb "
- "L0.283[295,356] 1.03us |-----------------------------------------L0.283-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.07mb total:"
- "L0 "
- "L0.?[295,334] 1.03us 701.05kb|-------------------------L0.?--------------------------| "
- "L0.?[335,356] 1.03us 395.46kb |------------L0.?------------| "
- "**** Simulation run 147, type=split(ReduceOverlap)(split_times=[626]). 1 Input Files, 948.08kb total:"
- "**** Simulation run 145, type=split(ReduceOverlap)(split_times=[626]). 1 Input Files, 948.08kb total:"
- "L0, all files 948.08kb "
- "L0.330[592,670] 1.04us |-----------------------------------------L0.330-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 948.08kb total:"
- "L0 "
- "L0.?[592,626] 1.04us 413.26kb|----------------L0.?-----------------| "
- "L0.?[627,670] 1.04us 534.81kb |---------------------L0.?----------------------| "
- "**** Simulation run 148, type=split(ReduceOverlap)(split_times=[334]). 1 Input Files, 741.68kb total:"
- "**** Simulation run 146, type=split(ReduceOverlap)(split_times=[334]). 1 Input Files, 741.68kb total:"
- "L0, all files 741.68kb "
- "L0.285[295,356] 1.04us |-----------------------------------------L0.285-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 741.68kb total:"
- "L0 "
- "L0.?[295,334] 1.04us 474.19kb|-------------------------L0.?--------------------------| "
- "L0.?[335,356] 1.04us 267.49kb |------------L0.?------------| "
- "**** Simulation run 149, type=split(ReduceOverlap)(split_times=[626]). 1 Input Files, 859.7kb total:"
- "**** Simulation run 147, type=split(ReduceOverlap)(split_times=[626]). 1 Input Files, 859.7kb total:"
- "L0, all files 859.7kb "
- "L0.334[592,670] 1.04us |-----------------------------------------L0.334-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 859.7kb total:"
- "L0 "
- "L0.?[592,626] 1.04us 374.74kb|----------------L0.?-----------------| "
- "L0.?[627,670] 1.04us 484.96kb |---------------------L0.?----------------------| "
- "**** Simulation run 150, type=split(ReduceOverlap)(split_times=[334]). 1 Input Files, 672.54kb total:"
- "**** Simulation run 148, type=split(ReduceOverlap)(split_times=[334]). 1 Input Files, 672.54kb total:"
- "L0, all files 672.54kb "
- "L0.287[295,356] 1.04us |-----------------------------------------L0.287-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 672.54kb total:"
- "L0 "
- "L0.?[295,334] 1.04us 429.99kb|-------------------------L0.?--------------------------| "
- "L0.?[335,356] 1.04us 242.56kb |------------L0.?------------| "
- "**** Simulation run 151, type=split(ReduceOverlap)(split_times=[626]). 1 Input Files, 1.02mb total:"
- "**** Simulation run 149, type=split(ReduceOverlap)(split_times=[626]). 1 Input Files, 1.02mb total:"
- "L0, all files 1.02mb "
- "L0.338[592,670] 1.04us |-----------------------------------------L0.338-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.02mb total:"
- "L0 "
- "L0.?[592,626] 1.04us 455.28kb|----------------L0.?-----------------| "
- "L0.?[627,670] 1.04us 589.19kb |---------------------L0.?----------------------| "
- "**** Simulation run 150, type=split(ReduceOverlap)(split_times=[334]). 1 Input Files, 672.54kb total:"
- "L0, all files 672.54kb "
- "L0.279[295,356] 1.03us |-----------------------------------------L0.279-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 672.54kb total:"
- "L0 "
- "L0.?[295,334] 1.03us 429.99kb|-------------------------L0.?--------------------------| "
- "L0.?[335,356] 1.03us 242.56kb |------------L0.?------------| "
- "**** Simulation run 151, type=split(ReduceOverlap)(split_times=[626]). 1 Input Files, 1.02mb total:"
- "L0, all files 1.02mb "
- "L0.324[592,670] 1.03us |-----------------------------------------L0.324-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.02mb total:"
- "L0 "
- "L0.?[592,626] 1.03us 455.28kb|----------------L0.?-----------------| "
- "L0.?[627,670] 1.03us 589.19kb |---------------------L0.?----------------------| "
- "**** Simulation run 152, type=split(ReduceOverlap)(split_times=[334]). 1 Input Files, 817.09kb total:"
- "L0, all files 817.09kb "
- "L0.289[295,356] 1.04us |-----------------------------------------L0.289-----------------------------------------|"
@ -3910,7 +3910,7 @@ async fn random_backfill_over_l2s() {
- "L0.?[904,986] 1.03us 918.23kb |----------------------------------L0.?-----------------------------------| "
- "**** Simulation run 178, type=split(ReduceOverlap)(split_times=[619]). 1 Input Files, 455.28kb total:"
- "L0, all files 455.28kb "
- "L0.388[592,626] 1.03us |-----------------------------------------L0.388-----------------------------------------|"
- "L0.404[592,626] 1.03us |-----------------------------------------L0.404-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 455.28kb total:"
- "L0 "
- "L0.?[592,619] 1.03us 361.55kb|--------------------------------L0.?---------------------------------| "
@ -3924,14 +3924,14 @@ async fn random_backfill_over_l2s() {
- "L0.?[904,950] 1.03us 636.2kb |------------------------------L0.?------------------------------| "
- "**** Simulation run 180, type=split(ReduceOverlap)(split_times=[619]). 1 Input Files, 622.12kb total:"
- "L0, all files 622.12kb "
- "L0.392[592,626] 1.03us |-----------------------------------------L0.392-----------------------------------------|"
- "L0.388[592,626] 1.03us |-----------------------------------------L0.388-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 622.12kb total:"
- "L0 "
- "L0.?[592,619] 1.03us 494.04kb|--------------------------------L0.?---------------------------------| "
- "L0.?[620,626] 1.03us 128.08kb |----L0.?-----| "
- "**** Simulation run 181, type=split(ReduceOverlap)(split_times=[619]). 1 Input Files, 413.26kb total:"
- "L0, all files 413.26kb "
- "L0.396[592,626] 1.04us |-----------------------------------------L0.396-----------------------------------------|"
- "L0.392[592,626] 1.04us |-----------------------------------------L0.392-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 413.26kb total:"
- "L0 "
- "L0.?[592,619] 1.04us 328.18kb|--------------------------------L0.?---------------------------------| "
@ -3945,7 +3945,7 @@ async fn random_backfill_over_l2s() {
- "L0.?[904,932] 1.04us 358.9kb |-------------------------L0.?--------------------------| "
- "**** Simulation run 183, type=split(ReduceOverlap)(split_times=[619]). 1 Input Files, 374.74kb total:"
- "L0, all files 374.74kb "
- "L0.400[592,626] 1.04us |-----------------------------------------L0.400-----------------------------------------|"
- "L0.396[592,626] 1.04us |-----------------------------------------L0.396-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 374.74kb total:"
- "L0 "
- "L0.?[592,619] 1.04us 297.59kb|--------------------------------L0.?---------------------------------| "
@ -3959,7 +3959,7 @@ async fn random_backfill_over_l2s() {
- "L0.?[904,986] 1.04us 918.23kb |----------------------------------L0.?-----------------------------------| "
- "**** Simulation run 185, type=split(ReduceOverlap)(split_times=[619]). 1 Input Files, 455.28kb total:"
- "L0, all files 455.28kb "
- "L0.404[592,626] 1.04us |-----------------------------------------L0.404-----------------------------------------|"
- "L0.400[592,626] 1.04us |-----------------------------------------L0.400-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 455.28kb total:"
- "L0 "
- "L0.?[592,619] 1.04us 361.55kb|--------------------------------L0.?---------------------------------| "
@ -4236,8 +4236,8 @@ async fn random_backfill_over_l2s() {
- " Creating 32 files"
- "**** Simulation run 223, type=split(CompactAndSplitOutput(ManySmallFiles))(split_times=[610]). 200 Input Files, 166.42mb total:"
- "L0 "
- "L0.386[295,334] 1.03us 429.99kb |L0.386| "
- "L0.387[335,356] 1.03us 242.56kb |L0.387| "
- "L0.402[295,334] 1.03us 429.99kb |L0.402| "
- "L0.403[335,356] 1.03us 242.56kb |L0.403| "
- "L0.319[358,591] 1.03us 2.48mb |-------L0.319-------| "
- "L0.455[592,619] 1.03us 297.59kb |L0.455| "
- "L0.456[620,626] 1.03us 77.15kb |L0.456| "
@ -4247,48 +4247,48 @@ async fn random_backfill_over_l2s() {
- "L0.458[904,986] 1.03us 918.23kb |L0.458|"
- "L0.517[173,275] 1.03us 1.31mb |L0.517-| "
- "L0.518[276,294] 1.03us 250.4kb |L0.518| "
- "L0.390[295,334] 1.03us 522.4kb |L0.390| "
- "L0.391[335,356] 1.03us 294.69kb |L0.391| "
- "L0.386[295,334] 1.03us 522.4kb |L0.386| "
- "L0.387[335,356] 1.03us 294.69kb |L0.387| "
- "L0.216[357,357] 1.03us 0b |L0.216| "
- "L0.323[358,591] 1.03us 3.01mb |-------L0.323-------| "
- "L0.459[592,619] 1.03us 361.55kb |L0.459| "
- "L0.460[620,626] 1.03us 93.73kb |L0.460| "
- "L0.389[627,670] 1.03us 589.19kb |L0.389| "
- "L0.405[627,670] 1.03us 589.19kb |L0.405| "
- "L0.218[671,672] 1.03us 13.27kb |L0.218| "
- "L0.325[673,887] 1.03us 2.78mb |------L0.325------| "
- "L0.461[888,903] 1.03us 203.04kb |L0.461| "
- "L0.462[904,950] 1.03us 636.2kb |L0.462|"
- "L0.519[50,275] 1.03us 3.89mb|------L0.519-------| "
- "L0.520[276,294] 1.03us 336.03kb |L0.520| "
- "L0.394[295,334] 1.03us 701.05kb |L0.394| "
- "L0.395[335,356] 1.03us 395.46kb |L0.395| "
- "L0.390[295,334] 1.03us 701.05kb |L0.390| "
- "L0.391[335,356] 1.03us 395.46kb |L0.391| "
- "L0.220[357,357] 1.03us 0b |L0.220| "
- "L0.327[358,591] 1.03us 4.05mb |-------L0.327-------| "
- "L0.463[592,619] 1.03us 494.04kb |L0.463| "
- "L0.464[620,626] 1.03us 128.08kb |L0.464| "
- "L0.393[627,629] 1.03us 54.89kb |L0.393| "
- "L0.389[627,629] 1.03us 54.89kb |L0.389| "
- "L0.521[76,275] 1.04us 2.32mb |-----L0.521-----| "
- "L0.522[276,294] 1.04us 227.29kb |L0.522| "
- "L0.398[295,334] 1.04us 474.19kb |L0.398| "
- "L0.399[335,356] 1.04us 267.49kb |L0.399| "
- "L0.394[295,334] 1.04us 474.19kb |L0.394| "
- "L0.395[335,356] 1.04us 267.49kb |L0.395| "
- "L0.222[357,357] 1.04us 0b |L0.222| "
- "L0.329[358,591] 1.04us 2.73mb |-------L0.329-------| "
- "L0.465[592,619] 1.04us 328.18kb |L0.465| "
- "L0.466[620,626] 1.04us 85.08kb |L0.466| "
- "L0.397[627,670] 1.04us 534.81kb |L0.397| "
- "L0.393[627,670] 1.04us 534.81kb |L0.393| "
- "L0.224[671,672] 1.04us 12.05kb |L0.224| "
- "L0.331[673,887] 1.04us 2.53mb |------L0.331------| "
- "L0.467[888,903] 1.04us 185.64kb |L0.467| "
- "L0.468[904,932] 1.04us 358.9kb |L0.468|"
- "L0.523[42,275] 1.04us 2.47mb|-------L0.523-------| "
- "L0.524[276,294] 1.04us 206.1kb |L0.524| "
- "L0.402[295,334] 1.04us 429.99kb |L0.402| "
- "L0.403[335,356] 1.04us 242.56kb |L0.403| "
- "L0.398[295,334] 1.04us 429.99kb |L0.398| "
- "L0.399[335,356] 1.04us 242.56kb |L0.399| "
- "L0.226[357,357] 1.04us 0b |L0.226| "
- "L0.333[358,591] 1.04us 2.48mb |-------L0.333-------| "
- "L0.469[592,619] 1.04us 297.59kb |L0.469| "
- "L0.470[620,626] 1.04us 77.15kb |L0.470| "
- "L0.401[627,670] 1.04us 484.96kb |L0.401| "
- "L0.397[627,670] 1.04us 484.96kb |L0.397| "
- "L0.228[671,672] 1.04us 10.92kb |L0.228| "
- "L0.335[673,887] 1.04us 2.29mb |------L0.335------| "
- "L0.471[888,903] 1.04us 165.94kb |L0.471| "
@ -4301,7 +4301,7 @@ async fn random_backfill_over_l2s() {
- "L0.337[358,591] 1.04us 3.01mb |-------L0.337-------| "
- "L0.473[592,619] 1.04us 361.55kb |L0.473| "
- "L0.474[620,626] 1.04us 93.73kb |L0.474| "
- "L0.405[627,670] 1.04us 589.19kb |L0.405| "
- "L0.401[627,670] 1.04us 589.19kb |L0.401| "
- "L0.232[671,672] 1.04us 13.27kb |L0.232| "
- "L0.339[673,887] 1.04us 2.78mb |------L0.339------| "
- "L0.475[888,903] 1.04us 203.04kb |L0.475| "


@ -858,39 +858,39 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
- "L0.?[171444,200000] 5ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
- "**** Simulation run 51, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
- "L0, all files 1.29mb "
- "L0.52[171443,200000] 6ns |-----------------------------------------L0.52------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
- "L0 "
- "L0.?[171443,171443] 6ns 0b|L0.?| "
- "L0.?[171444,200000] 6ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
- "**** Simulation run 52, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
- "L0, all files 1.29mb "
- "L0.59[171443,200000] 7ns |-----------------------------------------L0.59------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
- "L0 "
- "L0.?[171443,171443] 7ns 0b|L0.?| "
- "L0.?[171444,200000] 7ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
- "**** Simulation run 53, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
- "L0, all files 1.29mb "
- "L0.66[171443,200000] 8ns |-----------------------------------------L0.66------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
- "L0 "
- "L0.?[171443,171443] 8ns 0b|L0.?| "
- "L0.?[171444,200000] 8ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
- "**** Simulation run 54, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
- "**** Simulation run 52, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
- "L0, all files 1.29mb "
- "L0.73[171443,200000] 9ns |-----------------------------------------L0.73------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
- "L0 "
- "L0.?[171443,171443] 9ns 0b|L0.?| "
- "L0.?[171444,200000] 9ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
- "**** Simulation run 55, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
- "**** Simulation run 53, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
- "L0, all files 1.29mb "
- "L0.80[171443,200000] 10ns|-----------------------------------------L0.80------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
- "L0 "
- "L0.?[171443,171443] 10ns 0b|L0.?| "
- "L0.?[171444,200000] 10ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
- "**** Simulation run 54, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
- "L0, all files 1.29mb "
- "L0.52[171443,200000] 6ns |-----------------------------------------L0.52------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
- "L0 "
- "L0.?[171443,171443] 6ns 0b|L0.?| "
- "L0.?[171444,200000] 6ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
- "**** Simulation run 55, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
- "L0, all files 1.29mb "
- "L0.59[171443,200000] 7ns |-----------------------------------------L0.59------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
- "L0 "
- "L0.?[171443,171443] 7ns 0b|L0.?| "
- "L0.?[171444,200000] 7ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
- "Committing partition 1:"
- " Soft Deleting 27 files: L0.42, L0.44, L0.45, L0.49, L0.51, L0.52, L0.56, L0.58, L0.59, L0.63, L0.65, L0.66, L0.70, L0.72, L0.73, L0.77, L0.79, L0.80, L0.99, L0.103, L0.107, L0.111, L0.115, L0.119, L1.121, L1.122, L1.123"
- " Creating 55 files"
@ -1213,7 +1213,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
- "L0.?[156351,160867] 6ns 208.25kb |--------L0.?--------| "
- "**** Simulation run 95, type=split(ReduceOverlap)(split_times=[198370]). 1 Input Files, 1.29mb total:"
- "L0, all files 1.29mb "
- "L0.170[171444,200000] 6ns|-----------------------------------------L0.170-----------------------------------------|"
- "L0.176[171444,200000] 6ns|-----------------------------------------L0.176-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
- "L0 "
- "L0.?[171444,198370] 6ns 1.21mb|---------------------------------------L0.?---------------------------------------| "
@ -1227,7 +1227,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
- "L0.?[156351,160867] 7ns 208.25kb |--------L0.?--------| "
- "**** Simulation run 97, type=split(ReduceOverlap)(split_times=[198370]). 1 Input Files, 1.29mb total:"
- "L0, all files 1.29mb "
- "L0.172[171444,200000] 7ns|-----------------------------------------L0.172-----------------------------------------|"
- "L0.178[171444,200000] 7ns|-----------------------------------------L0.178-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
- "L0 "
- "L0.?[171444,198370] 7ns 1.21mb|---------------------------------------L0.?---------------------------------------| "
@ -1241,7 +1241,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
- "L0.?[156351,160867] 8ns 208.25kb |--------L0.?--------| "
- "**** Simulation run 99, type=split(ReduceOverlap)(split_times=[198370]). 1 Input Files, 1.29mb total:"
- "L0, all files 1.29mb "
- "L0.174[171444,200000] 8ns|-----------------------------------------L0.174-----------------------------------------|"
- "L0.170[171444,200000] 8ns|-----------------------------------------L0.170-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
- "L0 "
- "L0.?[171444,198370] 8ns 1.21mb|---------------------------------------L0.?---------------------------------------| "
@ -1255,7 +1255,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
- "L0.?[156351,160867] 9ns 208.25kb |--------L0.?--------| "
- "**** Simulation run 101, type=split(ReduceOverlap)(split_times=[198370]). 1 Input Files, 1.29mb total:"
- "L0, all files 1.29mb "
- "L0.176[171444,200000] 9ns|-----------------------------------------L0.176-----------------------------------------|"
- "L0.172[171444,200000] 9ns|-----------------------------------------L0.172-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
- "L0 "
- "L0.?[171444,198370] 9ns 1.21mb|---------------------------------------L0.?---------------------------------------| "
@ -1269,7 +1269,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
- "L0.?[156351,160867] 10ns 208.25kb |--------L0.?--------| "
- "**** Simulation run 103, type=split(ReduceOverlap)(split_times=[198370]). 1 Input Files, 1.29mb total:"
- "L0, all files 1.29mb "
- "L0.178[171444,200000] 10ns|-----------------------------------------L0.178-----------------------------------------|"
- "L0.174[171444,200000] 10ns|-----------------------------------------L0.174-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
- "L0 "
- "L0.?[171444,198370] 10ns 1.21mb|---------------------------------------L0.?---------------------------------------| "
@ -1389,7 +1389,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
- "L0.257[142887,156350] 6ns 620.71kb |---L0.257---| "
- "L0.258[156351,160867] 6ns 208.25kb |L0.258| "
- "L0.186[160868,171442] 6ns 487.56kb |-L0.186--| "
- "L0.169[171443,171443] 6ns 0b |L0.169| "
- "L0.175[171443,171443] 6ns 0b |L0.175| "
- "L0.259[171444,198370] 6ns 1.21mb |----------L0.259----------| "
- "L0.260[198371,200000] 6ns 75.17kb |L0.260|"
- "L1 "
@ -1404,7 +1404,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
- "L1.?[149666,185000] 6ns 10mb |---------------L1.?----------------| "
- "L1.?[185001,200000] 6ns 4.25mb |----L1.?-----| "
- "Committing partition 1:"
- " Soft Deleting 14 files: L0.104, L0.144, L0.155, L0.169, L0.186, L1.252, L1.253, L1.254, L1.255, L1.256, L0.257, L0.258, L0.259, L0.260"
- " Soft Deleting 14 files: L0.104, L0.144, L0.155, L0.175, L0.186, L1.252, L1.253, L1.254, L1.255, L1.256, L0.257, L0.258, L0.259, L0.260"
- " Creating 3 files"
- "**** Simulation run 116, type=split(HighL0OverlapTotalBacklog)(split_times=[142886]). 1 Input Files, 10mb total:"
- "L1, all files 10mb "
@ -1743,7 +1743,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
- "**** Simulation run 156, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[170977]). 8 Input Files, 19.54mb total:"
- "L0 "
- "L0.190[160868,171442] 7ns 487.56kb |----L0.190----| "
- "L0.171[171443,171443] 7ns 0b |L0.171| "
- "L0.177[171443,171443] 7ns 0b |L0.177| "
- "L0.309[171444,185000] 7ns 625.13kb |------L0.309------| "
- "L0.310[185001,198370] 7ns 616.55kb |------L0.310------| "
- "L0.264[198371,200000] 7ns 75.17kb |L0.264|"
@ -1756,7 +1756,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
- "L1.?[140564,170977] 7ns 10mb|--------------------L1.?--------------------| "
- "L1.?[170978,200000] 7ns 9.54mb |------------------L1.?-------------------| "
- "Committing partition 1:"
- " Soft Deleting 8 files: L0.171, L0.190, L0.264, L1.302, L1.306, L0.309, L0.310, L1.356"
- " Soft Deleting 8 files: L0.177, L0.190, L0.264, L1.302, L1.306, L0.309, L0.310, L1.356"
- " Creating 2 files"
- "**** Simulation run 157, type=split(ReduceOverlap)(split_times=[170977]). 1 Input Files, 487.56kb total:"
- "L0, all files 487.56kb "
@ -1924,7 +1924,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
- "L0.266[156351,160867] 8ns 208.25kb |L0.266| "
- "L0.387[160868,170977] 8ns 466.12kb |---L0.387----| "
- "L0.388[170978,171442] 8ns 21.44kb |L0.388| "
- "L0.173[171443,171443] 8ns 0b |L0.173| "
- "L0.169[171443,171443] 8ns 0b |L0.169| "
- "L0.313[171444,185000] 8ns 625.13kb |------L0.313------| "
- "L0.314[185001,198370] 8ns 616.55kb |------L0.314------| "
- "L0.268[198371,200000] 8ns 75.17kb |L0.268|"
@ -1937,7 +1937,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
- "L1.?[167315,194064] 8ns 10mb |-----------------L1.?-----------------| "
- "L1.?[194065,200000] 8ns 2.22mb |-L1.?-| "
- "Committing partition 1:"
- " Soft Deleting 13 files: L0.159, L0.173, L0.266, L0.268, L0.311, L0.312, L0.313, L0.314, L0.376, L1.385, L1.386, L0.387, L0.388"
- " Soft Deleting 13 files: L0.159, L0.169, L0.266, L0.268, L0.311, L0.312, L0.313, L0.314, L0.376, L1.385, L1.386, L0.387, L0.388"
- " Creating 3 files"
- "**** Simulation run 173, type=split(ReduceOverlap)(split_times=[167314]). 1 Input Files, 466.12kb total:"
- "L0, all files 466.12kb "
@ -2118,7 +2118,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
- "L0 "
- "L0.423[167315,170977] 9ns 168.9kb|-L0.423-| "
- "L0.390[170978,171442] 9ns 21.44kb |L0.390| "
- "L0.175[171443,171443] 9ns 0b |L0.175| "
- "L0.171[171443,171443] 9ns 0b |L0.171| "
- "L0.317[171444,185000] 9ns 625.13kb |--------------L0.317---------------| "
- "L0.424[185001,194064] 9ns 417.97kb |--------L0.424--------| "
- "L0.425[194065,198370] 9ns 198.58kb |-L0.425--| "
@ -2131,7 +2131,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
- "L1.?[167315,191189] 9ns 10mb|-----------------------------L1.?------------------------------| "
- "L1.?[191190,200000] 9ns 3.69mb |---------L1.?---------| "
- "Committing partition 1:"
- " Soft Deleting 9 files: L0.175, L0.272, L0.317, L0.390, L1.420, L1.421, L0.423, L0.424, L0.425"
- " Soft Deleting 9 files: L0.171, L0.272, L0.317, L0.390, L1.420, L1.421, L0.423, L0.424, L0.425"
- " Creating 2 files"
- "**** Simulation run 189, type=split(ReduceOverlap)(split_times=[191189]). 1 Input Files, 417.97kb total:"
- "L0, all files 417.97kb "
@ -2201,7 +2201,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
- "L0.458[191190,194064] 10ns 132.59kb |L0.458| "
- "L0.457[185001,191189] 10ns 285.38kb |-L0.457--| "
- "L0.321[171444,185000] 10ns 625.13kb |---------L0.321---------| "
- "L0.177[171443,171443] 10ns 0b |L0.177| "
- "L0.173[171443,171443] 10ns 0b |L0.173| "
- "L0.392[170978,171442] 10ns 21.44kb |L0.392| "
- "L0.427[167315,170977] 10ns 168.9kb |L0.427| "
- "L0.426[160868,167314] 10ns 297.22kb |--L0.426--| "
@ -2217,7 +2217,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition() {
- "L1.?[175140,196813] 10ns 10mb |-----------------L1.?------------------| "
- "L1.?[196814,200000] 10ns 1.47mb |L1.?| "
- "Committing partition 1:"
- " Soft Deleting 14 files: L0.177, L0.274, L0.276, L0.321, L0.392, L0.426, L0.427, L0.429, L1.450, L0.454, L1.455, L1.456, L0.457, L0.458"
- " Soft Deleting 14 files: L0.173, L0.274, L0.276, L0.321, L0.392, L0.426, L0.427, L0.429, L1.450, L0.454, L1.455, L1.456, L0.457, L0.458"
- " Creating 3 files"
- "**** Simulation run 193, type=split(CompactAndSplitOutput(TotalSizeLessThanMaxCompactSize))(split_times=[126690, 148648]). 16 Input Files, 22.19mb total:"
- "L0 "
@ -2812,39 +2812,39 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
- "L0.?[171444,200000] 5ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
- "**** Simulation run 51, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
- "L0, all files 1.29mb "
- "L0.52[171443,200000] 6ns |-----------------------------------------L0.52------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
- "L0 "
- "L0.?[171443,171443] 6ns 0b|L0.?| "
- "L0.?[171444,200000] 6ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
- "**** Simulation run 52, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
- "L0, all files 1.29mb "
- "L0.59[171443,200000] 7ns |-----------------------------------------L0.59------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
- "L0 "
- "L0.?[171443,171443] 7ns 0b|L0.?| "
- "L0.?[171444,200000] 7ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
- "**** Simulation run 53, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
- "L0, all files 1.29mb "
- "L0.66[171443,200000] 8ns |-----------------------------------------L0.66------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
- "L0 "
- "L0.?[171443,171443] 8ns 0b|L0.?| "
- "L0.?[171444,200000] 8ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
- "**** Simulation run 54, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
- "**** Simulation run 52, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
- "L0, all files 1.29mb "
- "L0.73[171443,200000] 9ns |-----------------------------------------L0.73------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
- "L0 "
- "L0.?[171443,171443] 9ns 0b|L0.?| "
- "L0.?[171444,200000] 9ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
- "**** Simulation run 55, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
- "**** Simulation run 53, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
- "L0, all files 1.29mb "
- "L0.80[171443,200000] 10ns|-----------------------------------------L0.80------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
- "L0 "
- "L0.?[171443,171443] 10ns 0b|L0.?| "
- "L0.?[171444,200000] 10ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
- "**** Simulation run 54, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
- "L0, all files 1.29mb "
- "L0.52[171443,200000] 6ns |-----------------------------------------L0.52------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
- "L0 "
- "L0.?[171443,171443] 6ns 0b|L0.?| "
- "L0.?[171444,200000] 6ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
- "**** Simulation run 55, type=split(HighL0OverlapTotalBacklog)(split_times=[171443]). 1 Input Files, 1.29mb total:"
- "L0, all files 1.29mb "
- "L0.59[171443,200000] 7ns |-----------------------------------------L0.59------------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
- "L0 "
- "L0.?[171443,171443] 7ns 0b|L0.?| "
- "L0.?[171444,200000] 7ns 1.29mb|-----------------------------------------L0.?------------------------------------------| "
- "Committing partition 1:"
- " Soft Deleting 27 files: L0.42, L0.44, L0.45, L0.49, L0.51, L0.52, L0.56, L0.58, L0.59, L0.63, L0.65, L0.66, L0.70, L0.72, L0.73, L0.77, L0.79, L0.80, L0.99, L0.103, L0.107, L0.111, L0.115, L0.119, L1.121, L1.122, L1.123"
- " Creating 55 files"
@ -3167,7 +3167,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
- "L0.?[156351,160867] 6ns 208.25kb |--------L0.?--------| "
- "**** Simulation run 95, type=split(ReduceOverlap)(split_times=[198370]). 1 Input Files, 1.29mb total:"
- "L0, all files 1.29mb "
- "L0.170[171444,200000] 6ns|-----------------------------------------L0.170-----------------------------------------|"
- "L0.176[171444,200000] 6ns|-----------------------------------------L0.176-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
- "L0 "
- "L0.?[171444,198370] 6ns 1.21mb|---------------------------------------L0.?---------------------------------------| "
@ -3181,7 +3181,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
- "L0.?[156351,160867] 7ns 208.25kb |--------L0.?--------| "
- "**** Simulation run 97, type=split(ReduceOverlap)(split_times=[198370]). 1 Input Files, 1.29mb total:"
- "L0, all files 1.29mb "
- "L0.172[171444,200000] 7ns|-----------------------------------------L0.172-----------------------------------------|"
- "L0.178[171444,200000] 7ns|-----------------------------------------L0.178-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
- "L0 "
- "L0.?[171444,198370] 7ns 1.21mb|---------------------------------------L0.?---------------------------------------| "
@ -3195,7 +3195,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
- "L0.?[156351,160867] 8ns 208.25kb |--------L0.?--------| "
- "**** Simulation run 99, type=split(ReduceOverlap)(split_times=[198370]). 1 Input Files, 1.29mb total:"
- "L0, all files 1.29mb "
- "L0.174[171444,200000] 8ns|-----------------------------------------L0.174-----------------------------------------|"
- "L0.170[171444,200000] 8ns|-----------------------------------------L0.170-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
- "L0 "
- "L0.?[171444,198370] 8ns 1.21mb|---------------------------------------L0.?---------------------------------------| "
@ -3209,7 +3209,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
- "L0.?[156351,160867] 9ns 208.25kb |--------L0.?--------| "
- "**** Simulation run 101, type=split(ReduceOverlap)(split_times=[198370]). 1 Input Files, 1.29mb total:"
- "L0, all files 1.29mb "
- "L0.176[171444,200000] 9ns|-----------------------------------------L0.176-----------------------------------------|"
- "L0.172[171444,200000] 9ns|-----------------------------------------L0.172-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
- "L0 "
- "L0.?[171444,198370] 9ns 1.21mb|---------------------------------------L0.?---------------------------------------| "
@ -3223,7 +3223,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
- "L0.?[156351,160867] 10ns 208.25kb |--------L0.?--------| "
- "**** Simulation run 103, type=split(ReduceOverlap)(split_times=[198370]). 1 Input Files, 1.29mb total:"
- "L0, all files 1.29mb "
- "L0.178[171444,200000] 10ns|-----------------------------------------L0.178-----------------------------------------|"
- "L0.174[171444,200000] 10ns|-----------------------------------------L0.174-----------------------------------------|"
- "**** 2 Output Files (parquet_file_id not yet assigned), 1.29mb total:"
- "L0 "
- "L0.?[171444,198370] 10ns 1.21mb|---------------------------------------L0.?---------------------------------------| "
@ -3343,7 +3343,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
- "L0.257[142887,156350] 6ns 620.71kb |---L0.257---| "
- "L0.258[156351,160867] 6ns 208.25kb |L0.258| "
- "L0.186[160868,171442] 6ns 487.56kb |-L0.186--| "
- "L0.169[171443,171443] 6ns 0b |L0.169| "
- "L0.175[171443,171443] 6ns 0b |L0.175| "
- "L0.259[171444,198370] 6ns 1.21mb |----------L0.259----------| "
- "L0.260[198371,200000] 6ns 75.17kb |L0.260|"
- "L1 "
@ -3358,7 +3358,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
- "L1.?[149666,185000] 6ns 10mb |---------------L1.?----------------| "
- "L1.?[185001,200000] 6ns 4.25mb |----L1.?-----| "
- "Committing partition 1:"
- " Soft Deleting 14 files: L0.104, L0.144, L0.155, L0.169, L0.186, L1.252, L1.253, L1.254, L1.255, L1.256, L0.257, L0.258, L0.259, L0.260"
- " Soft Deleting 14 files: L0.104, L0.144, L0.155, L0.175, L0.186, L1.252, L1.253, L1.254, L1.255, L1.256, L0.257, L0.258, L0.259, L0.260"
- " Creating 3 files"
- "**** Simulation run 116, type=split(HighL0OverlapTotalBacklog)(split_times=[142886]). 1 Input Files, 10mb total:"
- "L1, all files 10mb "
@ -3697,7 +3697,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
- "**** Simulation run 156, type=split(CompactAndSplitOutput(FoundSubsetLessThanMaxCompactSize))(split_times=[170977]). 8 Input Files, 19.54mb total:"
- "L0 "
- "L0.190[160868,171442] 7ns 487.56kb |----L0.190----| "
- "L0.171[171443,171443] 7ns 0b |L0.171| "
- "L0.177[171443,171443] 7ns 0b |L0.177| "
- "L0.309[171444,185000] 7ns 625.13kb |------L0.309------| "
- "L0.310[185001,198370] 7ns 616.55kb |------L0.310------| "
- "L0.264[198371,200000] 7ns 75.17kb |L0.264|"
@ -3710,7 +3710,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
- "L1.?[140564,170977] 7ns 10mb|--------------------L1.?--------------------| "
- "L1.?[170978,200000] 7ns 9.54mb |------------------L1.?-------------------| "
- "Committing partition 1:"
- " Soft Deleting 8 files: L0.171, L0.190, L0.264, L1.302, L1.306, L0.309, L0.310, L1.356"
- " Soft Deleting 8 files: L0.177, L0.190, L0.264, L1.302, L1.306, L0.309, L0.310, L1.356"
- " Creating 2 files"
- "**** Simulation run 157, type=split(ReduceOverlap)(split_times=[170977]). 1 Input Files, 487.56kb total:"
- "L0, all files 487.56kb "
@ -3878,7 +3878,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
- "L0.266[156351,160867] 8ns 208.25kb |L0.266| "
- "L0.387[160868,170977] 8ns 466.12kb |---L0.387----| "
- "L0.388[170978,171442] 8ns 21.44kb |L0.388| "
- "L0.173[171443,171443] 8ns 0b |L0.173| "
- "L0.169[171443,171443] 8ns 0b |L0.169| "
- "L0.313[171444,185000] 8ns 625.13kb |------L0.313------| "
- "L0.314[185001,198370] 8ns 616.55kb |------L0.314------| "
- "L0.268[198371,200000] 8ns 75.17kb |L0.268|"
@ -3891,7 +3891,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
- "L1.?[167315,194064] 8ns 10mb |-----------------L1.?-----------------| "
- "L1.?[194065,200000] 8ns 2.22mb |-L1.?-| "
- "Committing partition 1:"
- " Soft Deleting 13 files: L0.159, L0.173, L0.266, L0.268, L0.311, L0.312, L0.313, L0.314, L0.376, L1.385, L1.386, L0.387, L0.388"
- " Soft Deleting 13 files: L0.159, L0.169, L0.266, L0.268, L0.311, L0.312, L0.313, L0.314, L0.376, L1.385, L1.386, L0.387, L0.388"
- " Creating 3 files"
- "**** Simulation run 173, type=split(ReduceOverlap)(split_times=[167314]). 1 Input Files, 466.12kb total:"
- "L0, all files 466.12kb "
@ -4072,7 +4072,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
- "L0 "
- "L0.423[167315,170977] 9ns 168.9kb|-L0.423-| "
- "L0.390[170978,171442] 9ns 21.44kb |L0.390| "
- "L0.175[171443,171443] 9ns 0b |L0.175| "
- "L0.171[171443,171443] 9ns 0b |L0.171| "
- "L0.317[171444,185000] 9ns 625.13kb |--------------L0.317---------------| "
- "L0.424[185001,194064] 9ns 417.97kb |--------L0.424--------| "
- "L0.425[194065,198370] 9ns 198.58kb |-L0.425--| "
@ -4085,7 +4085,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
- "L1.?[167315,191189] 9ns 10mb|-----------------------------L1.?------------------------------| "
- "L1.?[191190,200000] 9ns 3.69mb |---------L1.?---------| "
- "Committing partition 1:"
- " Soft Deleting 9 files: L0.175, L0.272, L0.317, L0.390, L1.420, L1.421, L0.423, L0.424, L0.425"
- " Soft Deleting 9 files: L0.171, L0.272, L0.317, L0.390, L1.420, L1.421, L0.423, L0.424, L0.425"
- " Creating 2 files"
- "**** Simulation run 189, type=split(ReduceOverlap)(split_times=[191189]). 1 Input Files, 417.97kb total:"
- "L0, all files 417.97kb "
@ -4155,7 +4155,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
- "L0.458[191190,194064] 10ns 132.59kb |L0.458| "
- "L0.457[185001,191189] 10ns 285.38kb |-L0.457--| "
- "L0.321[171444,185000] 10ns 625.13kb |---------L0.321---------| "
- "L0.177[171443,171443] 10ns 0b |L0.177| "
- "L0.173[171443,171443] 10ns 0b |L0.173| "
- "L0.392[170978,171442] 10ns 21.44kb |L0.392| "
- "L0.427[167315,170977] 10ns 168.9kb |L0.427| "
- "L0.426[160868,167314] 10ns 297.22kb |--L0.426--| "
@ -4171,7 +4171,7 @@ async fn all_overlapping_l0_max_input_bytes_per_partition_small_max_desired_file
- "L1.?[175140,196813] 10ns 10mb |-----------------L1.?------------------| "
- "L1.?[196814,200000] 10ns 1.47mb |L1.?| "
- "Committing partition 1:"
- " Soft Deleting 14 files: L0.177, L0.274, L0.276, L0.321, L0.392, L0.426, L0.427, L0.429, L1.450, L0.454, L1.455, L1.456, L0.457, L0.458"
- " Soft Deleting 14 files: L0.173, L0.274, L0.276, L0.321, L0.392, L0.426, L0.427, L0.429, L1.450, L0.454, L1.455, L1.456, L0.457, L0.458"
- " Creating 3 files"
- "**** Simulation run 193, type=split(CompactAndSplitOutput(TotalSizeLessThanMaxCompactSize))(split_times=[126690, 148648]). 16 Input Files, 22.19mb total:"
- "L0 "

View File

@ -2039,25 +2039,6 @@ impl TableSummary {
}
}
/// Shard index plus offset
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Sequence {
/// The shard index
pub shard_index: ShardIndex,
/// The sequence number
pub sequence_number: SequenceNumber,
}
impl Sequence {
/// Create a new Sequence
pub fn new(shard_index: ShardIndex, sequence_number: SequenceNumber) -> Self {
Self {
shard_index,
sequence_number,
}
}
}
/// minimum time that can be represented.
///
/// 1677-09-21 00:12:43.145224194 +0000 UTC

View File

@ -1,6 +1,5 @@
use std::{borrow::Cow, ops::RangeInclusive};
use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
use thiserror::Error;
/// Length constraints for a [`NamespaceName`] name.
@ -8,6 +7,13 @@ use thiserror::Error;
/// A `RangeInclusive` is a closed interval, covering [1, 64]
const LENGTH_CONSTRAINT: RangeInclusive<usize> = 1..=64;
/// Allowlist of chars for a [`NamespaceName`] name.
///
/// '/' | '_' | '-' are utilized by the platforms.
fn is_allowed(c: char) -> bool {
c.is_alphanumeric() || matches!(c, '/' | '_' | '-')
}
/// Errors returned when attempting to construct a [`NamespaceName`] from an org
/// & bucket string pair.
#[derive(Debug, Error)]
@ -41,7 +47,7 @@ pub enum NamespaceNameError {
/// The provided namespace name contains an unacceptable character.
#[error(
"namespace name '{}' contains invalid character, character number {} \
is a control which is not allowed",
is not whitelisted",
name,
bad_char_offset
)]
@ -92,7 +98,7 @@ impl<'a> NamespaceName<'a> {
//
// NOTE: If changing these characters, please update the error message
// above.
if let Some(bad_char_offset) = name.chars().position(|c| c.is_control()) {
if let Some(bad_char_offset) = name.chars().position(|c| !is_allowed(c)) {
return Err(NamespaceNameError::BadChars {
bad_char_offset,
name: name.to_string(),
@ -123,11 +129,7 @@ impl<'a> NamespaceName<'a> {
return Err(OrgBucketMappingError::NoOrgBucketSpecified);
}
let prefix: Cow<'_, str> = utf8_percent_encode(org, NON_ALPHANUMERIC).into();
let suffix: Cow<'_, str> = utf8_percent_encode(bucket, NON_ALPHANUMERIC).into();
let db_name = format!("{}_{}", prefix, suffix);
Ok(Self::new(db_name)?)
Ok(Self::new(format!("{}_{}", org, bucket))?)
}
}
@ -188,34 +190,46 @@ mod tests {
#[test]
fn test_org_bucket_map_db_contains_underscore() {
let got = NamespaceName::from_org_and_bucket("my_org", "bucket").unwrap();
assert_eq!(got.as_str(), "my%5Forg_bucket");
assert_eq!(got.as_str(), "my_org_bucket");
let got = NamespaceName::from_org_and_bucket("org", "my_bucket").unwrap();
assert_eq!(got.as_str(), "org_my%5Fbucket");
assert_eq!(got.as_str(), "org_my_bucket");
let got = NamespaceName::from_org_and_bucket("org", "my__bucket").unwrap();
assert_eq!(got.as_str(), "org_my%5F%5Fbucket");
assert_eq!(got.as_str(), "org_my__bucket");
let got = NamespaceName::from_org_and_bucket("my_org", "my_bucket").unwrap();
assert_eq!(got.as_str(), "my%5Forg_my%5Fbucket");
assert_eq!(got.as_str(), "my_org_my_bucket");
}
#[test]
fn test_org_bucket_map_db_contains_underscore_and_percent() {
let got = NamespaceName::from_org_and_bucket("my%5Forg", "bucket").unwrap();
assert_eq!(got.as_str(), "my%255Forg_bucket");
let err = NamespaceName::from_org_and_bucket("my%5Forg", "bucket");
assert!(matches!(
err,
Err(OrgBucketMappingError::InvalidNamespaceName { .. })
));
let got = NamespaceName::from_org_and_bucket("my%5Forg_", "bucket").unwrap();
assert_eq!(got.as_str(), "my%255Forg%5F_bucket");
let err = NamespaceName::from_org_and_bucket("my%5Forg_", "bucket");
assert!(matches!(
err,
Err(OrgBucketMappingError::InvalidNamespaceName { .. })
));
}
#[test]
fn test_bad_namespace_name_is_encoded() {
let got = NamespaceName::from_org_and_bucket("org", "bucket?").unwrap();
assert_eq!(got.as_str(), "org_bucket%3F");
fn test_bad_namespace_name_fails_validation() {
let err = NamespaceName::from_org_and_bucket("org", "bucket?");
assert!(matches!(
err,
Err(OrgBucketMappingError::InvalidNamespaceName { .. })
));
let got = NamespaceName::from_org_and_bucket("org!", "bucket").unwrap();
assert_eq!(got.as_str(), "org%21_bucket");
let err = NamespaceName::from_org_and_bucket("org!", "bucket");
assert!(matches!(
err,
Err(OrgBucketMappingError::InvalidNamespaceName { .. })
));
}
#[test]
@ -256,30 +270,50 @@ mod tests {
#[test]
fn test_bad_chars_null() {
let got = NamespaceName::new("example\x00").unwrap_err();
assert_eq!(got.to_string() , "namespace name 'example\x00' contains invalid character, character number 7 is a control which is not allowed");
assert_eq!(got.to_string() , "namespace name 'example\x00' contains invalid character, character number 7 is not whitelisted");
}
#[test]
fn test_bad_chars_high_control() {
let got = NamespaceName::new("\u{007f}example").unwrap_err();
assert_eq!(got.to_string() , "namespace name '\u{007f}example' contains invalid character, character number 0 is a control which is not allowed");
assert_eq!(got.to_string() , "namespace name '\u{007f}example' contains invalid character, character number 0 is not whitelisted");
}
#[test]
fn test_bad_chars_tab() {
let got = NamespaceName::new("example\tdb").unwrap_err();
assert_eq!(got.to_string() , "namespace name 'example\tdb' contains invalid character, character number 7 is a control which is not allowed");
assert_eq!(got.to_string() , "namespace name 'example\tdb' contains invalid character, character number 7 is not whitelisted");
}
#[test]
fn test_bad_chars_newline() {
let got = NamespaceName::new("my_example\ndb").unwrap_err();
assert_eq!(got.to_string() , "namespace name 'my_example\ndb' contains invalid character, character number 10 is a control which is not allowed");
assert_eq!(got.to_string() , "namespace name 'my_example\ndb' contains invalid character, character number 10 is not whitelisted");
}
#[test]
fn test_bad_chars_whitespace() {
let got = NamespaceName::new("my_example db").unwrap_err();
assert_eq!(got.to_string() , "namespace name 'my_example db' contains invalid character, character number 10 is not whitelisted");
}
#[test]
fn test_bad_chars_single_quote() {
let got = NamespaceName::new("my_example'db").unwrap_err();
assert_eq!(got.to_string() , "namespace name 'my_example\'db' contains invalid character, character number 10 is not whitelisted");
}
#[test]
fn test_ok_chars() {
let db = NamespaceName::new("my-example-db_with_underscores and spaces").unwrap();
assert_eq!(&*db, "my-example-db_with_underscores and spaces");
let db =
NamespaceName::new("my-example-db_with_underscores/and/fwd/slash/AndCaseSensitive")
.unwrap();
assert_eq!(
&*db,
"my-example-db_with_underscores/and/fwd/slash/AndCaseSensitive"
);
let db = NamespaceName::new("a_ã_京").unwrap();
assert_eq!(&*db, "a_ã_京");
}
}
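A minimal sketch of the resulting behaviour, assuming the type is exposed as `data_types::NamespaceName`: org and bucket are now joined with a single `_` and the result is validated against the same allowlist as any other namespace name.

use data_types::NamespaceName;

fn main() {
    // Org and bucket are joined with '_' rather than percent-encoded.
    let ns = NamespaceName::from_org_and_bucket("my-org", "my-bucket").unwrap();
    assert_eq!(ns.as_str(), "my-org_my-bucket");

    // Characters outside the alphanumeric / '/' / '_' / '-' allowlist are now
    // rejected instead of being encoded.
    assert!(NamespaceName::from_org_and_bucket("org", "bucket?").is_err());
    assert!(NamespaceName::new("my_example db").is_err());
    assert!(NamespaceName::new("a_ã_京").is_ok());
}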

View File

@ -22,6 +22,14 @@ pub fn iox_session_config() -> SessionConfig {
options.execution.parquet.reorder_filters = true;
options.optimizer.repartition_sorts = true;
// DataFusion skips failed optimizer rules by default. Relevant issues:
// - https://github.com/apache/arrow-datafusion/issues/4615
// - https://github.com/apache/arrow-datafusion/issues/4685
//
// However IOx should not skip them, since that will result in more helpful error messages
// when they fail, e.g., `HandleGapFill`.
options.optimizer.skip_failed_rules = false;
SessionConfig::from(options)
.with_batch_size(BATCH_SIZE)
.with_create_default_catalog_and_schema(true)
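A minimal standalone sketch of the same setting, assuming DataFusion's `ConfigOptions` and `SessionConfig` re-exports under `datafusion::config` and `datafusion::prelude`:

use datafusion::config::ConfigOptions;
use datafusion::prelude::SessionConfig;

// Surface optimizer-rule failures instead of silently skipping them.
fn strict_session_config() -> SessionConfig {
    let mut options = ConfigOptions::new();
    options.optimizer.skip_failed_rules = false;
    SessionConfig::from(options)
}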

View File

@ -16,8 +16,8 @@
use std::time::Duration;
use data_types::{
DeletePredicate, NamespaceId, NonEmptyString, PartitionKey, Sequence, StatValues, Statistics,
TableId,
DeletePredicate, NamespaceId, NonEmptyString, PartitionKey, SequenceNumber, StatValues,
Statistics, TableId,
};
use hashbrown::HashMap;
use iox_time::{Time, TimeProvider};
@ -28,7 +28,7 @@ use trace::ctx::SpanContext;
#[derive(Debug, Default, Clone, PartialEq)]
pub struct DmlMeta {
/// The sequence number associated with this write
sequence: Option<Sequence>,
sequence_number: Option<SequenceNumber>,
/// When this write was ingested into the write buffer
producer_ts: Option<Time>,
@ -43,13 +43,13 @@ pub struct DmlMeta {
impl DmlMeta {
/// Create a new [`DmlMeta`] for a sequenced operation
pub fn sequenced(
sequence: Sequence,
sequence_number: SequenceNumber,
producer_ts: Time,
span_ctx: Option<SpanContext>,
bytes_read: usize,
) -> Self {
Self {
sequence: Some(sequence),
sequence_number: Some(sequence_number),
producer_ts: Some(producer_ts),
span_ctx,
bytes_read: Some(bytes_read),
@ -59,7 +59,7 @@ impl DmlMeta {
/// Create a new [`DmlMeta`] for an unsequenced operation
pub fn unsequenced(span_ctx: Option<SpanContext>) -> Self {
Self {
sequence: None,
sequence_number: None,
producer_ts: None,
span_ctx,
bytes_read: None,
@ -67,8 +67,8 @@ impl DmlMeta {
}
/// Gets the sequence number associated with the write if any
pub fn sequence(&self) -> Option<&Sequence> {
self.sequence.as_ref()
pub fn sequence(&self) -> Option<SequenceNumber> {
self.sequence_number
}
/// Gets the producer timestamp associated with the write if any
@ -438,7 +438,7 @@ pub mod test_util {
Time::from_timestamp_millis(timestamp.timestamp_millis()).expect("ts in range");
DmlMeta::sequenced(
*m.sequence().unwrap(),
m.sequence().unwrap(),
timestamp,
m.span_context().cloned(),
m.bytes_read().unwrap(),
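A minimal sketch of constructing metadata with the new signature; the values are hypothetical, and `SequenceNumber::new` / `Time::from_timestamp_nanos` are assumed from `data_types` and `iox_time`:

use data_types::SequenceNumber;
use dml::DmlMeta;
use iox_time::Time;

fn main() {
    // Hypothetical values; the metadata now carries a plain SequenceNumber
    // rather than a shard-scoped Sequence.
    let meta = DmlMeta::sequenced(
        SequenceNumber::new(42),
        Time::from_timestamp_nanos(0),
        None, // no span context
        128,  // bytes read
    );
    assert_eq!(meta.sequence(), Some(SequenceNumber::new(42)));
}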

View File

@ -12,7 +12,7 @@ once_cell = { version = "1.17", features = ["parking_lot"] }
parking_lot = "0.12"
pin-project = "1.0"
tokio = { version = "1.27" }
tokio-util = { version = "0.7.7" }
tokio-util = { version = "0.7.8" }
workspace-hack = { version = "0.1", path = "../workspace-hack" }
# use libc on unix like platforms to set worker priority in DedicatedExecutor

View File

@ -6,7 +6,7 @@ use arrow_flight::sql::{
ActionClosePreparedStatementRequest, ActionCreatePreparedStatementRequest, Any,
CommandGetCatalogs, CommandGetCrossReference, CommandGetDbSchemas, CommandGetExportedKeys,
CommandGetImportedKeys, CommandGetPrimaryKeys, CommandGetSqlInfo, CommandGetTableTypes,
CommandGetTables, CommandPreparedStatementQuery, CommandStatementQuery,
CommandGetTables, CommandGetXdbcTypeInfo, CommandPreparedStatementQuery, CommandStatementQuery,
};
use bytes::Bytes;
use prost::Message;
@ -94,7 +94,10 @@ pub enum FlightSQLCommand {
CommandGetPrimaryKeys(CommandGetPrimaryKeys),
/// Get a list of the available tables
CommandGetTables(CommandGetTables),
/// Get a list of the available table tyypes
/// Get information about data types supported.
/// See [`CommandGetXdbcTypeInfo`] for details.
CommandGetXdbcTypeInfo(CommandGetXdbcTypeInfo),
/// Get a list of the available table types
CommandGetTableTypes(CommandGetTableTypes),
/// Create a prepared statement
ActionCreatePreparedStatementRequest(ActionCreatePreparedStatementRequest),
@ -224,6 +227,13 @@ impl Display for FlightSQLCommand {
Self::CommandGetTableTypes(CommandGetTableTypes {}) => {
write!(f, "CommandGetTableTypes")
}
Self::CommandGetXdbcTypeInfo(CommandGetXdbcTypeInfo { data_type }) => {
write!(
f,
"CommandGetXdbcTypeInfo(data_type={})",
data_type.as_ref().copied().unwrap_or(0),
)
}
Self::ActionCreatePreparedStatementRequest(ActionCreatePreparedStatementRequest {
query,
}) => {
@ -269,6 +279,8 @@ impl FlightSQLCommand {
Ok(Self::CommandGetTables(decode_cmd))
} else if let Some(decoded_cmd) = Any::unpack::<CommandGetTableTypes>(&msg)? {
Ok(Self::CommandGetTableTypes(decoded_cmd))
} else if let Some(decoded_cmd) = Any::unpack::<CommandGetXdbcTypeInfo>(&msg)? {
Ok(Self::CommandGetXdbcTypeInfo(decoded_cmd))
} else if let Some(decoded_cmd) = Any::unpack::<ActionCreatePreparedStatementRequest>(&msg)?
{
Ok(Self::ActionCreatePreparedStatementRequest(decoded_cmd))
@ -308,6 +320,7 @@ impl FlightSQLCommand {
FlightSQLCommand::CommandGetPrimaryKeys(cmd) => Any::pack(&cmd),
FlightSQLCommand::CommandGetTables(cmd) => Any::pack(&cmd),
FlightSQLCommand::CommandGetTableTypes(cmd) => Any::pack(&cmd),
FlightSQLCommand::CommandGetXdbcTypeInfo(cmd) => Any::pack(&cmd),
FlightSQLCommand::ActionCreatePreparedStatementRequest(cmd) => Any::pack(&cmd),
FlightSQLCommand::ActionClosePreparedStatementRequest(handle) => {
let prepared_statement_handle = handle.encode();

View File

@ -13,7 +13,7 @@ use arrow_flight::{
ActionCreatePreparedStatementRequest, ActionCreatePreparedStatementResult, Any,
CommandGetCatalogs, CommandGetCrossReference, CommandGetDbSchemas, CommandGetExportedKeys,
CommandGetImportedKeys, CommandGetPrimaryKeys, CommandGetSqlInfo, CommandGetTableTypes,
CommandGetTables, CommandStatementQuery,
CommandGetTables, CommandGetXdbcTypeInfo, CommandStatementQuery,
},
IpcMessage, SchemaAsIpc,
};
@ -86,6 +86,9 @@ impl FlightSQLPlanner {
FlightSQLCommand::CommandGetTableTypes(CommandGetTableTypes { .. }) => {
encode_schema(&GET_TABLE_TYPE_SCHEMA)
}
FlightSQLCommand::CommandGetXdbcTypeInfo(CommandGetXdbcTypeInfo { .. }) => {
encode_schema(&GET_XDBC_TYPE_INFO_SCHEMA)
}
FlightSQLCommand::ActionCreatePreparedStatementRequest(_)
| FlightSQLCommand::ActionClosePreparedStatementRequest(_) => ProtocolSnafu {
cmd: format!("{cmd:?}"),
@ -239,6 +242,11 @@ impl FlightSQLPlanner {
let plan = plan_get_table_types(ctx).await?;
Ok(ctx.create_physical_plan(&plan).await?)
}
FlightSQLCommand::CommandGetXdbcTypeInfo(CommandGetXdbcTypeInfo { data_type }) => {
debug!(?data_type, "Planning GetXdbcTypeInfo query");
let plan = plan_get_xdbc_type_info(ctx, data_type).await?;
Ok(ctx.create_physical_plan(&plan).await?)
}
FlightSQLCommand::ActionClosePreparedStatementRequest(_)
| FlightSQLCommand::ActionCreatePreparedStatementRequest(_) => ProtocolSnafu {
cmd: format!("{cmd:?}"),
@ -417,6 +425,15 @@ async fn plan_get_table_types(ctx: &IOxSessionContext) -> Result<LogicalPlan> {
Ok(ctx.batch_to_logical_plan(TABLE_TYPES_RECORD_BATCH.clone())?)
}
/// Return a `LogicalPlan` for GetXdbcTypeInfo
async fn plan_get_xdbc_type_info(
ctx: &IOxSessionContext,
_data_type: Option<i32>,
) -> Result<LogicalPlan> {
let batch = RecordBatch::new_empty(Arc::clone(&GET_XDBC_TYPE_INFO_SCHEMA));
Ok(ctx.batch_to_logical_plan(batch)?)
}
/// The schema for GetTableTypes
static GET_TABLE_TYPE_SCHEMA: Lazy<SchemaRef> = Lazy::new(|| {
Arc::new(Schema::new(vec![Field::new(
@ -505,3 +522,32 @@ static GET_PRIMARY_KEYS_SCHEMA: Lazy<SchemaRef> = Lazy::new(|| {
Field::new("key_sequence", DataType::Int32, false),
]))
});
// From https://github.com/apache/arrow/blob/9588da967c756b2923e213ccc067378ba6c90a86/format/FlightSql.proto#L1064-L1113
static GET_XDBC_TYPE_INFO_SCHEMA: Lazy<SchemaRef> = Lazy::new(|| {
Arc::new(Schema::new(vec![
Field::new("type_name", DataType::Utf8, false),
Field::new("data_type", DataType::Int32, false),
Field::new("column_size", DataType::Int32, true),
Field::new("literal_prefix", DataType::Utf8, true),
Field::new("literal_suffix", DataType::Utf8, true),
Field::new(
"create_params",
DataType::List(Arc::new(Field::new("item", DataType::Utf8, false))),
true,
),
Field::new("nullable", DataType::Int32, false), // Nullable enum: https://github.com/apache/arrow/blob/9588da967c756b2923e213ccc067378ba6c90a86/format/FlightSql.proto#L1014-L1029
Field::new("case_sensitive", DataType::Boolean, false),
Field::new("searchable", DataType::Int32, false), // Searchable enum: https://github.com/apache/arrow/blob/9588da967c756b2923e213ccc067378ba6c90a86/format/FlightSql.proto#L1031-L1056
Field::new("unsigned_attribute", DataType::Boolean, true),
Field::new("fixed_prec_scale", DataType::Boolean, false),
Field::new("auto_increment", DataType::Boolean, true),
Field::new("local_type_name", DataType::Utf8, true),
Field::new("minimum_scale", DataType::Int32, true),
Field::new("maximum_scale", DataType::Int32, true),
Field::new("sql_data_type", DataType::Int32, false),
Field::new("datetime_subcode", DataType::Int32, true),
Field::new("num_prec_radix", DataType::Int32, true),
Field::new("interval_precision", DataType::Int32, true),
]))
});

View File

@ -18,7 +18,7 @@ observability_deps = { path = "../observability_deps" }
snafu = "0.7"
tokio = { version = "1", features = ["macros", "rt", "sync"] }
tokio-stream = "0.1"
tokio-util = { version = "0.7.7" }
tokio-util = { version = "0.7.8" }
uuid = { version = "1", features = ["v4"] }
[dev-dependencies]

View File

@ -1,6 +1,6 @@
use futures::{StreamExt, TryStreamExt};
use object_store::{DynObjectStore, ObjectMeta};
use observability_deps::tracing::{debug, info};
use observability_deps::tracing::info;
use snafu::prelude::*;
use std::sync::Arc;
use tokio::sync::mpsc;
@ -23,7 +23,7 @@ pub(crate) async fn perform(
info!(?path, "Not deleting due to dry run");
Ok(())
} else {
debug!("Deleting {path}");
info!("Deleting {path}");
object_store
.delete(&path)
.await

View File

@ -3,11 +3,12 @@ use crate::expression::arithmetic::{
arithmetic, call_expression, var_ref, ArithmeticParsers, Expr,
};
use crate::expression::Call;
use crate::internal::Error as InternalError;
use crate::internal::{expect, verify, ParseResult};
use crate::functions::is_scalar_math_function;
use crate::internal::{expect, verify, Error as InternalError, ParseResult};
use crate::keywords::keyword;
use crate::literal::{literal_no_regex, literal_regex, Literal};
use crate::parameter::parameter;
use crate::select::is_valid_now_call;
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::complete::char;
@ -289,21 +290,23 @@ fn reduce_expr(
})
}
/// Returns true if `expr` is a valid [`Expr::Call`] expression for the `now` function.
pub(crate) fn is_valid_now_call(expr: &Expr) -> bool {
match expr {
Expr::Call(Call { name, args }) => name.to_lowercase() == "now" && args.is_empty(),
_ => false,
}
/// Returns true if `expr` is a valid [`Expr::Call`] expression for conditional expressions
/// in the WHERE clause.
pub(crate) fn is_valid_conditional_call(expr: &Expr) -> bool {
is_valid_now_call(expr)
|| match expr {
Expr::Call(Call { name, .. }) => is_scalar_math_function(name),
_ => false,
}
}
impl ConditionalExpression {
/// Parse the `now()` function call
fn call(i: &str) -> ParseResult<&str, Expr> {
verify(
"invalid expression, the only valid function call is 'now' with no arguments",
"invalid expression, the only valid function calls are 'now' with no arguments, or scalar math functions",
call_expression::<Self>,
is_valid_now_call,
is_valid_conditional_call,
)(i)
}
}
@ -384,16 +387,20 @@ mod test {
let (_, got) = arithmetic_expression("now() + 3").unwrap();
assert_eq!(got, binary_op!(call!("now"), Add, 3));
// arithmetic functions calls are permitted
let (_, got) = arithmetic_expression("abs(f) + 3").unwrap();
assert_eq!(got, binary_op!(call!("abs", var_ref!("f")), Add, 3));
// Fallible cases
assert_expect_error!(
arithmetic_expression("sum(foo)"),
"invalid expression, the only valid function call is 'now' with no arguments"
"invalid expression, the only valid function calls are 'now' with no arguments, or scalar math functions"
);
assert_expect_error!(
arithmetic_expression("now(1)"),
"invalid expression, the only valid function call is 'now' with no arguments"
"invalid expression, the only valid function calls are 'now' with no arguments, or scalar math functions"
);
}

View File

@ -0,0 +1,74 @@
//! # [Functions] supported by InfluxQL
//!
//! [Functions]: https://docs.influxdata.com/influxdb/v1.8/query_language/functions/
use std::collections::HashSet;
use once_cell::sync::Lazy;
/// Returns `true` if `name` is a mathematical scalar function
/// supported by InfluxQL.
pub fn is_scalar_math_function(name: &str) -> bool {
static FUNCTIONS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
HashSet::from([
"abs", "sin", "cos", "tan", "asin", "acos", "atan", "atan2", "exp", "log", "ln",
"log2", "log10", "sqrt", "pow", "floor", "ceil", "round",
])
});
FUNCTIONS.contains(name)
}
/// Returns `true` if `name` is an aggregate or aggregate-like function
/// supported by InfluxQL.
pub fn is_aggregate_function(name: &str) -> bool {
static FUNCTIONS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
HashSet::from([
// Scalar-like functions
"cumulative_sum",
"derivative",
"difference",
"elapsed",
"moving_average",
"non_negative_derivative",
"non_negative_difference",
// Selector functions
"bottom",
"first",
"last",
"max",
"min",
"percentile",
"sample",
"top",
// Aggregate functions
"count",
"integral",
"mean",
"median",
"mode",
"spread",
"stddev",
"sum",
// Prediction functions
"holt_winters",
"holt_winters_with_fit",
// Technical analysis functions
"chande_momentum_oscillator",
"exponential_moving_average",
"double_exponential_moving_average",
"kaufmans_efficiency_ratio",
"kaufmans_adaptive_moving_average",
"triple_exponential_moving_average",
"triple_exponential_derivative",
"relative_strength_index",
])
});
FUNCTIONS.contains(name)
}
/// Returns `true` if `name` is `"now"`.
pub fn is_now_function(name: &str) -> bool {
name == "now"
}
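A small usage sketch of the new helpers, assuming they are exported from `influxdb_influxql_parser::functions`:

use influxdb_influxql_parser::functions::{
    is_aggregate_function, is_now_function, is_scalar_math_function,
};

fn main() {
    assert!(is_scalar_math_function("abs"));
    assert!(!is_scalar_math_function("sum"));
    assert!(is_aggregate_function("sum"));
    assert!(!is_aggregate_function("abs"));
    assert!(is_now_function("now"));
}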

View File

@ -30,6 +30,7 @@ pub mod delete;
pub mod drop;
pub mod explain;
pub mod expression;
pub mod functions;
pub mod identifier;
mod internal;
mod keywords;

View File

@ -11,8 +11,8 @@ use crate::expression::arithmetic::Expr::Wildcard;
use crate::expression::arithmetic::{
arithmetic, call_expression, var_ref, ArithmeticParsers, Expr, WildcardType,
};
use crate::expression::conditional::is_valid_now_call;
use crate::expression::VarRef;
use crate::expression::{Call, VarRef};
use crate::functions::is_now_function;
use crate::identifier::{identifier, Identifier};
use crate::impl_tuple_clause;
use crate::internal::{expect, map_fail, verify, ParseResult};
@ -301,6 +301,14 @@ impl ArithmeticParsers for TimeCallIntervalArgument {
/// The offset argument accepts either a duration, datetime-like string or `now`.
struct TimeCallOffsetArgument;
/// Returns true if `expr` is a valid [`Expr::Call`] expression for the `now` function.
pub(crate) fn is_valid_now_call(expr: &Expr) -> bool {
match expr {
Expr::Call(Call { name, args }) => is_now_function(&name.to_lowercase()) && args.is_empty(),
_ => false,
}
}
impl TimeCallOffsetArgument {
/// Parse the `now()` function call
fn now_call(i: &str) -> ParseResult<&str, Expr> {

View File

@ -9,10 +9,14 @@ expression: "visit_statement!(\"DELETE WHERE 'foo bar' =~ /foo/\")"
- pre_visit_conditional_binary
- pre_visit_conditional_expression
- pre_visit_expr
- pre_visit_literal
- post_visit_literal
- post_visit_expr
- post_visit_conditional_expression
- pre_visit_conditional_expression
- pre_visit_expr
- pre_visit_literal
- post_visit_literal
- post_visit_expr
- post_visit_conditional_expression
- post_visit_conditional_binary

View File

@ -65,6 +65,8 @@ expression: "visit_statement!(r#\"SELECT value FROM (SELECT usage FROM cpu WHERE
- post_visit_conditional_expression
- pre_visit_conditional_expression
- pre_visit_expr
- pre_visit_literal
- post_visit_literal
- post_visit_expr
- post_visit_conditional_expression
- post_visit_conditional_binary
@ -79,6 +81,8 @@ expression: "visit_statement!(r#\"SELECT value FROM (SELECT usage FROM cpu WHERE
- post_visit_conditional_expression
- pre_visit_conditional_expression
- pre_visit_expr
- pre_visit_literal
- post_visit_literal
- post_visit_expr
- post_visit_conditional_expression
- post_visit_conditional_binary
@ -90,6 +94,8 @@ expression: "visit_statement!(r#\"SELECT value FROM (SELECT usage FROM cpu WHERE
- pre_visit_select_dimension
- pre_visit_select_time_dimension
- pre_visit_expr
- pre_visit_literal
- post_visit_literal
- post_visit_expr
- post_visit_select_time_dimension
- post_visit_select_dimension

View File

@ -15,6 +15,8 @@ expression: "visit_statement!(\"SHOW MEASUREMENTS WHERE host = 'west'\")"
- post_visit_conditional_expression
- pre_visit_conditional_expression
- pre_visit_expr
- pre_visit_literal
- post_visit_literal
- post_visit_expr
- post_visit_conditional_expression
- post_visit_conditional_binary

View File

@ -23,6 +23,8 @@ expression: "visit_statement!(\"SHOW MEASUREMENTS ON * WITH MEASUREMENT =~ /foo/
- post_visit_conditional_expression
- pre_visit_conditional_expression
- pre_visit_expr
- pre_visit_literal
- post_visit_literal
- post_visit_expr
- post_visit_conditional_expression
- post_visit_conditional_binary

View File

@ -9,10 +9,14 @@ expression: "visit_statement!(\"DELETE WHERE 'foo bar' =~ /foo/\")"
- pre_visit_conditional_binary
- pre_visit_conditional_expression
- pre_visit_expr
- pre_visit_literal
- post_visit_literal
- post_visit_expr
- post_visit_conditional_expression
- pre_visit_conditional_expression
- pre_visit_expr
- pre_visit_literal
- post_visit_literal
- post_visit_expr
- post_visit_conditional_expression
- post_visit_conditional_binary

View File

@ -65,6 +65,8 @@ expression: "visit_statement!(r#\"SELECT value FROM (SELECT usage FROM cpu WHERE
- post_visit_conditional_expression
- pre_visit_conditional_expression
- pre_visit_expr
- pre_visit_literal
- post_visit_literal
- post_visit_expr
- post_visit_conditional_expression
- post_visit_conditional_binary
@ -79,6 +81,8 @@ expression: "visit_statement!(r#\"SELECT value FROM (SELECT usage FROM cpu WHERE
- post_visit_conditional_expression
- pre_visit_conditional_expression
- pre_visit_expr
- pre_visit_literal
- post_visit_literal
- post_visit_expr
- post_visit_conditional_expression
- post_visit_conditional_binary
@ -90,6 +94,8 @@ expression: "visit_statement!(r#\"SELECT value FROM (SELECT usage FROM cpu WHERE
- pre_visit_select_dimension
- pre_visit_select_time_dimension
- pre_visit_expr
- pre_visit_literal
- post_visit_literal
- post_visit_expr
- post_visit_select_time_dimension
- post_visit_select_dimension

View File

@ -15,6 +15,8 @@ expression: "visit_statement!(\"SHOW MEASUREMENTS WHERE host = 'west'\")"
- post_visit_conditional_expression
- pre_visit_conditional_expression
- pre_visit_expr
- pre_visit_literal
- post_visit_literal
- post_visit_expr
- post_visit_conditional_expression
- post_visit_conditional_binary

View File

@ -23,6 +23,8 @@ expression: "visit_statement!(\"SHOW MEASUREMENTS ON * WITH MEASUREMENT =~ /foo/
- post_visit_conditional_expression
- pre_visit_conditional_expression
- pre_visit_expr
- pre_visit_literal
- post_visit_literal
- post_visit_expr
- post_visit_conditional_expression
- post_visit_conditional_binary

View File

@ -35,6 +35,7 @@ use crate::explain::ExplainStatement;
use crate::expression::arithmetic::Expr;
use crate::expression::conditional::ConditionalExpression;
use crate::expression::{Binary, Call, ConditionalBinary, VarRef};
use crate::literal::Literal;
use crate::select::{
Dimension, Field, FieldList, FillClause, FromMeasurementClause, GroupByClause,
MeasurementSelection, SLimitClause, SOffsetClause, SelectStatement, TimeDimension,
@ -585,6 +586,16 @@ pub trait Visitor: Sized {
fn post_visit_conditional_binary(self, _n: &ConditionalBinary) -> Result<Self, Self::Error> {
Ok(self)
}
/// Invoked before any children of a literal are visited.
fn pre_visit_literal(self, _n: &Literal) -> Result<Recursion<Self>, Self::Error> {
Ok(Continue(self))
}
/// Invoked after a literal is visited.
fn post_visit_literal(self, _n: &Literal) -> Result<Self, Self::Error> {
Ok(self)
}
}
/// Trait for types that can be visited by [`Visitor`]
@ -1242,18 +1253,28 @@ impl Visitable for Expr {
Self::Binary(expr) => expr.accept(visitor),
Self::Nested(expr) => expr.accept(visitor),
Self::VarRef(expr) => expr.accept(visitor),
Self::Literal(expr) => expr.accept(visitor),
// We explicitly list out each enumeration, to ensure
// we revisit if new items are added to the Expr enumeration.
Self::BindParameter(_) | Self::Literal(_) | Self::Wildcard(_) | Self::Distinct(_) => {
Ok(visitor)
}
Self::BindParameter(_) | Self::Wildcard(_) | Self::Distinct(_) => Ok(visitor),
}?;
visitor.post_visit_expr(self)
}
}
impl Visitable for Literal {
fn accept<V: Visitor>(&self, visitor: V) -> Result<V, V::Error> {
let visitor = match visitor.pre_visit_literal(self)? {
Continue(visitor) => visitor,
Stop(visitor) => return Ok(visitor),
};
visitor.post_visit_literal(self)
}
}
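As a rough sketch of what the new hook enables (module paths and the `Recursion` import are assumptions), a visitor that counts literal nodes could look like the following; it would be driven through `Visitable::accept`, e.g. `statement.accept(LiteralCounter::default())`:

use influxdb_influxql_parser::literal::Literal;
use influxdb_influxql_parser::visit::{Recursion, Visitor};

#[derive(Default)]
struct LiteralCounter {
    literals: usize,
}

impl Visitor for LiteralCounter {
    type Error = std::convert::Infallible;

    // Called for every Literal now that Expr::Literal dispatches into accept().
    fn pre_visit_literal(self, _n: &Literal) -> Result<Recursion<Self>, Self::Error> {
        Ok(Recursion::Continue(Self {
            literals: self.literals + 1,
        }))
    }
}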
impl Visitable for OnClause {
fn accept<V: Visitor>(&self, visitor: V) -> Result<V, V::Error> {
let visitor = match visitor.pre_visit_on_clause(self)? {
@ -1331,6 +1352,7 @@ mod test {
use crate::expression::arithmetic::Expr;
use crate::expression::conditional::ConditionalExpression;
use crate::expression::{Binary, Call, ConditionalBinary, VarRef};
use crate::literal::Literal;
use crate::select::{
Dimension, Field, FieldList, FillClause, FromMeasurementClause, GroupByClause,
MeasurementSelection, SLimitClause, SOffsetClause, SelectStatement, TimeDimension,
@ -1427,6 +1449,7 @@ mod test {
trace_visit!(call, Call);
trace_visit!(expr_binary, Binary);
trace_visit!(conditional_binary, ConditionalBinary);
trace_visit!(literal, Literal);
}
macro_rules! visit_statement {

View File

@ -35,6 +35,7 @@ use crate::explain::ExplainStatement;
use crate::expression::arithmetic::Expr;
use crate::expression::conditional::ConditionalExpression;
use crate::expression::{Binary, Call, ConditionalBinary, VarRef};
use crate::literal::Literal;
use crate::select::{
Dimension, Field, FieldList, FillClause, FromMeasurementClause, GroupByClause,
MeasurementSelection, SLimitClause, SOffsetClause, SelectStatement, TimeDimension,
@ -616,6 +617,16 @@ pub trait VisitorMut: Sized {
) -> Result<(), Self::Error> {
Ok(())
}
/// Invoked before any children of a literal are visited.
fn pre_visit_literal(&mut self, _n: &mut Literal) -> Result<Recursion, Self::Error> {
Ok(Continue)
}
/// Invoked after a literal is visited.
fn post_visit_literal(&mut self, _n: &mut Literal) -> Result<(), Self::Error> {
Ok(())
}
}
/// Trait for types that can be visited by [`VisitorMut`]
@ -1185,16 +1196,27 @@ impl VisitableMut for Expr {
Self::Binary(expr) => expr.accept(visitor)?,
Self::Nested(expr) => expr.accept(visitor)?,
Self::VarRef(expr) => expr.accept(visitor)?,
Self::Literal(expr) => expr.accept(visitor)?,
// We explicitly list out each enumeration, to ensure
// we revisit if new items are added to the Expr enumeration.
Self::BindParameter(_) | Self::Literal(_) | Self::Wildcard(_) | Self::Distinct(_) => {}
Self::BindParameter(_) | Self::Wildcard(_) | Self::Distinct(_) => {}
};
visitor.post_visit_expr(self)
}
}
impl VisitableMut for Literal {
fn accept<V: VisitorMut>(&mut self, visitor: &mut V) -> Result<(), V::Error> {
if let Stop = visitor.pre_visit_literal(self)? {
return Ok(());
};
visitor.post_visit_literal(self)
}
}
impl VisitableMut for OnClause {
fn accept<V: VisitorMut>(&mut self, visitor: &mut V) -> Result<(), V::Error> {
if let Stop = visitor.pre_visit_on_clause(self)? {
@ -1267,6 +1289,7 @@ mod test {
use crate::expression::arithmetic::Expr;
use crate::expression::conditional::ConditionalExpression;
use crate::expression::{Binary, Call, ConditionalBinary, VarRef};
use crate::literal::Literal;
use crate::parse_statements;
use crate::select::{
Dimension, Field, FieldList, FillClause, FromMeasurementClause, GroupByClause,
@ -1362,6 +1385,7 @@ mod test {
trace_visit!(call, Call);
trace_visit!(expr_binary, Binary);
trace_visit!(conditional_binary, ConditionalBinary);
trace_visit!(literal, Literal);
}
macro_rules! visit_statement {

View File

@ -70,7 +70,7 @@ thiserror = "1.0.40"
tikv-jemalloc-ctl = { version = "0.5.0", optional = true }
tokio = { version = "1.27", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time", "io-std"] }
tokio-stream = { version = "0.1", features = ["net"] }
tokio-util = { version = "0.7.7", features = ["compat"] }
tokio-util = { version = "0.7.8", features = ["compat"] }
tonic = { workspace = true }
uuid = { version = "1", features = ["v4"] }
# jemalloc-sys with unprefixed_malloc_on_supported_platforms feature and heappy are mutually exclusive

View File

@ -3,9 +3,7 @@
use crate::process_info::setup_metric_registry;
use super::main;
use authz::Authorizer;
use clap_blocks::{
authz::AuthzConfig,
catalog_dsn::CatalogDsnConfig,
compactor2::Compactor2Config,
ingester2::Ingester2Config,
@ -14,6 +12,9 @@ use clap_blocks::{
querier::QuerierConfig,
router2::Router2Config,
run_config::RunConfig,
single_tenant::{
CONFIG_AUTHZ_ENV_NAME, CONFIG_AUTHZ_FLAG, CONFIG_CST_ENV_NAME, CONFIG_CST_FLAG,
},
socket_addr::SocketAddr,
};
use compactor2::object_store::metrics::MetricsStore;
@ -89,9 +90,6 @@ pub enum Error {
#[error("Invalid config: {0}")]
InvalidConfig(#[from] CommonServerStateError),
#[error("Authz configuration error: {0}")]
AuthzConfig(#[from] clap_blocks::authz::Error),
#[error("Authz service error: {0}")]
AuthzService(#[from] authz::Error),
}
@ -173,9 +171,20 @@ pub type Result<T, E = Error> = std::result::Result<T, E>;
)]
#[group(skip)]
pub struct Config {
/// Authorizer options.
#[clap(flatten)]
pub(crate) authz_config: AuthzConfig,
#[clap(
long = CONFIG_AUTHZ_FLAG,
env = CONFIG_AUTHZ_ENV_NAME,
requires("single_tenant_deployment"),
)]
pub(crate) authz_address: Option<String>,
#[clap(
long = CONFIG_CST_FLAG,
env = CONFIG_CST_ENV_NAME,
default_value = "false",
requires_if("true", "authz_address")
)]
pub(crate) single_tenant_deployment: bool,
/// logging options
#[clap(flatten)]
@ -185,17 +194,6 @@ pub struct Config {
#[clap(flatten)]
pub(crate) tracing_config: TracingConfig,
/// Differential handling based upon deployment to CST vs MT.
///
/// At minimum, differs in supports of v1 endpoint. But also includes
/// differences in namespace handling, etc.
#[clap(
long = "single-tenancy",
env = "INFLUXDB_IOX_SINGLE_TENANCY",
default_value = "false"
)]
pub single_tenant_deployment: bool,
/// Maximum size of HTTP requests.
#[clap(
long = "max-http-request-size",
@ -361,6 +359,7 @@ impl Config {
/// configuration for each individual IOx service
fn specialize(self) -> SpecializedConfig {
let Self {
authz_address,
logging_config,
tracing_config,
max_http_request_size,
@ -381,7 +380,6 @@ impl Config {
querier_ram_pool_data_bytes,
querier_max_concurrent_queries,
exec_mem_pool_bytes,
authz_config,
single_tenant_deployment,
} = self;
@ -472,6 +470,8 @@ impl Config {
};
let router_config = Router2Config {
authz_address: authz_address.clone(),
single_tenant_deployment,
query_pool_name: QUERY_POOL_NAME.to_string(),
http_request_limit: 1_000,
ingester_addresses: ingester_addresses.clone(),
@ -481,7 +481,6 @@ impl Config {
topic: QUERY_POOL_NAME.to_string(),
rpc_write_timeout_seconds: Duration::new(3, 0),
rpc_write_replicas: None,
single_tenant_deployment,
rpc_write_max_outgoing_bytes: ingester_config.rpc_write_max_incoming_bytes,
};
@ -514,6 +513,7 @@ impl Config {
};
let querier_config = QuerierConfig {
authz_address,
num_query_threads: None, // will be ignored
ingester_addresses,
ram_pool_metadata_bytes: querier_ram_pool_metadata_bytes,
@ -536,7 +536,6 @@ impl Config {
router_config,
compactor_config,
querier_config,
authz_config,
}
}
}
@ -564,7 +563,6 @@ struct SpecializedConfig {
router_config: Router2Config,
compactor_config: Compactor2Config,
querier_config: QuerierConfig,
authz_config: AuthzConfig,
}
pub async fn command(config: Config) -> Result<()> {
@ -578,7 +576,6 @@ pub async fn command(config: Config) -> Result<()> {
router_config,
compactor_config,
querier_config,
authz_config,
} = config.specialize();
let metrics = setup_metric_registry();
@ -606,10 +603,6 @@ pub async fn command(config: Config) -> Result<()> {
let time_provider: Arc<dyn TimeProvider> = Arc::new(SystemProvider::new());
let authz = authz_config.authorizer()?;
// Verify the connection to the authorizer, if configured.
authz.probe().await?;
// create common state from the router and use it below
let common_state = CommonServerState::from_config(router_run_config.clone())?;
@ -636,7 +629,6 @@ pub async fn command(config: Config) -> Result<()> {
Arc::clone(&metrics),
Arc::clone(&catalog),
Arc::clone(&object_store),
authz.as_ref().map(Arc::clone),
&router_config,
)
.await?;
@ -684,7 +676,6 @@ pub async fn command(config: Config) -> Result<()> {
exec,
time_provider,
querier_config,
authz: authz.as_ref().map(Arc::clone),
})
.await?;

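With the separate AuthzConfig block gone, the all-in-one command declares the authorizer address and the single-tenant flag directly on its Config and ties them together through clap. A minimal, self-contained sketch of that relationship follows; the flag and env names are placeholders (the real ones come from the clap_blocks single_tenant constants), and clap's derive and env features are assumed.

use clap::Parser;

#[derive(Debug, Parser)]
struct ExampleConfig {
    /// Address of an authorizer service; only meaningful in single-tenant mode.
    #[clap(
        long = "example-authz-addr",
        env = "EXAMPLE_AUTHZ_ADDR",
        requires("single_tenant_deployment")
    )]
    authz_address: Option<String>,

    /// Enables single-tenant behaviour; when set, an authorizer address is expected.
    #[clap(
        long = "example-single-tenancy",
        env = "EXAMPLE_SINGLE_TENANCY",
        default_value = "false",
        requires_if("true", "authz_address")
    )]
    single_tenant_deployment: bool,
}

fn main() {
    let config = ExampleConfig::parse();
    println!(
        "single tenant: {}, authz: {:?}",
        config.single_tenant_deployment, config.authz_address
    );
}

The intent of the requires/requires_if pair is that supplying the address without the single-tenant flag, or enabling the flag without an address, fails argument validation.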

@ -3,10 +3,9 @@
use crate::process_info::setup_metric_registry;
use super::main;
use authz::Authorizer;
use clap_blocks::{
authz::AuthzConfig, catalog_dsn::CatalogDsnConfig, object_store::make_object_store,
querier::QuerierConfig, run_config::RunConfig,
catalog_dsn::CatalogDsnConfig, object_store::make_object_store, querier::QuerierConfig,
run_config::RunConfig,
};
use iox_query::exec::Executor;
use iox_time::{SystemProvider, TimeProvider};
@ -41,9 +40,6 @@ pub enum Error {
#[error("Querier error: {0}")]
Querier(#[from] ioxd_querier::Error),
#[error("Authz configuration error: {0}")]
AuthzConfig(#[from] clap_blocks::authz::Error),
#[error("Authz service error: {0}")]
AuthzService(#[from] authz::Error),
}
@ -64,10 +60,6 @@ Configuration is loaded from the following sources (highest precedence first):
- pre-configured default values"
)]
pub struct Config {
/// Authorizer options.
#[clap(flatten)]
pub(crate) authz_config: AuthzConfig,
#[clap(flatten)]
pub(crate) run_config: RunConfig,
@ -100,10 +92,6 @@ pub async fn command(config: Config) -> Result<(), Error> {
let time_provider = Arc::new(SystemProvider::new());
let authz = config.authz_config.authorizer()?;
// Verify the connection to the authorizer, if configured.
authz.probe().await?;
let num_query_threads = config.querier_config.num_query_threads();
let num_threads = num_query_threads.unwrap_or_else(|| {
NonZeroUsize::new(num_cpus::get()).unwrap_or_else(|| NonZeroUsize::new(1).unwrap())
@ -126,7 +114,6 @@ pub async fn command(config: Config) -> Result<(), Error> {
exec,
time_provider,
querier_config: config.querier_config,
authz: authz.as_ref().map(Arc::clone),
})
.await?;


@ -1,10 +1,9 @@
//! Command line options for running a router2 that uses the RPC write path.
use super::main;
use crate::process_info::setup_metric_registry;
use authz::Authorizer;
use clap_blocks::{
authz::AuthzConfig, catalog_dsn::CatalogDsnConfig, object_store::make_object_store,
router2::Router2Config, run_config::RunConfig,
catalog_dsn::CatalogDsnConfig, object_store::make_object_store, router2::Router2Config,
run_config::RunConfig,
};
use iox_time::{SystemProvider, TimeProvider};
use ioxd_common::{
@ -36,9 +35,6 @@ pub enum Error {
#[error("Catalog DSN error: {0}")]
CatalogDsn(#[from] clap_blocks::catalog_dsn::Error),
#[error("Authz configuration error: {0}")]
AuthzConfig(#[from] clap_blocks::authz::Error),
#[error("Authz service error: {0}")]
AuthzService(#[from] authz::Error),
}
@ -61,9 +57,6 @@ Configuration is loaded from the following sources (highest precedence first):
- pre-configured default values"
)]
pub struct Config {
#[clap(flatten)]
pub(crate) authz_config: AuthzConfig,
#[clap(flatten)]
pub(crate) run_config: RunConfig,
@ -98,16 +91,12 @@ pub async fn command(config: Config) -> Result<()> {
time_provider,
&metrics,
));
let authz = config.authz_config.authorizer()?;
// Verify the connection to the authorizer, if configured.
authz.probe().await?;
let server_type = create_router2_server_type(
&common_state,
Arc::clone(&metrics),
catalog,
object_store,
authz,
&config.router_config,
)
.await?;


@ -1134,6 +1134,48 @@ async fn flightsql_get_primary_keys() {
.await
}
#[tokio::test]
async fn flightsql_get_xdbc_type_info() {
test_helpers::maybe_start_logging();
let database_url = maybe_skip_integration!();
let table_name = "the_table";
// Set up the cluster ====================================
let mut cluster = MiniCluster::create_shared2(database_url).await;
StepTest::new(
&mut cluster,
vec![
Step::WriteLineProtocol(format!(
"{table_name},tag1=A,tag2=B val=42i 123456\n\
{table_name},tag1=A,tag2=C val=43i 123457"
)),
Step::Custom(Box::new(move |state: &mut StepTestState| {
async move {
let mut client = flightsql_client(state.cluster());
let data_type: Option<i32> = None;
let stream = client.get_xdbc_type_info(data_type).await.unwrap();
let batches = collect_stream(stream).await;
insta::assert_yaml_snapshot!(
batches_to_sorted_lines(&batches),
@r###"
---
- ++
- ++
"###
);
}
.boxed()
})),
],
)
.run()
.await
}
#[tokio::test]
/// Runs the `jdbc_client` program against IOx to verify JDBC via FlightSQL is working
///


@ -169,12 +169,15 @@ public class Main {
System.out.println("**************");
print_result_set(md.getTableTypes());
System.out.println("**************");
System.out.println("getColumns:");
System.out.println("**************");
print_result_set(md.getColumns(null, null, null, null));
// TODO uncomment when GetTables is implemented
//System.out.println("**************");
//System.out.println("getColumns:");
//System.out.println("**************");
//print_result_set(md.getColumns(null, null, null, null));
System.out.println("**************");
System.out.println("Type Info:");
System.out.println("**************");
print_result_set(md.getTypeInfo());
System.out.println("**************");
System.out.println("getFunctions:");


@ -49,6 +49,18 @@ async fn basic() {
.await;
}
#[tokio::test]
async fn date_bin() {
test_helpers::maybe_start_logging();
TestCase {
input: "cases/in/date_bin.sql",
chunk_stage: ChunkStage::All,
}
.run()
.await;
}
#[tokio::test]
async fn dedup_and_predicates_parquet() {
test_helpers::maybe_start_logging();
@ -313,6 +325,18 @@ async fn different_tag_sets() {
.await;
}
#[tokio::test]
async fn bugs() {
test_helpers::maybe_start_logging();
TestCase {
input: "cases/in/bugs.sql",
chunk_stage: ChunkStage::Ingester,
}
.run()
.await;
}
mod influxql {
use super::*;


@ -0,0 +1,7 @@
-- IOX_SETUP: Bugs
-- https://github.com/influxdata/influxdb_iox/issues/7644
-- internal error in select list
SELECT id FROM checks WHERE id in ('2','3','4','5');
SELECT id FROM checks WHERE id in ('1', '2','3','4','5');


@ -0,0 +1,15 @@
-- Test Setup: Bugs
-- SQL: SELECT id FROM checks WHERE id in ('2','3','4','5');
+----+
| id |
+----+
+----+
-- SQL: SELECT id FROM checks WHERE id in ('1', '2','3','4','5');
+----+
| id |
+----+
| 1 |
| 1 |
| 1 |
| 1 |
+----+


@ -0,0 +1,104 @@
-- Date_bin tests
-- IOX_SETUP: OneMeasurementTwoSeries
-- CONSTANT DATA or ARRAY DATA
-- 1 month
select date_bin(INTERVAL '1 month', column1)
from (values
(timestamp '2022-01-01 00:00:00'),
(timestamp '2022-01-01 01:00:00'),
(timestamp '2022-01-02 00:00:00'),
(timestamp '2022-02-02 00:00:00'),
(timestamp '2022-02-15 00:00:00'),
(timestamp '2022-03-31 00:00:00')
) as sq;
-- 1 year
select date_bin('1 year', column1)
from (values
(timestamp '2022-01-01 00:00:00'),
(timestamp '2023-01-01 01:00:00'),
(timestamp '2022-01-02 00:00:00'),
(timestamp '2022-02-02 00:00:00'),
(timestamp '2022-02-15 00:00:00'),
(timestamp '2022-03-31 00:00:00')
) as sq;
-- origin is the last date of the month (1970-12-31T00:15:00Z) and is not at midnight
select date_bin('1 month', column1, '1970-12-31T00:15:00Z')
from (values
(timestamp '2022-01-01 00:00:00'),
(timestamp '2022-01-01 01:00:00'),
(timestamp '2022-01-02 00:00:00'),
(timestamp '2022-02-02 00:00:00'),
(timestamp '2022-02-15 00:00:00'),
(timestamp '2022-03-31 00:00:00')
) as sq;
-- five-month interval on a constant
SELECT DATE_BIN('5 month', '2022-01-01T00:00:00Z');
-- origin is May 31 (the last date of its month), producing a bin on Feb 28
SELECT DATE_BIN('3 month', '2022-04-01T00:00:00Z', '2021-05-31T00:04:00Z');
-- origin is on Feb 29 and interval is one month. The bins will be:
-- # '2000-02-29T00:00:00'
-- # '2000-01-29T00:00:00'
-- # '1999-12-29T00:00:00'
-- # ....
-- # Reason: 29 (or 28 in a non-leap year) is the last day of February, but it is not the
-- # last day of other months. Chrono's month arithmetic therefore steps one month before or
-- # after the origin and lands on the same 29th day.
select date_bin('1 month', timestamp '2000-01-31T00:00:00', timestamp '2000-02-29T00:00:00');
-- similar for the origin March 29
select date_bin('1 month', timestamp '2000-01-31T00:00:00', timestamp '2000-03-29T00:00:00');
-- 3 years 1 month = 37 months
SELECT DATE_BIN('3 years 1 months', '2022-09-01 00:00:00Z');
-- DATA FROM TABLE
-- Input data (by region, time)
SELECT *
FROM cpu
ORDER BY REGION, TIME;
-- Input data (by time)
SELECT *
FROM cpu
ORDER BY TIME;
-- 1 month
SELECT
date_bin('1 month', time) as month,
count(cpu.user)
from cpu
where time between timestamp '2000-05-05T12:00:00Z' and timestamp '2000-05-05T12:59:00Z'
group by month;
-- 1 month with origin
SELECT
date_bin('1 month', time, '1970-12-31T00:15:00Z') as month,
count(cpu.user)
from cpu
where time between timestamp '2000-05-05T12:00:00Z' and timestamp '2000-05-05T12:59:00Z'
group by month;
-- 2 months with origin on the last date of the month
select
date_bin('2 month', time, timestamp '2000-02-29T00:00:00') as month,
count(cpu.user)
from cpu
where time between timestamp '2000-05-05T12:00:00Z' and timestamp '2000-05-05T12:59:00Z'
group by month;
-- EXPLAIN
-- IOX_COMPARE: uuid
EXPLAIN SELECT
date_bin('1 month', time, '1970-12-31T00:15:00Z') as month,
count(cpu.user)
from cpu
where time between timestamp '2000-05-05T12:00:00Z' and timestamp '2000-05-05T12:59:00Z'
group by month;

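The Feb 29 origin behaviour described in the comments above matches chrono's month arithmetic, which those comments point at. A small sketch, assuming the chrono crate: stepping whole months back from the origin keeps landing on the 29th, which is why 2000-01-31 falls into the 2000-01-29 bin in the expected output.

use chrono::{Months, NaiveDate};

fn main() {
    // Origin 2000-02-29: each one-month step back lands on the 29th again.
    let origin = NaiveDate::from_ymd_opt(2000, 2, 29).unwrap();
    let bins: Vec<NaiveDate> = (0..3)
        .map(|k| origin.checked_sub_months(Months::new(k)).unwrap())
        .collect();

    assert_eq!(
        bins,
        vec![
            NaiveDate::from_ymd_opt(2000, 2, 29).unwrap(),
            NaiveDate::from_ymd_opt(2000, 1, 29).unwrap(),
            NaiveDate::from_ymd_opt(1999, 12, 29).unwrap(),
        ]
    );
}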

@ -0,0 +1,119 @@
-- Test Setup: OneMeasurementTwoSeries
-- SQL: select date_bin(INTERVAL '1 month', column1) from (values (timestamp '2022-01-01 00:00:00'), (timestamp '2022-01-01 01:00:00'), (timestamp '2022-01-02 00:00:00'), (timestamp '2022-02-02 00:00:00'), (timestamp '2022-02-15 00:00:00'), (timestamp '2022-03-31 00:00:00') ) as sq;
+---------------------------------------------------------------------------+
| datebin(IntervalMonthDayNano("79228162514264337593543950336"),sq.column1) |
+---------------------------------------------------------------------------+
| 2022-01-01T00:00:00Z |
| 2022-01-01T00:00:00Z |
| 2022-01-01T00:00:00Z |
| 2022-02-01T00:00:00Z |
| 2022-02-01T00:00:00Z |
| 2022-03-01T00:00:00Z |
+---------------------------------------------------------------------------+
-- SQL: select date_bin('1 year', column1) from (values (timestamp '2022-01-01 00:00:00'), (timestamp '2023-01-01 01:00:00'), (timestamp '2022-01-02 00:00:00'), (timestamp '2022-02-02 00:00:00'), (timestamp '2022-02-15 00:00:00'), (timestamp '2022-03-31 00:00:00') ) as sq;
+------------------------------------+
| datebin(Utf8("1 year"),sq.column1) |
+------------------------------------+
| 2022-01-01T00:00:00Z |
| 2023-01-01T00:00:00Z |
| 2022-01-01T00:00:00Z |
| 2022-01-01T00:00:00Z |
| 2022-01-01T00:00:00Z |
| 2022-01-01T00:00:00Z |
+------------------------------------+
-- SQL: select date_bin('1 month', column1, '1970-12-31T00:15:00Z') from (values (timestamp '2022-01-01 00:00:00'), (timestamp '2022-01-01 01:00:00'), (timestamp '2022-01-02 00:00:00'), (timestamp '2022-02-02 00:00:00'), (timestamp '2022-02-15 00:00:00'), (timestamp '2022-03-31 00:00:00') ) as sq;
+------------------------------------------------------------------+
| datebin(Utf8("1 month"),sq.column1,Utf8("1970-12-31T00:15:00Z")) |
+------------------------------------------------------------------+
| 2021-12-31T00:15:00Z |
| 2021-12-31T00:15:00Z |
| 2021-12-31T00:15:00Z |
| 2022-01-31T00:15:00Z |
| 2022-01-31T00:15:00Z |
| 2022-02-28T00:15:00Z |
+------------------------------------------------------------------+
-- SQL: SELECT DATE_BIN('5 month', '2022-01-01T00:00:00Z');
+-------------------------------------------------------+
| datebin(Utf8("5 month"),Utf8("2022-01-01T00:00:00Z")) |
+-------------------------------------------------------+
| 2021-09-01T00:00:00Z |
+-------------------------------------------------------+
-- SQL: SELECT DATE_BIN('3 month', '2022-04-01T00:00:00Z', '2021-05-31T00:04:00Z');
+------------------------------------------------------------------------------------+
| datebin(Utf8("3 month"),Utf8("2022-04-01T00:00:00Z"),Utf8("2021-05-31T00:04:00Z")) |
+------------------------------------------------------------------------------------+
| 2022-02-28T00:04:00Z |
+------------------------------------------------------------------------------------+
-- SQL: select date_bin('1 month', timestamp '2000-01-31T00:00:00', timestamp '2000-02-29T00:00:00');
+----------------------------------------------------------------------------------+
| datebin(Utf8("1 month"),Utf8("2000-01-31T00:00:00"),Utf8("2000-02-29T00:00:00")) |
+----------------------------------------------------------------------------------+
| 2000-01-29T00:00:00Z |
+----------------------------------------------------------------------------------+
-- SQL: select date_bin('1 month', timestamp '2000-01-31T00:00:00', timestamp '2000-03-29T00:00:00');
+----------------------------------------------------------------------------------+
| datebin(Utf8("1 month"),Utf8("2000-01-31T00:00:00"),Utf8("2000-03-29T00:00:00")) |
+----------------------------------------------------------------------------------+
| 2000-01-29T00:00:00Z |
+----------------------------------------------------------------------------------+
-- SQL: SELECT DATE_BIN('3 years 1 months', '2022-09-01 00:00:00Z');
+----------------------------------------------------------------+
| datebin(Utf8("3 years 1 months"),Utf8("2022-09-01 00:00:00Z")) |
+----------------------------------------------------------------+
| 2022-06-01T00:00:00Z |
+----------------------------------------------------------------+
-- SQL: SELECT * FROM cpu ORDER BY REGION, TIME;
+------+--------+----------------------+------+
| idle | region | time | user |
+------+--------+----------------------+------+
| 70.0 | a | 2000-05-05T12:20:00Z | 23.2 |
| | a | 2000-05-05T12:40:00Z | 21.0 |
| | b | 2000-05-05T12:31:00Z | 25.2 |
| 60.0 | b | 2000-05-05T12:39:00Z | 28.9 |
+------+--------+----------------------+------+
-- SQL: SELECT * FROM cpu ORDER BY TIME;
+------+--------+----------------------+------+
| idle | region | time | user |
+------+--------+----------------------+------+
| 70.0 | a | 2000-05-05T12:20:00Z | 23.2 |
| | b | 2000-05-05T12:31:00Z | 25.2 |
| 60.0 | b | 2000-05-05T12:39:00Z | 28.9 |
| | a | 2000-05-05T12:40:00Z | 21.0 |
+------+--------+----------------------+------+
-- SQL: SELECT date_bin('1 month', time) as month, count(cpu.user) from cpu where time between timestamp '2000-05-05T12:00:00Z' and timestamp '2000-05-05T12:59:00Z' group by month;
+----------------------+-----------------+
| month | COUNT(cpu.user) |
+----------------------+-----------------+
| 2000-05-01T00:00:00Z | 4 |
+----------------------+-----------------+
-- SQL: SELECT date_bin('1 month', time, '1970-12-31T00:15:00Z') as month, count(cpu.user) from cpu where time between timestamp '2000-05-05T12:00:00Z' and timestamp '2000-05-05T12:59:00Z' group by month;
+----------------------+-----------------+
| month | COUNT(cpu.user) |
+----------------------+-----------------+
| 2000-04-30T00:15:00Z | 4 |
+----------------------+-----------------+
-- SQL: select date_bin('2 month', time, timestamp '2000-02-29T00:00:00') as month, count(cpu.user) from cpu where time between timestamp '2000-05-05T12:00:00Z' and timestamp '2000-05-05T12:59:00Z' group by month;
+----------------------+-----------------+
| month | COUNT(cpu.user) |
+----------------------+-----------------+
| 2000-04-29T00:00:00Z | 4 |
+----------------------+-----------------+
-- SQL: EXPLAIN SELECT date_bin('1 month', time, '1970-12-31T00:15:00Z') as month, count(cpu.user) from cpu where time between timestamp '2000-05-05T12:00:00Z' and timestamp '2000-05-05T12:59:00Z' group by month;
-- Results After Normalizing UUIDs
----------
| plan_type | plan |
----------
| logical_plan | Projection: datebin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z")) AS month, COUNT(cpu.user) |
| | Aggregate: groupBy=[[datebin(IntervalMonthDayNano("79228162514264337593543950336"), cpu.time, TimestampNanosecond(31450500000000000, None)) AS datebin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))]], aggr=[[COUNT(cpu.user)]] |
| | TableScan: cpu projection=[time, user], full_filters=[cpu.time >= TimestampNanosecond(957528000000000000, None), cpu.time <= TimestampNanosecond(957531540000000000, None)] |
| physical_plan | ProjectionExec: expr=[datebin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))@0 as month, COUNT(cpu.user)@1 as COUNT(cpu.user)] |
| | AggregateExec: mode=FinalPartitioned, gby=[datebin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))@0 as datebin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))], aggr=[COUNT(cpu.user)] |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | RepartitionExec: partitioning=Hash([Column { name: "datebin(Utf8(\"1 month\"),cpu.time,Utf8(\"1970-12-31T00:15:00Z\"))", index: 0 }], 4), input_partitions=4 |
| | AggregateExec: mode=Partial, gby=[datebin(79228162514264337593543950336, time@0, 31450500000000000) as datebin(Utf8("1 month"),cpu.time,Utf8("1970-12-31T00:15:00Z"))], aggr=[COUNT(cpu.user)] |
| | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | FilterExec: time@0 >= 957528000000000000 AND time@0 <= 957531540000000000 |
| | ParquetExec: limit=None, partitions={1 group: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet]]}, predicate=time@2 >= 957528000000000000 AND time@2 <= 957531540000000000, pruning_predicate=time_max@0 >= 957528000000000000 AND time_min@1 <= 957531540000000000, output_ordering=[time@0 ASC], projection=[time, user] |
| | |
----------


@ -233,6 +233,12 @@ SELECT f64 FROM m0 WHERE f64 >= 19.5 AND non_existent = 1;
SELECT f64 FROM m0 WHERE f64 >= 19.5 AND f64 =~ /foo/;
SELECT f64 FROM m0 WHERE f64 >= 19.5 OR f64 =~ /foo/;
-- arithmetic scalar function calls work
SELECT time, floor(f64) FROM m0 WHERE floor(f64) = 19.0;
-- aggregate function calls produce an error
SELECT *, floor(f64) FROM m0 WHERE sum(f64) > 100.0;
--
-- Validate column expressions
--


@ -439,6 +439,15 @@ name: m0
+---------------------+------+
| 2022-10-31T02:00:10 | 21.2 |
+---------------------+------+
-- InfluxQL: SELECT time, floor(f64) FROM m0 WHERE floor(f64) = 19.0;
name: m0
+---------------------+-------+
| time | floor |
+---------------------+-------+
| 2022-10-31T02:00:30 | 19.0 |
+---------------------+-------+
-- InfluxQL: SELECT *, floor(f64) FROM m0 WHERE sum(f64) > 100.0;
Error while planning query: Error during planning: invalid expression, the only valid function calls are 'now' with no arguments, or scalar math functions at pos 35
-- InfluxQL: SELECT tag0, f64, f64 * 0.5, f64 + str FROM m0 WHERE f64 > 19;
name: m0
+---------------------+-------+------+-------+---------+


@ -21,6 +21,25 @@ static RETENTION_SETUP: Lazy<RetentionSetup> = Lazy::new(RetentionSetup::new);
/// All possible setups for the [`TestCase`][crate::TestCase]s to use, indexed by name
pub static SETUPS: Lazy<HashMap<SetupName, SetupSteps>> = Lazy::new(|| {
HashMap::from([
(
"Bugs",
vec![
Step::RecordNumParquetFiles,
Step::WriteLineProtocol(
[
r#"checks,id=1,method=POST,name=Writes,url="https://example.com",user_id=1 elapsed=66909i,status=204i 1678578528255989730"#,
r#"checks,id=1,method=POST,name=Writes,url="https://example.com",user_id=1 elapsed=112928i,status=204i 1678578532192972168"#,
r#"checks,id=1,method=POST,name=Writes,url="https://example.com",user_id=1 elapsed=147683i,status=204i 1678578588416052133"#,
r#"checks,id=1,method=POST,name=Writes,url="https://example.com",user_id=1 elapsed=78000i,status=204i 1678578592147893402"#,
]
.join("\n"),
),
Step::Persist,
Step::WaitForPersisted2 {
expected_increase: 1,
},
],
),
(
"TwoMeasurements",
vec![


@ -26,7 +26,7 @@ reqwest = { version = "0.11", default-features = false, features = ["stream", "r
schema = { path = "../schema" }
serde_json = "1.0.96"
tokio = { version = "1.27", features = ["macros", "parking_lot", "rt-multi-thread"] }
tokio-stream = "0.1.12"
tokio-stream = "0.1.13"
thiserror = "1.0.40"
tonic = { workspace = true }


@ -31,7 +31,8 @@ use arrow_flight::{
ActionCreatePreparedStatementRequest, ActionCreatePreparedStatementResult, Any,
CommandGetCatalogs, CommandGetCrossReference, CommandGetDbSchemas, CommandGetExportedKeys,
CommandGetImportedKeys, CommandGetPrimaryKeys, CommandGetSqlInfo, CommandGetTableTypes,
CommandGetTables, CommandPreparedStatementQuery, CommandStatementQuery, ProstMessageExt,
CommandGetTables, CommandGetXdbcTypeInfo, CommandPreparedStatementQuery,
CommandStatementQuery, ProstMessageExt,
},
Action, FlightClient, FlightDescriptor, FlightInfo, IpcMessage, Ticket,
};
@ -361,6 +362,26 @@ impl FlightSqlClient {
self.do_get_with_cmd(msg.as_any()).await
}
/// List information about the data types supported by this server
/// using a [`CommandGetXdbcTypeInfo`] message.
///
/// # Parameters
///
/// Definition from <https://github.com/apache/arrow/blob/9588da967c756b2923e213ccc067378ba6c90a86/format/FlightSql.proto#L1058-L1123>
///
/// data_type: Optionally restricts the returned type information to the given data type.
///
/// This implementation does not support alternate endpoints
pub async fn get_xdbc_type_info(
&mut self,
data_type: Option<impl Into<i32> + Send>,
) -> Result<FlightRecordBatchStream> {
let msg = CommandGetXdbcTypeInfo {
data_type: data_type.map(|dt| dt.into()),
};
self.do_get_with_cmd(msg.as_any()).await
}
/// Implements the canonical interaction for most FlightSQL messages:
///
/// 1. Call `GetFlightInfo` with the provided message, and get a

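A hedged usage sketch of the new get_xdbc_type_info call, modelled on the end-to-end test earlier in this diff. flightsql_client and collect_stream are that test's helpers rather than public API, and `state` is the StepTestState handed to a custom test step, so this is a fragment, not a standalone program.

// `client` is this crate's FlightSqlClient, already connected to a namespace.
let mut client = flightsql_client(state.cluster());

// `None` asks for every supported type; pass `Some(code)` to filter by an
// XDBC data-type code.
let data_type: Option<i32> = None;
let stream = client.get_xdbc_type_info(data_type).await.unwrap();
let batches = collect_stream(stream).await;
println!("{} record batch(es) of XDBC type info", batches.len());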

@ -43,7 +43,7 @@ sharder = { version = "0.1.0", path = "../sharder" }
test_helpers = { path = "../test_helpers", features = ["future_timeout"], optional = true }
thiserror = "1.0.40"
tokio = { version = "1.27", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] }
tokio-util = "0.7.7"
tokio-util = "0.7.8"
tonic = { workspace = true }
trace = { version = "0.1.0", path = "../trace" }
uuid = "1.3.1"


@ -1,7 +1,7 @@
use std::sync::Arc;
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use data_types::{PartitionKey, Sequence, SequenceNumber, ShardIndex};
use data_types::{PartitionKey, SequenceNumber};
use dml::{DmlMeta, DmlWrite};
use futures::{stream::FuturesUnordered, StreamExt};
use generated_types::influxdata::{
@ -67,7 +67,7 @@ async fn init(lp: impl AsRef<str>) -> (TestContext<impl IngesterRpcInterface>, D
batches_by_ids,
PartitionKey::from(PARTITION_KEY),
DmlMeta::sequenced(
Sequence::new(ShardIndex::new(42), SequenceNumber::new(42)),
SequenceNumber::new(42),
iox_time::SystemProvider::new().now(),
None,
50,


@ -85,7 +85,7 @@ impl<O> NamespaceData<O> {
/// Initialize new tables with default partition template of daily
pub(super) fn new(
namespace_id: NamespaceId,
namespace_name: DeferredLoad<NamespaceName>,
namespace_name: Arc<DeferredLoad<NamespaceName>>,
table_name_resolver: Arc<dyn TableNameProvider>,
partition_provider: Arc<dyn PartitionProvider>,
post_write_observer: Arc<O>,
@ -101,7 +101,7 @@ impl<O> NamespaceData<O> {
Self {
namespace_id,
namespace_name: Arc::new(namespace_name),
namespace_name,
tables: Default::default(),
table_name_resolver,
table_count,
@ -147,8 +147,7 @@ where
let sequence_number = op
.meta()
.sequence()
.expect("applying unsequenced op")
.sequence_number;
.expect("applying unsequenced op");
match op {
DmlOperation::Write(write) => {
@ -162,7 +161,7 @@ where
self.table_count.inc(1);
Arc::new(TableData::new(
table_id,
self.table_name_resolver.for_table(table_id),
Arc::new(self.table_name_resolver.for_table(table_id)),
self.namespace_id,
Arc::clone(&self.namespace_name),
Arc::clone(&self.partition_provider),
@ -229,56 +228,39 @@ where
#[cfg(test)]
mod tests {
use std::{sync::Arc, time::Duration};
use std::sync::Arc;
use data_types::{PartitionId, PartitionKey, ShardId};
use data_types::TRANSITION_SHARD_ID;
use metric::{Attributes, Metric};
use super::*;
use crate::{
buffer_tree::{
namespace::NamespaceData,
partition::{resolver::mock::MockPartitionProvider, PartitionData, SortKeyState},
namespace::NamespaceData, partition::resolver::mock::MockPartitionProvider,
post_write::mock::MockPostWriteObserver,
table::{name_resolver::mock::MockTableNameProvider, TableName},
},
deferred_load::{self, DeferredLoad},
test_util::make_write_op,
deferred_load,
test_util::{
make_write_op, PartitionDataBuilder, ARBITRARY_NAMESPACE_ID, ARBITRARY_NAMESPACE_NAME,
ARBITRARY_PARTITION_KEY, ARBITRARY_TABLE_ID, ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_NAME_PROVIDER, DEFER_NAMESPACE_NAME_1_MS,
},
};
const TABLE_NAME: &str = "bananas";
const TABLE_ID: TableId = TableId::new(44);
const NAMESPACE_NAME: &str = "platanos";
const NAMESPACE_ID: NamespaceId = NamespaceId::new(42);
const TRANSITION_SHARD_ID: ShardId = ShardId::new(84);
#[tokio::test]
async fn test_namespace_init_table() {
let metrics = Arc::new(metric::Registry::default());
// Configure the mock partition provider to return a partition for this
// table ID.
let partition_provider = Arc::new(MockPartitionProvider::default().with_partition(
PartitionData::new(
PartitionId::new(0),
PartitionKey::from("banana-split"),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
})),
TABLE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
),
));
let partition_provider = Arc::new(
MockPartitionProvider::default().with_partition(PartitionDataBuilder::new().build()),
);
let ns = NamespaceData::new(
NAMESPACE_ID,
DeferredLoad::new(Duration::from_millis(1), async { NAMESPACE_NAME.into() }),
Arc::new(MockTableNameProvider::new(TABLE_NAME)),
ARBITRARY_NAMESPACE_ID,
Arc::clone(&*DEFER_NAMESPACE_NAME_1_MS),
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
partition_provider,
Arc::new(MockPostWriteObserver::default()),
&metrics,
@ -288,27 +270,31 @@ mod tests {
// Assert the namespace name was stored
let name = ns.namespace_name().to_string();
assert!(
(name == NAMESPACE_NAME) || (name == deferred_load::UNRESOLVED_DISPLAY_STRING),
(name.as_str() == &***ARBITRARY_NAMESPACE_NAME)
|| (name == deferred_load::UNRESOLVED_DISPLAY_STRING),
"unexpected namespace name: {name}"
);
// Assert the namespace does not contain the test data
assert!(ns.table(TABLE_ID).is_none());
assert!(ns.table(ARBITRARY_TABLE_ID).is_none());
// Write some test data
ns.apply(DmlOperation::Write(make_write_op(
&PartitionKey::from("banana-split"),
NAMESPACE_ID,
TABLE_NAME,
TABLE_ID,
&ARBITRARY_PARTITION_KEY,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_ID,
0,
r#"bananas,city=Medford day="sun",temp=55 22"#,
&format!(
r#"{},city=Medford day="sun",temp=55 22"#,
&*ARBITRARY_TABLE_NAME
),
)))
.await
.expect("buffer op should succeed");
// Referencing the table should succeed
assert!(ns.table(TABLE_ID).is_some());
assert!(ns.table(ARBITRARY_TABLE_ID).is_some());
// And the table counter metric should increase
let tables = metrics
@ -321,7 +307,10 @@ mod tests {
// Ensure the deferred namespace name is loaded.
let name = ns.namespace_name().get().await;
assert_eq!(&**name, NAMESPACE_NAME);
assert_eq!(ns.namespace_name().to_string(), NAMESPACE_NAME);
assert_eq!(&*name, &**ARBITRARY_NAMESPACE_NAME);
assert_eq!(
ns.namespace_name().to_string().as_str(),
&***ARBITRARY_NAMESPACE_NAME
);
}
}

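The assertions above accept either the resolved name or deferred_load::UNRESOLVED_DISPLAY_STRING because a DeferredLoad only has a value once it has been awaited (or its background load completes). A self-contained toy sketch of that display-until-resolved behaviour, using tokio's OnceCell rather than the real DeferredLoad, which also resolves on a timer:

use std::fmt;
use tokio::sync::OnceCell;

struct Deferred {
    cell: OnceCell<String>,
}

impl Deferred {
    fn new() -> Self {
        Self { cell: OnceCell::new() }
    }

    // Forces the load on first use, mirroring `DeferredLoad::get().await`.
    async fn get(&self) -> &str {
        self.cell
            .get_or_init(|| async { "platanos".to_string() })
            .await
    }
}

impl fmt::Display for Deferred {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Before resolution, display a placeholder; the real type prints
        // deferred_load::UNRESOLVED_DISPLAY_STRING here.
        match self.cell.get() {
            Some(v) => write!(f, "{v}"),
            None => write!(f, "<unresolved>"),
        }
    }
}

#[tokio::main]
async fn main() {
    let name = Deferred::new();
    assert_eq!(name.to_string(), "<unresolved>");
    assert_eq!(name.get().await, "platanos");
    assert_eq!(name.to_string(), "platanos");
}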

@ -90,12 +90,6 @@ pub(crate) mod mock {
}
}
impl Default for MockNamespaceNameProvider {
fn default() -> Self {
Self::new("bananas")
}
}
impl NamespaceNameProvider for MockNamespaceNameProvider {
fn for_namespace(&self, _id: NamespaceId) -> DeferredLoad<NamespaceName> {
let name = self.name.clone();


@ -354,20 +354,13 @@ mod tests {
};
use datafusion_util::test_collect;
use iox_catalog::interface::Catalog;
use lazy_static::lazy_static;
use mutable_batch_lp::test_helpers::lp_to_mutable_batch;
use super::*;
use crate::{buffer_tree::partition::resolver::SortKeyResolver, test_util::populate_catalog};
const PARTITION_ID: PartitionId = PartitionId::new(1);
const TRANSITION_SHARD_ID: ShardId = ShardId::new(84);
lazy_static! {
static ref PARTITION_KEY: PartitionKey = PartitionKey::from("platanos");
static ref TABLE_NAME: TableName = TableName::from("bananas");
static ref NAMESPACE_NAME: NamespaceName = NamespaceName::from("namespace-bananas");
}
use crate::{
buffer_tree::partition::resolver::SortKeyResolver,
test_util::{populate_catalog, PartitionDataBuilder, ARBITRARY_PARTITION_ID},
};
// Write some data and read it back from the buffer.
//
@ -375,20 +368,7 @@ mod tests {
// generation & query all work as intended.
#[tokio::test]
async fn test_write_read() {
let mut p = PartitionData::new(
PARTITION_ID,
PARTITION_KEY.clone(),
NamespaceId::new(3),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NAMESPACE_NAME.clone()
})),
TableId::new(4),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TABLE_NAME.clone()
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
);
let mut p = PartitionDataBuilder::new().build();
// And no data should be returned when queried.
assert!(p.get_query_data().is_none());
@ -401,7 +381,7 @@ mod tests {
// The data should be readable.
{
let data = p.get_query_data().expect("should return data");
assert_eq!(data.partition_id(), PARTITION_ID);
assert_eq!(data.partition_id(), ARBITRARY_PARTITION_ID);
let expected = [
"+--------+--------+----------+--------------------------------+",
@ -430,7 +410,7 @@ mod tests {
// And finally both writes should be readable.
{
let data = p.get_query_data().expect("should contain data");
assert_eq!(data.partition_id(), PARTITION_ID);
assert_eq!(data.partition_id(), ARBITRARY_PARTITION_ID);
let expected = [
"+--------+--------+----------+--------------------------------+",
@ -456,20 +436,7 @@ mod tests {
// both before, during, and after a persist takes place.
#[tokio::test]
async fn test_persist() {
let mut p = PartitionData::new(
PARTITION_ID,
PARTITION_KEY.clone(),
NamespaceId::new(3),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NAMESPACE_NAME.clone()
})),
TableId::new(4),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TABLE_NAME.clone()
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
);
let mut p = PartitionDataBuilder::new().build();
assert!(p.get_query_data().is_none());
@ -485,7 +452,7 @@ mod tests {
// Begin persisting the partition.
let persisting_data = p.mark_persisting().expect("must contain existing data");
// And validate the data being persisted.
assert_eq!(persisting_data.partition_id(), PARTITION_ID);
assert_eq!(persisting_data.partition_id(), ARBITRARY_PARTITION_ID);
assert_eq!(persisting_data.record_batches().len(), 1);
let expected = [
"+--------+--------+----------+--------------------------------+",
@ -519,7 +486,7 @@ mod tests {
// Which must be readable, alongside the ongoing persist data.
{
let data = p.get_query_data().expect("must have data");
assert_eq!(data.partition_id(), PARTITION_ID);
assert_eq!(data.partition_id(), ARBITRARY_PARTITION_ID);
assert_eq!(data.record_batches().len(), 2);
let expected = [
"+--------+--------+----------+--------------------------------+",
@ -553,7 +520,7 @@ mod tests {
// Querying the buffer should now return only the second write.
{
let data = p.get_query_data().expect("must have data");
assert_eq!(data.partition_id(), PARTITION_ID);
assert_eq!(data.partition_id(), ARBITRARY_PARTITION_ID);
assert_eq!(data.record_batches().len(), 1);
let expected = [
"+--------+--------+---------+--------------------------------+",
@ -604,26 +571,15 @@ mod tests {
let input = Arc::new(MemoryExec::try_new(&[batch], schema, projection).unwrap());
// Create and run the deduplicator
let exec = Arc::new(iox_query::provider::DeduplicateExec::new(input, sort_keys, false));
let exec = Arc::new(iox_query::provider::DeduplicateExec::new(
input, sort_keys, false,
));
let got = test_collect(Arc::clone(&exec) as Arc<dyn ExecutionPlan>).await;
assert_batches_eq!(expect, &*got);
}
let mut p = PartitionData::new(
PARTITION_ID,
PARTITION_KEY.clone(),
NamespaceId::new(3),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NAMESPACE_NAME.clone()
})),
TableId::new(4),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TABLE_NAME.clone()
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
);
let mut p = PartitionDataBuilder::new().build();
// Perform the initial write.
//
@ -795,20 +751,7 @@ mod tests {
// which return the correct SequenceNumberSet instances.
#[tokio::test]
async fn test_out_of_order_persist() {
let mut p = PartitionData::new(
PARTITION_ID,
PARTITION_KEY.clone(),
NamespaceId::new(3),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NAMESPACE_NAME.clone()
})),
TableId::new(4),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TABLE_NAME.clone()
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
);
let mut p = PartitionDataBuilder::new().build();
// Perform the initial write.
//
@ -982,20 +925,9 @@ mod tests {
let starting_state =
SortKeyState::Provided(Some(SortKey::from_columns(["banana", "time"])));
let mut p = PartitionData::new(
PartitionId::new(1),
"bananas".into(),
NamespaceId::new(42),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NAMESPACE_NAME.clone()
})),
TableId::new(1),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from("platanos")
})),
starting_state,
TRANSITION_SHARD_ID,
);
let mut p = PartitionDataBuilder::new()
.with_sort_key_state(starting_state)
.build();
let want = Some(SortKey::from_columns(["banana", "platanos", "time"]));
p.update_sort_key(want.clone());
@ -1042,20 +974,9 @@ mod tests {
let starting_state = SortKeyState::Deferred(fetcher);
let mut p = PartitionData::new(
PartitionId::new(1),
"bananas".into(),
NamespaceId::new(42),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NAMESPACE_NAME.clone()
})),
TableId::new(1),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from("platanos")
})),
starting_state,
shard_id,
);
let mut p = PartitionDataBuilder::new()
.with_sort_key_state(starting_state)
.build();
let want = Some(SortKey::from_columns(["banana", "platanos", "time"]));
p.update_sort_key(want.clone());
@ -1067,20 +988,7 @@ mod tests {
// Perform writes with non-monotonic sequence numbers.
#[tokio::test]
async fn test_non_monotonic_writes() {
let mut p = PartitionData::new(
PARTITION_ID,
PARTITION_KEY.clone(),
NamespaceId::new(3),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NAMESPACE_NAME.clone()
})),
TableId::new(4),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TABLE_NAME.clone()
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
);
let mut p = PartitionDataBuilder::new().build();
// Perform out of order writes.
p.buffer_write(
@ -1115,40 +1023,14 @@ mod tests {
#[tokio::test]
async fn test_mark_persisting_no_data() {
let mut p = PartitionData::new(
PARTITION_ID,
PARTITION_KEY.clone(),
NamespaceId::new(3),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NAMESPACE_NAME.clone()
})),
TableId::new(4),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TABLE_NAME.clone()
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
);
let mut p = PartitionDataBuilder::new().build();
assert!(p.mark_persisting().is_none());
}
#[tokio::test]
async fn test_mark_persisting_twice() {
let mut p = PartitionData::new(
PARTITION_ID,
PARTITION_KEY.clone(),
NamespaceId::new(3),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NAMESPACE_NAME.clone()
})),
TableId::new(4),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TABLE_NAME.clone()
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
);
let mut p = PartitionDataBuilder::new().build();
let mb = lp_to_mutable_batch(r#"bananas,city=London people=2,pigeons="millions" 10"#).1;
p.buffer_write(mb, SequenceNumber::new(2))
@ -1162,20 +1044,7 @@ mod tests {
// QueryAdaptor.
#[tokio::test]
async fn test_empty_partition_no_queryadaptor_panic() {
let mut p = PartitionData::new(
PARTITION_ID,
PARTITION_KEY.clone(),
NamespaceId::new(3),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NAMESPACE_NAME.clone()
})),
TableId::new(4),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TABLE_NAME.clone()
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
);
let mut p = PartitionDataBuilder::new().build();
assert!(p.get_query_data().is_none());
}

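The repeated eight-argument PartitionData::new calls in these tests are replaced by PartitionDataBuilder::new() plus targeted with_* overrides. A self-contained sketch of that builder-with-defaults pattern, using simplified placeholder types rather than the real ingester2 ones:

#[derive(Debug, PartialEq)]
struct Partition {
    id: i64,
    key: String,
    table_id: i64,
}

struct PartitionBuilder {
    id: i64,
    key: String,
    table_id: i64,
}

impl PartitionBuilder {
    // Arbitrary defaults stand in for the ARBITRARY_* statics used by the tests.
    fn new() -> Self {
        Self {
            id: 1,
            key: "arbitrary".to_string(),
            table_id: 4,
        }
    }

    fn with_partition_id(mut self, id: i64) -> Self {
        self.id = id;
        self
    }

    fn with_partition_key(mut self, key: impl Into<String>) -> Self {
        self.key = key.into();
        self
    }

    fn with_table_id(mut self, table_id: i64) -> Self {
        self.table_id = table_id;
        self
    }

    fn build(self) -> Partition {
        Partition {
            id: self.id,
            key: self.key,
            table_id: self.table_id,
        }
    }
}

fn main() {
    // Tests override only the fields they care about and accept defaults elsewhere.
    let p = PartitionBuilder::new().with_table_id(1234).build();
    assert_eq!(p.table_id, 1234);
    println!("{p:?}");
}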

@ -221,19 +221,19 @@ mod tests {
// Harmless in tests - saves a bunch of extra vars.
#![allow(clippy::await_holding_lock)]
use data_types::ShardId;
use data_types::{ShardId, TRANSITION_SHARD_ID};
use iox_catalog::mem::MemCatalog;
use super::*;
use crate::buffer_tree::partition::resolver::mock::MockPartitionProvider;
const PARTITION_KEY: &str = "bananas";
const PARTITION_ID: PartitionId = PartitionId::new(42);
const NAMESPACE_ID: NamespaceId = NamespaceId::new(2);
const NAMESPACE_NAME: &str = "ns-bananas";
const TABLE_ID: TableId = TableId::new(3);
const TABLE_NAME: &str = "platanos";
const TRANSITION_SHARD_ID: ShardId = ShardId::new(84);
use crate::{
buffer_tree::partition::resolver::mock::MockPartitionProvider,
test_util::{
arbitrary_partition, PartitionDataBuilder, ARBITRARY_NAMESPACE_ID,
ARBITRARY_NAMESPACE_NAME, ARBITRARY_PARTITION_ID, ARBITRARY_PARTITION_KEY,
ARBITRARY_PARTITION_KEY_STR, ARBITRARY_TABLE_ID, ARBITRARY_TABLE_NAME,
DEFER_NAMESPACE_NAME_1_SEC, DEFER_TABLE_NAME_1_SEC,
},
};
fn new_cache<P>(
inner: MockPartitionProvider,
@ -253,42 +253,31 @@ mod tests {
#[tokio::test]
async fn test_miss() {
let data = PartitionData::new(
PARTITION_ID,
PARTITION_KEY.into(),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
})),
TABLE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
);
let data = PartitionDataBuilder::new().build();
let inner = MockPartitionProvider::default().with_partition(data);
let cache = new_cache(inner, []);
let got = cache
.get_partition(
PARTITION_KEY.into(),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
})),
TABLE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
})),
ARBITRARY_PARTITION_KEY.clone(),
ARBITRARY_NAMESPACE_ID,
Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC),
ARBITRARY_TABLE_ID,
Arc::clone(&*DEFER_TABLE_NAME_1_SEC),
TRANSITION_SHARD_ID,
)
.await;
assert_eq!(got.lock().partition_id(), PARTITION_ID);
assert_eq!(got.lock().table_id(), TABLE_ID);
assert_eq!(&**got.lock().table_name().get().await, TABLE_NAME);
assert_eq!(&**got.lock().namespace_name().get().await, NAMESPACE_NAME);
assert_eq!(got.lock().partition_id(), ARBITRARY_PARTITION_ID);
assert_eq!(got.lock().table_id(), ARBITRARY_TABLE_ID);
assert_eq!(
&**got.lock().table_name().get().await,
&***ARBITRARY_TABLE_NAME
);
assert_eq!(
&**got.lock().namespace_name().get().await,
&***ARBITRARY_NAMESPACE_NAME
);
assert!(cache.inner.is_empty());
}
@ -296,42 +285,40 @@ mod tests {
async fn test_hit() {
let inner = MockPartitionProvider::default();
let stored_partition_key = PartitionKey::from(PARTITION_KEY);
let stored_partition_key = PartitionKey::from(ARBITRARY_PARTITION_KEY_STR);
let partition = Partition {
id: PARTITION_ID,
shard_id: TRANSITION_SHARD_ID,
table_id: TABLE_ID,
partition_key: stored_partition_key.clone(),
sort_key: vec!["dos".to_string(), "bananas".to_string()],
persisted_sequence_number: Default::default(),
new_file_at: Default::default(),
..arbitrary_partition()
};
let cache = new_cache(inner, [partition]);
let callers_partition_key = PartitionKey::from(PARTITION_KEY);
let callers_partition_key = PartitionKey::from(ARBITRARY_PARTITION_KEY_STR);
let got = cache
.get_partition(
callers_partition_key.clone(),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
})),
TABLE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
})),
ARBITRARY_NAMESPACE_ID,
Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC),
ARBITRARY_TABLE_ID,
Arc::clone(&*DEFER_TABLE_NAME_1_SEC),
TRANSITION_SHARD_ID,
)
.await;
assert_eq!(got.lock().partition_id(), PARTITION_ID);
assert_eq!(got.lock().table_id(), TABLE_ID);
assert_eq!(&**got.lock().table_name().get().await, TABLE_NAME);
assert_eq!(&**got.lock().namespace_name().get().await, NAMESPACE_NAME);
assert_eq!(got.lock().partition_id(), ARBITRARY_PARTITION_ID);
assert_eq!(got.lock().table_id(), ARBITRARY_TABLE_ID);
assert_eq!(
&**got.lock().table_name().get().await,
&***ARBITRARY_TABLE_NAME
);
assert_eq!(
&**got.lock().namespace_name().get().await,
&***ARBITRARY_NAMESPACE_NAME
);
assert_eq!(
*got.lock().partition_key(),
PartitionKey::from(PARTITION_KEY)
PartitionKey::from(ARBITRARY_PARTITION_KEY_STR)
);
// The cache should have been cleaned up as it was consumed.
@ -350,98 +337,63 @@ mod tests {
async fn test_miss_partition_key() {
let other_key = PartitionKey::from("test");
let other_key_id = PartitionId::new(99);
let inner = MockPartitionProvider::default().with_partition(PartitionData::new(
other_key_id,
other_key.clone(),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
})),
TABLE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
));
let inner = MockPartitionProvider::default().with_partition(
PartitionDataBuilder::new()
.with_partition_key(other_key.clone())
.with_partition_id(other_key_id)
.build(),
);
let partition = Partition {
id: PARTITION_ID,
shard_id: TRANSITION_SHARD_ID,
table_id: TABLE_ID,
partition_key: PARTITION_KEY.into(),
sort_key: Default::default(),
persisted_sequence_number: Default::default(),
new_file_at: Default::default(),
};
let partition = arbitrary_partition();
let cache = new_cache(inner, [partition]);
let got = cache
.get_partition(
other_key.clone(),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
})),
TABLE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
})),
other_key,
ARBITRARY_NAMESPACE_ID,
Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC),
ARBITRARY_TABLE_ID,
Arc::clone(&*DEFER_TABLE_NAME_1_SEC),
TRANSITION_SHARD_ID,
)
.await;
assert_eq!(got.lock().partition_id(), other_key_id);
assert_eq!(got.lock().table_id(), TABLE_ID);
assert_eq!(&**got.lock().table_name().get().await, TABLE_NAME);
assert_eq!(got.lock().table_id(), ARBITRARY_TABLE_ID);
assert_eq!(
&**got.lock().table_name().get().await,
&***ARBITRARY_TABLE_NAME
);
}
#[tokio::test]
async fn test_miss_table_id() {
let other_table = TableId::new(1234);
let inner = MockPartitionProvider::default().with_partition(PartitionData::new(
PARTITION_ID,
PARTITION_KEY.into(),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
})),
other_table,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
));
let inner = MockPartitionProvider::default().with_partition(
PartitionDataBuilder::new()
.with_table_id(other_table)
.build(),
);
let partition = Partition {
id: PARTITION_ID,
shard_id: TRANSITION_SHARD_ID,
table_id: TABLE_ID,
partition_key: PARTITION_KEY.into(),
sort_key: Default::default(),
persisted_sequence_number: Default::default(),
new_file_at: Default::default(),
};
let partition = arbitrary_partition();
let cache = new_cache(inner, [partition]);
let got = cache
.get_partition(
PARTITION_KEY.into(),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
})),
ARBITRARY_PARTITION_KEY.clone(),
ARBITRARY_NAMESPACE_ID,
Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC),
other_table,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
})),
Arc::clone(&*DEFER_TABLE_NAME_1_SEC),
TRANSITION_SHARD_ID,
)
.await;
assert_eq!(got.lock().partition_id(), PARTITION_ID);
assert_eq!(got.lock().partition_id(), ARBITRARY_PARTITION_ID);
assert_eq!(got.lock().table_id(), other_table);
assert_eq!(&**got.lock().table_name().get().await, TABLE_NAME);
assert_eq!(
&**got.lock().table_name().get().await,
&***ARBITRARY_TABLE_NAME
);
}
}


@ -282,26 +282,16 @@ mod tests {
use test_helpers::timeout::FutureTimeout;
use tokio::sync::{Notify, Semaphore};
use crate::buffer_tree::partition::{resolver::mock::MockPartitionProvider, SortKeyState};
use crate::{
buffer_tree::partition::{resolver::mock::MockPartitionProvider, SortKeyState},
test_util::{
PartitionDataBuilder, ARBITRARY_NAMESPACE_ID, ARBITRARY_PARTITION_KEY,
ARBITRARY_TABLE_ID, DEFER_NAMESPACE_NAME_1_SEC, DEFER_TABLE_NAME_1_SEC,
},
};
use super::*;
const PARTITION_ID: PartitionId = PartitionId::new(4242);
const PARTITION_KEY: &str = "bananas";
const NAMESPACE_ID: NamespaceId = NamespaceId::new(42);
const TABLE_ID: TableId = TableId::new(42);
lazy_static! {
static ref NAMESPACE_NAME: Arc<DeferredLoad<NamespaceName>> =
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from("ns-platanos")
}));
static ref TABLE_NAME: Arc<DeferredLoad<TableName>> =
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from("platanos")
}));
}
/// This test proves that parallel queries for the same partition are
/// coalesced, returning the same [`PartitionData`] instance and submitting
/// a single query to the inner resolver.
@ -309,16 +299,7 @@ mod tests {
async fn test_coalesce() {
const MAX_TASKS: usize = 50;
let data = PartitionData::new(
PARTITION_ID,
PartitionKey::from(PARTITION_KEY),
NAMESPACE_ID,
Arc::clone(&*NAMESPACE_NAME),
TABLE_ID,
Arc::clone(&*TABLE_NAME),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
);
let data = PartitionDataBuilder::new().build();
// Add a single instance of the partition - if more than one call is
// made, this will cause a panic.
@ -327,14 +308,12 @@ mod tests {
let results = (0..MAX_TASKS)
.map(|_| {
let namespace_name = Arc::clone(&*NAMESPACE_NAME);
let table_name = Arc::clone(&*TABLE_NAME);
layer.get_partition(
PartitionKey::from(PARTITION_KEY),
NAMESPACE_ID,
namespace_name,
TABLE_ID,
table_name,
ARBITRARY_PARTITION_KEY.clone(),
ARBITRARY_NAMESPACE_ID,
Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC),
ARBITRARY_TABLE_ID,
Arc::clone(&*DEFER_TABLE_NAME_1_SEC),
TRANSITION_SHARD_ID,
)
})
@ -382,7 +361,7 @@ mod tests {
'life0: 'async_trait,
Self: 'async_trait,
{
if partition_key == PartitionKey::from(PARTITION_KEY) {
if partition_key == *ARBITRARY_PARTITION_KEY {
return future::pending().boxed();
}
future::ready(Arc::clone(&self.p)).boxed()
@ -394,16 +373,7 @@ mod tests {
async fn test_disjoint_parallelised() {
use futures::Future;
let data = PartitionData::new(
PARTITION_ID,
PartitionKey::from(PARTITION_KEY),
NAMESPACE_ID,
Arc::clone(&*NAMESPACE_NAME),
TABLE_ID,
Arc::clone(&*TABLE_NAME),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
);
let data = PartitionDataBuilder::new().build();
// Add a single instance of the partition - if more than one call is
// made to the mock, it will panic.
@ -415,19 +385,19 @@ mod tests {
// The following two partitions are for the same (blocked) partition and
// neither resolve.
let pa_1 = layer.get_partition(
PartitionKey::from(PARTITION_KEY),
NAMESPACE_ID,
Arc::clone(&*NAMESPACE_NAME),
TABLE_ID,
Arc::clone(&*TABLE_NAME),
ARBITRARY_PARTITION_KEY.clone(),
ARBITRARY_NAMESPACE_ID,
Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC),
ARBITRARY_TABLE_ID,
Arc::clone(&*DEFER_TABLE_NAME_1_SEC),
TRANSITION_SHARD_ID,
);
let pa_2 = layer.get_partition(
PartitionKey::from(PARTITION_KEY),
NAMESPACE_ID,
Arc::clone(&*NAMESPACE_NAME),
TABLE_ID,
Arc::clone(&*TABLE_NAME),
ARBITRARY_PARTITION_KEY.clone(),
ARBITRARY_NAMESPACE_ID,
Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC),
ARBITRARY_TABLE_ID,
Arc::clone(&*DEFER_TABLE_NAME_1_SEC),
TRANSITION_SHARD_ID,
);
@ -444,11 +414,11 @@ mod tests {
// But a non-blocked partition is resolved without issue.
let _ = layer
.get_partition(
PartitionKey::from("platanos"),
NAMESPACE_ID,
Arc::clone(&*NAMESPACE_NAME),
TABLE_ID,
Arc::clone(&*TABLE_NAME),
PartitionKey::from("orange you glad i didn't say bananas"),
ARBITRARY_NAMESPACE_ID,
Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC),
ARBITRARY_TABLE_ID,
Arc::clone(&*DEFER_TABLE_NAME_1_SEC),
TRANSITION_SHARD_ID,
)
.with_timeout_panic(Duration::from_secs(5))
@ -510,26 +480,17 @@ mod tests {
let inner = Arc::new(SemaphoreResolver {
sem: Arc::clone(&fake_conn_pool),
wait: Arc::clone(&notify),
p: Arc::new(Mutex::new(PartitionData::new(
PARTITION_ID,
PartitionKey::from(PARTITION_KEY),
NAMESPACE_ID,
Arc::clone(&*NAMESPACE_NAME),
TABLE_ID,
Arc::clone(&*TABLE_NAME),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
))),
p: Arc::new(Mutex::new(PartitionDataBuilder::new().build())),
});
let layer = Arc::new(CoalescePartitionResolver::new(inner));
let fut = layer.get_partition(
PartitionKey::from(PARTITION_KEY),
NAMESPACE_ID,
Arc::clone(&*NAMESPACE_NAME),
TABLE_ID,
Arc::clone(&*TABLE_NAME),
ARBITRARY_PARTITION_KEY.clone(),
ARBITRARY_NAMESPACE_ID,
Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC),
ARBITRARY_TABLE_ID,
Arc::clone(&*DEFER_TABLE_NAME_1_SEC),
TRANSITION_SHARD_ID,
);

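test_coalesce above proves that concurrent lookups of the same partition share a single call to the inner resolver. A self-contained sketch of that coalescing technique using futures::Shared; the real CoalescePartitionResolver also handles cleanup and the blocked/disjoint cases exercised here, which this toy version skips.

use std::{collections::HashMap, sync::Arc, time::Duration};

use futures::future::{BoxFuture, FutureExt, Shared};
use parking_lot::Mutex;

#[derive(Default)]
struct Coalescer {
    // One shared in-flight future per key; concurrent callers clone it
    // instead of issuing a second resolve.
    inflight: Mutex<HashMap<String, Shared<BoxFuture<'static, Arc<String>>>>>,
}

impl Coalescer {
    async fn get(&self, key: &str) -> Arc<String> {
        let fut = {
            let mut map = self.inflight.lock();
            map.entry(key.to_string())
                .or_insert_with(|| {
                    let key = key.to_string();
                    async move {
                        // Stand-in for the slow inner resolver (e.g. a catalog call).
                        tokio::time::sleep(Duration::from_millis(10)).await;
                        Arc::new(format!("partition for {key}"))
                    }
                    .boxed()
                    .shared()
                })
                .clone()
        }; // lock released before awaiting
        fut.await
    }
}

#[tokio::main]
async fn main() {
    let resolver = Coalescer::default();
    let (a, b) = tokio::join!(resolver.get("bananas"), resolver.get("bananas"));
    // Both callers observe the very same resolved instance.
    assert!(Arc::ptr_eq(&a, &b));
}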

@ -9,7 +9,7 @@ use parking_lot::Mutex;
use super::r#trait::PartitionProvider;
use crate::{
buffer_tree::{namespace::NamespaceName, partition::PartitionData, table::TableName},
deferred_load::DeferredLoad,
deferred_load::{self, DeferredLoad},
};
/// A mock [`PartitionProvider`] for testing that returns pre-initialised
@ -65,8 +65,27 @@ impl PartitionProvider for MockPartitionProvider {
});
assert_eq!(p.namespace_id(), namespace_id);
assert_eq!(p.namespace_name().to_string(), namespace_name.to_string());
assert_eq!(p.table_name().to_string(), table_name.to_string());
let actual_namespace_name = p.namespace_name().to_string();
let expected_namespace_name = namespace_name.get().await.to_string();
assert!(
(actual_namespace_name.as_str() == expected_namespace_name)
|| (actual_namespace_name == deferred_load::UNRESOLVED_DISPLAY_STRING),
"unexpected namespace name: {actual_namespace_name}. \
expected {expected_namespace_name} or {}",
deferred_load::UNRESOLVED_DISPLAY_STRING,
);
let actual_table_name = p.table_name().to_string();
let expected_table_name = table_name.get().await.to_string();
assert!(
(actual_table_name.as_str() == expected_table_name)
|| (actual_table_name == deferred_load::UNRESOLVED_DISPLAY_STRING),
"unexpected table name: {actual_table_name}. \
expected {expected_table_name} or {}",
deferred_load::UNRESOLVED_DISPLAY_STRING,
);
Arc::new(Mutex::new(p))
}
}


@ -60,54 +60,43 @@ where
mod tests {
use std::{sync::Arc, time::Duration};
use data_types::{PartitionId, ShardId};
use data_types::{PartitionId, ShardId, TRANSITION_SHARD_ID};
use super::*;
use crate::buffer_tree::partition::{resolver::mock::MockPartitionProvider, SortKeyState};
const TRANSITION_SHARD_ID: ShardId = ShardId::new(84);
use crate::{
buffer_tree::partition::{resolver::mock::MockPartitionProvider, SortKeyState},
test_util::{
PartitionDataBuilder, ARBITRARY_NAMESPACE_ID, ARBITRARY_PARTITION_ID,
ARBITRARY_PARTITION_KEY, ARBITRARY_TABLE_ID, DEFER_NAMESPACE_NAME_1_SEC,
DEFER_TABLE_NAME_1_SEC,
},
};
#[tokio::test]
async fn test_arc_impl() {
let key = PartitionKey::from("bananas");
let namespace_id = NamespaceId::new(1234);
let namespace_name = Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from("ns-platanos")
}));
let table_id = TableId::new(24);
let table_name = Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from("platanos")
}));
let partition = PartitionId::new(4242);
let data = PartitionData::new(
partition,
"bananas".into(),
namespace_id,
Arc::clone(&namespace_name),
table_id,
Arc::clone(&table_name),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
);
let data = PartitionDataBuilder::new().build();
let mock = Arc::new(MockPartitionProvider::default().with_partition(data));
let got = mock
.get_partition(
key,
namespace_id,
Arc::clone(&namespace_name),
table_id,
Arc::clone(&table_name),
ARBITRARY_PARTITION_KEY.clone(),
ARBITRARY_NAMESPACE_ID,
Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC),
ARBITRARY_TABLE_ID,
Arc::clone(&*DEFER_TABLE_NAME_1_SEC),
TRANSITION_SHARD_ID,
)
.await;
assert_eq!(got.lock().partition_id(), partition);
assert_eq!(got.lock().namespace_id(), namespace_id);
assert_eq!(got.lock().partition_id(), ARBITRARY_PARTITION_ID);
assert_eq!(got.lock().namespace_id(), ARBITRARY_NAMESPACE_ID);
assert_eq!(
got.lock().namespace_name().to_string(),
namespace_name.to_string()
DEFER_NAMESPACE_NAME_1_SEC.to_string()
);
assert_eq!(
got.lock().table_name().to_string(),
DEFER_TABLE_NAME_1_SEC.to_string()
);
assert_eq!(got.lock().table_name().to_string(), table_name.to_string());
}
}


@ -180,7 +180,7 @@ where
Arc::new(NamespaceData::new(
namespace_id,
self.namespace_name_resolver.for_namespace(namespace_id),
Arc::new(self.namespace_name_resolver.for_namespace(namespace_id)),
Arc::clone(&self.table_name_resolver),
Arc::clone(&self.partition_provider),
Arc::clone(&self.post_write_observer),
@ -234,7 +234,7 @@ mod tests {
use std::{sync::Arc, time::Duration};
use assert_matches::assert_matches;
use data_types::{PartitionId, PartitionKey};
use data_types::{PartitionId, PartitionKey, TRANSITION_SHARD_ID};
use datafusion::{assert_batches_eq, assert_batches_sorted_eq};
use futures::{StreamExt, TryStreamExt};
use metric::{Attributes, Metric};
@ -242,52 +242,35 @@ mod tests {
use super::*;
use crate::{
buffer_tree::{
namespace::{
name_resolver::mock::MockNamespaceNameProvider, NamespaceData, NamespaceName,
},
partition::{resolver::mock::MockPartitionProvider, PartitionData, SortKeyState},
namespace::{name_resolver::mock::MockNamespaceNameProvider, NamespaceData},
partition::resolver::mock::MockPartitionProvider,
post_write::mock::MockPostWriteObserver,
table::{name_resolver::mock::MockTableNameProvider, TableName},
table::TableName,
},
deferred_load::{self, DeferredLoad},
query::partition_response::PartitionResponse,
test_util::make_write_op,
test_util::{
make_write_op, PartitionDataBuilder, ARBITRARY_NAMESPACE_ID, ARBITRARY_NAMESPACE_NAME,
ARBITRARY_PARTITION_KEY, ARBITRARY_TABLE_ID, ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_NAME_PROVIDER, DEFER_NAMESPACE_NAME_1_MS,
},
};
const TABLE_ID: TableId = TableId::new(44);
const TABLE_NAME: &str = "bananas";
const NAMESPACE_NAME: &str = "platanos";
const NAMESPACE_ID: NamespaceId = NamespaceId::new(42);
const TRANSITION_SHARD_ID: ShardId = ShardId::new(84);
#[tokio::test]
async fn test_namespace_init_table() {
let metrics = Arc::new(metric::Registry::default());
// Configure the mock partition provider to return a partition for this
// table ID.
let partition_provider = Arc::new(MockPartitionProvider::default().with_partition(
PartitionData::new(
PartitionId::new(0),
PartitionKey::from("banana-split"),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
})),
TABLE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
),
));
let partition_provider = Arc::new(
MockPartitionProvider::default().with_partition(PartitionDataBuilder::new().build()),
);
// Init the namespace
let ns = NamespaceData::new(
NAMESPACE_ID,
DeferredLoad::new(Duration::from_millis(1), async { NAMESPACE_NAME.into() }),
Arc::new(MockTableNameProvider::new(TABLE_NAME)),
ARBITRARY_NAMESPACE_ID,
Arc::clone(&*DEFER_NAMESPACE_NAME_1_MS),
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
partition_provider,
Arc::new(MockPostWriteObserver::default()),
&metrics,
@ -297,27 +280,31 @@ mod tests {
// Assert the namespace name was stored
let name = ns.namespace_name().to_string();
assert!(
(name == NAMESPACE_NAME) || (name == deferred_load::UNRESOLVED_DISPLAY_STRING),
(name.as_str() == &***ARBITRARY_NAMESPACE_NAME)
|| (name == deferred_load::UNRESOLVED_DISPLAY_STRING),
"unexpected namespace name: {name}"
);
// Assert the namespace does not contain the test data
assert!(ns.table(TABLE_ID).is_none());
assert!(ns.table(ARBITRARY_TABLE_ID).is_none());
// Write some test data
ns.apply(DmlOperation::Write(make_write_op(
&PartitionKey::from("banana-split"),
NAMESPACE_ID,
TABLE_NAME,
TABLE_ID,
&ARBITRARY_PARTITION_KEY,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_ID,
0,
r#"bananas,city=Madrid day="sun",temp=55 22"#,
&format!(
r#"{},city=Madrid day="sun",temp=55 22"#,
&*ARBITRARY_TABLE_NAME
),
)))
.await
.expect("buffer op should succeed");
// Referencing the table should succeed
assert!(ns.table(TABLE_ID).is_some());
assert!(ns.table(ARBITRARY_TABLE_ID).is_some());
// And the table counter metric should increase
let tables = metrics
@ -330,18 +317,21 @@ mod tests {
// Ensure the deferred namespace name is loaded.
let name = ns.namespace_name().get().await;
assert_eq!(&**name, NAMESPACE_NAME);
assert_eq!(ns.namespace_name().to_string(), NAMESPACE_NAME);
assert_eq!(&**name, &***ARBITRARY_NAMESPACE_NAME);
assert_eq!(
ns.namespace_name().to_string().as_str(),
&***ARBITRARY_NAMESPACE_NAME
);
}
/// Generate a test that performs a set of writes and assert the data within
/// the table with TABLE_ID in the namespace with NAMESPACE_ID.
/// the table with ARBITRARY_TABLE_ID in the namespace with ARBITRARY_NAMESPACE_ID.
macro_rules! test_write_query {
(
$name:ident,
partitions = [$($partition:expr), +], // The set of PartitionData for the mock partition provider
writes = [$($write:expr), *], // The set of DmlWrite to apply()
want = $want:expr // The expected results of querying NAMESPACE_ID and TABLE_ID
want = $want:expr // The expected results of querying ARBITRARY_NAMESPACE_ID and ARBITRARY_TABLE_ID
) => {
paste::paste! {
#[tokio::test]
@ -356,8 +346,8 @@ mod tests {
// Init the buffer tree
let buf = BufferTree::new(
Arc::new(MockNamespaceNameProvider::default()),
Arc::new(MockTableNameProvider::new(TABLE_NAME)),
Arc::new(MockNamespaceNameProvider::new(&**ARBITRARY_NAMESPACE_NAME)),
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
partition_provider,
Arc::new(MockPostWriteObserver::default()),
Arc::new(metric::Registry::default()),
@ -371,9 +361,9 @@ mod tests {
.expect("failed to perform write");
)*
// Execute the query against NAMESPACE_ID and TABLE_ID
// Execute the query against ARBITRARY_NAMESPACE_ID and ARBITRARY_TABLE_ID
let batches = buf
.query_exec(NAMESPACE_ID, TABLE_ID, vec![], None)
.query_exec(ARBITRARY_NAMESPACE_ID, ARBITRARY_TABLE_ID, vec![], None)
.await
.expect("query should succeed")
.into_record_batches()
@ -381,7 +371,7 @@ mod tests {
.await
.expect("query failed");
// Assert the contents of NAMESPACE_ID and TABLE_ID
// Assert the contents of ARBITRARY_NAMESPACE_ID and ARBITRARY_TABLE_ID
assert_batches_sorted_eq!(
$want,
&batches
@ -394,27 +384,20 @@ mod tests {
// A simple "read your writes" test.
test_write_query!(
read_writes,
partitions = [PartitionData::new(
PartitionId::new(0),
PartitionKey::from("p1"),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
})),
TABLE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
)],
partitions = [PartitionDataBuilder::new()
.with_partition_id(PartitionId::new(0))
.with_partition_key(PartitionKey::from("p1"))
.build()],
writes = [make_write_op(
&PartitionKey::from("p1"),
NAMESPACE_ID,
TABLE_NAME,
TABLE_ID,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_ID,
0,
r#"bananas,region=Asturias temp=35 4242424242"#,
&format!(
r#"{},region=Asturias temp=35 4242424242"#,
&*ARBITRARY_TABLE_NAME
),
)],
want = [
"+----------+------+-------------------------------+",
@ -430,51 +413,37 @@ mod tests {
test_write_query!(
multiple_partitions,
partitions = [
PartitionData::new(
PartitionId::new(0),
PartitionKey::from("p1"),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
})),
TABLE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
),
PartitionData::new(
PartitionId::new(1),
PartitionKey::from("p2"),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
})),
TABLE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
)
PartitionDataBuilder::new()
.with_partition_id(PartitionId::new(0))
.with_partition_key(PartitionKey::from("p1"))
.build(),
PartitionDataBuilder::new()
.with_partition_id(PartitionId::new(1))
.with_partition_key(PartitionKey::from("p2"))
.build()
],
writes = [
make_write_op(
&PartitionKey::from("p1"),
NAMESPACE_ID,
TABLE_NAME,
TABLE_ID,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_ID,
0,
r#"bananas,region=Madrid temp=35 4242424242"#,
&format!(
r#"{},region=Madrid temp=35 4242424242"#,
&*ARBITRARY_TABLE_NAME
),
),
make_write_op(
&PartitionKey::from("p2"),
NAMESPACE_ID,
TABLE_NAME,
TABLE_ID,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_ID,
0,
r#"bananas,region=Asturias temp=25 4242424242"#,
&format!(
r#"{},region=Asturias temp=25 4242424242"#,
&*ARBITRARY_TABLE_NAME
),
)
],
want = [
@ -492,51 +461,39 @@ mod tests {
test_write_query!(
filter_multiple_namespaces,
partitions = [
PartitionData::new(
PartitionId::new(0),
PartitionKey::from("p1"),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
})),
TABLE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
),
PartitionData::new(
PartitionId::new(1),
PartitionKey::from("p2"),
NamespaceId::new(4321), // A different namespace ID.
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
})),
TableId::new(1234), // A different table ID.
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
)
PartitionDataBuilder::new()
.with_partition_id(PartitionId::new(0))
.with_partition_key(PartitionKey::from("p1"))
.build(),
PartitionDataBuilder::new()
.with_partition_id(PartitionId::new(1))
.with_partition_key(PartitionKey::from("p2"))
.with_namespace_id(NamespaceId::new(4321)) // A different namespace ID.
.with_table_id(TableId::new(1234)) // A different table ID.
.build()
],
writes = [
make_write_op(
&PartitionKey::from("p1"),
NAMESPACE_ID,
TABLE_NAME,
TABLE_ID,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_ID,
0,
r#"bananas,region=Madrid temp=25 4242424242"#,
&format!(
r#"{},region=Madrid temp=25 4242424242"#,
&*ARBITRARY_TABLE_NAME
),
),
make_write_op(
&PartitionKey::from("p2"),
NamespaceId::new(4321), // A different namespace ID.
TABLE_NAME,
&ARBITRARY_TABLE_NAME,
TableId::new(1234), // A different table ID
0,
r#"bananas,region=Asturias temp=35 4242424242"#,
&format!(
r#"{},region=Asturias temp=35 4242424242"#,
&*ARBITRARY_TABLE_NAME
),
)
],
want = [
@ -553,51 +510,38 @@ mod tests {
test_write_query!(
filter_multiple_tabls,
partitions = [
PartitionData::new(
PartitionId::new(0),
PartitionKey::from("p1"),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
})),
TABLE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
),
PartitionData::new(
PartitionId::new(1),
PartitionKey::from("p2"),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
})),
TableId::new(1234), // A different table ID.
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
)
PartitionDataBuilder::new()
.with_partition_id(PartitionId::new(0))
.with_partition_key(PartitionKey::from("p1"))
.build(),
PartitionDataBuilder::new()
.with_partition_id(PartitionId::new(1))
.with_partition_key(PartitionKey::from("p2"))
.with_table_id(TableId::new(1234)) // A different table ID.
.build()
],
writes = [
make_write_op(
&PartitionKey::from("p1"),
NAMESPACE_ID,
TABLE_NAME,
TABLE_ID,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_ID,
0,
r#"bananas,region=Madrid temp=25 4242424242"#,
&format!(
r#"{},region=Madrid temp=25 4242424242"#,
&*ARBITRARY_TABLE_NAME
),
),
make_write_op(
&PartitionKey::from("p2"),
NAMESPACE_ID,
TABLE_NAME,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
TableId::new(1234), // A different table ID
0,
r#"bananas,region=Asturias temp=35 4242424242"#,
&format!(
r#"{},region=Asturias temp=35 4242424242"#,
&*ARBITRARY_TABLE_NAME
),
)
],
want = [
@ -615,36 +559,32 @@ mod tests {
// writes).
test_write_query!(
duplicate_writes,
partitions = [PartitionData::new(
PartitionId::new(0),
PartitionKey::from("p1"),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
})),
TABLE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
)],
partitions = [PartitionDataBuilder::new()
.with_partition_id(PartitionId::new(0))
.with_partition_key(PartitionKey::from("p1"))
.build()],
writes = [
make_write_op(
&PartitionKey::from("p1"),
NAMESPACE_ID,
TABLE_NAME,
TABLE_ID,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_ID,
0,
r#"bananas,region=Asturias temp=35 4242424242"#,
&format!(
r#"{},region=Asturias temp=35 4242424242"#,
&*ARBITRARY_TABLE_NAME
),
),
make_write_op(
&PartitionKey::from("p1"),
NAMESPACE_ID,
TABLE_NAME,
TABLE_ID,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_ID,
1,
r#"bananas,region=Asturias temp=12 4242424242"#,
&format!(
r#"{},region=Asturias temp=12 4242424242"#,
&*ARBITRARY_TABLE_NAME
),
)
],
want = [
@ -665,56 +605,43 @@ mod tests {
// p1.
let partition_provider = Arc::new(
MockPartitionProvider::default()
.with_partition(PartitionData::new(
PartitionId::new(0),
PartitionKey::from("p1"),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
})),
TABLE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
))
.with_partition(PartitionData::new(
PartitionId::new(0),
PartitionKey::from("p2"),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
})),
TABLE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
)),
.with_partition(
PartitionDataBuilder::new()
.with_partition_id(PartitionId::new(0))
.with_partition_key(PartitionKey::from("p1"))
.build(),
)
.with_partition(
PartitionDataBuilder::new()
.with_partition_id(PartitionId::new(0))
.with_partition_key(PartitionKey::from("p2"))
.build(),
),
);
let metrics = Arc::new(metric::Registry::default());
// Init the buffer tree
let buf = BufferTree::new(
Arc::new(MockNamespaceNameProvider::default()),
Arc::new(MockTableNameProvider::new(TABLE_NAME)),
Arc::new(MockNamespaceNameProvider::new(&**ARBITRARY_NAMESPACE_NAME)),
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
partition_provider,
Arc::new(MockPostWriteObserver::default()),
Arc::clone(&metrics),
TRANSITION_SHARD_ID,
);
// Write data to partition p1, in table "bananas".
// Write data to partition p1, in the arbitrary table
buf.apply(DmlOperation::Write(make_write_op(
&PartitionKey::from("p1"),
NAMESPACE_ID,
TABLE_NAME,
TABLE_ID,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_ID,
0,
r#"bananas,region=Asturias temp=35 4242424242"#,
&format!(
r#"{},region=Asturias temp=35 4242424242"#,
&*ARBITRARY_TABLE_NAME
),
)))
.await
.expect("failed to write initial data");
@ -723,11 +650,14 @@ mod tests {
// different temp value.
buf.apply(DmlOperation::Write(make_write_op(
&PartitionKey::from("p2"),
NAMESPACE_ID,
TABLE_NAME,
TABLE_ID,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_ID,
1,
r#"bananas,region=Asturias temp=12 4242424242"#,
&format!(
r#"{},region=Asturias temp=12 4242424242"#,
&*ARBITRARY_TABLE_NAME
),
)))
.await
.expect("failed to overwrite data");
@ -757,59 +687,41 @@ mod tests {
#[tokio::test]
async fn test_partition_iter() {
const TABLE2_ID: TableId = TableId::new(1234321);
const TABLE2_NAME: &str = "another_table";
// Configure the mock partition provider to return a single partition, named
// p1.
let partition_provider = Arc::new(
MockPartitionProvider::default()
.with_partition(PartitionData::new(
PartitionId::new(0),
PartitionKey::from("p1"),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
})),
TABLE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
))
.with_partition(PartitionData::new(
PartitionId::new(1),
PartitionKey::from("p2"),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
})),
TABLE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
))
.with_partition(PartitionData::new(
PartitionId::new(2),
PartitionKey::from("p3"),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
})),
TABLE2_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from("another_table")
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
)),
.with_partition(
PartitionDataBuilder::new()
.with_partition_id(PartitionId::new(0))
.with_partition_key(PartitionKey::from("p1"))
.build(),
)
.with_partition(
PartitionDataBuilder::new()
.with_partition_id(PartitionId::new(1))
.with_partition_key(PartitionKey::from("p2"))
.build(),
)
.with_partition(
PartitionDataBuilder::new()
.with_partition_id(PartitionId::new(2))
.with_partition_key(PartitionKey::from("p3"))
.with_table_id(TABLE2_ID)
.with_table_name(Arc::new(DeferredLoad::new(
Duration::from_secs(1),
async move { TableName::from(TABLE2_NAME) },
)))
.build(),
),
);
// Init the buffer tree
let buf = BufferTree::new(
Arc::new(MockNamespaceNameProvider::default()),
Arc::new(MockTableNameProvider::new(TABLE_NAME)),
Arc::new(MockNamespaceNameProvider::new(&**ARBITRARY_NAMESPACE_NAME)),
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
partition_provider,
Arc::new(MockPostWriteObserver::default()),
Arc::clone(&Arc::new(metric::Registry::default())),
@ -818,28 +730,34 @@ mod tests {
assert_eq!(buf.partitions().count(), 0);
// Write data to partition p1, in table "bananas".
// Write data to partition p1, in the arbitrary table
buf.apply(DmlOperation::Write(make_write_op(
&PartitionKey::from("p1"),
NAMESPACE_ID,
TABLE_NAME,
TABLE_ID,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_ID,
0,
r#"bananas,region=Asturias temp=35 4242424242"#,
&format!(
r#"{},region=Asturias temp=35 4242424242"#,
&*ARBITRARY_TABLE_NAME
),
)))
.await
.expect("failed to write initial data");
assert_eq!(buf.partitions().count(), 1);
// Write data to partition p2, in table "bananas".
// Write data to partition p2, in the arbitrary table
buf.apply(DmlOperation::Write(make_write_op(
&PartitionKey::from("p2"),
NAMESPACE_ID,
TABLE_NAME,
TABLE_ID,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_ID,
0,
r#"bananas,region=Asturias temp=35 4242424242"#,
&format!(
r#"{},region=Asturias temp=35 4242424242"#,
&*ARBITRARY_TABLE_NAME
),
)))
.await
.expect("failed to write initial data");
@ -849,11 +767,11 @@ mod tests {
// Write data to partition p3, in the second table
buf.apply(DmlOperation::Write(make_write_op(
&PartitionKey::from("p3"),
NAMESPACE_ID,
"another_table",
ARBITRARY_NAMESPACE_ID,
TABLE2_NAME,
TABLE2_ID,
0,
r#"another_table,region=Asturias temp=35 4242424242"#,
&format!(r#"{},region=Asturias temp=35 4242424242"#, TABLE2_NAME),
)))
.await
.expect("failed to write initial data");
@ -874,27 +792,19 @@ mod tests {
/// returns no data (as opposed to panicking, etc).
#[tokio::test]
async fn test_not_found() {
let partition_provider = Arc::new(MockPartitionProvider::default().with_partition(
PartitionData::new(
PartitionId::new(0),
PartitionKey::from("p1"),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
})),
TABLE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
let partition_provider = Arc::new(
MockPartitionProvider::default().with_partition(
PartitionDataBuilder::new()
.with_partition_id(PartitionId::new(0))
.with_partition_key(PartitionKey::from("p1"))
.build(),
),
));
);
// Init the BufferTree
let buf = BufferTree::new(
Arc::new(MockNamespaceNameProvider::default()),
Arc::new(MockTableNameProvider::new(TABLE_NAME)),
Arc::new(MockNamespaceNameProvider::new(&**ARBITRARY_NAMESPACE_NAME)),
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
partition_provider,
Arc::new(MockPostWriteObserver::default()),
Arc::new(metric::Registry::default()),
@ -903,37 +813,40 @@ mod tests {
// Query the empty tree
let err = buf
.query_exec(NAMESPACE_ID, TABLE_ID, vec![], None)
.query_exec(ARBITRARY_NAMESPACE_ID, ARBITRARY_TABLE_ID, vec![], None)
.await
.expect_err("query should fail");
assert_matches!(err, QueryError::NamespaceNotFound(ns) => {
assert_eq!(ns, NAMESPACE_ID);
assert_eq!(ns, ARBITRARY_NAMESPACE_ID);
});
// Write data to partition p1, in table "bananas".
// Write data to partition p1, in the arbitrary table
buf.apply(DmlOperation::Write(make_write_op(
&PartitionKey::from("p1"),
NAMESPACE_ID,
TABLE_NAME,
TABLE_ID,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_ID,
0,
r#"bananas,region=Asturias temp=35 4242424242"#,
&format!(
r#"{},region=Asturias temp=35 4242424242"#,
&*ARBITRARY_TABLE_NAME
),
)))
.await
.expect("failed to write data");
// Ensure an unknown table errors
let err = buf
.query_exec(NAMESPACE_ID, TableId::new(1234), vec![], None)
.query_exec(ARBITRARY_NAMESPACE_ID, TableId::new(1234), vec![], None)
.await
.expect_err("query should fail");
assert_matches!(err, QueryError::TableNotFound(ns, t) => {
assert_eq!(ns, NAMESPACE_ID);
assert_eq!(ns, ARBITRARY_NAMESPACE_ID);
assert_eq!(t, TableId::new(1234));
});
// Ensure a valid namespace / table does not error
buf.query_exec(NAMESPACE_ID, TABLE_ID, vec![], None)
buf.query_exec(ARBITRARY_NAMESPACE_ID, ARBITRARY_TABLE_ID, vec![], None)
.await
.expect("namespace / table should exist");
}
@ -960,54 +873,41 @@ mod tests {
// p1 and p2.
let partition_provider = Arc::new(
MockPartitionProvider::default()
.with_partition(PartitionData::new(
PartitionId::new(0),
PartitionKey::from("p1"),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
})),
TABLE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
))
.with_partition(PartitionData::new(
PartitionId::new(1),
PartitionKey::from("p2"),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
})),
TABLE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
)),
.with_partition(
PartitionDataBuilder::new()
.with_partition_id(PartitionId::new(0))
.with_partition_key(PartitionKey::from("p1"))
.build(),
)
.with_partition(
PartitionDataBuilder::new()
.with_partition_id(PartitionId::new(1))
.with_partition_key(PartitionKey::from("p2"))
.build(),
),
);
// Init the buffer tree
let buf = BufferTree::new(
Arc::new(MockNamespaceNameProvider::default()),
Arc::new(MockTableNameProvider::new(TABLE_NAME)),
Arc::new(MockNamespaceNameProvider::new(&**ARBITRARY_NAMESPACE_NAME)),
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
partition_provider,
Arc::new(MockPostWriteObserver::default()),
Arc::new(metric::Registry::default()),
TRANSITION_SHARD_ID,
);
// Write data to partition p1, in table "bananas".
// Write data to partition p1, in the arbitrary table
buf.apply(DmlOperation::Write(make_write_op(
&PartitionKey::from("p1"),
NAMESPACE_ID,
TABLE_NAME,
TABLE_ID,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_ID,
0,
r#"bananas,region=Madrid temp=35 4242424242"#,
&format!(
r#"{},region=Madrid temp=35 4242424242"#,
&*ARBITRARY_TABLE_NAME
),
)))
.await
.expect("failed to write initial data");
@ -1015,7 +915,7 @@ mod tests {
// Execute a query of the buffer tree, generating the result stream, but
// DO NOT consume it.
let stream = buf
.query_exec(NAMESPACE_ID, TABLE_ID, vec![], None)
.query_exec(ARBITRARY_NAMESPACE_ID, ARBITRARY_TABLE_ID, vec![], None)
.await
.expect("query should succeed")
.into_partition_stream();
@ -1024,11 +924,14 @@ mod tests {
// that creates a new partition (p2) in the same table.
buf.apply(DmlOperation::Write(make_write_op(
&PartitionKey::from("p2"),
NAMESPACE_ID,
TABLE_NAME,
TABLE_ID,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_ID,
1,
r#"bananas,region=Asturias temp=20 4242424242"#,
&format!(
r#"{},region=Asturias temp=20 4242424242"#,
&*ARBITRARY_TABLE_NAME
),
)))
.await
.expect("failed to perform concurrent write to new partition");
@ -1037,11 +940,14 @@ mod tests {
// results snapshot (p1) before the partition is read.
buf.apply(DmlOperation::Write(make_write_op(
&PartitionKey::from("p1"),
NAMESPACE_ID,
TABLE_NAME,
TABLE_ID,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_ID,
2,
r#"bananas,region=Murcia temp=30 4242424242"#,
&format!(
r#"{},region=Murcia temp=30 4242424242"#,
&*ARBITRARY_TABLE_NAME
),
)))
.await
.expect("failed to perform concurrent write to existing partition");

View File

@ -95,7 +95,7 @@ impl<O> TableData<O> {
/// for the first time.
pub(super) fn new(
table_id: TableId,
table_name: DeferredLoad<TableName>,
table_name: Arc<DeferredLoad<TableName>>,
namespace_id: NamespaceId,
namespace_name: Arc<DeferredLoad<NamespaceName>>,
partition_provider: Arc<dyn PartitionProvider>,
@ -104,7 +104,7 @@ impl<O> TableData<O> {
) -> Self {
Self {
table_id,
table_name: Arc::new(table_name),
table_name,
namespace_id,
namespace_name,
partition_data: Default::default(),
@ -260,74 +260,63 @@ where
#[cfg(test)]
mod tests {
use std::{sync::Arc, time::Duration};
use std::sync::Arc;
use data_types::PartitionId;
use data_types::TRANSITION_SHARD_ID;
use mutable_batch_lp::lines_to_batches;
use super::*;
use crate::buffer_tree::{
partition::{resolver::mock::MockPartitionProvider, PartitionData, SortKeyState},
post_write::mock::MockPostWriteObserver,
use crate::{
buffer_tree::{
partition::resolver::mock::MockPartitionProvider,
post_write::mock::MockPostWriteObserver,
},
test_util::{
PartitionDataBuilder, ARBITRARY_NAMESPACE_ID, ARBITRARY_PARTITION_KEY,
ARBITRARY_TABLE_ID, ARBITRARY_TABLE_NAME, DEFER_NAMESPACE_NAME_1_SEC,
DEFER_TABLE_NAME_1_SEC,
},
};
const TABLE_NAME: &str = "bananas";
const TABLE_ID: TableId = TableId::new(44);
const NAMESPACE_ID: NamespaceId = NamespaceId::new(42);
const PARTITION_KEY: &str = "platanos";
const PARTITION_ID: PartitionId = PartitionId::new(0);
const TRANSITION_SHARD_ID: ShardId = ShardId::new(84);
#[tokio::test]
async fn test_partition_init() {
// Configure the mock partition provider to return a partition for this
// table ID.
let partition_provider = Arc::new(MockPartitionProvider::default().with_partition(
PartitionData::new(
PARTITION_ID,
PARTITION_KEY.into(),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from("platanos")
})),
TABLE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
),
));
// Configure the mock partition provider to return a partition for a table ID.
let partition_provider = Arc::new(
MockPartitionProvider::default().with_partition(PartitionDataBuilder::new().build()),
);
let table = TableData::new(
TABLE_ID,
DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
}),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from("platanos")
})),
ARBITRARY_TABLE_ID,
Arc::clone(&*DEFER_TABLE_NAME_1_SEC),
ARBITRARY_NAMESPACE_ID,
Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC),
partition_provider,
Arc::new(MockPostWriteObserver::default()),
TRANSITION_SHARD_ID,
);
let batch = lines_to_batches(r#"bananas,bat=man value=24 42"#, 0)
.unwrap()
.remove(TABLE_NAME)
.unwrap();
let batch = lines_to_batches(
&format!(r#"{},bat=man value=24 42"#, &*ARBITRARY_TABLE_NAME),
0,
)
.unwrap()
.remove(&***ARBITRARY_TABLE_NAME)
.unwrap();
// Assert the table does not contain the test partition
assert!(table.partition_data.get(&PARTITION_KEY.into()).is_none());
assert!(table.partition_data.get(&ARBITRARY_PARTITION_KEY).is_none());
// Write some test data
table
.buffer_table_write(SequenceNumber::new(42), batch, PARTITION_KEY.into())
.buffer_table_write(
SequenceNumber::new(42),
batch,
ARBITRARY_PARTITION_KEY.clone(),
)
.await
.expect("buffer op should succeed");
// Referencing the partition should succeed
assert!(table.partition_data.get(&PARTITION_KEY.into()).is_some());
assert!(table.partition_data.get(&ARBITRARY_PARTITION_KEY).is_some());
}
}

View File

@ -49,9 +49,7 @@ impl TableNameResolver {
.get_by_id(table_id)
.await?
.unwrap_or_else(|| {
panic!(
"resolving table name for non-existent table id {table_id}"
)
panic!("resolving table name for non-existent table id {table_id}")
})
.name
.into();

View File

@ -130,7 +130,7 @@ mod tests {
// Populate the metadata with a span context.
let meta = op.meta();
op.set_meta(DmlMeta::sequenced(
*meta.sequence().unwrap(),
meta.sequence().unwrap(),
meta.producer_ts().unwrap(),
Some(span),
42,
@ -166,7 +166,7 @@ mod tests {
// Populate the metadata with a span context.
let meta = op.meta();
op.set_meta(DmlMeta::sequenced(
*meta.sequence().unwrap(),
meta.sequence().unwrap(),
meta.producer_ts().unwrap(),
Some(span),
42,

View File

@ -16,6 +16,11 @@ pub enum DmlError {
/// An error appending the [`DmlOperation`] to the write-ahead log.
#[error("wal commit failure: {0}")]
Wal(String),
/// The write has hit an internal timeout designed to prevent writes from
/// retrying indefinitely.
#[error("buffer apply request timeout")]
ApplyTimeout,
}
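// Illustrative note, not part of this change: callers surface the new
// `ApplyTimeout` variant like any other `DmlError`; the RPC layer touched
// later in this diff maps it to an internal gRPC status, e.g.
//
//     DmlError::ApplyTimeout => Self::internal(e.to_string()),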
/// A [`DmlSink`] handles [`DmlOperation`] instances in some abstract way.

View File

@ -132,51 +132,29 @@ mod tests {
use std::{future::ready, sync::Arc, task::Poll};
use assert_matches::assert_matches;
use data_types::{NamespaceId, PartitionId, PartitionKey, SequenceNumber, ShardId, TableId};
use data_types::SequenceNumber;
use futures::FutureExt;
use lazy_static::lazy_static;
use mutable_batch_lp::test_helpers::lp_to_mutable_batch;
use parking_lot::Mutex;
use test_helpers::timeout::FutureTimeout;
use crate::{
buffer_tree::{
namespace::NamespaceName, partition::PartitionData, partition::SortKeyState,
table::TableName,
},
deferred_load::DeferredLoad,
buffer_tree::partition::PartitionData,
persist::queue::mock::MockPersistQueue,
test_util::{PartitionDataBuilder, ARBITRARY_TABLE_NAME},
};
use super::*;
const PARTITION_ID: PartitionId = PartitionId::new(1);
const TRANSITION_SHARD_ID: ShardId = ShardId::new(84);
lazy_static! {
static ref PARTITION_KEY: PartitionKey = PartitionKey::from("platanos");
static ref TABLE_NAME: TableName = TableName::from("bananas");
static ref NAMESPACE_NAME: NamespaceName = NamespaceName::from("namespace-bananas");
}
// Initialise a partition containing buffered data.
fn new_partition() -> Arc<Mutex<PartitionData>> {
let mut partition = PartitionData::new(
PARTITION_ID,
PARTITION_KEY.clone(),
NamespaceId::new(3),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NAMESPACE_NAME.clone()
})),
TableId::new(4),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TABLE_NAME.clone()
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
);
let mut partition = PartitionDataBuilder::new().build();
let mb = lp_to_mutable_batch(r#"bananas,city=London people=2,pigeons="millions" 10"#).1;
let mb = lp_to_mutable_batch(&format!(
r#"{},city=London people=2,pigeons="millions" 10"#,
&*ARBITRARY_TABLE_NAME
))
.1;
partition
.buffer_write(mb, SequenceNumber::new(1))
.expect("failed to write dummy data");

View File

@ -1,4 +1,4 @@
use data_types::{NamespaceId, PartitionKey, Sequence, SequenceNumber, TableId};
use data_types::{NamespaceId, PartitionKey, SequenceNumber, TableId};
use dml::{DmlMeta, DmlOperation, DmlWrite};
use generated_types::influxdata::iox::wal::v1::sequenced_wal_op::Op;
use metric::U64Counter;
@ -12,7 +12,6 @@ use crate::{
dml_sink::{DmlError, DmlSink},
partition_iter::PartitionIter,
persist::{drain_buffer::persist_partitions, queue::PersistQueue},
TRANSITION_SHARD_INDEX,
};
/// Errors returned when replaying the write-ahead log.
@ -235,10 +234,7 @@ where
partition_key,
// The tracing context should be propagated over the RPC boundary.
DmlMeta::sequenced(
Sequence {
shard_index: TRANSITION_SHARD_INDEX, // TODO: remove this from DmlMeta
sequence_number,
},
sequence_number,
iox_time::Time::MAX, // TODO: remove this from DmlMeta
// TODO: A tracing context should be added for WAL replay.
None,
@ -258,32 +254,28 @@ where
#[cfg(test)]
mod tests {
use std::{sync::Arc, time::Duration};
use std::sync::Arc;
use assert_matches::assert_matches;
use async_trait::async_trait;
use data_types::{NamespaceId, PartitionId, PartitionKey, ShardId, TableId};
use metric::{Attributes, Metric};
use parking_lot::Mutex;
use wal::Wal;
use crate::{
buffer_tree::partition::{PartitionData, SortKeyState},
deferred_load::DeferredLoad,
buffer_tree::partition::PartitionData,
dml_sink::mock_sink::MockDmlSink,
persist::queue::mock::MockPersistQueue,
test_util::{assert_dml_writes_eq, make_write_op},
test_util::{
assert_dml_writes_eq, make_write_op, PartitionDataBuilder, ARBITRARY_NAMESPACE_ID,
ARBITRARY_PARTITION_ID, ARBITRARY_PARTITION_KEY, ARBITRARY_TABLE_ID,
ARBITRARY_TABLE_NAME,
},
wal::wal_sink::WalSink,
};
use super::*;
const PARTITION_ID: PartitionId = PartitionId::new(42);
const TABLE_ID: TableId = TableId::new(44);
const TABLE_NAME: &str = "bananas";
const NAMESPACE_NAME: &str = "platanos";
const NAMESPACE_ID: NamespaceId = NamespaceId::new(42);
#[derive(Debug)]
struct MockIter {
sink: MockDmlSink,
@ -311,28 +303,38 @@ mod tests {
// Generate the test ops that will be appended and read back
let op1 = make_write_op(
&PartitionKey::from("p1"),
NAMESPACE_ID,
TABLE_NAME,
TABLE_ID,
&ARBITRARY_PARTITION_KEY,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_ID,
24,
r#"bananas,region=Madrid temp=35 4242424242"#,
&format!(
r#"{},region=Madrid temp=35 4242424242"#,
&*ARBITRARY_TABLE_NAME
),
);
let op2 = make_write_op(
&PartitionKey::from("p1"),
NAMESPACE_ID,
TABLE_NAME,
TABLE_ID,
&ARBITRARY_PARTITION_KEY,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_ID,
25,
r#"bananas,region=Asturias temp=25 4242424242"#,
&format!(
r#"{},region=Asturias temp=25 4242424242"#,
&*ARBITRARY_TABLE_NAME
),
);
let op3 = make_write_op(
&PartitionKey::from("p1"),
NAMESPACE_ID,
TABLE_NAME,
TABLE_ID,
&ARBITRARY_PARTITION_KEY,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_ID,
42,
r#"bananas,region=Asturias temp=15 4242424242"#, // Overwrite op2
// Overwrite op2
&format!(
r#"{},region=Asturias temp=15 4242424242"#,
&*ARBITRARY_TABLE_NAME
),
);
// The write portion of this test.
@ -384,20 +386,7 @@ mod tests {
// Replay the results into a mock to capture the DmlWrites and returns
// some dummy partitions when iterated over.
let mock_sink = MockDmlSink::default().with_apply_return(vec![Ok(()), Ok(()), Ok(())]);
let mut partition = PartitionData::new(
PARTITION_ID,
PartitionKey::from("bananas"),
NAMESPACE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NAMESPACE_NAME.into()
})),
TABLE_ID,
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TABLE_NAME.into()
})),
SortKeyState::Provided(None),
ShardId::new(1234),
);
let mut partition = PartitionDataBuilder::new().build();
// Put at least one write into the buffer so it is a candidate for persistence
partition
.buffer_write(
@ -419,16 +408,23 @@ mod tests {
// Assert the ops were pushed into the DmlSink exactly as generated.
let ops = mock_iter.sink.get_calls();
assert_matches!(&*ops, &[DmlOperation::Write(ref w1),DmlOperation::Write(ref w2),DmlOperation::Write(ref w3)] => {
assert_dml_writes_eq(w1.clone(), op1);
assert_dml_writes_eq(w2.clone(), op2);
assert_dml_writes_eq(w3.clone(), op3);
});
assert_matches!(
&*ops,
&[
DmlOperation::Write(ref w1),
DmlOperation::Write(ref w2),
DmlOperation::Write(ref w3)
] => {
assert_dml_writes_eq(w1.clone(), op1);
assert_dml_writes_eq(w2.clone(), op2);
assert_dml_writes_eq(w3.clone(), op3);
}
);
// Ensure all partitions were persisted
let calls = persist.calls();
assert_matches!(&*calls, [p] => {
assert_eq!(p.lock().partition_id(), PARTITION_ID);
assert_eq!(p.lock().partition_id(), ARBITRARY_PARTITION_ID);
});
// Ensure there were no partition persist panics.

View File

@ -209,20 +209,23 @@ impl PersistHandle {
let queue_duration = metrics
.register_metric_with_options::<DurationHistogram, _>(
"ingester_persist_enqueue_duration",
"the distribution of duration a persist job spent enqueued, waiting to be processed in seconds",
|| DurationHistogramOptions::new([
Duration::from_millis(500),
Duration::from_secs(1),
Duration::from_secs(2),
Duration::from_secs(4),
Duration::from_secs(8),
Duration::from_secs(16),
Duration::from_secs(32),
Duration::from_secs(64),
Duration::from_secs(128),
Duration::from_secs(256),
DURATION_MAX,
])
"the distribution of duration a persist job spent enqueued, \
waiting to be processed in seconds",
|| {
DurationHistogramOptions::new([
Duration::from_millis(500),
Duration::from_secs(1),
Duration::from_secs(2),
Duration::from_secs(4),
Duration::from_secs(8),
Duration::from_secs(16),
Duration::from_secs(32),
Duration::from_secs(64),
Duration::from_secs(128),
Duration::from_secs(256),
DURATION_MAX,
])
},
)
.recorder(&[]);
@ -472,7 +475,7 @@ mod tests {
use std::{sync::Arc, task::Poll, time::Duration};
use assert_matches::assert_matches;
use data_types::{NamespaceId, PartitionId, PartitionKey, ShardId, TableId};
use data_types::TRANSITION_SHARD_ID;
use dml::DmlOperation;
use futures::Future;
use iox_catalog::mem::MemCatalog;
@ -486,11 +489,9 @@ mod tests {
use super::*;
use crate::{
buffer_tree::{
namespace::{name_resolver::mock::MockNamespaceNameProvider, NamespaceName},
namespace::name_resolver::mock::MockNamespaceNameProvider,
partition::resolver::mock::MockPartitionProvider,
post_write::mock::MockPostWriteObserver,
table::{name_resolver::mock::MockTableNameProvider, TableName},
BufferTree,
post_write::mock::MockPostWriteObserver, BufferTree,
},
deferred_load::DeferredLoad,
dml_sink::DmlSink,
@ -499,49 +500,29 @@ mod tests {
completion_observer::{mock::MockCompletionObserver, NopObserver},
tests::{assert_metric_counter, assert_metric_gauge},
},
test_util::make_write_op,
test_util::{
make_write_op, PartitionDataBuilder, ARBITRARY_NAMESPACE_ID, ARBITRARY_NAMESPACE_NAME,
ARBITRARY_PARTITION_ID, ARBITRARY_PARTITION_KEY, ARBITRARY_TABLE_ID,
ARBITRARY_TABLE_NAME, ARBITRARY_TABLE_NAME_PROVIDER,
},
};
const PARTITION_ID: PartitionId = PartitionId::new(42);
const NAMESPACE_ID: NamespaceId = NamespaceId::new(24);
const TABLE_ID: TableId = TableId::new(2442);
const TABLE_NAME: &str = "banana-report";
const NAMESPACE_NAME: &str = "platanos";
const TRANSITION_SHARD_ID: ShardId = ShardId::new(84);
lazy_static! {
static ref EXEC: Arc<Executor> = Arc::new(Executor::new_testing());
static ref PARTITION_KEY: PartitionKey = PartitionKey::from("bananas");
static ref NAMESPACE_NAME_LOADER: Arc<DeferredLoad<NamespaceName>> =
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NamespaceName::from(NAMESPACE_NAME)
}));
static ref TABLE_NAME_LOADER: Arc<DeferredLoad<TableName>> =
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TableName::from(TABLE_NAME)
}));
}
/// Construct a partition with the above constants, with the given sort key,
/// and containing a single write.
async fn new_partition(
partition_id: PartitionId,
sort_key: SortKeyState,
) -> Arc<Mutex<PartitionData>> {
async fn new_partition(sort_key: SortKeyState) -> Arc<Mutex<PartitionData>> {
let buffer_tree = BufferTree::new(
Arc::new(MockNamespaceNameProvider::new(NAMESPACE_NAME)),
Arc::new(MockTableNameProvider::new(TABLE_NAME)),
Arc::new(MockNamespaceNameProvider::new(&**ARBITRARY_NAMESPACE_NAME)),
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
Arc::new(
MockPartitionProvider::default().with_partition(PartitionData::new(
partition_id,
PARTITION_KEY.clone(),
NAMESPACE_ID,
Arc::clone(&NAMESPACE_NAME_LOADER),
TABLE_ID,
Arc::clone(&TABLE_NAME_LOADER),
sort_key,
TRANSITION_SHARD_ID,
)),
MockPartitionProvider::default().with_partition(
PartitionDataBuilder::new()
.with_sort_key_state(sort_key)
.build(),
),
),
Arc::new(MockPostWriteObserver::default()),
Default::default(),
@ -550,12 +531,12 @@ mod tests {
buffer_tree
.apply(DmlOperation::Write(make_write_op(
&PARTITION_KEY,
NAMESPACE_ID,
TABLE_NAME,
TABLE_ID,
&ARBITRARY_PARTITION_KEY,
ARBITRARY_NAMESPACE_ID,
&ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_ID,
0,
r#"banana-report,good=yes level=1000 4242424242"#,
&format!("{},good=yes level=1000 4242424242", &*ARBITRARY_TABLE_NAME),
)))
.await
.expect("failed to write partition test dataa");
@ -595,7 +576,7 @@ mod tests {
handle.worker_queues = JumpHash::new([worker1_tx, worker2_tx]);
// Generate a partition with no known sort key.
let p = new_partition(PARTITION_ID, SortKeyState::Provided(None)).await;
let p = new_partition(SortKeyState::Provided(None)).await;
let data = p.lock().mark_persisting().unwrap();
// Enqueue it
@ -616,7 +597,7 @@ mod tests {
.expect("message was not found in either worker")
}
};
assert_eq!(msg.partition_id(), PARTITION_ID);
assert_eq!(msg.partition_id(), ARBITRARY_PARTITION_ID);
// Drop the message, and ensure the notification becomes inactive.
drop(msg);
@ -626,7 +607,7 @@ mod tests {
);
// Enqueue another partition for the same ID.
let p = new_partition(PARTITION_ID, SortKeyState::Provided(None)).await;
let p = new_partition(SortKeyState::Provided(None)).await;
let data = p.lock().mark_persisting().unwrap();
// Enqueue it
@ -636,7 +617,7 @@ mod tests {
let msg = assigned_worker
.try_recv()
.expect("message was not found in either worker");
assert_eq!(msg.partition_id(), PARTITION_ID);
assert_eq!(msg.partition_id(), ARBITRARY_PARTITION_ID);
}
/// A test that ensures the correct destination of a partition that has no
@ -671,12 +652,10 @@ mod tests {
handle.worker_queues = JumpHash::new([worker1_tx, worker2_tx]);
// Generate a partition with a resolved, but empty sort key.
let p = new_partition(
PARTITION_ID,
SortKeyState::Deferred(Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
None
}))),
)
let p = new_partition(SortKeyState::Deferred(Arc::new(DeferredLoad::new(
Duration::from_secs(1),
async { None },
))))
.await;
let (loader, data) = {
let mut p = p.lock();
@ -703,7 +682,7 @@ mod tests {
.expect("message was not found in either worker")
}
};
assert_eq!(msg.partition_id(), PARTITION_ID);
assert_eq!(msg.partition_id(), ARBITRARY_PARTITION_ID);
// Drop the message, and ensure the notification becomes inactive.
drop(msg);
@ -714,7 +693,7 @@ mod tests {
// Enqueue another partition for the same ID and same (resolved)
// deferred load instance.
let p = new_partition(PARTITION_ID, loader).await;
let p = new_partition(loader).await;
let data = p.lock().mark_persisting().unwrap();
// Enqueue it
@ -724,7 +703,7 @@ mod tests {
let msg = assigned_worker
.try_recv()
.expect("message was not found in either worker");
assert_eq!(msg.partition_id(), PARTITION_ID);
assert_eq!(msg.partition_id(), ARBITRARY_PARTITION_ID);
}
/// A test that ensures the correct destination of a partition that has an
@ -760,12 +739,10 @@ mod tests {
// Generate a partition with a resolved sort key that does not reflect
// the data within the partition's buffer.
let p = new_partition(
PARTITION_ID,
SortKeyState::Deferred(Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
Some(SortKey::from_columns(["time", "some-other-column"]))
}))),
)
let p = new_partition(SortKeyState::Deferred(Arc::new(DeferredLoad::new(
Duration::from_secs(1),
async { Some(SortKey::from_columns(["time", "some-other-column"])) },
))))
.await;
let (loader, data) = {
let mut p = p.lock();
@ -792,7 +769,7 @@ mod tests {
.expect("message was not found in either worker")
}
};
assert_eq!(msg.partition_id(), PARTITION_ID);
assert_eq!(msg.partition_id(), ARBITRARY_PARTITION_ID);
// Drop the message, and ensure the notification becomes inactive.
drop(msg);
@ -803,7 +780,7 @@ mod tests {
// Enqueue another partition for the same ID and same (resolved)
// deferred load instance.
let p = new_partition(PARTITION_ID, loader).await;
let p = new_partition(loader).await;
let data = p.lock().mark_persisting().unwrap();
// Enqueue it
@ -813,7 +790,7 @@ mod tests {
let msg = assigned_worker
.try_recv()
.expect("message was not found in either worker");
assert_eq!(msg.partition_id(), PARTITION_ID);
assert_eq!(msg.partition_id(), ARBITRARY_PARTITION_ID);
}
/// A test that a partition that does not require a sort key update is
@ -848,12 +825,10 @@ mod tests {
// Generate a partition with a resolved sort key that does not reflect
// the data within the partition's buffer.
let p = new_partition(
PARTITION_ID,
SortKeyState::Deferred(Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
Some(SortKey::from_columns(["time", "good"]))
}))),
)
let p = new_partition(SortKeyState::Deferred(Arc::new(DeferredLoad::new(
Duration::from_secs(1),
async { Some(SortKey::from_columns(["time", "good"])) },
))))
.await;
let (loader, data) = {
let mut p = p.lock();
@ -873,7 +848,7 @@ mod tests {
let msg = global_rx
.try_recv()
.expect("task should be in global queue");
assert_eq!(msg.partition_id(), PARTITION_ID);
assert_eq!(msg.partition_id(), ARBITRARY_PARTITION_ID);
// Drop the message, and ensure the notification becomes inactive.
drop(msg);
@ -884,7 +859,7 @@ mod tests {
// Enqueue another partition for the same ID and same (resolved)
// deferred load instance.
let p = new_partition(PARTITION_ID, loader).await;
let p = new_partition(loader).await;
let data = p.lock().mark_persisting().unwrap();
// Enqueue it
@ -894,7 +869,7 @@ mod tests {
let msg = global_rx
.try_recv()
.expect("task should be in global queue");
assert_eq!(msg.partition_id(), PARTITION_ID);
assert_eq!(msg.partition_id(), ARBITRARY_PARTITION_ID);
}
/// A test that ensures tasks waiting to be enqueued (waiting on the
@ -930,12 +905,10 @@ mod tests {
handle.worker_queues = JumpHash::new([worker1_tx, worker2_tx]);
// Generate a partition
let p = new_partition(
PARTITION_ID,
SortKeyState::Deferred(Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
Some(SortKey::from_columns(["time", "good"]))
}))),
)
let p = new_partition(SortKeyState::Deferred(Arc::new(DeferredLoad::new(
Duration::from_secs(1),
async { Some(SortKey::from_columns(["time", "good"])) },
))))
.await;
let data = p.lock().mark_persisting().unwrap();
@ -943,12 +916,10 @@ mod tests {
let _notify1 = handle.enqueue(p, data).await;
// Generate a second partition
let p = new_partition(
PARTITION_ID,
SortKeyState::Deferred(Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
Some(SortKey::from_columns(["time", "good"]))
}))),
)
let p = new_partition(SortKeyState::Deferred(Arc::new(DeferredLoad::new(
Duration::from_secs(1),
async { Some(SortKey::from_columns(["time", "good"])) },
))))
.await;
let data = p.lock().mark_persisting().unwrap();

View File

@ -102,52 +102,28 @@ where
#[cfg(test)]
mod tests {
use std::sync::Arc;
use std::time::Duration;
use assert_matches::assert_matches;
use data_types::{NamespaceId, PartitionId, PartitionKey, SequenceNumber, ShardId, TableId};
use lazy_static::lazy_static;
use data_types::SequenceNumber;
use mutable_batch_lp::test_helpers::lp_to_mutable_batch;
use parking_lot::Mutex;
use crate::{
buffer_tree::{
namespace::NamespaceName, partition::PartitionData, partition::SortKeyState,
table::TableName,
},
deferred_load::DeferredLoad,
persist::queue::mock::MockPersistQueue,
test_util::{PartitionDataBuilder, ARBITRARY_TABLE_NAME},
};
use super::*;
const PARTITION_ID: PartitionId = PartitionId::new(1);
const TRANSITION_SHARD_ID: ShardId = ShardId::new(84);
lazy_static! {
static ref PARTITION_KEY: PartitionKey = PartitionKey::from("pohtaytoes");
static ref TABLE_NAME: TableName = TableName::from("potatoes");
static ref NAMESPACE_NAME: NamespaceName = NamespaceName::from("namespace-potatoes");
}
#[tokio::test]
async fn test_hot_partition_persist() {
let mut p = PartitionData::new(
PARTITION_ID,
PARTITION_KEY.clone(),
NamespaceId::new(3),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NAMESPACE_NAME.clone()
})),
TableId::new(4),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TABLE_NAME.clone()
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
);
let mut p = PartitionDataBuilder::new().build();
let mb = lp_to_mutable_batch(r#"potatoes,city=Hereford people=1,crisps="good" 10"#).1;
let mb = lp_to_mutable_batch(&format!(
r#"{},city=Hereford people=1,crisps="good" 10"#,
&*ARBITRARY_TABLE_NAME
))
.1;
p.buffer_write(mb, SequenceNumber::new(1))
.expect("write should succeed");
let max_cost = p.persist_cost_estimate() + 1; // Require additional data to be buffered before enqueuing
@ -177,7 +153,11 @@ mod tests {
// Write more data to the partition
let want_query_data = {
let mb = lp_to_mutable_batch(r#"potatoes,city=Worcester people=2,crisps="fine" 5"#).1;
let mb = lp_to_mutable_batch(&format!(
r#"{},city=Worcester people=2,crisps="fine" 5"#,
&*ARBITRARY_TABLE_NAME
))
.1;
let mut guard = p.lock();
guard
.buffer_write(mb, SequenceNumber::new(2))

View File

@ -15,7 +15,7 @@ mod tests {
use std::{sync::Arc, time::Duration};
use assert_matches::assert_matches;
use data_types::{CompactionLevel, ParquetFile, PartitionKey, SequenceNumber, ShardId};
use data_types::{CompactionLevel, ParquetFile, SequenceNumber, TRANSITION_SHARD_ID};
use dml::DmlOperation;
use futures::TryStreamExt;
use iox_catalog::{
@ -36,28 +36,25 @@ mod tests {
use crate::{
buffer_tree::{
namespace::name_resolver::mock::MockNamespaceNameProvider,
partition::{resolver::CatalogPartitionResolver, PartitionData, SortKeyState},
post_write::mock::MockPostWriteObserver,
table::name_resolver::mock::MockTableNameProvider,
BufferTree,
},
dml_sink::DmlSink,
ingest_state::IngestState,
persist::{completion_observer::mock::MockCompletionObserver, queue::PersistQueue},
test_util::{make_write_op, populate_catalog},
test_util::{
make_write_op, populate_catalog, ARBITRARY_NAMESPACE_NAME,
ARBITRARY_NAMESPACE_NAME_PROVIDER, ARBITRARY_PARTITION_KEY, ARBITRARY_TABLE_NAME,
ARBITRARY_TABLE_NAME_PROVIDER,
},
TRANSITION_SHARD_INDEX,
};
use super::handle::PersistHandle;
const TABLE_NAME: &str = "bananas";
const NAMESPACE_NAME: &str = "platanos";
const TRANSITION_SHARD_ID: ShardId = ShardId::new(84);
lazy_static! {
static ref EXEC: Arc<Executor> = Arc::new(Executor::new_testing());
static ref PARTITION_KEY: PartitionKey = PartitionKey::from("bananas");
}
/// Generate a [`PartitionData`] containing one write, and populate the
@ -68,15 +65,15 @@ mod tests {
let (_shard_id, namespace_id, table_id) = populate_catalog(
&*catalog,
TRANSITION_SHARD_INDEX,
NAMESPACE_NAME,
TABLE_NAME,
&ARBITRARY_NAMESPACE_NAME,
&ARBITRARY_TABLE_NAME,
)
.await;
// Init the buffer tree
let buf = BufferTree::new(
Arc::new(MockNamespaceNameProvider::default()),
Arc::new(MockTableNameProvider::new(TABLE_NAME)),
Arc::clone(&*ARBITRARY_NAMESPACE_NAME_PROVIDER),
Arc::clone(&*ARBITRARY_TABLE_NAME_PROVIDER),
Arc::new(CatalogPartitionResolver::new(Arc::clone(&catalog))),
Arc::new(MockPostWriteObserver::default()),
Arc::new(metric::Registry::default()),
@ -84,12 +81,15 @@ mod tests {
);
let write = make_write_op(
&PARTITION_KEY,
&ARBITRARY_PARTITION_KEY,
namespace_id,
TABLE_NAME,
&ARBITRARY_TABLE_NAME,
table_id,
0,
r#"bananas,region=Asturias temp=35 4242424242"#,
&format!(
r#"{},region=Asturias temp=35 4242424242"#,
&*ARBITRARY_TABLE_NAME
),
);
let mut repos = catalog
@ -103,7 +103,9 @@ mod tests {
// Insert the schema elements into the catalog
validate_or_insert_schema(
write.tables().map(|(_id, data)| (TABLE_NAME, data)),
write
.tables()
.map(|(_id, data)| (&***ARBITRARY_TABLE_NAME, data)),
&schema,
&mut *repos,
)

View File

@ -174,7 +174,7 @@ where
// Predicate pushdown is part of the API, but not implemented.
if let Some(p) = request.predicate {
warn!(predicate=?p, "ignoring query predicate (unsupported)");
debug!(predicate=?p, "ignoring query predicate (unsupported)");
}
let response = match self

View File

@ -1,6 +1,6 @@
use std::sync::Arc;
use data_types::{NamespaceId, PartitionKey, Sequence, TableId};
use data_types::{NamespaceId, PartitionKey, TableId};
use dml::{DmlMeta, DmlOperation, DmlWrite};
use generated_types::influxdata::iox::ingester::v1::{
self as proto, write_service_server::WriteService,
@ -15,7 +15,6 @@ use crate::{
dml_sink::{DmlError, DmlSink},
ingest_state::{IngestState, IngestStateError},
timestamp_oracle::TimestampOracle,
TRANSITION_SHARD_INDEX,
};
/// A list of error states when handling an RPC write request.
@ -63,6 +62,7 @@ impl From<DmlError> for tonic::Status {
match e {
DmlError::Buffer(e) => map_write_error(e),
DmlError::Wal(_) => Self::internal(e.to_string()),
DmlError::ApplyTimeout => Self::internal(e.to_string()),
}
}
}
@ -188,10 +188,7 @@ where
.collect(),
partition_key,
DmlMeta::sequenced(
Sequence {
shard_index: TRANSITION_SHARD_INDEX, // TODO: remove this from DmlMeta
sequence_number: self.timestamp.next(),
},
self.timestamp.next(),
iox_time::Time::MAX, // TODO: remove this from DmlMeta
// The tracing context should be propagated over the RPC boundary.
//
@ -296,7 +293,7 @@ mod tests {
assert_eq!(w.namespace_id(), NAMESPACE_ID);
assert_eq!(w.table_count(), 1);
assert_eq!(*w.partition_key(), PartitionKey::from(PARTITION_KEY));
assert_eq!(w.meta().sequence().unwrap().sequence_number.get(), 1);
assert_eq!(w.meta().sequence().unwrap().get(), 1);
}
);
@ -404,8 +401,8 @@ mod tests {
assert_matches!(
*mock.get_calls(),
[DmlOperation::Write(ref w1), DmlOperation::Write(ref w2)] => {
let w1 = w1.meta().sequence().unwrap().sequence_number.get();
let w2 = w2.meta().sequence().unwrap().sequence_number.get();
let w1 = w1.meta().sequence().unwrap().get();
let w2 = w2.meta().sequence().unwrap().get();
assert!(w1 < w2);
}
);

View File

@ -1,13 +1,141 @@
use std::collections::BTreeMap;
use std::{collections::BTreeMap, sync::Arc, time::Duration};
use data_types::{
NamespaceId, PartitionKey, Sequence, SequenceNumber, ShardId, ShardIndex, TableId,
NamespaceId, Partition, PartitionId, PartitionKey, SequenceNumber, ShardId, ShardIndex,
TableId, TRANSITION_SHARD_ID,
};
use dml::{DmlMeta, DmlWrite};
use iox_catalog::interface::Catalog;
use lazy_static::lazy_static;
use mutable_batch_lp::lines_to_batches;
use schema::Projection;
use crate::{
buffer_tree::{
namespace::{
name_resolver::{mock::MockNamespaceNameProvider, NamespaceNameProvider},
NamespaceName,
},
partition::{PartitionData, SortKeyState},
table::{
name_resolver::{mock::MockTableNameProvider, TableNameProvider},
TableName,
},
},
deferred_load::DeferredLoad,
};
pub(crate) const ARBITRARY_PARTITION_ID: PartitionId = PartitionId::new(1);
pub(crate) const ARBITRARY_NAMESPACE_ID: NamespaceId = NamespaceId::new(3);
pub(crate) const ARBITRARY_TABLE_ID: TableId = TableId::new(4);
pub(crate) const ARBITRARY_PARTITION_KEY_STR: &str = "platanos";
lazy_static! {
pub(crate) static ref ARBITRARY_PARTITION_KEY: PartitionKey =
PartitionKey::from(ARBITRARY_PARTITION_KEY_STR);
pub(crate) static ref ARBITRARY_NAMESPACE_NAME: NamespaceName =
NamespaceName::from("namespace-bananas");
pub(crate) static ref DEFER_NAMESPACE_NAME_1_SEC: Arc<DeferredLoad<NamespaceName>> =
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
ARBITRARY_NAMESPACE_NAME.clone()
}));
pub(crate) static ref DEFER_NAMESPACE_NAME_1_MS: Arc<DeferredLoad<NamespaceName>> =
Arc::new(DeferredLoad::new(Duration::from_millis(1), async {
ARBITRARY_NAMESPACE_NAME.clone()
}));
pub(crate) static ref ARBITRARY_NAMESPACE_NAME_PROVIDER: Arc<dyn NamespaceNameProvider> =
Arc::new(MockNamespaceNameProvider::new(&**ARBITRARY_NAMESPACE_NAME));
pub(crate) static ref ARBITRARY_TABLE_NAME: TableName = TableName::from("bananas");
pub(crate) static ref DEFER_TABLE_NAME_1_SEC: Arc<DeferredLoad<TableName>> =
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
ARBITRARY_TABLE_NAME.clone()
}));
pub(crate) static ref DEFER_TABLE_NAME_1_MS: Arc<DeferredLoad<TableName>> =
Arc::new(DeferredLoad::new(Duration::from_millis(1), async {
ARBITRARY_TABLE_NAME.clone()
}));
pub(crate) static ref ARBITRARY_TABLE_NAME_PROVIDER: Arc<dyn TableNameProvider> =
Arc::new(MockTableNameProvider::new(&**ARBITRARY_TABLE_NAME));
}
/// Build a [`PartitionData`] with mostly arbitrary-yet-valid values for tests.
#[derive(Debug, Clone, Default)]
pub(crate) struct PartitionDataBuilder {
partition_id: Option<PartitionId>,
partition_key: Option<PartitionKey>,
namespace_id: Option<NamespaceId>,
table_id: Option<TableId>,
table_name: Option<Arc<DeferredLoad<TableName>>>,
sort_key: Option<SortKeyState>,
}
impl PartitionDataBuilder {
pub(crate) fn new() -> Self {
Self::default()
}
pub(crate) fn with_partition_id(mut self, partition_id: PartitionId) -> Self {
self.partition_id = Some(partition_id);
self
}
pub(crate) fn with_partition_key(mut self, partition_key: PartitionKey) -> Self {
self.partition_key = Some(partition_key);
self
}
pub(crate) fn with_namespace_id(mut self, namespace_id: NamespaceId) -> Self {
self.namespace_id = Some(namespace_id);
self
}
pub(crate) fn with_table_id(mut self, table_id: TableId) -> Self {
self.table_id = Some(table_id);
self
}
pub(crate) fn with_table_name(mut self, table_name: Arc<DeferredLoad<TableName>>) -> Self {
self.table_name = Some(table_name);
self
}
pub(crate) fn with_sort_key_state(mut self, sort_key_state: SortKeyState) -> Self {
self.sort_key = Some(sort_key_state);
self
}
/// Generate a valid [`PartitionData`] for use in tests where the exact values (or at least
/// some of them) don't particularly matter.
pub(crate) fn build(self) -> PartitionData {
PartitionData::new(
self.partition_id.unwrap_or(ARBITRARY_PARTITION_ID),
self.partition_key
.unwrap_or_else(|| ARBITRARY_PARTITION_KEY.clone()),
self.namespace_id.unwrap_or(ARBITRARY_NAMESPACE_ID),
Arc::clone(&*DEFER_NAMESPACE_NAME_1_SEC),
self.table_id.unwrap_or(ARBITRARY_TABLE_ID),
self.table_name
.unwrap_or_else(|| Arc::clone(&*DEFER_TABLE_NAME_1_SEC)),
self.sort_key.unwrap_or(SortKeyState::Provided(None)),
TRANSITION_SHARD_ID,
)
}
}
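// Illustrative usage sketch (not part of this change): tests take the
// arbitrary defaults and override only the fields they care about, as the
// call sites elsewhere in this diff do, e.g.
//
//     let p_default = PartitionDataBuilder::new().build();
//     let p_custom = PartitionDataBuilder::new()
//         .with_partition_id(PartitionId::new(0))
//         .with_partition_key(PartitionKey::from("p1"))
//         .build();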
/// Generate a valid [`Partition`] for use in the tests where the exact values (or at least some of
/// them) don't particularly matter.
pub(crate) fn arbitrary_partition() -> Partition {
Partition {
id: ARBITRARY_PARTITION_ID,
shard_id: TRANSITION_SHARD_ID,
table_id: ARBITRARY_TABLE_ID,
partition_key: ARBITRARY_PARTITION_KEY.clone(),
sort_key: Default::default(),
persisted_sequence_number: Default::default(),
new_file_at: Default::default(),
}
}
/// Generate a [`RecordBatch`] & [`Schema`] with the specified columns and
/// values:
///
@ -147,10 +275,7 @@ pub(crate) fn make_write_op(
tables_by_id,
partition_key.clone(),
DmlMeta::sequenced(
Sequence {
shard_index: ShardIndex::new(i32::MAX),
sequence_number: SequenceNumber::new(sequence_number),
},
SequenceNumber::new(sequence_number),
iox_time::Time::MIN,
None,
42,
@ -195,8 +320,8 @@ pub(crate) fn assert_dml_writes_eq(a: DmlWrite, b: DmlWrite) {
assert_eq!(a.partition_key(), b.partition_key(), "partition key");
// Assert sequence numbers were reassigned
let seq_a = a.meta().sequence().map(|s| s.sequence_number);
let seq_b = b.meta().sequence().map(|s| s.sequence_number);
let seq_a = a.meta().sequence();
let seq_b = b.meta().sequence();
assert_eq!(seq_a, seq_b, "sequence numbers differ");
let a = a.into_tables().collect::<BTreeMap<_, _>>();

View File

@ -143,53 +143,25 @@ mod tests {
use assert_matches::assert_matches;
use async_trait::async_trait;
use data_types::{NamespaceId, PartitionId, PartitionKey, SequenceNumber, ShardId, TableId};
use lazy_static::lazy_static;
use data_types::SequenceNumber;
use mutable_batch_lp::test_helpers::lp_to_mutable_batch;
use parking_lot::Mutex;
use tempfile::tempdir;
use test_helpers::timeout::FutureTimeout;
use tokio::sync::oneshot;
use super::*;
use crate::{
buffer_tree::{
namespace::NamespaceName,
partition::PartitionData,
partition::{persisting::PersistingData, SortKeyState},
table::TableName,
},
deferred_load::DeferredLoad,
buffer_tree::{partition::persisting::PersistingData, partition::PartitionData},
persist::queue::mock::MockPersistQueue,
test_util::{PartitionDataBuilder, ARBITRARY_PARTITION_ID},
};
use super::*;
const PARTITION_ID: PartitionId = PartitionId::new(1);
const TRANSITION_SHARD_ID: ShardId = ShardId::new(84);
const TICK_INTERVAL: Duration = Duration::from_millis(10);
lazy_static! {
static ref PARTITION_KEY: PartitionKey = PartitionKey::from("platanos");
static ref TABLE_NAME: TableName = TableName::from("bananas");
static ref NAMESPACE_NAME: NamespaceName = NamespaceName::from("namespace-bananas");
}
#[tokio::test]
async fn test_persist() {
let mut p = PartitionData::new(
PARTITION_ID,
PARTITION_KEY.clone(),
NamespaceId::new(3),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NAMESPACE_NAME.clone()
})),
TableId::new(4),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TABLE_NAME.clone()
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
);
let mut p = PartitionDataBuilder::new().build();
// Perform a single write to populate the partition.
let mb = lp_to_mutable_batch(r#"bananas,city=London people=2,pigeons="millions" 10"#).1;
@ -271,7 +243,7 @@ mod tests {
assert_matches!(persist.calls().as_slice(), [got] => {
let guard = got.lock();
assert_eq!(guard.partition_id(), PARTITION_ID);
assert_eq!(guard.partition_id(), ARBITRARY_PARTITION_ID);
})
}
@ -305,20 +277,7 @@ mod tests {
#[tokio::test]
async fn test_persist_ticks_when_blocked() {
let mut p = PartitionData::new(
PARTITION_ID,
PARTITION_KEY.clone(),
NamespaceId::new(3),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
NAMESPACE_NAME.clone()
})),
TableId::new(4),
Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
TABLE_NAME.clone()
})),
SortKeyState::Provided(None),
TRANSITION_SHARD_ID,
);
let mut p = PartitionDataBuilder::new().build();
// Perform a single write to populate the partition.
let mb = lp_to_mutable_batch(r#"bananas,city=London people=2,pigeons="millions" 10"#).1;

View File

@ -2,7 +2,7 @@ use async_trait::async_trait;
use dml::DmlOperation;
use generated_types::influxdata::iox::wal::v1::sequenced_wal_op::Op;
use mutable_batch_pb::encode::encode_write;
use std::sync::Arc;
use std::{sync::Arc, time::Duration};
use tokio::sync::watch::Receiver;
use wal::{SequencedWalOp, WriteResult};
@ -13,6 +13,17 @@ use crate::{
use super::traits::WalAppender;
/// [`DELEGATE_APPLY_TIMEOUT`] defines how long the inner [`DmlSink`] is given
/// to complete the write [`DmlSink::apply()`] call.
///
/// If this limit weren't enforced, a write that does not make progress would
/// consume resources forever. Instead, a reasonable duration of time is given
/// to attempt the write before an error is returned to the caller.
///
/// In practice, this limit SHOULD only ever be reached as a symptom of a larger
/// problem (catalog unavailable, etc) preventing a write from making progress.
const DELEGATE_APPLY_TIMEOUT: Duration = Duration::from_secs(15);
/// A [`DmlSink`] decorator that ensures any [`DmlOperation`] is committed to
/// the write-ahead log before passing the operation to the inner [`DmlSink`].
#[derive(Debug)]
@ -49,7 +60,11 @@ where
// durable.
//
// Ensure that this future is always driven to completion now that the
// WAL entry is being committed, otherwise they'll diverge.
// WAL entry is being committed, otherwise they'll diverge. At the same
// time, do not allow the spawned task to live forever, consuming
// resources without making progress; instead, shed load once a
// reasonable duration (DELEGATE_APPLY_TIMEOUT) has passed and
// return a write error (if the caller is still listening).
//
// If this buffer apply fails, the entry remains in the WAL and will be
// attempted again during WAL replay after a crash. If this can never
@ -58,9 +73,14 @@ where
// https://github.com/influxdata/influxdb_iox/issues/7111
//
let inner = self.inner.clone();
CancellationSafe::new(async move { inner.apply(op).await })
.await
.map_err(Into::into)?;
CancellationSafe::new(async move {
let res = tokio::time::timeout(DELEGATE_APPLY_TIMEOUT, inner.apply(op))
.await
.map_err(|_| DmlError::ApplyTimeout)?;
res.map_err(Into::into)
})
.await?;
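// Note on the composition above: the timeout runs inside the
// CancellationSafe wrapper, so even if the caller stops polling this future
// the delegated apply() is still driven to completion and remains bounded by
// DELEGATE_APPLY_TIMEOUT rather than running forever.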
// Wait for the write to be durable before returning to the user
write_result
@ -82,7 +102,6 @@ impl WalAppender for Arc<wal::Wal> {
.meta()
.sequence()
.expect("committing unsequenced dml operation to wal")
.sequence_number
.get() as u64;
let namespace_id = op.namespace_id();
@ -101,7 +120,8 @@ impl WalAppender for Arc<wal::Wal> {
#[cfg(test)]
mod tests {
use std::sync::Arc;
use core::{future::Future, marker::Send, pin::Pin};
use std::{future, sync::Arc};
use assert_matches::assert_matches;
use data_types::{NamespaceId, PartitionKey, TableId};
@ -181,4 +201,66 @@ mod tests {
assert_eq!(want, *payload);
}
/// A [`DmlSink`] implementation that hangs forever and never completes.
#[derive(Debug, Default, Clone)]
struct BlockingDmlSink;
impl DmlSink for BlockingDmlSink {
type Error = DmlError;
fn apply<'life0, 'async_trait>(
&'life0 self,
_op: DmlOperation,
) -> Pin<Box<dyn Future<Output = Result<(), Self::Error>> + Send + 'async_trait>>
where
'life0: 'async_trait,
Self: 'async_trait,
{
Box::pin(future::pending())
}
}
#[tokio::test]
async fn test_timeout() {
let dir = tempfile::tempdir().unwrap();
// Generate the test op
let op = make_write_op(
&PartitionKey::from("p1"),
NAMESPACE_ID,
TABLE_NAME,
TABLE_ID,
42,
r#"bananas,region=Madrid temp=35 4242424242"#,
);
let wal = Wal::new(dir.path())
.await
.expect("failed to initialise WAL");
let wal_sink = WalSink::new(BlockingDmlSink::default(), wal);
// Allow tokio to automatically advance time past the timeout duration,
// when all tasks are blocked on await points.
//
// This allows the test to drive the timeout logic without actually
// waiting for the timeout duration in the test.
tokio::time::pause();
let start = tokio::time::Instant::now();
// Apply the op through the decorator, which should time out
let err = wal_sink
.apply(DmlOperation::Write(op.clone()))
.await
.expect_err("write should time out");
assert_matches!(err, DmlError::ApplyTimeout);
// Ensure that "time" advanced at least the timeout amount of time
// before erroring.
let duration = tokio::time::Instant::now().duration_since(start);
assert!(duration > DELEGATE_APPLY_TIMEOUT);
}
}

View File

@ -31,7 +31,7 @@ prost = { version = "0.11.9", default-features = false, features = ["std"] }
tempfile = { version = "3.5.0" }
test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
tokio = { version = "1.27", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] }
tokio-util = "0.7.7"
tokio-util = "0.7.8"
tonic = { workspace = true }
wal = { version = "0.1.0", path = "../wal" }
workspace-hack = { version = "0.1", path = "../workspace-hack" }

View File

@ -17,8 +17,8 @@ use std::{collections::HashMap, sync::Arc, time::Duration};
use arrow::record_batch::RecordBatch;
use arrow_flight::{decode::FlightRecordBatchStream, flight_service_server::FlightService, Ticket};
use data_types::{
Namespace, NamespaceId, NamespaceSchema, ParquetFile, PartitionKey, QueryPoolId, Sequence,
SequenceNumber, ShardIndex, TableId, TopicId,
Namespace, NamespaceId, NamespaceSchema, ParquetFile, PartitionKey, QueryPoolId,
SequenceNumber, TableId, TopicId,
};
use dml::{DmlMeta, DmlWrite};
use futures::{stream::FuturesUnordered, FutureExt, StreamExt, TryStreamExt};
@ -179,8 +179,6 @@ impl TestContextBuilder {
}
}
const SHARD_INDEX: ShardIndex = ShardIndex::new(42);
/// A command interface to the underlying [`ingester2`] instance.
///
/// When the [`TestContext`] is dropped, the underlying [`ingester2`] instance
@ -309,7 +307,7 @@ where
batches_by_ids,
partition_key,
DmlMeta::sequenced(
Sequence::new(SHARD_INDEX, SequenceNumber::new(sequence_number)),
SequenceNumber::new(sequence_number),
iox_time::SystemProvider::new().now(),
None,
50,

View File

@ -16,6 +16,10 @@ use std::{
};
use uuid::Uuid;
/// Maximum number of files deleted by [`ParquetFileRepo::delete_old_ids_only`] or flagged by
/// [`ParquetFileRepo::flag_for_delete_by_retention`] in a single call.
pub const MAX_PARQUET_FILES_SELECTED_ONCE: i64 = 1_000;
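A minimal sketch of draining work under this cap (hypothetical helper; the only assumed API is the `repos.parquet_files().flag_for_delete_by_retention()` call shape exercised in the tests further down, which returns the ids it flagged): keep calling until a batch comes back empty.
async fn flag_all_expired(repos: &mut dyn RepoCollection) -> Result<()> {
    // Each call flags at most MAX_PARQUET_FILES_SELECTED_ONCE files, so
    // repeat until a call returns an empty batch.
    loop {
        let ids = repos.parquet_files().flag_for_delete_by_retention().await?;
        if ids.is_empty() {
            return Ok(());
        }
    }
}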
/// An error wrapper detailing the reason for a compare-and-swap failure.
#[derive(Debug)]
pub enum CasFailure<T> {
@ -1835,6 +1839,7 @@ pub(crate) mod test_helpers {
.expect("delete namespace should succeed");
}
/// Tests many interactions with the catalog and parquet files. See the individual conditions herein.
async fn test_parquet_file(catalog: Arc<dyn Catalog>) {
let mut repos = catalog.repositories().await;
let topic = repos.topics().create_or_get("foo").await.unwrap();
@ -2074,7 +2079,7 @@ pub(crate) mod test_helpers {
min_time: Timestamp::new(50),
max_time: Timestamp::new(60),
max_sequence_number: SequenceNumber::new(11),
..f1_params
..f1_params.clone()
};
let f2 = repos
.parquet_files()
@ -2219,6 +2224,41 @@ pub(crate) mod test_helpers {
.await
.unwrap();
assert!(ids.is_empty());
// test that flag_for_delete_by_retention respects UPDATE LIMIT
// create LIMIT + MOL (42, the meaning of life) parquet files, all older than the retention period (> 1 hour)
const LIMIT: usize = 1000;
const MOL: usize = 42;
for _ in 0..LIMIT + MOL {
let params = ParquetFileParams {
object_store_id: Uuid::new_v4(),
max_time: Timestamp::new(
// a bit over an hour ago
(catalog.time_provider().now() - Duration::from_secs(60 * 65))
.timestamp_nanos(),
),
..f1_params.clone()
};
repos.parquet_files().create(params.clone()).await.unwrap();
}
let ids = repos
.parquet_files()
.flag_for_delete_by_retention()
.await
.unwrap();
assert_eq!(ids.len(), LIMIT);
let ids = repos
.parquet_files()
.flag_for_delete_by_retention()
.await
.unwrap();
assert_eq!(ids.len(), MOL); // second call took remainder
let ids = repos
.parquet_files()
.flag_for_delete_by_retention()
.await
.unwrap();
assert_eq!(ids.len(), 0); // none left
}
async fn test_parquet_file_delete_broken(catalog: Arc<dyn Catalog>) {

View File

@ -37,7 +37,6 @@ pub const DEFAULT_MAX_COLUMNS_PER_TABLE: i32 = 200;
/// Default retention period for data in the catalog.
pub const DEFAULT_RETENTION_PERIOD: Option<i64> = None;
/// A string value representing an infinite retention policy.
pub mod interface;
pub mod mem;
pub mod metrics;

View File

@ -6,6 +6,7 @@ use crate::{
sealed::TransactionFinalize, CasFailure, Catalog, ColumnRepo, ColumnTypeMismatchSnafu,
Error, NamespaceRepo, ParquetFileRepo, PartitionRepo, QueryPoolRepo, RepoCollection,
Result, ShardRepo, SoftDeletedRows, TableRepo, TopicMetadataRepo, Transaction,
MAX_PARQUET_FILES_SELECTED_ONCE,
},
metrics::MetricDecorator,
DEFAULT_MAX_COLUMNS_PER_TABLE, DEFAULT_MAX_TABLES, SHARED_TOPIC_ID, SHARED_TOPIC_NAME,
@ -1034,6 +1035,7 @@ impl ParquetFileRepo for MemTxn {
})
})
})
.take(MAX_PARQUET_FILES_SELECTED_ONCE as usize)
.collect())
}
@ -1090,7 +1092,11 @@ impl ParquetFileRepo for MemTxn {
stage.parquet_files = keep;
let delete = delete.into_iter().map(|f| f.id).collect();
let delete = delete
.into_iter()
.take(MAX_PARQUET_FILES_SELECTED_ONCE as usize)
.map(|f| f.id)
.collect();
Ok(delete)
}

View File

@ -5,7 +5,7 @@ use crate::{
self, sealed::TransactionFinalize, CasFailure, Catalog, ColumnRepo,
ColumnTypeMismatchSnafu, Error, NamespaceRepo, ParquetFileRepo, PartitionRepo,
QueryPoolRepo, RepoCollection, Result, ShardRepo, SoftDeletedRows, TableRepo,
TopicMetadataRepo, Transaction,
TopicMetadataRepo, Transaction, MAX_PARQUET_FILES_SELECTED_ONCE,
},
metrics::MetricDecorator,
DEFAULT_MAX_COLUMNS_PER_TABLE, DEFAULT_MAX_TABLES, SHARED_TOPIC_ID, SHARED_TOPIC_NAME,
@ -31,9 +31,6 @@ use std::{collections::HashMap, fmt::Display, str::FromStr, sync::Arc, time::Dur
static MIGRATOR: Migrator = sqlx::migrate!();
/// Maximum number of files deleted by [`ParquetFileRepo::delete_old_ids_only].
const MAX_PARQUET_FILES_DELETED_ONCE: i64 = 1_000;
/// Postgres connection options.
#[derive(Debug, Clone)]
pub struct PostgresConnectionOptions {
@ -1537,17 +1534,23 @@ RETURNING *;
// TODO - include check of table retention period once implemented
let flagged = sqlx::query(
r#"
UPDATE parquet_file
SET to_delete = $1
FROM namespace
WHERE namespace.retention_period_ns IS NOT NULL
AND parquet_file.to_delete IS NULL
AND parquet_file.max_time < $1 - namespace.retention_period_ns
AND namespace.id = parquet_file.namespace_id
RETURNING parquet_file.id;
WITH parquet_file_ids as (
SELECT parquet_file.id
FROM namespace, parquet_file
WHERE namespace.retention_period_ns IS NOT NULL
AND parquet_file.to_delete IS NULL
AND parquet_file.max_time < $1 - namespace.retention_period_ns
AND namespace.id = parquet_file.namespace_id
LIMIT $2
)
UPDATE parquet_file
SET to_delete = $1
WHERE id IN (SELECT id FROM parquet_file_ids)
RETURNING id;
"#,
)
.bind(flagged_at) // $1
.bind(MAX_PARQUET_FILES_SELECTED_ONCE) // $2
.fetch_all(&mut self.inner)
.await
.map_err(|e| Error::SqlxError { source: e })?;
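// A note on the CTE above (an inference from the query shape, not stated in
// the change): Postgres has no `UPDATE ... LIMIT`, so the candidate rows are
// first selected with `LIMIT $2` inside the CTE and the UPDATE is then
// restricted to those ids, bounding each call to
// MAX_PARQUET_FILES_SELECTED_ONCE files.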
@ -1634,7 +1637,7 @@ RETURNING id;
"#,
)
.bind(older_than) // $1
.bind(MAX_PARQUET_FILES_DELETED_ONCE) // $2
.bind(MAX_PARQUET_FILES_SELECTED_ONCE) // $2
.fetch_all(&mut self.inner)
.await
.map_err(|e| Error::SqlxError { source: e })?;

View File

@ -5,7 +5,7 @@ use crate::{
self, sealed::TransactionFinalize, CasFailure, Catalog, ColumnRepo,
ColumnTypeMismatchSnafu, Error, NamespaceRepo, ParquetFileRepo, PartitionRepo,
QueryPoolRepo, RepoCollection, Result, ShardRepo, SoftDeletedRows, TableRepo,
TopicMetadataRepo, Transaction,
TopicMetadataRepo, Transaction, MAX_PARQUET_FILES_SELECTED_ONCE,
},
metrics::MetricDecorator,
DEFAULT_MAX_COLUMNS_PER_TABLE, DEFAULT_MAX_TABLES, SHARED_TOPIC_ID, SHARED_TOPIC_NAME,
@ -36,9 +36,6 @@ use std::sync::Arc;
static MIGRATOR: Migrator = sqlx::migrate!("sqlite/migrations");
/// Maximum number of files deleted by [`ParquetFileRepo::delete_old_ids_only].
const MAX_PARQUET_FILES_DELETED_ONCE: i64 = 1_000;
/// SQLite connection options.
#[derive(Debug, Clone)]
pub struct SqliteConnectionOptions {
@ -1405,17 +1402,23 @@ RETURNING *;
// TODO - include check of table retention period once implemented
let flagged = sqlx::query(
r#"
UPDATE parquet_file
SET to_delete = $1
FROM namespace
WHERE namespace.retention_period_ns IS NOT NULL
AND parquet_file.to_delete IS NULL
AND parquet_file.max_time < $1 - namespace.retention_period_ns
AND namespace.id = parquet_file.namespace_id
RETURNING parquet_file.id;
WITH parquet_file_ids as (
SELECT parquet_file.id
FROM namespace, parquet_file
WHERE namespace.retention_period_ns IS NOT NULL
AND parquet_file.to_delete IS NULL
AND parquet_file.max_time < $1 - namespace.retention_period_ns
AND namespace.id = parquet_file.namespace_id
LIMIT $2
)
UPDATE parquet_file
SET to_delete = $1
WHERE id IN (SELECT id FROM parquet_file_ids)
RETURNING id;
"#,
)
.bind(flagged_at) // $1
.bind(MAX_PARQUET_FILES_SELECTED_ONCE) // $2
.fetch_all(self.inner.get_mut())
.await
.map_err(|e| Error::SqlxError { source: e })?;
@ -1511,7 +1514,7 @@ RETURNING id;
"#,
)
.bind(older_than) // $1
.bind(MAX_PARQUET_FILES_DELETED_ONCE) // $2
.bind(MAX_PARQUET_FILES_SELECTED_ONCE) // $2
.fetch_all(self.inner.get_mut())
.await
.map_err(|e| Error::SqlxError { source: e })?;

View File

@ -41,6 +41,9 @@ use influxdb_influxql_parser::expression::{
Binary, Call, ConditionalBinary, ConditionalExpression, ConditionalOperator, VarRef,
VarRefDataType,
};
use influxdb_influxql_parser::functions::{
is_aggregate_function, is_now_function, is_scalar_math_function,
};
use influxdb_influxql_parser::select::{
FillClause, GroupByClause, SLimitClause, SOffsetClause, TimeZoneClause,
};
@ -65,7 +68,6 @@ use iox_query::exec::IOxSessionContext;
use iox_query::logical_optimizer::range_predicate::find_time_range;
use itertools::Itertools;
use observability_deps::tracing::debug;
use once_cell::sync::Lazy;
use query_functions::selectors::{
selector_first, selector_last, selector_max, selector_min, SelectorOutput,
};
@ -1112,7 +1114,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
match ctx.scope {
ExprScope::Where => {
if call.name.eq_ignore_ascii_case("now") {
if is_now_function(&call.name) {
error::not_implemented("now")
} else {
let name = &call.name;
@ -2181,68 +2183,6 @@ pub(crate) fn find_time_column_index(fields: &[Field]) -> Option<usize> {
.map(|(i, _)| i)
}
/// Returns `true` if `name` is a mathematical scalar function
/// supported by InfluxQL.
pub(crate) fn is_scalar_math_function(name: &str) -> bool {
static FUNCTIONS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
HashSet::from([
"abs", "sin", "cos", "tan", "asin", "acos", "atan", "atan2", "exp", "log", "ln",
"log2", "log10", "sqrt", "pow", "floor", "ceil", "round",
])
});
FUNCTIONS.contains(name)
}
/// Returns `true` if `name` is an aggregate or aggregate function
/// supported by InfluxQL.
fn is_aggregate_function(name: &str) -> bool {
static FUNCTIONS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
HashSet::from([
// Scalar-like functions
"cumulative_sum",
"derivative",
"difference",
"elapsed",
"moving_average",
"non_negative_derivative",
"non_negative_difference",
// Selector functions
"bottom",
"first",
"last",
"max",
"min",
"percentile",
"sample",
"top",
// Aggregate functions
"count",
"integral",
"mean",
"median",
"mode",
"spread",
"stddev",
"sum",
// Prediction functions
"holt_winters",
"holt_winters_with_fit",
// Technical analysis functions
"chande_momentum_oscillator",
"exponential_moving_average",
"double_exponential_moving_average",
"kaufmans_efficiency_ratio",
"kaufmans_adaptive_moving_average",
"triple_exponential_moving_average",
"triple_exponential_derivative",
"relative_strength_index",
])
});
FUNCTIONS.contains(name)
}
/// Returns true if the conditional expression is a single node that
/// refers to the `time` column.
///

View File

@ -4,6 +4,7 @@ use crate::plan::util::binary_operator_to_df_operator;
use datafusion::common::{DataFusionError, Result, ScalarValue};
use datafusion::logical_expr::{binary_expr, lit, now, BinaryExpr, Expr as DFExpr, Operator};
use influxdb_influxql_parser::expression::{Binary, BinaryOperator, Call};
use influxdb_influxql_parser::functions::is_now_function;
use influxdb_influxql_parser::{expression::Expr, literal::Literal};
type ExprResult = Result<DFExpr>;
@ -103,7 +104,7 @@ fn reduce_expr(expr: &Expr, tz: Option<chrono_tz::Tz>) -> ExprResult {
match expr {
Expr::Binary(v) => reduce_binary_expr(v, tz).map_err(map_expr_err(expr)),
Expr::Call (Call { name, .. }) => {
if !name.eq_ignore_ascii_case("now") {
if !is_now_function(name) {
return error::query(
format!("invalid function call '{name}'"),
);
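A minimal sketch of the equivalence this swap relies on (an assumption: the parser's `is_now_function` matches "now" case-insensitively, exactly like the `eq_ignore_ascii_case("now")` checks it replaces here and in the planner).
#[test]
fn now_matching_is_case_insensitive() {
    use influxdb_influxql_parser::functions::is_now_function;

    assert!(is_now_function("now"));
    assert!(is_now_function("NOW"));
    assert!(!is_now_function("mean"));
}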

View File

@ -3,12 +3,12 @@
use crate::plan::expr_type_evaluator::evaluate_type;
use crate::plan::field::field_name;
use crate::plan::field_mapper::{field_and_dimensions, FieldTypeMap, TagSet};
use crate::plan::planner::is_scalar_math_function;
use crate::plan::{error, util, SchemaProvider};
use datafusion::common::{DataFusionError, Result};
use influxdb_influxql_parser::common::{MeasurementName, QualifiedMeasurementName};
use influxdb_influxql_parser::expression::walk::{walk_expr, walk_expr_mut};
use influxdb_influxql_parser::expression::{Call, Expr, VarRef, VarRefDataType, WildcardType};
use influxdb_influxql_parser::functions::is_scalar_math_function;
use influxdb_influxql_parser::identifier::Identifier;
use influxdb_influxql_parser::literal::Literal;
use influxdb_influxql_parser::select::{

View File

@ -43,7 +43,7 @@ serde_urlencoded = "0.7.0"
snafu = "0.7"
tokio = { version = "1.27", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] }
tokio-stream = { version = "0.1", features = ["net"] }
tokio-util = { version = "0.7.7" }
tokio-util = { version = "0.7.8" }
tonic = { workspace = true }
tonic-health = { workspace = true }
tonic-reflection = { workspace = true }

View File

@ -18,6 +18,6 @@ iox_query = { path = "../iox_query" }
ioxd_common = { path = "../ioxd_common" }
metric = { path = "../metric" }
parquet_file = { path = "../parquet_file" }
tokio-util = "0.7.7"
tokio-util = "0.7.8"
trace = { path = "../trace" }
workspace-hack = { version = "0.1", path = "../workspace-hack" }

View File

@ -17,4 +17,4 @@ snafu = "0.7"
tokio = { version = "1", features = ["sync"] }
trace = { path = "../trace" }
workspace-hack = { version = "0.1", path = "../workspace-hack" }
tokio-util = "0.7.7"
tokio-util = "0.7.8"

View File

@ -20,6 +20,6 @@ metric = { path = "../metric" }
parquet_file = { version = "0.1.0", path = "../parquet_file" }
thiserror = "1.0.40"
tokio = { version = "1.27", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] }
tokio-util = { version = "0.7.7" }
tokio-util = { version = "0.7.8" }
trace = { path = "../trace" }
workspace-hack = { version = "0.1", path = "../workspace-hack" }

View File

@ -31,7 +31,7 @@ thiserror = "1.0.40"
tokio = { version = "1.27", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] }
tonic = { workspace = true }
workspace-hack = { version = "0.1", path = "../workspace-hack" }
tokio-util = "0.7.7"
tokio-util = "0.7.8"
[dev-dependencies]
# Workspace dependencies, in alphabetical order

View File

@ -1,5 +1,5 @@
use async_trait::async_trait;
use authz::Authorizer;
use authz::{Authorizer, IoxAuthorizer};
use clap_blocks::querier::QuerierConfig;
use datafusion_util::config::register_iox_object_store;
use hyper::{Body, Request, Response};
@ -160,13 +160,18 @@ pub struct QuerierServerTypeArgs<'a> {
pub exec: Arc<Executor>,
pub time_provider: Arc<dyn TimeProvider>,
pub querier_config: QuerierConfig,
pub authz: Option<Arc<dyn Authorizer>>,
}
#[derive(Debug, Error)]
pub enum Error {
#[error("querier error: {0}")]
Querier(#[from] querier::QuerierDatabaseError),
#[error("authz configuration error for '{addr}': '{source}'")]
AuthzConfig {
source: Box<dyn std::error::Error>,
addr: String,
},
}
/// Instantiate a querier server
@ -197,6 +202,21 @@ pub async fn create_querier_server_type(
);
assert!(existing.is_none());
let authz = match &args.querier_config.authz_address {
Some(addr) => {
let authz = IoxAuthorizer::connect_lazy(addr.clone())
.map(|c| Arc::new(c) as Arc<dyn Authorizer>)
.map_err(|source| Error::AuthzConfig {
source,
addr: addr.clone(),
})?;
authz.probe().await.expect("Authz connection test failed.");
Some(authz)
}
None => None,
};
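// The probe above surfaces a misconfigured or unreachable authz service at
// startup (via the expect) rather than at first query time.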
let ingester_connections = if args.querier_config.ingester_addresses.is_empty() {
None
} else {
@ -235,6 +255,6 @@ pub async fn create_querier_server_type(
querier,
database,
args.common_state,
args.authz.as_ref().map(Arc::clone),
authz,
)))
}

View File

@ -21,6 +21,6 @@ router = { path = "../router" }
sharder = { path = "../sharder" }
thiserror = "1.0.40"
tokio = { version = "1.27", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] }
tokio-util = { version = "0.7.7" }
tokio-util = { version = "0.7.8" }
trace = { path = "../trace" }
workspace-hack = { version = "0.1", path = "../workspace-hack" }

View File

@ -4,7 +4,7 @@ use std::{
};
use async_trait::async_trait;
use authz::Authorizer;
use authz::{Authorizer, IoxAuthorizer};
use clap_blocks::router2::Router2Config;
use data_types::{NamespaceName, PartitionTemplate, TemplatePart};
use hashbrown::HashMap;
@ -66,6 +66,12 @@ pub enum Error {
#[error("No topic named '{topic_name}' found in the catalog")]
TopicCatalogLookup { topic_name: String },
#[error("authz configuration error for '{addr}': '{source}'")]
AuthzConfig {
source: Box<dyn std::error::Error>,
addr: String,
},
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
@ -193,7 +199,6 @@ pub async fn create_router2_server_type(
metrics: Arc<metric::Registry>,
catalog: Arc<dyn Catalog>,
object_store: Arc<DynObjectStore>,
authz: Option<Arc<dyn Authorizer>>,
router_config: &Router2Config,
) -> Result<Arc<dyn ServerType>> {
let ingester_connections = router_config.ingester_addresses.iter().map(|addr| {
@ -349,13 +354,35 @@ pub async fn create_router2_server_type(
let handler_stack = InstrumentationDecorator::new("request", &metrics, handler_stack);
// Initialize the HTTP API delegate
let write_request_unifier: Result<Box<dyn WriteRequestUnifier>> =
match (router_config.single_tenant_deployment, authz) {
(true, Some(auth)) => Ok(Box::new(SingleTenantRequestUnifier::new(auth))),
(true, None) => unreachable!("INFLUXDB_IOX_SINGLE_TENANCY is set, but could not create an authz service. Check the INFLUXDB_IOX_AUTHZ_ADDR."),
(false, None) => Ok(Box::<MultiTenantRequestUnifier>::default()),
(false, Some(_)) => unreachable!("INFLUXDB_IOX_AUTHZ_ADDR is set, but authz only exists for single_tenancy. Check the INFLUXDB_IOX_SINGLE_TENANCY."),
};
let write_request_unifier: Result<Box<dyn WriteRequestUnifier>> = match (
router_config.single_tenant_deployment,
&router_config.authz_address,
) {
(true, Some(addr)) => {
let authz = IoxAuthorizer::connect_lazy(addr.clone())
.map(|c| Arc::new(c) as Arc<dyn Authorizer>)
.map_err(|source| Error::AuthzConfig {
source,
addr: addr.clone(),
})?;
authz.probe().await.expect("Authz connection test failed.");
Ok(Box::new(SingleTenantRequestUnifier::new(authz)))
}
(true, None) => {
// Single tenancy was requested, but no auth was provided; the
// router's clap flag parse configuration should not allow this
// combination to be accepted, so execution should never reach here.
unreachable!("INFLUXDB_IOX_SINGLE_TENANCY is set, but could not create an authz service. Check the INFLUXDB_IOX_AUTHZ_ADDR")
}
(false, None) => Ok(Box::<MultiTenantRequestUnifier>::default()),
(false, Some(_)) => {
// As above, this combination should be prevented by the
// router's clap flag parse configuration.
unreachable!("INFLUXDB_IOX_AUTHZ_ADDR is set, but authz only exists for single_tenancy. Check the INFLUXDB_IOX_SINGLE_TENANCY")
}
};
let http = HttpDelegate::new(
common_state.run_config().max_http_request_size,
router_config.http_request_limit,

View File

@ -16,5 +16,5 @@ async-trait = "0.1"
clap = { version = "4", features = ["derive", "env"] }
hyper = "0.14"
snafu = "0.7"
tokio-util = "0.7.7"
tokio-util = "0.7.8"
workspace-hack = { version = "0.1", path = "../workspace-hack" }

View File

@ -39,7 +39,7 @@ schema = { path = "../schema" }
snafu = "0.7"
thiserror = "1.0"
tokio = { version = "1.27", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] }
tokio-util = { version = "0.7.7" }
tokio-util = { version = "0.7.8" }
tonic = { workspace = true }
trace = { path = "../trace" }
trace_exporters = { path = "../trace_exporters" }

View File

@ -54,7 +54,7 @@ pretty_assertions = "1.3.0"
rand = "0.8.3"
test_helpers = { version = "0.1.0", path = "../test_helpers", features = ["future_timeout"] }
tokio = { version = "1", features = ["test-util"] }
tokio-stream = { version = "0.1.12", default_features = false, features = [] }
tokio-stream = { version = "0.1.13", default_features = false, features = [] }
[lib]
# Allow --save-baseline to work

View File

@ -78,6 +78,7 @@ fn parse_v2(req: &Request<Body>) -> Result<WriteParams, MultiTenantExtractError>
#[cfg(test)]
mod tests {
use assert_matches::assert_matches;
use data_types::NamespaceNameError;
use super::*;
use crate::server::http::write::Precision;
@ -171,7 +172,7 @@ mod tests {
namespace,
..
}) => {
assert_eq!(namespace.as_str(), "cool%5Fconfusing_bucket");
assert_eq!(namespace.as_str(), "cool_confusing_bucket");
}
);
@ -189,12 +190,11 @@ mod tests {
test_parse_v2!(
encoded_quotation,
query_string = "?org=cool'confusing&bucket=bucket",
want = Ok(WriteParams {
namespace,
..
}) => {
assert_eq!(namespace.as_str(), "cool%27confusing_bucket");
}
want = Err(Error::MultiTenantError(
MultiTenantExtractError::InvalidOrgAndBucket(
OrgBucketMappingError::InvalidNamespaceName(NamespaceNameError::BadChars { .. })
)
))
);
test_parse_v2!(
@ -204,7 +204,7 @@ mod tests {
namespace,
..
}) => {
assert_eq!(namespace.as_str(), "%5Fcoolconfusing_bucket");
assert_eq!(namespace.as_str(), "_coolconfusing_bucket");
}
);

View File

@ -381,9 +381,9 @@ mod tests {
test_parse_v1!(
encoded_quotation,
query_string = "?db=ban'anas",
want = Ok(WriteParams{ namespace, precision: _ }) => {
assert_eq!(namespace.as_str(), "ban'anas");
}
want = Err(Error::SingleTenantError(
SingleTenantExtractError::InvalidNamespace(NamespaceNameError::BadChars { .. })
))
);
test_parse_v1!(
@ -483,17 +483,24 @@ mod tests {
))
);
// Do not encode potentially problematic input.
test_parse_v2!(
no_encoding,
url_encoding,
// URL-encoded input that is decoded in the HTTP layer
query_string = "?bucket=cool%2Fconfusing%F0%9F%8D%8C&prg=org",
query_string = "?bucket=cool%2Fconfusing&prg=org",
want = Ok(WriteParams {namespace, ..}) => {
// Yielding a not-encoded string as the namespace.
assert_eq!(namespace.as_str(), "cool/confusing🍌");
assert_eq!(namespace.as_str(), "cool/confusing");
}
);
test_parse_v2!(
encoded_emoji,
query_string = "?bucket=confusing%F0%9F%8D%8C&prg=org",
want = Err(Error::SingleTenantError(
SingleTenantExtractError::InvalidNamespace(NamespaceNameError::BadChars { .. })
))
);
test_parse_v2!(
org_ignored,
query_string = "?org=wat&bucket=bananas",
@ -518,14 +525,11 @@ mod tests {
);
test_parse_v2!(
encoded_quotation,
single_quotation,
query_string = "?bucket=buc'ket",
want = Ok(WriteParams {
namespace,
..
}) => {
assert_eq!(namespace.as_str(), "buc'ket");
}
want = Err(Error::SingleTenantError(
SingleTenantExtractError::InvalidNamespace(NamespaceNameError::BadChars { .. })
))
);
test_parse_v2!(

View File

@ -3,20 +3,15 @@
mod request;
use arrow::{
datatypes::SchemaRef, error::ArrowError, ipc::writer::IpcWriteOptions,
record_batch::RecordBatch,
};
use arrow::error::ArrowError;
use arrow_flight::{
encode::{FlightDataEncoder, FlightDataEncoderBuilder},
flight_descriptor::DescriptorType,
flight_service_server::{FlightService as Flight, FlightServiceServer as FlightServer},
Action, ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightEndpoint, FlightInfo,
HandshakeRequest, HandshakeResponse, PutResult, SchemaAsIpc, SchemaResult, Ticket,
HandshakeRequest, HandshakeResponse, PutResult, SchemaResult, Ticket,
};
use arrow_util::flight::prepare_schema_for_flight;
use authz::Authorizer;
use bytes::Bytes;
use data_types::NamespaceNameError;
use datafusion::{error::DataFusionError, physical_plan::ExecutionPlan};
use flightsql::FlightSQLCommand;
@ -790,6 +785,7 @@ fn flightsql_permissions(namespace_name: &str, cmd: &FlightSQLCommand) -> Vec<au
FlightSQLCommand::CommandGetPrimaryKeys(_) => authz::Action::ReadSchema,
FlightSQLCommand::CommandGetTables(_) => authz::Action::ReadSchema,
FlightSQLCommand::CommandGetTableTypes(_) => authz::Action::ReadSchema,
FlightSQLCommand::CommandGetXdbcTypeInfo(_) => authz::Action::ReadSchema,
FlightSQLCommand::ActionCreatePreparedStatementRequest(_) => authz::Action::Read,
FlightSQLCommand::ActionClosePreparedStatementRequest(_) => authz::Action::Read,
};
@ -799,7 +795,7 @@ fn flightsql_permissions(namespace_name: &str, cmd: &FlightSQLCommand) -> Vec<au
/// Wrapper over a FlightDataEncodeStream that adds IOx-specific
/// metadata and records completion
struct GetStream {
inner: IOxFlightDataEncoder,
inner: FlightDataEncoder,
#[allow(dead_code)]
permit: InstrumentedAsyncOwnedSemaphorePermit,
query_completed_token: QueryCompletedToken,
@ -830,7 +826,8 @@ impl GetStream {
});
// setup inner stream
let inner = IOxFlightDataEncoderBuilder::new(schema)
let inner = FlightDataEncoderBuilder::new()
.with_schema(schema)
.with_metadata(app_metadata.encode_to_vec().into())
.build(query_results);
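// The upstream FlightDataEncoderBuilder::with_schema() used above eagerly
// emits the schema message even when query_results yields no batches, which
// is presumably why the IOxFlightDataEncoder workaround removed below is no
// longer needed (see apache/arrow-rs#3591).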
@ -843,94 +840,6 @@ impl GetStream {
}
}
/// workaround for <https://github.com/apache/arrow-rs/issues/3591>
///
/// data encoder stream that always sends a Schema message even if the
/// underlying stream is empty
struct IOxFlightDataEncoder {
inner: FlightDataEncoder,
// The schema of the inner stream. Set to None when a schema
// message has been sent.
schema: Option<SchemaRef>,
done: bool,
}
impl IOxFlightDataEncoder {
fn new(inner: FlightDataEncoder, schema: SchemaRef) -> Self {
Self {
inner,
schema: Some(schema),
done: false,
}
}
}
#[derive(Debug)]
struct IOxFlightDataEncoderBuilder {
inner: FlightDataEncoderBuilder,
schema: SchemaRef,
}
impl IOxFlightDataEncoderBuilder {
fn new(schema: SchemaRef) -> Self {
Self {
inner: FlightDataEncoderBuilder::new().with_schema(Arc::clone(&schema)),
schema: prepare_schema_for_flight(schema),
}
}
pub fn with_metadata(mut self, app_metadata: Bytes) -> Self {
self.inner = self.inner.with_metadata(app_metadata);
self
}
pub fn build<S>(self, input: S) -> IOxFlightDataEncoder
where
S: Stream<Item = arrow_flight::error::Result<RecordBatch>> + Send + 'static,
{
let Self { inner, schema } = self;
IOxFlightDataEncoder::new(inner.build(input), schema)
}
}
impl Stream for IOxFlightDataEncoder {
type Item = arrow_flight::error::Result<FlightData>;
fn poll_next(
mut self: Pin<&mut Self>,
cx: &mut std::task::Context<'_>,
) -> Poll<Option<Self::Item>> {
loop {
if self.done {
return Poll::Ready(None);
}
let res = ready!(self.inner.poll_next_unpin(cx));
match res {
None => {
self.done = true;
// return a schema message if we haven't sent any data
if let Some(schema) = self.schema.take() {
let options = IpcWriteOptions::default();
let data: FlightData = SchemaAsIpc::new(schema.as_ref(), &options).into();
return Poll::Ready(Some(Ok(data)));
}
}
Some(Ok(data)) => {
// If any data is returned from the underlying stream no need to resend schema
self.schema = None;
return Poll::Ready(Some(Ok(data)));
}
Some(Err(e)) => {
self.done = true;
return Poll::Ready(Some(Err(e)));
}
}
}
}
}
impl Stream for GetStream {
type Item = Result<FlightData, tonic::Status>;

View File

@ -633,23 +633,26 @@ mod tests {
test_create_namespace_name!(ok, name = "bananas", want = Ok("bananas"));
test_create_namespace_name!(multi_byte, name = "🍌", want = Ok("🍌"));
test_create_namespace_name!(multi_byte, name = "🍌", want = Err(e) => {
assert_eq!(e.code(), Code::InvalidArgument);
assert_eq!(e.message(), "namespace name '🍌' contains invalid character, character number 0 is not whitelisted");
});
test_create_namespace_name!(
tab,
name = "it\tis\ttabtasitc",
want = Err(e) => {
assert_eq!(e.code(), Code::InvalidArgument);
assert_eq!(e.message(), "namespace name 'it\tis\ttabtasitc' contains invalid character, character number 2 is a control which is not allowed");
assert_eq!(e.message(), "namespace name 'it\tis\ttabtasitc' contains invalid character, character number 2 is not whitelisted");
}
);
test_create_namespace_name!(
null,
name = "bad \0 bananas",
name = "bad\0bananas",
want = Err(e) => {
assert_eq!(e.code(), Code::InvalidArgument);
assert_eq!(e.message(), "namespace name 'bad \0 bananas' contains invalid character, character number 4 is a control which is not allowed");
assert_eq!(e.message(), "namespace name 'bad\0bananas' contains invalid character, character number 3 is not whitelisted");
}
);

Some files were not shown because too many files have changed in this diff.