Merge branch 'main' into cn/remove-obsolete-docs-infra
commit bc3b69ef3f
Cargo.toml
|
@@ -81,7 +81,6 @@ members = [
"trogging",
|
||||
"wal",
|
||||
"workspace-hack",
|
||||
"write_summary",
|
||||
]
|
||||
default-members = ["influxdb_iox"]
|
||||
|
||||
|
@@ -115,12 +114,18 @@ edition = "2021"
license = "MIT OR Apache-2.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
arrow = { version = "36.0.0" }
|
||||
arrow-flight = { version = "36.0.0" }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="b87871fdd1f4ce64201eb1f7c79a0547627f37e9", default-features = false }
|
||||
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="b87871fdd1f4ce64201eb1f7c79a0547627f37e9" }
|
||||
arrow = { version = "37.0.0" }
|
||||
arrow-flight = { version = "37.0.0" }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="6e819d6c2b9280198c67fa16df3e54c79ce46ca2", default-features = false }
|
||||
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="6e819d6c2b9280198c67fa16df3e54c79ce46ca2" }
|
||||
hashbrown = { version = "0.13.2" }
|
||||
parquet = { version = "36.0.0" }
|
||||
parquet = { version = "37.0.0" }
|
||||
tonic = { version = "0.9.1", features = ["tls", "tls-webpki-roots"] }
|
||||
tonic-build = { version = "0.9.1" }
|
||||
tonic-health = { version = "0.9.1" }
|
||||
tonic-reflection = { version = "0.9.1" }
|
||||
|
||||
|
||||
|
||||
# This profile optimizes for runtime performance and small binary size at the expense of longer
|
||||
# build times. It's most suitable for final release builds.
|
||||
|
|
|
@@ -36,20 +36,17 @@ RUN \
du -cshx /usr/local/rustup /usr/local/cargo/registry /usr/local/cargo/git /influxdb_iox/target
|
||||
|
||||
|
||||
|
||||
FROM debian:bullseye-slim
|
||||
|
||||
RUN apt update \
|
||||
&& apt install --yes ca-certificates gettext-base libssl1.1 --no-install-recommends \
|
||||
&& rm -rf /var/lib/{apt,dpkg,cache,log}
|
||||
|
||||
RUN groupadd --gid 1500 iox \
|
||||
&& rm -rf /var/lib/{apt,dpkg,cache,log} \
|
||||
&& groupadd --gid 1500 iox \
|
||||
&& useradd --uid 1500 --gid iox --shell /bin/bash --create-home iox
|
||||
|
||||
USER iox
|
||||
|
||||
RUN mkdir ~/.influxdb_iox
|
||||
RUN ls -la ~/.influxdb_iox
|
||||
|
||||
ARG PACKAGE=influxdb_iox
|
||||
ENV PACKAGE=$PACKAGE
|
||||
|
@@ -57,7 +54,6 @@ ENV PACKAGE=$PACKAGE
COPY --from=build "/root/$PACKAGE" "/usr/bin/$PACKAGE"
|
||||
COPY docker/entrypoint.sh /usr/bin/entrypoint.sh
|
||||
|
||||
|
||||
EXPOSE 8080 8082
|
||||
|
||||
ENTRYPOINT ["/usr/bin/entrypoint.sh"]
|
||||
|
|
|
@@ -153,7 +153,7 @@ impl StringDictionary<i32> {
))
|
||||
.len(keys.len())
|
||||
.add_buffer(keys.collect())
|
||||
.add_child_data(self.storage.to_arrow(dictionary_nulls).data().clone())
|
||||
.add_child_data(self.storage.to_arrow(dictionary_nulls).into_data())
|
||||
.nulls(nulls)
|
||||
// TODO consider skipping the validation checks by using
|
||||
// `build_unchecked()`
|
||||
|
|
|
@@ -1,22 +1,24 @@
use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
|
||||
use arrow::datatypes::{DataType, Field, Fields, Schema, SchemaRef};
|
||||
|
||||
/// Prepare an arrow Schema for transport over the Arrow Flight protocol
|
||||
///
|
||||
/// Converts dictionary types to underlying types due to <https://github.com/apache/arrow-rs/issues/3389>
|
||||
pub fn prepare_schema_for_flight(schema: SchemaRef) -> SchemaRef {
|
||||
let fields = schema
|
||||
let fields: Fields = schema
|
||||
.fields()
|
||||
.iter()
|
||||
.map(|field| match field.data_type() {
|
||||
DataType::Dictionary(_, value_type) => Field::new(
|
||||
field.name(),
|
||||
value_type.as_ref().clone(),
|
||||
field.is_nullable(),
|
||||
)
|
||||
.with_metadata(field.metadata().clone()),
|
||||
_ => field.clone(),
|
||||
DataType::Dictionary(_, value_type) => Arc::new(
|
||||
Field::new(
|
||||
field.name(),
|
||||
value_type.as_ref().clone(),
|
||||
field.is_nullable(),
|
||||
)
|
||||
.with_metadata(field.metadata().clone()),
|
||||
),
|
||||
_ => Arc::clone(field),
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
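For illustration only (not part of the diff): a minimal usage sketch of the updated `prepare_schema_for_flight`, assuming the arrow 37 API shown in this hunk is in scope and using made-up field names. It shows a dictionary-encoded column being flattened to its value type for transport, while other fields pass through unchanged.

```rust
use std::sync::Arc;
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};

fn example() {
    // Input schema: a dictionary-encoded tag column plus a plain value column.
    let input: SchemaRef = Arc::new(Schema::new(vec![
        Field::new(
            "tag",
            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
            true,
        ),
        Field::new("value", DataType::Float64, true),
    ]));

    let prepared = prepare_schema_for_flight(input);

    // The dictionary column is transported as its value type (Utf8);
    // the plain column is unchanged.
    assert_eq!(prepared.field(0).data_type(), &DataType::Utf8);
    assert_eq!(prepared.field(1).data_type(), &DataType::Float64);
}
```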
|
|
@@ -288,9 +288,9 @@ mod tests {
Box::new(DataType::Utf8),
|
||||
))
|
||||
.len(keys.len())
|
||||
.add_buffer(keys.data().buffers()[0].clone())
|
||||
.add_buffer(keys.to_data().buffers()[0].clone())
|
||||
.nulls(keys.nulls().cloned())
|
||||
.add_child_data(values.data().clone())
|
||||
.add_child_data(values.into_data())
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
|
|
|
@@ -193,7 +193,7 @@ pub fn equalize_batch_schemas(batches: Vec<RecordBatch>) -> Result<Vec<RecordBat
/// `32/51/216/13452/1d325760-2b20-48de-ab48-2267b034133d.parquet`
|
||||
///
|
||||
/// matches `1d325760-2b20-48de-ab48-2267b034133d`
|
||||
static REGEX_UUID: Lazy<Regex> = Lazy::new(|| {
|
||||
pub static REGEX_UUID: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}").expect("UUID regex")
|
||||
});
|
||||
|
||||
|
@@ -249,6 +249,11 @@ fn normalize_for_variable_width(s: Cow<'_, str>) -> String {
REGEX_COL.replace_all(&s, " |").to_string()
|
||||
}
|
||||
|
||||
pub fn strip_table_lines(s: Cow<'_, str>) -> String {
|
||||
let s = REGEX_LINESEP.replace_all(&s, "----------");
|
||||
REGEX_COL.replace_all(&s, "").to_string()
|
||||
}
|
||||
|
||||
fn normalize_time_ops(s: &str) -> String {
|
||||
REGEX_TIME_OP
|
||||
.replace_all(s, |c: &Captures<'_>| {
|
||||
|
@@ -276,6 +281,9 @@ pub struct Normalizer {
/// if true, normalize filter predicates for explain plans
|
||||
/// `FilterExec: <REDACTED>`
|
||||
pub normalized_filters: bool,
|
||||
|
||||
/// if `true`, render tables without borders.
|
||||
pub no_table_borders: bool,
|
||||
}
|
||||
|
||||
impl Normalizer {
|
||||
|
@@ -403,5 +411,8 @@ impl Normalizer {
if self.normalized_filters {
|
||||
output.push("-- Results After Normalizing Filters".into())
|
||||
}
|
||||
if self.no_table_borders {
|
||||
output.push("-- Results After No Table Borders".into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -16,5 +16,4 @@ workspace-hack = { version = "0.1", path = "../workspace-hack" }
# crates.io dependencies in alphabetical order.
|
||||
async-trait = "0.1"
|
||||
snafu = "0.7"
|
||||
tonic = "0.8"
|
||||
|
||||
tonic = { workspace = true }
|
||||
|
|
|
@@ -18,7 +18,7 @@ metric = { path = "../metric" }
object_store = "0.5.6"
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0.95"
|
||||
serde_json = "1.0.96"
|
||||
snafu = "0.7"
|
||||
tempfile = "3.5.0"
|
||||
trace = { path = "../trace" }
|
||||
|
|
|
@@ -1,37 +0,0 @@
//! CLI config for the ingest_replica
|
||||
|
||||
use crate::ingester_address::IngesterAddress;
|
||||
|
||||
/// CLI config for the ingest_replica
|
||||
#[derive(Debug, Clone, clap::Parser)]
|
||||
#[allow(missing_copy_implementations)]
|
||||
pub struct IngestReplicaConfig {
|
||||
/// gRPC address for the replica to talk with the ingesters. For
|
||||
/// example:
|
||||
///
|
||||
/// "http://127.0.0.1:8083"
|
||||
///
|
||||
/// or
|
||||
///
|
||||
/// "http://10.10.10.1:8083,http://10.10.10.2:8083"
|
||||
///
|
||||
/// for multiple addresses.
|
||||
#[clap(
|
||||
long = "ingester-addresses",
|
||||
env = "INFLUXDB_IOX_INGESTER_ADDRESSES",
|
||||
required = true,
|
||||
num_args=1..,
|
||||
value_delimiter = ','
|
||||
)]
|
||||
pub ingester_addresses: Vec<IngesterAddress>,
|
||||
|
||||
/// Sets how many queries the replica will handle simultaneously before
|
||||
/// rejecting further incoming requests.
|
||||
#[clap(
|
||||
long = "concurrent-query-limit",
|
||||
env = "INFLUXDB_IOX_CONCURRENT_QUERY_LIMIT",
|
||||
default_value = "200",
|
||||
action
|
||||
)]
|
||||
pub concurrent_query_limit: usize,
|
||||
}
|
|
@@ -16,7 +16,6 @@ pub mod authz;
pub mod catalog_dsn;
|
||||
pub mod compactor2;
|
||||
pub mod garbage_collector;
|
||||
pub mod ingest_replica;
|
||||
pub mod ingester2;
|
||||
pub mod ingester_address;
|
||||
pub mod object_store;
|
||||
|
|
|
@@ -1,50 +1,7 @@
//! Querier-related configs.
|
||||
|
||||
use crate::ingester_address::IngesterAddress;
|
||||
use data_types::{IngesterMapping, ShardIndex};
|
||||
use serde::Deserialize;
|
||||
use snafu::{ResultExt, Snafu};
|
||||
use std::{
|
||||
collections::HashMap, fs, io, num::NonZeroUsize, path::PathBuf, str::FromStr, sync::Arc,
|
||||
};
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
#[allow(missing_docs)]
|
||||
pub enum Error {
|
||||
#[snafu(display("Could not read shard to ingester file `{}`: {source}", file.display()))]
|
||||
ShardToIngesterFileReading { source: io::Error, file: PathBuf },
|
||||
|
||||
#[snafu(display("Could not deserialize JSON from ingester config: {source}"))]
|
||||
ShardToIngesterDeserializing { source: serde_json::Error },
|
||||
|
||||
#[snafu(display(
|
||||
"Specifying `\"ignoreAll\": true` requires that both the `ingesters` and \
|
||||
`shards` configurations are empty. `ingesters`: `{:#?}`, `shards`: `{:#?}`",
|
||||
ingesters,
|
||||
shards,
|
||||
))]
|
||||
IgnoreAllRequiresEmptyConfig {
|
||||
ingesters: HashMap<Arc<str>, Arc<IngesterConfig>>,
|
||||
shards: HashMap<ShardIndex, ShardConfig>,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Ingester `{name}` must either set the `addr` to a non-empty value or set `ignore` to true"
|
||||
))]
|
||||
IngesterAddrRequired { name: Arc<str> },
|
||||
|
||||
#[snafu(display(
|
||||
"Could not find ingester `{name}` specified for shard index `{shard_index}`"
|
||||
))]
|
||||
IngesterNotFound {
|
||||
shard_index: ShardIndex,
|
||||
name: Arc<str>,
|
||||
},
|
||||
|
||||
#[snafu(context(false))]
|
||||
IngesterAddress {
|
||||
source: crate::ingester_address::Error,
|
||||
},
|
||||
}
|
||||
use std::num::NonZeroUsize;
|
||||
|
||||
/// CLI config for querier configuration
|
||||
#[derive(Debug, Clone, PartialEq, Eq, clap::Parser)]
|
||||
|
@@ -71,144 +28,6 @@ pub struct QuerierConfig {
)]
|
||||
pub exec_mem_pool_bytes: usize,
|
||||
|
||||
/// Path to a JSON file containing a Shard index to ingesters gRPC mapping. For example:
|
||||
///
|
||||
/// ```json
|
||||
/// {
|
||||
/// // Flag to ignore all ingesters and only query persisted data. Useful for development
|
||||
/// // or creating "cold data only" clusters.
|
||||
/// //
|
||||
/// // If this is set to `true`, having non-empty `ingesters` or `shards` is a startup
|
||||
/// // error.
|
||||
/// //
|
||||
/// // default: false
|
||||
/// "ignoreAll": false,
|
||||
///
|
||||
/// // Mapping of ingester name to config.
|
||||
/// //
|
||||
/// // default: {}
|
||||
/// "ingesters": {
|
||||
/// "i1": {
|
||||
/// // Ingester address as URL.
|
||||
/// //
|
||||
/// // If this is `null` but `ignore` is false, it is an error.
|
||||
/// //
|
||||
/// // default: null
|
||||
/// "addr": "http://ingester-1:1234"
|
||||
/// },
|
||||
/// "i2": {
|
||||
/// // Flag to ignore this ingester at query time and not contact it.
|
||||
/// //
|
||||
/// // default: false
|
||||
/// "ignore": true
|
||||
/// }
|
||||
/// },
|
||||
///
|
||||
/// // Mapping of shard indexes (as strings) to ingester names. Queries to shards that do
|
||||
/// // not appear in this mapping will return an error. Using an ingester name in the
|
||||
/// // `shards` mapping that does not appear in the `ingesters` mapping is a startup error.
|
||||
/// //
|
||||
/// // default: {}
|
||||
/// "shards": {
|
||||
/// "1": {
|
||||
/// // Name of an ingester from the `ingester` mapping.
|
||||
/// //
|
||||
/// // If this is `null`, queries to this shard will error.
|
||||
/// //
|
||||
/// // default: null
|
||||
/// "ingester": "i1"
|
||||
/// },
|
||||
/// "2": {
|
||||
/// "ingester": "i1"
|
||||
/// },
|
||||
/// "3": {
|
||||
/// "ingester": "i2"
|
||||
/// },
|
||||
/// "5": {
|
||||
/// // Flag to not fetch data from any ingester for queries to this shard.
|
||||
/// //
|
||||
/// // default: false
|
||||
/// "ignore": true
|
||||
/// }
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
#[clap(
|
||||
long = "shard-to-ingesters-file",
|
||||
env = "INFLUXDB_IOX_SHARD_TO_INGESTERS_FILE",
|
||||
action
|
||||
)]
|
||||
pub shard_to_ingesters_file: Option<PathBuf>,
|
||||
|
||||
/// JSON containing a Shard index to ingesters gRPC mapping. For example:
|
||||
///
|
||||
/// ```json
|
||||
/// {
|
||||
/// // Flag to ignore all ingesters and only query persisted data. Useful for development
|
||||
/// // or creating "cold data only" clusters.
|
||||
/// //
|
||||
/// // If this is set to `true`, having non-empty `ingesters` or `shards` is a startup
|
||||
/// // error.
|
||||
/// //
|
||||
/// // default: false
|
||||
/// "ignoreAll": false,
|
||||
///
|
||||
/// // Mapping of ingester name to config.
|
||||
/// //
|
||||
/// // default: {}
|
||||
/// "ingesters": {
|
||||
/// "i1": {
|
||||
/// // Ingester address as URL.
|
||||
/// //
|
||||
/// // If this is `null` but `ignore` is false, it is an error.
|
||||
/// //
|
||||
/// // default: null
|
||||
/// "addr": "http://ingester-1:1234"
|
||||
/// },
|
||||
/// "i2": {
|
||||
/// // Flag to ignore this ingester at query time and not contact it.
|
||||
/// //
|
||||
/// // default: false
|
||||
/// "ignore": true
|
||||
/// }
|
||||
/// },
|
||||
///
|
||||
/// // Mapping of shard indexes (as strings) to ingester names. Queries to shards that do
|
||||
/// // not appear in this mapping will return an error. Using an ingester name in the
|
||||
/// // `shards` mapping that does not appear in the `ingesters` mapping is a startup error.
|
||||
/// //
|
||||
/// // default: {}
|
||||
/// "shards": {
|
||||
/// "1": {
|
||||
/// // Name of an ingester from the `ingester` mapping.
|
||||
/// //
|
||||
/// // If this is `null`, queries to this shard will error.
|
||||
/// //
|
||||
/// // default: null
|
||||
/// "ingester": "i1"
|
||||
/// },
|
||||
/// "2": {
|
||||
/// "ingester": "i1"
|
||||
/// },
|
||||
/// "3": {
|
||||
/// "ingester": "i2"
|
||||
/// },
|
||||
/// "5": {
|
||||
/// // Flag to not fetch data from any ingester for queries to this shard.
|
||||
/// //
|
||||
/// // default: false
|
||||
/// "ignore": true
|
||||
/// }
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
#[clap(
|
||||
long = "shard-to-ingesters",
|
||||
env = "INFLUXDB_IOX_SHARD_TO_INGESTERS",
|
||||
action
|
||||
)]
|
||||
pub shard_to_ingesters: Option<String>,
|
||||
|
||||
/// gRPC address for the router to talk with the ingesters. For
|
||||
/// example:
|
||||
///
|
||||
|
@@ -219,8 +38,14 @@ pub struct QuerierConfig
/// "http://10.10.10.1:8083,http://10.10.10.2:8083"
|
||||
///
|
||||
/// for multiple addresses.
|
||||
#[clap(long = "ingester-addresses", env = "INFLUXDB_IOX_INGESTER_ADDRESSES", num_args=1.., value_delimiter = ',')]
|
||||
pub ingester_addresses: Vec<String>,
|
||||
#[clap(
|
||||
long = "ingester-addresses",
|
||||
env = "INFLUXDB_IOX_INGESTER_ADDRESSES",
|
||||
required = false,
|
||||
num_args = 0..,
|
||||
value_delimiter = ','
|
||||
)]
|
||||
pub ingester_addresses: Vec<IngesterAddress>,
|
||||
|
||||
/// Size of the RAM cache used to store catalog metadata information in bytes.
|
||||
#[clap(
|
||||
|
@@ -256,11 +81,12 @@ pub struct QuerierConfig {
/// returning results that do not include unpersisted data and enter "circuit breaker mode"
|
||||
/// to avoid continually retrying the failing connection on subsequent queries.
|
||||
///
|
||||
/// If circuits are open, the querier will NOT contact the ingester and no unpersisted data will be presented to the user.
|
||||
/// If circuits are open, the querier will NOT contact the ingester and no unpersisted data
|
||||
/// will be presented to the user.
|
||||
///
|
||||
/// Circuits will switch to "half open" after some jittered timeout and the querier will try to use the ingester in
|
||||
/// question again. If this succeeds, we are back to normal, otherwise it will back off exponentially before trying
|
||||
/// again (and again ...).
|
||||
/// Circuits will switch to "half open" after some jittered timeout and the querier will try to
|
||||
/// use the ingester in question again. If this succeeds, we are back to normal, otherwise it
|
||||
/// will back off exponentially before trying again (and again ...).
|
||||
///
|
||||
/// In a production environment the `ingester_circuit_state` metric should be monitored.
|
||||
#[clap(
|
||||
|
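The circuit-breaker behaviour described in the doc comment above (skip an ingester while its circuit is open, probe it again after a jittered timeout, back off exponentially on repeated failure) can be pictured as a small state machine. This is only an illustrative sketch, not the querier's implementation; the type, methods, and backoff constants are invented.

```rust
use std::time::{Duration, Instant};

/// Illustrative circuit state for a single ingester connection.
enum Circuit {
    /// Normal operation: the querier contacts the ingester.
    Closed,
    /// The ingester is considered unavailable; skip it until `retry_at`.
    Open { retry_at: Instant, backoff: Duration },
    /// One probe request is allowed through to test the ingester.
    HalfOpen { backoff: Duration },
}

impl Circuit {
    /// A request to the ingester failed: open the circuit and grow the backoff.
    fn on_failure(self) -> Circuit {
        let backoff = match self {
            // First failure: start with a short timeout (jittered in practice).
            Circuit::Closed => Duration::from_secs(1),
            // Repeated failure: back off exponentially.
            Circuit::Open { backoff, .. } | Circuit::HalfOpen { backoff } => backoff * 2,
        };
        Circuit::Open {
            retry_at: Instant::now() + backoff,
            backoff,
        }
    }

    /// A request succeeded: back to normal operation.
    fn on_success(self) -> Circuit {
        Circuit::Closed
    }

    /// Should the querier contact this ingester right now?
    fn should_contact(&mut self) -> bool {
        if let Circuit::Open { retry_at, backoff } = *self {
            if Instant::now() >= retry_at {
                // Timeout elapsed: switch to half-open and allow one probe.
                *self = Circuit::HalfOpen { backoff };
            }
        }
        !matches!(self, Circuit::Open { .. })
    }
}
```

While the circuit is open, queries simply omit that ingester's unpersisted data, which is the trade-off the option documented above describes.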
@@ -279,46 +105,6 @@ impl QuerierConfig {
self.num_query_threads
|
||||
}
|
||||
|
||||
/// Return the querier config's ingester addresses. If `--shard-to-ingesters-file` is used to
|
||||
/// specify a JSON file containing shard to ingester address mappings, this returns `Err` if
|
||||
/// there are any problems reading, deserializing, or interpreting the file.
|
||||
|
||||
// When we have switched to using the RPC write path only, this method can be changed to be
|
||||
// infallible as clap will handle failure to parse the list of strings.
|
||||
//
|
||||
// Switching into the RPC write path mode requires *both* the `INFLUXDB_IOX_RPC_MODE`
|
||||
// environment variable to be specified *and* `--ingester-addresses` to be set in order to
|
||||
// switch. Setting `INFLUXDB_IOX_RPC_MODE` and shard-to-ingesters mapping, or not setting
|
||||
// `INFLUXDB_IOX_RPC_MODE` and setting ingester addresses, will panic.
|
||||
pub fn ingester_addresses(&self) -> Result<IngesterAddresses, Error> {
|
||||
if let Some(file) = &self.shard_to_ingesters_file {
|
||||
let contents =
|
||||
fs::read_to_string(file).context(ShardToIngesterFileReadingSnafu { file })?;
|
||||
let map = deserialize_shard_ingester_map(&contents)?;
|
||||
if map.is_empty() {
|
||||
Ok(IngesterAddresses::None)
|
||||
} else {
|
||||
Ok(IngesterAddresses::ByShardIndex(map))
|
||||
}
|
||||
} else if let Some(contents) = &self.shard_to_ingesters {
|
||||
let map = deserialize_shard_ingester_map(contents)?;
|
||||
if map.is_empty() {
|
||||
Ok(IngesterAddresses::None)
|
||||
} else {
|
||||
Ok(IngesterAddresses::ByShardIndex(map))
|
||||
}
|
||||
} else if !self.ingester_addresses.is_empty() {
|
||||
Ok(IngesterAddresses::List(
|
||||
self.ingester_addresses
|
||||
.iter()
|
||||
.map(|addr| IngesterAddress::from_str(addr))
|
||||
.collect::<Result<Vec<_>, _>>()?,
|
||||
))
|
||||
} else {
|
||||
Ok(IngesterAddresses::None)
|
||||
}
|
||||
}
|
||||
|
||||
/// Size of the RAM cache pool for metadata in bytes.
|
||||
pub fn ram_pool_metadata_bytes(&self) -> usize {
|
||||
self.ram_pool_metadata_bytes
|
||||
|
@@ -335,131 +121,18 @@ impl QuerierConfig {
}
|
||||
}
|
||||
|
||||
fn deserialize_shard_ingester_map(
|
||||
contents: &str,
|
||||
) -> Result<HashMap<ShardIndex, IngesterMapping>, Error> {
|
||||
let ingesters_config: IngestersConfig =
|
||||
serde_json::from_str(contents).context(ShardToIngesterDeserializingSnafu)?;
|
||||
|
||||
if ingesters_config.ignore_all
|
||||
&& (!ingesters_config.ingesters.is_empty() || !ingesters_config.shards.is_empty())
|
||||
{
|
||||
return IgnoreAllRequiresEmptyConfigSnafu {
|
||||
ingesters: ingesters_config.ingesters,
|
||||
shards: ingesters_config.shards,
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
|
||||
let mut ingester_mapping_by_name = HashMap::new();
|
||||
|
||||
for (name, config) in &ingesters_config.ingesters {
|
||||
match (config.ignore, config.addr.as_ref()) {
|
||||
(true, _) => {
|
||||
ingester_mapping_by_name.insert(name, IngesterMapping::Ignore);
|
||||
}
|
||||
(false, None) => {
|
||||
return IngesterAddrRequiredSnafu {
|
||||
name: Arc::clone(name),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
(false, Some(addr)) if addr.is_empty() => {
|
||||
return IngesterAddrRequiredSnafu {
|
||||
name: Arc::clone(name),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
(false, Some(addr)) => {
|
||||
ingester_mapping_by_name.insert(name, IngesterMapping::Addr(Arc::clone(addr)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut map = HashMap::new();
|
||||
|
||||
for (shard_index, shard_config) in ingesters_config.shards {
|
||||
if shard_config.ignore {
|
||||
map.insert(shard_index, IngesterMapping::Ignore);
|
||||
continue;
|
||||
}
|
||||
match shard_config.ingester {
|
||||
Some(ingester) => match ingester_mapping_by_name.get(&ingester) {
|
||||
Some(ingester_mapping) => {
|
||||
map.insert(shard_index, ingester_mapping.clone());
|
||||
}
|
||||
None => {
|
||||
return IngesterNotFoundSnafu {
|
||||
name: Arc::clone(&ingester),
|
||||
shard_index,
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
},
|
||||
None => {
|
||||
map.insert(shard_index, IngesterMapping::NotMapped);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(map)
|
||||
}
|
||||
|
||||
/// Ingester addresses.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum IngesterAddresses {
|
||||
/// A mapping from shard index to ingesters.
|
||||
ByShardIndex(HashMap<ShardIndex, IngesterMapping>),
|
||||
|
||||
/// A list of ingester2 addresses.
|
||||
List(Vec<IngesterAddress>),
|
||||
|
||||
/// No connections, meaning only persisted data should be used.
|
||||
None,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct IngestersConfig {
|
||||
#[serde(default)]
|
||||
ignore_all: bool,
|
||||
#[serde(default)]
|
||||
ingesters: HashMap<Arc<str>, Arc<IngesterConfig>>,
|
||||
#[serde(default)]
|
||||
shards: HashMap<ShardIndex, ShardConfig>,
|
||||
}
|
||||
|
||||
/// Ingester config.
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct IngesterConfig {
|
||||
addr: Option<Arc<str>>,
|
||||
#[serde(default)]
|
||||
ignore: bool,
|
||||
}
|
||||
|
||||
/// Shard config.
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct ShardConfig {
|
||||
ingester: Option<Arc<str>>,
|
||||
#[serde(default)]
|
||||
ignore: bool,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use clap::Parser;
|
||||
use test_helpers::assert_error;
|
||||
use test_helpers::assert_contains;
|
||||
|
||||
#[test]
|
||||
fn test_default() {
|
||||
let actual = QuerierConfig::try_parse_from(["my_binary"]).unwrap();
|
||||
|
||||
assert_eq!(actual.num_query_threads(), None);
|
||||
assert!(matches!(
|
||||
actual.ingester_addresses().unwrap(),
|
||||
IngesterAddresses::None,
|
||||
));
|
||||
assert!(actual.ingester_addresses.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@@ -471,26 +144,25 @@ mod tests {
actual.num_query_threads(),
|
||||
Some(NonZeroUsize::new(42).unwrap())
|
||||
);
|
||||
assert!(matches!(
|
||||
actual.ingester_addresses().unwrap(),
|
||||
IngesterAddresses::None,
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ingester_addresses_list() {
|
||||
let actual = QuerierConfig::try_parse_from([
|
||||
let querier = QuerierConfig::try_parse_from([
|
||||
"my_binary",
|
||||
"--ingester-addresses",
|
||||
"http://ingester-0:8082,http://ingester-1:8082",
|
||||
])
|
||||
.unwrap();
|
||||
|
||||
let expected = IngesterAddresses::List(vec![
|
||||
IngesterAddress::from_str("http://ingester-0:8082").unwrap(),
|
||||
IngesterAddress::from_str("http://ingester-1:8082").unwrap(),
|
||||
]);
|
||||
assert_eq!(actual.ingester_addresses().unwrap(), expected);
|
||||
let actual: Vec<_> = querier
|
||||
.ingester_addresses
|
||||
.iter()
|
||||
.map(ToString::to_string)
|
||||
.collect();
|
||||
|
||||
let expected = vec!["http://ingester-0:8082/", "http://ingester-1:8082/"];
|
||||
assert_eq!(actual, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@@ -500,285 +172,15 @@ mod tests {
"--ingester-addresses",
|
||||
"\\ingester-0:8082",
|
||||
])
|
||||
.unwrap()
|
||||
.ingester_addresses();
|
||||
assert_error!(actual, Error::IngesterAddress { .. });
|
||||
}
|
||||
.unwrap_err()
|
||||
.to_string();
|
||||
|
||||
#[test]
|
||||
fn supply_json_value() {
|
||||
let actual = QuerierConfig::try_parse_from([
|
||||
"my_binary",
|
||||
"--shard-to-ingesters",
|
||||
r#"{
|
||||
"ignoreAll": false,
|
||||
"ingesters": {
|
||||
"i1": {
|
||||
"addr": "http://ingester-1:1234"
|
||||
},
|
||||
"i2": {
|
||||
"ignore": true
|
||||
},
|
||||
"i3": {
|
||||
"ignore": true,
|
||||
"addr": "http://ingester-2:2345"
|
||||
}
|
||||
},
|
||||
"shards": {
|
||||
"1": {
|
||||
"ingester": "i1"
|
||||
},
|
||||
"2": {
|
||||
"ingester": "i2"
|
||||
},
|
||||
"5": {
|
||||
"ignore": true
|
||||
}
|
||||
}
|
||||
}"#,
|
||||
])
|
||||
.unwrap();
|
||||
|
||||
let expected = IngesterAddresses::ByShardIndex(
|
||||
[
|
||||
(
|
||||
ShardIndex::new(1),
|
||||
IngesterMapping::Addr("http://ingester-1:1234".into()),
|
||||
),
|
||||
(ShardIndex::new(2), IngesterMapping::Ignore),
|
||||
(ShardIndex::new(5), IngesterMapping::Ignore),
|
||||
]
|
||||
.into_iter()
|
||||
.collect(),
|
||||
assert_contains!(
|
||||
actual,
|
||||
"error: \
|
||||
invalid value '\\ingester-0:8082' \
|
||||
for '--ingester-addresses [<INGESTER_ADDRESSES>...]': \
|
||||
Invalid: invalid uri character"
|
||||
);
|
||||
|
||||
assert_eq!(actual.ingester_addresses().unwrap(), expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn successful_deserialization() {
|
||||
let contents = r#"{
|
||||
"ignoreAll": false,
|
||||
"ingesters": {
|
||||
"i1": {
|
||||
"addr": "http://ingester-1:1234"
|
||||
},
|
||||
"i2": {
|
||||
"ignore": true
|
||||
},
|
||||
"i3": {
|
||||
"ignore": true,
|
||||
"addr": "http://ingester-2:2345"
|
||||
}
|
||||
},
|
||||
"shards": {
|
||||
"1": {
|
||||
"ingester": "i1"
|
||||
},
|
||||
"2": {
|
||||
"ingester": "i2"
|
||||
},
|
||||
"3": {
|
||||
"ingester": "i1",
|
||||
"ignore": true
|
||||
},
|
||||
"5": {
|
||||
"ignore": true
|
||||
}
|
||||
}
|
||||
}"#;
|
||||
|
||||
let map = deserialize_shard_ingester_map(contents).unwrap();
|
||||
|
||||
let expected = [
|
||||
(
|
||||
ShardIndex::new(1),
|
||||
IngesterMapping::Addr("http://ingester-1:1234".into()),
|
||||
),
|
||||
(ShardIndex::new(2), IngesterMapping::Ignore),
|
||||
(ShardIndex::new(3), IngesterMapping::Ignore),
|
||||
(ShardIndex::new(5), IngesterMapping::Ignore),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
assert_eq!(map, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unsuccessful_deserialization() {
|
||||
let map = deserialize_shard_ingester_map("");
|
||||
assert_error!(map, Error::ShardToIngesterDeserializing { .. });
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ignore_all_requires_empty_maps() {
|
||||
let expected = HashMap::new();
|
||||
|
||||
let map = deserialize_shard_ingester_map(
|
||||
r#"{
|
||||
"ignoreAll": true
|
||||
}"#,
|
||||
);
|
||||
assert_eq!(map.unwrap(), expected);
|
||||
|
||||
let map = deserialize_shard_ingester_map(
|
||||
r#"{
|
||||
"ignoreAll": true,
|
||||
"ingesters": {},
|
||||
"shards": {}
|
||||
}"#,
|
||||
);
|
||||
assert_eq!(map.unwrap(), expected);
|
||||
|
||||
let map = deserialize_shard_ingester_map(
|
||||
r#"{
|
||||
"ignoreAll": true,
|
||||
"ingesters": {
|
||||
"i1": {
|
||||
"addr": "http://ingester-1:1234"
|
||||
}
|
||||
},
|
||||
"shards": {}
|
||||
}"#,
|
||||
);
|
||||
assert_error!(map, Error::IgnoreAllRequiresEmptyConfig { .. });
|
||||
|
||||
let map = deserialize_shard_ingester_map(
|
||||
r#"{
|
||||
"ignoreAll": true,
|
||||
"ingesters": {},
|
||||
"shards": {
|
||||
"1": {
|
||||
"ingester": "i1"
|
||||
}
|
||||
}
|
||||
}"#,
|
||||
);
|
||||
assert_error!(map, Error::IgnoreAllRequiresEmptyConfig { .. });
|
||||
|
||||
let map = deserialize_shard_ingester_map(
|
||||
r#"{
|
||||
"ignoreAll": true,
|
||||
"ingesters": {
|
||||
"i1": {
|
||||
"addr": "http://ingester-1:1234"
|
||||
}
|
||||
},
|
||||
"shards": {
|
||||
"1": {
|
||||
"ingester": "i1"
|
||||
}
|
||||
}
|
||||
}"#,
|
||||
);
|
||||
assert_error!(map, Error::IgnoreAllRequiresEmptyConfig { .. });
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ingester_addr_must_be_specified_if_not_ignored() {
|
||||
let map = deserialize_shard_ingester_map(
|
||||
r#"{
|
||||
"ingesters": {
|
||||
"i1": {}
|
||||
}
|
||||
}"#,
|
||||
);
|
||||
assert_error!(map, Error::IngesterAddrRequired { ref name } if name.as_ref() == "i1");
|
||||
|
||||
let map = deserialize_shard_ingester_map(
|
||||
r#"{
|
||||
"ingesters": {
|
||||
"i1": {
|
||||
"addr": ""
|
||||
}
|
||||
}
|
||||
}"#,
|
||||
);
|
||||
assert_error!(map, Error::IngesterAddrRequired { ref name } if name.as_ref() == "i1");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ingester_must_be_found() {
|
||||
let map = deserialize_shard_ingester_map(
|
||||
r#"{
|
||||
"ingesters": {},
|
||||
"shards": {
|
||||
"1": {
|
||||
"ingester": "i1"
|
||||
}
|
||||
}
|
||||
}"#,
|
||||
);
|
||||
assert_error!(
|
||||
map,
|
||||
Error::IngesterNotFound { shard_index, ref name }
|
||||
if shard_index.get() == 1 && name.as_ref() == "i1"
|
||||
);
|
||||
|
||||
let map = deserialize_shard_ingester_map(
|
||||
r#"{
|
||||
"ingesters": {},
|
||||
"shards": {
|
||||
"1": {
|
||||
"ingester": ""
|
||||
}
|
||||
}
|
||||
}"#,
|
||||
);
|
||||
assert_error!(
|
||||
map,
|
||||
Error::IngesterNotFound { shard_index, ref name }
|
||||
if shard_index.get() == 1 && name.as_ref() == ""
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn shard_to_ingester_varieties() {
|
||||
let map = deserialize_shard_ingester_map(
|
||||
r#"{
|
||||
"ingesters": {
|
||||
"i1": {
|
||||
"addr": "http://ingester-1:1234"
|
||||
}
|
||||
},
|
||||
"shards": {
|
||||
"1": {
|
||||
"ingester": "i1"
|
||||
},
|
||||
"2": {},
|
||||
"3": {
|
||||
"ingester": null
|
||||
},
|
||||
"4": {
|
||||
"ignore": true
|
||||
},
|
||||
"5": {
|
||||
"ignore": true,
|
||||
"ingester": "i1"
|
||||
},
|
||||
"6": {
|
||||
"ignore": true,
|
||||
"ingester": null
|
||||
}
|
||||
}
|
||||
}"#,
|
||||
);
|
||||
|
||||
let expected = [
|
||||
(
|
||||
ShardIndex::new(1),
|
||||
IngesterMapping::Addr("http://ingester-1:1234".into()),
|
||||
),
|
||||
(ShardIndex::new(2), IngesterMapping::NotMapped),
|
||||
(ShardIndex::new(3), IngesterMapping::NotMapped),
|
||||
(ShardIndex::new(4), IngesterMapping::Ignore),
|
||||
(ShardIndex::new(5), IngesterMapping::Ignore),
|
||||
(ShardIndex::new(6), IngesterMapping::Ignore),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
assert_eq!(map.unwrap(), expected);
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -10,7 +10,7 @@ license.workspace = true
http = "0.2.9"
|
||||
reqwest = { version = "0.11", default-features = false, features = ["stream", "rustls-tls"] }
|
||||
thiserror = "1.0.40"
|
||||
tonic = { version = "0.8", features = ["tls", "tls-webpki-roots"] }
|
||||
tonic = { workspace = true }
|
||||
tower = "0.4"
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
|
||||
|
|
|
@@ -358,6 +358,16 @@ async fn execute_plan(
// Adjust concurrency based on the column count in the partition.
|
||||
let permits = compute_permits(job_semaphore.total_permits(), partition_info.column_count());
|
||||
|
||||
info!(
|
||||
partition_id = partition_info.partition_id.get(),
|
||||
jobs_running = job_semaphore.holders_acquired(),
|
||||
jobs_pending = job_semaphore.holders_pending(),
|
||||
permits_needed = permits,
|
||||
permits_acquired = job_semaphore.permits_acquired(),
|
||||
permits_pending = job_semaphore.permits_pending(),
|
||||
"requesting job semaphore",
|
||||
);
|
||||
|
||||
// draw semaphore BEFORE creating the DataFusion plan and drop it directly AFTER finishing the
|
||||
// DataFusion computation (but BEFORE doing any additional external IO).
|
||||
//
|
||||
|
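A rough sketch of the gating pattern described in this hunk, using a plain `tokio::sync::Semaphore` for illustration: the permits are acquired before the memory-hungry DataFusion plan is built and released as soon as the computation finishes, before any further external IO. The IOx semaphore wrapper and the `compute_permits` heuristic are not reproduced here, and the helper calls in the comments are placeholders.

```rust
use std::sync::Arc;
use tokio::sync::Semaphore;

async fn run_compaction_job(job_semaphore: Arc<Semaphore>, permits: u32) {
    // Acquire the permits BEFORE creating the DataFusion plan ...
    let permit = job_semaphore
        .acquire_many(permits)
        .await
        .expect("semaphore should not be closed");

    // ... build and execute the plan while holding them ...
    // let plan = build_datafusion_plan().await;
    // let batches = execute_plan_to_batches(plan).await;

    // ... and drop them BEFORE doing any additional external IO,
    // so a slow upload does not hold back other jobs.
    drop(permit);

    // upload_parquet_to_object_store(batches).await;
}
```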
|
|
@@ -270,19 +270,6 @@ impl std::str::FromStr for ShardIndex {
}
|
||||
}
|
||||
|
||||
/// Potential configurations of ingester connections for the querier to associate with a shard.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum IngesterMapping {
|
||||
/// Deliberately not mapping this shard to an ingester. If the querier gets a query for
|
||||
/// this shard, it should return an error.
|
||||
NotMapped,
|
||||
/// Deliberately not contacting ingesters for this shard. If the querier gets a query for
|
||||
/// this shard, it should only return persisted data.
|
||||
Ignore,
|
||||
/// The address of the ingester to contact for this shard.
|
||||
Addr(Arc<str>),
|
||||
}
|
||||
|
||||
/// Unique ID for a `Partition`
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type, sqlx::FromRow)]
|
||||
#[sqlx(transparent)]
|
||||
|
@@ -2300,20 +2287,6 @@ impl TimestampMinMax {
}
|
||||
}
|
||||
|
||||
/// Specifies the status of data in the ingestion process.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum ShardWriteStatus {
|
||||
/// Nothing is known about this write (e.g. it refers to a shard for which we have no
|
||||
/// information)
|
||||
ShardUnknown,
|
||||
/// The data has not yet been processed by the ingester, and thus is unreadable
|
||||
Durable,
|
||||
/// The data is readable, but not yet persisted
|
||||
Readable,
|
||||
/// The data is both readable and persisted to parquet
|
||||
Persisted,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::borrow::Cow;
|
||||
|
|
|
@@ -12,7 +12,6 @@
|
||||
pub mod config;
|
||||
pub mod sender;
|
||||
pub mod sort_exprs;
|
||||
pub mod watch;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
@@ -20,7 +19,7 @@ use std::task::{Context, Poll};
|
||||
use datafusion::arrow::array::BooleanArray;
|
||||
use datafusion::arrow::compute::filter_record_batch;
|
||||
use datafusion::arrow::datatypes::DataType;
|
||||
use datafusion::arrow::datatypes::{DataType, Fields};
|
||||
use datafusion::common::{DataFusionError, ToDFSchema};
|
||||
use datafusion::datasource::MemTable;
|
||||
use datafusion::execution::context::TaskContext;
|
||||
|
@@ -354,12 +353,12 @@ pub fn nullable_schema(schema: SchemaRef) -> SchemaRef {
schema
|
||||
} else {
|
||||
// make a new schema with all nullable fields
|
||||
let new_fields = schema
|
||||
let new_fields: Fields = schema
|
||||
.fields()
|
||||
.iter()
|
||||
.map(|f| {
|
||||
// make a copy of the field, but allow it to be nullable
|
||||
f.clone().with_nullable(true)
|
||||
f.as_ref().clone().with_nullable(true)
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
|
|
@@ -1,52 +0,0 @@
use datafusion::{
|
||||
arrow::compute::SortOptions,
|
||||
physical_expr::{PhysicalSortExpr, PhysicalSortRequirement},
|
||||
};
|
||||
|
||||
/// Structure to build [`PhysicalSortRequirement`]s for ExecutionPlans.
|
||||
///
|
||||
/// Replace with `PhysicalSortExpr::from_sort_exprs` when
|
||||
/// <https://github.com/apache/arrow-datafusion/pull/5863> is merged
|
||||
/// upstream.
|
||||
pub fn requirements_from_sort_exprs<'a>(
|
||||
exprs: impl IntoIterator<Item = &'a PhysicalSortExpr>,
|
||||
) -> Vec<PhysicalSortRequirement> {
|
||||
exprs
|
||||
.into_iter()
|
||||
.cloned()
|
||||
.map(PhysicalSortRequirement::from)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Converts the `PhysicalSortRequirement` to `PhysicalSortExpr`.
|
||||
/// If required ordering is `None` for an entry, the default
|
||||
/// ordering `ASC, NULLS LAST` is used.
|
||||
///
|
||||
/// The default is picked to be consistent with
|
||||
/// PostgreSQL: <https://www.postgresql.org/docs/current/queries-order.html>
|
||||
///
|
||||
/// Replace with `PhysicalSortExpr::from` when
|
||||
/// <https://github.com/apache/arrow-datafusion/pull/5863> is merged
|
||||
/// upstream.
|
||||
pub fn into_sort_expr(requirement: PhysicalSortRequirement) -> PhysicalSortExpr {
|
||||
let PhysicalSortRequirement { expr, options } = requirement;
|
||||
|
||||
let options = options.unwrap_or(SortOptions {
|
||||
descending: false,
|
||||
nulls_first: false,
|
||||
});
|
||||
PhysicalSortExpr { expr, options }
|
||||
}
|
||||
|
||||
/// This function converts `PhysicalSortRequirement` to `PhysicalSortExpr`
|
||||
/// for each entry in the input. If required ordering is None for an entry
|
||||
/// the default ordering `ASC, NULLS LAST` is used.
|
||||
///
|
||||
/// replace with PhysicalSortExpr::to_sort_exprs when
|
||||
/// <https://github.com/apache/arrow-datafusion/pull/5863> is merged
|
||||
/// upstream.
|
||||
pub fn requirements_to_sort_exprs(
|
||||
required: impl IntoIterator<Item = PhysicalSortRequirement>,
|
||||
) -> Vec<PhysicalSortExpr> {
|
||||
required.into_iter().map(into_sort_expr).collect()
|
||||
}
|
|
@@ -2,19 +2,19 @@
|
||||
InfluxDB IOx supports running SQL queries via [Apache Arrow Flight SQL](https://arrow.apache.org/docs/format/FlightSql.html)
|
||||
|
||||
You can use either a native FlightSQL client or the JDBC / ODBC Flight SQL drivers
You can use either a native FlightSQL client or the JDBC / ODBC Flight SQL drivers
|
||||
|
||||
## JDBC:
|
||||
|
||||
To use the JDBC driver with IOx:
|
||||
|
||||
1. Download the driver by following the link from [Maven](https://mvnrepository.com/artifact/org.apache.arrow/flight-sql/10.0.1) or [Dremio](https://www.dremio.com/drivers/jdbc/)
|
||||
2. Use a JDBC connection of the format: `jdbc:arrow-flight-sql://hostname:port?useEncryption=false&iox-namespace-name=NAME`.
2. Use a JDBC connection of the format: `jdbc:arrow-flight-sql://hostname:port?useEncryption=false&database=NAME`
|
||||
|
||||
`hostname:port` is the host / port on which the IOx query gRPC API is running (default port is 8082), and `NAME` is the namespace name (for example, `26f7e5a4b7be365b_917b97a92e883afc`)
|
||||
`hostname:port` is the host / port on which the IOx query gRPC API is running (default port is 8082), and `NAME` is the database name (for example, `26f7e5a4b7be365b_917b97a92e883afc`)
|
||||
|
||||
An example JDBC URL is:
|
||||
|
||||
```
|
||||
jdbc:arrow-flight-sql://localhost:8082?useEncryption=false&iox-namespace-name=26f7e5a4b7be365b_917b97a92e883afc
|
||||
jdbc:arrow-flight-sql://localhost:8082?useEncryption=false&database=26f7e5a4b7be365b_917b97a92e883afc
|
||||
```
|
||||
|
|
|
@@ -20,5 +20,5 @@ snafu = "0.7"
once_cell = { version = "1", default-features = false }
|
||||
prost = "0.11"
|
||||
tokio = { version = "1.27", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time"] }
|
||||
tonic = "0.8"
|
||||
tonic = { workspace = true }
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
|
|
|
@@ -4,8 +4,9 @@ use std::fmt::Display;
|
||||
use arrow_flight::sql::{
|
||||
ActionClosePreparedStatementRequest, ActionCreatePreparedStatementRequest, Any,
|
||||
CommandGetCatalogs, CommandGetDbSchemas, CommandGetPrimaryKeys, CommandGetSqlInfo,
|
||||
CommandGetTableTypes, CommandGetTables, CommandPreparedStatementQuery, CommandStatementQuery,
|
||||
CommandGetCatalogs, CommandGetCrossReference, CommandGetDbSchemas, CommandGetExportedKeys,
|
||||
CommandGetImportedKeys, CommandGetPrimaryKeys, CommandGetSqlInfo, CommandGetTableTypes,
|
||||
CommandGetTables, CommandPreparedStatementQuery, CommandStatementQuery,
|
||||
};
|
||||
use bytes::Bytes;
|
||||
use prost::Message;
|
||||
|
@@ -75,9 +76,20 @@ pub enum FlightSQLCommand {
CommandGetSqlInfo(CommandGetSqlInfo),
|
||||
/// Get a list of the available catalogs. See [`CommandGetCatalogs`] for details.
|
||||
CommandGetCatalogs(CommandGetCatalogs),
|
||||
/// Get a description of the foreign key columns in the given foreign key table
|
||||
/// that reference the primary key or the columns representing a unique constraint
|
||||
/// of the parent table (could be the same or a different table).
|
||||
/// See [`CommandGetCrossReference`] for details.
|
||||
CommandGetCrossReference(CommandGetCrossReference),
|
||||
/// Get a list of the available schemas. See [`CommandGetDbSchemas`]
|
||||
/// for details and how to interpret the parameters.
|
||||
CommandGetDbSchemas(CommandGetDbSchemas),
|
||||
/// Get a description of the foreign key columns that reference the given
|
||||
/// table's primary key columns (the foreign keys exported by a table) of a table.
|
||||
/// See [`CommandGetExportedKeys`] for details.
|
||||
CommandGetExportedKeys(CommandGetExportedKeys),
|
||||
/// Get the foreign keys of a table. See [`CommandGetImportedKeys`] for details.
|
||||
CommandGetImportedKeys(CommandGetImportedKeys),
|
||||
/// Get a list of primary keys. See [`CommandGetPrimaryKeys`] for details.
|
||||
CommandGetPrimaryKeys(CommandGetPrimaryKeys),
|
||||
/// Get a list of the available tables
|
||||
|
@@ -101,6 +113,37 @@ impl Display for FlightSQLCommand {
write!(f, "CommandGetSqlInfo(...)")
|
||||
}
|
||||
Self::CommandGetCatalogs(CommandGetCatalogs {}) => write!(f, "CommandGetCatalogs"),
|
||||
Self::CommandGetCrossReference(CommandGetCrossReference {
|
||||
pk_catalog,
|
||||
pk_db_schema,
|
||||
pk_table,
|
||||
fk_catalog,
|
||||
fk_db_schema,
|
||||
fk_table,
|
||||
}) => {
|
||||
write!(
|
||||
f,
|
||||
"CommandGetCrossReference(
|
||||
pk_catalog={},
|
||||
pk_db_schema={},
|
||||
pk_table={},
|
||||
fk_catalog={},
|
||||
fk_db_schema={},
|
||||
fk_table={}",
|
||||
pk_catalog.as_ref().map(|c| c.as_str()).unwrap_or("<NONE>"),
|
||||
pk_db_schema
|
||||
.as_ref()
|
||||
.map(|c| c.as_str())
|
||||
.unwrap_or("<NONE>"),
|
||||
pk_table,
|
||||
fk_catalog.as_ref().map(|c| c.as_str()).unwrap_or("<NONE>"),
|
||||
fk_db_schema
|
||||
.as_ref()
|
||||
.map(|c| c.as_str())
|
||||
.unwrap_or("<NONE>"),
|
||||
fk_table,
|
||||
)
|
||||
}
|
||||
Self::CommandGetDbSchemas(CommandGetDbSchemas {
|
||||
catalog,
|
||||
db_schema_filter_pattern,
|
||||
|
@@ -115,6 +158,32 @@ impl Display for FlightSQLCommand {
.unwrap_or("<NONE>")
|
||||
)
|
||||
}
|
||||
Self::CommandGetExportedKeys(CommandGetExportedKeys {
|
||||
catalog,
|
||||
db_schema,
|
||||
table,
|
||||
}) => {
|
||||
write!(
|
||||
f,
|
||||
"CommandGetExportedKeys(catalog={}, db_schema={}, table={})",
|
||||
catalog.as_ref().map(|c| c.as_str()).unwrap_or("<NONE>"),
|
||||
db_schema.as_ref().map(|c| c.as_str()).unwrap_or("<NONE>"),
|
||||
table
|
||||
)
|
||||
}
|
||||
Self::CommandGetImportedKeys(CommandGetImportedKeys {
|
||||
catalog,
|
||||
db_schema,
|
||||
table,
|
||||
}) => {
|
||||
write!(
|
||||
f,
|
||||
"CommandGetImportedKeys(catalog={}, db_schema={}, table={})",
|
||||
catalog.as_ref().map(|c| c.as_str()).unwrap_or("<NONE>"),
|
||||
db_schema.as_ref().map(|c| c.as_str()).unwrap_or("<NONE>"),
|
||||
table
|
||||
)
|
||||
}
|
||||
Self::CommandGetPrimaryKeys(CommandGetPrimaryKeys {
|
||||
catalog,
|
||||
db_schema,
|
||||
|
@@ -186,8 +255,14 @@ impl FlightSQLCommand {
Ok(Self::CommandGetSqlInfo(decoded_cmd))
|
||||
} else if let Some(decoded_cmd) = Any::unpack::<CommandGetCatalogs>(&msg)? {
|
||||
Ok(Self::CommandGetCatalogs(decoded_cmd))
|
||||
} else if let Some(decoded_cmd) = Any::unpack::<CommandGetCrossReference>(&msg)? {
|
||||
Ok(Self::CommandGetCrossReference(decoded_cmd))
|
||||
} else if let Some(decoded_cmd) = Any::unpack::<CommandGetDbSchemas>(&msg)? {
|
||||
Ok(Self::CommandGetDbSchemas(decoded_cmd))
|
||||
} else if let Some(decoded_cmd) = Any::unpack::<CommandGetExportedKeys>(&msg)? {
|
||||
Ok(Self::CommandGetExportedKeys(decoded_cmd))
|
||||
} else if let Some(decoded_cmd) = Any::unpack::<CommandGetImportedKeys>(&msg)? {
|
||||
Ok(Self::CommandGetImportedKeys(decoded_cmd))
|
||||
} else if let Some(decode_cmd) = Any::unpack::<CommandGetPrimaryKeys>(&msg)? {
|
||||
Ok(Self::CommandGetPrimaryKeys(decode_cmd))
|
||||
} else if let Some(decode_cmd) = Any::unpack::<CommandGetTables>(&msg)? {
|
||||
|
@@ -226,7 +301,10 @@ impl FlightSQLCommand {
}
|
||||
FlightSQLCommand::CommandGetSqlInfo(cmd) => Any::pack(&cmd),
|
||||
FlightSQLCommand::CommandGetCatalogs(cmd) => Any::pack(&cmd),
|
||||
FlightSQLCommand::CommandGetCrossReference(cmd) => Any::pack(&cmd),
|
||||
FlightSQLCommand::CommandGetDbSchemas(cmd) => Any::pack(&cmd),
|
||||
FlightSQLCommand::CommandGetExportedKeys(cmd) => Any::pack(&cmd),
|
||||
FlightSQLCommand::CommandGetImportedKeys(cmd) => Any::pack(&cmd),
|
||||
FlightSQLCommand::CommandGetPrimaryKeys(cmd) => Any::pack(&cmd),
|
||||
FlightSQLCommand::CommandGetTables(cmd) => Any::pack(&cmd),
|
||||
FlightSQLCommand::CommandGetTableTypes(cmd) => Any::pack(&cmd),
|
||||
|
|
|
@@ -11,8 +11,9 @@ use arrow::{
use arrow_flight::{
|
||||
sql::{
|
||||
ActionCreatePreparedStatementRequest, ActionCreatePreparedStatementResult, Any,
|
||||
CommandGetCatalogs, CommandGetDbSchemas, CommandGetPrimaryKeys, CommandGetSqlInfo,
|
||||
CommandGetTableTypes, CommandGetTables, CommandStatementQuery,
|
||||
CommandGetCatalogs, CommandGetCrossReference, CommandGetDbSchemas, CommandGetExportedKeys,
|
||||
CommandGetImportedKeys, CommandGetPrimaryKeys, CommandGetSqlInfo, CommandGetTableTypes,
|
||||
CommandGetTables, CommandStatementQuery,
|
||||
},
|
||||
IpcMessage, SchemaAsIpc,
|
||||
};
|
||||
|
@@ -64,9 +65,18 @@ impl FlightSQLPlanner {
FlightSQLCommand::CommandGetCatalogs(CommandGetCatalogs {}) => {
|
||||
encode_schema(get_catalogs_schema())
|
||||
}
|
||||
FlightSQLCommand::CommandGetCrossReference(CommandGetCrossReference { .. }) => {
|
||||
encode_schema(&GET_CROSS_REFERENCE_SCHEMA)
|
||||
}
|
||||
FlightSQLCommand::CommandGetDbSchemas(CommandGetDbSchemas { .. }) => {
|
||||
encode_schema(get_db_schemas_schema().as_ref())
|
||||
}
|
||||
FlightSQLCommand::CommandGetExportedKeys(CommandGetExportedKeys { .. }) => {
|
||||
encode_schema(&GET_EXPORTED_KEYS_SCHEMA)
|
||||
}
|
||||
FlightSQLCommand::CommandGetImportedKeys(CommandGetImportedKeys { .. }) => {
|
||||
encode_schema(&GET_IMPORTED_KEYS_SCHEMA)
|
||||
}
|
||||
FlightSQLCommand::CommandGetPrimaryKeys(CommandGetPrimaryKeys { .. }) => {
|
||||
encode_schema(&GET_PRIMARY_KEYS_SCHEMA)
|
||||
}
|
||||
|
@@ -115,6 +125,35 @@ impl FlightSQLPlanner {
let plan = plan_get_catalogs(ctx).await?;
|
||||
Ok(ctx.create_physical_plan(&plan).await?)
|
||||
}
|
||||
FlightSQLCommand::CommandGetCrossReference(CommandGetCrossReference {
|
||||
pk_catalog,
|
||||
pk_db_schema,
|
||||
pk_table,
|
||||
fk_catalog,
|
||||
fk_db_schema,
|
||||
fk_table,
|
||||
}) => {
|
||||
debug!(
|
||||
?pk_catalog,
|
||||
?pk_db_schema,
|
||||
?pk_table,
|
||||
?fk_catalog,
|
||||
?fk_db_schema,
|
||||
?fk_table,
|
||||
"Planning CommandGetCrossReference query"
|
||||
);
|
||||
let plan = plan_get_cross_reference(
|
||||
ctx,
|
||||
pk_catalog,
|
||||
pk_db_schema,
|
||||
pk_table,
|
||||
fk_catalog,
|
||||
fk_db_schema,
|
||||
fk_table,
|
||||
)
|
||||
.await?;
|
||||
Ok(ctx.create_physical_plan(&plan).await?)
|
||||
}
|
||||
FlightSQLCommand::CommandGetDbSchemas(CommandGetDbSchemas {
|
||||
catalog,
|
||||
db_schema_filter_pattern,
|
||||
|
@@ -127,6 +166,34 @@ impl FlightSQLPlanner {
let plan = plan_get_db_schemas(ctx, catalog, db_schema_filter_pattern).await?;
|
||||
Ok(ctx.create_physical_plan(&plan).await?)
|
||||
}
|
||||
FlightSQLCommand::CommandGetExportedKeys(CommandGetExportedKeys {
|
||||
catalog,
|
||||
db_schema,
|
||||
table,
|
||||
}) => {
|
||||
debug!(
|
||||
?catalog,
|
||||
?db_schema,
|
||||
?table,
|
||||
"Planning GetExportedKeys query"
|
||||
);
|
||||
let plan = plan_get_exported_keys(ctx, catalog, db_schema, table).await?;
|
||||
Ok(ctx.create_physical_plan(&plan).await?)
|
||||
}
|
||||
FlightSQLCommand::CommandGetImportedKeys(CommandGetImportedKeys {
|
||||
catalog,
|
||||
db_schema,
|
||||
table,
|
||||
}) => {
|
||||
debug!(
|
||||
?catalog,
|
||||
?db_schema,
|
||||
?table,
|
||||
"Planning CommandGetImportedKeys query"
|
||||
);
|
||||
let plan = plan_get_imported_keys(ctx, catalog, db_schema, table).await?;
|
||||
Ok(ctx.create_physical_plan(&plan).await?)
|
||||
}
|
||||
FlightSQLCommand::CommandGetPrimaryKeys(CommandGetPrimaryKeys {
|
||||
catalog,
|
||||
db_schema,
|
||||
|
@@ -272,6 +339,19 @@ async fn plan_get_catalogs(ctx: &IOxSessionContext) -> Result<LogicalPlan> {
Ok(ctx.batch_to_logical_plan(get_catalogs(ctx.inner())?)?)
|
||||
}
|
||||
|
||||
async fn plan_get_cross_reference(
|
||||
ctx: &IOxSessionContext,
|
||||
_pk_catalog: Option<String>,
|
||||
_pk_db_schema: Option<String>,
|
||||
_pk_table: String,
|
||||
_fk_catalog: Option<String>,
|
||||
_fk_db_schema: Option<String>,
|
||||
_fk_table: String,
|
||||
) -> Result<LogicalPlan> {
|
||||
let batch = RecordBatch::new_empty(Arc::clone(&GET_CROSS_REFERENCE_SCHEMA));
|
||||
Ok(ctx.batch_to_logical_plan(batch)?)
|
||||
}
|
||||
|
||||
async fn plan_get_db_schemas(
|
||||
ctx: &IOxSessionContext,
|
||||
catalog: Option<String>,
|
||||
|
@@ -281,6 +361,26 @@ async fn plan_get_db_schemas(
Ok(ctx.batch_to_logical_plan(batch)?)
|
||||
}
|
||||
|
||||
async fn plan_get_exported_keys(
|
||||
ctx: &IOxSessionContext,
|
||||
_catalog: Option<String>,
|
||||
_db_schema: Option<String>,
|
||||
_table: String,
|
||||
) -> Result<LogicalPlan> {
|
||||
let batch = RecordBatch::new_empty(Arc::clone(&GET_EXPORTED_KEYS_SCHEMA));
|
||||
Ok(ctx.batch_to_logical_plan(batch)?)
|
||||
}
|
||||
|
||||
async fn plan_get_imported_keys(
|
||||
ctx: &IOxSessionContext,
|
||||
_catalog: Option<String>,
|
||||
_db_schema: Option<String>,
|
||||
_table: String,
|
||||
) -> Result<LogicalPlan> {
|
||||
let batch = RecordBatch::new_empty(Arc::clone(&GET_IMPORTED_KEYS_SCHEMA));
|
||||
Ok(ctx.batch_to_logical_plan(batch)?)
|
||||
}
|
||||
|
||||
async fn plan_get_primary_keys(
|
||||
ctx: &IOxSessionContext,
|
||||
_catalog: Option<String>,
|
||||
|
@@ -333,6 +433,68 @@ static TABLE_TYPES_RECORD_BATCH: Lazy<RecordBatch> = Lazy::new(|| {
RecordBatch::try_new(Arc::clone(&GET_TABLE_TYPE_SCHEMA), vec![table_type]).unwrap()
|
||||
});
|
||||
|
||||
/// The returned data should be ordered by pk_catalog_name, pk_db_schema_name,
|
||||
/// pk_table_name, pk_key_name, then key_sequence.
|
||||
/// update_rule and delete_rule returns a byte that is equivalent to actions:
|
||||
/// - 0 = CASCADE
|
||||
/// - 1 = RESTRICT
|
||||
/// - 2 = SET NULL
|
||||
/// - 3 = NO ACTION
|
||||
/// - 4 = SET DEFAULT
|
||||
static GET_CROSS_REFERENCE_SCHEMA: Lazy<SchemaRef> = Lazy::new(|| {
|
||||
Arc::new(Schema::new(vec![
|
||||
Field::new("pk_catalog_name", DataType::Utf8, false),
|
||||
Field::new("pk_db_schema_name", DataType::Utf8, false),
|
||||
Field::new("pk_table_name", DataType::Utf8, false),
|
||||
Field::new("pk_column_name", DataType::Utf8, false),
|
||||
Field::new("fk_catalog_name", DataType::Utf8, false),
|
||||
Field::new("fk_db_schema_name", DataType::Utf8, false),
|
||||
Field::new("fk_table_name", DataType::Utf8, false),
|
||||
Field::new("fk_column_name", DataType::Utf8, false),
|
||||
Field::new("key_sequence", DataType::Int32, false),
|
||||
Field::new("fk_key_name", DataType::Utf8, false),
|
||||
Field::new("pk_key_name", DataType::Utf8, false),
|
||||
Field::new("update_rule", DataType::UInt8, false),
|
||||
Field::new("delete_rule", DataType::UInt8, false),
|
||||
]))
|
||||
});
|
||||
|
||||
static GET_EXPORTED_KEYS_SCHEMA: Lazy<SchemaRef> = Lazy::new(|| {
|
||||
Arc::new(Schema::new(vec![
|
||||
Field::new("pk_catalog_name", DataType::Utf8, false),
|
||||
Field::new("pk_db_schema_name", DataType::Utf8, false),
|
||||
Field::new("pk_table_name", DataType::Utf8, false),
|
||||
Field::new("pk_column_name", DataType::Utf8, false),
|
||||
Field::new("fk_catalog_name", DataType::Utf8, false),
|
||||
Field::new("fk_db_schema_name", DataType::Utf8, false),
|
||||
Field::new("fk_table_name", DataType::Utf8, false),
|
||||
Field::new("fk_column_name", DataType::Utf8, false),
|
||||
Field::new("key_sequence", DataType::Int32, false),
|
||||
Field::new("fk_key_name", DataType::Utf8, false),
|
||||
Field::new("pk_key_name", DataType::Utf8, false),
|
||||
Field::new("update_rule", DataType::UInt8, false),
|
||||
Field::new("delete_rule", DataType::UInt8, false),
|
||||
]))
|
||||
});
|
||||
|
||||
static GET_IMPORTED_KEYS_SCHEMA: Lazy<SchemaRef> = Lazy::new(|| {
|
||||
Arc::new(Schema::new(vec![
|
||||
Field::new("pk_catalog_name", DataType::Utf8, false),
|
||||
Field::new("pk_db_schema_name", DataType::Utf8, false),
|
||||
Field::new("pk_table_name", DataType::Utf8, false),
|
||||
Field::new("pk_column_name", DataType::Utf8, false),
|
||||
Field::new("fk_catalog_name", DataType::Utf8, false),
|
||||
Field::new("fk_db_schema_name", DataType::Utf8, false),
|
||||
Field::new("fk_table_name", DataType::Utf8, false),
|
||||
Field::new("fk_column_name", DataType::Utf8, false),
|
||||
Field::new("key_sequence", DataType::Int32, false),
|
||||
Field::new("fk_key_name", DataType::Utf8, false),
|
||||
Field::new("pk_key_name", DataType::Utf8, false),
|
||||
Field::new("update_rule", DataType::UInt8, false),
|
||||
Field::new("delete_rule", DataType::UInt8, false),
|
||||
]))
|
||||
});
|
||||
|
||||
static GET_PRIMARY_KEYS_SCHEMA: Lazy<SchemaRef> = Lazy::new(|| {
|
||||
Arc::new(Schema::new(vec![
|
||||
Field::new("catalog_name", DataType::Utf8, false),
|
||||
|
|
|
@@ -1,9 +1,11 @@
use std::sync::Arc;
|
||||
|
||||
use arrow::{
|
||||
array::{
|
||||
Array, ArrayBuilder, ArrayData, BooleanBuilder, Int32Builder, Int64Builder, Int8Builder,
|
||||
ListBuilder, StringBuilder, UnionArray,
|
||||
},
|
||||
datatypes::{DataType, Field, UnionMode},
|
||||
datatypes::{DataType, Field, UnionFields, UnionMode},
|
||||
};
|
||||
use arrow_flight::sql::SqlInfo;
|
||||
use once_cell::sync::Lazy;
|
||||
|
@@ -118,7 +120,7 @@ static UNION_TYPE: Lazy<DataType> = Lazy::new(|| {
// treat list as nullable b/c that is what the builders make
|
||||
Field::new(
|
||||
"string_list",
|
||||
DataType::List(Box::new(Field::new("item", DataType::Utf8, true))),
|
||||
DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))),
|
||||
true,
|
||||
),
|
||||
];
|
||||
|
@@ -127,7 +129,7 @@ static UNION_TYPE: Lazy<DataType> = Lazy::new(|| {
// assume they go from 0 .. num_fields
|
||||
let type_ids: Vec<i8> = (0..fields.len()).map(|v| v as i8).collect();
|
||||
|
||||
DataType::Union(fields, type_ids, UnionMode::Dense)
|
||||
DataType::Union(UnionFields::new(type_ids, fields), UnionMode::Dense)
|
||||
});
|
||||
|
||||
impl SqlInfoUnionBuilder {
|
||||
|
|
|
@@ -19,11 +19,11 @@ prost = "0.11"
query_functions = { path = "../query_functions" }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
snafu = "0.7"
|
||||
tonic = "0.8"
|
||||
tonic = { workspace = true }
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
|
||||
[build-dependencies] # In alphabetical order
|
||||
tonic-build = "0.8"
|
||||
tonic-build = { workspace = true }
|
||||
prost-build = "0.11"
|
||||
pbjson-build = "0.5"
|
||||
|
||||
|
|
|
@@ -47,7 +47,6 @@ fn generate_grpc_types(root: &Path) -> Result<()> {
let sharder_path = root.join("influxdata/iox/sharder/v1");
|
||||
let wal_path = root.join("influxdata/iox/wal/v1");
|
||||
let write_buffer_path = root.join("influxdata/iox/write_buffer/v1");
|
||||
let write_summary_path = root.join("influxdata/iox/write_summary/v1");
|
||||
let storage_path = root.join("influxdata/platform/storage");
|
||||
let storage_errors_path = root.join("influxdata/platform/errors");
|
||||
|
||||
|
@@ -59,7 +58,6 @@ fn generate_grpc_types(root: &Path) -> Result<()> {
delete_path.join("service.proto"),
|
||||
ingester_path.join("parquet_metadata.proto"),
|
||||
ingester_path.join("query.proto"),
|
||||
ingester_path.join("write_info.proto"),
|
||||
ingester_path.join("write.proto"),
|
||||
ingester_path.join("replication.proto"),
|
||||
ingester_path.join("persist.proto"),
|
||||
|
@@ -76,7 +74,6 @@ fn generate_grpc_types(root: &Path) -> Result<()> {
sharder_path.join("sharder.proto"),
|
||||
wal_path.join("wal.proto"),
|
||||
write_buffer_path.join("write_buffer.proto"),
|
||||
write_summary_path.join("write_summary.proto"),
|
||||
storage_path.join("predicate.proto"),
|
||||
storage_path.join("service.proto"),
|
||||
storage_path.join("source.proto"),
|
||||
|
|
|
@@ -71,20 +71,14 @@ message IngesterQueryResponseMetadata {
reserved 6;
|
||||
|
||||
// Partition id for this batch.
|
||||
//
|
||||
// This field is currently NOT used by the ingester but will be soon.
|
||||
int64 partition_id = 7;
|
||||
|
||||
// Optional partition status.
|
||||
//
|
||||
// If this is given, then no schema and no batch will be part of this FlightData object.
|
||||
//
|
||||
// This field is currently NOT used by the ingester but will be soon.
|
||||
PartitionStatus status = 8;
|
||||
|
||||
// UUID of this ingester instance.
|
||||
//
|
||||
// This field is currently NOT used by the ingester but will be soon.
|
||||
string ingester_uuid = 9;
|
||||
|
||||
// Number of Parquet files that have been persisted to object storage for this partition.
|
||||
|
|
|
@@ -1,57 +0,0 @@
syntax = "proto3";
|
||||
package influxdata.iox.ingester.v1;
|
||||
option go_package = "github.com/influxdata/iox/ingester/v1";
|
||||
|
||||
// NOTE: This is an ALPHA / Internal API that is used as part of the
|
||||
// end to end tests.
|
||||
//
|
||||
// A public API is tracked here:
|
||||
// <https://github.com/influxdata/influxdb_iox/issues/4354>
|
||||
service WriteInfoService {
|
||||
// Get information about a particular write
|
||||
rpc GetWriteInfo(GetWriteInfoRequest) returns (GetWriteInfoResponse);
|
||||
}
|
||||
|
||||
message GetWriteInfoRequest {
|
||||
// The write token returned from a write that was written to one or
|
||||
// more shards
|
||||
string write_token = 1;
|
||||
}
|
||||
|
||||
message GetWriteInfoResponse {
|
||||
// Renamed from kafka_partition_infos to shard_infos
|
||||
reserved 3;
|
||||
reserved "kafka_partition_infos";
|
||||
|
||||
// Information for all shards in this write
|
||||
repeated ShardInfo shard_infos = 4;
|
||||
}
|
||||
|
||||
// Status of a part of a write in a particular shard
|
||||
message ShardInfo {
|
||||
// Unique shard index
|
||||
int32 shard_index = 1;
|
||||
|
||||
// the status of the data for this shard
|
||||
ShardStatus status = 2;
|
||||
}
|
||||
|
||||
// the state
|
||||
enum ShardStatus {
|
||||
// Unspecified status, will result in an error.
|
||||
SHARD_STATUS_UNSPECIFIED = 0;
|
||||
|
||||
// The ingester has not yet processed data in this write
|
||||
SHARD_STATUS_DURABLE = 1;
|
||||
|
||||
// The ingester has processed the data in this write and it is
|
||||
// readable (will be included in a query response)?
|
||||
SHARD_STATUS_READABLE = 2;
|
||||
|
||||
// The ingester has processed the data in this write and it is both
|
||||
// readable and completely persisted to parquet files.
|
||||
SHARD_STATUS_PERSISTED = 3;
|
||||
|
||||
// The ingester does not have information about this shard
|
||||
SHARD_STATUS_UNKNOWN = 4;
|
||||
}
|
|
@ -1,24 +0,0 @@
syntax = "proto3";
package influxdata.iox.write_summary.v1;
option go_package = "github.com/influxdata/iox/write_summary/v1";

// Represents a single logical write that was partitioned and sharded
// into multiple pieces in multiple shards (kafka partitions)
message WriteSummary {
// Renamed from sequencers to shards
reserved 1;
reserved "sequencers";

// per shard index (kafka partition) information
repeated ShardWrite shards = 2;
}

// Per shard (kafka partition) information about what sequence
// numbers contain part of a write
message ShardWrite {
// Unique shard index (kafka partition).
int32 shard_index = 1;

// Which sequence numbers for this shard had data
repeated int64 sequence_numbers = 2;
}
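For context on what this (now removed) message carried, here is a sketch of how the prost-generated Rust types for it would be populated. The module path mirrors the `write_summary` module removed from `generated_types` just below and is an assumption, as are the example shard indexes and sequence numbers.

```rust
// Sketch only: these types were generated from the proto above and are being
// removed along with it.
use generated_types::influxdata::iox::write_summary::v1::{ShardWrite, WriteSummary};

fn example_summary() -> WriteSummary {
    // One logical write that landed in two shards (kafka partitions),
    // recording which sequence numbers in each shard contain its data.
    WriteSummary {
        shards: vec![
            ShardWrite {
                shard_index: 1,
                sequence_numbers: vec![42, 43],
            },
            ShardWrite {
                shard_index: 2,
                sequence_numbers: vec![7],
            },
        ],
    }
}
```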
@ -196,19 +196,6 @@ pub mod influxdata {
));
}
}

pub mod write_summary {
pub mod v1 {
include!(concat!(
env!("OUT_DIR"),
"/influxdata.iox.write_summary.v1.rs"
));
include!(concat!(
env!("OUT_DIR"),
"/influxdata.iox.write_summary.v1.serde.rs"
));
}
}
}

pub mod pbdata {

@ -281,8 +268,6 @@ pub mod compactor;
pub mod delete_predicate;
#[cfg(any(feature = "data_types_conversions", test))]
pub mod ingester;
#[cfg(any(feature = "data_types_conversions", test))]
pub mod write_info;

pub use prost::{DecodeError, EncodeError};
@ -1,155 +0,0 @@
|
|||
use crate::influxdata::iox::ingester::v1 as proto;
|
||||
use data_types::ShardWriteStatus;
|
||||
use std::collections::HashMap;
|
||||
|
||||
impl From<ShardWriteStatus> for proto::ShardStatus {
|
||||
fn from(status: ShardWriteStatus) -> Self {
|
||||
match status {
|
||||
ShardWriteStatus::ShardUnknown => Self::Unknown,
|
||||
ShardWriteStatus::Durable => Self::Durable,
|
||||
ShardWriteStatus::Readable => Self::Readable,
|
||||
ShardWriteStatus::Persisted => Self::Persisted,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl proto::ShardStatus {
|
||||
/// Convert the status to a number such that higher numbers are later in the data lifecycle.
|
||||
/// For use in merging multiple write status gRPC responses into one response.
|
||||
fn status_order(&self) -> u8 {
|
||||
match self {
|
||||
Self::Unspecified => panic!("Unspecified status"),
|
||||
Self::Unknown => 0,
|
||||
Self::Durable => 1,
|
||||
Self::Readable => 2,
|
||||
Self::Persisted => 3,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl proto::ShardInfo {
|
||||
fn merge(&mut self, other: &Self) {
|
||||
let self_status = self.status();
|
||||
let other_status = other.status();
|
||||
|
||||
let new_status = match self_status.status_order().cmp(&other_status.status_order()) {
|
||||
std::cmp::Ordering::Less => other_status,
|
||||
std::cmp::Ordering::Equal => self_status,
|
||||
std::cmp::Ordering::Greater => self_status,
|
||||
};
|
||||
|
||||
self.set_status(new_status);
|
||||
}
|
||||
}
|
||||
|
||||
/// "Merges" the partition information for write info responses so that the "most recent"
|
||||
/// information is returned.
|
||||
pub fn merge_responses(
|
||||
responses: impl IntoIterator<Item = proto::GetWriteInfoResponse>,
|
||||
) -> proto::GetWriteInfoResponse {
|
||||
// Map shard index to status
|
||||
let mut shard_infos: HashMap<_, proto::ShardInfo> = HashMap::new();
|
||||
|
||||
responses
|
||||
.into_iter()
|
||||
.flat_map(|res| res.shard_infos.into_iter())
|
||||
.for_each(|info| {
|
||||
shard_infos
|
||||
.entry(info.shard_index)
|
||||
.and_modify(|existing_info| existing_info.merge(&info))
|
||||
.or_insert(info);
|
||||
});
|
||||
|
||||
let shard_infos = shard_infos.into_values().collect();
|
||||
|
||||
proto::GetWriteInfoResponse { shard_infos }
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use proto::{ShardInfo, ShardStatus};
|
||||
|
||||
#[test]
|
||||
fn test_merge() {
|
||||
#[derive(Debug)]
|
||||
struct Test<'a> {
|
||||
left: &'a ShardInfo,
|
||||
right: &'a ShardInfo,
|
||||
expected: &'a ShardInfo,
|
||||
}
|
||||
|
||||
let durable = ShardInfo {
|
||||
shard_index: 1,
|
||||
status: ShardStatus::Durable.into(),
|
||||
};
|
||||
|
||||
let readable = ShardInfo {
|
||||
shard_index: 1,
|
||||
status: ShardStatus::Readable.into(),
|
||||
};
|
||||
|
||||
let persisted = ShardInfo {
|
||||
shard_index: 1,
|
||||
status: ShardStatus::Persisted.into(),
|
||||
};
|
||||
|
||||
let unknown = ShardInfo {
|
||||
shard_index: 1,
|
||||
status: ShardStatus::Unknown.into(),
|
||||
};
|
||||
|
||||
let tests = vec![
|
||||
Test {
|
||||
left: &unknown,
|
||||
right: &unknown,
|
||||
expected: &unknown,
|
||||
},
|
||||
Test {
|
||||
left: &unknown,
|
||||
right: &durable,
|
||||
expected: &durable,
|
||||
},
|
||||
Test {
|
||||
left: &unknown,
|
||||
right: &readable,
|
||||
expected: &readable,
|
||||
},
|
||||
Test {
|
||||
left: &durable,
|
||||
right: &unknown,
|
||||
expected: &durable,
|
||||
},
|
||||
Test {
|
||||
left: &readable,
|
||||
right: &readable,
|
||||
expected: &readable,
|
||||
},
|
||||
Test {
|
||||
left: &durable,
|
||||
right: &durable,
|
||||
expected: &durable,
|
||||
},
|
||||
Test {
|
||||
left: &readable,
|
||||
right: &durable,
|
||||
expected: &readable,
|
||||
},
|
||||
Test {
|
||||
left: &persisted,
|
||||
right: &durable,
|
||||
expected: &persisted,
|
||||
},
|
||||
];
|
||||
|
||||
for test in tests {
|
||||
let mut output = test.left.clone();
|
||||
|
||||
output.merge(test.right);
|
||||
assert_eq!(
|
||||
&output, test.expected,
|
||||
"Mismatch\n\nOutput:\n{output:#?}\n\nTest:\n{test:#?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
|
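Since the removed `write_info.rs` above appears only as a diff, here is a brief usage sketch of its `merge_responses` helper. The function signature and the proto types come from the file shown above; the `generated_types::write_info` path is inferred from the `pub mod write_info;` declaration this change also removes, and the example values are hypothetical.

```rust
// Sketch only: exercising merge_responses() from the removed write_info.rs.
use generated_types::influxdata::iox::ingester::v1::{
    GetWriteInfoResponse, ShardInfo, ShardStatus,
};
use generated_types::write_info::merge_responses;

fn main() {
    // Two ingesters report different progress for the same shard index.
    let from_ingester_a = GetWriteInfoResponse {
        shard_infos: vec![ShardInfo {
            shard_index: 1,
            status: ShardStatus::Durable.into(),
        }],
    };
    let from_ingester_b = GetWriteInfoResponse {
        shard_infos: vec![ShardInfo {
            shard_index: 1,
            status: ShardStatus::Readable.into(),
        }],
    };

    // The merged response keeps the status furthest along in the data
    // lifecycle (Readable beats Durable), per status_order() above.
    let merged = merge_responses([from_ingester_a, from_ingester_b]);
    assert_eq!(merged.shard_infos.len(), 1);
    assert_eq!(merged.shard_infos[0].status(), ShardStatus::Readable);
}
```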
@ -7,10 +7,10 @@ license.workspace = true
|
|||
|
||||
[dependencies]
|
||||
prost = "0.11"
|
||||
prost-types = { version = "0.11.7", features = ["std"] }
|
||||
tonic = "0.8"
|
||||
prost-types = { version = "0.11.9", features = ["std"] }
|
||||
tonic = { workspace = true }
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
|
||||
[build-dependencies]
|
||||
prost-build = "0.11"
|
||||
tonic-build = "0.8"
|
||||
tonic-build = { workspace = true }
|
||||
|
|
|
@ -7,10 +7,10 @@ license.workspace = true
|
|||
|
||||
[dependencies]
|
||||
prost = "0.11"
|
||||
prost-types = { version = "0.11.7", features = ["std"] }
|
||||
tonic = "0.8"
|
||||
prost-types = { version = "0.11.9", features = ["std"] }
|
||||
tonic = { workspace = true }
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
|
||||
[build-dependencies]
|
||||
prost-build = "0.11"
|
||||
tonic-build = "0.8"
|
||||
tonic-build = { workspace = true }
|
||||
|
|
|
@ -16,7 +16,7 @@ hyper = "0.14"
|
|||
pin-project = "1.0"
|
||||
prost = "0.11"
|
||||
tokio = {version = "1", features = [ "rt" ]}
|
||||
tonic = "0.8"
|
||||
tonic = { workspace = true }
|
||||
tower = "0.4"
|
||||
grpc-binary-logger-proto = { path = "../grpc-binary-logger-proto" }
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
|
@ -28,4 +28,4 @@ assert_matches = "1"
|
|||
|
||||
[build-dependencies]
|
||||
prost-build = "0.11"
|
||||
tonic-build = "0.8"
|
||||
tonic-build = { workspace = true }
|
||||
|
|
|
@ -15,10 +15,10 @@ iox_catalog = { path = "../iox_catalog" }
|
|||
object_store = { version = "0.5.6", features = ["aws"] }
|
||||
schema = { path = "../schema" }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0.95"
|
||||
serde_json = "1.0.96"
|
||||
thiserror = "1.0.40"
|
||||
tokio = { version = "1.27" }
|
||||
tonic = { version = "0.8" }
|
||||
tonic = { workspace = true }
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
|
||||
[dev-dependencies]
|
||||
|
|
|
@ -10,7 +10,7 @@ bytes = "1.4"
|
|||
futures = { version = "0.3", default-features = false }
|
||||
reqwest = { version = "0.11", default-features = false, features = ["stream", "json", "rustls-tls"] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0.95"
|
||||
serde_json = "1.0.96"
|
||||
snafu = "0.7"
|
||||
url = "2.3.1"
|
||||
uuid = { version = "1", features = ["v4"] }
|
||||
|
|
|
@ -22,7 +22,6 @@ influxrpc_parser = { path = "../influxrpc_parser"}
|
|||
iox_catalog = { path = "../iox_catalog" }
|
||||
ioxd_common = { path = "../ioxd_common"}
|
||||
ioxd_compactor2 = { path = "../ioxd_compactor2"}
|
||||
ioxd_ingest_replica = { path = "../ioxd_ingest_replica" }
|
||||
ioxd_ingester2 = { path = "../ioxd_ingester2"}
|
||||
ioxd_garbage_collector = { path = "../ioxd_garbage_collector" }
|
||||
ioxd_querier = { path = "../ioxd_querier"}
|
||||
|
@ -64,7 +63,7 @@ libc = { version = "0.2" }
|
|||
num_cpus = "1.15.0"
|
||||
once_cell = { version = "1.17", features = ["parking_lot"] }
|
||||
rustyline = { version = "11.0", default-features = false, features = ["with-file-history"]}
|
||||
serde_json = "1.0.95"
|
||||
serde_json = "1.0.96"
|
||||
snafu = "0.7"
|
||||
tempfile = "3.5.0"
|
||||
thiserror = "1.0.40"
|
||||
|
@ -72,7 +71,7 @@ tikv-jemalloc-ctl = { version = "0.5.0", optional = true }
|
|||
tokio = { version = "1.27", features = ["macros", "net", "parking_lot", "rt-multi-thread", "signal", "sync", "time", "io-std"] }
|
||||
tokio-stream = { version = "0.1", features = ["net"] }
|
||||
tokio-util = { version = "0.7.7", features = ["compat"] }
|
||||
tonic = "0.8"
|
||||
tonic = { workspace = true }
|
||||
uuid = { version = "1", features = ["v4"] }
|
||||
# jemalloc-sys with unprefixed_malloc_on_supported_platforms feature and heappy are mutually exclusive
|
||||
tikv-jemalloc-sys = { version = "0.5.3", optional = true, features = ["unprefixed_malloc_on_supported_platforms"] }
|
||||
|
@ -81,11 +80,11 @@ workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
|||
[dev-dependencies]
|
||||
# In alphabetical order
|
||||
arrow_util = { path = "../arrow_util" }
|
||||
assert_cmd = "2.0.10"
|
||||
assert_cmd = "2.0.11"
|
||||
assert_matches = "1.5"
|
||||
async-trait = "0.1"
|
||||
predicate = { path = "../predicate" }
|
||||
predicates = "3.0.2"
|
||||
predicates = "3.0.3"
|
||||
serde = "1.0.159"
|
||||
test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
|
||||
test_helpers_end_to_end = { path = "../test_helpers_end_to_end" }
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use arrow::record_batch::RecordBatch;
|
||||
use clap::ValueEnum;
|
||||
use futures::TryStreamExt;
|
||||
use influxdb_iox_client::format::influxql::write_columnar;
|
||||
use influxdb_iox_client::format::influxql::{write_columnar, Options};
|
||||
use influxdb_iox_client::{connection::Connection, flight, format::QueryOutputFormat};
|
||||
use thiserror::Error;
|
||||
|
||||
|
@ -105,7 +105,7 @@ pub async fn command(connection: Connection, config: Config) -> Result<()> {
|
|||
|
||||
match (query_lang, &format) {
|
||||
(QueryLanguage::InfluxQL, OutputFormat::Pretty) => {
|
||||
write_columnar(std::io::stdout(), &batches)?
|
||||
write_columnar(std::io::stdout(), &batches, Options::default())?
|
||||
}
|
||||
_ => {
|
||||
let format: QueryOutputFormat = format.into();
|
||||
|
|
|
@ -11,7 +11,7 @@ use clap_blocks::{
|
|||
ingester2::Ingester2Config,
|
||||
ingester_address::IngesterAddress,
|
||||
object_store::{make_object_store, ObjectStoreConfig},
|
||||
querier::{IngesterAddresses, QuerierConfig},
|
||||
querier::QuerierConfig,
|
||||
router2::Router2Config,
|
||||
run_config::RunConfig,
|
||||
socket_addr::SocketAddr,
|
||||
|
@ -425,6 +425,9 @@ impl Config {
|
|||
CatalogDsnConfig::new_sqlite(local_catalog_path)
|
||||
};
|
||||
|
||||
let ingester_addresses =
|
||||
vec![IngesterAddress::from_str(&ingester_grpc_bind_address.to_string()).unwrap()];
|
||||
|
||||
let router_run_config = RunConfig::new(
|
||||
logging_config,
|
||||
tracing_config,
|
||||
|
@ -458,10 +461,7 @@ impl Config {
|
|||
let router_config = Router2Config {
|
||||
query_pool_name: QUERY_POOL_NAME.to_string(),
|
||||
http_request_limit: 1_000,
|
||||
ingester_addresses: vec![IngesterAddress::from_str(
|
||||
&ingester_grpc_bind_address.to_string(),
|
||||
)
|
||||
.unwrap()],
|
||||
ingester_addresses: ingester_addresses.clone(),
|
||||
new_namespace_retention_hours: None, // infinite retention
|
||||
namespace_autocreation_enabled: true,
|
||||
partition_key_pattern: "%Y-%m-%d".to_string(),
|
||||
|
@ -498,10 +498,8 @@ impl Config {
|
|||
};
|
||||
|
||||
let querier_config = QuerierConfig {
|
||||
num_query_threads: None, // will be ignored
|
||||
shard_to_ingesters_file: None, // will be ignored
|
||||
shard_to_ingesters: None, // will be ignored
|
||||
ingester_addresses: vec![ingester_grpc_bind_address.to_string()], // will be ignored
|
||||
num_query_threads: None, // will be ignored
|
||||
ingester_addresses,
|
||||
ram_pool_metadata_bytes: querier_ram_pool_metadata_bytes,
|
||||
ram_pool_data_bytes: querier_ram_pool_data_bytes,
|
||||
max_concurrent_queries: querier_max_concurrent_queries,
|
||||
|
@ -660,12 +658,7 @@ pub async fn command(config: Config) -> Result<()> {
|
|||
)
|
||||
.await;
|
||||
|
||||
let ingester_addresses = IngesterAddresses::List(vec![IngesterAddress::from_str(
|
||||
&ingester_run_config.grpc_bind_address.to_string(),
|
||||
)
|
||||
.unwrap()]);
|
||||
|
||||
info!(?ingester_addresses, "starting querier");
|
||||
info!(ingester_addresses = ?querier_config.ingester_addresses, "starting querier");
|
||||
let querier = create_querier_server_type(QuerierServerTypeArgs {
|
||||
common_state: &common_state,
|
||||
metric_registry: Arc::clone(&metrics),
|
||||
|
@ -673,9 +666,7 @@ pub async fn command(config: Config) -> Result<()> {
|
|||
object_store,
|
||||
exec,
|
||||
time_provider,
|
||||
ingester_addresses,
|
||||
querier_config,
|
||||
rpc_write: true,
|
||||
authz: authz.as_ref().map(Arc::clone),
|
||||
})
|
||||
.await?;
|
||||
|
|
|
@ -1,106 +0,0 @@
|
|||
//! Command line options for running an ingester for a router using the RPC write path to talk to.
|
||||
|
||||
use super::main;
|
||||
use crate::process_info::{setup_metric_registry, USIZE_MAX};
|
||||
use clap_blocks::{
|
||||
catalog_dsn::CatalogDsnConfig, ingest_replica::IngestReplicaConfig, run_config::RunConfig,
|
||||
};
|
||||
use iox_query::exec::Executor;
|
||||
use ioxd_common::{
|
||||
server_type::{CommonServerState, CommonServerStateError},
|
||||
Service,
|
||||
};
|
||||
use ioxd_ingest_replica::create_ingest_replica_server_type;
|
||||
use observability_deps::tracing::*;
|
||||
use std::{num::NonZeroUsize, sync::Arc};
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum Error {
|
||||
#[error("run: {0}")]
|
||||
Run(#[from] main::Error),
|
||||
|
||||
#[error("invalid config: {0}")]
|
||||
InvalidConfig(#[from] CommonServerStateError),
|
||||
|
||||
#[error("error initializing ingest_replica: {0}")]
|
||||
IngestReplica(#[from] ioxd_ingest_replica::Error),
|
||||
|
||||
#[error("catalog DSN error: {0}")]
|
||||
CatalogDsn(#[from] clap_blocks::catalog_dsn::Error),
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
#[derive(Debug, clap::Parser)]
|
||||
#[clap(
|
||||
name = "run",
|
||||
about = "Runs in ingest replica mode",
|
||||
long_about = "Run the IOx ingest_replica server.\n\nThe configuration options below can be \
|
||||
set either with the command line flags or with the specified environment \
|
||||
variable. If there is a file named '.env' in the current working directory, \
|
||||
it is sourced before loading the configuration.
|
||||
Configuration is loaded from the following sources (highest precedence first):
|
||||
- command line arguments
|
||||
- user set environment variables
|
||||
- .env file contents
|
||||
- pre-configured default values"
|
||||
)]
|
||||
pub struct Config {
|
||||
#[clap(flatten)]
|
||||
pub(crate) run_config: RunConfig,
|
||||
|
||||
#[clap(flatten)]
|
||||
pub(crate) catalog_dsn: CatalogDsnConfig,
|
||||
|
||||
#[clap(flatten)]
|
||||
pub(crate) ingest_replica_config: IngestReplicaConfig,
|
||||
|
||||
/// Specify the size of the thread-pool for query execution, and the
|
||||
/// separate compaction thread-pool.
|
||||
#[clap(
|
||||
long = "exec-thread-count",
|
||||
env = "INFLUXDB_IOX_EXEC_THREAD_COUNT",
|
||||
default_value = "4",
|
||||
action
|
||||
)]
|
||||
pub exec_thread_count: NonZeroUsize,
|
||||
|
||||
/// Size of memory pool used during query exec, in bytes.
|
||||
#[clap(
|
||||
long = "exec-mem-pool-bytes",
|
||||
env = "INFLUXDB_IOX_EXEC_MEM_POOL_BYTES",
|
||||
default_value = &USIZE_MAX[..],
|
||||
action
|
||||
)]
|
||||
exec_mem_pool_bytes: usize,
|
||||
}
|
||||
|
||||
pub async fn command(config: Config) -> Result<()> {
|
||||
let common_state = CommonServerState::from_config(config.run_config.clone())?;
|
||||
let metric_registry = setup_metric_registry();
|
||||
|
||||
let catalog = config
|
||||
.catalog_dsn
|
||||
.get_catalog("ingester", Arc::clone(&metric_registry))
|
||||
.await?;
|
||||
|
||||
let exec = Arc::new(Executor::new(
|
||||
config.exec_thread_count,
|
||||
config.exec_mem_pool_bytes,
|
||||
));
|
||||
|
||||
let server_type = create_ingest_replica_server_type(
|
||||
&common_state,
|
||||
catalog,
|
||||
Arc::clone(&metric_registry),
|
||||
&config.ingest_replica_config,
|
||||
exec,
|
||||
)
|
||||
.await?;
|
||||
|
||||
info!("starting ingester2");
|
||||
|
||||
let services = vec![Service::create(server_type, common_state.run_config())];
|
||||
Ok(main::main(common_state, services, metric_registry).await?)
|
||||
}
|
|
@ -4,7 +4,6 @@ use trogging::cli::LoggingConfig;
|
|||
pub(crate) mod all_in_one;
|
||||
mod compactor2;
|
||||
mod garbage_collector;
|
||||
mod ingest_replica;
|
||||
mod ingester2;
|
||||
mod main;
|
||||
mod querier;
|
||||
|
@ -29,9 +28,6 @@ pub enum Error {
|
|||
#[snafu(display("Error in ingester2 subcommand: {}", source))]
|
||||
Ingester2Error { source: ingester2::Error },
|
||||
|
||||
#[snafu(display("Error in ingest_replica subcommand: {}", source))]
|
||||
IngestReplicaError { source: ingest_replica::Error },
|
||||
|
||||
#[snafu(display("Error in all in one subcommand: {}", source))]
|
||||
AllInOneError { source: all_in_one::Error },
|
||||
|
||||
|
@ -60,7 +56,6 @@ impl Config {
|
|||
Some(Command::Querier(config)) => config.run_config.logging_config(),
|
||||
Some(Command::Router2(config)) => config.run_config.logging_config(),
|
||||
Some(Command::Ingester2(config)) => config.run_config.logging_config(),
|
||||
Some(Command::IngestReplica(config)) => config.run_config.logging_config(),
|
||||
Some(Command::AllInOne(config)) => &config.logging_config,
|
||||
Some(Command::Test(config)) => config.run_config.logging_config(),
|
||||
}
|
||||
|
@ -81,9 +76,6 @@ enum Command {
|
|||
/// Run the server in ingester2 mode
|
||||
Ingester2(ingester2::Config),
|
||||
|
||||
/// Run the server in ingest_replica mode
|
||||
IngestReplica(ingest_replica::Config),
|
||||
|
||||
/// Run the server in "all in one" mode (Default)
|
||||
AllInOne(all_in_one::Config),
|
||||
|
||||
|
@ -110,9 +102,6 @@ pub async fn command(config: Config) -> Result<()> {
|
|||
Some(Command::Ingester2(config)) => {
|
||||
ingester2::command(config).await.context(Ingester2Snafu)
|
||||
}
|
||||
Some(Command::IngestReplica(config)) => ingest_replica::command(config)
|
||||
.await
|
||||
.context(IngestReplicaSnafu),
|
||||
Some(Command::AllInOne(config)) => all_in_one::command(config).await.context(AllInOneSnafu),
|
||||
Some(Command::Test(config)) => test::command(config).await.context(TestSnafu),
|
||||
}
|
||||
|
|
|
@ -29,9 +29,6 @@ pub enum Error {
|
|||
#[error("Invalid config: {0}")]
|
||||
InvalidConfigCommon(#[from] CommonServerStateError),
|
||||
|
||||
#[error("Invalid config: {0}")]
|
||||
InvalidConfigIngester(#[from] clap_blocks::querier::Error),
|
||||
|
||||
#[error("Catalog error: {0}")]
|
||||
Catalog(#[from] iox_catalog::interface::Error),
|
||||
|
||||
|
@ -120,7 +117,7 @@ pub async fn command(config: Config) -> Result<(), Error> {
|
|||
info!("using the write buffer path");
|
||||
}
|
||||
|
||||
let ingester_addresses = config.querier_config.ingester_addresses()?;
|
||||
let ingester_addresses = &config.querier_config.ingester_addresses;
|
||||
info!(?ingester_addresses, "using ingester addresses");
|
||||
|
||||
let exec = Arc::new(Executor::new(
|
||||
|
@ -135,9 +132,7 @@ pub async fn command(config: Config) -> Result<(), Error> {
|
|||
object_store,
|
||||
exec,
|
||||
time_provider,
|
||||
ingester_addresses,
|
||||
querier_config: config.querier_config,
|
||||
rpc_write,
|
||||
authz: authz.as_ref().map(Arc::clone),
|
||||
})
|
||||
.await?;
|
||||
|
|
|
@ -2,7 +2,7 @@ use std::{collections::HashMap, path::PathBuf, sync::Arc};
|
|||
|
||||
use arrow::{
|
||||
array::as_generic_binary_array,
|
||||
datatypes::{DataType, Schema, SchemaRef, TimeUnit},
|
||||
datatypes::{DataType, Fields, Schema, SchemaRef, TimeUnit},
|
||||
record_batch::RecordBatch,
|
||||
};
|
||||
use arrow_flight::{
|
||||
|
@ -339,6 +339,64 @@ async fn flightsql_get_catalogs_matches_information_schema() {
|
|||
.await
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn flightsql_get_cross_reference() {
|
||||
test_helpers::maybe_start_logging();
|
||||
let database_url = maybe_skip_integration!();
|
||||
|
||||
let primary_table_name = "primary_table";
|
||||
let foreign_table_name = "foreign_table";
|
||||
|
||||
// Set up the cluster ====================================
|
||||
let mut cluster = MiniCluster::create_shared2(database_url).await;
|
||||
|
||||
StepTest::new(
|
||||
&mut cluster,
|
||||
vec![
|
||||
Step::WriteLineProtocol(format!(
|
||||
"{primary_table_name},tag1=A,tag2=B val=42i 123456\n\
|
||||
{primary_table_name},tag1=A,tag2=C val=43i 123457\n
|
||||
{foreign_table_name},tag1=B,tag2=D val=42i 123456\n\
|
||||
{foreign_table_name},tag1=C,tag2=F val=43i 123457"
|
||||
)),
|
||||
Step::Custom(Box::new(move |state: &mut StepTestState| {
|
||||
async move {
|
||||
let mut client = flightsql_client(state.cluster());
|
||||
let pk_catalog: Option<String> = None;
|
||||
let pk_db_schema: Option<String> = None;
|
||||
let fk_catalog: Option<String> = None;
|
||||
let fk_db_schema: Option<String> = None;
|
||||
|
||||
let stream = client
|
||||
.get_cross_reference(
|
||||
pk_catalog,
|
||||
pk_db_schema,
|
||||
primary_table_name.to_string(),
|
||||
fk_catalog,
|
||||
fk_db_schema,
|
||||
foreign_table_name.to_string(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let batches = collect_stream(stream).await;
|
||||
|
||||
insta::assert_yaml_snapshot!(
|
||||
batches_to_sorted_lines(&batches),
|
||||
@r###"
|
||||
---
|
||||
- ++
|
||||
- ++
|
||||
"###
|
||||
);
|
||||
}
|
||||
.boxed()
|
||||
})),
|
||||
],
|
||||
)
|
||||
.run()
|
||||
.await
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn flightsql_get_tables() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
@ -938,6 +996,98 @@ async fn flightsql_get_db_schema_matches_information_schema() {
|
|||
.await
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn flightsql_get_exported_keys() {
|
||||
test_helpers::maybe_start_logging();
|
||||
let database_url = maybe_skip_integration!();
|
||||
|
||||
let table_name = "the_table";
|
||||
|
||||
// Set up the cluster ====================================
|
||||
let mut cluster = MiniCluster::create_shared2(database_url).await;
|
||||
|
||||
StepTest::new(
|
||||
&mut cluster,
|
||||
vec![
|
||||
Step::WriteLineProtocol(format!(
|
||||
"{table_name},tag1=A,tag2=B val=42i 123456\n\
|
||||
{table_name},tag1=A,tag2=C val=43i 123457"
|
||||
)),
|
||||
Step::Custom(Box::new(move |state: &mut StepTestState| {
|
||||
async move {
|
||||
let mut client = flightsql_client(state.cluster());
|
||||
let catalog: Option<String> = None;
|
||||
let db_schema: Option<String> = None;
|
||||
|
||||
let stream = client
|
||||
.get_exported_keys(catalog, db_schema, table_name.to_string())
|
||||
.await
|
||||
.unwrap();
|
||||
let batches = collect_stream(stream).await;
|
||||
|
||||
insta::assert_yaml_snapshot!(
|
||||
batches_to_sorted_lines(&batches),
|
||||
@r###"
|
||||
---
|
||||
- ++
|
||||
- ++
|
||||
"###
|
||||
);
|
||||
}
|
||||
.boxed()
|
||||
})),
|
||||
],
|
||||
)
|
||||
.run()
|
||||
.await
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn flightsql_get_imported_keys() {
|
||||
test_helpers::maybe_start_logging();
|
||||
let database_url = maybe_skip_integration!();
|
||||
|
||||
let table_name = "the_table";
|
||||
|
||||
// Set up the cluster ====================================
|
||||
let mut cluster = MiniCluster::create_shared2(database_url).await;
|
||||
|
||||
StepTest::new(
|
||||
&mut cluster,
|
||||
vec![
|
||||
Step::WriteLineProtocol(format!(
|
||||
"{table_name},tag1=A,tag2=B val=42i 123456\n\
|
||||
{table_name},tag1=A,tag2=C val=43i 123457"
|
||||
)),
|
||||
Step::Custom(Box::new(move |state: &mut StepTestState| {
|
||||
async move {
|
||||
let mut client = flightsql_client(state.cluster());
|
||||
let catalog: Option<String> = None;
|
||||
let db_schema: Option<String> = None;
|
||||
|
||||
let stream = client
|
||||
.get_imported_keys(catalog, db_schema, table_name.to_string())
|
||||
.await
|
||||
.unwrap();
|
||||
let batches = collect_stream(stream).await;
|
||||
|
||||
insta::assert_yaml_snapshot!(
|
||||
batches_to_sorted_lines(&batches),
|
||||
@r###"
|
||||
---
|
||||
- ++
|
||||
- ++
|
||||
"###
|
||||
);
|
||||
}
|
||||
.boxed()
|
||||
})),
|
||||
],
|
||||
)
|
||||
.run()
|
||||
.await
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn flightsql_get_primary_keys() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
@ -1254,10 +1404,10 @@ async fn assert_schema(client: &mut FlightClient, cmd: Any) {
|
|||
}
|
||||
|
||||
fn strip_metadata(schema: &Schema) -> SchemaRef {
|
||||
let stripped_fields: Vec<_> = schema
|
||||
let stripped_fields: Fields = schema
|
||||
.fields()
|
||||
.iter()
|
||||
.map(|f| f.clone().with_metadata(HashMap::new()))
|
||||
.map(|f| f.as_ref().clone().with_metadata(HashMap::new()))
|
||||
.collect();
|
||||
|
||||
Arc::new(Schema::new(stripped_fields))
|
||||
|
@ -1357,8 +1507,149 @@ async fn authz() {
|
|||
authz.close().await;
|
||||
}
|
||||
|
||||
/// Ensure that the FlightSQL API supports the following gRPC header names,
/// in addition to the existing `iox-namespace-name`:
/// 1. database
/// 2. bucket
/// 3. bucket-name
#[tokio::test]
|
||||
async fn flightsql_client_header_same_database() {
|
||||
test_helpers::maybe_start_logging();
|
||||
let database_url = maybe_skip_integration!();
|
||||
|
||||
let table_name = "the_table";
|
||||
|
||||
// Set up the cluster ====================================
|
||||
let mut cluster = MiniCluster::create_shared2(database_url).await;
|
||||
|
||||
StepTest::new(
|
||||
&mut cluster,
|
||||
vec![
|
||||
Step::WriteLineProtocol(format!(
|
||||
"{table_name},tag1=A,tag2=B val=42i 123456\n\
|
||||
{table_name},tag1=A,tag2=C val=43i 123457"
|
||||
)),
|
||||
Step::Custom(Box::new(move |state: &mut StepTestState| {
|
||||
async move {
|
||||
let mut client = flightsql_client_helper(state.cluster(), "iox-namespace-name");
|
||||
for header_name in &["database", "bucket", "bucket-name"] {
|
||||
// different header names with the same database name
|
||||
client
|
||||
.add_header(header_name, state.cluster().namespace())
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
let stream = client.get_table_types().await.unwrap();
|
||||
let batches = collect_stream(stream).await;
|
||||
|
||||
insta::assert_yaml_snapshot!(
|
||||
batches_to_sorted_lines(&batches),
|
||||
@r###"
|
||||
---
|
||||
- +------------+
|
||||
- "| table_type |"
|
||||
- +------------+
|
||||
- "| BASE TABLE |"
|
||||
- "| VIEW |"
|
||||
- +------------+
|
||||
"###
|
||||
);
|
||||
}
|
||||
.boxed()
|
||||
})),
|
||||
],
|
||||
)
|
||||
.run()
|
||||
.await
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn flightsql_client_header_different_database() {
|
||||
test_helpers::maybe_start_logging();
|
||||
let database_url = maybe_skip_integration!();
|
||||
|
||||
let table_name = "the_table";
|
||||
|
||||
// Set up the cluster ====================================
|
||||
let mut cluster = MiniCluster::create_shared2(database_url).await;
|
||||
|
||||
StepTest::new(
|
||||
&mut cluster,
|
||||
vec![
|
||||
Step::WriteLineProtocol(format!(
|
||||
"{table_name},tag1=A,tag2=B val=42i 123456\n\
|
||||
{table_name},tag1=A,tag2=C val=43i 123457"
|
||||
)),
|
||||
Step::Custom(Box::new(move |state: &mut StepTestState| {
|
||||
async move {
|
||||
let mut client = flightsql_client_helper(state.cluster(), "database");
|
||||
client
|
||||
.add_header("bucket", "different_database_name")
|
||||
.unwrap();
|
||||
|
||||
let err = client.get_table_types().await.unwrap_err();
|
||||
|
||||
assert_matches!(err, FlightError::Tonic(status) => {
|
||||
assert_eq!(status.code(), tonic::Code::InvalidArgument);
|
||||
assert_contains!(status.message(), "More than one headers are found in request");
|
||||
}
|
||||
);
|
||||
}
|
||||
.boxed()
|
||||
})),
|
||||
],
|
||||
)
|
||||
.run()
|
||||
.await
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn flightsql_client_header_no_database() {
|
||||
test_helpers::maybe_start_logging();
|
||||
let database_url = maybe_skip_integration!();
|
||||
|
||||
let table_name = "the_table";
|
||||
|
||||
// Set up the cluster ====================================
|
||||
let mut cluster = MiniCluster::create_shared2(database_url).await;
|
||||
|
||||
StepTest::new(
|
||||
&mut cluster,
|
||||
vec![
|
||||
Step::WriteLineProtocol(format!(
|
||||
"{table_name},tag1=A,tag2=B val=42i 123456\n\
|
||||
{table_name},tag1=A,tag2=C val=43i 123457"
|
||||
)),
|
||||
Step::Custom(Box::new(move |state: &mut StepTestState| {
|
||||
async move {
|
||||
let connection = state.cluster().querier().querier_grpc_connection();
|
||||
let (channel, _headers) = connection.into_grpc_connection().into_parts();
|
||||
|
||||
let mut client = FlightSqlClient::new(channel);
|
||||
|
||||
let err = client.get_table_types().await.unwrap_err();
|
||||
|
||||
assert_matches!(err, FlightError::Tonic(status) => {
|
||||
assert_eq!(status.code(), tonic::Code::InvalidArgument);
|
||||
assert_contains!(status.message(), "no 'database' header in request");
|
||||
}
|
||||
);
|
||||
}
|
||||
.boxed()
|
||||
})),
|
||||
],
|
||||
)
|
||||
.run()
|
||||
.await
|
||||
}
|
||||
|
||||
/// Return a [`FlightSqlClient`] configured for use
|
||||
fn flightsql_client(cluster: &MiniCluster) -> FlightSqlClient {
|
||||
flightsql_client_helper(cluster, "database")
|
||||
}
|
||||
|
||||
/// Helper function for fn `flightsql_client` that returns a [`FlightSqlClient`] configured for use
|
||||
fn flightsql_client_helper(cluster: &MiniCluster, header_name: &str) -> FlightSqlClient {
|
||||
let connection = cluster.querier().querier_grpc_connection();
|
||||
let (channel, _headers) = connection.into_grpc_connection().into_parts();
|
||||
|
||||
|
@ -1366,7 +1657,7 @@ fn flightsql_client(cluster: &MiniCluster) -> FlightSqlClient {
|
|||
|
||||
// Add namespace to client headers until it is fully supported by FlightSQL
|
||||
let namespace = cluster.namespace();
|
||||
client.add_header("iox-namespace-name", namespace).unwrap();
|
||||
client.add_header(header_name, namespace).unwrap();
|
||||
|
||||
client
|
||||
}
|
||||
|
|
|
@ -28,6 +28,13 @@ async fn influxql_returns_error() {
|
|||
"Error while planning query: This feature is not implemented: SHOW TAG KEYS"
|
||||
.into(),
|
||||
},
|
||||
Step::InfluxQLExpectingError {
|
||||
query: "SHOW TAG KEYYYYYES".into(),
|
||||
expected_error_code: tonic::Code::InvalidArgument,
|
||||
expected_message:
|
||||
"Error while planning query: Error during planning: invalid SHOW TAG statement, expected KEYS or VALUES at pos 9"
|
||||
.into(),
|
||||
},
|
||||
],
|
||||
)
|
||||
.run()
|
||||
|
|
|
@ -40,58 +40,6 @@ fn ingester2_errors_without_mode_env_var() {
|
|||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn querier_errors_with_mode_env_var_and_shard_to_ingester_mapping() {
|
||||
let shard_to_ingesters_json = r#"{
|
||||
"ingesters": {
|
||||
"i1": {
|
||||
"addr": "arbitrary"
|
||||
}
|
||||
},
|
||||
"shards": {
|
||||
"0": {
|
||||
"ingester": "i1"
|
||||
}
|
||||
}
|
||||
}"#;
|
||||
|
||||
Command::cargo_bin("influxdb_iox")
|
||||
.unwrap()
|
||||
.env_clear()
|
||||
.env("INFLUXDB_IOX_RPC_MODE", "2")
|
||||
.arg("run")
|
||||
.arg("querier")
|
||||
.arg("--shard-to-ingesters")
|
||||
.arg(shard_to_ingesters_json)
|
||||
.arg("--catalog")
|
||||
.arg("memory")
|
||||
.timeout(Duration::from_secs(2))
|
||||
.assert()
|
||||
.failure()
|
||||
.stderr(predicate::str::contains(
|
||||
"`INFLUXDB_IOX_RPC_MODE` is set but shard to ingester mappings were provided",
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn querier_errors_without_mode_env_var_and_ingester_addresses() {
|
||||
Command::cargo_bin("influxdb_iox")
|
||||
.unwrap()
|
||||
.env_clear()
|
||||
.arg("run")
|
||||
.arg("querier")
|
||||
.arg("--ingester-addresses")
|
||||
.arg("http://arbitrary:8082")
|
||||
.arg("--catalog")
|
||||
.arg("memory")
|
||||
.timeout(Duration::from_secs(2))
|
||||
.assert()
|
||||
.failure()
|
||||
.stderr(predicate::str::contains(
|
||||
"`INFLUXDB_IOX_RPC_MODE` is unset but ingester addresses were provided",
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn querier_without_ingesters_without_mode_env_var_uses_write_buffer() {
|
||||
Command::cargo_bin("influxdb_iox")
|
||||
|
|
|
@ -127,11 +127,27 @@ public class Main {
|
|||
System.out.println("**************");
|
||||
print_result_set(md.getCatalogs());
|
||||
|
||||
System.out.println("**************");
|
||||
System.out.println("CrossReference");
|
||||
System.out.println("**************");
|
||||
print_result_set(md.getCrossReference(null, null, "system", null, null, "iox"));
|
||||
|
||||
System.out.println("**************");
|
||||
System.out.println("Schemas:");
|
||||
System.out.println("**************");
|
||||
print_result_set(md.getSchemas());
|
||||
|
||||
System.out.println("**************");
|
||||
System.out.println("ExportedKeys");
|
||||
System.out.println("**************");
|
||||
print_result_set(md.getExportedKeys(null, null, "system"));
|
||||
|
||||
|
||||
System.out.println("**************");
|
||||
System.out.println("ImportedKeys");
|
||||
System.out.println("**************");
|
||||
print_result_set(md.getImportedKeys(null, null, "system"));
|
||||
|
||||
System.out.println("**************");
|
||||
System.out.println("PrimaryKeys:");
|
||||
System.out.println("**************");
|
||||
|
|
|
@ -10,14 +10,15 @@ influxdb_iox -v

## Run the JDBC test

To run the JDBC test program, specify the target namespace in the JDBC URL:
To run the JDBC test program, specify the target database in the JDBC URL:

```shell
# run the jdbc client driver program, downloading the JDBC driver if needed
./jdbc_client "jdbc:arrow-flight-sql://localhost:8082?useEncryption=false&iox-namespace-name=26f7e5a4b7be365b_917b97a92e883afc" query 'select * from cpu'
./jdbc_client "jdbc:arrow-flight-sql://localhost:8082?useEncryption=false&database=26f7e5a4b7be365b_917b97a92e883afc" query 'select * from cpu'
```

# Cleanup:

Clean up any intermediate files (like JDBC driver)

```shell
@ -1,11 +1,15 @@
|
|||
-- Gap-filling tests
|
||||
-- IOX_SETUP: OneMeasurementTwoSeries
|
||||
|
||||
-- Input data
|
||||
-- region=a 2000-05-05T12:20:00Z
|
||||
-- region=a 2000-05-05T12:40:00Z
|
||||
-- region=b 2000-05-05T12:31:00Z
|
||||
-- region=b 2000-05-05T12:39:00Z
|
||||
-- Input data (by region, time)
|
||||
SELECT *
|
||||
FROM cpu
|
||||
ORDER BY REGION, TIME;
|
||||
|
||||
-- Input data (by time)
|
||||
SELECT *
|
||||
FROM cpu
|
||||
ORDER BY TIME;
|
||||
|
||||
-- IOX_COMPARE: uuid
|
||||
EXPLAIN SELECT
|
||||
|
@ -75,3 +79,13 @@ from cpu
|
|||
where time between timestamp '2000-05-05T12:19:00Z' and timestamp '2000-05-05T12:40:00Z'
|
||||
group by minute;
|
||||
|
||||
-- cpu.idle has a null value at 12:31. Interpolation should still occur,
|
||||
-- overwriting the null value.
|
||||
SELECT
|
||||
date_bin_gapfill(interval '4 minutes', time, timestamp '1970-01-01T00:00:00Z') as four_minute,
|
||||
interpolate(min(cpu.idle)),
|
||||
interpolate(min(cpu."user"))
|
||||
from cpu
|
||||
where time between timestamp '2000-05-05T12:19:00Z' and timestamp '2000-05-05T12:40:00Z'
|
||||
group by four_minute;
|
||||
|
||||
|
|
|
@ -1,21 +1,39 @@
|
|||
-- Test Setup: OneMeasurementTwoSeries
|
||||
-- SQL: SELECT * FROM cpu ORDER BY REGION, TIME;
|
||||
+------+--------+----------------------+------+
|
||||
| idle | region | time | user |
|
||||
+------+--------+----------------------+------+
|
||||
| 70.0 | a | 2000-05-05T12:20:00Z | 23.2 |
|
||||
| | a | 2000-05-05T12:40:00Z | 21.0 |
|
||||
| | b | 2000-05-05T12:31:00Z | 25.2 |
|
||||
| 60.0 | b | 2000-05-05T12:39:00Z | 28.9 |
|
||||
+------+--------+----------------------+------+
|
||||
-- SQL: SELECT * FROM cpu ORDER BY TIME;
|
||||
+------+--------+----------------------+------+
|
||||
| idle | region | time | user |
|
||||
+------+--------+----------------------+------+
|
||||
| 70.0 | a | 2000-05-05T12:20:00Z | 23.2 |
|
||||
| | b | 2000-05-05T12:31:00Z | 25.2 |
|
||||
| 60.0 | b | 2000-05-05T12:39:00Z | 28.9 |
|
||||
| | a | 2000-05-05T12:40:00Z | 21.0 |
|
||||
+------+--------+----------------------+------+
|
||||
-- SQL: EXPLAIN SELECT date_bin_gapfill(interval '10 minute', time, timestamp '1970-01-01T00:00:00Z') as minute, count(cpu.user) from cpu where time between timestamp '2000-05-05T12:00:00Z' and timestamp '2000-05-05T12:59:00Z' group by minute;
|
||||
-- Results After Normalizing UUIDs
|
||||
----------
|
||||
| plan_type | plan |
|
||||
----------
|
||||
| logical_plan | Projection: date_bin_gapfill(IntervalDayTime("600000"),cpu.time,Utf8("1970-01-01T00:00:00Z")) AS minute, COUNT(cpu.user) |
|
||||
| | GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime("600000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))]], aggr=[[COUNT(cpu.user)]], time_column=date_bin_gapfill(IntervalDayTime("600000"),cpu.time,Utf8("1970-01-01T00:00:00Z")), stride=IntervalDayTime("600000"), range=Included(TimestampNanosecond(957528000000000000, None))..Included(TimestampNanosecond(957531540000000000, None)) |
|
||||
| | Aggregate: groupBy=[[datebin(IntervalDayTime("600000"), cpu.time, TimestampNanosecond(0, None)) AS date_bin_gapfill(IntervalDayTime("600000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))]], aggr=[[COUNT(cpu.user)]] |
|
||||
| logical_plan | Projection: date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time,Utf8("1970-01-01T00:00:00Z")) AS minute, COUNT(cpu.user) |
|
||||
| | GapFill: groupBy=[[date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))]], aggr=[[COUNT(cpu.user)]], time_column=date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time,Utf8("1970-01-01T00:00:00Z")), stride=IntervalMonthDayNano("600000000000"), range=Included(TimestampNanosecond(957528000000000000, None))..Included(TimestampNanosecond(957531540000000000, None)) |
|
||||
| | Aggregate: groupBy=[[datebin(IntervalMonthDayNano("600000000000"), cpu.time, TimestampNanosecond(0, None)) AS date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))]], aggr=[[COUNT(cpu.user)]] |
|
||||
| | TableScan: cpu projection=[time, user], full_filters=[cpu.time >= TimestampNanosecond(957528000000000000, None), cpu.time <= TimestampNanosecond(957531540000000000, None)] |
|
||||
| physical_plan | ProjectionExec: expr=[date_bin_gapfill(IntervalDayTime("600000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))@0 as minute, COUNT(cpu.user)@1 as COUNT(cpu.user)] |
|
||||
| | GapFillExec: group_expr=[date_bin_gapfill(IntervalDayTime("600000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))@0], aggr_expr=[COUNT(cpu.user)@1], stride=600000, time_range=Included("957528000000000000")..Included("957531540000000000") |
|
||||
| | SortPreservingMergeExec: [date_bin_gapfill(IntervalDayTime("600000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))@0 ASC] |
|
||||
| | SortExec: expr=[date_bin_gapfill(IntervalDayTime("600000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))@0 ASC] |
|
||||
| | AggregateExec: mode=FinalPartitioned, gby=[date_bin_gapfill(IntervalDayTime("600000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))@0 as date_bin_gapfill(IntervalDayTime("600000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))], aggr=[COUNT(cpu.user)] |
|
||||
| physical_plan | ProjectionExec: expr=[date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))@0 as minute, COUNT(cpu.user)@1 as COUNT(cpu.user)] |
|
||||
| | GapFillExec: group_expr=[date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))@0], aggr_expr=[COUNT(cpu.user)@1], stride=600000000000, time_range=Included("957528000000000000")..Included("957531540000000000") |
|
||||
| | SortPreservingMergeExec: [date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))@0 ASC] |
|
||||
| | SortExec: expr=[date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))@0 ASC] |
|
||||
| | AggregateExec: mode=FinalPartitioned, gby=[date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))@0 as date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))], aggr=[COUNT(cpu.user)] |
|
||||
| | CoalesceBatchesExec: target_batch_size=8192 |
|
||||
| | RepartitionExec: partitioning=Hash([Column { name: "date_bin_gapfill(IntervalDayTime(\"600000\"),cpu.time,Utf8(\"1970-01-01T00:00:00Z\"))", index: 0 }], 4), input_partitions=4 |
|
||||
| | AggregateExec: mode=Partial, gby=[datebin(600000, time@0, 0) as date_bin_gapfill(IntervalDayTime("600000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))], aggr=[COUNT(cpu.user)] |
|
||||
| | RepartitionExec: partitioning=Hash([Column { name: "date_bin_gapfill(IntervalMonthDayNano(\"600000000000\"),cpu.time,Utf8(\"1970-01-01T00:00:00Z\"))", index: 0 }], 4), input_partitions=4 |
|
||||
| | AggregateExec: mode=Partial, gby=[datebin(600000000000, time@0, 0) as date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))], aggr=[COUNT(cpu.user)] |
|
||||
| | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 |
|
||||
| | CoalesceBatchesExec: target_batch_size=8192 |
|
||||
| | FilterExec: time@0 >= 957528000000000000 AND time@0 <= 957531540000000000 |
|
||||
|
@ -85,18 +103,18 @@
|
|||
----------
|
||||
| plan_type | plan |
|
||||
----------
|
||||
| logical_plan | Projection: cpu.region, date_bin_gapfill(IntervalDayTime("600000"),cpu.time,Utf8("1970-01-01T00:00:00Z")) AS minute, AVG(cpu.user) |
|
||||
| | GapFill: groupBy=[[cpu.region, date_bin_gapfill(IntervalDayTime("600000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))]], aggr=[[LOCF(AVG(cpu.user))]], time_column=date_bin_gapfill(IntervalDayTime("600000"),cpu.time,Utf8("1970-01-01T00:00:00Z")), stride=IntervalDayTime("600000"), range=Included(TimestampNanosecond(957528000000000000, None))..Included(TimestampNanosecond(957531540000000000, None)) |
|
||||
| | Aggregate: groupBy=[[cpu.region, datebin(IntervalDayTime("600000"), cpu.time, TimestampNanosecond(0, None)) AS date_bin_gapfill(IntervalDayTime("600000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))]], aggr=[[AVG(cpu.user)]] |
|
||||
| logical_plan | Projection: cpu.region, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time,Utf8("1970-01-01T00:00:00Z")) AS minute, AVG(cpu.user) |
|
||||
| | GapFill: groupBy=[[cpu.region, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))]], aggr=[[LOCF(AVG(cpu.user))]], time_column=date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time,Utf8("1970-01-01T00:00:00Z")), stride=IntervalMonthDayNano("600000000000"), range=Included(TimestampNanosecond(957528000000000000, None))..Included(TimestampNanosecond(957531540000000000, None)) |
|
||||
| | Aggregate: groupBy=[[cpu.region, datebin(IntervalMonthDayNano("600000000000"), cpu.time, TimestampNanosecond(0, None)) AS date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))]], aggr=[[AVG(cpu.user)]] |
|
||||
| | TableScan: cpu projection=[region, time, user], full_filters=[cpu.time >= TimestampNanosecond(957528000000000000, None), cpu.time <= TimestampNanosecond(957531540000000000, None)] |
|
||||
| physical_plan | ProjectionExec: expr=[region@0 as region, date_bin_gapfill(IntervalDayTime("600000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))@1 as minute, AVG(cpu.user)@2 as AVG(cpu.user)] |
|
||||
| | GapFillExec: group_expr=[region@0, date_bin_gapfill(IntervalDayTime("600000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))@1], aggr_expr=[LOCF(AVG(cpu.user)@2)], stride=600000, time_range=Included("957528000000000000")..Included("957531540000000000") |
|
||||
| | SortPreservingMergeExec: [region@0 ASC,date_bin_gapfill(IntervalDayTime("600000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))@1 ASC] |
|
||||
| | SortExec: expr=[region@0 ASC,date_bin_gapfill(IntervalDayTime("600000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))@1 ASC] |
|
||||
| | AggregateExec: mode=FinalPartitioned, gby=[region@0 as region, date_bin_gapfill(IntervalDayTime("600000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))@1 as date_bin_gapfill(IntervalDayTime("600000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))], aggr=[AVG(cpu.user)] |
|
||||
| physical_plan | ProjectionExec: expr=[region@0 as region, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))@1 as minute, AVG(cpu.user)@2 as AVG(cpu.user)] |
|
||||
| | GapFillExec: group_expr=[region@0, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))@1], aggr_expr=[LOCF(AVG(cpu.user)@2)], stride=600000000000, time_range=Included("957528000000000000")..Included("957531540000000000") |
|
||||
| | SortPreservingMergeExec: [region@0 ASC,date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))@1 ASC] |
|
||||
| | SortExec: expr=[region@0 ASC,date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))@1 ASC] |
|
||||
| | AggregateExec: mode=FinalPartitioned, gby=[region@0 as region, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))@1 as date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))], aggr=[AVG(cpu.user)] |
|
||||
| | CoalesceBatchesExec: target_batch_size=8192 |
|
||||
| | RepartitionExec: partitioning=Hash([Column { name: "region", index: 0 }, Column { name: "date_bin_gapfill(IntervalDayTime(\"600000\"),cpu.time,Utf8(\"1970-01-01T00:00:00Z\"))", index: 1 }], 4), input_partitions=4 |
|
||||
| | AggregateExec: mode=Partial, gby=[region@0 as region, datebin(600000, time@1, 0) as date_bin_gapfill(IntervalDayTime("600000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))], aggr=[AVG(cpu.user)] |
|
||||
| | RepartitionExec: partitioning=Hash([Column { name: "region", index: 0 }, Column { name: "date_bin_gapfill(IntervalMonthDayNano(\"600000000000\"),cpu.time,Utf8(\"1970-01-01T00:00:00Z\"))", index: 1 }], 4), input_partitions=4 |
|
||||
| | AggregateExec: mode=Partial, gby=[region@0 as region, datebin(600000000000, time@1, 0) as date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time,Utf8("1970-01-01T00:00:00Z"))], aggr=[AVG(cpu.user)] |
|
||||
| | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 |
|
||||
| | CoalesceBatchesExec: target_batch_size=8192 |
|
||||
| | FilterExec: time@1 >= 957528000000000000 AND time@1 <= 957531540000000000 |
|
||||
|
@ -152,4 +170,16 @@
|
|||
| 2000-05-05T12:38:00Z | 70.0 |
|
||||
| 2000-05-05T12:39:00Z | 60.0 |
|
||||
| 2000-05-05T12:40:00Z | 60.0 |
|
||||
+----------------------+---------------+
|
||||
+----------------------+---------------+
|
||||
-- SQL: SELECT date_bin_gapfill(interval '4 minutes', time, timestamp '1970-01-01T00:00:00Z') as four_minute, interpolate(min(cpu.idle)), interpolate(min(cpu."user")) from cpu where time between timestamp '2000-05-05T12:19:00Z' and timestamp '2000-05-05T12:40:00Z' group by four_minute;
|
||||
+----------------------+---------------+---------------+
|
||||
| four_minute | MIN(cpu.idle) | MIN(cpu.user) |
|
||||
+----------------------+---------------+---------------+
|
||||
| 2000-05-05T12:16:00Z | | |
|
||||
| 2000-05-05T12:20:00Z | 70.0 | 23.2 |
|
||||
| 2000-05-05T12:24:00Z | 67.5 | 24.2 |
|
||||
| 2000-05-05T12:28:00Z | 65.0 | 25.2 |
|
||||
| 2000-05-05T12:32:00Z | 62.5 | 27.05 |
|
||||
| 2000-05-05T12:36:00Z | 60.0 | 28.9 |
|
||||
| 2000-05-05T12:40:00Z | | 21.0 |
|
||||
+----------------------+---------------+---------------+
|
|
@ -1,104 +1,204 @@
|
|||
-- Test Setup: InfluxQLSelectSupport
|
||||
-- InfluxQL: SHOW FIELD KEYS;
|
||||
+------------------+--------------+-----------+
|
||||
| iox::measurement | fieldKey | fieldType |
|
||||
+------------------+--------------+-----------+
|
||||
| cpu | usage_idle | float |
|
||||
| cpu | usage_system | float |
|
||||
| disk | bytes_free | integer |
|
||||
| disk | bytes_used | integer |
|
||||
| m0 | f64 | float |
|
||||
| m0 | i64 | integer |
|
||||
| m0 | str | string |
|
||||
| m1 | f64 | float |
|
||||
| m1 | i64 | integer |
|
||||
| m1 | str | string |
|
||||
| m2 | f64 | float |
|
||||
| m3 | u64 | unsigned |
|
||||
+------------------+--------------+-----------+
|
||||
name: cpu
|
||||
+--------------+-----------+
|
||||
| fieldKey | fieldType |
|
||||
+--------------+-----------+
|
||||
| usage_idle | float |
|
||||
| usage_system | float |
|
||||
+--------------+-----------+
|
||||
name: disk
|
||||
+------------+-----------+
|
||||
| fieldKey | fieldType |
|
||||
+------------+-----------+
|
||||
| bytes_free | integer |
|
||||
| bytes_used | integer |
|
||||
+------------+-----------+
|
||||
name: m0
|
||||
+----------+-----------+
|
||||
| fieldKey | fieldType |
|
||||
+----------+-----------+
|
||||
| f64 | float |
|
||||
| i64 | integer |
|
||||
| str | string |
|
||||
+----------+-----------+
|
||||
name: m1
|
||||
+----------+-----------+
|
||||
| fieldKey | fieldType |
|
||||
+----------+-----------+
|
||||
| f64 | float |
|
||||
| i64 | integer |
|
||||
| str | string |
|
||||
+----------+-----------+
|
||||
name: m2
|
||||
+----------+-----------+
|
||||
| fieldKey | fieldType |
|
||||
+----------+-----------+
|
||||
| f64 | float |
|
||||
+----------+-----------+
|
||||
name: m3
|
||||
+----------+-----------+
|
||||
| fieldKey | fieldType |
|
||||
+----------+-----------+
|
||||
| u64 | unsigned |
|
||||
+----------+-----------+
|
||||
-- InfluxQL: SHOW FIELD KEYS LIMIT 2;
|
||||
+------------------+--------------+-----------+
|
||||
| iox::measurement | fieldKey | fieldType |
|
||||
+------------------+--------------+-----------+
|
||||
| cpu | usage_idle | float |
|
||||
| cpu | usage_system | float |
|
||||
| disk | bytes_free | integer |
|
||||
| disk | bytes_used | integer |
|
||||
| m0 | f64 | float |
|
||||
| m0 | i64 | integer |
|
||||
| m1 | f64 | float |
|
||||
| m1 | i64 | integer |
|
||||
| m2 | f64 | float |
|
||||
| m3 | u64 | unsigned |
|
||||
+------------------+--------------+-----------+
|
||||
name: cpu
|
||||
+--------------+-----------+
|
||||
| fieldKey | fieldType |
|
||||
+--------------+-----------+
|
||||
| usage_idle | float |
|
||||
| usage_system | float |
|
||||
+--------------+-----------+
|
||||
name: disk
|
+------------+-----------+
| fieldKey | fieldType |
+------------+-----------+
| bytes_free | integer |
| bytes_used | integer |
+------------+-----------+
name: m0
+----------+-----------+
| fieldKey | fieldType |
+----------+-----------+
| f64 | float |
| i64 | integer |
+----------+-----------+
name: m1
+----------+-----------+
| fieldKey | fieldType |
+----------+-----------+
| f64 | float |
| i64 | integer |
+----------+-----------+
name: m2
+----------+-----------+
| fieldKey | fieldType |
+----------+-----------+
| f64 | float |
+----------+-----------+
name: m3
+----------+-----------+
| fieldKey | fieldType |
+----------+-----------+
| u64 | unsigned |
+----------+-----------+
-- InfluxQL: SHOW FIELD KEYS OFFSET 1;
+------------------+--------------+-----------+
| iox::measurement | fieldKey | fieldType |
+------------------+--------------+-----------+
| cpu | usage_system | float |
| disk | bytes_used | integer |
| m0 | i64 | integer |
| m0 | str | string |
| m1 | i64 | integer |
| m1 | str | string |
+------------------+--------------+-----------+
name: cpu
+--------------+-----------+
| fieldKey | fieldType |
+--------------+-----------+
| usage_system | float |
+--------------+-----------+
name: disk
+------------+-----------+
| fieldKey | fieldType |
+------------+-----------+
| bytes_used | integer |
+------------+-----------+
name: m0
+----------+-----------+
| fieldKey | fieldType |
+----------+-----------+
| i64 | integer |
| str | string |
+----------+-----------+
name: m1
+----------+-----------+
| fieldKey | fieldType |
+----------+-----------+
| i64 | integer |
| str | string |
+----------+-----------+
-- InfluxQL: SHOW FIELD KEYS LIMIT 1 OFFSET 2;
+------------------+----------+-----------+
| iox::measurement | fieldKey | fieldType |
+------------------+----------+-----------+
| m0 | str | string |
| m1 | str | string |
+------------------+----------+-----------+
name: m0
+----------+-----------+
| fieldKey | fieldType |
+----------+-----------+
| str | string |
+----------+-----------+
name: m1
+----------+-----------+
| fieldKey | fieldType |
+----------+-----------+
| str | string |
+----------+-----------+
-- InfluxQL: SHOW FIELD KEYS FROM cpu;
+------------------+--------------+-----------+
| iox::measurement | fieldKey | fieldType |
+------------------+--------------+-----------+
| cpu | usage_idle | float |
| cpu | usage_system | float |
+------------------+--------------+-----------+
name: cpu
+--------------+-----------+
| fieldKey | fieldType |
+--------------+-----------+
| usage_idle | float |
| usage_system | float |
+--------------+-----------+
-- InfluxQL: SHOW FIELD KEYS FROM disk,cpu,disk;
+------------------+--------------+-----------+
| iox::measurement | fieldKey | fieldType |
+------------------+--------------+-----------+
| cpu | usage_idle | float |
| cpu | usage_system | float |
| disk | bytes_free | integer |
| disk | bytes_used | integer |
+------------------+--------------+-----------+
name: cpu
+--------------+-----------+
| fieldKey | fieldType |
+--------------+-----------+
| usage_idle | float |
| usage_system | float |
+--------------+-----------+
name: disk
+------------+-----------+
| fieldKey | fieldType |
+------------+-----------+
| bytes_free | integer |
| bytes_used | integer |
+------------+-----------+
-- InfluxQL: SHOW FIELD KEYS FROM cpu,disk,cpu;
+------------------+--------------+-----------+
| iox::measurement | fieldKey | fieldType |
+------------------+--------------+-----------+
| cpu | usage_idle | float |
| cpu | usage_system | float |
| disk | bytes_free | integer |
| disk | bytes_used | integer |
+------------------+--------------+-----------+
name: cpu
+--------------+-----------+
| fieldKey | fieldType |
+--------------+-----------+
| usage_idle | float |
| usage_system | float |
+--------------+-----------+
name: disk
+------------+-----------+
| fieldKey | fieldType |
+------------+-----------+
| bytes_free | integer |
| bytes_used | integer |
+------------+-----------+
-- InfluxQL: SHOW FIELD KEYS FROM /m.*/;
+------------------+----------+-----------+
| iox::measurement | fieldKey | fieldType |
+------------------+----------+-----------+
| m0 | f64 | float |
| m0 | i64 | integer |
| m0 | str | string |
| m1 | f64 | float |
| m1 | i64 | integer |
| m1 | str | string |
| m2 | f64 | float |
| m3 | u64 | unsigned |
+------------------+----------+-----------+
name: m0
+----------+-----------+
| fieldKey | fieldType |
+----------+-----------+
| f64 | float |
| i64 | integer |
| str | string |
+----------+-----------+
name: m1
+----------+-----------+
| fieldKey | fieldType |
+----------+-----------+
| f64 | float |
| i64 | integer |
| str | string |
+----------+-----------+
name: m2
+----------+-----------+
| fieldKey | fieldType |
+----------+-----------+
| f64 | float |
+----------+-----------+
name: m3
+----------+-----------+
| fieldKey | fieldType |
+----------+-----------+
| u64 | unsigned |
+----------+-----------+
-- InfluxQL: SHOW FIELD KEYS FROM /d\isk/;
+------------------+------------+-----------+
| iox::measurement | fieldKey | fieldType |
+------------------+------------+-----------+
| disk | bytes_free | integer |
| disk | bytes_used | integer |
+------------------+------------+-----------+
name: disk
+------------+-----------+
| fieldKey | fieldType |
+------------+-----------+
| bytes_free | integer |
| bytes_used | integer |
+------------+-----------+
-- InfluxQL: SHOW FIELD KEYS FROM does_not_exist;
+------------------+----------+-----------+
| iox::measurement | fieldKey | fieldType |
+------------------+----------+-----------+
+------------------+----------+-----------+
-- InfluxQL: SHOW FIELD KEYS ON my_db;
Error while planning query: This feature is not implemented: SHOW FIELD KEYS ON <database>
-- InfluxQL: SHOW FIELD KEYS FROM x.my_db;

@@ -7,49 +7,39 @@

-- Validates expected data is returned
-- Projection wildcard, all tags and fields
-- IOX_COMPARE: sorted
SELECT * FROM m0;

-- No matching measurement
SELECT * FROM non_existent;

-- Projection wildcard, only tags
-- IOX_COMPARE: sorted
SELECT *::tag, f64 FROM m0;

-- Projection wildcard, only fields
-- IOX_COMPARE: sorted
SELECT *::field FROM m0;

-- Projection regex, mixture of tags and fields
-- IOX_COMPARE: sorted
SELECT /64|tag0/ FROM m0;

-- Projection specific tags and fields
-- IOX_COMPARE: sorted
SELECT f64, tag0 FROM m0;

-- Explicitly select time column
-- IOX_COMPARE: sorted
SELECT f64, tag0, time FROM m0;

-- arithmetic operators
-- IOX_COMPARE: sorted
SELECT f64, f64 * 2, i64, i64 + i64 FROM m0;

-- bitwise operators
-- IOX_COMPARE: sorted
SELECT i64, i64 & 1 FROM m0;

-- Automatic type coercion integer → float
-- IOX_COMPARE: sorted
SELECT f64 + i64 FROM m0;

-- Type cast postfix operator
SELECT f64, f64::integer FROM m0;

-- Column alias behaviour
-- IOX_COMPARE: sorted
SELECT f64 AS f64_2, f64, f64, f64 FROM m0 LIMIT 1;

--
@@ -57,55 +47,45 @@ SELECT f64 AS f64_2, f64, f64, f64 FROM m0 LIMIT 1;
--

-- Single tag
-- IOX_COMPARE: sorted
SELECT tag0, f64 FROM m0 WHERE tag0 = 'val00';

-- IOX_COMPARE: sorted
SELECT tag0, f64 FROM m0 WHERE tag0 =~ /^val0(1|2)/;

-- Conjunction (AND)
-- IOX_COMPARE: sorted
SELECT /tag(0|1)/, f64 FROM m0 WHERE tag0 = 'val00' AND tag1 = 'val10';

-- Disjunction (OR)
-- IOX_COMPARE: sorted
SELECT /tag(0|1)/, f64 FROM m0 WHERE tag0 = 'val00' OR tag1 = 'val10';

-- arithmetic
-- IOX_COMPARE: sorted
SELECT f64 FROM m0 WHERE f64 > 10 + 10;

-- bitwise
-- IOX_COMPARE: sorted
SELECT i64 FROM m0 WHERE i64 & 1 = 0;

-- time bounds

-- timestamp format %Y-%M-%D
-- IOX_COMPARE: sorted
SELECT i64 FROM m0 WHERE time > '2022-10-31';

-- timestamp format %Y-%M-%D %h:%m:%s
-- IOX_COMPARE: sorted
SELECT i64 FROM m0 WHERE time > '2022-10-31 02:00:10';

-- now() and duration
-- NOTE: 100000d is > 270 years, so this test should be ok for a while.
-- However, if this test is still in use in 270 years and it starts failing,
-- try increasing the number of days 😂
-- IOX_COMPARE: sorted
SELECT i64 FROM m0 WHERE time > now() - 100000d;

-- NOT NULL test
-- WHERE tag1 != '' is the equivalent to tag1 IS NOT NULL
-- TODO(sgc): This is working, but likely by accident
-- IOX_COMPARE: sorted
SELECT tag1, f64 FROM m0 WHERE tag1 != '';

-- NULL test
-- WHERE tag1 = '' is the equivalent to tag1 IS NULL
-- TODO(sgc): Not working, as expected
-- -- IOX_COMPARE: sorted
--
-- SELECT tag1, f64 FROM m0 WHERE tag1 = '';

--
@@ -292,6 +272,9 @@ SELECT usage_idle, bytes_free, device, cpu FROM cpu, disk GROUP BY device, cpu;
SELECT COUNT(f64), SUM(f64), stddev(f64) FROM m0;
SELECT COUNT(f64), SUM(f64), stddev(f64) FROM m0, m1;
SELECT COUNT(f64), SUM(f64), stddev(f64) FROM m0 GROUP BY tag0;
-- IOX_COMPARE: no_borders
EXPLAIN SELECT COUNT(f64), SUM(f64), stddev(f64) FROM m0, m1 GROUP BY tag0;
-- TODO(sgc): `sorted` is a workaround for https://github.com/influxdata/influxdb_iox/issues/7513
-- IOX_COMPARE: sorted
SELECT COUNT(f64), SUM(f64), stddev(f64) FROM m0, m1 GROUP BY tag0;
SELECT COUNT(f64), SUM(f64), stddev(f64) FROM m0 GROUP BY tag0, non_existent;
@@ -304,6 +287,7 @@ SELECT COUNT(f64) as the_count, SUM(f64) + SUM(non_existent) as foo FROM m0;

-- measurements with different schema
SELECT MEAN(usage_idle), MEAN(bytes_free) FROM cpu, disk;
-- TODO(sgc): `sorted` is a workaround for https://github.com/influxdata/influxdb_iox/issues/7513
-- IOX_COMPARE: sorted
SELECT MEAN(usage_idle), MEAN(bytes_free) FROM cpu, disk GROUP BY TIME(10s) FILL(none);

@@ -327,6 +311,7 @@ SELECT COUNT(f64), SUM(f64) FROM m0 GROUP BY TIME(30s) FILL(none);
SELECT COUNT(f64), SUM(f64) FROM m0 GROUP BY TIME(30s, 1s) FILL(none);

SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk;
-- TODO(sgc): `sorted` is a workaround for https://github.com/influxdata/influxdb_iox/issues/7513
-- IOX_COMPARE: sorted
SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk GROUP BY TIME(1s) FILL(none);
SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk GROUP BY cpu;

File diff suppressed because it is too large

@@ -10,7 +10,7 @@ async fn schema_merge_nonexistent_column() {
setup_name: "MultiChunkSchemaMerge",
sql: "SELECT * from cpu where foo = 8",
expected_error_code: tonic::Code::InvalidArgument,
expected_message: r#"Error while planning query: Schema error: No field named "foo". Valid fields are "cpu"."host", "cpu"."region", "cpu"."system", "cpu"."time", "cpu"."user"."#,
expected_message: r#"Error while planning query: Schema error: No field named foo. Valid fields are cpu.host, cpu.region, cpu.system, cpu.time, cpu.user."#,
}
.run()
.await;

@@ -24,11 +24,11 @@ prost = "0.11"
rand = "0.8.3"
reqwest = { version = "0.11", default-features = false, features = ["stream", "rustls-tls"] }
schema = { path = "../schema" }
serde_json = "1.0.95"
serde_json = "1.0.96"
tokio = { version = "1.27", features = ["macros", "parking_lot", "rt-multi-thread"] }
tokio-stream = "0.1.12"
thiserror = "1.0.40"
tonic = { version = "0.8" }
tonic = { workspace = true }

[dev-dependencies]
insta = { version = "1" }

@@ -36,8 +36,5 @@ pub mod store;
/// Client for testing purposes.
pub mod test;

/// Client for fetching write info
pub mod write_info;

/// Client for write API
pub mod write;

@@ -29,9 +29,9 @@ use arrow_flight::{
error::{FlightError, Result},
sql::{
ActionCreatePreparedStatementRequest, ActionCreatePreparedStatementResult, Any,
CommandGetCatalogs, CommandGetDbSchemas, CommandGetPrimaryKeys, CommandGetSqlInfo,
CommandGetTableTypes, CommandGetTables, CommandPreparedStatementQuery,
CommandStatementQuery, ProstMessageExt,
CommandGetCatalogs, CommandGetCrossReference, CommandGetDbSchemas, CommandGetExportedKeys,
CommandGetImportedKeys, CommandGetPrimaryKeys, CommandGetSqlInfo, CommandGetTableTypes,
CommandGetTables, CommandPreparedStatementQuery, CommandStatementQuery, ProstMessageExt,
},
Action, FlightClient, FlightDescriptor, FlightInfo, IpcMessage, Ticket,
};
@@ -153,6 +153,56 @@ impl FlightSqlClient {
self.do_get_with_cmd(msg.as_any()).await
}

/// List a description of the foreign key columns in the given foreign key table that
/// reference the primary key or the columns representing a unique constraint of the
/// parent table (could be the same or a different table) on this server using a
/// [`CommandGetCrossReference`] message.
///
/// # Parameters
///
/// Definition from <https://github.com/apache/arrow/blob/f0c8229f5a09fe53186df171d518430243ddf112/format/FlightSql.proto#L1405-L1477>
///
/// pk_catalog: The catalog name where the parent table is.
/// An empty string retrieves those without a catalog.
/// If omitted the catalog name should not be used to narrow the search.
///
/// pk_db_schema: The Schema name where the parent table is.
/// An empty string retrieves those without a schema.
/// If omitted the schema name should not be used to narrow the search.
///
/// pk_table: The parent table name. It cannot be null.
///
/// fk_catalog: The catalog name where the foreign table is.
/// An empty string retrieves those without a catalog.
/// If omitted the catalog name should not be used to narrow the search.
///
/// fk_db_schema: The schema name where the foreign table is.
/// An empty string retrieves those without a schema.
/// If omitted the schema name should not be used to narrow the search.
///
/// fk_table: The foreign table name. It cannot be null.
///
/// This implementation does not support alternate endpoints
pub async fn get_cross_reference(
&mut self,
pk_catalog: Option<impl Into<String> + Send>,
pk_db_schema: Option<impl Into<String> + Send>,
pk_table: String,
fk_catalog: Option<impl Into<String> + Send>,
fk_db_schema: Option<impl Into<String> + Send>,
fk_table: String,
) -> Result<FlightRecordBatchStream> {
let msg = CommandGetCrossReference {
pk_catalog: pk_catalog.map(|s| s.into()),
pk_db_schema: pk_db_schema.map(|s| s.into()),
pk_table,
fk_catalog: fk_catalog.map(|s| s.into()),
fk_db_schema: fk_db_schema.map(|s| s.into()),
fk_table,
};
self.do_get_with_cmd(msg.as_any()).await
}

/// List the schemas on this server
///
/// # Parameters
@@ -182,6 +232,71 @@ impl FlightSqlClient {
self.do_get_with_cmd(msg.as_any()).await
}

/// List a description of the foreign key columns that reference the given
/// table's primary key columns (the foreign keys exported by a table) of a
/// table on this server using a [`CommandGetExportedKeys`] message.
///
/// # Parameters
///
/// Definition from <https://github.com/apache/arrow/blob/0434ab65075ecd1d2ab9245bcd7ec6038934ed29/format/FlightSql.proto#L1307-L1352>
///
/// catalog: Specifies the catalog to search for the foreign key table.
/// An empty string retrieves those without a catalog.
/// If omitted the catalog name should not be used to narrow the search.
///
/// db_schema: Specifies the schema to search for the foreign key table.
/// An empty string retrieves those without a schema.
/// If omitted the schema name should not be used to narrow the search.
///
/// table: Specifies the foreign key table to get the foreign keys for.
///
/// This implementation does not support alternate endpoints
pub async fn get_exported_keys(
&mut self,
catalog: Option<impl Into<String> + Send>,
db_schema: Option<impl Into<String> + Send>,
table: String,
) -> Result<FlightRecordBatchStream> {
let msg = CommandGetExportedKeys {
catalog: catalog.map(|s| s.into()),
db_schema: db_schema.map(|s| s.into()),
table,
};
self.do_get_with_cmd(msg.as_any()).await
}

/// List the foreign keys of a table on this server using a
/// [`CommandGetImportedKeys`] message.
///
/// # Parameters
///
/// Definition from <https://github.com/apache/arrow/blob/196222dbd543d6931f4a1432845add97be0db802/format/FlightSql.proto#L1354-L1403>
///
/// catalog: Specifies the catalog to search for the primary key table.
/// An empty string retrieves those without a catalog.
/// If omitted the catalog name should not be used to narrow the search.
///
/// db_schema: Specifies the schema to search for the primary key table.
/// An empty string retrieves those without a schema.
/// If omitted the schema name should not be used to narrow the search.
///
/// table: Specifies the primary key table to get the foreign keys for.
///
/// This implementation does not support alternate endpoints
pub async fn get_imported_keys(
&mut self,
catalog: Option<impl Into<String> + Send>,
db_schema: Option<impl Into<String> + Send>,
table: String,
) -> Result<FlightRecordBatchStream> {
let msg = CommandGetImportedKeys {
catalog: catalog.map(|s| s.into()),
db_schema: db_schema.map(|s| s.into()),
table,
};
self.do_get_with_cmd(msg.as_any()).await
}

/// List the primary keys on this server using a [`CommandGetPrimaryKeys`] message.
///
/// # Parameters

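The three methods added above each build a FlightSQL command message and hand it to do_get_with_cmd, returning a FlightRecordBatchStream. A minimal usage sketch (not part of this diff; the `client` value, catalog/schema names, and table names are illustrative placeholders, and the surrounding module's imports are assumed):

    // Hypothetical caller of the new FlightSQL metadata methods.
    use futures::TryStreamExt;

    async fn list_keys(client: &mut FlightSqlClient) -> Result<(), FlightError> {
        // Foreign keys defined on a hypothetical "orders" table (imported keys).
        let imported = client
            .get_imported_keys(Some("my_catalog"), Some("public"), "orders".to_string())
            .await?;
        let batches: Vec<_> = imported.try_collect().await?;
        println!("imported keys returned {} record batches", batches.len());

        // Foreign keys elsewhere that reference a hypothetical "customers" table (exported keys).
        let _exported = client
            .get_exported_keys(None::<String>, None::<String>, "customers".to_string())
            .await?;

        // Cross reference between the parent "customers" and child "orders" tables.
        let _xref = client
            .get_cross_reference(
                None::<String>,
                None::<String>,
                "customers".to_string(),
                None::<String>,
                None::<String>,
                "orders".to_string(),
            )
            .await?;

        Ok(())
    }
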
@@ -1,52 +0,0 @@
use client_util::connection::GrpcConnection;

use self::generated_types::{write_info_service_client::WriteInfoServiceClient, *};

use crate::connection::Connection;
use crate::error::Error;

/// Re-export generated_types
pub mod generated_types {
pub use generated_types::influxdata::iox::ingester::v1::{
write_info_service_client, write_info_service_server, GetWriteInfoRequest,
GetWriteInfoResponse, ShardInfo, ShardStatus,
};
pub use generated_types::write_info::merge_responses;
}

/// A basic client for fetching information about write tokens from a
/// single ingester.
///
/// NOTE: This is an ALPHA / Internal API that is used as part of the
/// end to end tests.
///
/// A public API is tracked here:
/// <https://github.com/influxdata/influxdb_iox/issues/4354>
#[derive(Debug, Clone)]
pub struct Client {
inner: WriteInfoServiceClient<GrpcConnection>,
}

impl Client {
/// Creates a new client with the provided connection
pub fn new(connection: Connection) -> Self {
Self {
inner: WriteInfoServiceClient::new(connection.into_grpc_connection()),
}
}

/// Get the write information for a write token
pub async fn get_write_info(
&mut self,
write_token: &str,
) -> Result<GetWriteInfoResponse, Error> {
let response = self
.inner
.get_write_info(GetWriteInfoRequest {
write_token: write_token.to_string(),
})
.await?;

Ok(response.into_inner())
}
}

@@ -1,4 +1,5 @@
use arrow::array::StringArray;
use arrow::array::{Array, ArrayData, StringArray};
use arrow::datatypes::DataType;
use arrow::error::ArrowError;
use arrow::record_batch::RecordBatch;
use arrow::util::display::ArrayFormatter;
@@ -29,9 +30,38 @@ pub enum Error {
}
type Result<T, E = Error> = std::result::Result<T, E>;

/// Options for controlling how table borders are rendered.
#[derive(Debug, Default, Clone, Copy)]
pub enum TableBorders {
/// Use ASCII characters.
#[default]
Ascii,
/// Use UNICODE box-drawing characters.
Unicode,
/// Do not render borders.
None,
}

/// Options for the [`write_columnar`] function.
#[derive(Debug, Default)]
pub struct Options {
/// Specify how borders should be rendered.
pub borders: TableBorders,
}

impl Options {
fn table_preset(&self) -> &'static str {
match self.borders {
TableBorders::Ascii => "||--+-++| ++++++",
TableBorders::Unicode => comfy_table::presets::UTF8_FULL,
TableBorders::None => comfy_table::presets::NOTHING,
}
}
}

/// Write the record batches in a columnar format.
pub fn write_columnar(mut w: impl Write, batches: &[RecordBatch]) -> Result<()> {
let options = arrow::util::display::FormatOptions::default().with_display_error(true);
pub fn write_columnar(mut w: impl Write, batches: &[RecordBatch], options: Options) -> Result<()> {
let arrow_opts = arrow::util::display::FormatOptions::default().with_display_error(true);

let Some(schema) = batches.first().map(|b|b.schema()) else { return Ok(()) };
let md = schema
@@ -68,7 +98,7 @@ pub fn write_columnar(mut w: impl Write, batches: &[RecordBatch]) -> Result<()>

let new_table = || {
let mut table = Table::new();
table.load_preset("||--+-++| ++++++");
table.load_preset(options.table_preset());
table.set_header(header.clone());
table
};
@@ -78,7 +108,9 @@ pub fn write_columnar(mut w: impl Write, batches: &[RecordBatch]) -> Result<()>
for batch in batches {
let cols = col_indexes
.iter()
.map(|idx| ArrayFormatter::try_new(batch.column(*idx), &options).map_err(Error::Arrow))
.map(|idx| {
ArrayFormatter::try_new(batch.column(*idx), &arrow_opts).map_err(Error::Arrow)
})
.collect::<Result<Vec<_>>>()?;

let measurement = batch
@@ -87,6 +119,10 @@ pub fn write_columnar(mut w: impl Write, batches: &[RecordBatch]) -> Result<()>
.downcast_ref::<StringArray>()
.expect("expected measurement column to be a StringArray");

// create an empty string array for any tag columns that are NULL
let empty: StringArray =
StringArray::from(ArrayData::new_null(&DataType::Utf8, measurement.len()));

let tag_vals = tag_key_indexes
.iter()
.map(|idx| {
@@ -94,7 +130,7 @@ pub fn write_columnar(mut w: impl Write, batches: &[RecordBatch]) -> Result<()>
.column(*idx)
.as_any()
.downcast_ref::<StringArray>()
.expect("expected tag column to be a StringArray")
.unwrap_or(&empty)
})
.collect::<Vec<_>>();

@@ -160,7 +196,7 @@ pub fn write_columnar(mut w: impl Write, batches: &[RecordBatch]) -> Result<()>

#[cfg(test)]
mod test {
use crate::format::influxql::write_columnar;
use crate::format::influxql::{write_columnar, Options};
use arrow::array::{ArrayRef, Float64Array, Int64Array, StringArray, TimestampNanosecondArray};
use arrow::datatypes::{DataType, Field, Schema, TimeUnit};
use arrow::record_batch::RecordBatch;
@@ -241,7 +277,7 @@ mod test {
tag_key_columns: vec![],
});
let mut s = Vec::<u8>::new();
write_columnar(&mut s, &rb).unwrap();
write_columnar(&mut s, &rb, Options::default()).unwrap();
let res = String::from_utf8(s).unwrap();
insta::assert_snapshot!(res, @r###"
name: cpu
@@ -271,7 +307,7 @@ mod test {
}],
});
let mut s = Vec::<u8>::new();
write_columnar(&mut s, &rb).unwrap();
write_columnar(&mut s, &rb, Options::default()).unwrap();
let res = String::from_utf8(s).unwrap();
insta::assert_snapshot!(res, @r###"
name: cpu
@@ -309,7 +345,7 @@ mod test {
}],
});
let mut s = Vec::<u8>::new();
write_columnar(&mut s, &rb).unwrap();
write_columnar(&mut s, &rb, Options::default()).unwrap();
let res = String::from_utf8(s).unwrap();
insta::assert_snapshot!(res, @r###"
name: cpu
@@ -354,7 +390,7 @@ mod test {
],
});
let mut s = Vec::<u8>::new();
write_columnar(&mut s, &rb).unwrap();
write_columnar(&mut s, &rb, Options::default()).unwrap();
let res = String::from_utf8(s).unwrap();
insta::assert_snapshot!(res, @r###"
name: cpu

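The hunks above thread a new Options argument through write_columnar, and the updated tests show the minimal migration. As a rough illustration outside this diff (assuming `batches` already holds record batches carrying the iox::measurement column), a caller can pick the border style per call:

    // Illustrative call sites for the new signature (not part of this diff).
    let mut out = Vec::<u8>::new();

    // Default: ASCII borders, i.e. the same preset the old code hard-coded.
    write_columnar(&mut out, &batches, Options::default()).unwrap();

    // Borderless output, presumably what the `-- IOX_COMPARE: no_borders`
    // directive in the query tests relies on.
    write_columnar(
        &mut out,
        &batches,
        Options {
            borders: TableBorders::None,
        },
    )
    .unwrap();
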
@@ -9,7 +9,7 @@ license.workspace = true
client_util = { path = "../client_util" }
generated_types = { path = "../generated_types", default-features=false, features=["data_types"] }
prost = "0.11"
tonic = { version = "0.8" }
tonic = { workspace = true }
futures-util = { version = "0.3" }
observability_deps = { path = "../observability_deps"}
workspace-hack = { version = "0.1", path = "../workspace-hack" }

@ -1,57 +0,0 @@
|
|||
[package]
|
||||
name = "ingest_replica"
|
||||
version.workspace = true
|
||||
authors.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
arrow = { workspace = true, features = ["prettyprint"] }
|
||||
arrow-flight = { workspace = true }
|
||||
arrow_util = { version = "0.1.0", path = "../arrow_util" }
|
||||
async-channel = "1.8.0"
|
||||
async-trait = "0.1.60"
|
||||
backoff = { version = "0.1.0", path = "../backoff" }
|
||||
bytes = "1.3.0"
|
||||
crossbeam-utils = "0.8.14"
|
||||
data_types = { version = "0.1.0", path = "../data_types" }
|
||||
datafusion.workspace = true
|
||||
datafusion_util = { path = "../datafusion_util" }
|
||||
flatbuffers = "23.1.21"
|
||||
futures = "0.3.25"
|
||||
generated_types = { version = "0.1.0", path = "../generated_types" }
|
||||
hashbrown.workspace = true
|
||||
iox_catalog = { version = "0.1.0", path = "../iox_catalog" }
|
||||
iox_query = { version = "0.1.0", path = "../iox_query" }
|
||||
iox_time = { path = "../iox_time" }
|
||||
metric = { version = "0.1.0", path = "../metric" }
|
||||
mutable_batch = { version = "0.1.0", path = "../mutable_batch" }
|
||||
mutable_batch_pb = { version = "0.1.0", path = "../mutable_batch_pb" }
|
||||
object_store = "0.5.2"
|
||||
observability_deps = { version = "0.1.0", path = "../observability_deps" }
|
||||
once_cell = "1.17"
|
||||
parking_lot = "0.12.1"
|
||||
parquet_file = { version = "0.1.0", path = "../parquet_file" }
|
||||
pin-project = "1.0.12"
|
||||
predicate = { version = "0.1.0", path = "../predicate" }
|
||||
prost = { version = "0.11.2", default-features = false, features = ["std"] }
|
||||
rand = "0.8.5"
|
||||
schema = { version = "0.1.0", path = "../schema" }
|
||||
service_grpc_catalog = { version = "0.1.0", path = "../service_grpc_catalog" }
|
||||
thiserror = "1.0.38"
|
||||
test_helpers = { path = "../test_helpers", features = ["future_timeout"], optional = true }
|
||||
tokio = { version = "1.22", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] }
|
||||
tonic = "0.8.3"
|
||||
trace = { version = "0.1.0", path = "../trace" }
|
||||
uuid = "1.2.2"
|
||||
workspace-hack = { version = "0.1", path = "../workspace-hack" }
|
||||
|
||||
[dev-dependencies]
|
||||
assert_matches = "1.5.0"
|
||||
criterion = { version = "0.4", default-features = false, features = ["async_tokio"]}
|
||||
datafusion_util = { path = "../datafusion_util" }
|
||||
lazy_static = "1.4.0"
|
||||
mutable_batch_lp = { path = "../mutable_batch_lp" }
|
||||
paste = "1.0.11"
|
||||
tempfile = "3.3.0"
|
||||
test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
|
|
@ -1,93 +0,0 @@
|
|||
//! In memory queryable buffer of data sent from one or more ingesters. It evicts data from the
|
||||
//! buffer when persist requests are sent in.
|
||||
|
||||
use crate::{
|
||||
cache::SchemaCache,
|
||||
query::{response::QueryResponse, QueryError, QueryExec},
|
||||
BufferError, ReplicationBuffer, TableIdToMutableBatch,
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use data_types::{
|
||||
sequence_number_set::SequenceNumberSet, NamespaceId, PartitionId, SequenceNumber, TableId,
|
||||
};
|
||||
use iox_query::exec::Executor;
|
||||
use std::sync::Arc;
|
||||
use trace::span::Span;
|
||||
use uuid::Uuid;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct Buffer {
|
||||
_schema_cache: Arc<SchemaCache>,
|
||||
_exec: Arc<Executor>,
|
||||
}
|
||||
|
||||
impl Buffer {
|
||||
pub(crate) fn new(_schema_cache: Arc<SchemaCache>, _exec: Arc<Executor>) -> Self {
|
||||
Self {
|
||||
_schema_cache,
|
||||
_exec,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn apply_write(
|
||||
&self,
|
||||
_namespace_id: NamespaceId,
|
||||
_table_batches: TableIdToMutableBatch,
|
||||
_ingester_id: Uuid,
|
||||
_sequence_number: SequenceNumber,
|
||||
) -> Result<(), BufferError> {
|
||||
panic!("unimplemented")
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ReplicationBuffer for Buffer {
|
||||
async fn apply_write(
|
||||
&self,
|
||||
namespace_id: NamespaceId,
|
||||
table_batches: TableIdToMutableBatch,
|
||||
ingester_id: Uuid,
|
||||
sequence_number: SequenceNumber,
|
||||
) -> Result<(), BufferError> {
|
||||
self.apply_write(namespace_id, table_batches, ingester_id, sequence_number)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn apply_persist(
|
||||
&self,
|
||||
_ingester_id: Uuid,
|
||||
_namespace_id: NamespaceId,
|
||||
_table_id: TableId,
|
||||
_partition_id: PartitionId,
|
||||
_sequence_set: SequenceNumberSet,
|
||||
) -> Result<(), BufferError> {
|
||||
panic!("unimplemented")
|
||||
}
|
||||
|
||||
async fn append_partition_buffer(
|
||||
&self,
|
||||
_ingester_id: Uuid,
|
||||
_namespace_id: NamespaceId,
|
||||
_table_id: TableId,
|
||||
_partition_id: PartitionId,
|
||||
_sequence_set: SequenceNumberSet,
|
||||
_table_batches: TableIdToMutableBatch,
|
||||
) -> Result<(), BufferError> {
|
||||
panic!("unimplemented")
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl QueryExec for Buffer {
|
||||
type Response = QueryResponse;
|
||||
|
||||
async fn query_exec(
|
||||
&self,
|
||||
_namespace_id: NamespaceId,
|
||||
_table_id: TableId,
|
||||
_columns: Vec<String>,
|
||||
_span: Option<Span>,
|
||||
) -> Result<Self::Response, QueryError> {
|
||||
panic!("unimplemented");
|
||||
}
|
||||
}
|
|
@ -1,250 +0,0 @@
|
|||
//! A cache of table schemas and partition sort keys for us with the buffer to answer Flight
|
||||
//! requests.
|
||||
|
||||
use data_types::{NamespaceId, PartitionId, PartitionKey, ShardId, TableId, TableSchema};
|
||||
use iox_catalog::interface::{
|
||||
get_table_schema_by_id, list_schemas, Catalog, Error as CatalogError,
|
||||
};
|
||||
use parking_lot::RwLock;
|
||||
use std::{collections::BTreeMap, ops::DerefMut, sync::Arc};
|
||||
use thiserror::Error;
|
||||
|
||||
/// Errors that occur during the use of the cache.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum CacheError {
|
||||
#[error("namespace {id:?} not found")]
|
||||
NamespaceNotFound { id: NamespaceId },
|
||||
|
||||
#[error("table {id:?} not found")]
|
||||
TableNotFound { id: TableId },
|
||||
|
||||
#[error("partition for table {table_id:?} and partition key {partition_key:?} not found")]
|
||||
PartitionNotFound {
|
||||
table_id: TableId,
|
||||
partition_key: PartitionKey,
|
||||
},
|
||||
|
||||
#[error("catalog error: {0}")]
|
||||
Catalog(#[from] CatalogError),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct SchemaCache {
|
||||
state: RwLock<State>,
|
||||
catalog: Arc<dyn Catalog>,
|
||||
transition_shard_id: ShardId,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct State {
|
||||
partition_ids: BTreeMap<(TableId, PartitionKey), PartitionId>,
|
||||
table_schemas: BTreeMap<TableId, Arc<TableSchema>>,
|
||||
}
|
||||
|
||||
const RECENT_PARTITION_COUNT_TO_WARM: usize = 40000;
|
||||
|
||||
impl SchemaCache {
|
||||
pub async fn warm(&self) -> Result<(), CacheError> {
|
||||
let namespaces = list_schemas(&*self.catalog).await?.collect::<Vec<_>>();
|
||||
let partitions = self
|
||||
.catalog
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.most_recent_n(RECENT_PARTITION_COUNT_TO_WARM)
|
||||
.await?;
|
||||
|
||||
let mut state = self.state.write();
|
||||
|
||||
for (_namespace, schema) in namespaces {
|
||||
for (_table_name, table_schema) in schema.tables {
|
||||
state
|
||||
.table_schemas
|
||||
.insert(table_schema.id, Arc::new(table_schema));
|
||||
}
|
||||
}
|
||||
|
||||
for partition in partitions {
|
||||
state
|
||||
.partition_ids
|
||||
.insert((partition.table_id, partition.partition_key), partition.id);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn new(catalog: Arc<dyn Catalog>, transition_shard_id: ShardId) -> Self {
|
||||
Self {
|
||||
catalog,
|
||||
state: Default::default(),
|
||||
transition_shard_id,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_table_schema(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
) -> Result<Arc<TableSchema>, CacheError> {
|
||||
match self.get_table_schema_from_cache(&table_id) {
|
||||
Some(t) => Ok(t),
|
||||
None => {
|
||||
let table_schema = {
|
||||
let mut repos = self.catalog.repositories().await;
|
||||
get_table_schema_by_id(table_id, repos.deref_mut()).await?
|
||||
};
|
||||
let table_schema = Arc::new(table_schema);
|
||||
let mut s = self.state.write();
|
||||
s.table_schemas.insert(table_id, Arc::clone(&table_schema));
|
||||
|
||||
Ok(table_schema)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_table_schema_from_cache(&self, table_id: &TableId) -> Option<Arc<TableSchema>> {
|
||||
let s = self.state.read();
|
||||
s.table_schemas.get(table_id).cloned()
|
||||
}
|
||||
|
||||
pub async fn get_table_schema_from_catalog(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
) -> Result<Arc<TableSchema>, CacheError> {
|
||||
let table_schema = {
|
||||
let mut repos = self.catalog.repositories().await;
|
||||
get_table_schema_by_id(table_id, repos.deref_mut()).await?
|
||||
};
|
||||
|
||||
let table_schema = Arc::new(table_schema);
|
||||
let mut s = self.state.write();
|
||||
s.table_schemas.insert(table_id, Arc::clone(&table_schema));
|
||||
|
||||
Ok(table_schema)
|
||||
}
|
||||
|
||||
pub async fn get_partition_id(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
partition_key: PartitionKey,
|
||||
) -> Result<PartitionId, CacheError> {
|
||||
let id = match self.get_partition_id_from_cache(table_id, partition_key.clone()) {
|
||||
Some(k) => k,
|
||||
None => {
|
||||
let partition = self
|
||||
.catalog
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.create_or_get(partition_key.clone(), self.transition_shard_id, table_id)
|
||||
.await?;
|
||||
let mut s = self.state.write();
|
||||
s.partition_ids
|
||||
.insert((table_id, partition_key), partition.id);
|
||||
partition.id
|
||||
}
|
||||
};
|
||||
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
fn get_partition_id_from_cache(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
partition_key: PartitionKey,
|
||||
) -> Option<PartitionId> {
|
||||
let s = self.state.read();
|
||||
s.partition_ids.get(&(table_id, partition_key)).cloned()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use data_types::{ColumnType, Namespace, Partition, Table};
|
||||
use iox_catalog::create_or_get_default_records;
|
||||
use iox_catalog::mem::MemCatalog;
|
||||
use metric::Registry;
|
||||
|
||||
const NAMESPACE_NAME: &str = "foo";
|
||||
const TABLE_NAME: &str = "bar";
|
||||
const COLUMN_NAME: &str = "time";
|
||||
const PARTITION_KEY: &str = "2023-01-08";
|
||||
|
||||
#[tokio::test]
|
||||
async fn warms_cache() {
|
||||
let (catalog, shard_id, _namespace, table, partition) = get_test_data().await;
|
||||
|
||||
let cache = SchemaCache::new(catalog, shard_id);
|
||||
assert!(cache.get_table_schema_from_cache(&table.id).is_none());
|
||||
assert!(cache
|
||||
.get_partition_id_from_cache(table.id, partition.partition_key.clone())
|
||||
.is_none());
|
||||
|
||||
cache.warm().await.unwrap();
|
||||
assert_eq!(
|
||||
cache.get_table_schema_from_cache(&table.id).unwrap().id,
|
||||
table.id
|
||||
);
|
||||
assert_eq!(
|
||||
cache
|
||||
.get_partition_id_from_cache(table.id, partition.partition_key)
|
||||
.unwrap(),
|
||||
partition.id
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn gets_table_schema_and_partition_id_from_catalog_if_not_in_cache() {
|
||||
let (catalog, shard_id, _namespace, table, partition) = get_test_data().await;
|
||||
|
||||
let cache = SchemaCache::new(catalog, shard_id);
|
||||
assert!(cache.get_table_schema_from_cache(&table.id).is_none());
|
||||
assert!(cache
|
||||
.get_partition_id_from_cache(table.id, partition.partition_key.clone())
|
||||
.is_none());
|
||||
|
||||
assert_eq!(cache.get_table_schema(table.id).await.unwrap().id, table.id);
|
||||
assert_eq!(
|
||||
cache
|
||||
.get_partition_id(table.id, partition.partition_key)
|
||||
.await
|
||||
.unwrap(),
|
||||
partition.id
|
||||
);
|
||||
}
|
||||
|
||||
async fn get_test_data() -> (Arc<dyn Catalog>, ShardId, Namespace, Table, Partition) {
|
||||
let catalog = MemCatalog::new(Arc::new(Registry::new()));
|
||||
|
||||
let mut txn = catalog.start_transaction().await.unwrap();
|
||||
let (topic, query_pool, shards) = create_or_get_default_records(1, txn.deref_mut())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let shard_id = *shards.keys().next().unwrap();
|
||||
let namespace = txn
|
||||
.namespaces()
|
||||
.create(NAMESPACE_NAME, None, topic.id, query_pool.id)
|
||||
.await
|
||||
.unwrap();
|
||||
let table = txn
|
||||
.tables()
|
||||
.create_or_get(TABLE_NAME, namespace.id)
|
||||
.await
|
||||
.unwrap();
|
||||
let _ = txn
|
||||
.columns()
|
||||
.create_or_get(COLUMN_NAME, table.id, ColumnType::Time)
|
||||
.await
|
||||
.unwrap();
|
||||
let partition = txn
|
||||
.partitions()
|
||||
.create_or_get(PARTITION_KEY.into(), shard_id, table.id)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
txn.commit().await.unwrap();
|
||||
|
||||
(Arc::new(catalog), shard_id, namespace, table, partition)
|
||||
}
|
||||
}
|
|
@ -1,67 +0,0 @@
|
|||
mod query;
|
||||
mod replication;
|
||||
|
||||
use std::{fmt::Debug, sync::Arc};
|
||||
|
||||
use arrow_flight::flight_service_server::FlightServiceServer;
|
||||
use generated_types::influxdata::iox::ingester::v1::replication_service_server::ReplicationServiceServer;
|
||||
|
||||
use crate::ReplicationBuffer;
|
||||
use crate::{
|
||||
query::{response::QueryResponse, QueryExec},
|
||||
IngestReplicaRpcInterface,
|
||||
};
|
||||
|
||||
use self::replication::ReplicationServer;
|
||||
|
||||
/// This type is responsible for injecting internal dependencies that SHOULD NOT
|
||||
/// leak outside of the ingester crate into public gRPC handlers.
|
||||
///
|
||||
/// Configuration and external dependencies SHOULD be injected through the
|
||||
/// respective gRPC handler constructor method.
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct GrpcDelegate<B> {
|
||||
buffer: Arc<B>,
|
||||
metrics: Arc<metric::Registry>,
|
||||
}
|
||||
|
||||
impl<B> GrpcDelegate<B>
|
||||
where
|
||||
B: ReplicationBuffer + QueryExec<Response = QueryResponse> + 'static,
|
||||
{
|
||||
/// Initialise a new [`GrpcDelegate`].
|
||||
pub(crate) fn new(buffer: Arc<B>, metrics: Arc<metric::Registry>) -> Self {
|
||||
Self { buffer, metrics }
|
||||
}
|
||||
}
|
||||
|
||||
/// Implement the type-erasure trait to hide internal types from crate-external
|
||||
/// callers.
|
||||
impl<B> IngestReplicaRpcInterface for GrpcDelegate<B>
|
||||
where
|
||||
B: ReplicationBuffer + QueryExec<Response = QueryResponse> + 'static,
|
||||
{
|
||||
type ReplicationHandler = ReplicationServer<B>;
|
||||
type FlightHandler = query::FlightService<Arc<B>>;
|
||||
|
||||
/// Return a [`ReplicationService`] gRPC implementation.
|
||||
///
|
||||
/// [`ReplicationService`]: generated_types::influxdata::iox::catalog::v1::write_service_server::WriteService.
|
||||
fn replication_service(&self) -> ReplicationServiceServer<Self::ReplicationHandler> {
|
||||
ReplicationServiceServer::new(ReplicationServer::new(Arc::clone(&self.buffer)))
|
||||
}
|
||||
|
||||
/// Return an Arrow [`FlightService`] gRPC implementation.
|
||||
///
|
||||
/// [`FlightService`]: arrow_flight::flight_service_server::FlightService
|
||||
fn query_service(
|
||||
&self,
|
||||
max_simultaneous_requests: usize,
|
||||
) -> FlightServiceServer<Self::FlightHandler> {
|
||||
FlightServiceServer::new(query::FlightService::new(
|
||||
Arc::clone(&self.buffer),
|
||||
max_simultaneous_requests,
|
||||
&self.metrics,
|
||||
))
|
||||
}
|
||||
}
|
|
@ -1,363 +0,0 @@
|
|||
use std::pin::Pin;
|
||||
|
||||
use arrow_flight::{
|
||||
encode::FlightDataEncoderBuilder, error::FlightError,
|
||||
flight_service_server::FlightService as Flight, Action, ActionType, Criteria, Empty,
|
||||
FlightData, FlightDescriptor, FlightInfo, HandshakeRequest, HandshakeResponse, IpcMessage,
|
||||
PutResult, SchemaResult, Ticket,
|
||||
};
|
||||
use data_types::{NamespaceId, PartitionId, TableId};
|
||||
use flatbuffers::FlatBufferBuilder;
|
||||
use futures::{stream::BoxStream, Stream, StreamExt, TryStreamExt};
|
||||
use generated_types::influxdata::iox::ingester::v1::{self as proto, PartitionStatus};
|
||||
use metric::U64Counter;
|
||||
use observability_deps::tracing::*;
|
||||
use prost::Message;
|
||||
use thiserror::Error;
|
||||
use tokio::sync::{Semaphore, TryAcquireError};
|
||||
use tonic::{Request, Response, Streaming};
|
||||
use trace::{ctx::SpanContext, span::SpanExt};
|
||||
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::query::{response::QueryResponse, QueryError, QueryExec};
|
||||
|
||||
/// Error states for the query RPC handler.
|
||||
///
|
||||
/// Note that this DOES NOT include any query-time error states - those are
|
||||
/// mapped directly from the [`QueryError`] itself.
|
||||
///
|
||||
/// Note that this isn't strictly necessary as the [`FlightService`] trait
|
||||
/// expects a [`tonic::Status`] error value, but by defining the errors here
|
||||
/// they serve as documentation of the potential error states (which are then
|
||||
/// converted into [`tonic::Status`] for the handler).
|
||||
#[derive(Debug, Error)]
|
||||
enum Error {
|
||||
/// The payload within the Flight ticket cannot be deserialised into a
|
||||
/// [`proto::IngesterQueryRequest`].
|
||||
#[error("invalid flight ticket: {0}")]
|
||||
InvalidTicket(#[from] prost::DecodeError),
|
||||
|
||||
/// The number of simultaneous queries being executed has been reached.
|
||||
#[error("simultaneous query limit exceeded")]
|
||||
RequestLimit,
|
||||
}
|
||||
|
||||
/// Map a query-execution error into a [`tonic::Status`].
|
||||
impl From<QueryError> for tonic::Status {
|
||||
fn from(e: QueryError) -> Self {
|
||||
use tonic::Code;
|
||||
|
||||
let code = match e {
|
||||
QueryError::TableNotFound(_, _) | QueryError::NamespaceNotFound(_) => Code::NotFound,
|
||||
};
|
||||
|
||||
Self::new(code, e.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// Map a gRPC handler error to a [`tonic::Status`].
|
||||
impl From<Error> for tonic::Status {
|
||||
fn from(e: Error) -> Self {
|
||||
use tonic::Code;
|
||||
|
||||
let code = match e {
|
||||
Error::InvalidTicket(_) => {
|
||||
debug!(error=%e, "invalid flight query ticket");
|
||||
Code::InvalidArgument
|
||||
}
|
||||
Error::RequestLimit => {
|
||||
warn!("simultaneous query limit exceeded");
|
||||
Code::ResourceExhausted
|
||||
}
|
||||
};
|
||||
|
||||
Self::new(code, e.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// Concrete implementation of the gRPC Arrow Flight Service API
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct FlightService<Q> {
|
||||
query_handler: Q,
|
||||
|
||||
/// A request limiter to restrict the number of simultaneous requests this
|
||||
/// ingester services.
|
||||
///
|
||||
/// This allows the ingester to drop a portion of requests when experiencing
|
||||
/// an unusual flood of requests
|
||||
request_sem: Semaphore,
|
||||
|
||||
/// Number of queries rejected due to lack of available `request_sem`
|
||||
/// permit.
|
||||
query_request_limit_rejected: U64Counter,
|
||||
|
||||
ingester_uuid: Uuid,
|
||||
}
|
||||
|
||||
impl<Q> FlightService<Q> {
|
||||
pub(super) fn new(
|
||||
query_handler: Q,
|
||||
max_simultaneous_requests: usize,
|
||||
metrics: &metric::Registry,
|
||||
) -> Self {
|
||||
let query_request_limit_rejected = metrics
|
||||
.register_metric::<U64Counter>(
|
||||
"query_request_limit_rejected",
|
||||
"number of query requests rejected due to exceeding parallel request limit",
|
||||
)
|
||||
.recorder(&[]);
|
||||
|
||||
Self {
|
||||
query_handler,
|
||||
request_sem: Semaphore::new(max_simultaneous_requests),
|
||||
query_request_limit_rejected,
|
||||
ingester_uuid: Uuid::new_v4(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type TonicStream<T> = Pin<Box<dyn Stream<Item = Result<T, tonic::Status>> + Send + 'static>>;
|
||||
|
||||
#[tonic::async_trait]
|
||||
impl<Q> Flight for FlightService<Q>
|
||||
where
|
||||
Q: QueryExec<Response = QueryResponse> + 'static,
|
||||
{
|
||||
type HandshakeStream = TonicStream<HandshakeResponse>;
|
||||
type ListFlightsStream = TonicStream<FlightInfo>;
|
||||
type DoGetStream = TonicStream<FlightData>;
|
||||
type DoPutStream = TonicStream<PutResult>;
|
||||
type DoActionStream = TonicStream<arrow_flight::Result>;
|
||||
type ListActionsStream = TonicStream<ActionType>;
|
||||
type DoExchangeStream = TonicStream<FlightData>;
|
||||
|
||||
async fn get_schema(
|
||||
&self,
|
||||
_request: Request<FlightDescriptor>,
|
||||
) -> Result<Response<SchemaResult>, tonic::Status> {
|
||||
Err(tonic::Status::unimplemented("Not yet implemented"))
|
||||
}
|
||||
|
||||
async fn do_get(
|
||||
&self,
|
||||
request: Request<Ticket>,
|
||||
) -> Result<Response<Self::DoGetStream>, tonic::Status> {
|
||||
let span_ctx: Option<SpanContext> = request.extensions().get().cloned();
|
||||
|
||||
// Acquire and hold a permit for the duration of this request, or return
|
||||
// an error if the existing requests have already exhausted the
|
||||
// allocation.
|
||||
//
|
||||
// Our goal is to limit the number of concurrently executing queries as
|
||||
// a rough way of ensuring we don't explode memory by trying to do too
|
||||
// much at the same time.
|
||||
let _permit = match self.request_sem.try_acquire() {
|
||||
Ok(p) => p,
|
||||
Err(TryAcquireError::NoPermits) => {
|
||||
warn!("simultaneous request limit exceeded - dropping query request");
|
||||
self.query_request_limit_rejected.inc(1);
|
||||
return Err(Error::RequestLimit)?;
|
||||
}
|
||||
Err(e) => panic!("request limiter error: {e}"),
|
||||
};
|
||||
|
||||
let ticket = request.into_inner();
|
||||
let request = proto::IngesterQueryRequest::decode(&*ticket.ticket).map_err(Error::from)?;
|
||||
|
||||
// Extract the namespace/table identifiers
|
||||
let namespace_id = NamespaceId::new(request.namespace_id);
|
||||
let table_id = TableId::new(request.table_id);
|
||||
|
||||
// Predicate pushdown is part of the API, but not implemented.
|
||||
if let Some(p) = request.predicate {
|
||||
warn!(predicate=?p, "ignoring query predicate (unsupported)");
|
||||
}
|
||||
|
||||
let response = self
|
||||
.query_handler
|
||||
.query_exec(
|
||||
namespace_id,
|
||||
table_id,
|
||||
request.columns,
|
||||
span_ctx.child_span("ingester query"),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let output = encode_response(response, self.ingester_uuid).map_err(tonic::Status::from);
|
||||
|
||||
Ok(Response::new(Box::pin(output) as Self::DoGetStream))
|
||||
}
|
||||
|
||||
async fn handshake(
|
||||
&self,
|
||||
request: Request<Streaming<HandshakeRequest>>,
|
||||
) -> Result<Response<Self::HandshakeStream>, tonic::Status> {
|
||||
let request = request.into_inner().message().await?.unwrap();
|
||||
let response = HandshakeResponse {
|
||||
protocol_version: request.protocol_version,
|
||||
payload: request.payload,
|
||||
};
|
||||
let output = futures::stream::iter(std::iter::once(Ok(response)));
|
||||
Ok(Response::new(Box::pin(output) as Self::HandshakeStream))
|
||||
}
|
||||
|
||||
async fn list_flights(
|
||||
&self,
|
||||
_request: Request<Criteria>,
|
||||
) -> Result<Response<Self::ListFlightsStream>, tonic::Status> {
|
||||
Err(tonic::Status::unimplemented("Not yet implemented"))
|
||||
}
|
||||
|
||||
async fn get_flight_info(
|
||||
&self,
|
||||
_request: Request<FlightDescriptor>,
|
||||
) -> Result<Response<FlightInfo>, tonic::Status> {
|
||||
Err(tonic::Status::unimplemented("Not yet implemented"))
|
||||
}
|
||||
|
||||
async fn do_put(
|
||||
&self,
|
||||
_request: Request<Streaming<FlightData>>,
|
||||
) -> Result<Response<Self::DoPutStream>, tonic::Status> {
|
||||
Err(tonic::Status::unimplemented("Not yet implemented"))
|
||||
}
|
||||
|
||||
async fn do_action(
|
||||
&self,
|
||||
_request: Request<Action>,
|
||||
) -> Result<Response<Self::DoActionStream>, tonic::Status> {
|
||||
Err(tonic::Status::unimplemented("Not yet implemented"))
|
||||
}
|
||||
|
||||
async fn list_actions(
|
||||
&self,
|
||||
_request: Request<Empty>,
|
||||
) -> Result<Response<Self::ListActionsStream>, tonic::Status> {
|
||||
Err(tonic::Status::unimplemented("Not yet implemented"))
|
||||
}
|
||||
|
||||
async fn do_exchange(
|
||||
&self,
|
||||
_request: Request<Streaming<FlightData>>,
|
||||
) -> Result<Response<Self::DoExchangeStream>, tonic::Status> {
|
||||
Err(tonic::Status::unimplemented("Not yet implemented"))
|
||||
}
|
||||
}
|
||||
|
||||
/// Encode the partition information as a None flight data with metadata
|
||||
fn encode_partition(
|
||||
// Partition ID.
|
||||
partition_id: PartitionId,
|
||||
// Partition persistence status.
|
||||
status: PartitionStatus,
|
||||
// Count of persisted Parquet files
|
||||
completed_persistence_count: u64,
|
||||
ingester_uuid: Uuid,
|
||||
) -> std::result::Result<FlightData, FlightError> {
|
||||
let mut bytes = bytes::BytesMut::new();
|
||||
let app_metadata = proto::IngesterQueryResponseMetadata {
|
||||
partition_id: partition_id.get(),
|
||||
status: Some(proto::PartitionStatus {
|
||||
parquet_max_sequence_number: status.parquet_max_sequence_number,
|
||||
}),
|
||||
ingester_uuid: ingester_uuid.to_string(),
|
||||
completed_persistence_count,
|
||||
};
|
||||
prost::Message::encode(&app_metadata, &mut bytes)
|
||||
.map_err(|e| FlightError::from_external_error(Box::new(e)))?;
|
||||
|
||||
Ok(FlightData::new(
|
||||
None,
|
||||
IpcMessage(build_none_flight_msg().into()),
|
||||
bytes.to_vec(),
|
||||
vec![],
|
||||
))
|
||||
}
|
||||
|
||||
fn build_none_flight_msg() -> Vec<u8> {
|
||||
let mut fbb = FlatBufferBuilder::new();
|
||||
|
||||
let mut message = arrow::ipc::MessageBuilder::new(&mut fbb);
|
||||
message.add_version(arrow::ipc::MetadataVersion::V5);
|
||||
message.add_header_type(arrow::ipc::MessageHeader::NONE);
|
||||
message.add_bodyLength(0);
|
||||
|
||||
let data = message.finish();
|
||||
fbb.finish(data, None);
|
||||
|
||||
fbb.finished_data().to_vec()
|
||||
}
|
||||
|
||||
/// Converts a QueryResponse into a stream of Arrow Flight [`FlightData`] response frames.
|
||||
fn encode_response(
|
||||
response: QueryResponse,
|
||||
ingester_uuid: Uuid,
|
||||
) -> BoxStream<'static, std::result::Result<FlightData, FlightError>> {
|
||||
response
|
||||
.into_partition_stream()
|
||||
.flat_map(move |partition| {
|
||||
let partition_id = partition.id();
|
||||
let completed_persistence_count = partition.completed_persistence_count();
|
||||
let head = futures::stream::once(async move {
|
||||
encode_partition(
|
||||
partition_id,
|
||||
PartitionStatus {
|
||||
parquet_max_sequence_number: None,
|
||||
},
|
||||
completed_persistence_count,
|
||||
ingester_uuid,
|
||||
)
|
||||
});
|
||||
|
||||
match partition.into_record_batch_stream() {
|
||||
Some(stream) => {
|
||||
let stream = stream.map_err(|e| FlightError::ExternalError(Box::new(e)));
|
||||
|
||||
let tail = FlightDataEncoderBuilder::new().build(stream);
|
||||
|
||||
head.chain(tail).boxed()
|
||||
}
|
||||
None => head.boxed(),
|
||||
}
|
||||
})
|
||||
.boxed()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use bytes::Bytes;
|
||||
use tonic::Code;
|
||||
|
||||
use crate::query::mock_query_exec::MockQueryExec;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn limits_concurrent_queries() {
|
||||
let mut flight =
|
||||
FlightService::new(MockQueryExec::default(), 100, &metric::Registry::default());
|
||||
|
||||
let req = tonic::Request::new(Ticket {
|
||||
ticket: Bytes::new(),
|
||||
});
|
||||
match flight.do_get(req).await {
|
||||
Ok(_) => panic!("expected error because of invalid ticket"),
|
||||
Err(s) => {
|
||||
assert_eq!(s.code(), Code::NotFound); // Mock response value
|
||||
}
|
||||
}
|
||||
|
||||
flight.request_sem = Semaphore::new(0);
|
||||
|
||||
let req = tonic::Request::new(Ticket {
|
||||
ticket: Bytes::new(),
|
||||
});
|
||||
match flight.do_get(req).await {
|
||||
Ok(_) => panic!("expected error because of request limit"),
|
||||
Err(s) => {
|
||||
assert_eq!(s.code(), Code::ResourceExhausted);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,223 +0,0 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use data_types::sequence_number_set::SequenceNumberSet;
|
||||
use data_types::{NamespaceId, PartitionId, PartitionKey, SequenceNumber, TableId};
|
||||
use generated_types::influxdata::iox::ingester::v1::{
|
||||
self as proto, replication_service_server::ReplicationService,
|
||||
};
|
||||
use mutable_batch::writer;
|
||||
use mutable_batch_pb::decode::decode_database_batch;
|
||||
use observability_deps::tracing::*;
|
||||
use thiserror::Error;
|
||||
use tonic::{Code, Request, Response};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{BufferError, ReplicationBuffer};
|
||||
|
||||
/// A list of error states when handling a ReplicationService request.
|
||||
#[derive(Debug, Error)]
|
||||
enum ReplicationError {
|
||||
/// The replication request did not contain a write payload.
|
||||
#[error("replication request does not contain a payload")]
|
||||
NoPayload,
|
||||
|
||||
/// The replication payload contains no tables.
|
||||
#[error("replication request does not contain any table data")]
|
||||
NoTables,
|
||||
|
||||
/// The replication request didn't contain an ingester id
|
||||
#[error("replication request does not contain an ingester id")]
|
||||
NoIngesterId,
|
||||
|
||||
/// The replication request had an invalid sequence number set
|
||||
#[error("replication request to persist contained invalid sequence number set {0}")]
|
||||
InvalidSequenceNumberSet(String),
|
||||
|
||||
/// Ingester ID not a valid UUID
|
||||
#[error("replication request does not contain valid ingester uuid")]
|
||||
InvalidIngesterId(#[from] uuid::Error),
|
||||
|
||||
/// The serialised write payload could not be read.
|
||||
#[error(transparent)]
|
||||
Decode(mutable_batch_pb::decode::Error),
|
||||
|
||||
/// An error buffering the write or persist
|
||||
#[error("error buffering replciation request: {0}")]
|
||||
Buffer(#[from] BufferError),
|
||||
}
|
||||
|
||||
impl From<ReplicationError> for tonic::Status {
|
||||
fn from(e: ReplicationError) -> Self {
|
||||
let code = match e {
|
||||
ReplicationError::Decode(_)
|
||||
| ReplicationError::NoPayload
|
||||
| ReplicationError::NoTables
|
||||
| ReplicationError::NoIngesterId
|
||||
| ReplicationError::InvalidIngesterId(_)
|
||||
| ReplicationError::InvalidSequenceNumberSet(_) => Code::InvalidArgument,
|
||||
ReplicationError::Buffer(_) => Code::Internal,
|
||||
};
|
||||
|
||||
Self::new(code, e.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a [`BufferError`] returned by the configured [`ReplicationBuffer`] to a
|
||||
/// [`tonic::Status`].
|
||||
impl From<BufferError> for tonic::Status {
|
||||
fn from(e: BufferError) -> Self {
|
||||
match e {
|
||||
BufferError::MutableBatch(e) => map_write_error(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Map a [`mutable_batch::Error`] to a [`tonic::Status`].
|
||||
///
|
||||
/// This method takes care to enumerate all possible error states, so that new
|
||||
/// error additions cause a compilation failure, and therefore require the new
|
||||
/// error to be explicitly mapped to a gRPC status code.
|
||||
fn map_write_error(e: mutable_batch::Error) -> tonic::Status {
|
||||
use tonic::Status;
|
||||
match e {
|
||||
mutable_batch::Error::ColumnError { .. }
|
||||
| mutable_batch::Error::ArrowError { .. }
|
||||
| mutable_batch::Error::InternalSchema { .. }
|
||||
| mutable_batch::Error::ColumnNotFound { .. }
|
||||
| mutable_batch::Error::WriterError {
|
||||
source: writer::Error::KeyNotFound { .. } | writer::Error::InsufficientValues { .. },
|
||||
} => Status::internal(e.to_string()),
|
||||
mutable_batch::Error::WriterError {
|
||||
source: writer::Error::TypeMismatch { .. },
|
||||
} => {
|
||||
// While a schema type conflict is ultimately a user error, if it
|
||||
// reaches the ingester it should have already passed through schema
|
||||
// validation in the router, and as such it is an internal system
|
||||
// failure.
|
||||
Status::internal(e.to_string())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A gRPC [`ReplicationService`] handler.
|
||||
///
|
||||
/// This handler accepts writes from an upstream, and applies them to the
|
||||
/// provided [`ReplicationBuffer`].
|
||||
pub(crate) struct ReplicationServer<B: ReplicationBuffer + 'static> {
|
||||
buffer: Arc<B>,
|
||||
}
|
||||
|
||||
impl<B: ReplicationBuffer + 'static> ReplicationServer<B> {
|
||||
/// Instantiate a new [`ReplicationServer`]
|
||||
pub(crate) fn new(buffer: Arc<B>) -> Self {
|
||||
Self { buffer }
|
||||
}
|
||||
}
|
||||
|
||||
#[tonic::async_trait]
|
||||
impl<B: ReplicationBuffer + 'static> ReplicationService for ReplicationServer<B> {
|
||||
/// Handle an RPC write request.
|
||||
async fn replicate(
|
||||
&self,
|
||||
request: Request<proto::ReplicateRequest>,
|
||||
) -> Result<Response<proto::ReplicateResponse>, tonic::Status> {
|
||||
// Extract the remote address for debugging.
|
||||
let remote_addr = request
|
||||
.remote_addr()
|
||||
.map(|v| v.to_string())
|
||||
.unwrap_or_else(|| "<unknown>".to_string());
|
||||
|
||||
let request = request.into_inner();
|
||||
let ingester_id =
|
||||
Uuid::parse_str(&request.ingester_uuid).map_err(ReplicationError::InvalidIngesterId)?;
|
||||
|
||||
// Extract the database batch payload
|
||||
let payload = request.payload.ok_or(ReplicationError::NoPayload)?;
|
||||
|
||||
let batches = decode_database_batch(&payload).map_err(ReplicationError::Decode)?;
|
||||
let num_tables = batches.len();
|
||||
let sequence_number = SequenceNumber::new(request.sequence_number);
|
||||
let namespace_id = NamespaceId::new(payload.database_id);
|
||||
let partition_key = PartitionKey::from(payload.partition_key);
|
||||
|
||||
if num_tables == 0 {
|
||||
return Err(ReplicationError::NoTables)?;
|
||||
}
|
||||
|
||||
trace!(
|
||||
remote_addr,
|
||||
%ingester_id,
|
||||
?sequence_number,
|
||||
num_tables,
|
||||
%namespace_id,
|
||||
%partition_key,
|
||||
"received replicate write"
|
||||
);
|
||||
|
||||
match self
|
||||
.buffer
|
||||
.apply_write(namespace_id, batches, ingester_id, sequence_number)
|
||||
.await
|
||||
{
|
||||
Ok(()) => {}
|
||||
Err(e) => {
|
||||
error!(error=%e, "failed to write into buffer");
|
||||
return Err(ReplicationError::Buffer(e))?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Response::new(proto::ReplicateResponse {}))
|
||||
}
|
||||
|
||||
async fn persist_complete(
|
||||
&self,
|
||||
request: Request<proto::PersistCompleteRequest>,
|
||||
) -> Result<Response<proto::PersistCompleteResponse>, tonic::Status> {
|
||||
// Extract the remote address for debugging.
|
||||
let remote_addr = request
|
||||
.remote_addr()
|
||||
.map(|v| v.to_string())
|
||||
.unwrap_or_else(|| "<unknown>".to_string());
|
||||
|
||||
let request = request.into_inner();
|
||||
let ingester_id =
|
||||
Uuid::parse_str(&request.ingester_uuid).map_err(ReplicationError::InvalidIngesterId)?;
|
||||
let namespace_id = NamespaceId::new(request.namespace_id);
|
||||
let table_id = TableId::new(request.table_id);
|
||||
let partition_id = PartitionId::new(request.partition_id);
|
||||
let sequence_set =
|
||||
SequenceNumberSet::try_from(request.croaring_sequence_number_bitmap.as_ref())
|
||||
.map_err(ReplicationError::InvalidSequenceNumberSet)?;
|
||||
|
||||
trace!(
|
||||
remote_addr,
|
||||
?ingester_id,
|
||||
?namespace_id,
|
||||
?table_id,
|
||||
?partition_id,
|
||||
);
|
||||
|
||||
match self
|
||||
.buffer
|
||||
.apply_persist(
|
||||
ingester_id,
|
||||
namespace_id,
|
||||
table_id,
|
||||
partition_id,
|
||||
sequence_set,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(()) => {}
|
||||
Err(e) => {
|
||||
error!(error=%e, "failed to apply persist to buffer");
|
||||
return Err(ReplicationError::Buffer(e))?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Response::new(proto::PersistCompleteResponse {}))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {}
|
|
@ -1,169 +0,0 @@
|
|||
//! IOx Ingest Replica implementation
|
||||
//!
|
||||
//! The Ingest Replica serves as an in memory queryable buffer of data from one or more ingesters
|
||||
//! that are persisting data. It provides horizontal scalability of query workloads on the data in
|
||||
//! ingesters that has yet to be persisted to Parquet files. It also ensures that the write path
|
||||
//! and the query path have failure isolation so that an outage in one won't create an outage in
|
||||
//! the other.
|
||||
|
||||
#![allow(dead_code)] // Until ingest_replica is used.
|
||||
#![deny(rustdoc::broken_intra_doc_links, rust_2018_idioms)]
|
||||
#![warn(
|
||||
clippy::clone_on_ref_ptr,
|
||||
clippy::dbg_macro,
|
||||
clippy::explicit_iter_loop,
|
||||
clippy::future_not_send,
|
||||
clippy::todo,
|
||||
clippy::use_self,
|
||||
missing_copy_implementations,
|
||||
missing_debug_implementations,
|
||||
missing_docs
|
||||
)]
|
||||
|
||||
mod buffer;
|
||||
mod cache;
|
||||
mod grpc;
|
||||
mod query;
|
||||
mod query_adaptor;
|
||||
|
||||
use crate::cache::CacheError;
|
||||
use crate::{buffer::Buffer, cache::SchemaCache, grpc::GrpcDelegate};
|
||||
use arrow_flight::flight_service_server::{FlightService, FlightServiceServer};
|
||||
use async_trait::async_trait;
|
||||
use data_types::sequence_number_set::SequenceNumberSet;
|
||||
use data_types::{NamespaceId, PartitionId, SequenceNumber, TableId, TRANSITION_SHARD_INDEX};
|
||||
use generated_types::influxdata::iox::ingester::v1::replication_service_server::{
|
||||
ReplicationService, ReplicationServiceServer,
|
||||
};
|
||||
use hashbrown::HashMap;
|
||||
use iox_catalog::interface::Catalog;
|
||||
use iox_query::exec::Executor;
|
||||
use mutable_batch::MutableBatch;
|
||||
use std::sync::Arc;
|
||||
use thiserror::Error;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// An error returned by the `ReplicationBuffer`.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum BufferError {
|
||||
/// An error from the mutable batch sent to a buffer.
|
||||
#[error("mutable batch error: {0}")]
|
||||
MutableBatch(#[from] mutable_batch::Error),
|
||||
}
|
||||
|
||||
/// Acquire opaque handles to the IngestReplica RPC service implementations.
|
||||
///
|
||||
/// This trait serves as the public crate API boundary - callers external to the
|
||||
/// IngestReplica crate utilise this abstraction to acquire type erased handles to
|
||||
/// the RPC service implementations, hiding internal IngestReplica implementation
|
||||
/// details & types.
|
||||
///
|
||||
/// Callers can mock out this trait or decorate the returned implementation in
|
||||
/// order to simulate or modify the behaviour of an ingest_replica in their own tests.
|
||||
pub trait IngestReplicaRpcInterface: Send + Sync + std::fmt::Debug {
|
||||
/// The type of the [`ReplicationService`] implementation.
|
||||
type ReplicationHandler: ReplicationService;
|
||||
/// The type of the [`FlightService`] implementation.
|
||||
type FlightHandler: FlightService;
|
||||
|
||||
/// Acquire an opaque handle to the IngestReplica's [`ReplicationService`] RPC
|
||||
/// handler implementation.
|
||||
fn replication_service(&self) -> ReplicationServiceServer<Self::ReplicationHandler>;
|
||||
|
||||
/// Acquire an opaque handle to the IngestReplica's Arrow Flight
|
||||
/// [`FlightService`] RPC handler implementation, allowing at most
|
||||
/// `max_simultaneous_requests` queries to be running at any one time.
|
||||
fn query_service(
|
||||
&self,
|
||||
max_simultaneous_requests: usize,
|
||||
) -> FlightServiceServer<Self::FlightHandler>;
|
||||
}
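As a hedged illustration of how these handles are meant to be consumed (not part of this change), a binary could bind both services to a tonic server; the bind address and the request limit below are invented placeholders.

// Illustrative sketch only: exposing the handlers returned by an
// `IngestReplicaRpcInterface` implementation over gRPC. The bind address and
// the `10` request limit are placeholder values, not taken from this change.
async fn bind_rpc(rpc: impl IngestReplicaRpcInterface) -> Result<(), tonic::transport::Error> {
    // Placeholder bind address - a real binary would take this from config.
    let addr: std::net::SocketAddr = "0.0.0.0:8082".parse().expect("valid socket address");

    tonic::transport::Server::builder()
        // Replication writes from upstream ingesters.
        .add_service(rpc.replication_service())
        // Arrow Flight query service, capped at an assumed 10 concurrent requests.
        .add_service(rpc.query_service(10))
        .serve(addr)
        .await
}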
|
||||
|
||||
/// Alias for the `TableId` to `MutableBatch` hashmap of data received in write and partition
|
||||
/// buffer requests.
|
||||
pub(crate) type TableIdToMutableBatch = HashMap<i64, MutableBatch>;
|
||||
|
||||
/// ReplicationBuffer can receive data from the replication protocol to get buffers of partition
|
||||
/// data, individual write requests, and persistence notification to evict data from the buffer.
|
||||
#[async_trait]
|
||||
pub(crate) trait ReplicationBuffer: Send + Sync {
|
||||
/// Apply an individual write request to the buffer. Can write many rows into many partitions.
|
||||
async fn apply_write(
|
||||
&self,
|
||||
namespace_id: NamespaceId,
|
||||
table_batches: TableIdToMutableBatch,
|
||||
ingester_id: Uuid,
|
||||
sequence_number: SequenceNumber,
|
||||
) -> Result<(), BufferError>;
|
||||
|
||||
/// Apply a persist operation to the buffer, which should clear out the data from the given
|
||||
/// partition.
|
||||
async fn apply_persist(
|
||||
&self,
|
||||
ingester_id: Uuid,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
partition_id: PartitionId,
|
||||
sequence_set: SequenceNumberSet,
|
||||
) -> Result<(), BufferError>;
|
||||
|
||||
/// Append an entire partition buffer to the buffer. It should be able to evict this entire
|
||||
/// buffer in one operation when it later receives a persist operation that has a [`SequenceNumberSet`]
|
||||
/// that is a superset of the one sent here.
|
||||
async fn append_partition_buffer(
|
||||
&self,
|
||||
ingester_id: Uuid,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
partition_id: PartitionId,
|
||||
sequence_set: SequenceNumberSet,
|
||||
table_batches: TableIdToMutableBatch,
|
||||
) -> Result<(), BufferError>;
|
||||
}
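A minimal usage sketch of the trait above (not part of this change); all ids are placeholders, and `SequenceNumberSet::default()`/`add()` are assumed to be available on the `data_types` type.

// Illustrative sketch only: the intended call pattern against a
// `ReplicationBuffer` - replicated writes are applied as they arrive, and a
// later persist notification evicts the data covered by its sequence number
// set. Every id below is a placeholder value.
async fn demo_buffer<B: ReplicationBuffer>(buffer: &B) -> Result<(), BufferError> {
    let ingester_id = Uuid::nil(); // placeholder ingester UUID
    let namespace_id = NamespaceId::new(42);
    let sequence_number = SequenceNumber::new(1);

    // One replicate RPC worth of table data (empty here for brevity).
    let batches: TableIdToMutableBatch = HashMap::default();
    buffer
        .apply_write(namespace_id, batches, ingester_id, sequence_number)
        .await?;

    // Later, the upstream ingester reports that this sequence number was
    // persisted, allowing the buffered data for the partition to be dropped.
    // `default()` and `add()` are assumed constructors on SequenceNumberSet.
    let mut persisted = SequenceNumberSet::default();
    persisted.add(sequence_number);
    buffer
        .apply_persist(
            ingester_id,
            namespace_id,
            TableId::new(1),
            PartitionId::new(1),
            persisted,
        )
        .await
}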
|
||||
|
||||
/// Errors that occur during initialisation of an `ingest_replica` instance.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum InitError {
|
||||
/// An error occurred trying to warm the schema cache
|
||||
#[error("failed to pre-warm schema cache: {0}")]
|
||||
WarmCache(#[from] CacheError),
|
||||
}
|
||||
|
||||
/// Initialise a new `ingest_replica` instance, returning the gRPC service handler
|
||||
/// implementations to be bound by the caller.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn new(
|
||||
catalog: Arc<dyn Catalog>,
|
||||
_ingesters: Vec<String>,
|
||||
exec: Arc<Executor>,
|
||||
metrics: Arc<metric::Registry>,
|
||||
) -> Result<impl IngestReplicaRpcInterface, InitError> {
|
||||
// Create the transition shard.
|
||||
let mut txn = catalog
|
||||
.start_transaction()
|
||||
.await
|
||||
.expect("start transaction");
|
||||
let topic = txn
|
||||
.topics()
|
||||
.create_or_get("iox-shared")
|
||||
.await
|
||||
.expect("get topic");
|
||||
let transition_shard = txn
|
||||
.shards()
|
||||
.create_or_get(&topic, TRANSITION_SHARD_INDEX)
|
||||
.await
|
||||
.expect("create transition shard");
|
||||
txn.commit().await.expect("commit transition shard");
|
||||
|
||||
let schema_cache = Arc::new(SchemaCache::new(Arc::clone(&catalog), transition_shard.id));
|
||||
schema_cache.warm().await?;
|
||||
|
||||
let buffer = Arc::new(Buffer::new(schema_cache, exec));
|
||||
|
||||
// TODO: connect to the remote ingesters and subscribe to their data, receiving the
|
||||
// PartitionBufferResponses into the buffer. Note that the ReplicationService in this
|
||||
// GrpcDelegate must be running before the requests are sent as the ingester will
|
||||
// immediately start sending replicate requests.
|
||||
|
||||
Ok(GrpcDelegate::new(Arc::clone(&buffer), metrics))
|
||||
}
|
|
@ -1,156 +0,0 @@
|
|||
use async_trait::async_trait;
|
||||
use data_types::{NamespaceId, TableId};
|
||||
use iox_time::{SystemProvider, TimeProvider};
|
||||
use metric::{DurationHistogram, Metric};
|
||||
use trace::span::Span;
|
||||
|
||||
use super::QueryExec;
|
||||
use crate::query::QueryError;
|
||||
|
||||
/// An instrumentation decorator over a [`QueryExec`] implementation.
|
||||
///
|
||||
/// This wrapper captures the latency distribution of the decorated
|
||||
/// [`QueryExec::query_exec()`] call, faceted by success/error result.
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct QueryExecInstrumentation<T, P = SystemProvider> {
|
||||
inner: T,
|
||||
time_provider: P,
|
||||
|
||||
/// Query execution duration distribution for successes.
|
||||
query_duration_success: DurationHistogram,
|
||||
|
||||
/// Query execution duration distribution for "not found" errors
|
||||
query_duration_error_not_found: DurationHistogram,
|
||||
}
|
||||
|
||||
impl<T> QueryExecInstrumentation<T> {
|
||||
pub(crate) fn new(inner: T, metrics: &metric::Registry) -> Self {
|
||||
// Record query duration metrics, broken down by query execution result
|
||||
let query_duration: Metric<DurationHistogram> = metrics.register_metric(
|
||||
"ingester_flight_query_duration",
|
||||
"flight request query execution duration",
|
||||
);
|
||||
let query_duration_success = query_duration.recorder(&[("result", "success")]);
|
||||
let query_duration_error_not_found =
|
||||
query_duration.recorder(&[("result", "error"), ("reason", "not_found")]);
|
||||
|
||||
Self {
|
||||
inner,
|
||||
time_provider: Default::default(),
|
||||
query_duration_success,
|
||||
query_duration_error_not_found,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T, P> QueryExec for QueryExecInstrumentation<T, P>
|
||||
where
|
||||
T: QueryExec,
|
||||
P: TimeProvider,
|
||||
{
|
||||
type Response = T::Response;
|
||||
|
||||
#[inline(always)]
|
||||
async fn query_exec(
|
||||
&self,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
columns: Vec<String>,
|
||||
span: Option<Span>,
|
||||
) -> Result<Self::Response, QueryError> {
|
||||
let t = self.time_provider.now();
|
||||
|
||||
let res = self
|
||||
.inner
|
||||
.query_exec(namespace_id, table_id, columns, span)
|
||||
.await;
|
||||
|
||||
if let Some(delta) = self.time_provider.now().checked_duration_since(t) {
|
||||
match &res {
|
||||
Ok(_) => self.query_duration_success.record(delta),
|
||||
Err(QueryError::TableNotFound { .. } | QueryError::NamespaceNotFound { .. }) => {
|
||||
self.query_duration_error_not_found.record(delta)
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
res
|
||||
}
|
||||
}
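As a hedged sketch (not part of this change), the instrumentation and tracing decorators compose like any other [`QueryExec`] wrappers; `inner` and `metrics` stand in for values built elsewhere, and the span name is arbitrary.

// Illustrative sketch only: stacking the tracing wrapper inside the
// instrumentation wrapper so a single query_exec() call gets both a child
// span and a latency observation.
fn decorate<T: QueryExec>(
    inner: T,
    metrics: &metric::Registry,
) -> QueryExecInstrumentation<crate::query::tracing::QueryExecTracing<T>> {
    QueryExecInstrumentation::new(
        crate::query::tracing::QueryExecTracing::new(inner, "query_exec"),
        metrics,
    )
}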
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use assert_matches::assert_matches;
|
||||
use metric::Attributes;
|
||||
|
||||
use super::*;
|
||||
use crate::query::{
|
||||
mock_query_exec::MockQueryExec,
|
||||
response::{PartitionStream, QueryResponse},
|
||||
};
|
||||
|
||||
macro_rules! test_metric {
|
||||
(
|
||||
$name:ident,
|
||||
inner = $inner:expr,
|
||||
want_metric_attr = $want_metric_attr:expr,
|
||||
want_ret = $($want_ret:tt)+
|
||||
) => {
|
||||
paste::paste! {
|
||||
#[tokio::test]
|
||||
async fn [<test_metric_ $name>]() {
|
||||
let metrics = metric::Registry::default();
|
||||
let decorator = QueryExecInstrumentation::new($inner, &metrics);
|
||||
|
||||
// Call the decorator and assert the return value
|
||||
let got = decorator
|
||||
.query_exec(NamespaceId::new(42), TableId::new(24), vec![], None)
|
||||
.await;
|
||||
assert_matches!(got, $($want_ret)+);
|
||||
|
||||
// Validate the histogram with the specified attributes saw
|
||||
// an observation
|
||||
let histogram = metrics
|
||||
.get_instrument::<Metric<DurationHistogram>>("ingester_flight_query_duration")
|
||||
.expect("failed to find metric")
|
||||
.get_observer(&Attributes::from(&$want_metric_attr))
|
||||
.expect("failed to find attributes")
|
||||
.fetch();
|
||||
assert_eq!(histogram.sample_count(), 1);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
test_metric!(
|
||||
ok,
|
||||
inner = {
|
||||
let stream: PartitionStream = PartitionStream::new(futures::stream::iter([]));
|
||||
MockQueryExec::default().with_result(Ok(QueryResponse::new(stream)))
|
||||
},
|
||||
want_metric_attr = [("result", "success")],
|
||||
want_ret = Ok(_)
|
||||
);
|
||||
|
||||
test_metric!(
|
||||
namespace_not_found,
|
||||
inner = MockQueryExec::default()
|
||||
.with_result(Err(QueryError::NamespaceNotFound(NamespaceId::new(42)))),
|
||||
want_metric_attr = [("result", "error"), ("reason", "not_found")],
|
||||
want_ret = Err(QueryError::NamespaceNotFound(ns)) => {
|
||||
assert_eq!(ns, NamespaceId::new(42));
|
||||
}
|
||||
);
|
||||
|
||||
test_metric!(
|
||||
table_not_found,
|
||||
inner = MockQueryExec::default()
|
||||
.with_result(Err(QueryError::TableNotFound(NamespaceId::new(42), TableId::new(24)))),
|
||||
want_metric_attr = [("result", "error"), ("reason", "not_found")],
|
||||
want_ret = Err(QueryError::TableNotFound(ns, t)) => {
|
||||
assert_eq!(ns, NamespaceId::new(42));
|
||||
assert_eq!(t, TableId::new(24));
|
||||
}
|
||||
);
|
||||
}
|
|
@ -1,36 +0,0 @@
|
|||
use async_trait::async_trait;
|
||||
use data_types::{NamespaceId, TableId};
|
||||
use parking_lot::Mutex;
|
||||
use trace::span::Span;
|
||||
|
||||
use super::{response::QueryResponse, QueryError, QueryExec};
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub(crate) struct MockQueryExec {
|
||||
response: Mutex<Option<Result<QueryResponse, QueryError>>>,
|
||||
}
|
||||
|
||||
impl MockQueryExec {
|
||||
pub(crate) fn with_result(self, r: Result<QueryResponse, QueryError>) -> Self {
|
||||
*self.response.lock() = Some(r);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl QueryExec for MockQueryExec {
|
||||
type Response = QueryResponse;
|
||||
|
||||
async fn query_exec(
|
||||
&self,
|
||||
_namespace_id: NamespaceId,
|
||||
_table_id: TableId,
|
||||
_columns: Vec<String>,
|
||||
_span: Option<Span>,
|
||||
) -> Result<Self::Response, QueryError> {
|
||||
self.response
|
||||
.lock()
|
||||
.take()
|
||||
.unwrap_or(Err(QueryError::NamespaceNotFound(NamespaceId::new(42))))
|
||||
}
|
||||
}
|
|
@ -1,14 +0,0 @@
|
|||
//! Query execution abstraction & types.
|
||||
|
||||
mod r#trait;
|
||||
pub(crate) use r#trait::*;
|
||||
|
||||
// Response types
|
||||
pub(crate) mod partition_response;
|
||||
pub(crate) mod response;
|
||||
|
||||
pub(crate) mod instrumentation;
|
||||
pub(crate) mod tracing;
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod mock_query_exec;
|
|
@ -1,63 +0,0 @@
|
|||
//! The per-partition data nested in a query [`QueryResponse`].
|
||||
//!
|
||||
//! [`QueryResponse`]: super::response::QueryResponse
|
||||
|
||||
use data_types::PartitionId;
|
||||
use datafusion::physical_plan::SendableRecordBatchStream;
|
||||
|
||||
/// Response data for a single partition.
|
||||
pub(crate) struct PartitionResponse {
|
||||
/// Stream of snapshots.
|
||||
batches: Option<SendableRecordBatchStream>,
|
||||
|
||||
/// Partition ID.
|
||||
id: PartitionId,
|
||||
|
||||
/// Count of persisted Parquet files for this partition by this ingester instance.
|
||||
completed_persistence_count: u64,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for PartitionResponse {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("PartitionResponse")
|
||||
.field(
|
||||
"batches",
|
||||
&match self.batches {
|
||||
Some(_) => "<SNAPSHOT STREAM>",
|
||||
None => "<NO DATA>,",
|
||||
},
|
||||
)
|
||||
.field("partition_id", &self.id)
|
||||
.field(
|
||||
"completed_persistence_count",
|
||||
&self.completed_persistence_count,
|
||||
)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl PartitionResponse {
|
||||
pub(crate) fn new(
|
||||
data: Option<SendableRecordBatchStream>,
|
||||
id: PartitionId,
|
||||
completed_persistence_count: u64,
|
||||
) -> Self {
|
||||
Self {
|
||||
batches: data,
|
||||
id,
|
||||
completed_persistence_count,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn id(&self) -> PartitionId {
|
||||
self.id
|
||||
}
|
||||
|
||||
pub(crate) fn completed_persistence_count(&self) -> u64 {
|
||||
self.completed_persistence_count
|
||||
}
|
||||
|
||||
pub(crate) fn into_record_batch_stream(self) -> Option<SendableRecordBatchStream> {
|
||||
self.batches
|
||||
}
|
||||
}
|
|
@ -1,60 +0,0 @@
|
|||
//! The response type returned from a query [`QueryExec::query_exec()`] call.
|
||||
//!
|
||||
//! [`QueryExec::query_exec()`]: super::QueryExec::query_exec()
|
||||
|
||||
use std::{future, pin::Pin};
|
||||
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use datafusion::common::DataFusionError;
|
||||
use futures::{Stream, StreamExt};
|
||||
|
||||
use super::partition_response::PartitionResponse;
|
||||
|
||||
/// Stream of partitions in this response.
|
||||
pub(crate) struct PartitionStream(Pin<Box<dyn Stream<Item = PartitionResponse> + Send>>);
|
||||
|
||||
impl std::fmt::Debug for PartitionStream {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_tuple("PartitionStream").finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl PartitionStream {
|
||||
pub(crate) fn new<T>(s: T) -> Self
|
||||
where
|
||||
T: Stream<Item = PartitionResponse> + Send + 'static,
|
||||
{
|
||||
Self(s.boxed())
|
||||
}
|
||||
}
|
||||
|
||||
/// A response stream wrapper for ingester query requests.
|
||||
///
|
||||
/// The data structure is constructed to allow lazy/streaming/pull-based data
|
||||
/// sourcing.
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct QueryResponse {
|
||||
/// Stream of partitions.
|
||||
partitions: PartitionStream,
|
||||
}
|
||||
|
||||
impl QueryResponse {
|
||||
/// Make a response
|
||||
pub(crate) fn new(partitions: PartitionStream) -> Self {
|
||||
Self { partitions }
|
||||
}
|
||||
|
||||
/// Return the stream of [`PartitionResponse`].
|
||||
pub(crate) fn into_partition_stream(self) -> impl Stream<Item = PartitionResponse> {
|
||||
self.partitions.0
|
||||
}
|
||||
|
||||
/// Reduce the [`QueryResponse`] to a stream of [`RecordBatch`].
|
||||
pub(crate) fn into_record_batches(
|
||||
self,
|
||||
) -> impl Stream<Item = Result<RecordBatch, DataFusionError>> {
|
||||
self.into_partition_stream()
|
||||
.filter_map(|partition| future::ready(partition.into_record_batch_stream()))
|
||||
.flatten()
|
||||
}
|
||||
}
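A minimal consumption sketch (not part of this change), assuming the caller wants all batches in memory rather than streaming them onward.

// Illustrative sketch only: collapsing a `QueryResponse` into a flat vector of
// record batches via the stream returned by `into_record_batches()`.
async fn collect_batches(response: QueryResponse) -> Result<Vec<RecordBatch>, DataFusionError> {
    use futures::TryStreamExt;
    response.into_record_batches().try_collect().await
}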
|
|
@ -1,148 +0,0 @@
|
|||
use std::borrow::Cow;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use data_types::{NamespaceId, TableId};
|
||||
use trace::span::{Span, SpanRecorder};
|
||||
|
||||
use super::QueryExec;
|
||||
use crate::query::QueryError;
|
||||
|
||||
/// A tracing decorator over a [`QueryExec`] implementation.
|
||||
///
|
||||
/// This wrapper emits child tracing spans covering the execution of the inner
|
||||
/// [`QueryExec::query_exec()`] call.
|
||||
///
|
||||
/// Constructing this decorator is cheap.
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct QueryExecTracing<T> {
|
||||
inner: T,
|
||||
name: Cow<'static, str>,
|
||||
}
|
||||
|
||||
impl<T> QueryExecTracing<T> {
|
||||
pub(crate) fn new(inner: T, name: impl Into<Cow<'static, str>>) -> Self {
|
||||
Self {
|
||||
inner,
|
||||
name: name.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T> QueryExec for QueryExecTracing<T>
|
||||
where
|
||||
T: QueryExec,
|
||||
{
|
||||
type Response = T::Response;
|
||||
|
||||
#[inline(always)]
|
||||
async fn query_exec(
|
||||
&self,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
columns: Vec<String>,
|
||||
span: Option<Span>,
|
||||
) -> Result<Self::Response, QueryError> {
|
||||
let span = span.map(|s| s.child(self.name.clone()));
|
||||
let mut recorder = SpanRecorder::new(span.clone());
|
||||
|
||||
match self
|
||||
.inner
|
||||
.query_exec(namespace_id, table_id, columns, span)
|
||||
.await
|
||||
{
|
||||
Ok(v) => {
|
||||
recorder.ok("query_exec complete");
|
||||
Ok(v)
|
||||
}
|
||||
Err(e) => {
|
||||
recorder.error(e.to_string());
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use assert_matches::assert_matches;
|
||||
use trace::{ctx::SpanContext, span::SpanStatus, RingBufferTraceCollector, TraceCollector};
|
||||
|
||||
use crate::query::{
|
||||
mock_query_exec::MockQueryExec,
|
||||
response::{PartitionStream, QueryResponse},
|
||||
};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[track_caller]
|
||||
fn assert_trace(name: impl Into<String>, status: SpanStatus, traces: &dyn TraceCollector) {
|
||||
let traces = traces
|
||||
.as_any()
|
||||
.downcast_ref::<RingBufferTraceCollector>()
|
||||
.expect("unexpected collector impl");
|
||||
|
||||
let name = name.into();
|
||||
let span = traces
|
||||
.spans()
|
||||
.into_iter()
|
||||
.find(|s| s.name == name)
|
||||
.unwrap_or_else(|| panic!("tracing span {name} not found"));
|
||||
|
||||
assert_eq!(
|
||||
span.status, status,
|
||||
"span status does not match expected value"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_ok() {
|
||||
let stream: PartitionStream = PartitionStream::new(futures::stream::iter([]));
|
||||
let mock = MockQueryExec::default().with_result(Ok(QueryResponse::new(stream)));
|
||||
|
||||
let traces: Arc<dyn TraceCollector> = Arc::new(RingBufferTraceCollector::new(5));
|
||||
let span = SpanContext::new(Arc::clone(&traces));
|
||||
|
||||
// Drive the trace wrapper
|
||||
let _ = QueryExecTracing::new(mock, "bananas")
|
||||
.query_exec(
|
||||
NamespaceId::new(42),
|
||||
TableId::new(24),
|
||||
vec![],
|
||||
Some(span.child("root span")),
|
||||
)
|
||||
.await
|
||||
.expect("wrapper should not modify result");
|
||||
|
||||
// Assert the trace showed up.
|
||||
assert_trace("bananas", SpanStatus::Ok, &*traces);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_err() {
|
||||
let mock = MockQueryExec::default()
|
||||
.with_result(Err(QueryError::NamespaceNotFound(NamespaceId::new(42))));
|
||||
|
||||
let traces: Arc<dyn TraceCollector> = Arc::new(RingBufferTraceCollector::new(5));
|
||||
let span = SpanContext::new(Arc::clone(&traces));
|
||||
|
||||
// Drive the trace wrapper
|
||||
let got = QueryExecTracing::new(mock, "bananas")
|
||||
.query_exec(
|
||||
NamespaceId::new(42),
|
||||
TableId::new(24),
|
||||
vec![],
|
||||
Some(span.child("root span")),
|
||||
)
|
||||
.await
|
||||
.expect_err("wrapper should not modify result");
|
||||
assert_matches!(got, QueryError::NamespaceNotFound(ns) => {
|
||||
assert_eq!(ns, NamespaceId::new(42));
|
||||
});
|
||||
|
||||
// Assert the trace showed up.
|
||||
assert_trace("bananas", SpanStatus::Err, &*traces);
|
||||
}
|
||||
}
|
|
@ -1,49 +0,0 @@
|
|||
use std::{fmt::Debug, ops::Deref, sync::Arc};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use data_types::{NamespaceId, TableId};
|
||||
use thiserror::Error;
|
||||
use trace::span::Span;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
#[allow(missing_copy_implementations)]
|
||||
pub(crate) enum QueryError {
|
||||
#[error("namespace id {0} not found")]
|
||||
NamespaceNotFound(NamespaceId),
|
||||
|
||||
#[error("table id {1} not found in namespace id {0}")]
|
||||
TableNotFound(NamespaceId, TableId),
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub(crate) trait QueryExec: Send + Sync + Debug {
|
||||
type Response: Send + Debug;
|
||||
|
||||
async fn query_exec(
|
||||
&self,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
columns: Vec<String>,
|
||||
span: Option<Span>,
|
||||
) -> Result<Self::Response, QueryError>;
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T> QueryExec for Arc<T>
|
||||
where
|
||||
T: QueryExec,
|
||||
{
|
||||
type Response = T::Response;
|
||||
|
||||
async fn query_exec(
|
||||
&self,
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
columns: Vec<String>,
|
||||
span: Option<Span>,
|
||||
) -> Result<Self::Response, QueryError> {
|
||||
self.deref()
|
||||
.query_exec(namespace_id, table_id, columns, span)
|
||||
.await
|
||||
}
|
||||
}
|
|
@ -1,208 +0,0 @@
|
|||
//! An adaptor over a set of [`RecordBatch`] allowing them to be used as an IOx
|
||||
//! [`QueryChunk`].
|
||||
|
||||
use std::{any::Any, sync::Arc};
|
||||
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use arrow_util::util::ensure_schema;
|
||||
use data_types::{ChunkId, ChunkOrder, DeletePredicate, PartitionId, TableSummary};
|
||||
use datafusion::error::DataFusionError;
|
||||
use iox_query::{
|
||||
exec::{stringset::StringSet, IOxSessionContext},
|
||||
util::{compute_timenanosecond_min_max, create_basic_summary},
|
||||
QueryChunk, QueryChunkData, QueryChunkMeta,
|
||||
};
|
||||
use once_cell::sync::OnceCell;
|
||||
use predicate::Predicate;
|
||||
use schema::{merge::merge_record_batch_schemas, sort::SortKey, Projection, Schema};
|
||||
|
||||
/// A queryable wrapper over a set of ordered [`RecordBatch`]
|
||||
///
|
||||
/// It is an invariant that a [`QueryAdaptor`] MUST always contain at least one
|
||||
/// row. This frees the caller from having to reason about empty [`QueryAdaptor`]
|
||||
/// instances yielding empty [`RecordBatch`].
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub struct QueryAdaptor {
|
||||
/// The snapshot data from a partition.
|
||||
///
|
||||
/// This MUST be non-pub(crate) / closed for modification / immutable to support
|
||||
/// interning the merged schema in [`Self::schema()`].
|
||||
data: Vec<Arc<RecordBatch>>,
|
||||
|
||||
/// The catalog ID of the partition this data is part of.
|
||||
partition_id: PartitionId,
|
||||
|
||||
/// Chunk ID.
|
||||
id: ChunkId,
|
||||
|
||||
/// An interned schema for all [`RecordBatch`] in data.
|
||||
schema: OnceCell<Arc<Schema>>,
|
||||
|
||||
/// An interned table summary.
|
||||
summary: OnceCell<Arc<TableSummary>>,
|
||||
}
|
||||
|
||||
impl QueryAdaptor {
|
||||
/// Construct a [`QueryAdaptor`].
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// This constructor panics if `data` contains no [`RecordBatch`], or all
|
||||
/// [`RecordBatch`] are empty.
|
||||
pub(crate) fn new(partition_id: PartitionId, data: Vec<Arc<RecordBatch>>) -> Self {
|
||||
// There must always be at least one record batch and one row.
|
||||
//
|
||||
// This upholds an invariant that simplifies dealing with empty
|
||||
// partitions - if there is a QueryAdaptor, it contains data.
|
||||
assert!(data.iter().map(|b| b.num_rows()).sum::<usize>() > 0);
|
||||
|
||||
Self {
|
||||
data,
|
||||
partition_id,
|
||||
// Generate a UUID-based ChunkId so there is a value to report when
// debugging, consistent with the ChunkIds created in the compactor. Draw
// the UUID here, during chunk generation, so it is stable for the whole
// query process.
|
||||
id: ChunkId::new(),
|
||||
schema: OnceCell::default(),
|
||||
summary: OnceCell::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn project_selection(&self, selection: Projection<'_>) -> Vec<RecordBatch> {
|
||||
// Project the column selection across all RecordBatch
|
||||
self.data
|
||||
.iter()
|
||||
.map(|data| {
|
||||
let batch = data.as_ref();
|
||||
let schema = batch.schema();
|
||||
|
||||
// Apply selection to in-memory batch
|
||||
match selection {
|
||||
Projection::All => batch.clone(),
|
||||
Projection::Some(columns) => {
|
||||
let projection = columns
|
||||
.iter()
|
||||
.flat_map(|&column_name| {
|
||||
// ignore non-existing columns
|
||||
schema.index_of(column_name).ok()
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
batch.project(&projection).expect("bug in projection")
|
||||
}
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Returns the [`RecordBatch`] instances in this [`QueryAdaptor`].
|
||||
pub(crate) fn record_batches(&self) -> &[Arc<RecordBatch>] {
|
||||
self.data.as_ref()
|
||||
}
|
||||
|
||||
/// Returns the partition ID from which the data this [`QueryAdaptor`] was
|
||||
/// sourced from.
|
||||
pub(crate) fn partition_id(&self) -> PartitionId {
|
||||
self.partition_id
|
||||
}
|
||||
}
|
||||
|
||||
impl QueryChunkMeta for QueryAdaptor {
|
||||
fn summary(&self) -> Arc<TableSummary> {
|
||||
Arc::clone(self.summary.get_or_init(|| {
|
||||
let ts_min_max = compute_timenanosecond_min_max(self.data.iter().map(|b| b.as_ref()))
|
||||
.expect("Should have time range");
|
||||
|
||||
Arc::new(create_basic_summary(
|
||||
self.data.iter().map(|b| b.num_rows()).sum::<usize>() as u64,
|
||||
self.schema(),
|
||||
ts_min_max,
|
||||
))
|
||||
}))
|
||||
}
|
||||
|
||||
fn schema(&self) -> &Schema {
|
||||
self.schema
|
||||
.get_or_init(|| merge_record_batch_schemas(&self.data).into())
|
||||
.as_ref()
|
||||
}
|
||||
|
||||
fn partition_sort_key(&self) -> Option<&SortKey> {
|
||||
None // Ingester data has not persisted yet and should not be attached to any partition
|
||||
}
|
||||
|
||||
fn partition_id(&self) -> PartitionId {
|
||||
self.partition_id
|
||||
}
|
||||
|
||||
fn sort_key(&self) -> Option<&SortKey> {
|
||||
None // Ingester data is not sorted
|
||||
}
|
||||
|
||||
fn delete_predicates(&self) -> &[Arc<DeletePredicate>] {
|
||||
&[]
|
||||
}
|
||||
}
|
||||
|
||||
impl QueryChunk for QueryAdaptor {
|
||||
fn id(&self) -> ChunkId {
|
||||
self.id
|
||||
}
|
||||
|
||||
/// Returns true if the chunk may contain a duplicate "primary key" within
|
||||
/// itself
|
||||
fn may_contain_pk_duplicates(&self) -> bool {
|
||||
// always true because the rows across record batches have not been
|
||||
// de-duplicated.
|
||||
true
|
||||
}
|
||||
|
||||
/// Returns a set of Strings with column names from the specified
|
||||
/// table that have at least one row that matches `predicate`, if
|
||||
/// the predicate can be evaluated entirely on the metadata of
|
||||
/// this Chunk. Returns `None` otherwise
|
||||
fn column_names(
|
||||
&self,
|
||||
_ctx: IOxSessionContext,
|
||||
_predicate: &Predicate,
|
||||
_columns: Projection<'_>,
|
||||
) -> Result<Option<StringSet>, DataFusionError> {
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
/// Return a set of Strings containing the distinct values in the
|
||||
/// specified columns. If the predicate can be evaluated entirely
|
||||
/// on the metadata of this Chunk. Returns `None` otherwise
|
||||
///
|
||||
/// The requested columns must all have String type.
|
||||
fn column_values(
|
||||
&self,
|
||||
_ctx: IOxSessionContext,
|
||||
_column_name: &str,
|
||||
_predicate: &Predicate,
|
||||
) -> Result<Option<StringSet>, DataFusionError> {
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn data(&self) -> QueryChunkData {
|
||||
let schema = self.schema().as_arrow();
|
||||
|
||||
QueryChunkData::RecordBatches(
|
||||
self.data
|
||||
.iter()
|
||||
.map(|b| ensure_schema(&schema, b).expect("schema handling broken"))
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns chunk type
|
||||
fn chunk_type(&self) -> &str {
|
||||
"QueryAdaptor"
|
||||
}
|
||||
|
||||
fn order(&self) -> ChunkOrder {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
|
@ -35,7 +35,7 @@ parking_lot = "0.12.1"
|
|||
parquet_file = { version = "0.1.0", path = "../parquet_file" }
|
||||
pin-project = "1.0.12"
|
||||
predicate = { version = "0.1.0", path = "../predicate" }
|
||||
prost = { version = "0.11.6", default-features = false, features = ["std"] }
|
||||
prost = { version = "0.11.9", default-features = false, features = ["std"] }
|
||||
rand = "0.8.5"
|
||||
schema = { version = "0.1.0", path = "../schema" }
|
||||
service_grpc_catalog = { version = "0.1.0", path = "../service_grpc_catalog" }
|
||||
|
@ -44,7 +44,7 @@ test_helpers = { path = "../test_helpers", features = ["future_timeout"], option
|
|||
thiserror = "1.0.40"
|
||||
tokio = { version = "1.27", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] }
|
||||
tokio-util = "0.7.7"
|
||||
tonic = "0.8.3"
|
||||
tonic = { workspace = true }
|
||||
trace = { version = "0.1.0", path = "../trace" }
|
||||
uuid = "1.3.1"
|
||||
wal = { version = "0.1.0", path = "../wal" }
@ -167,7 +167,7 @@ where
|
|||
table_id: TableId,
|
||||
table_name: Arc<DeferredLoad<TableName>>,
|
||||
transition_shard_id: ShardId,
|
||||
) -> PartitionData {
|
||||
) -> Arc<Mutex<PartitionData>> {
|
||||
// Use the cached PartitionKey instead of the caller's partition_key,
|
||||
// preferring to reuse the already-shared Arc<str> in the cache.
|
||||
|
||||
|
@ -188,7 +188,7 @@ where
|
|||
// Use the returned partition key instead of the caller's - this
|
||||
// allows the backing str memory to be reused across all partitions
|
||||
// using the same key!
|
||||
return PartitionData::new(
|
||||
return Arc::new(Mutex::new(PartitionData::new(
|
||||
partition_id,
|
||||
key,
|
||||
namespace_id,
|
||||
|
@ -197,7 +197,7 @@ where
|
|||
table_name,
|
||||
SortKeyState::Deferred(Arc::new(sort_key_resolver)),
|
||||
transition_shard_id,
|
||||
);
|
||||
)));
|
||||
}
|
||||
|
||||
debug!(%table_id, %partition_key, "partition cache miss");
|
||||
|
@ -218,6 +218,9 @@ where
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
// Harmless in tests - saves a bunch of extra vars.
|
||||
#![allow(clippy::await_holding_lock)]
|
||||
|
||||
use data_types::ShardId;
|
||||
use iox_catalog::mem::MemCatalog;
@ -282,10 +285,10 @@ mod tests {
|
|||
)
|
||||
.await;
|
||||
|
||||
assert_eq!(got.partition_id(), PARTITION_ID);
|
||||
assert_eq!(got.table_id(), TABLE_ID);
|
||||
assert_eq!(&**got.table_name().get().await, TABLE_NAME);
|
||||
assert_eq!(&**got.namespace_name().get().await, NAMESPACE_NAME);
|
||||
assert_eq!(got.lock().partition_id(), PARTITION_ID);
|
||||
assert_eq!(got.lock().table_id(), TABLE_ID);
|
||||
assert_eq!(&**got.lock().table_name().get().await, TABLE_NAME);
|
||||
assert_eq!(&**got.lock().namespace_name().get().await, NAMESPACE_NAME);
|
||||
assert!(cache.inner.is_empty());
|
||||
}
@ -322,11 +325,14 @@ mod tests {
|
|||
)
|
||||
.await;
|
||||
|
||||
assert_eq!(got.partition_id(), PARTITION_ID);
|
||||
assert_eq!(got.table_id(), TABLE_ID);
|
||||
assert_eq!(&**got.table_name().get().await, TABLE_NAME);
|
||||
assert_eq!(&**got.namespace_name().get().await, NAMESPACE_NAME);
|
||||
assert_eq!(*got.partition_key(), PartitionKey::from(PARTITION_KEY));
|
||||
assert_eq!(got.lock().partition_id(), PARTITION_ID);
|
||||
assert_eq!(got.lock().table_id(), TABLE_ID);
|
||||
assert_eq!(&**got.lock().table_name().get().await, TABLE_NAME);
|
||||
assert_eq!(&**got.lock().namespace_name().get().await, NAMESPACE_NAME);
|
||||
assert_eq!(
|
||||
*got.lock().partition_key(),
|
||||
PartitionKey::from(PARTITION_KEY)
|
||||
);
|
||||
|
||||
// The cache should have been cleaned up as it was consumed.
|
||||
assert!(cache.entries.lock().is_empty());
|
||||
|
@ -334,10 +340,10 @@ mod tests {
|
|||
// Assert the partition key from the cache was used for the lifetime of
|
||||
// the partition, so that it is shared with the cache + other partitions
|
||||
// that share the same partition key across all tables.
|
||||
assert!(got.partition_key().ptr_eq(&stored_partition_key));
|
||||
assert!(got.lock().partition_key().ptr_eq(&stored_partition_key));
|
||||
// It does not use the short-lived caller's partition key (derived from
|
||||
// the DML op it is processing).
|
||||
assert!(!got.partition_key().ptr_eq(&callers_partition_key));
|
||||
assert!(!got.lock().partition_key().ptr_eq(&callers_partition_key));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
@ -385,9 +391,9 @@ mod tests {
|
|||
)
|
||||
.await;
|
||||
|
||||
assert_eq!(got.partition_id(), other_key_id);
|
||||
assert_eq!(got.table_id(), TABLE_ID);
|
||||
assert_eq!(&**got.table_name().get().await, TABLE_NAME);
|
||||
assert_eq!(got.lock().partition_id(), other_key_id);
|
||||
assert_eq!(got.lock().table_id(), TABLE_ID);
|
||||
assert_eq!(&**got.lock().table_name().get().await, TABLE_NAME);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
@ -434,8 +440,8 @@ mod tests {
|
|||
)
|
||||
.await;
|
||||
|
||||
assert_eq!(got.partition_id(), PARTITION_ID);
|
||||
assert_eq!(got.table_id(), other_table);
|
||||
assert_eq!(&**got.table_name().get().await, TABLE_NAME);
|
||||
assert_eq!(got.lock().partition_id(), PARTITION_ID);
|
||||
assert_eq!(got.lock().table_id(), other_table);
|
||||
assert_eq!(&**got.lock().table_name().get().await, TABLE_NAME);
|
||||
}
|
||||
}
@ -8,6 +8,7 @@ use backoff::{Backoff, BackoffConfig};
|
|||
use data_types::{NamespaceId, Partition, PartitionKey, ShardId, TableId};
|
||||
use iox_catalog::interface::Catalog;
|
||||
use observability_deps::tracing::debug;
|
||||
use parking_lot::Mutex;
|
||||
|
||||
use super::r#trait::PartitionProvider;
|
||||
use crate::{
|
||||
|
@ -63,7 +64,7 @@ impl PartitionProvider for CatalogPartitionResolver {
|
|||
table_id: TableId,
|
||||
table_name: Arc<DeferredLoad<TableName>>,
|
||||
transition_shard_id: ShardId,
|
||||
) -> PartitionData {
|
||||
) -> Arc<Mutex<PartitionData>> {
|
||||
debug!(
|
||||
%partition_key,
|
||||
%table_id,
|
||||
|
@ -78,7 +79,7 @@ impl PartitionProvider for CatalogPartitionResolver {
|
|||
.await
|
||||
.expect("retry forever");
|
||||
|
||||
PartitionData::new(
|
||||
Arc::new(Mutex::new(PartitionData::new(
|
||||
p.id,
|
||||
// Use the caller's partition key instance, as it MAY be shared with
|
||||
// other instances, but the instance returned from the catalog
|
||||
|
@ -90,12 +91,15 @@ impl PartitionProvider for CatalogPartitionResolver {
|
|||
table_name,
|
||||
SortKeyState::Provided(p.sort_key()),
|
||||
transition_shard_id,
|
||||
)
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
// Harmless in tests - saves a bunch of extra vars.
|
||||
#![allow(clippy::await_holding_lock)]
|
||||
|
||||
use std::{sync::Arc, time::Duration};
|
||||
|
||||
use assert_matches::assert_matches;
|
||||
|
@ -157,18 +161,18 @@ mod tests {
|
|||
.await;
|
||||
|
||||
// Ensure the table name is available.
|
||||
let _ = got.table_name().get().await;
|
||||
let _ = got.lock().table_name().get().await;
|
||||
|
||||
assert_eq!(got.namespace_id(), namespace_id);
|
||||
assert_eq!(got.table_name().to_string(), table_name.to_string());
|
||||
assert_matches!(got.sort_key(), SortKeyState::Provided(None));
|
||||
assert!(got.partition_key.ptr_eq(&callers_partition_key));
|
||||
assert_eq!(got.lock().namespace_id(), namespace_id);
|
||||
assert_eq!(got.lock().table_name().to_string(), table_name.to_string());
|
||||
assert_matches!(got.lock().sort_key(), SortKeyState::Provided(None));
|
||||
assert!(got.lock().partition_key.ptr_eq(&callers_partition_key));
|
||||
|
||||
let got = catalog
|
||||
.repositories()
|
||||
.await
|
||||
.partitions()
|
||||
.get_by_id(got.partition_id)
|
||||
.get_by_id(got.lock().partition_id)
|
||||
.await
|
||||
.unwrap()
|
||||
.expect("partition not created");
@ -0,0 +1,423 @@
|
|||
use std::{
|
||||
pin::Pin,
|
||||
sync::{
|
||||
atomic::{AtomicBool, Ordering},
|
||||
Arc,
|
||||
},
|
||||
};
|
||||
|
||||
use arrow::compute::kernels::partition;
|
||||
use async_trait::async_trait;
|
||||
use data_types::{NamespaceId, PartitionKey, ShardId, TableId};
|
||||
use futures::{future::Shared, FutureExt};
|
||||
use hashbrown::{hash_map::Entry, HashMap};
|
||||
use parking_lot::Mutex;
|
||||
|
||||
use crate::{
|
||||
buffer_tree::{namespace::NamespaceName, partition::PartitionData, table::TableName},
|
||||
deferred_load::DeferredLoad,
|
||||
};
|
||||
|
||||
use super::PartitionProvider;
|
||||
|
||||
/// A helper alias for a boxed, dynamically dispatched future that resolves to an
/// `Arc`/`Mutex`-wrapped [`PartitionData`].
|
||||
type BoxedResolveFuture =
|
||||
Pin<Box<dyn std::future::Future<Output = Arc<Mutex<PartitionData>>> + Send>>;
|
||||
|
||||
/// A compound key of `(namespace, table, partition_key)` which uniquely
|
||||
/// identifies a single partition.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
struct Key {
|
||||
namespace_id: NamespaceId,
|
||||
table_id: TableId,
|
||||
partition_key: PartitionKey,
|
||||
}
|
||||
|
||||
/// The state of the resolver.
|
||||
///
|
||||
/// The [`Shared`] requires more space than the simple ref-pointer to the
|
||||
/// [`PartitionData`], so resolving callers replace the shared handle with the
|
||||
/// resolved result where possible.
|
||||
#[derive(Debug)]
|
||||
enum State {
|
||||
/// A resolve task is ongoing, and the caller can await the [`Shared`]
|
||||
/// future to obtain the result.
|
||||
///
|
||||
/// If the atomic bool is false, no thread is changing this [`State`] to
|
||||
/// [`State::Resolved`] for the resolved partition. If true, a thread is in
|
||||
/// the process of setting (or already has set) the state to
|
||||
/// [`State::Resolved`].
|
||||
Resolving(Shared<BoxedResolveFuture>, Arc<AtomicBool>),
|
||||
|
||||
/// A prior call resolved this partition.
|
||||
Resolved(Arc<Mutex<PartitionData>>),
|
||||
}
|
||||
|
||||
/// A coalescing [`PartitionProvider`] reducing N partition fetch requests into
|
||||
/// a single call to `T` on a per-partition basis.
|
||||
///
|
||||
/// This type solves a concurrency problem, where a series of concurrent cache
|
||||
/// misses "above" this type causes a series of concurrent lookups against the
|
||||
/// inner resolver "below" this type for a single partition. This is wasteful,
|
||||
/// as only one result is retained by the callers (a single [`PartitionData`] is
|
||||
/// used to reference a partition of data).
|
||||
///
|
||||
/// This type is typically used to coalesce requests against the
|
||||
/// [`CatalogPartitionResolver`]:
|
||||
///
|
||||
/// ```text
|
||||
/// ┌─────────────────────────────┐
|
||||
/// │ Cache │
|
||||
/// └─────────────────────────────┘
|
||||
/// │ │ │
|
||||
/// ▼ ▼ ▼
|
||||
/// ┌─────────────────────────────┐
|
||||
/// │ CoalescePartitionResolver │
|
||||
/// └─────────────────────────────┘
|
||||
/// │
|
||||
/// ▼
|
||||
/// ┌─────────────────────────────┐
|
||||
/// │ CatalogPartitionResolver │
|
||||
/// └─────────────────────────────┘
|
||||
/// ```
|
||||
///
|
||||
/// Imagine the following concurrent requests without this type:
|
||||
///
|
||||
/// * T1: check cache for partition A, miss
|
||||
/// * T2: check cache for partition A, miss
|
||||
/// * T1: inner.get_partition(A)
|
||||
/// * T2: inner.get_partition(A)
|
||||
/// * T1: cache put partition A
|
||||
/// * T2: cache put partition A
|
||||
///
|
||||
/// With this type, the concurrent requests for a single partition (A) are
|
||||
/// coalesced into a single request against the inner resolver:
|
||||
///
|
||||
/// * T1: check cache for partition A, miss
|
||||
/// * T2: check cache for partition A, miss
|
||||
/// * T1: CoalescePartitionResolver::get_partition(A)
|
||||
/// * T2: CoalescePartitionResolver::get_partition(A)
|
||||
/// * inner.get_partition() **(a single call to inner is made)**
|
||||
/// * T1: cache put partition A
|
||||
/// * T2: cache put partition A
|
||||
///
|
||||
/// # Memory Overhead
|
||||
///
|
||||
/// This type makes a best effort attempt to minimise the memory overhead of
|
||||
/// memoising partition fetches. Callers drop the intermediate resolving state
|
||||
/// upon success, leaving only a ref-counted pointer to the shared
|
||||
/// [`PartitionData`] (a single [`Arc`] ref overhead).
|
||||
///
|
||||
/// # Cancellation Safety
|
||||
///
|
||||
/// This type is cancellation safe - calls to
|
||||
/// [`CoalescePartitionResolver::get_partition()`] are safe to abort at any
|
||||
/// point.
|
||||
///
|
||||
/// [`CatalogPartitionResolver`]: super::CatalogPartitionResolver
|
||||
#[derive(Debug)]
|
||||
pub struct CoalescePartitionResolver<T> {
|
||||
/// The inner resolver the actual partition fetch is delegated to.
|
||||
inner: Arc<T>,
|
||||
|
||||
/// A map of handles to ongoing resolve futures.
|
||||
ongoing: Mutex<HashMap<Key, State>>,
|
||||
}
|
||||
|
||||
impl<T> CoalescePartitionResolver<T> {
|
||||
pub fn new(inner: Arc<T>) -> Self {
|
||||
Self {
|
||||
inner,
|
||||
ongoing: Mutex::new(HashMap::default()),
|
||||
}
|
||||
}
|
||||
}
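A small wiring sketch (not part of this change): the coalescer is a plain decorator over whatever resolver sits beneath it, per the diagram in the type docs; the inner resolver's construction is intentionally omitted.

// Illustrative sketch only: wrapping an existing PartitionProvider (for
// example the catalog resolver) in the coalescing layer.
fn coalesce<T>(inner: Arc<T>) -> Arc<CoalescePartitionResolver<T>>
where
    T: PartitionProvider + 'static,
{
    Arc::new(CoalescePartitionResolver::new(inner))
}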
|
||||
|
||||
#[async_trait]
|
||||
impl<T> PartitionProvider for CoalescePartitionResolver<T>
|
||||
where
|
||||
T: PartitionProvider + 'static,
|
||||
{
|
||||
async fn get_partition(
|
||||
&self,
|
||||
partition_key: PartitionKey,
|
||||
namespace_id: NamespaceId,
|
||||
namespace_name: Arc<DeferredLoad<NamespaceName>>,
|
||||
table_id: TableId,
|
||||
table_name: Arc<DeferredLoad<TableName>>,
|
||||
transition_shard_id: ShardId,
|
||||
) -> Arc<Mutex<PartitionData>> {
|
||||
let key = Key {
|
||||
namespace_id,
|
||||
table_id,
|
||||
partition_key: partition_key.clone(), // Ref-counted anyway!
|
||||
};
|
||||
|
||||
// Check if there's an ongoing (or recently completed) resolve.
|
||||
let (shared, done) = match self.ongoing.lock().entry(key.clone()) {
|
||||
Entry::Occupied(v) => match v.get() {
|
||||
State::Resolving(fut, done) => (fut.clone(), Arc::clone(done)),
|
||||
State::Resolved(v) => return Arc::clone(v),
|
||||
},
|
||||
Entry::Vacant(v) => {
|
||||
// Spawn a future to resolve the partition, and retain a handle
|
||||
// to it.
|
||||
let inner = Arc::clone(&self.inner);
|
||||
let fut: BoxedResolveFuture = Box::pin(async move {
|
||||
inner
|
||||
.get_partition(
|
||||
partition_key,
|
||||
namespace_id,
|
||||
namespace_name,
|
||||
table_id,
|
||||
table_name,
|
||||
transition_shard_id,
|
||||
)
|
||||
.await
|
||||
});
|
||||
|
||||
// Make the future poll-able by many callers, all of which
|
||||
// resolve to the same output PartitionData instance.
|
||||
let fut = fut.shared();
|
||||
let done = Arc::new(AtomicBool::new(false));
|
||||
|
||||
// Allow future callers to obtain this shared handle, instead of
|
||||
// resolving the partition themselves.
|
||||
v.insert(State::Resolving(fut.clone(), Arc::clone(&done)));
|
||||
|
||||
(fut, done)
|
||||
}
|
||||
};
|
||||
|
||||
// Wait for the resolve to complete.
|
||||
//
|
||||
// If this caller future is dropped before this resolve future
|
||||
// completes, then it remains unpolled until the next caller obtains a
|
||||
// shared handle and continues the process.
|
||||
let res = shared.await;
|
||||
|
||||
// As an optimisation, select exactly one thread to acquire the lock and
|
||||
// change the state instead of every caller trying to set the state to
|
||||
// "resolved", which involves contending on the lock for all concurrent
|
||||
// callers for all concurrent partition fetches.
|
||||
//
|
||||
// Any caller that has been awaiting the shared future above is a
|
||||
// candidate to perform this state change, but only one thread will
|
||||
// attempt to. In the presence of aborted callers waiting on the shared
// future, each caller that completes the await will attempt to change the state
|
||||
// (cancellation safe).
|
||||
if done
|
||||
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
|
||||
.is_ok()
|
||||
{
|
||||
// This task should drop the Shared, swapping it for the resolved
|
||||
// state.
|
||||
//
|
||||
// This thread SHOULD NOT fail to perform this action as no other
|
||||
// thread will attempt it now the bool has been toggled.
|
||||
let old = self
|
||||
.ongoing
|
||||
.lock()
|
||||
.insert(key, State::Resolved(Arc::clone(&res)));
|
||||
|
||||
// Invariant: the resolve future must exist in the map, and the
|
||||
// state may only be changed by the thread that won the CAS.
|
||||
assert!(matches!(old, Some(State::Resolving(..))));
|
||||
}
|
||||
|
||||
res
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::{
|
||||
future,
|
||||
sync::Arc,
|
||||
task::{Context, Poll},
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use assert_matches::assert_matches;
|
||||
use data_types::{PartitionId, TRANSITION_SHARD_ID};
|
||||
use futures::{stream::FuturesUnordered, StreamExt};
|
||||
use test_helpers::timeout::FutureTimeout;
|
||||
|
||||
use crate::buffer_tree::partition::{resolver::mock::MockPartitionProvider, SortKeyState};
|
||||
|
||||
use super::*;
|
||||
|
||||
const PARTITION_KEY: &str = "bananas";
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_coalesce() {
|
||||
const MAX_TASKS: usize = 50;
|
||||
|
||||
let namespace_id = NamespaceId::new(1234);
|
||||
let namespace_name = Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
|
||||
NamespaceName::from("ns-platanos")
|
||||
}));
|
||||
let table_id = TableId::new(24);
|
||||
let table_name = Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
|
||||
TableName::from("platanos")
|
||||
}));
|
||||
let partition = PartitionId::new(4242);
|
||||
let data = PartitionData::new(
|
||||
partition,
|
||||
PartitionKey::from(PARTITION_KEY),
|
||||
namespace_id,
|
||||
Arc::clone(&namespace_name),
|
||||
table_id,
|
||||
Arc::clone(&table_name),
|
||||
SortKeyState::Provided(None),
|
||||
TRANSITION_SHARD_ID,
|
||||
);
|
||||
|
||||
// Add a single instance of the partition - if more than one call is
|
||||
// made, this will cause a panic.
|
||||
let inner = Arc::new(MockPartitionProvider::default().with_partition(data));
|
||||
let layer = Arc::new(CoalescePartitionResolver::new(Arc::clone(&inner)));
|
||||
|
||||
let results = (0..MAX_TASKS)
|
||||
.map(|_| {
|
||||
let namespace_name = Arc::clone(&namespace_name);
|
||||
let table_name = Arc::clone(&table_name);
|
||||
layer.get_partition(
|
||||
PartitionKey::from(PARTITION_KEY),
|
||||
namespace_id,
|
||||
namespace_name,
|
||||
table_id,
|
||||
table_name,
|
||||
TRANSITION_SHARD_ID,
|
||||
)
|
||||
})
|
||||
.collect::<FuturesUnordered<_>>()
|
||||
.collect::<Vec<_>>()
|
||||
.await;
|
||||
|
||||
// All the resulting instances of PartitionData MUST be the same
|
||||
// ref-counted instance.
|
||||
results.as_slice().windows(2).for_each(|v| {
|
||||
assert!(Arc::ptr_eq(&v[0], &v[1]));
|
||||
});
|
||||
|
||||
// The state should have been set to "resolved" to reclaim memory
|
||||
assert_matches!(
|
||||
layer.ongoing.lock().values().next(),
|
||||
Some(State::Resolved(..))
|
||||
);
|
||||
}
|
||||
|
||||
// A resolver that blocks forever when resolving PARTITION_KEY but instantly
|
||||
// finishes all others.
|
||||
#[derive(Debug)]
|
||||
struct BlockingResolver {
|
||||
p: Arc<Mutex<PartitionData>>,
|
||||
}
|
||||
|
||||
impl PartitionProvider for BlockingResolver {
|
||||
fn get_partition<'life0, 'async_trait>(
|
||||
&'life0 self,
|
||||
partition_key: PartitionKey,
|
||||
_namespace_id: NamespaceId,
|
||||
_namespace_name: Arc<DeferredLoad<NamespaceName>>,
|
||||
_table_id: TableId,
|
||||
_table_name: Arc<DeferredLoad<TableName>>,
|
||||
_transition_shard_id: ShardId,
|
||||
) -> core::pin::Pin<
|
||||
Box<
|
||||
dyn core::future::Future<Output = Arc<Mutex<PartitionData>>>
|
||||
+ core::marker::Send
|
||||
+ 'async_trait,
|
||||
>,
|
||||
>
|
||||
where
|
||||
'life0: 'async_trait,
|
||||
Self: 'async_trait,
|
||||
{
|
||||
if partition_key == PartitionKey::from(PARTITION_KEY) {
|
||||
return future::pending().boxed();
|
||||
}
|
||||
future::ready(Arc::clone(&self.p)).boxed()
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_disjoint_parallelised() {
|
||||
use futures::Future;
|
||||
|
||||
let namespace_id = NamespaceId::new(1234);
|
||||
let namespace_name = Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
|
||||
NamespaceName::from("ns-platanos")
|
||||
}));
|
||||
let table_id = TableId::new(24);
|
||||
let table_name = Arc::new(DeferredLoad::new(Duration::from_secs(1), async {
|
||||
TableName::from("platanos")
|
||||
}));
|
||||
let partition = PartitionId::new(4242);
|
||||
let data = PartitionData::new(
|
||||
partition,
|
||||
PartitionKey::from(PARTITION_KEY),
|
||||
namespace_id,
|
||||
Arc::clone(&namespace_name),
|
||||
table_id,
|
||||
Arc::clone(&table_name),
|
||||
SortKeyState::Provided(None),
|
||||
TRANSITION_SHARD_ID,
|
||||
);
|
||||
|
||||
// Add a single instance of the partition - if more than one call is
|
||||
// made to the mock, it will panic.
|
||||
let inner = Arc::new(BlockingResolver {
|
||||
p: Arc::new(Mutex::new(data)),
|
||||
});
|
||||
let layer = Arc::new(CoalescePartitionResolver::new(inner));
|
||||
|
||||
// The following two requests are for the same (blocked) partition and
// neither resolves.
|
||||
let pa_1 = layer.get_partition(
|
||||
PartitionKey::from(PARTITION_KEY),
|
||||
namespace_id,
|
||||
Arc::clone(&namespace_name),
|
||||
table_id,
|
||||
Arc::clone(&table_name),
|
||||
TRANSITION_SHARD_ID,
|
||||
);
|
||||
let pa_2 = layer.get_partition(
|
||||
PartitionKey::from(PARTITION_KEY),
|
||||
namespace_id,
|
||||
Arc::clone(&namespace_name),
|
||||
table_id,
|
||||
Arc::clone(&table_name),
|
||||
TRANSITION_SHARD_ID,
|
||||
);
|
||||
|
||||
let waker = futures::task::noop_waker();
|
||||
let mut cx = Context::from_waker(&waker);
|
||||
|
||||
futures::pin_mut!(pa_1);
|
||||
futures::pin_mut!(pa_2);
|
||||
|
||||
// Neither make progress
|
||||
assert_matches!(Pin::new(&mut pa_1).poll(&mut cx), Poll::Pending);
|
||||
assert_matches!(Pin::new(&mut pa_2).poll(&mut cx), Poll::Pending);
|
||||
|
||||
// But a non-blocked partition is resolved without issue.
|
||||
let _ = layer
|
||||
.get_partition(
|
||||
PartitionKey::from("platanos"),
|
||||
namespace_id,
|
||||
namespace_name,
|
||||
table_id,
|
||||
table_name,
|
||||
TRANSITION_SHARD_ID,
|
||||
)
|
||||
.with_timeout_panic(Duration::from_secs(5))
|
||||
.await;
|
||||
|
||||
// While the original requests are still blocked.
|
||||
assert_matches!(Pin::new(&mut pa_1).poll(&mut cx), Poll::Pending);
|
||||
assert_matches!(Pin::new(&mut pa_2).poll(&mut cx), Poll::Pending);
|
||||
}
|
||||
}
|
|
@ -55,7 +55,7 @@ impl PartitionProvider for MockPartitionProvider {
table_id: TableId,
table_name: Arc<DeferredLoad<TableName>>,
_transition_shard_id: ShardId,
) -> PartitionData {
) -> Arc<Mutex<PartitionData>> {
let p = self
.partitions
.lock()

@ -67,6 +67,6 @@ impl PartitionProvider for MockPartitionProvider {
assert_eq!(p.namespace_id(), namespace_id);
assert_eq!(p.namespace_name().to_string(), namespace_name.to_string());
assert_eq!(p.table_name().to_string(), table_name.to_string());
p
Arc::new(Mutex::new(p))
}
}
@ -16,5 +16,8 @@ pub(crate) use catalog::*;
mod sort_key;
pub(crate) use sort_key::*;

mod coalesce;
pub(crate) use coalesce::*;

#[cfg(test)]
pub(crate) mod mock;
@ -2,6 +2,7 @@ use std::{fmt::Debug, sync::Arc};

use async_trait::async_trait;
use data_types::{NamespaceId, PartitionKey, ShardId, TableId};
use parking_lot::Mutex;

use crate::{
buffer_tree::{namespace::NamespaceName, partition::PartitionData, table::TableName},

@ -25,7 +26,7 @@ pub(crate) trait PartitionProvider: Send + Sync + Debug {
table_id: TableId,
table_name: Arc<DeferredLoad<TableName>>,
transition_shard_id: ShardId,
) -> PartitionData;
) -> Arc<Mutex<PartitionData>>;
}

#[async_trait]

@ -41,7 +42,7 @@ where
table_id: TableId,
table_name: Arc<DeferredLoad<TableName>>,
transition_shard_id: ShardId,
) -> PartitionData {
) -> Arc<Mutex<PartitionData>> {
(**self)
.get_partition(
partition_key,

@ -101,9 +102,12 @@ mod tests {
TRANSITION_SHARD_ID,
)
.await;
assert_eq!(got.partition_id(), partition);
assert_eq!(got.namespace_id(), namespace_id);
assert_eq!(got.namespace_name().to_string(), namespace_name.to_string());
assert_eq!(got.table_name().to_string(), table_name.to_string());
assert_eq!(got.lock().partition_id(), partition);
assert_eq!(got.lock().namespace_id(), namespace_id);
assert_eq!(
got.lock().namespace_name().to_string(),
namespace_name.to_string()
);
assert_eq!(got.lock().table_name().to_string(), table_name.to_string());
}
}
@ -183,8 +183,7 @@ where
//
// This MAY return a different instance than `p` if another
// thread has already initialised the partition.
self.partition_data
.get_or_insert_with(&partition_key, || Arc::new(Mutex::new(p)))
self.partition_data.get_or_insert_with(&partition_key, || p)
}
};

@ -223,8 +222,9 @@ where
);

// Gather the partition data from all of the partitions in this table.
let span = SpanRecorder::new(span);
let partitions = self.partitions().into_iter().map(move |p| {
let mut span = SpanRecorder::new(span.clone().map(|s| s.child("partition read")));
let mut span = span.child("partition read");

let (id, completed_persistence_count, data) = {
let mut p = p.lock();
@ -26,7 +26,9 @@ use wal::Wal;
use crate::{
buffer_tree::{
namespace::name_resolver::{NamespaceNameProvider, NamespaceNameResolver},
partition::resolver::{CatalogPartitionResolver, PartitionCache, PartitionProvider},
partition::resolver::{
CatalogPartitionResolver, CoalescePartitionResolver, PartitionCache, PartitionProvider,
},
table::name_resolver::{TableNameProvider, TableNameResolver},
BufferTree,
},

@ -281,8 +283,10 @@ where
.await
.map_err(InitError::PreWarmPartitions)?;

// Build the partition provider, wrapped in the partition cache.
// Build the partition provider, wrapped in the partition cache and request
// coalescer.
let partition_provider = CatalogPartitionResolver::new(Arc::clone(&catalog));
let partition_provider = CoalescePartitionResolver::new(Arc::new(partition_provider));
let partition_provider = PartitionCache::new(
partition_provider,
recent_partitions,
@ -43,12 +43,11 @@ where
columns: Vec<String>,
span: Option<Span>,
) -> Result<Self::Response, QueryError> {
let span = span.map(|s| s.child(self.name.clone()));
let mut recorder = SpanRecorder::new(span.clone());
let mut recorder = SpanRecorder::new(span).child(self.name.clone());

match self
.inner
.query_exec(namespace_id, table_id, columns, span)
.query_exec(namespace_id, table_id, columns, recorder.span().cloned())
.await
{
Ok(v) => {

@ -89,7 +88,7 @@ mod tests {
.spans()
.into_iter()
.find(|s| s.name == name)
.unwrap_or_else(|| panic!("tracing span {name} not found"));
.unwrap_or_else(|| panic!("tracing span {name} not found in\n{traces:#?}"));

assert_eq!(
span.status, status,
@ -146,6 +146,7 @@ where
request: Request<Ticket>,
) -> Result<Response<Self::DoGetStream>, tonic::Status> {
let span_ctx: Option<SpanContext> = request.extensions().get().cloned();
let span = span_ctx.child_span("ingester query");

// Acquire and hold a permit for the duration of this request, or return
// an error if the existing requests have already exhausted the

@ -178,12 +179,7 @@ where

let response = match self
.query_handler
.query_exec(
namespace_id,
table_id,
request.columns,
span_ctx.child_span("ingester query"),
)
.query_exec(namespace_id, table_id, request.columns, span)
.await
{
Ok(v) => v,
@ -25,11 +25,11 @@ mutable_batch_pb = { version = "0.1.0", path = "../mutable_batch_pb" }
object_store = "0.5.6"
observability_deps = { version = "0.1.0", path = "../observability_deps" }
parquet_file = { version = "0.1.0", path = "../parquet_file" }
prost = { version = "0.11.6", default-features = false, features = ["std"] }
prost = { version = "0.11.9", default-features = false, features = ["std"] }
tempfile = { version = "3.5.0" }
test_helpers = { path = "../test_helpers", features = ["future_timeout"] }
tokio = { version = "1.27", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] }
tokio-util = "0.7.7"
tonic = "0.8.3"
tonic = { workspace = true }
wal = { version = "0.1.0", path = "../wal" }
workspace-hack = { version = "0.1", path = "../workspace-hack" }
@ -24,7 +24,7 @@ rand = { version = "0.8.3", features = ["small_rng"] }
regex = "1.7"
schema = { path = "../schema" }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.95"
serde_json = "1.0.96"
snafu = "0.7"
tokio = { version = "1.27", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] }
toml = "0.7.3"
@ -29,6 +29,7 @@ indexmap = { version = "1.9", features = ["std"] }
itertools = "0.10.5"
object_store = "0.5.6"
observability_deps = { path = "../observability_deps" }
once_cell = "1"
parking_lot = "0.12"
parquet_file = { path = "../parquet_file" }
query_functions = { path = "../query_functions"}
@ -45,16 +45,19 @@ use super::{params::GapFillParams, FillStrategy};
/// │ ╟────╫───┼───┼─────────────╫───┼───┼─────────────╢
/// │ 2 ║ ║ │ │ ║ │ │ ║
/// │ ╟────╫───┼───┼─────────────╫───┼───┼─────────────╢
/// │ 3 ║ ║ │ │ ║ │ │ ║
/// │ . . .
/// output_batch_size . . .
/// │ . . .
/// │ ╟────╫───┼───┼─────────────╫───┼───┼─────────────╢
/// │ n - 1 ║ ║ │ │ ║ │ │ ║
/// │ ╟────╫───┼───┼─────────────╫───┼───┼─────────────╢
/// ┴──── n ║ ║ │ │ ║ │ │ ║
/// ╟────╫───┼───┼─────────────╫───┼───┼─────────────╢
/// trailing row n + 1 ║ ║ │ │ ║ │ │ ║
/// ╙────╨───┴───┴─────────────╨───┴───┴─────────────╜
/// trailing row(s) n + 1 ║ ║ │ │ ║ │ │ ║
/// ╟────╫───┼───┼─────────────╫───┼───┼─────────────╢
/// . . .
/// . . .
/// . . .
/// ```
///
/// Just before generating output, the cursor will generally point at offset 1

@ -69,13 +72,19 @@ use super::{params::GapFillParams, FillStrategy};
/// (using the [`take`](take::take) kernel) when we are generating trailing gaps, i.e.,
/// when all of the input rows have been output for a series in the previous batch,
/// but there still remains missing rows to produce at the end.
/// - Having one additional _trailing row_ at the end ensures that `GapFiller` can
/// - Having at least one additional _trailing row_ at the end ensures that `GapFiller` can
/// infer whether there is trailing gaps to produce at the beginning of the
/// next batch, since it can discover if the last row starts a new series.
/// - If there are columns that have a fill strategy of [`LinearInterpolate`], then more
/// trailing rows may be necessary to find the next non-null value for the column.
///
/// [`LinearInterpolate`]: FillStrategy::LinearInterpolate
#[derive(Debug)]
pub(super) struct GapFiller {
/// The static parameters of gap-filling: time range start, end and the stride.
params: GapFillParams,
/// The number of rows to produce in each output batch.
batch_size: usize,
/// The current state of gap-filling, including the next timestamp,
/// the offset of the next input row, and remaining space in output batch.
cursor: Cursor,

@ -83,9 +92,25 @@ pub(super) struct GapFiller {

impl GapFiller {
/// Initialize a [GapFiller] at the beginning of an input record batch.
pub fn new(params: GapFillParams) -> Self {
pub fn new(params: GapFillParams, batch_size: usize) -> Self {
let cursor = Cursor::new(&params);
Self { params, cursor }
Self {
params,
batch_size,
cursor,
}
}

/// Given that the cursor points at the input row that will be
/// the first row in the next output batch, return the offset
/// of last input row that could possibly be in the output.
///
/// This offset is used by [`BufferedInput`] to determine how many
/// rows need to be buffered.
///
/// [`BufferedInput`]: super::BufferedInput
pub(super) fn last_output_row_offset(&self) -> usize {
self.cursor.next_input_offset + self.batch_size - 1
}

/// Returns true if there are no more output rows to produce given

@ -100,14 +125,13 @@ impl GapFiller {
/// schema at member `0`.
pub fn build_gapfilled_output(
&mut self,
batch_size: usize,
schema: SchemaRef,
input_time_array: (usize, &TimestampNanosecondArray),
group_arrays: &[(usize, ArrayRef)],
aggr_arrays: &[(usize, ArrayRef)],
) -> Result<RecordBatch> {
let series_ends = self.plan_output_batch(batch_size, input_time_array.1, group_arrays)?;
self.cursor.remaining_output_batch_size = batch_size;
let series_ends = self.plan_output_batch(input_time_array.1, group_arrays)?;
self.cursor.remaining_output_batch_size = self.batch_size;
self.build_output(
schema,
input_time_array,

@ -139,7 +163,6 @@ impl GapFiller {
/// to partition input rows into series.
fn plan_output_batch(
&mut self,
batch_size: usize,
input_time_array: &TimestampNanosecondArray,
group_arr: &[(usize, ArrayRef)],
) -> Result<Vec<usize>> {

@ -165,7 +188,7 @@ impl GapFiller {

let start_offset = cursor.next_input_offset;
assert!(start_offset <= 1, "input is sliced after it is consumed");
while output_row_count < batch_size {
while output_row_count < self.batch_size {
match ranges.next() {
Some(Range { end, .. }) => {
assert!(
@ -90,7 +90,6 @@ impl Cursor {
.map(|seg| Segment::<T::Native>::try_from(seg.clone()))
.transpose()?;
let mut builder = InterpolateBuilder {
params,
values: Vec::with_capacity(self.remaining_output_batch_size),
segment,
input_time_array,

@ -173,7 +172,6 @@ impl_from_segment_scalar_value!(f64);
/// Implements [`VecBuilder`] for build aggregate columns whose gaps
/// are being filled using linear interpolation.
pub(super) struct InterpolateBuilder<'a, T: ArrowPrimitiveType> {
pub params: &'a GapFillParams,
pub values: Vec<Option<T::Native>>,
pub segment: Option<Segment<T::Native>>,
pub input_time_array: &'a TimestampNanosecondArray,

@ -193,27 +191,25 @@ where
offset,
series_end_offset,
} => {
// If
// we are not at the last point
// and the distance to the next point is greater than the stride
// and both this point and the next are not null
// then create a segment that will be used to fill in the missing rows.
if offset + 1 < series_end_offset
&& self.input_time_array.value(offset + 1) > ts + self.params.stride
&& self.input_aggr_array.is_valid(offset)
&& self.input_aggr_array.is_valid(offset + 1)
{
self.segment = Some(Segment {
if self.input_aggr_array.is_valid(offset) {
let end_offset = self.find_end_offset(offset, series_end_offset);
// Find the next non-null value in this column for the series.
// If there is one, start a new segment at the current value.
self.segment = end_offset.map(|end_offset| Segment {
start_point: (ts, self.input_aggr_array.value(offset)),
end_point: (
self.input_time_array.value(offset + 1),
self.input_aggr_array.value(offset + 1),
self.input_time_array.value(end_offset),
self.input_aggr_array.value(end_offset),
),
})
});
self.copy_point(offset);
} else {
self.segment = None;
self.values.push(
self.segment
.as_ref()
.map(|seg| T::Native::interpolate(seg, ts)),
);
}
self.copy_point(offset);
}
RowStatus::Missing { ts, .. } => self.values.push(
self.segment

@ -243,6 +239,17 @@ where
.then_some(self.input_aggr_array.value(offset));
self.values.push(v)
}

/// Scan forward to find the endpoint for a segment that starts at `start_offset`.
/// Skip over any null values.
///
/// We are guaranteed to have buffered enough input to find the next non-null point for this series,
/// if there is one, by the logic in [`BufferedInput`].
///
/// [`BufferedInput`]: super::super::buffered_input::BufferedInput
fn find_end_offset(&self, start_offset: usize, series_end_offset: usize) -> Option<usize> {
((start_offset + 1)..series_end_offset).find(|&i| self.input_aggr_array.is_valid(i))
}
}

/// A trait for the native numeric types that can be interpolated

@ -375,8 +382,8 @@ mod test {
- "| 1970-01-01T00:00:00.000001200Z | 133 |"
- "| 1970-01-01T00:00:00.000001300Z | 166 |"
- "| 1970-01-01T00:00:00.000001400Z | 200 |"
- "| 1970-01-01T00:00:00.000001500Z | |"
- "| 1970-01-01T00:00:00.000001600Z | |"
- "| 1970-01-01T00:00:00.000001500Z | 466 |"
- "| 1970-01-01T00:00:00.000001600Z | 733 |"
- "| 1970-01-01T00:00:00.000001700Z | 1000 |"
- "| 1970-01-01T00:00:00.000001800Z | 500 |"
- "| 1970-01-01T00:00:00.000001900Z | 0 |"

@ -447,8 +454,8 @@ mod test {
- "| 1970-01-01T00:00:00.000001200Z | 133 |"
- "| 1970-01-01T00:00:00.000001300Z | 166 |"
- "| 1970-01-01T00:00:00.000001400Z | 200 |"
- "| 1970-01-01T00:00:00.000001500Z | |"
- "| 1970-01-01T00:00:00.000001600Z | |"
- "| 1970-01-01T00:00:00.000001500Z | 466 |"
- "| 1970-01-01T00:00:00.000001600Z | 733 |"
- "| 1970-01-01T00:00:00.000001700Z | 1000 |"
- "| 1970-01-01T00:00:00.000001800Z | 500 |"
- "| 1970-01-01T00:00:00.000001900Z | 0 |"

@ -519,8 +526,8 @@ mod test {
- "| 1970-01-01T00:00:00.000001200Z | 200.0 |"
- "| 1970-01-01T00:00:00.000001300Z | 300.0 |"
- "| 1970-01-01T00:00:00.000001400Z | 400.0 |"
- "| 1970-01-01T00:00:00.000001500Z | |"
- "| 1970-01-01T00:00:00.000001600Z | |"
- "| 1970-01-01T00:00:00.000001500Z | 600.0 |"
- "| 1970-01-01T00:00:00.000001600Z | 800.0 |"
- "| 1970-01-01T00:00:00.000001700Z | 1000.0 |"
- "| 1970-01-01T00:00:00.000001800Z | 500.0 |"
- "| 1970-01-01T00:00:00.000001900Z | 0.0 |"
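
The snapshot changes above follow from plain linear interpolation between the segment endpoints located by `find_end_offset`. A standalone sketch of the arithmetic (illustrative only; the real code works over Arrow arrays):

```rust
// Linear interpolation between two known points. (t0, v0) and (t1, v1) are
// the segment endpoints; `ts` is a missing timestamp between them.
fn interpolate(start: (i64, f64), end: (i64, f64), ts: i64) -> f64 {
    let (t0, v0) = start;
    let (t1, v1) = end;
    v0 + (v1 - v0) * (ts - t0) as f64 / (t1 - t0) as f64
}

fn main() {
    // Matches the float snapshot above: 400.0 at t=1400 and 1000.0 at t=1700
    // yield 600.0 at t=1500 and 800.0 at t=1600.
    assert_eq!(interpolate((1400, 400.0), (1700, 1000.0), 1500), 600.0);
    assert_eq!(interpolate((1400, 400.0), (1700, 1000.0), 1600), 800.0);
}
```
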
@ -0,0 +1,405 @@
//! Logic for buffering record batches for gap filling.

use std::sync::Arc;

use arrow::{
array::ArrayRef,
record_batch::RecordBatch,
row::{RowConverter, Rows, SortField},
};
use datafusion::error::{DataFusionError, Result};
use hashbrown::HashSet;

use super::{params::GapFillParams, FillStrategy};

/// Encapsulate the logic around how to buffer input records.
///
/// If there are no columns with [`FillStrategy::LinearInterpolate`], then
/// we need to buffer up to the last input row that might appear in the output, plus
/// one additional row.
///
/// However, if there are columns filled via interpolation, then we need
/// to ensure that we read ahead far enough to a non-null value, or a change
/// of group columns, in the columns being interpolated.
///
/// [`FillStrategy::LinearInterpolate`]: super::FillStrategy::LinearInterpolate
/// [`GapFillStream`]: super::stream::GapFillStream
pub(super) struct BufferedInput {
/// Indexes of group columns in the schema (not including time).
group_cols: Vec<usize>,
/// Indexes of aggregate columns filled via interpolation.
interpolate_cols: Vec<usize>,
/// Buffered records from the input stream.
batches: Vec<RecordBatch>,
/// When gap filling with interpolated values, this row converter
/// is used to compare rows to see if group columns have changed.
row_converter: Option<RowConverter>,
/// When gap filling with interpolated values, cache a row-oriented
/// representation of the last row that may appear in the output so
/// it doesn't need to be computed more than once.
last_output_row: Option<Rows>,
}

impl BufferedInput {
pub(super) fn new(params: &GapFillParams, group_cols: Vec<usize>) -> Self {
let interpolate_cols = params
.fill_strategy
.iter()
.filter_map(|(col_offset, fs)| {
(fs == &FillStrategy::LinearInterpolate).then_some(*col_offset)
})
.collect::<Vec<usize>>();
Self {
group_cols,
interpolate_cols,
batches: vec![],
row_converter: None,
last_output_row: None,
}
}
/// Add a new batch of buffered records from the input stream.
pub(super) fn push(&mut self, batch: RecordBatch) {
self.batches.push(batch);
}

/// Transfer ownership of the buffered record batches to the caller for
/// processing.
pub(super) fn take(&mut self) -> Vec<RecordBatch> {
self.last_output_row = None;
std::mem::take(&mut self.batches)
}

/// Determine if we need more input before we start processing.
pub(super) fn need_more(&mut self, last_output_row_offset: usize) -> Result<bool> {
let record_count: usize = self.batches.iter().map(|rb| rb.num_rows()).sum();
// min number of rows needed is the number of rows up to and including
// the last row that may appear in the output, plus one more row.
let min_needed = last_output_row_offset + 2;

if record_count < min_needed {
return Ok(true);
} else if self.interpolate_cols.is_empty() {
return Ok(false);
}

// Check to see if the last row that might appear in the output
// has a different group column values than the last buffered row.
// If they are different, then we have enough input to start.
let (last_output_batch_offset, last_output_row_offset) = self
.find_row_idx(last_output_row_offset)
.expect("checked record count");
if self.group_columns_changed((last_output_batch_offset, last_output_row_offset))? {
return Ok(false);
}

// Now check if there are non-null values in the columns being interpolated.
// We skip over the batches that come before the one that contains the last
// possible output row. We start with the last buffered batch, so we can avoid
// having to slice unless necessary.
let mut cols_that_need_more =
HashSet::<usize>::from_iter(self.interpolate_cols.iter().cloned());
let mut to_remove = vec![];
for (i, batch) in self
.batches
.iter()
.enumerate()
.skip(last_output_batch_offset)
.rev()
{
for col_offset in cols_that_need_more.clone() {
// If this is the batch containing the last possible output row, slice the
// array so we are just looking at that value and the ones after.
let array = batch.column(col_offset);
let array = if i == last_output_batch_offset {
let length = array.len() - last_output_row_offset;
batch
.column(col_offset)
.slice(last_output_row_offset, length)
} else {
Arc::clone(array)
};

if array.null_count() < array.len() {
to_remove.push(col_offset);
}
}

to_remove.drain(..).for_each(|c| {
cols_that_need_more.remove(&c);
});
if cols_that_need_more.is_empty() {
break;
}
}

Ok(!cols_that_need_more.is_empty())
}

/// Check to see if the group column values have changed between the last row
/// that may be in the output and the last buffered input row.
///
/// This method uses the row-oriented representation of Arrow data from [`arrow::row`] to
/// compare rows in different record batches.
///
/// [`arrow::row`]: https://docs.rs/arrow-row/36.0.0/arrow_row/index.html
fn group_columns_changed(&mut self, last_output_row_idx: (usize, usize)) -> Result<bool> {
if self.group_cols.is_empty() {
return Ok(false);
}

let last_buffered_row_idx = self.last_buffered_row_idx();
if last_output_row_idx == last_buffered_row_idx {
// the output row is also the last buffered row,
// so there is nothing to compare.
return Ok(false);
}

let last_input_rows = self.convert_row(self.last_buffered_row_idx())?;
let last_row_in_output = self.last_output_row(last_output_row_idx)?;

Ok(last_row_in_output.row(0) != last_input_rows.row(0))
}

/// Get a row converter for comparing records. Keep it in [`Self::row_converter`]
/// to avoid creating it multiple times.
fn get_row_converter(&mut self) -> Result<&mut RowConverter> {
if self.row_converter.is_none() {
let batch = self.batches.first().expect("at least one batch");
let sort_fields = self
.group_cols
.iter()
.map(|c| SortField::new(batch.column(*c).data_type().clone()))
.collect();
let row_converter =
RowConverter::new(sort_fields).map_err(DataFusionError::ArrowError)?;
self.row_converter = Some(row_converter);
}
Ok(self.row_converter.as_mut().expect("cannot be none"))
}

/// Convert a row to row-oriented format for easy comparison.
fn convert_row(&mut self, row_idxs: (usize, usize)) -> Result<Rows> {
let batch = &self.batches[row_idxs.0];
let columns: Vec<ArrayRef> = self
.group_cols
.iter()
.map(|col_idx| batch.column(*col_idx).slice(row_idxs.1, 1))
.collect();
self.get_row_converter()?
.convert_columns(&columns)
.map_err(DataFusionError::ArrowError)
}

/// Returns the row-oriented representation of the last buffered row that may appear in the next
/// output batch. Since this row may be used multiple times, cache it in `self` to
/// avoid computing it multiple times.
fn last_output_row(&mut self, idxs: (usize, usize)) -> Result<&Rows> {
if self.last_output_row.is_none() {
let rows = self.convert_row(idxs)?;
self.last_output_row = Some(rows);
}
Ok(self.last_output_row.as_ref().expect("cannot be none"))
}

/// Return the `(batch_idx, row_idx)` of the last buffered row.
fn last_buffered_row_idx(&self) -> (usize, usize) {
let last_batch_len = self.batches.last().unwrap().num_rows();
(self.batches.len() - 1, last_batch_len - 1)
}

/// Return the `(batch_idx, row_idx)` of the `nth` row.
fn find_row_idx(&self, mut nth: usize) -> Option<(usize, usize)> {
let mut idx = None;
for (i, batch) in self.batches.iter().enumerate() {
if nth >= batch.num_rows() {
nth -= batch.num_rows()
} else {
idx = Some((i, nth));
break;
}
}
idx
}
}

#[cfg(test)]
mod tests {
use std::collections::VecDeque;

use arrow_util::test_util::batches_to_lines;

use super::*;
use crate::exec::gapfill::exec_tests::TestRecords;

fn test_records(batch_size: usize) -> VecDeque<RecordBatch> {
let records = TestRecords {
group_cols: vec![
std::iter::repeat(Some("a")).take(12).collect(),
std::iter::repeat(Some("b"))
.take(6)
.chain(std::iter::repeat(Some("c")).take(6))
.collect(),
],
time_col: (0..12).map(|i| Some(1000 + i * 5)).take(12).collect(),
agg_cols: vec![
vec![
Some(1),
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
Some(10),
],
vec![
Some(2),
None,
None,
None,
None,
None,
None,
None,
Some(20),
None,
None,
None,
],
(0..12).map(Some).collect(),
],
input_batch_size: batch_size,
};

TryInto::<Vec<RecordBatch>>::try_into(records)
.unwrap()
.into()
}

fn test_params() -> GapFillParams {
GapFillParams {
stride: 50_000_000,
first_ts: Some(1_000_000_000),
last_ts: 1_055_000_000,
fill_strategy: [
(3, FillStrategy::LinearInterpolate),
(4, FillStrategy::LinearInterpolate),
]
.into(),
}
}

// This test is just here so it's clear what the
// test data is
#[test]
fn test_test_records() {
let batch = test_records(1000).pop_front().unwrap();
let actual = batches_to_lines(&[batch]);
insta::assert_yaml_snapshot!(actual, @r###"
---
- +----+----+--------------------------+----+----+----+
- "| g0 | g1 | time | a0 | a1 | a2 |"
- +----+----+--------------------------+----+----+----+
- "| a | b | 1970-01-01T00:00:01Z | 1 | 2 | 0 |"
- "| a | b | 1970-01-01T00:00:01.005Z | | | 1 |"
- "| a | b | 1970-01-01T00:00:01.010Z | | | 2 |"
- "| a | b | 1970-01-01T00:00:01.015Z | | | 3 |"
- "| a | b | 1970-01-01T00:00:01.020Z | | | 4 |"
- "| a | b | 1970-01-01T00:00:01.025Z | | | 5 |"
- "| a | c | 1970-01-01T00:00:01.030Z | | | 6 |"
- "| a | c | 1970-01-01T00:00:01.035Z | | | 7 |"
- "| a | c | 1970-01-01T00:00:01.040Z | | 20 | 8 |"
- "| a | c | 1970-01-01T00:00:01.045Z | | | 9 |"
- "| a | c | 1970-01-01T00:00:01.050Z | | | 10 |"
- "| a | c | 1970-01-01T00:00:01.055Z | 10 | | 11 |"
- +----+----+--------------------------+----+----+----+
"###);
}

#[test]
fn no_group_no_interpolate() {
let batch_size = 3;
let mut params = test_params();
params.fill_strategy = [].into();

let mut buffered_input = BufferedInput::new(&params, vec![]);
let mut batches = test_records(batch_size);

// There are no rows, so that is less than the batch size,
// it needs more.
assert!(buffered_input.need_more(batch_size - 1).unwrap());

// There are now 3 rows, still less than batch_size + 1,
// so it needs more.
buffered_input.push(batches.pop_front().unwrap());
assert!(buffered_input.need_more(batch_size - 1).unwrap());

// We now have batch_size * 2, records, which is enough.
buffered_input.push(batches.pop_front().unwrap());
assert!(!buffered_input.need_more(batch_size - 1).unwrap());
}

#[test]
fn no_group() {
let batch_size = 3;
let params = test_params();
let mut buffered_input = BufferedInput::new(&params, vec![]);
let mut batches = test_records(batch_size);

// There are no rows, so that is less than the batch size,
// it needs more.
assert!(buffered_input.need_more(batch_size - 1).unwrap());

// There are now 3 rows, still less than batch_size + 1,
// so it needs more.
buffered_input.push(batches.pop_front().unwrap());
assert!(buffered_input.need_more(batch_size - 1).unwrap());

// There are now 6 rows, if we were not interpolating,
// this would be enough.
buffered_input.push(batches.pop_front().unwrap());

// If we are interpolating, there are no non null values
// at offset 5.
assert!(buffered_input.need_more(batch_size - 1).unwrap());

// Push more rows, now totaling 9.
buffered_input.push(batches.pop_front().unwrap());
assert!(buffered_input.need_more(batch_size - 1).unwrap());
// Column `a1` has a non-null value at offset 8.
// If that were the only column being interpolated, we would have enough.

// 12 rows, with non-null values in both columns being interpolated.
buffered_input.push(batches.pop_front().unwrap());
assert!(!buffered_input.need_more(batch_size - 1).unwrap());
}

#[test]
fn with_group() {
let params = test_params();
let group_cols = vec![0, 1];
let mut buffered_input = BufferedInput::new(&params, group_cols);

let batch_size = 3;
let mut batches = test_records(batch_size);

// no rows
assert!(buffered_input.need_more(batch_size - 1).unwrap());

// 3 rows
buffered_input.push(batches.pop_front().unwrap());
assert!(buffered_input.need_more(batch_size - 1).unwrap());

// 6 rows
buffered_input.push(batches.pop_front().unwrap());
assert!(buffered_input.need_more(batch_size - 1).unwrap());

// 9 rows (series changes here)
buffered_input.push(batches.pop_front().unwrap());
assert!(!buffered_input.need_more(batch_size - 1).unwrap());
}
}
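
The row-count half of `need_more` above boils down to a small inequality. A minimal sketch of that check (illustrative only; it ignores the extra read-ahead required for interpolated columns):

```rust
// Buffer every row up to and including the last one that can land in the next
// output batch, plus one extra row to detect series changes / trailing gaps.
fn need_more_rows(buffered_rows: usize, last_output_row_offset: usize) -> bool {
    let min_needed = last_output_row_offset + 2;
    buffered_rows < min_needed
}

fn main() {
    // With batch_size = 3 and the cursor at input offset 0, the last possible
    // output row sits at offset 2, so at least 4 rows must be buffered.
    assert!(need_more_rows(3, 2));
    assert!(!need_more_rows(4, 2));
}
```
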
@ -775,6 +775,7 @@ fn test_gapfill_fill_interpolate() {
Some("b"),
Some("b"),
Some("b"),
Some("b"),
]],
time_col: vec![
None,

@ -788,7 +789,7 @@ fn test_gapfill_fill_interpolate() {
// --- new series
None,
Some(975),
// 1000
Some(1000),
Some(1025),
// 1050
Some(1075),

@ -807,7 +808,7 @@ fn test_gapfill_fill_interpolate() {
// --- new series
Some(-10),
Some(1100), // 975
// 1200 1000
None, // 1200 1000 (this null value will be filled)
Some(1300), // 1025
// 1325 1050
Some(1350), // 1075

@ -979,13 +980,13 @@ fn assert_batch_count(actual_batches: &[RecordBatch], batch_size: usize) {

type ExprVec = Vec<Arc<dyn PhysicalExpr>>;

struct TestRecords {
group_cols: Vec<Vec<Option<&'static str>>>,
pub(super) struct TestRecords {
pub group_cols: Vec<Vec<Option<&'static str>>>,
// Stored as millisecods since intervals use millis,
// to let test cases be consistent and easier to read.
time_col: Vec<Option<i64>>,
agg_cols: Vec<Vec<Option<i64>>>,
input_batch_size: usize,
pub time_col: Vec<Option<i64>>,
pub agg_cols: Vec<Vec<Option<i64>>>,
pub input_batch_size: usize,
}

impl TestRecords {

@ -1174,14 +1175,16 @@ fn phys_fill_strategies(

fn get_params_ms_with_fill_strategy(
batch: &TestRecords,
stride: i64,
stride_ms: i64,
start: Option<i64>,
end: i64,
fill_strategy: FillStrategy,
) -> GapFillExecParams {
// stride is in ms
let stride = ScalarValue::new_interval_mdn(0, 0, stride_ms * 1_000_000);

GapFillExecParams {
// interval day time is milliseconds in the low 32-bit word
stride: phys_lit(ScalarValue::IntervalDayTime(Some(stride))), // milliseconds
stride: phys_lit(stride),
time_column: Column::new("t", batch.group_cols.len()),
origin: phys_lit(ScalarValue::TimestampNanosecond(Some(0), None)),
// timestamps are nanos, so scale them accordingly
@ -2,6 +2,7 @@
//! a gap-filling extension to DataFusion

mod algo;
mod buffered_input;
#[cfg(test)]
mod exec_tests;
mod params;

@ -31,7 +32,6 @@ use datafusion::{
},
prelude::Expr,
};
use datafusion_util::sort_exprs::requirements_from_sort_exprs;

use self::stream::GapFillStream;

@ -475,7 +475,9 @@ impl ExecutionPlan for GapFillExec {
}

fn required_input_ordering(&self) -> Vec<Option<Vec<PhysicalSortRequirement>>> {
vec![Some(requirements_from_sort_exprs(&self.sort_expr))]
vec![Some(PhysicalSortRequirement::from_sort_exprs(
&self.sort_expr,
))]
}

fn maintains_input_order(&self) -> Vec<bool> {

@ -740,11 +742,11 @@ mod test {
explain,
@r###"
---
- " ProjectionExec: expr=[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@0 as minute, AVG(temps.temp)@1 as AVG(temps.temp)]"
- " GapFillExec: group_expr=[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@0], aggr_expr=[AVG(temps.temp)@1], stride=60000, time_range=Included(\"315532800000000000\")..Excluded(\"347155200000000000\")"
- " SortExec: expr=[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@0 ASC]"
- " AggregateExec: mode=Final, gby=[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@0 as date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))], aggr=[AVG(temps.temp)]"
- " AggregateExec: mode=Partial, gby=[datebin(60000, time@0, 0) as date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))], aggr=[AVG(temps.temp)]"
- " ProjectionExec: expr=[date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@0 as minute, AVG(temps.temp)@1 as AVG(temps.temp)]"
- " GapFillExec: group_expr=[date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@0], aggr_expr=[AVG(temps.temp)@1], stride=60000000000, time_range=Included(\"315532800000000000\")..Excluded(\"347155200000000000\")"
- " SortExec: expr=[date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@0 ASC]"
- " AggregateExec: mode=Final, gby=[date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@0 as date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))], aggr=[AVG(temps.temp)]"
- " AggregateExec: mode=Partial, gby=[datebin(60000000000, time@0, 0) as date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))], aggr=[AVG(temps.temp)]"
- " EmptyExec: produce_one_row=false"
"###
);

@ -770,11 +772,11 @@ mod test {
explain,
@r###"
---
- " ProjectionExec: expr=[loc@0 as loc, date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@1 as minute, concat(Utf8(\"zz\"),temps.loc)@2 as loczz, AVG(temps.temp)@3 as AVG(temps.temp)]"
- " GapFillExec: group_expr=[loc@0, date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@1, concat(Utf8(\"zz\"),temps.loc)@2], aggr_expr=[AVG(temps.temp)@3], stride=60000, time_range=Included(\"315532800000000000\")..Excluded(\"347155200000000000\")"
- " SortExec: expr=[loc@0 ASC,concat(Utf8(\"zz\"),temps.loc)@2 ASC,date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@1 ASC]"
- " AggregateExec: mode=Final, gby=[loc@0 as loc, date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@1 as date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\")), concat(Utf8(\"zz\"),temps.loc)@2 as concat(Utf8(\"zz\"),temps.loc)], aggr=[AVG(temps.temp)]"
- " AggregateExec: mode=Partial, gby=[loc@1 as loc, datebin(60000, time@0, 0) as date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\")), concat(zz, loc@1) as concat(Utf8(\"zz\"),temps.loc)], aggr=[AVG(temps.temp)]"
- " ProjectionExec: expr=[loc@0 as loc, date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@1 as minute, concat(Utf8(\"zz\"),temps.loc)@2 as loczz, AVG(temps.temp)@3 as AVG(temps.temp)]"
- " GapFillExec: group_expr=[loc@0, date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@1, concat(Utf8(\"zz\"),temps.loc)@2], aggr_expr=[AVG(temps.temp)@3], stride=60000000000, time_range=Included(\"315532800000000000\")..Excluded(\"347155200000000000\")"
- " SortExec: expr=[loc@0 ASC,concat(Utf8(\"zz\"),temps.loc)@2 ASC,date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@1 ASC]"
- " AggregateExec: mode=Final, gby=[loc@0 as loc, date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\"))@1 as date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\")), concat(Utf8(\"zz\"),temps.loc)@2 as concat(Utf8(\"zz\"),temps.loc)], aggr=[AVG(temps.temp)]"
- " AggregateExec: mode=Partial, gby=[loc@1 as loc, datebin(60000000000, time@0, 0) as date_bin_gapfill(IntervalMonthDayNano(\"60000000000\"),temps.time,Utf8(\"1970-01-01T00:00:00Z\")), concat(zz, loc@1) as concat(Utf8(\"zz\"),temps.loc)], aggr=[AVG(temps.temp)]"
- " EmptyExec: produce_one_row=false"
"###
);
@ -2,7 +2,7 @@
use std::ops::Bound;

use arrow::{
datatypes::{IntervalDayTimeType, SchemaRef},
datatypes::{IntervalMonthDayNanoType, SchemaRef},
record_batch::RecordBatch,
};
use chrono::Duration;

@ -133,10 +133,17 @@ fn extract_timestamp_nanos(cv: &ColumnarValue) -> Result<i64> {

fn extract_interval_nanos(cv: &ColumnarValue) -> Result<i64> {
match cv {
ColumnarValue::Scalar(ScalarValue::IntervalDayTime(Some(v))) => {
let (days, ms) = IntervalDayTimeType::to_parts(*v);
ColumnarValue::Scalar(ScalarValue::IntervalMonthDayNano(Some(v))) => {
let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(*v);

if months != 0 {
return Err(DataFusionError::Execution(
"gap filling does not support month intervals".to_string(),
));
}

let nanos =
(Duration::days(days as i64) + Duration::milliseconds(ms as i64)).num_nanoseconds();
(Duration::days(days as i64) + Duration::nanoseconds(nanos)).num_nanoseconds();
nanos.ok_or_else(|| {
DataFusionError::Execution("gap filling argument is too large".to_string())
})

@ -261,9 +268,7 @@ mod tests {
}

fn interval(ns: i64) -> Arc<dyn PhysicalExpr> {
Arc::new(Literal::new(ScalarValue::IntervalDayTime(Some(
ns / 1_000_000,
))))
Arc::new(Literal::new(ScalarValue::new_interval_mdn(0, 0, ns)))
}

fn timestamp(ns: i64) -> Arc<dyn PhysicalExpr> {
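
The new `extract_interval_nanos` logic above amounts to the following conversion. A standalone sketch assuming only the `chrono` crate (the function name and error type here are illustrative, not the IOx API):

```rust
use chrono::Duration;

// Convert an IntervalMonthDayNano-style (months, days, nanos) triple into a
// single nanosecond stride. Months are rejected because a month has no fixed
// length in nanoseconds.
fn interval_to_nanos(months: i32, days: i32, nanos: i64) -> Result<i64, String> {
    if months != 0 {
        return Err("gap filling does not support month intervals".to_string());
    }
    (Duration::days(days as i64) + Duration::nanoseconds(nanos))
        .num_nanoseconds()
        .ok_or_else(|| "gap filling argument is too large".to_string())
}

fn main() {
    // One day expressed as (months, days, nanos).
    assert_eq!(interval_to_nanos(0, 1, 0).unwrap(), 86_400_000_000_000);
}
```
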
|
@ -22,9 +22,16 @@ use datafusion::{
|
|||
};
|
||||
use futures::{ready, Stream, StreamExt};
|
||||
|
||||
use super::{algo::GapFiller, params::GapFillParams, GapFillExec};
|
||||
use super::{algo::GapFiller, buffered_input::BufferedInput, params::GapFillParams, GapFillExec};
|
||||
|
||||
/// An implementation of a gap-filling operator that uses the [Stream] trait.
|
||||
///
|
||||
/// This type takes responsibility for:
|
||||
/// - Reading input record batches
|
||||
/// - Accounting for memory
|
||||
/// - Extracting arrays for processing by [`GapFiller`]
|
||||
/// - Recording metrics
|
||||
/// - Sending record batches to next operator (by implementing [`Self::poll_next'])
|
||||
#[allow(dead_code)]
|
||||
pub(super) struct GapFillStream {
|
||||
/// The schema of the input and output.
|
||||
|
@ -38,12 +45,10 @@ pub(super) struct GapFillStream {
|
|||
group_expr: Vec<Arc<dyn PhysicalExpr>>,
|
||||
/// The aggregate columns from the select list of the original query.
|
||||
aggr_expr: Vec<Arc<dyn PhysicalExpr>>,
|
||||
/// The number of rows to produce in each output batch.
|
||||
batch_size: usize,
|
||||
/// The producer of the input record batches.
|
||||
input: SendableRecordBatchStream,
|
||||
/// Input that has been read from the input stream.
buffered_input_batches: Vec<RecordBatch>,
buffered_input: BufferedInput,
/// The thing that does the gap filling.
gap_filler: GapFiller,
/// This is true as long as there are more input record batches to read from `input`.

@ -83,16 +88,19 @@ impl GapFillStream {
.collect::<Vec<_>>();
let aggr_expr = aggr_expr.to_owned();
let time_expr = group_expr.split_off(group_expr.len() - 1).pop().unwrap();

let group_cols = group_expr.iter().map(expr_to_index).collect::<Vec<_>>();
let params = GapFillParams::try_new(Arc::clone(&schema), params)?;
let gap_filler = GapFiller::new(params);
let buffered_input = BufferedInput::new(&params, group_cols);

let gap_filler = GapFiller::new(params, batch_size);
Ok(Self {
schema,
time_expr,
group_expr,
aggr_expr,
batch_size,
input,
buffered_input_batches: vec![],
buffered_input,
gap_filler,
more_input: true,
reservation,

@ -112,28 +120,17 @@ impl Stream for GapFillStream {

/// Produces a gap-filled record batch from its input stream.
///
/// This method starts off by reading input until it has buffered `batch_size` + 2 rows,
/// or until there is no more input. Having at least `batch_size` rows ensures that we
/// can produce at least one full output batch. We need two additional rows so that we have
/// 1) an input row that corresponds to the row before the current output batch. This is
/// needed for the case where we are producing trailing gaps, and we need to use the
/// `take` kernel to build the group columns. There must be at least one row from the
/// corresponding series in the input to take from.
/// 2) an input row that corresponds to the next input row that will be read after the
/// current output batch. This tells us if we have processed all of our input for a series
/// but may be in "trailing gaps" mode.
///
/// Once input rows have been buffered, it will produce a gap-filled [RecordBatch] with `self.batch_size`
/// rows (or less, if there is no more input).
/// For details on implementation, see [`GapFiller`].
fn poll_next(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
) -> Poll<Option<Result<RecordBatch>>> {
while self.more_input && self.buffered_input_row_count() < self.batch_size + 2 {
let last_output_row_offset = self.gap_filler.last_output_row_offset();
while self.more_input && self.buffered_input.need_more(last_output_row_offset)? {
match ready!(self.input.poll_next_unpin(cx)) {
Some(Ok(batch)) => {
self.reservation.try_grow(batch.get_array_memory_size())?;
self.buffered_input_batches.push(batch);
self.buffered_input.push(batch);
}
Some(Err(e)) => {
return Poll::Ready(Some(Err(e)));

@ -162,8 +159,7 @@ impl Stream for GapFillStream {

match self.process(input_batch) {
Ok((output_batch, remaining_input_batch)) => {
self.buffered_input_batches.push(remaining_input_batch);
assert_eq!(1, self.buffered_input_batches.len());
self.buffered_input.push(remaining_input_batch);

self.reservation
.shrink(output_batch.get_array_memory_size());

@ -175,30 +171,21 @@ impl Stream for GapFillStream {
}

impl GapFillStream {
/// Count of input rows that are currently buffered.
fn buffered_input_row_count(&self) -> usize {
self.buffered_input_batches
.iter()
.map(|rb| rb.num_rows())
.sum()
}

/// If any buffered input batches are present, concatenates it all together
/// and returns an owned batch to the caller, leaving `self.buffered_input_batches` empty.
fn take_buffered_input(&mut self) -> Result<Option<RecordBatch>> {
if self.buffered_input_batches.is_empty() {
let batches = self.buffered_input.take();
if batches.is_empty() {
return Ok(None);
}

let mut v = vec![];
std::mem::swap(&mut v, &mut self.buffered_input_batches);
let old_size = v.iter().map(|rb| rb.get_array_memory_size()).sum();
let old_size = batches.iter().map(|rb| rb.get_array_memory_size()).sum();

let mut batch = arrow::compute::concat_batches(&self.schema, &v)
let mut batch = arrow::compute::concat_batches(&self.schema, &batches)
.map_err(DataFusionError::ArrowError)?;
self.reservation.try_grow(batch.get_array_memory_size())?;

if v.len() > 1 {
if batches.len() > 1 {
// Optimize the dictionaries. The output of this operator uses the take kernel to produce
// its output. Since the input batches will usually be smaller than the output, it should
// be less work to optimize here vs optimizing the output.

@ -234,7 +221,6 @@ impl GapFillStream {
let output_batch = self
.gap_filler
.build_gapfilled_output(
self.batch_size,
Arc::clone(&self.schema),
input_time_array,
&group_arrays,
@ -4,7 +4,7 @@

use arrow::{
self,
array::{Array, BooleanArray, DictionaryArray, StringArray},
array::{downcast_array, Array, BooleanArray, DictionaryArray, StringArray},
compute,
datatypes::{DataType, Int32Type, SchemaRef},
record_batch::RecordBatch,

@ -188,9 +188,7 @@ impl SeriesSetConverter {
])
.expect("concat");

// until https://github.com/apache/arrow-rs/issues/2901 is done, use a workaround
// to get a `BooleanArray`
BooleanArray::from(arr.data().clone())
downcast_array(&arr)
}

/// Creates (column_name, column_value) pairs for each column
|
@ -73,9 +73,7 @@ use datafusion::{
|
|||
scalar::ScalarValue,
|
||||
};
|
||||
|
||||
use datafusion_util::{
|
||||
sort_exprs::requirements_from_sort_exprs, watch::WatchedTask, AdapterStream,
|
||||
};
|
||||
use datafusion_util::{watch::WatchedTask, AdapterStream};
|
||||
use futures::StreamExt;
|
||||
use observability_deps::tracing::*;
|
||||
use parking_lot::Mutex;
|
||||
|
@ -215,7 +213,7 @@ impl ExecutionPlan for StreamSplitExec {
|
|||
let requirement = self
|
||||
.input
|
||||
.output_ordering()
|
||||
.map(requirements_from_sort_exprs);
|
||||
.map(PhysicalSortRequirement::from_sort_exprs);
|
||||
|
||||
vec![requirement]
|
||||
}
|
||||
|
|
|
@ -20,6 +20,7 @@ use datafusion::{error::DataFusionError, prelude::SessionContext};
|
|||
use exec::{stringset::StringSet, IOxSessionContext};
|
||||
use hashbrown::HashMap;
|
||||
use observability_deps::tracing::{debug, trace};
|
||||
use once_cell::sync::Lazy;
|
||||
use parquet_file::storage::ParquetExecInput;
|
||||
use predicate::{rpc_predicate::QueryNamespaceMeta, Predicate, PredicateMatch};
|
||||
use schema::{
|
||||
|
@ -45,9 +46,12 @@ pub use query_functions::group_by::{Aggregate, WindowDuration};
|
|||
/// The name of the virtual column that represents the chunk order.
|
||||
pub const CHUNK_ORDER_COLUMN_NAME: &str = "__chunk_order";
|
||||
|
||||
static CHUNK_ORDER_FIELD: Lazy<Arc<Field>> =
|
||||
Lazy::new(|| Arc::new(Field::new(CHUNK_ORDER_COLUMN_NAME, DataType::Int64, false)));
|
||||
|
||||
/// Generate [`Field`] for [chunk order column](CHUNK_ORDER_COLUMN_NAME).
|
||||
pub fn chunk_order_field() -> Field {
|
||||
Field::new(CHUNK_ORDER_COLUMN_NAME, DataType::Int64, false)
|
||||
pub fn chunk_order_field() -> Arc<Field> {
|
||||
Arc::clone(&CHUNK_ORDER_FIELD)
|
||||
}
|
||||
|
||||
/// Trait for an object (designed to be a Chunk) which can provide
|
||||
|
|
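
The `chunk_order_field` change above swaps per-call construction for a memoized `Arc`. The pattern in isolation, with a stand-in type so the sketch compiles on its own (assumes the `once_cell` crate, which the diff adds as a dependency):

```rust
use std::sync::Arc;

use once_cell::sync::Lazy;

// Build the value once, on first use, and hand out cheap Arc clones afterwards.
static CACHED_FIELD: Lazy<Arc<String>> =
    Lazy::new(|| Arc::new("__chunk_order".to_string()));

fn chunk_order_field() -> Arc<String> {
    Arc::clone(&CACHED_FIELD)
}

fn main() {
    // Both calls return handles to the same allocation.
    assert!(Arc::ptr_eq(&chunk_order_field(), &chunk_order_field()));
}
```
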
|
@ -14,7 +14,7 @@ use datafusion::{
|
|||
optimizer::{optimizer::ApplyOrder, OptimizerConfig, OptimizerRule},
|
||||
prelude::{col, Expr},
|
||||
};
|
||||
use query_functions::gapfill::{DATE_BIN_GAPFILL_UDF_NAME, LOCF_UDF_NAME};
|
||||
use query_functions::gapfill::{DATE_BIN_GAPFILL_UDF_NAME, INTERPOLATE_UDF_NAME, LOCF_UDF_NAME};
|
||||
use std::{
|
||||
collections::HashSet,
|
||||
ops::{Bound, Range},
|
||||
|
@ -349,6 +349,14 @@ impl TreeNodeRewriter for DateBinGapfillRewriter {
|
|||
}
|
||||
}
|
||||
|
||||
fn udf_to_fill_strategy(name: &str) -> Option<FillStrategy> {
|
||||
match name {
|
||||
LOCF_UDF_NAME => Some(FillStrategy::PrevNullAsMissing),
|
||||
INTERPOLATE_UDF_NAME => Some(FillStrategy::LinearInterpolate),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_projection(proj: &Projection) -> Result<Option<LogicalPlan>> {
|
||||
let Projection {
|
||||
input,
|
||||
|
@ -365,12 +373,16 @@ fn handle_projection(proj: &Projection) -> Result<Option<LogicalPlan>> {
|
|||
return Ok(None)
|
||||
};
|
||||
|
||||
let fill_cols: Vec<(&Expr, FillStrategy)> = proj_exprs
|
||||
let fill_cols: Vec<(&Expr, FillStrategy, &str)> = proj_exprs
|
||||
.iter()
|
||||
.filter_map(|e| match e {
|
||||
Expr::ScalarUDF { fun, args } if fun.name == LOCF_UDF_NAME => {
|
||||
let col = &args[0];
|
||||
Some((col, FillStrategy::PrevNullAsMissing))
|
||||
Expr::ScalarUDF { fun, args } => {
|
||||
if let Some(strategy) = udf_to_fill_strategy(&fun.name) {
|
||||
let col = &args[0];
|
||||
Some((col, strategy, fun.name.as_str()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
|
@ -383,12 +395,12 @@ fn handle_projection(proj: &Projection) -> Result<Option<LogicalPlan>> {
|
|||
// Clone the existing GapFill node, then modify it in place
|
||||
// to reflect the new fill strategy.
|
||||
let mut new_gapfill = child_gapfill.clone();
|
||||
for (e, col) in fill_cols {
|
||||
if new_gapfill.replace_fill_strategy(e, col).is_none() {
|
||||
// There was a gap filling function called on an aggregate column.
|
||||
return Err(DataFusionError::Plan(
|
||||
"LOCF must be called on an aggregate column in a gap-filling query".to_string(),
|
||||
));
|
||||
for (e, fs, fn_name) in fill_cols {
|
||||
if new_gapfill.replace_fill_strategy(e, fs).is_none() {
|
||||
// There was a gap filling function called on a non-aggregate column.
|
||||
return Err(DataFusionError::Plan(format!(
|
||||
"{fn_name} must be called on an aggregate column in a gap-filling query"
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -397,7 +409,9 @@ fn handle_projection(proj: &Projection) -> Result<Option<LogicalPlan>> {
|
|||
.iter()
|
||||
.cloned()
|
||||
.map(|e| match e {
|
||||
Expr::ScalarUDF { fun, mut args } if fun.name == LOCF_UDF_NAME => args.remove(0),
|
||||
Expr::ScalarUDF { fun, mut args } if udf_to_fill_strategy(&fun.name).is_some() => {
|
||||
args.remove(0)
|
||||
}
|
||||
_ => e,
|
||||
})
|
||||
.collect();
|
||||
|
@ -433,16 +447,19 @@ fn check_node(node: &LogicalPlan) -> Result<()> {
node.expressions().iter().try_for_each(|expr| {
let dbg_count = count_udf(expr, DATE_BIN_GAPFILL_UDF_NAME)?;
if dbg_count > 0 {
Err(DataFusionError::Plan(format!(
return Err(DataFusionError::Plan(format!(
"{DATE_BIN_GAPFILL_UDF_NAME} may only be used as a GROUP BY expression"
)))
} else if count_udf(expr, LOCF_UDF_NAME)? > 0 {
Err(DataFusionError::Plan(format!(
"{LOCF_UDF_NAME} may only be used in the SELECT list of a gap-filling query"
)))
} else {
Ok(())
)));
}

for fn_name in [LOCF_UDF_NAME, INTERPOLATE_UDF_NAME] {
if count_udf(expr, fn_name)? > 0 {
return Err(DataFusionError::Plan(format!(
"{fn_name} may only be used in the SELECT list of a gap-filling query"
)));
}
}
Ok(())
})
}

@ -459,7 +476,9 @@ mod test {
use datafusion::optimizer::OptimizerContext;
use datafusion::prelude::{avg, case, col, lit, lit_timestamp_nano, min, Expr};
use datafusion::scalar::ScalarValue;
use query_functions::gapfill::{DATE_BIN_GAPFILL_UDF_NAME, LOCF_UDF_NAME};
use query_functions::gapfill::{
DATE_BIN_GAPFILL_UDF_NAME, INTERPOLATE_UDF_NAME, LOCF_UDF_NAME,
};

fn table_scan() -> Result<LogicalPlan> {
let schema = Schema::new(vec![

@ -497,6 +516,13 @@ mod test {
})
}

fn interpolate(arg: Expr) -> Result<Expr> {
Ok(Expr::ScalarUDF {
fun: query_functions::registry().udf(INTERPOLATE_UDF_NAME)?,
args: vec![arg],
})
}

fn optimize(plan: &LogicalPlan) -> Result<Option<LogicalPlan>> {
let optimizer = Optimizer::with_rules(vec![Arc::new(HandleGapFill::default())]);
optimizer.optimize_recursively(

@ -581,6 +607,20 @@ mod test {
Ok(())
}

/// calling INTERPOLATE in a WHERE predicate is not valid
#[test]
fn misplaced_interpolate_err() -> Result<()> {
// date_bin_gapfill used in a filter should produce an error
let scan = table_scan()?;
let plan = LogicalPlanBuilder::from(scan)
.filter(interpolate(col("temp"))?.gt(lit(100.0)))?
.build()?;
assert_optimizer_err(
&plan,
"Error during planning: interpolate may only be used in the SELECT list of a gap-filling query",
);
Ok(())
}
/// calling LOCF on the SELECT list but not on an aggregate column is not valid.
#[test]
fn misplaced_locf_non_agg_err() -> Result<()> {

@ -607,7 +647,7 @@ mod test {
.build()?;
assert_optimizer_err(
&plan,
"LOCF must be called on an aggregate column in a gap-filling query",
"locf must be called on an aggregate column in a gap-filling query",
);
Ok(())
}

@ -852,4 +892,37 @@ mod test {
assert_optimized_plan_eq(&plan, &expected)?;
Ok(())
}

#[test]
fn with_interpolate() -> Result<()> {
let dbg_args = "IntervalDayTime(\"60000\"),temps.time,TimestampNanosecond(0, None)";
let plan = LogicalPlanBuilder::from(table_scan()?)
.filter(
col("time")
.gt_eq(lit_timestamp_nano(1000))
.and(col("time").lt(lit_timestamp_nano(2000))),
)?
.aggregate(
vec![date_bin_gapfill(
lit(ScalarValue::IntervalDayTime(Some(60_000))),
col("time"),
)?],
vec![avg(col("temp")), min(col("temp"))],
)?
.project(vec![
col(format!("date_bin_gapfill({dbg_args})")),
interpolate(col("AVG(temps.temp)"))?,
interpolate(col("MIN(temps.temp)"))?,
])?
.build()?;

let expected = format!(
"Projection: date_bin_gapfill({dbg_args}), AVG(temps.temp), MIN(temps.temp)\
\n GapFill: groupBy=[[date_bin_gapfill({dbg_args})]], aggr=[[INTERPOLATE(AVG(temps.temp)), INTERPOLATE(MIN(temps.temp))]], time_column=date_bin_gapfill({dbg_args}), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))\
\n Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time, TimestampNanosecond(0, None))]], aggr=[[AVG(temps.temp), MIN(temps.temp)]]\
\n Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)\
\n TableScan: temps");
assert_optimized_plan_eq(&plan, &expected)?;
Ok(())
}
}
@ -1,6 +1,6 @@
use std::sync::Arc;

use arrow::datatypes::Schema as ArrowSchema;
use arrow::datatypes::{Fields, Schema as ArrowSchema};
use datafusion::physical_plan::ExecutionPlan;
use schema::Schema;
@ -40,7 +40,7 @@ fn dedup_plan_impl(
                .iter()
                .cloned()
                .chain(std::iter::once(chunk_order_field()))
                .collect(),
                .collect::<Fields>(),
        ))
    } else {
        schema.as_arrow()
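
The two hunks above track the arrow 37 upgrade pinned in the workspace dependencies: schema fields are now gathered into the new Fields collection before building a Schema. A minimal sketch of the same pattern, assuming Schema::new accepts anything convertible into Fields as this diff suggests; the field names and the "__chunk_order" extra column are illustrative stand-ins for chunk_order_field():

use arrow::datatypes::{DataType, Field, Fields, Schema, TimeUnit};

fn main() {
    // Base columns, standing in for schema.as_arrow().fields() in the diff.
    let base = vec![
        Field::new("time", DataType::Timestamp(TimeUnit::Nanosecond, None), false),
        Field::new("temp", DataType::Float64, true),
    ];
    // Stand-in for the chunk-order column appended by chunk_order_field().
    let extra = Field::new("__chunk_order", DataType::Int64, false);

    // Collect the chained iterator into `Fields`, then build the schema from it.
    let fields: Fields = base.into_iter().chain(std::iter::once(extra)).collect();
    let schema = Schema::new(fields);
    assert_eq!(schema.fields().len(), 3);
}
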
@ -169,12 +169,14 @@ impl PhysicalOptimizerRule for ProjectionPushdown {
                        &column_names,
                        Arc::clone(child_sort.input()),
                        |plan| {
                            Ok(Arc::new(SortExec::new_with_partitioning(
                                reassign_sort_exprs_columns(child_sort.expr(), &plan.schema())?,
                                plan,
                                child_sort.preserve_partitioning(),
                                child_sort.fetch(),
                            )))
                            Ok(Arc::new(
                                SortExec::new(
                                    reassign_sort_exprs_columns(child_sort.expr(), &plan.schema())?,
                                    plan,
                                )
                                .with_preserve_partitioning(child_sort.preserve_partitioning())
                                .with_fetch(child_sort.fetch()),
                            ))
                        },
                    )?;

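
This hunk, and the test hunks below, migrate from the old SortExec constructors (SortExec::try_new / SortExec::new_with_partitioning) to the builder-style API in the DataFusion revision pinned above. A rough sketch of the new shape; the build_sort wrapper and its parameters are hypothetical, only the SortExec calls mirror the diff:

use std::sync::Arc;

use datafusion::physical_expr::PhysicalSortExpr;
use datafusion::physical_plan::sorts::sort::SortExec;
use datafusion::physical_plan::ExecutionPlan;

// Sketch: build a sort node with the builder-style API used after this change.
// `sort_exprs` and `input` are assumed to be supplied by the caller.
fn build_sort(
    sort_exprs: Vec<PhysicalSortExpr>,
    input: Arc<dyn ExecutionPlan>,
    preserve_partitioning: bool,
    fetch: Option<usize>,
) -> Arc<dyn ExecutionPlan> {
    Arc::new(
        SortExec::new(sort_exprs, input)
            // formerly the `preserve_partitioning` constructor argument
            .with_preserve_partitioning(preserve_partitioning)
            // formerly the `fetch` constructor argument
            .with_fetch(fetch),
    )
}
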
@ -930,7 +932,7 @@ mod tests {
            ProjectionExec::try_new(
                vec![(expr_col("tag1", &schema), String::from("tag1"))],
                Arc::new(
                    SortExec::try_new(
                    SortExec::new(
                        vec![PhysicalSortExpr {
                            expr: expr_col("tag2", &schema),
                            options: SortOptions {

@ -939,9 +941,8 @@ mod tests {
                            },
                        }],
                        Arc::new(TestExec::new(schema)),
                        Some(42),
                    )
                    .unwrap(),
                    .with_fetch(Some(42)),
                ),
            )
            .unwrap(),
@ -971,18 +972,20 @@ mod tests {
        let plan = Arc::new(
            ProjectionExec::try_new(
                vec![(expr_col("tag1", &schema), String::from("tag1"))],
                Arc::new(SortExec::new_with_partitioning(
                    vec![PhysicalSortExpr {
                        expr: expr_col("tag2", &schema),
                        options: SortOptions {
                            descending: true,
                            ..Default::default()
                        },
                    }],
                    Arc::new(TestExec::new_with_partitions(schema, 2)),
                    true,
                    Some(42),
                )),
                Arc::new(
                    SortExec::new(
                        vec![PhysicalSortExpr {
                            expr: expr_col("tag2", &schema),
                            options: SortOptions {
                                descending: true,
                                ..Default::default()
                            },
                        }],
                        Arc::new(TestExec::new_with_partitions(schema, 2)),
                    )
                    .with_preserve_partitioning(true)
                    .with_fetch(Some(42)),
                ),
            )
            .unwrap(),
        );
Some files were not shown because too many files have changed in this diff