Merge branch 'main' into cn/one-at-a-time-please
commit 3fcca070f0

@@ -172,8 +172,7 @@ jobs:
- cache_restore
- run:
    name: Cargo doc
    # excluding datafusion because it's effectively a dependency masqueraded as workspace crate.
    command: cargo doc --document-private-items --no-deps --workspace --exclude datafusion
    command: cargo doc --document-private-items --no-deps --workspace
- cache_save
- run:
    name: Compress Docs

@ -475,9 +475,9 @@ checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba"
|
|||
|
||||
[[package]]
|
||||
name = "bytemuck"
|
||||
version = "1.12.1"
|
||||
version = "1.12.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2f5715e491b5a1598fc2bef5a606847b5dc1d48ea625bd3c02c00de8285591da"
|
||||
checksum = "5aec14f5d4e6e3f927cd0c81f72e5710d95ee9019fbeb4b3021193867491bfd8"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
|
@ -541,9 +541,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
|
|||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.73"
|
||||
version = "1.0.74"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
|
||||
checksum = "581f5dba903aac52ea3feb5ec4810848460ee833876f1f9b0fdeab1f19091574"
|
||||
dependencies = [
|
||||
"jobserver",
|
||||
]
|
||||
|
@ -718,9 +718,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "cmake"
|
||||
version = "0.1.48"
|
||||
version = "0.1.49"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e8ad8cef104ac57b68b89df3208164d228503abbdce70f6880ffa3d970e7443a"
|
||||
checksum = "db34956e100b30725f2eb215f90d4871051239535632f84fea3bc92722c66b7c"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
@ -843,9 +843,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "const-random"
|
||||
version = "0.1.14"
|
||||
version = "0.1.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "acf7ab93790ae0eac37744aff15866e9e3dcc31515d7bf34a6d0fc6c9726b564"
|
||||
checksum = "368a7a772ead6ce7e1de82bfb04c485f3db8ec744f72925af5735e29a22cc18e"
|
||||
dependencies = [
|
||||
"const-random-macro",
|
||||
"proc-macro-hack",
|
||||
|
@ -853,9 +853,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "const-random-macro"
|
||||
version = "0.1.14"
|
||||
version = "0.1.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c6495bfab021aa116773c3e215be28cee0604417ea358f49966fba050c40d9c"
|
||||
checksum = "9d7d6ab3c3a2282db210df5f02c4dab6e0a7057af0fb7ebd4070f30fe05c0ddb"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"once_cell",
|
||||
|
@ -1129,10 +1129,12 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "datafusion"
|
||||
version = "13.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=48f73c6af3b0cc747c38b4a9c7a610f4630e8736#48f73c6af3b0cc747c38b4a9c7a610f4630e8736"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=2b08a43b82127ef144204e5999dd2730fa1c4756#2b08a43b82127ef144204e5999dd2730fa1c4756"
|
||||
dependencies = [
|
||||
"ahash 0.8.0",
|
||||
"arrow",
|
||||
"arrow-buffer",
|
||||
"arrow-schema",
|
||||
"async-compression",
|
||||
"async-trait",
|
||||
"bytes",
|
||||
|
@ -1147,6 +1149,7 @@ dependencies = [
|
|||
"flate2",
|
||||
"futures",
|
||||
"glob",
|
||||
"half 2.1.0",
|
||||
"hashbrown",
|
||||
"itertools",
|
||||
"lazy_static",
|
||||
|
@ -1161,7 +1164,7 @@ dependencies = [
|
|||
"pin-project-lite",
|
||||
"rand",
|
||||
"smallvec",
|
||||
"sqlparser 0.25.0",
|
||||
"sqlparser",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
|
@ -1173,31 +1176,32 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "datafusion-common"
|
||||
version = "13.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=48f73c6af3b0cc747c38b4a9c7a610f4630e8736#48f73c6af3b0cc747c38b4a9c7a610f4630e8736"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=2b08a43b82127ef144204e5999dd2730fa1c4756#2b08a43b82127ef144204e5999dd2730fa1c4756"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"chrono",
|
||||
"object_store",
|
||||
"ordered-float 3.3.0",
|
||||
"parquet",
|
||||
"sqlparser 0.25.0",
|
||||
"sqlparser",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-expr"
|
||||
version = "13.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=48f73c6af3b0cc747c38b4a9c7a610f4630e8736#48f73c6af3b0cc747c38b4a9c7a610f4630e8736"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=2b08a43b82127ef144204e5999dd2730fa1c4756#2b08a43b82127ef144204e5999dd2730fa1c4756"
|
||||
dependencies = [
|
||||
"ahash 0.8.0",
|
||||
"arrow",
|
||||
"datafusion-common",
|
||||
"log",
|
||||
"sqlparser 0.25.0",
|
||||
"sqlparser",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "datafusion-optimizer"
|
||||
version = "13.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=48f73c6af3b0cc747c38b4a9c7a610f4630e8736#48f73c6af3b0cc747c38b4a9c7a610f4630e8736"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=2b08a43b82127ef144204e5999dd2730fa1c4756#2b08a43b82127ef144204e5999dd2730fa1c4756"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"async-trait",
|
||||
|
@ -1212,7 +1216,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "datafusion-physical-expr"
|
||||
version = "13.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=48f73c6af3b0cc747c38b4a9c7a610f4630e8736#48f73c6af3b0cc747c38b4a9c7a610f4630e8736"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=2b08a43b82127ef144204e5999dd2730fa1c4756#2b08a43b82127ef144204e5999dd2730fa1c4756"
|
||||
dependencies = [
|
||||
"ahash 0.8.0",
|
||||
"arrow",
|
||||
|
@ -1223,6 +1227,7 @@ dependencies = [
|
|||
"datafusion-expr",
|
||||
"datafusion-row",
|
||||
"hashbrown",
|
||||
"itertools",
|
||||
"lazy_static",
|
||||
"md-5",
|
||||
"ordered-float 3.3.0",
|
||||
|
@ -1236,13 +1241,13 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "datafusion-proto"
|
||||
version = "13.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=48f73c6af3b0cc747c38b4a9c7a610f4630e8736#48f73c6af3b0cc747c38b4a9c7a610f4630e8736"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=2b08a43b82127ef144204e5999dd2730fa1c4756#2b08a43b82127ef144204e5999dd2730fa1c4756"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"async-trait",
|
||||
"datafusion",
|
||||
"datafusion-common",
|
||||
"datafusion-expr",
|
||||
"pbjson-build",
|
||||
"prost 0.11.0",
|
||||
"prost-build 0.11.1",
|
||||
]
|
||||
|
@ -1250,7 +1255,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "datafusion-row"
|
||||
version = "13.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=48f73c6af3b0cc747c38b4a9c7a610f4630e8736#48f73c6af3b0cc747c38b4a9c7a610f4630e8736"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=2b08a43b82127ef144204e5999dd2730fa1c4756#2b08a43b82127ef144204e5999dd2730fa1c4756"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"datafusion-common",
|
||||
|
@ -1261,12 +1266,12 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "datafusion-sql"
|
||||
version = "13.0.0"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=48f73c6af3b0cc747c38b4a9c7a610f4630e8736#48f73c6af3b0cc747c38b4a9c7a610f4630e8736"
|
||||
source = "git+https://github.com/apache/arrow-datafusion.git?rev=2b08a43b82127ef144204e5999dd2730fa1c4756#2b08a43b82127ef144204e5999dd2730fa1c4756"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"datafusion-common",
|
||||
"datafusion-expr",
|
||||
"sqlparser 0.25.0",
|
||||
"sqlparser",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1953,9 +1958,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
|
|||
|
||||
[[package]]
|
||||
name = "hyper"
|
||||
version = "0.14.20"
|
||||
version = "0.14.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "02c929dc5c39e335a03c405292728118860721b10190d98c2a0f0efd5baafbac"
|
||||
checksum = "abfba89e19b959ca163c7752ba59d737c1ceea53a5d31a149c805446fc958064"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"futures-channel",
|
||||
|
@ -2002,9 +2007,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "iana-time-zone"
|
||||
version = "0.1.51"
|
||||
version = "0.1.53"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f5a6ef98976b22b3b7f2f3a806f858cb862044cfa66805aa3ad84cb3d3b785ed"
|
||||
checksum = "64c122667b287044802d6ce17ee2ddf13207ed924c712de9a66a5814d5b64765"
|
||||
dependencies = [
|
||||
"android_system_properties",
|
||||
"core-foundation-sys",
|
||||
|
@ -2266,7 +2271,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"generated_types",
|
||||
"snafu",
|
||||
"sqlparser 0.26.0",
|
||||
"sqlparser",
|
||||
"workspace-hack",
|
||||
]
|
||||
|
||||
|
@ -2289,6 +2294,7 @@ dependencies = [
|
|||
"flatbuffers",
|
||||
"futures",
|
||||
"generated_types",
|
||||
"hashbrown",
|
||||
"hyper",
|
||||
"iox_catalog",
|
||||
"iox_query",
|
||||
|
@ -3056,6 +3062,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"bytes",
|
||||
"criterion",
|
||||
"data_types",
|
||||
"dml",
|
||||
"flate2",
|
||||
"generated_types",
|
||||
|
@ -3277,9 +3284,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.15.0"
|
||||
version = "1.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1"
|
||||
checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860"
|
||||
dependencies = [
|
||||
"parking_lot_core 0.9.4",
|
||||
]
|
||||
|
@ -3310,9 +3317,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "os_str_bytes"
|
||||
version = "6.3.0"
|
||||
version = "6.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9ff7415e9ae3fff1225851df9e0d9e4e5479f947619774677a63572e55e80eff"
|
||||
checksum = "3baf96e39c5359d2eb0dd6ccb42c62b91d9678aa68160d261b9e0ccbf9e9dea9"
|
||||
|
||||
[[package]]
|
||||
name = "output_vt100"
|
||||
|
@ -3606,9 +3613,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
|
|||
|
||||
[[package]]
|
||||
name = "pkg-config"
|
||||
version = "0.3.25"
|
||||
version = "0.3.26"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae"
|
||||
checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160"
|
||||
|
||||
[[package]]
|
||||
name = "pprof"
|
||||
|
@ -3655,7 +3662,7 @@ dependencies = [
|
|||
"query_functions",
|
||||
"schema",
|
||||
"snafu",
|
||||
"sqlparser 0.26.0",
|
||||
"sqlparser",
|
||||
"test_helpers",
|
||||
"workspace-hack",
|
||||
]
|
||||
|
@ -3931,6 +3938,7 @@ dependencies = [
|
|||
"schema",
|
||||
"service_common",
|
||||
"service_grpc_catalog",
|
||||
"service_grpc_object_store",
|
||||
"service_grpc_schema",
|
||||
"sharder",
|
||||
"snafu",
|
||||
|
@ -4269,7 +4277,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "rskafka"
|
||||
version = "0.3.0"
|
||||
source = "git+https://github.com/influxdata/rskafka.git?rev=8c98c56b5d4b06206ce40e21404a75e6bb7bf7af#8c98c56b5d4b06206ce40e21404a75e6bb7bf7af"
|
||||
source = "git+https://github.com/influxdata/rskafka.git?rev=8678dfe049de05415929ffec7c1be8921bb057f7#8678dfe049de05415929ffec7c1be8921bb057f7"
|
||||
dependencies = [
|
||||
"async-socks5",
|
||||
"async-trait",
|
||||
|
@ -4281,10 +4289,10 @@ dependencies = [
|
|||
"parking_lot 0.12.1",
|
||||
"pin-project-lite",
|
||||
"rand",
|
||||
"snap",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"zstd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -4764,15 +4772,6 @@ dependencies = [
|
|||
"unicode_categories",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sqlparser"
|
||||
version = "0.25.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0781f2b6bd03e5adf065c8e772b49eaea9f640d06a1b9130330fe8bd2563f4fd"
|
||||
dependencies = [
|
||||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sqlparser"
|
||||
version = "0.26.0"
|
||||
|
|
|
@ -110,8 +110,8 @@ license = "MIT OR Apache-2.0"
|
|||
[workspace.dependencies]
|
||||
arrow = { version = "25.0.0" }
|
||||
arrow-flight = { version = "25.0.0" }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="48f73c6af3b0cc747c38b4a9c7a610f4630e8736", default-features = false }
|
||||
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="48f73c6af3b0cc747c38b4a9c7a610f4630e8736" }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="2b08a43b82127ef144204e5999dd2730fa1c4756", default-features = false }
|
||||
datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="2b08a43b82127ef144204e5999dd2730fa1c4756" }
|
||||
parquet = { version = "25.0.0" }
|
||||
|
||||
# This profile optimizes for runtime performance and small binary size at the expense of longer
|
||||
|
|
|
@ -4,40 +4,18 @@ use data_types::{
|
|||
ChunkId, ChunkOrder, CompactionLevel, DeletePredicate, PartitionId, SequenceNumber,
|
||||
TableSummary, Timestamp, Tombstone,
|
||||
};
|
||||
use datafusion::{error::DataFusionError, physical_plan::SendableRecordBatchStream};
|
||||
use datafusion::error::DataFusionError;
|
||||
use iox_query::{
|
||||
exec::{stringset::StringSet, IOxSessionContext},
|
||||
util::create_basic_summary,
|
||||
QueryChunk, QueryChunkMeta,
|
||||
QueryChunk, QueryChunkData, QueryChunkMeta,
|
||||
};
|
||||
use observability_deps::tracing::trace;
|
||||
use parquet_file::chunk::ParquetChunk;
|
||||
use predicate::{delete_predicate::tombstones_to_delete_predicates, Predicate};
|
||||
use schema::{merge::SchemaMerger, selection::Selection, sort::SortKey, Schema};
|
||||
use snafu::{ResultExt, Snafu};
|
||||
use schema::{merge::SchemaMerger, sort::SortKey, Projection, Schema};
|
||||
use std::{any::Any, sync::Arc};
|
||||
use uuid::Uuid;
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
#[allow(missing_copy_implementations, missing_docs)]
|
||||
pub enum Error {
|
||||
#[snafu(display("Failed to read parquet: {}", source))]
|
||||
ReadParquet {
|
||||
source: parquet_file::storage::ReadError,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Error reading IOx Metadata from Parquet IoxParquetMetadata: {}",
|
||||
source
|
||||
))]
|
||||
ReadParquetMeta {
|
||||
source: parquet_file::storage::ReadError,
|
||||
},
|
||||
}
|
||||
|
||||
/// A specialized `Error` for Compactor's query errors
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
/// QueryableParquetChunk that implements QueryChunk and QueryChunkMeta for building query plans
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct QueryableParquetChunk {
|
||||
|
@ -134,8 +112,8 @@ impl QueryableParquetChunk {
|
|||
}
|
||||
|
||||
impl QueryChunkMeta for QueryableParquetChunk {
|
||||
fn summary(&self) -> Option<Arc<TableSummary>> {
|
||||
Some(Arc::clone(&self.summary))
|
||||
fn summary(&self) -> Arc<TableSummary> {
|
||||
Arc::clone(&self.summary)
|
||||
}
|
||||
|
||||
fn schema(&self) -> Arc<Schema> {
|
||||
|
@ -194,7 +172,7 @@ impl QueryChunk for QueryableParquetChunk {
|
|||
&self,
|
||||
_ctx: IOxSessionContext,
|
||||
_predicate: &Predicate,
|
||||
_columns: Selection<'_>,
|
||||
_columns: Projection<'_>,
|
||||
) -> Result<Option<StringSet>, DataFusionError> {
|
||||
Ok(None)
|
||||
}
|
||||
|
@ -213,33 +191,8 @@ impl QueryChunk for QueryableParquetChunk {
|
|||
Ok(None)
|
||||
}
|
||||
|
||||
/// Provides access to raw `QueryChunk` data as an
|
||||
/// asynchronous stream of `RecordBatch`es filtered by a *required*
|
||||
/// predicate. Note that not all chunks can evaluate all types of
|
||||
/// predicates and this function will return an error
|
||||
/// if requested to evaluate with a predicate that is not supported
|
||||
///
|
||||
/// This is the analog of the `TableProvider` in DataFusion
|
||||
///
|
||||
/// The reason we can't simply use the `TableProvider` trait
|
||||
/// directly is that the data for a particular Table lives in
|
||||
/// several chunks within a partition, so there needs to be an
|
||||
/// implementation of `TableProvider` that stitches together the
|
||||
/// streams from several different `QueryChunk`s.
|
||||
fn read_filter(
|
||||
&self,
|
||||
mut ctx: IOxSessionContext,
|
||||
predicate: &Predicate,
|
||||
selection: Selection<'_>,
|
||||
) -> Result<SendableRecordBatchStream, DataFusionError> {
|
||||
ctx.set_metadata("storage", "compactor");
|
||||
ctx.set_metadata("projection", format!("{}", selection));
|
||||
trace!(?selection, "selection");
|
||||
|
||||
self.data
|
||||
.read_filter(predicate, selection, ctx.inner())
|
||||
.context(ReadParquetSnafu)
|
||||
.map_err(|e| DataFusionError::External(Box::new(e)))
|
||||
fn data(&self) -> QueryChunkData {
|
||||
QueryChunkData::Parquet(self.data.parquet_exec_input())
|
||||
}
|
||||
|
||||
/// Returns chunk type
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
use datafusion::{
|
||||
config::{
|
||||
OPT_COALESCE_TARGET_BATCH_SIZE, OPT_PARQUET_PUSHDOWN_FILTERS, OPT_PARQUET_REORDER_FILTERS,
|
||||
},
|
||||
prelude::SessionConfig,
|
||||
};
|
||||
|
||||
// The default catalog name - this impacts what SQL queries use if not specified
|
||||
pub const DEFAULT_CATALOG: &str = "public";
|
||||
// The default schema name - this impacts what SQL queries use if not specified
|
||||
pub const DEFAULT_SCHEMA: &str = "iox";
|
||||
|
||||
/// The maximum number of rows that DataFusion should create in each RecordBatch
|
||||
pub const BATCH_SIZE: usize = 8 * 1024;
|
||||
|
||||
const COALESCE_BATCH_SIZE: usize = BATCH_SIZE / 2;
|
||||
|
||||
/// Return a SessionConfig object configured for IOx
|
||||
pub fn iox_session_config() -> SessionConfig {
|
||||
SessionConfig::new()
|
||||
.with_batch_size(BATCH_SIZE)
|
||||
.set_u64(
|
||||
OPT_COALESCE_TARGET_BATCH_SIZE,
|
||||
COALESCE_BATCH_SIZE.try_into().unwrap(),
|
||||
)
|
||||
// Enable parquet predicate pushdown optimization
|
||||
.set_bool(OPT_PARQUET_PUSHDOWN_FILTERS, true)
|
||||
.set_bool(OPT_PARQUET_REORDER_FILTERS, true)
|
||||
.create_default_catalog_and_schema(true)
|
||||
.with_information_schema(true)
|
||||
.with_default_catalog_and_schema(DEFAULT_CATALOG, DEFAULT_SCHEMA)
|
||||
}
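
// A minimal usage sketch (not part of this diff); it assumes DataFusion 13's
// `SessionContext::with_config` constructor.
//
// use datafusion::prelude::SessionContext;
//
// /// Hypothetical caller: build a DataFusion session from the IOx defaults above.
// fn new_iox_session() -> SessionContext {
//     // Plans created through this context pick up the 8K batch size, the
//     // coalesce target, parquet filter pushdown/reordering, and the
//     // `public`/`iox` default catalog and schema set by `iox_session_config()`.
//     SessionContext::with_config(iox_session_config())
// }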
|
|
@ -10,6 +10,7 @@
|
|||
//! [datafusion_optimizer::utils](https://docs.rs/datafusion-optimizer/13.0.0/datafusion_optimizer/utils/index.html)
|
||||
//! for expression manipulation functions.
|
||||
|
||||
pub mod config;
|
||||
pub mod sender;
|
||||
pub mod watch;
|
||||
|
||||
|
|
|
@ -15,7 +15,10 @@
|
|||
|
||||
use std::time::Duration;
|
||||
|
||||
use data_types::{DeletePredicate, NonEmptyString, PartitionKey, Sequence, StatValues, Statistics};
|
||||
use data_types::{
|
||||
DeletePredicate, NamespaceId, NonEmptyString, PartitionKey, Sequence, StatValues, Statistics,
|
||||
TableId,
|
||||
};
|
||||
use hashbrown::HashMap;
|
||||
use iox_time::{Time, TimeProvider};
|
||||
use mutable_batch::MutableBatch;
|
||||
|
@ -182,6 +185,32 @@ pub struct DmlWrite {
|
|||
max_timestamp: i64,
|
||||
/// The partition key derived for this write.
|
||||
partition_key: PartitionKey,
|
||||
|
||||
// !!!!!!! TRANSITION TIME !!!!!!!
|
||||
//
|
||||
// While implementing "sending IDs over Kafka" (#4880) there has to be a
|
||||
// transition period where the producers (routers) populate the fields, but
|
||||
// the consumers (ingesters) do not utilise them.
|
||||
//
|
||||
// This period of overlap is necessary to support a rolling deployment where
|
||||
// the consumers MAY be deployed before the producers, or the producer code
|
||||
// MAY be rolled back due to a defect. During this potential rollback
|
||||
// window, all fields need to be populated to ensure both new and old
|
||||
// versions of the code can process the enqueued messages.
|
||||
//
|
||||
// Because the consumers (ingesters) and the producers (routers) use the
|
||||
// same common application-level type to represent writes (the DmlWrite), it
|
||||
// has to support the producer pushing the IDs into the DmlWrite, but the
|
||||
// consumer must not make use of them.
|
||||
//
|
||||
// In a follow-up PR, this consumer will be switched to make use of the
|
||||
// TableIds, at which point the table map will change from the current
|
||||
// `Table name -> Data` to `TableId -> Data`, and the second map can be
|
||||
// removed from the DmlWrite.
|
||||
#[allow(dead_code)]
|
||||
namespace_id: NamespaceId,
|
||||
// Used to resolve the table ID for a given table name during serialisation.
|
||||
table_ids: HashMap<String, TableId>,
|
||||
}
|
||||
|
||||
impl DmlWrite {
|
||||
|
@ -196,7 +225,9 @@ impl DmlWrite {
|
|||
/// - a MutableBatch lacks an i64 "time" column
|
||||
pub fn new(
|
||||
namespace: impl Into<String>,
|
||||
namespace_id: NamespaceId,
|
||||
tables: HashMap<String, MutableBatch>,
|
||||
table_ids: HashMap<String, TableId>,
|
||||
partition_key: PartitionKey,
|
||||
meta: DmlMeta,
|
||||
) -> Self {
|
||||
|
@ -221,10 +252,12 @@ impl DmlWrite {
|
|||
Self {
|
||||
namespace: namespace.into(),
|
||||
tables,
|
||||
table_ids,
|
||||
partition_key,
|
||||
meta,
|
||||
min_timestamp: stats.min.unwrap(),
|
||||
max_timestamp: stats.max.unwrap(),
|
||||
namespace_id,
|
||||
}
|
||||
}
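
// A hedged sketch of the producer-side call (not part of this diff;
// `NamespaceId::new`, `TableId::new` and the crate paths are assumptions),
// showing both maps being populated while consumers keep ignoring the IDs:
//
// use data_types::{NamespaceId, PartitionKey, TableId};
// use dml::{DmlMeta, DmlWrite};
// use hashbrown::HashMap;
// use mutable_batch::MutableBatch;
//
// /// Hypothetical router-side helper: attach catalog IDs next to the existing
// /// name-keyed batches; consumers ignore the IDs until the transition ends.
// fn example_write(
//     tables: HashMap<String, MutableBatch>,
//     partition_key: PartitionKey,
//     meta: DmlMeta,
// ) -> DmlWrite {
//     // Name -> catalog ID map; the ID value here is purely illustrative.
//     let table_ids: HashMap<String, TableId> = tables
//         .keys()
//         .cloned()
//         .map(|name| (name, TableId::new(1)))
//         .collect();
//
//     DmlWrite::new(
//         "my_namespace",       // namespace name: still what consumers use today
//         NamespaceId::new(42), // populated by the producer, unused by consumers
//         tables,
//         table_ids,
//         partition_key,
//         meta,
//     )
// }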
|
||||
|
||||
|
@ -284,7 +317,13 @@ impl DmlWrite {
|
|||
.iter()
|
||||
.map(|(k, v)| std::mem::size_of_val(k) + k.capacity() + v.size())
|
||||
.sum::<usize>()
|
||||
+ self
|
||||
.table_ids
|
||||
.keys()
|
||||
.map(|k| std::mem::size_of_val(k) + k.capacity() + std::mem::size_of::<TableId>())
|
||||
.sum::<usize>()
|
||||
+ self.meta.size()
|
||||
+ std::mem::size_of::<NamespaceId>()
|
||||
+ std::mem::size_of::<PartitionKey>()
|
||||
- std::mem::size_of::<DmlMeta>()
|
||||
}
|
||||
|
@ -293,6 +332,28 @@ impl DmlWrite {
|
|||
pub fn partition_key(&self) -> &PartitionKey {
|
||||
&self.partition_key
|
||||
}
|
||||
|
||||
/// Return the map of [`TableId`] to table names for this batch.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Marked unsafe because of the critical invariant: Kafka consumers MUST NOT
|
||||
/// utilise this method until this warning is removed. See [`DmlWrite`]
|
||||
/// docs.
|
||||
pub unsafe fn table_id(&self, name: &str) -> Option<TableId> {
|
||||
self.table_ids.get(name).cloned()
|
||||
}
|
||||
|
||||
/// Return the [`NamespaceId`] to which this [`DmlWrite`] should be applied.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Marked unsafe because of the critical invariant: Kafka consumers MUST NOT
|
||||
/// utilise this method until this warning is removed. See [`DmlWrite`]
|
||||
/// docs.
|
||||
pub unsafe fn namespace_id(&self) -> NamespaceId {
|
||||
self.namespace_id
|
||||
}
|
||||
}
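
// For illustration only, a hypothetical producer-side reader that honours the
// safety contract above (this helper is not part of the diff):
//
// use data_types::{NamespaceId, TableId};
// use dml::DmlWrite;
//
// fn ids_for_cpu(write: &DmlWrite) -> Option<(NamespaceId, TableId)> {
//     // SAFETY: only the producer (router) side may read these during the
//     // transition; Kafka consumers must not, per the `DmlWrite` docs.
//     let ns = unsafe { write.namespace_id() };
//     let table = unsafe { write.table_id("cpu") }?;
//     Some((ns, table))
// }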
|
||||
|
||||
/// A delete operation
|
||||
|
@ -363,7 +424,7 @@ impl DmlDelete {
|
|||
/// Test utilities
|
||||
pub mod test_util {
|
||||
use arrow_util::display::pretty_format_batches;
|
||||
use schema::selection::Selection;
|
||||
use schema::Projection;
|
||||
|
||||
use super::*;
|
||||
|
||||
|
@ -405,8 +466,8 @@ pub mod test_util {
|
|||
let b_batch = b.table(table_name).expect("table not found");
|
||||
|
||||
assert_eq!(
|
||||
pretty_format_batches(&[a_batch.to_arrow(Selection::All).unwrap()]).unwrap(),
|
||||
pretty_format_batches(&[b_batch.to_arrow(Selection::All).unwrap()]).unwrap(),
|
||||
pretty_format_batches(&[a_batch.to_arrow(Projection::All).unwrap()]).unwrap(),
|
||||
pretty_format_batches(&[b_batch.to_arrow(Projection::All).unwrap()]).unwrap(),
|
||||
"batches for table \"{}\" differ",
|
||||
table_name
|
||||
);
|
||||
|
|
docs/cli.md

@@ -1,6 +1,6 @@
# InfluxDB CLI cookbook
|
||||
|
||||
You can use the `influxdb_iox` command line tool to interact with the server in various ways
|
||||
You can use the `influxdb_iox` command line tool to interact with the IOx server in various ways. This document contains a brief tour of the highlights; detailed information on each command can be found by passing `--help`.
|
||||
|
||||
|
||||
## Ports
|
||||
|
@ -12,65 +12,20 @@ To connect on a different port, use the `--host` argument:
|
|||
$ influxdb_iox --host http://localhost:8083 <command>
|
||||
```
|
||||
|
||||
## List all namespaces
|
||||
## Getting data into IOx
|
||||
|
||||
You can load data in parallel using the influxdb_iox client by specifying one or more files on the command line.
|
||||
|
||||
This command uses the HTTP v2 endpoint, which typically runs on port 8080, rather than the default port 8082, which handles gRPC:
|
||||
|
||||
```shell
|
||||
# Connects to port 8082 (gRPC by default)
|
||||
$ influxdb_iox debug namespace list
|
||||
[
|
||||
{
|
||||
"id": "1",
|
||||
"name": "26f7e5a4b7be365b_917b97a92e883afc"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
## List Schema in a Namespace
|
||||
|
||||
```shell
|
||||
$ influxdb_iox debug schema get 26f7e5a4b7be365b_917b97a92e883afc
|
||||
{
|
||||
"id": "1",
|
||||
"kafkaTopicId": "1",
|
||||
"queryPoolId": "1",
|
||||
"tables": {
|
||||
"mem": {
|
||||
"id": "2",
|
||||
"columns": {
|
||||
"time": {
|
||||
"id": "10",
|
||||
"columnType": 6
|
||||
},
|
||||
"host": {
|
||||
"id": "16",
|
||||
"columnType": 7
|
||||
},
|
||||
"available": {
|
||||
"id": "17",
|
||||
"columnType": 1
|
||||
},
|
||||
"wired": {
|
||||
...
|
||||
```
|
||||
|
||||
Alternately you can use `show tables` using SQL (see [sql cookbook](sql.md) for more details):
|
||||
|
||||
```shell
|
||||
$ influxdb_iox query 26f7e5a4b7be365b_917b97a92e883afc 'show tables'
|
||||
+---------------+--------------------+------------+------------+
|
||||
| table_catalog | table_schema | table_name | table_type |
|
||||
+---------------+--------------------+------------+------------+
|
||||
| public | iox | cpu | BASE TABLE |
|
||||
| public | iox | disk | BASE TABLE |
|
||||
| public | iox | diskio | BASE TABLE |
|
||||
...
|
||||
| public | information_schema | columns | VIEW |
|
||||
+---------------+--------------------+------------+------------+
|
||||
influxdb_iox --host=http://localhost:8080 -v write test_db test_fixtures/lineproto/*.lp
|
||||
```
|
||||
|
||||
## Run Queries
|
||||
|
||||
### SQL
|
||||
You can run an individual SQL query using the `query` command, providing the namespace and the SQL text. See the [sql cookbook](sql.md) for more detailed documentation on SQL.
|
||||
|
||||
```shell
|
||||
$ influxdb_iox query 26f7e5a4b7be365b_917b97a92e883afc 'select count(*), cpu as cpu_num from cpu group by cpu'
|
||||
|
@ -97,30 +52,9 @@ $ influxdb_iox query 26f7e5a4b7be365b_917b97a92e883afc 'select count(*), cpu as
|
|||
+-----------------+-----------+
|
||||
```
|
||||
|
||||
### InfluxRPC (used by Flux and InfluxQL)
|
||||
### SQL REPL
|
||||
|
||||
```shell
|
||||
TODO
|
||||
```
|
||||
|
||||
### Ingester (used internally to IOx to query unpersisted data)
|
||||
|
||||
```shell
|
||||
# Note you need to connect to the ingester (running on port 8083 in all in one mode)
|
||||
$ influxdb_iox query-ingester --host http://localhost:8083 26f7e5a4b7be365b_917b97a92e883afc mem available_percent | head
|
||||
+--------------------+
|
||||
| available_percent |
|
||||
+--------------------+
|
||||
| 56.58011436462402 |
|
||||
| 57.43834972381592 |
|
||||
| 57.46076703071594 |
|
||||
| 57.482320070266724 |
|
||||
| 57.447218894958496 |
|
||||
| 57.420217990875244 |
|
||||
| 57.361191511154175 |
|
||||
```
|
||||
|
||||
### SQL Repl
|
||||
IOx comes with its own Read Evaluate Print Loop (REPL) for running SQL interactively. See the [sql cookbook](sql.md) for more detailed documentation.
|
||||
|
||||
```shell
|
||||
$ influxdb_iox sql
|
||||
|
@ -137,3 +71,135 @@ You are now in remote mode, querying database 26f7e5a4b7be365b_917b97a92e883afc
|
|||
+-----------------+
|
||||
Returned 1 row in 59.410821ms
|
||||
```
|
||||
|
||||
## Getting data out of IOx
|
||||
|
||||
## Fetch the parquet files for a particular table
|
||||
|
||||
You can retrieve the parquet files used to store a particular table into a local directory:
|
||||
|
||||
```shell
|
||||
$ influxdb_iox remote store get-table 26f7e5a4b7be365b_917b97a92e883afc mem
|
||||
found 3 Parquet files, downloading...
|
||||
downloading file 1 of 3 (1ce7e327-7b48-478f-b141-96e8d366ca12.5.parquet)...
|
||||
downloading file 2 of 3 (fa45a0db-5e9e-4374-b3d3-8294b5e7ade0.5.parquet)...
|
||||
downloading file 3 of 3 (ad5e47f6-b984-400b-99c2-f562151985d6.5.parquet)...
|
||||
Done.
|
||||
```
|
||||
|
||||
These are standard parquet files and can be read by any other tool that understands the parquet file format.
|
||||
|
||||
## Convert parquet files into line protocol
|
||||
|
||||
Parquet files created by IOx can be converted back into the Line Protocol format using metadata stored in the file:
|
||||
|
||||
```shell
|
||||
$ influxdb_iox debug parquet-to-lp mem/1ce7e327-7b48-478f-b141-96e8d366ca12.5.parquet
|
||||
disk,device=disk1s1s1,fstype=apfs,host=MacBook-Pro-8.local,mode=ro,path=/ free=89205854208i,inodes_free=871150920i,inodes_total=871652968i,inodes_used=502048i,total=1000240963584i,used=911035109376i,used_percent=91.0815635975992 1667300090000000000
|
||||
disk,device=disk1s1,fstype=apfs,host=MacBook-Pro-8.local,mode=rw,path=/System/Volumes/Update/mnt1 free=89205854208i,inodes_free=871150920i,inodes_total=871652990i,inodes_used=502070i,total=1000240963584i,used=911035109376i,used_percent=91.0815635975992 1667300090000000000
|
||||
...
|
||||
```
|
||||
|
||||
Note that you can also write parquet files that came from IOx back to another IOx instance using the `influxdb_iox write` command.
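
For example (the database name and path are illustrative, reusing the `write` syntax shown earlier):

```shell
$ influxdb_iox --host=http://localhost:8080 write test_db mem/1ce7e327-7b48-478f-b141-96e8d366ca12.5.parquet
```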
|
||||
|
||||
|
||||
## Inspect The Catalog
|
||||
|
||||
|
||||
## List all namespaces
|
||||
|
||||
```shell
|
||||
# Connects to port 8082 (gRPC by default)
|
||||
$ influxdb_iox namespace list
|
||||
[
|
||||
{
|
||||
"id": "1",
|
||||
"name": "26f7e5a4b7be365b_917b97a92e883afc"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
## List Schema in a Namespace
|
||||
|
||||
```shell
|
||||
$ influxdb_iox debug schema get 26f7e5a4b7be365b_917b97a92e883afc
|
||||
{
|
||||
"id": "1",
|
||||
"topicId": "1",
|
||||
"queryPoolId": "1",
|
||||
"tables": {
|
||||
"cpu": {
|
||||
"id": "5",
|
||||
"columns": {
|
||||
"host": {
|
||||
"id": "56",
|
||||
"columnType": "COLUMN_TYPE_TAG"
|
||||
},
|
||||
"usage_nice": {
|
||||
"id": "51",
|
||||
"columnType": "COLUMN_TYPE_F64"
|
||||
},
|
||||
...
|
||||
```
|
||||
|
||||
Alternatively, you can use `show tables` via SQL (see the [sql cookbook](sql.md) for more details):
|
||||
|
||||
```shell
|
||||
$ influxdb_iox query 26f7e5a4b7be365b_917b97a92e883afc 'show tables'
|
||||
+---------------+--------------------+------------+------------+
|
||||
| table_catalog | table_schema | table_name | table_type |
|
||||
+---------------+--------------------+------------+------------+
|
||||
| public | iox | cpu | BASE TABLE |
|
||||
| public | iox | disk | BASE TABLE |
|
||||
| public | iox | diskio | BASE TABLE |
|
||||
...
|
||||
| public | information_schema | columns | VIEW |
|
||||
+---------------+--------------------+------------+------------+
|
||||
```
|
||||
|
||||
## Advanced Querying
|
||||
|
||||
These CLI options are most often used for developing and debugging IOx rather than by end users.
|
||||
|
||||
### InfluxRPC (used by Flux and InfluxQL)
|
||||
|
||||
`influxrpc` is the name used to describe the protocol used to talk with the Flux and InfluxQL services. There is limited CLI support for making such queries. For example, to run a `measurement-fields` request:
|
||||
|
||||
```shell
|
||||
$ influxdb_iox storage 26f7e5a4b7be365b_917b97a92e883afc measurement-fields cpu
|
||||
|
||||
tag values: 10
|
||||
+----------------------------------------------+
|
||||
| values |
|
||||
+----------------------------------------------+
|
||||
| key: usage_guest, type: 0, timestamp: 0 |
|
||||
| key: usage_guest_nice, type: 0, timestamp: 0 |
|
||||
| key: usage_idle, type: 0, timestamp: 0 |
|
||||
| key: usage_iowait, type: 0, timestamp: 0 |
|
||||
| key: usage_irq, type: 0, timestamp: 0 |
|
||||
| key: usage_nice, type: 0, timestamp: 0 |
|
||||
| key: usage_softirq, type: 0, timestamp: 0 |
|
||||
| key: usage_steal, type: 0, timestamp: 0 |
|
||||
| key: usage_system, type: 0, timestamp: 0 |
|
||||
| key: usage_user, type: 0, timestamp: 0 |
|
||||
+----------------------------------------------+
|
||||
```
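
This branch also adds a `measurement-tag-keys` request to the same `storage` command; a hypothetical invocation (subcommand name inferred from the new clap variant, output omitted) looks like:

```shell
$ influxdb_iox storage 26f7e5a4b7be365b_917b97a92e883afc measurement-tag-keys cpu
```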
|
||||
|
||||
### Ingester (used internally to IOx to query unpersisted data)
|
||||
|
||||
You can make direct queries to the ingester to see its unpersisted data using the `query-ingester` command. Note that you need to connect to the ingester directly (running on port 8083 in all-in-one mode):
|
||||
|
||||
```shell
|
||||
$ influxdb_iox query-ingester --host http://localhost:8083 26f7e5a4b7be365b_917b97a92e883afc swap
|
||||
+------------+---------------------+----+-----+----------------------+------------+------------+-------------------+
|
||||
| free | host | in | out | time | total | used | used_percent |
|
||||
+------------+---------------------+----+-----+----------------------+------------+------------+-------------------+
|
||||
| 1496055808 | MacBook-Pro-8.local | | | 2022-11-01T10:08:40Z | 6442450944 | 4946395136 | 76.77815755208334 |
|
||||
| | MacBook-Pro-8.local | 0 | 0 | 2022-11-01T10:08:40Z | | | |
|
||||
| 1496055808 | MacBook-Pro-8.local | | | 2022-11-01T10:08:40Z | 6442450944 | 4946395136 | 76.77815755208334 |
|
||||
| | MacBook-Pro-8.local | 0 | 0 | 2022-11-01T10:08:40Z | | | |
|
||||
| 1496055808 | MacBook-Pro-8.local | | | 2022-11-01T10:08:50Z | 6442450944 | 4946395136 | 76.77815755208334 |
|
||||
| | MacBook-Pro-8.local | 0 | 0 | 2022-11-01T10:08:50Z | | | |
|
||||
| 1496055808 | MacBook-Pro-8.local | | | 2022-11-01T10:08:50Z | 6442450944 | 4946395136 | 76.77815755208334 |
|
||||
...
|
||||
```
|
||||
|
|
|
@ -26,6 +26,6 @@ clap_blocks = { path = "../clap_blocks" }
|
|||
data_types = { path = "../data_types" }
|
||||
filetime = "0.2"
|
||||
metric = { path = "../metric" }
|
||||
once_cell = { version = "1.15.0", features = ["parking_lot"] }
|
||||
once_cell = { version = "1.16.0", features = ["parking_lot"] }
|
||||
parquet_file = { path = "../parquet_file" }
|
||||
tempfile = "3"
|
||||
|
|
|
@ -8,6 +8,9 @@ message DatabaseBatch {
|
|||
// The destination database name / namespace for this write.
|
||||
string database_name = 1;
|
||||
|
||||
// The catalog ID for this database / namespace.
|
||||
int64 database_id = 4;
|
||||
|
||||
// An optional partition key for this batch.
|
||||
//
|
||||
// If specified, all batches in this write MUST map to this partition key.
|
||||
|
@ -22,6 +25,9 @@ message DatabaseBatch {
|
|||
message TableBatch {
|
||||
string table_name = 1;
|
||||
|
||||
// The catalog ID for this table.
|
||||
int64 table_id = 4;
|
||||
|
||||
// Data are represented here.
|
||||
//
|
||||
// Exactly one column named and typed "time" *must* exist,
|
||||
|
@ -119,7 +125,7 @@ message Column {
|
|||
}
|
||||
|
||||
// Note there used to be a service that would load this internal protobuf format.
|
||||
// See https://github.com/influxdata/influxdb_iox/pull/5750 and
|
||||
// See https://github.com/influxdata/influxdb_iox/pull/5750 and
|
||||
// https://github.com/influxdata/influxdb_iox/issues/4866
|
||||
// for rationale of why it was removed
|
||||
|
||||
|
|
|
@ -17,7 +17,7 @@ uuid = { version = "1", features = ["v4"] }
|
|||
|
||||
[dev-dependencies] # In alphabetical order
|
||||
mockito = "0.31"
|
||||
once_cell = { version = "1.15.0", features = ["parking_lot"] }
|
||||
once_cell = { version = "1.16.0", features = ["parking_lot"] }
|
||||
parking_lot = "0.12"
|
||||
tokio = { version = "1.21", features = ["macros", "parking_lot", "rt-multi-thread", "sync", "time"] }
|
||||
test_helpers = { path = "../test_helpers" }
|
||||
|
|
|
@ -16,6 +16,7 @@ impl Client {
|
|||
|
||||
let response = self
|
||||
.request(Method::POST, &create_bucket_url)
|
||||
.header("Content-Type", "application/json")
|
||||
.body(
|
||||
serde_json::to_string(&post_bucket_request.unwrap_or_default())
|
||||
.context(SerializingSnafu)?,
|
||||
|
@ -47,6 +48,7 @@ mod tests {
|
|||
|
||||
let mock_server = mock("POST", "/api/v2/buckets")
|
||||
.match_header("Authorization", format!("Token {}", token).as_str())
|
||||
.match_header("Content-Type", "application/json")
|
||||
.match_body(
|
||||
format!(
|
||||
r#"{{"orgID":"{}","name":"{}","retentionRules":[]}}"#,
|
||||
|
|
|
@ -73,6 +73,7 @@ impl Client {
|
|||
};
|
||||
let response = self
|
||||
.request(Method::POST, &create_label_url)
|
||||
.header("Content-Type", "application/json")
|
||||
.body(serde_json::to_string(&body).context(SerializingSnafu)?)
|
||||
.send()
|
||||
.await
|
||||
|
@ -100,6 +101,7 @@ impl Client {
|
|||
let body = LabelUpdate { name, properties };
|
||||
let response = self
|
||||
.request(Method::PATCH, &update_label_url)
|
||||
.header("Content-Type", "application/json")
|
||||
.body(serde_json::to_string(&body).context(SerializingSnafu)?)
|
||||
.send()
|
||||
.await
|
||||
|
@ -198,6 +200,7 @@ mod tests {
|
|||
|
||||
let mock_server = mock("POST", BASE_PATH)
|
||||
.match_header("Authorization", format!("Token {}", token).as_str())
|
||||
.match_header("Content-Type", "application/json")
|
||||
.match_body(
|
||||
format!(
|
||||
r#"{{"orgID":"{}","name":"{}","properties":{{"some-key":"some-value"}}}}"#,
|
||||
|
@ -222,6 +225,7 @@ mod tests {
|
|||
|
||||
let mock_server = mock("POST", BASE_PATH)
|
||||
.match_header("Authorization", format!("Token {}", token).as_str())
|
||||
.match_header("Content-Type", "application/json")
|
||||
.match_body(format!(r#"{{"orgID":"{}","name":"{}"}}"#, org_id, name).as_str())
|
||||
.create();
|
||||
|
||||
|
@ -242,6 +246,7 @@ mod tests {
|
|||
|
||||
let mock_server = mock("PATCH", format!("{}/{}", BASE_PATH, label_id).as_str())
|
||||
.match_header("Authorization", format!("Token {}", token).as_str())
|
||||
.match_header("Content-Type", "application/json")
|
||||
.match_body(
|
||||
format!(
|
||||
r#"{{"name":"{}","properties":{{"some-key":"some-value"}}}}"#,
|
||||
|
@ -267,6 +272,7 @@ mod tests {
|
|||
|
||||
let mock_server = mock("PATCH", format!("{}/{}", BASE_PATH, label_id).as_str())
|
||||
.match_header("Authorization", format!("Token {}", token).as_str())
|
||||
.match_header("Content-Type", "application/json")
|
||||
.match_body("{}")
|
||||
.create();
|
||||
|
||||
|
|
|
@ -54,6 +54,7 @@ impl Client {
|
|||
|
||||
let response = self
|
||||
.request(Method::POST, &setup_init_url)
|
||||
.header("Content-Type", "application/json")
|
||||
.body(serde_json::to_string(&body).context(SerializingSnafu)?)
|
||||
.send()
|
||||
.await
|
||||
|
@ -94,6 +95,7 @@ impl Client {
|
|||
|
||||
let response = self
|
||||
.request(Method::POST, &setup_new_url)
|
||||
.header("Content-Type", "application/json")
|
||||
.body(serde_json::to_string(&body).context(SerializingSnafu)?)
|
||||
.send()
|
||||
.await
|
||||
|
@ -138,6 +140,7 @@ mod tests {
|
|||
let retention_period_hrs = 1;
|
||||
|
||||
let mock_server = mock("POST", "/api/v2/setup")
|
||||
.match_header("Content-Type", "application/json")
|
||||
.match_body(
|
||||
format!(
|
||||
r#"{{"username":"{}","org":"{}","bucket":"{}","password":"{}","retentionPeriodHrs":{}}}"#,
|
||||
|
@ -173,6 +176,7 @@ mod tests {
|
|||
|
||||
let mock_server = mock("POST", "/api/v2/setup/user")
|
||||
.match_header("Authorization", format!("Token {}", token).as_str())
|
||||
.match_header("Content-Type", "application/json")
|
||||
.match_body(
|
||||
format!(
|
||||
r#"{{"username":"{}","org":"{}","bucket":"{}","password":"{}","retentionPeriodHrs":{}}}"#,
|
||||
|
@ -204,6 +208,7 @@ mod tests {
|
|||
let bucket = "some-bucket";
|
||||
|
||||
let mock_server = mock("POST", "/api/v2/setup")
|
||||
.match_header("Content-Type", "application/json")
|
||||
.match_body(
|
||||
format!(
|
||||
r#"{{"username":"{}","org":"{}","bucket":"{}"}}"#,
|
||||
|
@ -231,6 +236,7 @@ mod tests {
|
|||
|
||||
let mock_server = mock("POST", "/api/v2/setup/user")
|
||||
.match_header("Authorization", format!("Token {}", token).as_str())
|
||||
.match_header("Content-Type", "application/json")
|
||||
.match_body(
|
||||
format!(
|
||||
r#"{{"username":"{}","org":"{}","bucket":"{}"}}"#,
|
||||
|
|
|
@ -12,7 +12,7 @@ use nom::bytes::complete::tag;
|
|||
use nom::character::complete::{char, multispace0};
|
||||
use nom::combinator::{cut, map, opt, value};
|
||||
use nom::multi::{many0, separated_list0};
|
||||
use nom::sequence::{delimited, pair, preceded, separated_pair, tuple};
|
||||
use nom::sequence::{delimited, pair, preceded, separated_pair, terminated, tuple};
|
||||
use std::fmt::{Display, Formatter, Write};
|
||||
|
||||
/// An InfluxQL arithmetic expression.
|
||||
|
@ -316,11 +316,54 @@ where
|
|||
)(i)
|
||||
}
|
||||
|
||||
/// Parse a variable reference, which is an identifier followed by an optional cast expression.
|
||||
/// Parse a segmented identifier
|
||||
///
|
||||
/// ```text
|
||||
/// segmented_identifier ::= identifier |
|
||||
/// ( identifier "." identifier ) |
|
||||
/// ( identifier "." identifier? "." identifier )
|
||||
/// ```
|
||||
fn segmented_identifier(i: &str) -> ParseResult<&str, Identifier> {
|
||||
let (remaining, (opt_prefix, name)) = pair(
|
||||
opt(alt((
|
||||
// ident2 "." ident1 "."
|
||||
map(
|
||||
pair(
|
||||
terminated(identifier, tag(".")),
|
||||
terminated(identifier, tag(".")),
|
||||
),
|
||||
|(ident2, ident1)| (Some(ident2), Some(ident1)),
|
||||
),
|
||||
// identifier ".."
|
||||
map(terminated(identifier, tag("..")), |ident2| {
|
||||
(Some(ident2), None)
|
||||
}),
|
||||
// identifier "."
|
||||
map(terminated(identifier, tag(".")), |ident1| {
|
||||
(None, Some(ident1))
|
||||
}),
|
||||
))),
|
||||
identifier,
|
||||
)(i)?;
|
||||
|
||||
Ok((
|
||||
remaining,
|
||||
match opt_prefix {
|
||||
Some((None, Some(ident1))) => format!("{}.{}", ident1.0, name.0).into(),
|
||||
Some((Some(ident2), None)) => format!("{}..{}", ident2.0, name.0).into(),
|
||||
Some((Some(ident2), Some(ident1))) => {
|
||||
format!("{}.{}.{}", ident2.0, ident1.0, name.0).into()
|
||||
}
|
||||
_ => name,
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
/// Parse a variable reference, which is a segmented identifier followed by an optional cast expression.
|
||||
pub(crate) fn var_ref(i: &str) -> ParseResult<&str, Expr> {
|
||||
map(
|
||||
pair(
|
||||
identifier,
|
||||
segmented_identifier,
|
||||
opt(preceded(
|
||||
tag("::"),
|
||||
expect(
|
||||
|
@ -515,6 +558,19 @@ mod test {
|
|||
let (_, got) = var_ref("foo").unwrap();
|
||||
assert_eq!(got, var_ref!("foo"));
|
||||
|
||||
// Whilst this is parsed as a 3-part name, it is treated as a quoted string 🙄
|
||||
// VarRefs are parsed as segmented identifiers
|
||||
//
|
||||
// * https://github.com/influxdata/influxql/blob/7e7d61973256ffeef4b99edd0a89f18a9e52fa2d/parser.go#L2515-L2516
|
||||
//
|
||||
// and then the segments are joined as a single string
|
||||
//
|
||||
// * https://github.com/influxdata/influxql/blob/7e7d61973256ffeef4b99edd0a89f18a9e52fa2d/parser.go#L2551
|
||||
let (rem, got) = var_ref("db.rp.foo").unwrap();
|
||||
assert_eq!(got, var_ref!("db.rp.foo"));
|
||||
assert_eq!(format!("{}", got), r#""db.rp.foo""#);
|
||||
assert_eq!(rem, "");
|
||||
|
||||
// with cast operator
|
||||
let (_, got) = var_ref("foo::tag").unwrap();
|
||||
assert_eq!(got, var_ref!("foo", Tag));
|
||||
|
@ -539,6 +595,62 @@ mod test {
|
|||
assert!(got.is_empty())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_segmented_identifier() {
|
||||
// Unquoted
|
||||
let (rem, id) = segmented_identifier("part0").unwrap();
|
||||
assert_eq!(rem, "");
|
||||
assert_eq!(format!("{}", id), "part0");
|
||||
|
||||
// id.id
|
||||
let (rem, id) = segmented_identifier("part1.part0").unwrap();
|
||||
assert_eq!(rem, "");
|
||||
assert_eq!(format!("{}", id), "\"part1.part0\"");
|
||||
|
||||
// id..id
|
||||
let (rem, id) = segmented_identifier("part2..part0").unwrap();
|
||||
assert_eq!(rem, "");
|
||||
assert_eq!(format!("{}", id), "\"part2..part0\"");
|
||||
|
||||
// id.id.id
|
||||
let (rem, id) = segmented_identifier("part2.part1.part0").unwrap();
|
||||
assert_eq!(rem, "");
|
||||
assert_eq!(format!("{}", id), "\"part2.part1.part0\"");
|
||||
|
||||
// "id"."id".id
|
||||
let (rem, id) = segmented_identifier(r#""part 2"."part 1".part0"#).unwrap();
|
||||
assert_eq!(rem, "");
|
||||
assert_eq!(format!("{}", id), "\"part 2.part 1.part0\"");
|
||||
|
||||
// Only parses 3 segments
|
||||
let (rem, id) = segmented_identifier("part2.part1.part0.foo").unwrap();
|
||||
assert_eq!(rem, ".foo");
|
||||
assert_eq!(format!("{}", id), "\"part2.part1.part0\"");
|
||||
|
||||
// Quoted
|
||||
let (rem, id) = segmented_identifier("\"part0\"").unwrap();
|
||||
assert_eq!(rem, "");
|
||||
assert_eq!(format!("{}", id), "part0");
|
||||
|
||||
// Additional test cases, with compatibility proven via https://go.dev/play/p/k2150CJocVl
|
||||
|
||||
let (rem, id) = segmented_identifier(r#""part" 2"."part 1".part0"#).unwrap();
|
||||
assert_eq!(rem, r#" 2"."part 1".part0"#);
|
||||
assert_eq!(format!("{}", id), "part");
|
||||
|
||||
let (rem, id) = segmented_identifier(r#""part" 2."part 1".part0"#).unwrap();
|
||||
assert_eq!(rem, r#" 2."part 1".part0"#);
|
||||
assert_eq!(format!("{}", id), "part");
|
||||
|
||||
let (rem, id) = segmented_identifier(r#""part "2"."part 1".part0"#).unwrap();
|
||||
assert_eq!(rem, r#"2"."part 1".part0"#);
|
||||
assert_eq!(format!("{}", id), r#""part ""#);
|
||||
|
||||
let (rem, id) = segmented_identifier(r#""part ""2"."part 1".part0"#).unwrap();
|
||||
assert_eq!(rem, r#""2"."part 1".part0"#);
|
||||
assert_eq!(format!("{}", id), r#""part ""#);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_display_expr() {
|
||||
let (_, e) = arithmetic_expression("5 + 51").unwrap();
|
||||
|
|
|
@ -249,15 +249,15 @@ fn single_duration(i: &str) -> ParseResult<&str, i64> {
|
|||
pair(
|
||||
integer,
|
||||
alt((
|
||||
value(Nanosecond, tag("ns")), // nanoseconds
|
||||
value(Microsecond, tag("µs")), // microseconds
|
||||
value(Microsecond, tag("us")), // microseconds
|
||||
value(Millisecond, tag("ms")), // milliseconds
|
||||
value(Second, tag("s")), // seconds
|
||||
value(Minute, tag("m")), // minutes
|
||||
value(Hour, tag("h")), // hours
|
||||
value(Day, tag("d")), // days
|
||||
value(Week, tag("w")), // weeks
|
||||
value(Nanosecond, tag("ns")), // nanoseconds
|
||||
value(Microsecond, tag("µ")), // microseconds
|
||||
value(Microsecond, tag("u")), // microseconds
|
||||
value(Millisecond, tag("ms")), // milliseconds
|
||||
value(Second, tag("s")), // seconds
|
||||
value(Minute, tag("m")), // minutes
|
||||
value(Hour, tag("h")), // hours
|
||||
value(Day, tag("d")), // days
|
||||
value(Week, tag("w")), // weeks
|
||||
)),
|
||||
),
|
||||
|(v, unit)| match unit {
|
||||
|
@ -410,10 +410,14 @@ mod test {
|
|||
let (_, got) = single_duration("38ns").unwrap();
|
||||
assert_eq!(got, 38);
|
||||
|
||||
let (_, got) = single_duration("22us").unwrap();
|
||||
let (_, got) = single_duration("22u").unwrap();
|
||||
assert_eq!(got, 22 * NANOS_PER_MICRO);
|
||||
|
||||
let (_, got) = single_duration("7µs").unwrap();
|
||||
let (rem, got) = single_duration("22us").unwrap();
|
||||
assert_eq!(got, 22 * NANOS_PER_MICRO);
|
||||
assert_eq!(rem, "s"); // prove that we ignore the trailing s
|
||||
|
||||
let (_, got) = single_duration("7µ").unwrap();
|
||||
assert_eq!(got, 7 * NANOS_PER_MICRO);
|
||||
|
||||
let (_, got) = single_duration("15ms").unwrap();
|
||||
|
|
|
@ -774,6 +774,12 @@ mod test {
|
|||
select_statement("SELECT value FROM cpu WHERE time <= now()TZ('Australia/Hobart')")
|
||||
.unwrap();
|
||||
assert_eq!(rem, "");
|
||||
|
||||
// segmented var ref identifiers
|
||||
let (rem, _) =
|
||||
select_statement(r#"SELECT LAST("n.usage_user") FROM cpu WHERE n.usage_user > 0"#)
|
||||
.unwrap();
|
||||
assert_eq!(rem, "");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -848,6 +854,16 @@ mod test {
|
|||
}
|
||||
);
|
||||
|
||||
// Parse expression with an alias and no unnecessary whitespace
|
||||
let (_, got) = Field::parse("LAST(\"n.asks\")").unwrap();
|
||||
assert_eq!(
|
||||
got,
|
||||
Field {
|
||||
expr: call!("LAST", var_ref!("n.asks")),
|
||||
alias: None
|
||||
}
|
||||
);
|
||||
|
||||
// Parse a call with a VarRef
|
||||
let (_, got) = Field::parse("DISTINCT foo AS bar").unwrap();
|
||||
assert_eq!(
|
||||
|
|
|
@ -7,8 +7,8 @@
|
|||
use crate::impl_tuple_clause;
|
||||
use crate::internal::{expect, ParseError, ParseResult};
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::{is_not, tag};
|
||||
use nom::character::complete::char;
|
||||
use nom::bytes::complete::{is_not, tag, take_till};
|
||||
use nom::character::complete::{anychar, char};
|
||||
use nom::combinator::{map, value, verify};
|
||||
use nom::error::Error;
|
||||
use nom::multi::fold_many0;
|
||||
|
@ -137,13 +137,24 @@ fn regex_literal(i: &str) -> ParseResult<&str, &str> {
|
|||
|
||||
loop {
|
||||
// match everything except `\`, `/` or `\n`
|
||||
let (_, match_i) = is_not("\\/\n")(remaining)?;
|
||||
let (_, match_i) = take_till(|c| c == '\\' || c == '/' || c == '\n')(remaining)?;
|
||||
consumed = &i[..(consumed.len() + match_i.len())];
|
||||
remaining = &i[consumed.len()..];
|
||||
|
||||
// If we didn't consume anything, check whether it is a newline or regex delimiter,
|
||||
// which signals we should leave this parser for outer processing.
|
||||
if consumed.is_empty() {
|
||||
is_not("/\n")(remaining)?;
|
||||
}
|
||||
|
||||
// Try and consume '\' followed by a '/'
|
||||
if let Ok((remaining_i, _)) = char::<_, Error<&str>>('\\')(remaining) {
|
||||
if char::<_, Error<&str>>('/')(remaining_i).is_ok() {
|
||||
// If we didn't consume anything, but we found "\/" sequence,
|
||||
// we need to return an error so the outer fold_many0 parser does not trigger
|
||||
// an infinite recursion error.
|
||||
anychar(consumed)?;
|
||||
|
||||
// We're escaping a '/' (a regex delimiter), so finish and let
|
||||
// the outer parser match and unescape
|
||||
return Ok((remaining, consumed));
|
||||
|
@ -201,6 +212,10 @@ mod test {
|
|||
let (_, got) = double_quoted_string(r#""quick draw""#).unwrap();
|
||||
assert_eq!(got, "quick draw");
|
||||
|
||||
// ascii
|
||||
let (_, got) = double_quoted_string(r#""n.asks""#).unwrap();
|
||||
assert_eq!(got, "n.asks");
|
||||
|
||||
// unicode
|
||||
let (_, got) = double_quoted_string("\"quick draw\u{1f47d}\"").unwrap();
|
||||
assert_eq!(
|
||||
|
@ -265,6 +280,9 @@ mod test {
|
|||
let (_, got) = single_quoted_string(r#"'\n\''"#).unwrap();
|
||||
assert_eq!(got, "\n'");
|
||||
|
||||
let (_, got) = single_quoted_string(r#"'\'hello\''"#).unwrap();
|
||||
assert_eq!(got, "'hello'");
|
||||
|
||||
// literal tab
|
||||
let (_, got) = single_quoted_string("'quick\tdraw'").unwrap();
|
||||
assert_eq!(got, "quick\tdraw");
|
||||
|
@ -300,13 +318,17 @@ mod test {
|
|||
assert_eq!(got, "hello".into());
|
||||
|
||||
// handle escaped delimiters "\/"
|
||||
let (_, got) = regex(r#"/this\/is\/a\/path/"#).unwrap();
|
||||
assert_eq!(got, "this/is/a/path".into());
|
||||
let (_, got) = regex(r#"/\/this\/is\/a\/path/"#).unwrap();
|
||||
assert_eq!(got, "/this/is/a/path".into());
|
||||
|
||||
// ignores any other possible escape sequence
|
||||
let (_, got) = regex(r#"/hello\n/"#).unwrap();
|
||||
assert_eq!(got, "hello\\n".into());
|
||||
|
||||
// can parse possible escape sequence at beginning of regex
|
||||
let (_, got) = regex(r#"/\w.*/"#).unwrap();
|
||||
assert_eq!(got, "\\w.*".into());
|
||||
|
||||
// Empty regex
|
||||
let (i, got) = regex("//").unwrap();
|
||||
assert_eq!(i, "");
|
||||
|
|
|
@ -57,7 +57,7 @@ humantime = "2.1.0"
|
|||
itertools = "0.10.5"
|
||||
libc = { version = "0.2" }
|
||||
num_cpus = "1.13.0"
|
||||
once_cell = { version = "1.15.0", features = ["parking_lot"] }
|
||||
once_cell = { version = "1.16.0", features = ["parking_lot"] }
|
||||
rustyline = { version = "10.0", default-features = false }
|
||||
serde_json = "1.0.87"
|
||||
snafu = "0.7"
|
||||
|
|
|
@ -2,7 +2,6 @@ use futures::Future;
|
|||
use influxdb_iox_client::connection::Connection;
|
||||
use snafu::prelude::*;
|
||||
|
||||
mod namespace;
|
||||
mod parquet_to_lp;
|
||||
mod print_cpu;
|
||||
mod schema;
|
||||
|
@ -14,10 +13,6 @@ pub enum Error {
|
|||
#[snafu(display("Error in schema subcommand: {}", source))]
|
||||
Schema { source: schema::Error },
|
||||
|
||||
#[snafu(context(false))]
|
||||
#[snafu(display("Error in namespace subcommand: {}", source))]
|
||||
Namespace { source: namespace::Error },
|
||||
|
||||
#[snafu(context(false))]
|
||||
#[snafu(display("Error in parquet_to_lp subcommand: {}", source))]
|
||||
ParquetToLp { source: parquet_to_lp::Error },
|
||||
|
@ -41,9 +36,6 @@ enum Command {
|
|||
/// Prints what CPU features are used by the compiler by default.
|
||||
PrintCpu,
|
||||
|
||||
/// Interrogate IOx namespaces
|
||||
Namespace(namespace::Config),
|
||||
|
||||
/// Interrogate the schema of a namespace
|
||||
Schema(schema::Config),
|
||||
|
||||
|
@ -61,10 +53,6 @@ where
|
|||
{
|
||||
match config.command {
|
||||
Command::PrintCpu => print_cpu::main(),
|
||||
Command::Namespace(config) => {
|
||||
let connection = connection().await;
|
||||
namespace::command(connection, config).await?
|
||||
}
|
||||
Command::Schema(config) => {
|
||||
let connection = connection().await;
|
||||
schema::command(connection, config).await?
|
||||
|
|
|
@ -20,7 +20,7 @@ pub struct Config {
|
|||
command: Command,
|
||||
}
|
||||
|
||||
/// All possible subcommands for catalog
|
||||
/// All possible subcommands for namespace
|
||||
#[derive(Debug, clap::Parser)]
|
||||
enum Command {
|
||||
/// Fetch namespaces
|
|
@ -172,6 +172,7 @@ pub enum Format {
|
|||
#[derive(Debug, clap::Parser)]
|
||||
enum Command {
|
||||
MeasurementFields(MeasurementFields),
|
||||
MeasurementTagKeys(MeasurementTagKeys),
|
||||
ReadFilter,
|
||||
ReadGroup(ReadGroup),
|
||||
ReadWindowAggregate(ReadWindowAggregate),
|
||||
|
@ -184,6 +185,12 @@ struct MeasurementFields {
|
|||
measurement: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, clap::Parser)]
|
||||
struct MeasurementTagKeys {
|
||||
#[clap(action)]
|
||||
measurement: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, clap::Parser)]
|
||||
struct ReadGroup {
|
||||
#[clap(
|
||||
|
@ -279,6 +286,22 @@ pub async fn command(connection: Connection, config: Config) -> Result<()> {
|
|||
Format::Quiet => {}
|
||||
}
|
||||
}
|
||||
Command::MeasurementTagKeys(m) => {
|
||||
let result = client
|
||||
.measurement_tag_keys(request::measurement_tag_keys(
|
||||
source,
|
||||
m.measurement,
|
||||
config.start,
|
||||
config.stop,
|
||||
predicate,
|
||||
))
|
||||
.await
|
||||
.context(ServerSnafu)?;
|
||||
match config.format {
|
||||
Format::Pretty => response::pretty_print_strings(result).context(ResponseSnafu)?,
|
||||
Format::Quiet => {}
|
||||
}
|
||||
}
|
||||
Command::ReadFilter => {
|
||||
let result = client
|
||||
.read_filter(request::read_filter(
|
||||
|
|
|
@ -33,6 +33,21 @@ pub fn measurement_fields(
|
|||
}
|
||||
}
|
||||
|
||||
pub fn measurement_tag_keys(
|
||||
org_bucket: Any,
|
||||
measurement: String,
|
||||
start: i64,
|
||||
stop: i64,
|
||||
predicate: std::option::Option<Predicate>,
|
||||
) -> MeasurementTagKeysRequest {
|
||||
generated_types::MeasurementTagKeysRequest {
|
||||
source: Some(org_bucket),
|
||||
measurement,
|
||||
range: Some(TimestampRange { start, end: stop }),
|
||||
predicate,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn read_filter(
|
||||
org_bucket: Any,
|
||||
start: i64,
|
||||
|
|
|
@ -31,6 +31,7 @@ mod commands {
|
|||
pub mod compactor;
|
||||
pub mod debug;
|
||||
pub mod import;
|
||||
pub mod namespace;
|
||||
pub mod query;
|
||||
pub mod query_ingester;
|
||||
pub mod remote;
|
||||
|
@ -200,6 +201,9 @@ enum Command {
|
|||
|
||||
/// Commands related to the bulk ingest of data
|
||||
Import(commands::import::Config),
|
||||
|
||||
/// Various commands for namespace manipulation
|
||||
Namespace(commands::namespace::Config),
|
||||
}
|
||||
|
||||
fn main() -> Result<(), std::io::Error> {
|
||||
|
@ -349,6 +353,14 @@ fn main() -> Result<(), std::io::Error> {
|
|||
std::process::exit(ReturnCode::Failure as _)
|
||||
}
|
||||
}
|
||||
Some(Command::Namespace(config)) => {
|
||||
let _tracing_guard = handle_init_logs(init_simple_logs(log_verbose_count));
|
||||
let connection = connection().await;
|
||||
if let Err(e) = commands::namespace::command(connection, config).await {
|
||||
eprintln!("{}", e);
|
||||
std::process::exit(ReturnCode::Failure as _)
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
|
|
|
@ -533,7 +533,7 @@ async fn wait_for_query_result(state: &mut StepTestState<'_>, query_sql: &str, e
|
|||
);
|
||||
}
|
||||
|
||||
/// Test the schema cli command
|
||||
/// Test the namespace cli command
|
||||
#[tokio::test]
|
||||
async fn namespaces_cli() {
|
||||
test_helpers::maybe_start_logging();
|
||||
|
@ -556,7 +556,6 @@ async fn namespaces_cli() {
|
|||
.unwrap()
|
||||
.arg("-h")
|
||||
.arg(&querier_addr)
|
||||
.arg("debug")
|
||||
.arg("namespace")
|
||||
.arg("list")
|
||||
.assert()
|
||||
|
|
|
@ -177,6 +177,44 @@ async fn remote_store_get_table() {
|
|||
}
|
||||
.boxed()
|
||||
})),
|
||||
Step::Custom(Box::new(move |state: &mut StepTestState| {
|
||||
async move {
|
||||
// Test that we can download files from the querier (not just the router)
|
||||
// to ensure it has the correct grpc services
|
||||
let querier_addr = state.cluster().querier().querier_grpc_base().to_string();
|
||||
let namespace = state.cluster().namespace().to_string();
|
||||
|
||||
// Ensure files are actually written to the filesystem
|
||||
let dir = tempfile::tempdir().expect("could not get temporary directory");
|
||||
|
||||
Command::cargo_bin("influxdb_iox")
|
||||
.unwrap()
|
||||
.current_dir(&dir)
|
||||
.arg("-h")
|
||||
.arg(&querier_addr)
|
||||
.arg("remote")
|
||||
.arg("store")
|
||||
.arg("get-table")
|
||||
.arg(&namespace)
|
||||
.arg(&table_name)
|
||||
.assert()
|
||||
.success();
|
||||
|
||||
let table_dir = dir.as_ref().join(&table_name);
|
||||
|
||||
// There should be a directory created that, by default, is named the same as
|
||||
// the table
|
||||
assert!(table_dir.is_dir());
|
||||
let entries: Vec<_> = table_dir.read_dir().unwrap().flatten().collect();
|
||||
// The two Parquet files for this table should be present
|
||||
assert_eq!(
|
||||
entries.len(),
|
||||
2,
|
||||
"Expected 2 files in the directory, got: {entries:?}"
|
||||
);
|
||||
}
|
||||
.boxed()
|
||||
})),
|
||||
],
|
||||
)
|
||||
.run()
|
||||
|
|
|
@ -7,8 +7,8 @@ license.workspace = true
|
|||
|
||||
[dependencies]
|
||||
arrow = { workspace = true, features = ["prettyprint"] }
|
||||
arrow-flight = { workspace = true }
|
||||
arrow_util = { path = "../arrow_util" }
|
||||
arrow-flight = { workspace = true }
|
||||
async-trait = "0.1.58"
|
||||
backoff = { path = "../backoff" }
|
||||
bytes = "1.2"
|
||||
|
@ -20,6 +20,7 @@ dml = { path = "../dml" }
|
|||
flatbuffers = "2.1.2"
|
||||
futures = "0.3"
|
||||
generated_types = { path = "../generated_types" }
|
||||
hashbrown = "0.12.3"
|
||||
hyper = "0.14"
|
||||
iox_catalog = { path = "../iox_catalog" }
|
||||
iox_query = { path = "../iox_query" }
|
||||
|
|
|
@ -0,0 +1,331 @@
|
|||
//! A key-value map where values are always wrapped in an [`Arc`], with
//! helper methods for exactly-once initialisation.
|
||||
|
||||
#![allow(dead_code)]
|
||||
|
||||
use std::{
|
||||
borrow::Borrow,
|
||||
hash::{BuildHasher, Hash, Hasher},
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use hashbrown::{
|
||||
hash_map::{DefaultHashBuilder, RawEntryMut},
|
||||
HashMap,
|
||||
};
|
||||
use parking_lot::RwLock;
|
||||
|
||||
/// A key-value map where all values are wrapped in [`Arc`]'s and shared across
|
||||
/// all readers of a given key.
|
||||
///
|
||||
/// Each key in an [`ArcMap`] is initialised exactly once, with subsequent
|
||||
/// lookups being handed an [`Arc`] handle to the same instance.
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct ArcMap<K, V, S = DefaultHashBuilder> {
|
||||
map: RwLock<HashMap<K, Arc<V>, S>>,
|
||||
hasher: S,
|
||||
}
|
||||
|
||||
impl<K, V, S> std::ops::Deref for ArcMap<K, V, S> {
|
||||
type Target = RwLock<HashMap<K, Arc<V>, S>>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.map
|
||||
}
|
||||
}
|
||||
|
||||
impl<K, V> Default for ArcMap<K, V> {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
map: Default::default(),
|
||||
hasher: Default::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<K, V, S> ArcMap<K, V, S>
|
||||
where
|
||||
K: Hash + Eq,
|
||||
S: BuildHasher,
|
||||
{
|
||||
/// Fetch an [`Arc`]-wrapped `V` for `key`, or if this is the first lookup
|
||||
/// for `key`, initialise the value with the provided `init` closure.
|
||||
///
|
||||
/// # Concurrency
|
||||
///
|
||||
/// This call is thread-safe - if two calls race, a value will be
|
||||
/// initialised exactly once (one arbitrary caller's `init` closure will be
|
||||
/// executed) and both callers will obtain a handle to the same instance of
|
||||
/// `V`. Both threads will eagerly initialise V and race to "win" storing V
|
||||
/// in the map.
|
||||
///
|
||||
/// # Performance
|
||||
///
|
||||
/// This method is biased towards read-heavy workloads, with many readers
|
||||
/// progressing in parallel. If the value for `key` must be initialised, all
|
||||
/// readers are blocked while `init` executes and the resulting `V` is
|
||||
/// memoised.
|
||||
pub(crate) fn get_or_else<Q, F>(&self, key: &Q, init: F) -> Arc<V>
|
||||
where
|
||||
Q: Hash + PartialEq<K> + ToOwned<Owned = K> + ?Sized,
|
||||
F: FnOnce() -> Arc<V>,
|
||||
{
|
||||
// Memoise the hash outside of the lock.
|
||||
//
|
||||
// This allows the hash to be re-used (and not recomputed) if the value
|
||||
// has to be inserted into the map after the existence check. It also
|
||||
// obviously keeps the hashing outside of the lock.
|
||||
let hash = self.compute_hash(key);
|
||||
|
||||
// First check if the entry exists already.
|
||||
//
|
||||
// This does NOT use an upgradeable read lock, as readers waiting for an
|
||||
// upgradeable read lock block other readers wanting an upgradeable read
|
||||
// lock. If all readers do that, it's effectively an exclusive lock.
|
||||
if let Some((_, v)) = self.map.read().raw_entry().from_hash(hash, |q| key == q) {
|
||||
return Arc::clone(v);
|
||||
}
|
||||
|
||||
// Otherwise acquire a write lock and insert the value if necessary (it
|
||||
// is possible another thread initialised the value after the read check
|
||||
// above, but before this write lock was granted).
|
||||
let mut guard = self.map.write();
|
||||
match guard.raw_entry_mut().from_hash(hash, |q| key == q) {
|
||||
RawEntryMut::Occupied(v) => Arc::clone(v.get()),
|
||||
RawEntryMut::Vacant(v) => {
|
||||
Arc::clone(v.insert_hashed_nocheck(hash, key.to_owned(), init()).1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A convenience method over [`Self::get_or_else()`] that initialises `V`
|
||||
/// to the default value when `key` has no entry.
|
||||
pub(crate) fn get_or_default<Q>(&self, key: &Q) -> Arc<V>
|
||||
where
|
||||
Q: Hash + PartialEq<K> + ToOwned<Owned = K> + ?Sized,
|
||||
V: Default,
|
||||
{
|
||||
self.get_or_else(key, Default::default)
|
||||
}
|
||||
|
||||
/// A getter for `key` that returns an [`Arc`]-wrapped `V`, or [`None`] if
|
||||
/// `key` has not yet been initialised.
|
||||
///
|
||||
/// # Concurrency
|
||||
///
|
||||
/// This method is cheap, and multiple callers progress in parallel. Callers
|
||||
/// are blocked by a call to [`Self::get_or_else()`] only when a `V` needs
|
||||
/// to be initialised.
|
||||
pub(crate) fn get<Q>(&self, key: &Q) -> Option<Arc<V>>
|
||||
where
|
||||
K: Borrow<Q>,
|
||||
Q: Hash + PartialEq<K> + ?Sized,
|
||||
{
|
||||
let hash = self.compute_hash(key);
|
||||
self.map
|
||||
.read()
|
||||
.raw_entry()
|
||||
.from_hash(hash, |q| key == q)
|
||||
.map(|(_k, v)| Arc::clone(v))
|
||||
}
|
||||
|
||||
/// Insert `value` indexed by `key`.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// This method panics if a value already exists for `key`.
|
||||
pub(crate) fn insert<Q>(&self, key: &Q, value: Arc<V>)
|
||||
where
|
||||
Q: Hash + PartialEq<K> + ToOwned<Owned = K> + ?Sized,
|
||||
{
|
||||
let hash = self.compute_hash(key);
|
||||
|
||||
match self
|
||||
.map
|
||||
.write()
|
||||
.raw_entry_mut()
|
||||
.from_hash(hash, |q| key == q)
|
||||
{
|
||||
RawEntryMut::Occupied(_) => panic!("inserting existing key into ArcMap"),
|
||||
RawEntryMut::Vacant(view) => {
|
||||
view.insert_hashed_nocheck(hash, key.to_owned(), value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Return a state snapshot of all the values in this [`ArcMap`] in
|
||||
/// arbitrary order.
|
||||
///
|
||||
/// # Concurrency
|
||||
///
|
||||
/// The snapshot generation is serialised w.r.t concurrent calls to mutate
|
||||
/// `self` (that is, a new entry may appear immediately after the snapshot
|
||||
/// is generated). Calls to [`Self::values`] and other "read" methods
|
||||
/// proceed in parallel.
|
||||
pub(crate) fn values(&self) -> Vec<Arc<V>> {
|
||||
self.map.read().values().map(Arc::clone).collect()
|
||||
}
|
||||
|
||||
fn compute_hash<Q: Hash + ?Sized>(&self, key: &Q) -> u64 {
|
||||
let mut state = self.hasher.build_hasher();
|
||||
key.hash(&mut state);
|
||||
state.finish()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::{
|
||||
atomic::{AtomicUsize, Ordering},
|
||||
Arc, Barrier,
|
||||
};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_get() {
|
||||
let map = ArcMap::<String, usize>::default();
|
||||
|
||||
let key: &str = "bananas";
|
||||
|
||||
assert!(map.get(key).is_none());
|
||||
|
||||
// Assert the value is initialised from the closure
|
||||
let got: Arc<usize> = map.get_or_else(key, || Arc::new(42));
|
||||
assert_eq!(*got, 42);
|
||||
|
||||
// Assert the same Arc is returned later.
|
||||
let other = map.get(key).expect("should have been initialised");
|
||||
assert!(Arc::ptr_eq(&got, &other));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_init_once() {
|
||||
let map = ArcMap::<String, usize>::default();
|
||||
|
||||
let key: &str = "bananas";
|
||||
|
||||
// Assert the value is initialised from the closure
|
||||
let got = map.get_or_else(key, || Arc::new(42));
|
||||
assert_eq!(*got, 42);
|
||||
|
||||
// And subsequent calls observe the same value, regardless of the init
|
||||
// closure
|
||||
let got = map.get_or_else(key, || Arc::new(13));
|
||||
assert_eq!(*got, 42);
|
||||
|
||||
let got = map.get_or_default(key);
|
||||
assert_eq!(*got, 42);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_insert() {
|
||||
let map = ArcMap::<String, usize>::default();
|
||||
|
||||
let key: &str = "bananas";
|
||||
|
||||
assert!(map.get(key).is_none());
|
||||
|
||||
// Assert the value is initialised from the closure
|
||||
map.insert(key, Arc::new(42));
|
||||
let got = map.get(key).unwrap();
|
||||
assert_eq!(*got, 42);
|
||||
|
||||
// Assert the same Arc is returned later.
|
||||
let other = map.get(key).expect("should have been initialised");
|
||||
assert_eq!(*other, 42);
|
||||
assert!(Arc::ptr_eq(&got, &other));
|
||||
|
||||
// And subsequent calls observe the same value, regardless of the init
|
||||
// closure
|
||||
let got = map.get_or_else(key, || Arc::new(13));
|
||||
assert_eq!(*got, 42);
|
||||
assert!(Arc::ptr_eq(&got, &other));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_values() {
|
||||
let map = ArcMap::<usize, String>::default();
|
||||
|
||||
map.insert(&1, Arc::new("bananas".to_string()));
|
||||
map.insert(&2, Arc::new("platanos".to_string()));
|
||||
|
||||
let mut got = map
|
||||
.values()
|
||||
.into_iter()
|
||||
.map(|v| String::clone(&*v))
|
||||
.collect::<Vec<_>>();
|
||||
got.sort_unstable();
|
||||
|
||||
assert_eq!(got, &["bananas", "platanos"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic = "inserting existing key"]
|
||||
fn test_insert_existing() {
|
||||
let map = ArcMap::<String, usize>::default();
|
||||
|
||||
let key: &str = "bananas";
|
||||
map.insert(key, Arc::new(42));
|
||||
map.insert(key, Arc::new(42));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[allow(clippy::needless_collect)] // Only needless if you like deadlocks.
|
||||
fn test_init_once_parallel() {
|
||||
let map = Arc::new(ArcMap::<String, usize>::default());
|
||||
|
||||
const NUM_THREADS: usize = 10;
|
||||
|
||||
let barrier = Arc::new(Barrier::new(NUM_THREADS));
|
||||
let init_count = Arc::new(AtomicUsize::new(0));
|
||||
let key: &str = "bananas";
|
||||
|
||||
// Spawn NUM_THREADS and have all of them wait until all the threads
|
||||
// have initialised before racing to initialise a V for key.
|
||||
//
|
||||
// Each thread tries to initialise V to a unique per-thread value, and
|
||||
// this test asserts only one thread successfully initialises V to its
// unique value.
|
||||
let handles = (0..NUM_THREADS)
|
||||
.map(|i| {
|
||||
let map = Arc::clone(&map);
|
||||
let barrier = Arc::clone(&barrier);
|
||||
let init_count = Arc::clone(&init_count);
|
||||
|
||||
std::thread::spawn(move || {
|
||||
// Rendezvous with all threads before continuing to maximise
|
||||
// the racy-ness.
|
||||
barrier.wait();
|
||||
|
||||
let got = map.get_or_else(key, || {
|
||||
init_count.fetch_add(1, Ordering::SeqCst);
|
||||
Arc::new(i)
|
||||
});
|
||||
|
||||
*got == i
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let winners = handles
|
||||
.into_iter()
|
||||
.fold(0, |acc, h| if h.join().unwrap() { acc + 1 } else { acc });
|
||||
|
||||
assert_eq!(winners, 1); // Number of threads that observed their unique value
|
||||
assert_eq!(init_count.load(Ordering::SeqCst), 1); // Number of init() calls
|
||||
}
|
||||
|
||||
// Assert values can be "moved" due to FnOnce being used, vs. Fn.
|
||||
//
|
||||
// This is a compile-time assertion more than a runtime test.
|
||||
#[test]
|
||||
fn test_fn_once() {
|
||||
let map = ArcMap::<String, String>::default();
|
||||
|
||||
// A non-copy value that is moved into the FnOnce
|
||||
let v = "bananas".to_owned();
|
||||
let v = map.get_or_else("platanos", move || Arc::new(v));
|
||||
assert_eq!(*v, "bananas")
|
||||
}
|
||||
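// Illustrative sketch: a minimal usage example combining the `get_or_else`
// and `get_or_default` behaviour asserted by the tests above; it uses only
// APIs defined in this module.
#[test]
fn test_usage_sketch() {
    let map = ArcMap::<String, usize>::default();

    // The first lookup for a key runs the init closure exactly once...
    let first = map.get_or_else("cpu", || Arc::new(1));
    assert_eq!(*first, 1);

    // ...and later lookups hand back an Arc to the very same instance,
    // regardless of the initialiser (or default) they would supply.
    let again = map.get_or_default("cpu");
    assert!(Arc::ptr_eq(&first, &again));
}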
}
|
|
@ -159,7 +159,7 @@ mod tests {
|
|||
use arrow_util::assert_batches_eq;
|
||||
use data_types::PartitionId;
|
||||
use mutable_batch_lp::lines_to_batches;
|
||||
use schema::selection::Selection;
|
||||
use schema::Projection;
|
||||
|
||||
use super::*;
|
||||
use crate::test_util::{
|
||||
|
@ -182,7 +182,7 @@ mod tests {
|
|||
.unwrap()
|
||||
.get("cpu")
|
||||
.unwrap()
|
||||
.to_arrow(Selection::All)
|
||||
.to_arrow(Projection::All)
|
||||
.unwrap();
|
||||
|
||||
let batch = QueryAdaptor::new(
|
||||
|
|
|
@ -614,9 +614,11 @@ mod tests {
|
|||
|
||||
use dml::{DmlDelete, DmlMeta, DmlWrite};
|
||||
use futures::TryStreamExt;
|
||||
use iox_catalog::{mem::MemCatalog, validate_or_insert_schema};
|
||||
use hashbrown::HashMap;
|
||||
use iox_catalog::{interface::RepoCollection, mem::MemCatalog, validate_or_insert_schema};
|
||||
use iox_time::Time;
|
||||
use metric::{MetricObserver, Observation};
|
||||
use mutable_batch::MutableBatch;
|
||||
use mutable_batch_lp::lines_to_batches;
|
||||
use object_store::memory::InMemory;
|
||||
|
||||
|
@ -664,9 +666,12 @@ mod tests {
|
|||
|
||||
let ignored_ts = Time::from_timestamp_millis(42);
|
||||
|
||||
let batch = lines_to_batches("mem foo=1 10", 0).unwrap();
|
||||
let w1 = DmlWrite::new(
|
||||
"foo",
|
||||
lines_to_batches("mem foo=1 10", 0).unwrap(),
|
||||
namespace.id,
|
||||
batch.clone(),
|
||||
build_id_map(repos.deref_mut(), namespace.id, &batch).await,
|
||||
"1970-01-01".into(),
|
||||
DmlMeta::sequenced(
|
||||
Sequence::new(ShardIndex::new(1), SequenceNumber::new(1)),
|
||||
|
@ -705,9 +710,12 @@ mod tests {
|
|||
.unwrap();
|
||||
assert_matches!(action, DmlApplyAction::Applied(false));
|
||||
|
||||
let batch = lines_to_batches("mem foo=1 10", 0).unwrap();
|
||||
let w2 = DmlWrite::new(
|
||||
"foo",
|
||||
lines_to_batches("mem foo=1 10", 0).unwrap(),
|
||||
namespace.id,
|
||||
batch.clone(),
|
||||
build_id_map(&mut *catalog.repositories().await, namespace.id, &batch).await,
|
||||
"1970-01-01".into(),
|
||||
DmlMeta::sequenced(
|
||||
Sequence::new(ShardIndex::new(1), SequenceNumber::new(2)),
|
||||
|
@ -750,16 +758,19 @@ mod tests {
|
|||
Arc::clone(&catalog),
|
||||
[(shard1.id, shard1.shard_index)],
|
||||
Arc::new(Executor::new(1)),
|
||||
Arc::new(CatalogPartitionResolver::new(catalog)),
|
||||
Arc::new(CatalogPartitionResolver::new(Arc::clone(&catalog))),
|
||||
BackoffConfig::default(),
|
||||
Arc::clone(&metrics),
|
||||
));
|
||||
|
||||
let schema = NamespaceSchema::new(namespace.id, topic.id, query_pool.id, 100);
|
||||
|
||||
let batch = lines_to_batches("mem foo=1 10\nmem foo=1 11", 0).unwrap();
|
||||
let w1 = DmlWrite::new(
|
||||
"foo",
|
||||
lines_to_batches("mem foo=1 10\nmem foo=1 11", 0).unwrap(),
|
||||
namespace.id,
|
||||
batch.clone(),
|
||||
build_id_map(repos.deref_mut(), namespace.id, &batch).await,
|
||||
"1970-01-01".into(),
|
||||
DmlMeta::sequenced(
|
||||
Sequence::new(ShardIndex::new(1), SequenceNumber::new(1)),
|
||||
|
@ -866,9 +877,12 @@ mod tests {
|
|||
|
||||
let ignored_ts = Time::from_timestamp_millis(42);
|
||||
|
||||
let batch = lines_to_batches("mem foo=1 10", 0).unwrap();
|
||||
let w1 = DmlWrite::new(
|
||||
"foo",
|
||||
lines_to_batches("mem foo=1 10", 0).unwrap(),
|
||||
namespace.id,
|
||||
batch.clone(),
|
||||
build_id_map(repos.deref_mut(), namespace.id, &batch).await,
|
||||
"1970-01-01".into(),
|
||||
DmlMeta::sequenced(
|
||||
Sequence::new(ShardIndex::new(1), SequenceNumber::new(1)),
|
||||
|
@ -882,9 +896,12 @@ mod tests {
|
|||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
let batch = lines_to_batches("cpu foo=1 10", 1).unwrap();
|
||||
let w2 = DmlWrite::new(
|
||||
"foo",
|
||||
lines_to_batches("cpu foo=1 10", 1).unwrap(),
|
||||
namespace.id,
|
||||
batch.clone(),
|
||||
build_id_map(repos.deref_mut(), namespace.id, &batch).await,
|
||||
"1970-01-01".into(),
|
||||
DmlMeta::sequenced(
|
||||
Sequence::new(ShardIndex::new(2), SequenceNumber::new(1)),
|
||||
|
@ -900,9 +917,12 @@ mod tests {
|
|||
|
||||
// drop repos so the mem catalog won't deadlock.
|
||||
std::mem::drop(repos);
|
||||
let batch = lines_to_batches("mem foo=1 30", 2).unwrap();
|
||||
let w3 = DmlWrite::new(
|
||||
"foo",
|
||||
lines_to_batches("mem foo=1 30", 2).unwrap(),
|
||||
namespace.id,
|
||||
batch.clone(),
|
||||
build_id_map(&mut *catalog.repositories().await, namespace.id, &batch).await,
|
||||
"1970-01-01".into(),
|
||||
DmlMeta::sequenced(
|
||||
Sequence::new(ShardIndex::new(1), SequenceNumber::new(2)),
|
||||
|
@ -1131,7 +1151,7 @@ mod tests {
|
|||
(shard2.id, shard2.shard_index),
|
||||
],
|
||||
Arc::new(Executor::new(1)),
|
||||
Arc::new(CatalogPartitionResolver::new(catalog)),
|
||||
Arc::new(CatalogPartitionResolver::new(Arc::clone(&catalog))),
|
||||
BackoffConfig::default(),
|
||||
Arc::clone(&metrics),
|
||||
));
|
||||
|
@ -1141,9 +1161,12 @@ mod tests {
|
|||
let ignored_ts = Time::from_timestamp_millis(42);
|
||||
|
||||
// write with sequence number 1
|
||||
let batch = lines_to_batches("mem foo=1 10", 0).unwrap();
|
||||
let w1 = DmlWrite::new(
|
||||
"foo",
|
||||
lines_to_batches("mem foo=1 10", 0).unwrap(),
|
||||
namespace.id,
|
||||
batch.clone(),
|
||||
build_id_map(repos.deref_mut(), namespace.id, &batch).await,
|
||||
"1970-01-01".into(),
|
||||
DmlMeta::sequenced(
|
||||
Sequence::new(ShardIndex::new(1), SequenceNumber::new(1)),
|
||||
|
@ -1158,9 +1181,12 @@ mod tests {
|
|||
.unwrap();
|
||||
|
||||
// write with sequence number 2
|
||||
let batch = lines_to_batches("mem foo=1 30\ncpu bar=1 20", 0).unwrap();
|
||||
let w2 = DmlWrite::new(
|
||||
"foo",
|
||||
lines_to_batches("mem foo=1 30\ncpu bar=1 20", 0).unwrap(),
|
||||
namespace.id,
|
||||
batch.clone(),
|
||||
build_id_map(repos.deref_mut(), namespace.id, &batch).await,
|
||||
"1970-01-01".into(),
|
||||
DmlMeta::sequenced(
|
||||
Sequence::new(ShardIndex::new(1), SequenceNumber::new(2)),
|
||||
|
@ -1256,9 +1282,12 @@ mod tests {
|
|||
|
||||
let ignored_ts = Time::from_timestamp_millis(42);
|
||||
|
||||
let batch = lines_to_batches("mem foo=1 10", 0).unwrap();
|
||||
let w1 = DmlWrite::new(
|
||||
"foo",
|
||||
lines_to_batches("mem foo=1 10", 0).unwrap(),
|
||||
namespace.id,
|
||||
batch.clone(),
|
||||
build_id_map(repos.deref_mut(), namespace.id, &batch).await,
|
||||
"1970-01-01".into(),
|
||||
DmlMeta::sequenced(
|
||||
Sequence::new(ShardIndex::new(1), SequenceNumber::new(1)),
|
||||
|
@ -1267,9 +1296,12 @@ mod tests {
|
|||
50,
|
||||
),
|
||||
);
|
||||
let batch = lines_to_batches("mem foo=1 10", 0).unwrap();
|
||||
let w2 = DmlWrite::new(
|
||||
"foo",
|
||||
lines_to_batches("mem foo=1 10", 0).unwrap(),
|
||||
namespace.id,
|
||||
batch.clone(),
|
||||
build_id_map(repos.deref_mut(), namespace.id, &batch).await,
|
||||
"1970-01-01".into(),
|
||||
DmlMeta::sequenced(
|
||||
Sequence::new(ShardIndex::new(1), SequenceNumber::new(2)),
|
||||
|
@ -1434,7 +1466,7 @@ mod tests {
|
|||
Arc::clone(&catalog),
|
||||
[(shard1.id, shard_index)],
|
||||
Arc::new(Executor::new(1)),
|
||||
Arc::new(CatalogPartitionResolver::new(catalog)),
|
||||
Arc::new(CatalogPartitionResolver::new(Arc::clone(&catalog))),
|
||||
BackoffConfig::default(),
|
||||
Arc::clone(&metrics),
|
||||
));
|
||||
|
@ -1443,9 +1475,12 @@ mod tests {
|
|||
|
||||
let ignored_ts = Time::from_timestamp_millis(42);
|
||||
|
||||
let batch = lines_to_batches("mem foo=1 10", 0).unwrap();
|
||||
let w1 = DmlWrite::new(
|
||||
"foo",
|
||||
lines_to_batches("mem foo=1 10", 0).unwrap(),
|
||||
namespace.id,
|
||||
batch.clone(),
|
||||
build_id_map(repos.deref_mut(), namespace.id, &batch).await,
|
||||
"1970-01-01".into(),
|
||||
DmlMeta::sequenced(
|
||||
Sequence::new(ShardIndex::new(1), SequenceNumber::new(1)),
|
||||
|
@ -1515,4 +1550,28 @@ mod tests {
|
|||
|
||||
assert_eq!(progresses, expected_progresses);
|
||||
}
|
||||
|
||||
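/// Upsert every table in `tables` into the catalog and return the resulting
/// table name -> `TableId` map, as expected by the `DmlWrite::new` calls in
/// the tests above.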
pub async fn build_id_map<R>(
|
||||
catalog: &mut R,
|
||||
namespace_id: NamespaceId,
|
||||
tables: &HashMap<String, MutableBatch>,
|
||||
) -> HashMap<String, TableId>
|
||||
where
|
||||
R: RepoCollection + ?Sized,
|
||||
{
|
||||
let mut ret = HashMap::with_capacity(tables.len());
|
||||
|
||||
for k in tables.keys() {
|
||||
let id = catalog
|
||||
.tables()
|
||||
.create_or_get(k, namespace_id)
|
||||
.await
|
||||
.expect("table should create OK")
|
||||
.id;
|
||||
|
||||
ret.insert(k.clone(), id);
|
||||
}
|
||||
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@ use std::sync::Arc;
|
|||
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use mutable_batch::MutableBatch;
|
||||
use schema::selection::Selection;
|
||||
use schema::Projection;
|
||||
|
||||
/// A [`Buffer`] is an internal mutable buffer wrapper over a [`MutableBatch`]
|
||||
/// for the [`BufferState`] FSM.
|
||||
|
@ -42,7 +42,7 @@ impl Buffer {
|
|||
pub(super) fn snapshot(self) -> Option<Arc<RecordBatch>> {
|
||||
Some(Arc::new(
|
||||
self.buffer?
|
||||
.to_arrow(Selection::All)
|
||||
.to_arrow(Projection::All)
|
||||
.expect("failed to snapshot buffer data"),
|
||||
))
|
||||
}
|
||||
|
|
|
@ -135,7 +135,7 @@ mod tests {
|
|||
|
||||
use arrow_util::assert_batches_eq;
|
||||
use mutable_batch_lp::test_helpers::lp_to_mutable_batch;
|
||||
use schema::selection::Selection;
|
||||
use schema::Projection;
|
||||
|
||||
use super::*;
|
||||
|
||||
|
@ -267,7 +267,7 @@ mod tests {
|
|||
// Generate the combined buffer from the original inputs to compare
|
||||
// against.
|
||||
mb1.extend_from(&mb2).unwrap();
|
||||
let want = mb1.to_arrow(Selection::All).unwrap();
|
||||
let want = mb1.to_arrow(Projection::All).unwrap();
|
||||
|
||||
assert_eq!(&**snapshot, &want);
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@ use std::sync::Arc;
|
|||
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use mutable_batch::MutableBatch;
|
||||
use schema::selection::Selection;
|
||||
use schema::Projection;
|
||||
|
||||
use crate::data::partition::buffer::{
|
||||
mutable_buffer::Buffer,
|
||||
|
@ -34,7 +34,7 @@ impl Queryable for Buffering {
|
|||
fn get_query_data(&self) -> Vec<Arc<RecordBatch>> {
|
||||
let data = self.buffer.buffer().map(|v| {
|
||||
Arc::new(
|
||||
v.to_arrow(Selection::All)
|
||||
v.to_arrow(Projection::All)
|
||||
.expect("failed to snapshot buffer data"),
|
||||
)
|
||||
});
|
||||
|
|
|
@ -445,7 +445,7 @@ impl<T> Drop for IngestHandlerImpl<T> {
|
|||
mod tests {
|
||||
use std::{num::NonZeroU32, ops::DerefMut};
|
||||
|
||||
use data_types::{Namespace, NamespaceSchema, Sequence, SequenceNumber};
|
||||
use data_types::{Namespace, NamespaceId, NamespaceSchema, Sequence, SequenceNumber, TableId};
|
||||
use dml::{DmlMeta, DmlWrite};
|
||||
use iox_catalog::{mem::MemCatalog, validate_or_insert_schema};
|
||||
use iox_time::Time;
|
||||
|
@ -600,7 +600,9 @@ mod tests {
|
|||
let ingest_ts1 = Time::from_timestamp_millis(42);
|
||||
let write_operations = vec![DmlWrite::new(
|
||||
"foo",
|
||||
NamespaceId::new(1),
|
||||
lines_to_batches("cpu bar=2 20", 0).unwrap(),
|
||||
[("cpu".to_string(), TableId::new(1))].into_iter().collect(),
|
||||
"1970-01-01".into(),
|
||||
DmlMeta::sequenced(
|
||||
Sequence::new(ShardIndex::new(0), SequenceNumber::new(10)),
|
||||
|
@ -626,7 +628,9 @@ mod tests {
|
|||
let ingest_ts1 = Time::from_timestamp_millis(42);
|
||||
let write_operations = vec![DmlWrite::new(
|
||||
"foo",
|
||||
NamespaceId::new(1),
|
||||
lines_to_batches("cpu bar=2 20", 0).unwrap(),
|
||||
[("cpu".to_string(), TableId::new(1))].into_iter().collect(),
|
||||
"1970-01-01".into(),
|
||||
DmlMeta::sequenced(
|
||||
Sequence::new(ShardIndex::new(0), SequenceNumber::new(2)),
|
||||
|
@ -652,7 +656,9 @@ mod tests {
|
|||
let ingest_ts1 = Time::from_timestamp_millis(42);
|
||||
let write_operations = vec![DmlWrite::new(
|
||||
"foo",
|
||||
NamespaceId::new(1),
|
||||
lines_to_batches("cpu bar=2 20", 0).unwrap(),
|
||||
[("cpu".to_string(), TableId::new(1))].into_iter().collect(),
|
||||
"1970-01-01".into(),
|
||||
DmlMeta::sequenced(
|
||||
Sequence::new(ShardIndex::new(0), SequenceNumber::new(2)),
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
clippy::dbg_macro
|
||||
)]
|
||||
|
||||
mod arcmap;
pub(crate) mod compact;
pub mod data;
pub mod handler;
|
||||
|
|
|
@ -10,7 +10,7 @@ use datafusion_util::MemoryStream;
|
|||
use futures::{Stream, StreamExt, TryStreamExt};
|
||||
use generated_types::ingester::IngesterQueryRequest;
|
||||
use observability_deps::tracing::debug;
|
||||
use schema::{merge::SchemaMerger, selection::Selection};
|
||||
use schema::{merge::SchemaMerger, Projection};
|
||||
use snafu::{ensure, Snafu};
|
||||
use trace::span::{Span, SpanRecorder};
|
||||
|
||||
|
@ -344,9 +344,9 @@ pub async fn prepare_data_to_querier(
|
|||
.map(String::as_str)
|
||||
.collect::<Vec<_>>();
|
||||
let selection = if columns.is_empty() {
|
||||
Selection::All
|
||||
Projection::All
|
||||
} else {
|
||||
Selection::Some(columns.as_ref())
|
||||
Projection::Some(columns.as_ref())
|
||||
};
|
||||
|
||||
let snapshots = batch.project_selection(selection).into_iter().map(|batch| {
|
||||
|
@ -666,6 +666,6 @@ mod tests {
|
|||
}
|
||||
|
||||
fn lp_to_batch(lp: &str) -> RecordBatch {
|
||||
lp_to_mutable_batch(lp).1.to_arrow(Selection::All).unwrap()
|
||||
lp_to_mutable_batch(lp).1.to_arrow(Projection::All).unwrap()
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,24 +6,16 @@ use std::{any::Any, sync::Arc};
|
|||
use arrow::record_batch::RecordBatch;
|
||||
use arrow_util::util::ensure_schema;
|
||||
use data_types::{ChunkId, ChunkOrder, DeletePredicate, PartitionId, TableSummary};
|
||||
use datafusion::{
|
||||
error::DataFusionError,
|
||||
physical_plan::{
|
||||
common::SizedRecordBatchStream,
|
||||
metrics::{ExecutionPlanMetricsSet, MemTrackingMetrics},
|
||||
SendableRecordBatchStream,
|
||||
},
|
||||
};
|
||||
use datafusion::error::DataFusionError;
|
||||
use iox_query::{
|
||||
exec::{stringset::StringSet, IOxSessionContext},
|
||||
util::{compute_timenanosecond_min_max, create_basic_summary},
|
||||
QueryChunk, QueryChunkMeta,
|
||||
QueryChunk, QueryChunkData, QueryChunkMeta,
|
||||
};
|
||||
use observability_deps::tracing::trace;
|
||||
use once_cell::sync::OnceCell;
|
||||
use predicate::Predicate;
|
||||
use schema::{merge::merge_record_batch_schemas, selection::Selection, sort::SortKey, Schema};
|
||||
use snafu::{ResultExt, Snafu};
|
||||
use schema::{merge::merge_record_batch_schemas, sort::SortKey, Projection, Schema};
|
||||
use snafu::Snafu;
|
||||
|
||||
use crate::data::table::TableName;
|
||||
|
||||
|
@ -109,7 +101,7 @@ impl QueryAdaptor {
|
|||
}
|
||||
}
|
||||
|
||||
pub(crate) fn project_selection(&self, selection: Selection<'_>) -> Vec<RecordBatch> {
|
||||
pub(crate) fn project_selection(&self, selection: Projection<'_>) -> Vec<RecordBatch> {
|
||||
// Project the column selection across all RecordBatch
|
||||
self.data
|
||||
.iter()
|
||||
|
@ -119,8 +111,8 @@ impl QueryAdaptor {
|
|||
|
||||
// Apply selection to in-memory batch
|
||||
match selection {
|
||||
Selection::All => batch.clone(),
|
||||
Selection::Some(columns) => {
|
||||
Projection::All => batch.clone(),
|
||||
Projection::Some(columns) => {
|
||||
let projection = columns
|
||||
.iter()
|
||||
.flat_map(|&column_name| {
|
||||
|
@ -148,8 +140,8 @@ impl QueryAdaptor {
|
|||
}
|
||||
|
||||
impl QueryChunkMeta for QueryAdaptor {
|
||||
fn summary(&self) -> Option<Arc<TableSummary>> {
|
||||
Some(Arc::clone(self.summary.get_or_init(|| {
|
||||
fn summary(&self) -> Arc<TableSummary> {
|
||||
Arc::clone(self.summary.get_or_init(|| {
|
||||
let ts_min_max = compute_timenanosecond_min_max(self.data.iter().map(|b| b.as_ref()))
|
||||
.expect("Should have time range");
|
||||
|
||||
|
@ -158,7 +150,7 @@ impl QueryChunkMeta for QueryAdaptor {
|
|||
&self.schema(),
|
||||
ts_min_max,
|
||||
))
|
||||
})))
|
||||
}))
|
||||
}
|
||||
|
||||
fn schema(&self) -> Arc<Schema> {
|
||||
|
@ -211,7 +203,7 @@ impl QueryChunk for QueryAdaptor {
|
|||
&self,
|
||||
_ctx: IOxSessionContext,
|
||||
_predicate: &Predicate,
|
||||
_columns: Selection<'_>,
|
||||
_columns: Projection<'_>,
|
||||
) -> Result<Option<StringSet>, DataFusionError> {
|
||||
Ok(None)
|
||||
}
|
||||
|
@ -230,42 +222,15 @@ impl QueryChunk for QueryAdaptor {
|
|||
Ok(None)
|
||||
}
|
||||
|
||||
/// Provides access to raw `QueryChunk` data as an
|
||||
/// asynchronous stream of `RecordBatch`es
|
||||
fn read_filter(
|
||||
&self,
|
||||
mut ctx: IOxSessionContext,
|
||||
_predicate: &Predicate,
|
||||
selection: Selection<'_>,
|
||||
) -> Result<SendableRecordBatchStream, DataFusionError> {
|
||||
ctx.set_metadata("storage", "ingester");
|
||||
ctx.set_metadata("projection", format!("{}", selection));
|
||||
trace!(?selection, "selection");
|
||||
fn data(&self) -> QueryChunkData {
|
||||
let schema = self.schema().as_arrow();
|
||||
|
||||
let schema = self
|
||||
.schema()
|
||||
.select(selection)
|
||||
.context(SchemaSnafu)
|
||||
.map_err(|e| DataFusionError::External(Box::new(e)))?;
|
||||
|
||||
// Apply the projection over all the data in self, ensuring each batch
|
||||
// has the specified schema.
|
||||
let batches = self
|
||||
.project_selection(selection)
|
||||
.into_iter()
|
||||
.map(|batch| {
|
||||
ensure_schema(&schema.as_arrow(), &batch)
|
||||
.context(ConcatBatchesSnafu {})
|
||||
.map(Arc::new)
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
.map_err(|e| DataFusionError::External(Box::new(e)))?;
|
||||
|
||||
// Return stream of data
|
||||
let dummy_metrics = ExecutionPlanMetricsSet::new();
|
||||
let mem_metrics = MemTrackingMetrics::new(&dummy_metrics, 0);
|
||||
let stream = SizedRecordBatchStream::new(schema.as_arrow(), batches, mem_metrics);
|
||||
Ok(Box::pin(stream))
|
||||
QueryChunkData::RecordBatches(
|
||||
self.data
|
||||
.iter()
|
||||
.map(|b| ensure_schema(&schema, b).expect("schema handling broken"))
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns chunk type
|
||||
|
|
|
@ -481,7 +481,7 @@ mod tests {
|
|||
use data_types::PartitionId;
|
||||
use futures::StreamExt;
|
||||
use mutable_batch_lp::test_helpers::lp_to_mutable_batch;
|
||||
use schema::selection::Selection;
|
||||
use schema::Projection;
|
||||
|
||||
use crate::querier_handler::PartitionStatus;
|
||||
|
||||
|
@ -496,7 +496,7 @@ mod tests {
|
|||
async fn test_get_stream_all_types() {
|
||||
let batch = lp_to_mutable_batch("table z=1 0")
|
||||
.1
|
||||
.to_arrow(Selection::All)
|
||||
.to_arrow(Projection::All)
|
||||
.unwrap();
|
||||
let schema = batch.schema();
|
||||
|
||||
|
@ -572,7 +572,7 @@ mod tests {
|
|||
async fn test_get_stream_dictionary_batches() {
|
||||
let batch = lp_to_mutable_batch("table,x=\"foo\",y=\"bar\" z=1 0")
|
||||
.1
|
||||
.to_arrow(Selection::All)
|
||||
.to_arrow(Projection::All)
|
||||
.unwrap();
|
||||
|
||||
assert_get_stream(
|
||||
|
|
|
@ -514,7 +514,7 @@ mod tests {
|
|||
|
||||
use assert_matches::assert_matches;
|
||||
use async_trait::async_trait;
|
||||
use data_types::{DeletePredicate, Sequence, TimestampRange};
|
||||
use data_types::{DeletePredicate, NamespaceId, Sequence, TableId, TimestampRange};
|
||||
use dml::{DmlDelete, DmlMeta, DmlWrite};
|
||||
use futures::stream::{self, BoxStream};
|
||||
use iox_time::{SystemProvider, Time};
|
||||
|
@ -539,6 +539,11 @@ mod tests {
|
|||
// Return a DmlWrite with the given namespace and a single table.
|
||||
fn make_write(name: impl Into<String>, write_time: u64) -> DmlWrite {
|
||||
let tables = lines_to_batches("bananas level=42 4242", 0).unwrap();
|
||||
let ids = tables
|
||||
.keys()
|
||||
.enumerate()
|
||||
.map(|(i, v)| (v.clone(), TableId::new(i as _)))
|
||||
.collect();
|
||||
let sequence = DmlMeta::sequenced(
|
||||
Sequence::new(ShardIndex::new(1), SequenceNumber::new(2)),
|
||||
TEST_TIME
|
||||
|
@ -547,7 +552,14 @@ mod tests {
|
|||
None,
|
||||
42,
|
||||
);
|
||||
DmlWrite::new(name, tables, "1970-01-01".into(), sequence)
|
||||
DmlWrite::new(
|
||||
name,
|
||||
NamespaceId::new(42),
|
||||
tables,
|
||||
ids,
|
||||
"1970-01-01".into(),
|
||||
sequence,
|
||||
)
|
||||
}
|
||||
|
||||
// Return a DmlDelete with the given namespace.
|
||||
|
|
|
@ -239,7 +239,7 @@ mod tests {
|
|||
use std::sync::Arc;
|
||||
|
||||
use assert_matches::assert_matches;
|
||||
use data_types::{Sequence, SequenceNumber};
|
||||
use data_types::{NamespaceId, Sequence, SequenceNumber, TableId};
|
||||
use dml::{DmlMeta, DmlWrite};
|
||||
use iox_time::Time;
|
||||
use metric::{Metric, MetricObserver, Observation};
|
||||
|
@ -272,7 +272,19 @@ mod tests {
|
|||
/// Return a DmlWrite with the given metadata and a single table.
|
||||
fn make_write(meta: DmlMeta) -> DmlWrite {
|
||||
let tables = lines_to_batches("bananas level=42 4242", 0).unwrap();
|
||||
DmlWrite::new("bananas", tables, "1970-01-01".into(), meta)
|
||||
let ids = tables
|
||||
.keys()
|
||||
.enumerate()
|
||||
.map(|(i, v)| (v.clone(), TableId::new(i as _)))
|
||||
.collect();
|
||||
DmlWrite::new(
|
||||
"bananas",
|
||||
NamespaceId::new(42),
|
||||
tables,
|
||||
ids,
|
||||
"1970-01-01".into(),
|
||||
meta,
|
||||
)
|
||||
}
|
||||
|
||||
/// Extract the metric with the given name from `metrics`.
|
||||
|
|
|
@ -572,9 +572,17 @@ pub(crate) fn make_write_op(
|
|||
sequence_number: i64,
|
||||
lines: &str,
|
||||
) -> DmlWrite {
|
||||
let tables = lines_to_batches(lines, 0).unwrap();
|
||||
let ids = tables
|
||||
.keys()
|
||||
.enumerate()
|
||||
.map(|(i, v)| (v.clone(), TableId::new(i as _)))
|
||||
.collect();
|
||||
DmlWrite::new(
|
||||
namespace.to_string(),
|
||||
lines_to_batches(lines, 0).unwrap(),
|
||||
NamespaceId::new(42),
|
||||
tables,
|
||||
ids,
|
||||
partition_key.clone(),
|
||||
DmlMeta::sequenced(
|
||||
Sequence {
|
||||
|
|
|
@ -5,6 +5,7 @@ use data_types::{
|
|||
ShardIndex, TopicId,
|
||||
};
|
||||
use dml::{DmlMeta, DmlWrite};
|
||||
use futures::{stream::FuturesUnordered, StreamExt};
|
||||
use generated_types::ingester::IngesterQueryRequest;
|
||||
use ingester::{
|
||||
handler::{IngestHandler, IngestHandlerImpl},
|
||||
|
@ -261,9 +262,42 @@ impl TestContext {
|
|||
partition_key: PartitionKey,
|
||||
sequence_number: i64,
|
||||
) -> SequenceNumber {
|
||||
// Resolve the namespace ID needed to construct the DML op
|
||||
let namespace_id = self
|
||||
.namespaces
|
||||
.get(namespace)
|
||||
.expect("namespace does not exist")
|
||||
.id;
|
||||
|
||||
// Build the TableId -> TableName map, upserting the tables in the
|
||||
// process.
|
||||
let ids = lines_to_batches(lp, 0)
|
||||
.unwrap()
|
||||
.keys()
|
||||
.map(|v| {
|
||||
let catalog = Arc::clone(&self.catalog);
|
||||
async move {
|
||||
let id = catalog
|
||||
.repositories()
|
||||
.await
|
||||
.tables()
|
||||
.create_or_get(v, namespace_id)
|
||||
.await
|
||||
.expect("table should create OK")
|
||||
.id;
|
||||
|
||||
(v.clone(), id)
|
||||
}
|
||||
})
|
||||
.collect::<FuturesUnordered<_>>()
|
||||
.collect::<hashbrown::HashMap<_, _>>()
|
||||
.await;
|
||||
|
||||
self.enqueue_write(DmlWrite::new(
|
||||
namespace,
|
||||
namespace_id,
|
||||
lines_to_batches(lp, 0).unwrap(),
|
||||
ids,
|
||||
partition_key,
|
||||
DmlMeta::sequenced(
|
||||
Sequence::new(TEST_SHARD_INDEX, SequenceNumber::new(sequence_number)),
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
ALTER TABLE IF EXISTS namespace
ADD COLUMN IF NOT EXISTS retention_period_ns BIGINT DEFAULT NULL;


ALTER TABLE IF EXISTS partition
ADD COLUMN IF NOT EXISTS to_delete BIGINT DEFAULT NULL;
|
|
@ -7,7 +7,7 @@ use futures::stream;
|
|||
use influxdb2_client::models::WriteDataPoint;
|
||||
use mutable_batch_lp::lines_to_batches;
|
||||
use parquet_file::{metadata::IoxMetadata, serialize};
|
||||
use schema::selection::Selection;
|
||||
use schema::Projection;
|
||||
use snafu::{ensure, ResultExt, Snafu};
|
||||
#[cfg(test)]
|
||||
use std::{collections::BTreeMap, sync::Arc};
|
||||
|
@ -349,7 +349,7 @@ impl InnerPointsWriter {
|
|||
|
||||
for (measurement, batch) in batches_by_measurement {
|
||||
let record_batch = batch
|
||||
.to_arrow(Selection::All)
|
||||
.to_arrow(Projection::All)
|
||||
.context(ConvertToArrowSnafu)?;
|
||||
let stream = Box::pin(MemoryStream::new(vec![record_batch]));
|
||||
|
||||
|
|
|
@ -10,7 +10,6 @@ mod schema_pivot;
|
|||
pub mod seriesset;
|
||||
pub(crate) mod split;
|
||||
pub mod stringset;
|
||||
pub use context::{DEFAULT_CATALOG, DEFAULT_SCHEMA};
|
||||
use executor::DedicatedExecutor;
|
||||
use object_store::DynObjectStore;
|
||||
use parquet_file::storage::StorageId;
|
||||
|
|
|
@ -27,7 +27,6 @@ use arrow::record_batch::RecordBatch;
|
|||
use async_trait::async_trait;
|
||||
use datafusion::{
|
||||
catalog::catalog::CatalogProvider,
|
||||
config::OPT_COALESCE_TARGET_BATCH_SIZE,
|
||||
execution::{
|
||||
context::{QueryPlanner, SessionState, TaskContext},
|
||||
runtime_env::RuntimeEnv,
|
||||
|
@ -41,10 +40,10 @@ use datafusion::{
|
|||
},
|
||||
prelude::*,
|
||||
};
|
||||
use datafusion_util::config::{iox_session_config, DEFAULT_CATALOG};
|
||||
use executor::DedicatedExecutor;
|
||||
use futures::TryStreamExt;
|
||||
use observability_deps::tracing::debug;
|
||||
use parquet_file::serialize::ROW_GROUP_WRITE_SIZE;
|
||||
use query_functions::selectors::register_selector_aggregates;
|
||||
use std::{convert::TryInto, fmt, sync::Arc};
|
||||
use trace::{
|
||||
|
@ -55,11 +54,6 @@ use trace::{
|
|||
// Reuse DataFusion error and Result types for this module
|
||||
pub use datafusion::error::{DataFusionError as Error, Result};
|
||||
|
||||
// The default catalog name - this impacts what SQL queries use if not specified
|
||||
pub const DEFAULT_CATALOG: &str = "public";
|
||||
// The default schema name - this impacts what SQL queries use if not specified
|
||||
pub const DEFAULT_SCHEMA: &str = "iox";
|
||||
|
||||
/// This structure implements the DataFusion notion of "query planner"
|
||||
/// and is needed to create plans with the IOx extension nodes.
|
||||
struct IOxQueryPlanner {}
|
||||
|
@ -175,26 +169,9 @@ impl fmt::Debug for IOxSessionConfig {
|
|||
}
|
||||
}
|
||||
|
||||
const BATCH_SIZE: usize = 8 * 1024;
|
||||
const COALESCE_BATCH_SIZE: usize = BATCH_SIZE / 2;
|
||||
|
||||
// ensure read and write work well together
|
||||
// Skip clippy due to <https://github.com/rust-lang/rust-clippy/issues/8159>.
|
||||
#[allow(clippy::assertions_on_constants)]
|
||||
const _: () = assert!(ROW_GROUP_WRITE_SIZE % BATCH_SIZE == 0);
|
||||
|
||||
impl IOxSessionConfig {
|
||||
pub(super) fn new(exec: DedicatedExecutor, runtime: Arc<RuntimeEnv>) -> Self {
|
||||
let session_config = SessionConfig::new()
|
||||
.with_batch_size(BATCH_SIZE)
|
||||
// TODO add function in SessionCofig
|
||||
.set_u64(
|
||||
OPT_COALESCE_TARGET_BATCH_SIZE,
|
||||
COALESCE_BATCH_SIZE.try_into().unwrap(),
|
||||
)
|
||||
.create_default_catalog_and_schema(true)
|
||||
.with_information_schema(true)
|
||||
.with_default_catalog_and_schema(DEFAULT_CATALOG, DEFAULT_SCHEMA);
|
||||
let session_config = iox_session_config();
|
||||
|
||||
Self {
|
||||
exec,
|
||||
|
|
|
@ -31,7 +31,7 @@ use query_functions::{
|
|||
make_window_bound_expr,
|
||||
selectors::{selector_first, selector_last, selector_max, selector_min, SelectorOutput},
|
||||
};
|
||||
use schema::{selection::Selection, InfluxColumnType, Schema, TIME_COLUMN_NAME};
|
||||
use schema::{InfluxColumnType, Projection, Schema, TIME_COLUMN_NAME};
|
||||
use snafu::{ensure, OptionExt, ResultExt, Snafu};
|
||||
use std::collections::HashSet as StdHashSet;
|
||||
use std::{cmp::Reverse, collections::BTreeSet, sync::Arc};
|
||||
|
@ -387,7 +387,7 @@ impl InfluxRpcPlanner {
|
|||
.map(|f| f.name().as_str())
|
||||
.collect::<Vec<&str>>();
|
||||
|
||||
let selection = Selection::Some(&column_names);
|
||||
let selection = Projection::Some(&column_names);
|
||||
|
||||
// If there are delete predicates, we need to scan (or do full plan) the data to eliminate
|
||||
// deleted data before getting tag keys
|
||||
|
|
|
@ -10,17 +10,18 @@
|
|||
clippy::dbg_macro
|
||||
)]
|
||||
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use async_trait::async_trait;
|
||||
use data_types::{ChunkId, ChunkOrder, DeletePredicate, InfluxDbType, PartitionId, TableSummary};
|
||||
use datafusion::{error::DataFusionError, physical_plan::SendableRecordBatchStream};
|
||||
use datafusion::{error::DataFusionError, prelude::SessionContext};
|
||||
use exec::{stringset::StringSet, IOxSessionContext};
|
||||
use hashbrown::HashMap;
|
||||
use observability_deps::tracing::{debug, trace};
|
||||
use parquet_file::storage::ParquetExecInput;
|
||||
use predicate::{rpc_predicate::QueryDatabaseMeta, Predicate, PredicateMatch};
|
||||
use schema::{
|
||||
selection::Selection,
|
||||
sort::{SortKey, SortKeyBuilder},
|
||||
Schema, TIME_COLUMN_NAME,
|
||||
Projection, Schema, TIME_COLUMN_NAME,
|
||||
};
|
||||
use std::{any::Any, collections::BTreeSet, fmt::Debug, iter::FromIterator, sync::Arc};
|
||||
|
||||
|
@ -32,7 +33,6 @@ pub mod pruning;
|
|||
pub mod statistics;
|
||||
pub mod util;
|
||||
|
||||
pub use exec::context::{DEFAULT_CATALOG, DEFAULT_SCHEMA};
|
||||
pub use frontend::common::ScanPlanBuilder;
|
||||
pub use query_functions::group_by::{Aggregate, WindowDuration};
|
||||
|
||||
|
@ -40,7 +40,7 @@ pub use query_functions::group_by::{Aggregate, WindowDuration};
|
|||
/// metadata
|
||||
pub trait QueryChunkMeta {
|
||||
/// Return a summary of the data
|
||||
fn summary(&self) -> Option<Arc<TableSummary>>;
|
||||
fn summary(&self) -> Arc<TableSummary>;
|
||||
|
||||
/// return a reference to the summary of the data held in this chunk
|
||||
fn schema(&self) -> Arc<Schema>;
|
||||
|
@ -174,6 +174,37 @@ pub trait QueryDatabase: QueryDatabaseMeta + Debug + Send + Sync {
|
|||
fn as_meta(&self) -> &dyn QueryDatabaseMeta;
|
||||
}
|
||||
|
||||
/// Raw data of a [`QueryChunk`].
|
||||
#[derive(Debug)]
|
||||
pub enum QueryChunkData {
|
||||
/// In-memory record batches.
|
||||
///
|
||||
/// **IMPORTANT: All batches MUST have the schema that the [chunk reports](QueryChunkMeta::schema).**
|
||||
RecordBatches(Vec<RecordBatch>),
|
||||
|
||||
/// Parquet file.
|
||||
///
|
||||
/// See [`ParquetExecInput`] for details.
|
||||
Parquet(ParquetExecInput),
|
||||
}
|
||||
|
||||
impl QueryChunkData {
|
||||
/// Read data into [`RecordBatch`]es. This is mostly meant for testing!
|
||||
pub async fn read_to_batches(
|
||||
self,
|
||||
schema: Arc<Schema>,
|
||||
session_ctx: &SessionContext,
|
||||
) -> Vec<RecordBatch> {
|
||||
match self {
|
||||
Self::RecordBatches(batches) => batches,
|
||||
Self::Parquet(exec_input) => exec_input
|
||||
.read_to_batches(schema.as_arrow(), Projection::All, session_ctx)
|
||||
.await
|
||||
.unwrap(),
|
||||
}
|
||||
}
|
||||
}
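// Illustrative sketch (an assumption, not taken from this change): a chunk
// that keeps its data in memory can satisfy the new `QueryChunk::data()`
// contract by handing out its batches directly, e.g.
//
//     fn data(&self) -> QueryChunkData {
//         // `self.batches: Vec<RecordBatch>` is a hypothetical field; the
//         // batches must already match the schema this chunk reports.
//         QueryChunkData::RecordBatches(self.batches.clone())
//     }
//
// which mirrors how the ingester's QueryAdaptor implements the method.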
|
||||
|
||||
/// Collection of data that shares the same partition key
|
||||
pub trait QueryChunk: QueryChunkMeta + Debug + Send + Sync + 'static {
|
||||
/// returns the Id of this chunk. Ids are unique within a
|
||||
|
@ -197,10 +228,7 @@ pub trait QueryChunk: QueryChunkMeta + Debug + Send + Sync + 'static {
|
|||
&self,
|
||||
predicate: &Predicate,
|
||||
) -> Result<PredicateMatch, DataFusionError> {
|
||||
Ok(self
|
||||
.summary()
|
||||
.map(|summary| predicate.apply_to_table_summary(&summary, self.schema().as_arrow()))
|
||||
.unwrap_or(PredicateMatch::Unknown))
|
||||
Ok(predicate.apply_to_table_summary(&self.summary(), self.schema().as_arrow()))
|
||||
}
|
||||
|
||||
/// Returns a set of Strings with column names from the specified
|
||||
|
@ -211,7 +239,7 @@ pub trait QueryChunk: QueryChunkMeta + Debug + Send + Sync + 'static {
|
|||
&self,
|
||||
ctx: IOxSessionContext,
|
||||
predicate: &Predicate,
|
||||
columns: Selection<'_>,
|
||||
columns: Projection<'_>,
|
||||
) -> Result<Option<StringSet>, DataFusionError>;
|
||||
|
||||
/// Return a set of Strings containing the distinct values in the
|
||||
|
@ -226,25 +254,10 @@ pub trait QueryChunk: QueryChunkMeta + Debug + Send + Sync + 'static {
|
|||
predicate: &Predicate,
|
||||
) -> Result<Option<StringSet>, DataFusionError>;
|
||||
|
||||
/// Provides access to raw `QueryChunk` data as an
|
||||
/// asynchronous stream of `RecordBatch`es filtered by a *required*
|
||||
/// predicate. Note that not all chunks can evaluate all types of
|
||||
/// predicates and this function will return an error
|
||||
/// if requested to evaluate with a predicate that is not supported
|
||||
/// Provides access to raw [`QueryChunk`] data.
|
||||
///
|
||||
/// This is the analog of the `TableProvider` in DataFusion
|
||||
///
|
||||
/// The reason we can't simply use the `TableProvider` trait
|
||||
/// directly is that the data for a particular Table lives in
|
||||
/// several chunks within a partition, so there needs to be an
|
||||
/// implementation of `TableProvider` that stitches together the
|
||||
/// streams from several different `QueryChunk`s.
|
||||
fn read_filter(
|
||||
&self,
|
||||
ctx: IOxSessionContext,
|
||||
predicate: &Predicate,
|
||||
selection: Selection<'_>,
|
||||
) -> Result<SendableRecordBatchStream, DataFusionError>;
|
||||
/// The engine assumes that minimal work shall be performed to gather the `QueryChunkData`.
|
||||
fn data(&self) -> QueryChunkData;
|
||||
|
||||
/// Returns chunk type. Useful in tests and debug logs.
|
||||
fn chunk_type(&self) -> &str;
|
||||
|
@ -261,7 +274,7 @@ impl<P> QueryChunkMeta for Arc<P>
|
|||
where
|
||||
P: QueryChunkMeta,
|
||||
{
|
||||
fn summary(&self) -> Option<Arc<TableSummary>> {
|
||||
fn summary(&self) -> Arc<TableSummary> {
|
||||
self.as_ref().summary()
|
||||
}
|
||||
|
||||
|
@ -290,7 +303,7 @@ where
|
|||
|
||||
/// Implement ChunkMeta for Arc<dyn QueryChunk>
|
||||
impl QueryChunkMeta for Arc<dyn QueryChunk> {
|
||||
fn summary(&self) -> Option<Arc<TableSummary>> {
|
||||
fn summary(&self) -> Arc<TableSummary> {
|
||||
self.as_ref().summary()
|
||||
}
|
||||
|
||||
|
@ -317,26 +330,32 @@ impl QueryChunkMeta for Arc<dyn QueryChunk> {
|
|||
}
|
||||
}
|
||||
|
||||
/// return true if all the chunks include statistics
|
||||
pub fn chunks_have_stats<'a>(chunks: impl IntoIterator<Item = &'a Arc<dyn QueryChunk>>) -> bool {
|
||||
/// return true if all the chunks include distinct counts for all columns.
|
||||
pub fn chunks_have_distinct_counts<'a>(
|
||||
chunks: impl IntoIterator<Item = &'a Arc<dyn QueryChunk>>,
|
||||
) -> bool {
|
||||
// If at least one of the provided chunks cannot provide stats, we do not
// need to compute potential duplicates; we will treat all of them as having
// duplicates.
|
||||
chunks.into_iter().all(|c| c.summary().is_some())
|
||||
chunks.into_iter().all(|chunk| {
|
||||
chunk
|
||||
.summary()
|
||||
.columns
|
||||
.iter()
|
||||
.all(|col| col.stats.distinct_count().is_some())
|
||||
})
|
||||
}
|
||||
|
||||
pub fn compute_sort_key_for_chunks<'a>(
|
||||
schema: &Schema,
|
||||
chunks: impl Copy + IntoIterator<Item = &'a Arc<dyn QueryChunk>>,
|
||||
) -> SortKey {
|
||||
if !chunks_have_stats(chunks) {
|
||||
if !chunks_have_distinct_counts(chunks) {
|
||||
// The chunks do not have enough stats; return the primary key, which is
// sorted lexicographically but with the time column always last.
|
||||
SortKey::from_columns(schema.primary_key())
|
||||
} else {
|
||||
let summaries = chunks
|
||||
.into_iter()
|
||||
.map(|x| x.summary().expect("Chunk should have summary"));
|
||||
let summaries = chunks.into_iter().map(|x| x.summary());
|
||||
compute_sort_key(summaries)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,12 +37,15 @@ use snafu::{ResultExt, Snafu};
|
|||
|
||||
mod adapter;
|
||||
mod deduplicate;
|
||||
mod metrics;
|
||||
pub mod overlap;
|
||||
mod physical;
|
||||
use self::overlap::group_potential_duplicates;
|
||||
pub use deduplicate::{DeduplicateExec, RecordBatchDeduplicator};
|
||||
pub(crate) use physical::IOxReadFilterNode;
|
||||
|
||||
pub use metrics::parquet_metrics;
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
#[snafu(display(
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use datafusion::physical_plan::{file_format::ParquetExec, metrics::MetricsSet, ExecutionPlan};
|
||||
|
||||
use super::IOxReadFilterNode;
|
||||
|
||||
/// Recursively retrieve metrics from all ParquetExec's in `plan`
|
||||
pub fn parquet_metrics(plan: Arc<dyn ExecutionPlan>) -> Vec<MetricsSet> {
|
||||
let mut output = vec![];
|
||||
parquet_metrics_impl(plan, &mut output);
|
||||
output
|
||||
}
|
||||
|
||||
fn parquet_metrics_impl(plan: Arc<dyn ExecutionPlan>, output: &mut Vec<MetricsSet>) {
|
||||
// Temporarily need to special case `IoxReadFilter` as it
|
||||
// may create `ParquetExec` during execution.
|
||||
//
|
||||
// This can be removed when
|
||||
// <https://github.com/influxdata/influxdb_iox/issues/5897> is
|
||||
// completed
|
||||
if let Some(iox_read_node) = plan.as_any().downcast_ref::<IOxReadFilterNode>() {
|
||||
if let Some(metrics) = iox_read_node.metrics() {
|
||||
output.push(metrics)
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(parquet) = plan.as_any().downcast_ref::<ParquetExec>() {
|
||||
if let Some(metrics) = parquet.metrics() {
|
||||
output.push(metrics)
|
||||
}
|
||||
}
|
||||
|
||||
for child in plan.children() {
|
||||
parquet_metrics_impl(child, output)
|
||||
}
|
||||
}
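// Illustrative usage (an assumption, not part of this file): after executing
// a query, the helper above can be applied to the physical plan to surface
// the ParquetExec metrics, where `plan` is an `Arc<dyn ExecutionPlan>`:
//
//     for metrics in parquet_metrics(Arc::clone(&plan)) {
//         println!("parquet metrics: {metrics}");
//     }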
|
|
@ -112,7 +112,7 @@ pub fn group_potential_duplicates(
|
|||
}
|
||||
|
||||
fn timestamp_min_max(chunk: &dyn QueryChunk) -> Option<TimestampMinMax> {
|
||||
chunk.summary().and_then(|summary| summary.time_range())
|
||||
chunk.summary().time_range()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
@ -1,22 +1,29 @@
|
|||
//! Implementation of a DataFusion PhysicalPlan node across partition chunks
|
||||
|
||||
use super::adapter::SchemaAdapterStream;
|
||||
use crate::{exec::IOxSessionContext, QueryChunk};
|
||||
use crate::{exec::IOxSessionContext, QueryChunk, QueryChunkData};
|
||||
use arrow::datatypes::SchemaRef;
|
||||
use data_types::TableSummary;
|
||||
use datafusion::{
|
||||
datasource::listing::PartitionedFile,
|
||||
error::DataFusionError,
|
||||
execution::context::TaskContext,
|
||||
physical_plan::{
|
||||
execute_stream,
|
||||
expressions::PhysicalSortExpr,
|
||||
file_format::{FileScanConfig, ParquetExec},
|
||||
memory::MemoryStream,
|
||||
metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet},
|
||||
stream::RecordBatchStreamAdapter,
|
||||
DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream, Statistics,
|
||||
},
|
||||
};
|
||||
use futures::TryStreamExt;
|
||||
use observability_deps::tracing::trace;
|
||||
use parking_lot::Mutex;
|
||||
use predicate::Predicate;
|
||||
use schema::{selection::Selection, Schema};
|
||||
use std::{fmt, sync::Arc};
|
||||
use schema::Schema;
|
||||
use std::{collections::HashSet, fmt, sync::Arc};
|
||||
|
||||
/// Implements the DataFusion physical plan interface
|
||||
#[derive(Debug)]
|
||||
|
@ -27,9 +34,18 @@ pub(crate) struct IOxReadFilterNode {
|
|||
iox_schema: Arc<Schema>,
|
||||
chunks: Vec<Arc<dyn QueryChunk>>,
|
||||
predicate: Predicate,
|
||||
|
||||
/// Execution metrics
|
||||
metrics: ExecutionPlanMetricsSet,
|
||||
|
||||
/// remember all ParquetExecs created by this node so we can pass
|
||||
/// along metrics.
|
||||
///
|
||||
/// When we use ParquetExec directly (rather
|
||||
/// than an IOxReadFilterNode) the metric will be directly
|
||||
/// available: <https://github.com/influxdata/influxdb_iox/issues/5897>
|
||||
parquet_execs: Mutex<Vec<Arc<ParquetExec>>>,
|
||||
|
||||
// execution context used for tracing
|
||||
ctx: IOxSessionContext,
|
||||
}
|
||||
|
@ -46,14 +62,21 @@ impl IOxReadFilterNode {
|
|||
predicate: Predicate,
|
||||
) -> Self {
|
||||
Self {
|
||||
ctx,
|
||||
table_name,
|
||||
iox_schema,
|
||||
chunks,
|
||||
predicate,
|
||||
metrics: ExecutionPlanMetricsSet::new(),
|
||||
parquet_execs: Mutex::new(vec![]),
|
||||
ctx,
|
||||
}
|
||||
}
|
||||
|
||||
// Meant for testing -- provide access to the inner parquet execs
|
||||
// that were created
|
||||
fn parquet_execs(&self) -> Vec<Arc<ParquetExec>> {
|
||||
self.parquet_execs.lock().to_vec()
|
||||
}
|
||||
}
|
||||
|
||||
impl ExecutionPlan for IOxReadFilterNode {
|
||||
|
@ -95,6 +118,7 @@ impl ExecutionPlan for IOxReadFilterNode {
|
|||
iox_schema: Arc::clone(&self.iox_schema),
|
||||
chunks,
|
||||
predicate: self.predicate.clone(),
|
||||
parquet_execs: Mutex::new(self.parquet_execs()),
|
||||
metrics: ExecutionPlanMetricsSet::new(),
|
||||
};
|
||||
|
||||
|
@ -104,16 +128,13 @@ impl ExecutionPlan for IOxReadFilterNode {
|
|||
fn execute(
|
||||
&self,
|
||||
partition: usize,
|
||||
_context: Arc<TaskContext>,
|
||||
context: Arc<TaskContext>,
|
||||
) -> datafusion::error::Result<SendableRecordBatchStream> {
|
||||
trace!(partition, "Start IOxReadFilterNode::execute");
|
||||
|
||||
let baseline_metrics = BaselineMetrics::new(&self.metrics, partition);
|
||||
let timer = baseline_metrics.elapsed_compute().timer();
|
||||
|
||||
let schema = self.schema();
|
||||
let fields = schema.fields();
|
||||
let selection_cols = fields.iter().map(|f| f.name() as &str).collect::<Vec<_>>();
|
||||
|
||||
let chunk = Arc::clone(&self.chunks[partition]);
|
||||
|
||||
|
@ -125,32 +146,88 @@ impl ExecutionPlan for IOxReadFilterNode {
|
|||
// restrict the requested selection to the actual columns
|
||||
// available, and use SchemaAdapterStream to pad the rest of
|
||||
// the columns with NULLs if necessary
|
||||
let selection_cols = restrict_selection(selection_cols, &chunk_table_schema);
|
||||
let selection = Selection::Some(&selection_cols);
|
||||
let final_output_column_names: HashSet<_> =
|
||||
schema.fields().iter().map(|f| f.name()).collect();
|
||||
let projection: Vec<_> = chunk_table_schema
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_idx, (_t, field))| final_output_column_names.contains(field.name()))
|
||||
.map(|(idx, _)| idx)
|
||||
.collect();
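// If the selection covers the whole chunk schema in order, collapse the
// projection to `None`: in-memory batches can then be streamed as-is, with
// any output columns the chunk lacks NULL-padded by the SchemaAdapterStream
// below.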
|
||||
let projection = (!((projection.len() == chunk_table_schema.len())
|
||||
&& (projection.iter().enumerate().all(|(a, b)| a == *b))))
|
||||
.then_some(projection);
|
||||
let incomplete_output_schema = projection
|
||||
.as_ref()
|
||||
.map(|projection| {
|
||||
Arc::new(
|
||||
chunk_table_schema
|
||||
.as_arrow()
|
||||
.project(projection)
|
||||
.expect("projection broken"),
|
||||
)
|
||||
})
|
||||
.unwrap_or_else(|| chunk_table_schema.as_arrow());
|
||||
|
||||
let stream = chunk
|
||||
.read_filter(
|
||||
self.ctx.child_ctx("chunk read_filter"),
|
||||
&self.predicate,
|
||||
selection,
|
||||
)
|
||||
.map_err(|e| {
|
||||
DataFusionError::Execution(format!(
|
||||
"Error creating scan for table {} chunk {}: {}",
|
||||
self.table_name,
|
||||
chunk.id(),
|
||||
e
|
||||
))
|
||||
})?;
|
||||
let stream = match chunk.data() {
|
||||
QueryChunkData::RecordBatches(batches) => {
|
||||
let stream = Box::pin(MemoryStream::try_new(
|
||||
batches,
|
||||
incomplete_output_schema,
|
||||
projection,
|
||||
)?);
|
||||
let adapter = SchemaAdapterStream::try_new(stream, schema, baseline_metrics)
|
||||
.map_err(|e| DataFusionError::Internal(e.to_string()))?;
|
||||
Box::pin(adapter) as SendableRecordBatchStream
|
||||
}
|
||||
QueryChunkData::Parquet(exec_input) => {
|
||||
let base_config = FileScanConfig {
|
||||
object_store_url: exec_input.object_store_url,
|
||||
file_schema: Arc::clone(&schema),
|
||||
file_groups: vec![vec![PartitionedFile {
|
||||
object_meta: exec_input.object_meta,
|
||||
partition_values: vec![],
|
||||
range: None,
|
||||
extensions: None,
|
||||
}]],
|
||||
statistics: Statistics::default(),
|
||||
projection: None,
|
||||
limit: None,
|
||||
table_partition_cols: vec![],
|
||||
config_options: context.session_config().config_options(),
|
||||
};
|
||||
let delete_predicates: Vec<_> = chunk
|
||||
.delete_predicates()
|
||||
.iter()
|
||||
.map(|pred| Arc::new(pred.as_ref().clone().into()))
|
||||
.collect();
|
||||
let predicate = self
|
||||
.predicate
|
||||
.clone()
|
||||
.with_delete_predicates(&delete_predicates);
|
||||
let metadata_size_hint = None;
|
||||
|
||||
// all CPU time is now done, pass in baseline metrics to adapter
|
||||
timer.done();
|
||||
let exec = Arc::new(ParquetExec::new(
|
||||
base_config,
|
||||
predicate.filter_expr(),
|
||||
metadata_size_hint,
|
||||
));
|
||||
|
||||
let adapter = SchemaAdapterStream::try_new(stream, schema, baseline_metrics)
|
||||
.map_err(|e| DataFusionError::Internal(e.to_string()))?;
|
||||
self.parquet_execs.lock().push(Arc::clone(&exec));
|
||||
|
||||
let stream = RecordBatchStreamAdapter::new(
|
||||
schema,
|
||||
futures::stream::once(execute_stream(exec, context)).try_flatten(),
|
||||
);
|
||||
|
||||
// Note: No SchemaAdapterStream required here because `ParquetExec` already creates NULL columns for us.
|
||||
|
||||
Box::pin(stream)
|
||||
}
|
||||
};
|
||||
|
||||
trace!(partition, "End IOxReadFilterNode::execute");
|
||||
Ok(Box::pin(adapter))
|
||||
Ok(stream)
|
||||
}
|
||||
|
||||
fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result {

@@ -168,23 +245,27 @@ impl ExecutionPlan for IOxReadFilterNode {
}

fn metrics(&self) -> Option<MetricsSet> {
Some(self.metrics.clone_inner())
let mut metrics = self.metrics.clone_inner();

// copy all metrics from the child parquet_execs
for exec in self.parquet_execs() {
if let Some(parquet_metrics) = exec.metrics() {
for m in parquet_metrics.iter() {
metrics.push(Arc::clone(m))
}
}
}

Some(metrics)
}
|
||||
|
||||
fn statistics(&self) -> Statistics {
let mut combined_summary_option: Option<TableSummary> = None;
for chunk in &self.chunks {
combined_summary_option = match combined_summary_option {
None => Some(
chunk
.summary()
.expect("Chunk should have summary")
.as_ref()
.clone(),
),
None => Some(chunk.summary().as_ref().clone()),
Some(mut combined_summary) => {
combined_summary
.update_from(&chunk.summary().expect("Chunk should have summary"));
combined_summary.update_from(&chunk.summary());
Some(combined_summary)
}
}

@@ -197,17 +278,3 @@ impl ExecutionPlan for IOxReadFilterNode {
.unwrap_or_default()
}
}
|
||||
|
||||
/// Removes any columns that are not present in the schema, returning a possibly
/// restricted set of columns
fn restrict_selection<'a>(
selection_cols: Vec<&'a str>,
chunk_table_schema: &'a Schema,
) -> Vec<&'a str> {
let arrow_schema = chunk_table_schema.as_arrow();

selection_cols
.into_iter()
.filter(|col| arrow_schema.fields().iter().any(|f| f.name() == col))
.collect()
}
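
The projection handling above (restrict the requested columns to what the chunk actually provides, then let `SchemaAdapterStream`/`ParquetExec` pad any missing output columns with NULLs) boils down to computing an optional column-index projection. The standalone sketch below reproduces that computation with plain `&str` column names standing in for the real `Schema`/`Field` types; it is illustrative only and not part of the diff.

```rust
// Keep only chunk columns that appear in the final output schema, and return
// `None` when the projection would be the identity (i.e. "read everything").
fn compute_projection(chunk_columns: &[&str], output_columns: &[&str]) -> Option<Vec<usize>> {
    let wanted: std::collections::HashSet<&str> = output_columns.iter().copied().collect();

    let projection: Vec<usize> = chunk_columns
        .iter()
        .enumerate()
        .filter(|(_idx, col)| wanted.contains(*col))
        .map(|(idx, _)| idx)
        .collect();

    let is_identity = projection.len() == chunk_columns.len()
        && projection.iter().enumerate().all(|(a, b)| a == *b);
    (!is_identity).then_some(projection)
}

fn main() {
    // Chunk has three columns, the query only needs two of them.
    assert_eq!(
        compute_projection(&["tag1", "field1", "time"], &["tag1", "time"]),
        Some(vec![0, 2])
    );
    // Identity projection collapses to `None`.
    assert_eq!(compute_projection(&["tag1", "time"], &["time", "tag1"]), None);
}
```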
|
||||
|
|
|
@@ -87,7 +87,7 @@ pub fn prune_chunks(
/// `false` for every single row.
pub fn prune_summaries(
table_schema: Arc<Schema>,
summaries: &Vec<Option<Arc<TableSummary>>>,
summaries: &Vec<Arc<TableSummary>>,
predicate: &Predicate,
) -> Result<Vec<bool>, NotPrunedReason> {
let filter_expr = match predicate.filter_expr() {

@@ -127,7 +127,7 @@ pub fn prune_summaries(
/// interface required by [`PruningPredicate`]
struct ChunkPruningStatistics<'a> {
table_schema: &'a Schema,
summaries: &'a Vec<Option<Arc<TableSummary>>>,
summaries: &'a Vec<Arc<TableSummary>>,
}

impl<'a> ChunkPruningStatistics<'a> {

@@ -143,10 +143,9 @@ impl<'a> ChunkPruningStatistics<'a> {
&'c self,
column: &'b Column,
) -> impl Iterator<Item = Option<Statistics>> + 'a {
self.summaries.iter().map(|summary| match summary {
Some(summary) => Some(summary.column(&column.name)?.stats.clone()),
None => None,
})
self.summaries
.iter()
.map(|summary| Some(summary.column(&column.name)?.stats.clone()))
}
}
|
||||
|
||||
|
|
|
@ -8,15 +8,14 @@ use crate::{
|
|||
stringset::{StringSet, StringSetRef},
|
||||
ExecutionContextProvider, Executor, ExecutorType, IOxSessionContext,
|
||||
},
|
||||
Predicate, PredicateMatch, QueryChunk, QueryChunkMeta, QueryCompletedToken, QueryDatabase,
|
||||
QueryText,
|
||||
Predicate, PredicateMatch, QueryChunk, QueryChunkData, QueryChunkMeta, QueryCompletedToken,
|
||||
QueryDatabase, QueryText,
|
||||
};
|
||||
use arrow::{
|
||||
array::{
|
||||
ArrayRef, DictionaryArray, Int64Array, StringArray, TimestampNanosecondArray, UInt64Array,
|
||||
},
|
||||
datatypes::{DataType, Int32Type, TimeUnit},
|
||||
error::ArrowError,
|
||||
record_batch::RecordBatch,
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
|
@ -24,16 +23,14 @@ use data_types::{
|
|||
ChunkId, ChunkOrder, ColumnSummary, DeletePredicate, InfluxDbType, PartitionId, StatValues,
|
||||
Statistics, TableSummary,
|
||||
};
|
||||
use datafusion::{error::DataFusionError, physical_plan::SendableRecordBatchStream};
|
||||
use datafusion_util::stream_from_batches;
|
||||
use futures::StreamExt;
|
||||
use datafusion::error::DataFusionError;
|
||||
use hashbrown::HashSet;
|
||||
use observability_deps::tracing::debug;
|
||||
use parking_lot::Mutex;
|
||||
use predicate::rpc_predicate::QueryDatabaseMeta;
|
||||
use schema::{
|
||||
builder::SchemaBuilder, merge::SchemaMerger, selection::Selection, sort::SortKey,
|
||||
InfluxColumnType, Schema, TIME_COLUMN_NAME,
|
||||
builder::SchemaBuilder, merge::SchemaMerger, sort::SortKey, InfluxColumnType, Projection,
|
||||
Schema, TIME_COLUMN_NAME,
|
||||
};
|
||||
use std::{any::Any, collections::BTreeMap, fmt, num::NonZeroU64, sync::Arc};
|
||||
use trace::ctx::SpanContext;
|
||||
|
@ -949,34 +946,8 @@ impl QueryChunk for TestChunk {
|
|||
self.may_contain_pk_duplicates
|
||||
}
|
||||
|
||||
fn read_filter(
|
||||
&self,
|
||||
_ctx: IOxSessionContext,
|
||||
predicate: &Predicate,
|
||||
selection: Selection<'_>,
|
||||
) -> Result<SendableRecordBatchStream, DataFusionError> {
|
||||
self.check_error()?;
|
||||
|
||||
// save the predicate
|
||||
self.predicates.lock().push(predicate.clone());
|
||||
|
||||
let batches = match self
|
||||
.schema
|
||||
.df_projection(selection)
|
||||
.map_err(|e| DataFusionError::External(Box::new(e)))?
|
||||
{
|
||||
None => self.table_data.clone(),
|
||||
Some(projection) => self
|
||||
.table_data
|
||||
.iter()
|
||||
.map(|batch| {
|
||||
let batch = batch.project(&projection)?;
|
||||
Ok(Arc::new(batch))
|
||||
})
|
||||
.collect::<std::result::Result<Vec<_>, ArrowError>>()?,
|
||||
};
|
||||
|
||||
Ok(stream_from_batches(self.schema().as_arrow(), batches))
|
||||
fn data(&self) -> QueryChunkData {
|
||||
QueryChunkData::RecordBatches(self.table_data.iter().map(|b| b.as_ref().clone()).collect())
|
||||
}
|
||||
|
||||
fn chunk_type(&self) -> &str {
|
||||
|
@ -1014,7 +985,7 @@ impl QueryChunk for TestChunk {
|
|||
&self,
|
||||
_ctx: IOxSessionContext,
|
||||
predicate: &Predicate,
|
||||
selection: Selection<'_>,
|
||||
selection: Projection<'_>,
|
||||
) -> Result<Option<StringSet>, DataFusionError> {
|
||||
self.check_error()?;
|
||||
|
||||
|
@ -1023,8 +994,8 @@ impl QueryChunk for TestChunk {
|
|||
|
||||
// only return columns specified in selection
|
||||
let column_names = match selection {
|
||||
Selection::All => self.all_column_names(),
|
||||
Selection::Some(cols) => self.specific_column_names_selection(cols),
|
||||
Projection::All => self.all_column_names(),
|
||||
Projection::Some(cols) => self.specific_column_names_selection(cols),
|
||||
};
|
||||
|
||||
Ok(Some(column_names))
|
||||
|
@ -1040,8 +1011,8 @@ impl QueryChunk for TestChunk {
|
|||
}
|
||||
|
||||
impl QueryChunkMeta for TestChunk {
|
||||
fn summary(&self) -> Option<Arc<TableSummary>> {
|
||||
Some(Arc::new(self.table_summary.clone()))
|
||||
fn summary(&self) -> Arc<TableSummary> {
|
||||
Arc::new(self.table_summary.clone())
|
||||
}
|
||||
|
||||
fn schema(&self) -> Arc<Schema> {
|
||||
|
@ -1071,17 +1042,10 @@ impl QueryChunkMeta for TestChunk {
|
|||
|
||||
/// Return the raw data from the list of chunks
|
||||
pub async fn raw_data(chunks: &[Arc<dyn QueryChunk>]) -> Vec<RecordBatch> {
|
||||
let ctx = IOxSessionContext::with_testing();
|
||||
let mut batches = vec![];
|
||||
for c in chunks {
|
||||
let pred = Predicate::default();
|
||||
let selection = Selection::All;
|
||||
let mut stream = c
|
||||
.read_filter(IOxSessionContext::with_testing(), &pred, selection)
|
||||
.expect("Error in read_filter");
|
||||
while let Some(b) = stream.next().await {
|
||||
let b = b.expect("Error in stream");
|
||||
batches.push(b)
|
||||
}
|
||||
batches.append(&mut c.data().read_to_batches(c.schema(), ctx.inner()).await);
|
||||
}
|
||||
batches
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@ use datafusion::{
|
|||
logical_expr::{
|
||||
expr_rewriter::ExprRewriter, BinaryExpr, ExprSchemable, LogicalPlan, LogicalPlanBuilder,
|
||||
},
|
||||
optimizer::expr_simplifier::{ExprSimplifier, SimplifyContext},
|
||||
optimizer::simplify_expressions::{ExprSimplifier, SimplifyContext},
|
||||
physical_expr::create_physical_expr,
|
||||
physical_plan::{
|
||||
expressions::{col as physical_col, PhysicalSortExpr},
|
||||
|
|
|
@ -18,7 +18,7 @@ metric = { path = "../metric" }
|
|||
mutable_batch_lp = { path = "../mutable_batch_lp" }
|
||||
object_store = "0.5.1"
|
||||
observability_deps = { path = "../observability_deps" }
|
||||
once_cell = { version = "1.15.0", features = ["parking_lot"] }
|
||||
once_cell = { version = "1.16.0", features = ["parking_lot"] }
|
||||
parquet_file = { path = "../parquet_file" }
|
||||
predicate = { path = "../predicate" }
|
||||
iox_query = { path = "../iox_query" }
|
||||
|
|
|
@ -30,11 +30,9 @@ use parquet_file::{
|
|||
metadata::IoxMetadata,
|
||||
storage::{ParquetStorage, StorageId},
|
||||
};
|
||||
use predicate::Predicate;
|
||||
use schema::{
|
||||
selection::Selection,
|
||||
sort::{adjust_sort_key_columns, compute_sort_key, SortKey},
|
||||
Schema,
|
||||
Projection, Schema,
|
||||
};
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
use uuid::Uuid;
|
||||
|
@ -389,14 +387,13 @@ impl TestTable {
|
|||
Arc::new(schema),
|
||||
self.catalog.parquet_store.clone(),
|
||||
);
|
||||
let rx = chunk
|
||||
.read_filter(
|
||||
&Predicate::default(),
|
||||
Selection::All,
|
||||
chunk
|
||||
.parquet_exec_input()
|
||||
.read_to_batches(
|
||||
chunk.schema().as_arrow(),
|
||||
Projection::All,
|
||||
&chunk.store().test_df_context(),
|
||||
)
|
||||
.unwrap();
|
||||
datafusion::physical_plan::common::collect(rx)
|
||||
.await
|
||||
.unwrap()
|
||||
}
|
||||
|
@ -753,8 +750,8 @@ impl TestParquetFileBuilder {
|
|||
pub fn with_line_protocol(self, line_protocol: &str) -> Self {
|
||||
let (table, batch) = lp_to_mutable_batch(line_protocol);
|
||||
|
||||
let schema = batch.schema(Selection::All).unwrap();
|
||||
let record_batch = batch.to_arrow(Selection::All).unwrap();
|
||||
let schema = batch.schema(Projection::All).unwrap();
|
||||
let record_batch = batch.to_arrow(Projection::All).unwrap();
|
||||
|
||||
self.with_record_batch(record_batch)
|
||||
.with_table(table)
|
||||
|
|
|
@ -95,6 +95,7 @@ impl<C: QuerierHandler + std::fmt::Debug + 'static> ServerType for QuerierServer
|
|||
);
|
||||
add_service!(builder, self.server.handler().schema_service());
|
||||
add_service!(builder, self.server.handler().catalog_service());
|
||||
add_service!(builder, self.server.handler().object_store_service());
|
||||
|
||||
serve_builder!(builder);
|
||||
|
||||
|
@ -204,7 +205,11 @@ pub async fn create_querier_server_type(
|
|||
)
|
||||
.await?,
|
||||
);
|
||||
let querier_handler = Arc::new(QuerierHandlerImpl::new(args.catalog, Arc::clone(&database)));
|
||||
let querier_handler = Arc::new(QuerierHandlerImpl::new(
|
||||
args.catalog,
|
||||
Arc::clone(&database),
|
||||
Arc::clone(&args.object_store),
|
||||
));
|
||||
|
||||
let querier = QuerierServer::new(args.metric_registry, querier_handler);
|
||||
Ok(Arc::new(QuerierServerType::new(
|
||||
|
|
|
@ -289,12 +289,11 @@ pub async fn create_router_server_type(
|
|||
let shard_service = init_shard_service(sharder, write_buffer_config, catalog).await?;
|
||||
|
||||
// Initialise the API delegates
|
||||
let handler_stack = Arc::new(handler_stack);
|
||||
let http = HttpDelegate::new(
|
||||
common_state.run_config().max_http_request_size,
|
||||
request_limit,
|
||||
namespace_resolver,
|
||||
Arc::clone(&handler_stack),
|
||||
handler_stack,
|
||||
&metrics,
|
||||
);
|
||||
let grpc = GrpcDelegate::new(schema_catalog, object_store, shard_service);
|
||||
|
|
|
@ -12,7 +12,7 @@ tracing-subscriber = "0.3"
|
|||
workspace-hack = { path = "../workspace-hack"}
|
||||
|
||||
[dev-dependencies] # In alphabetical order
|
||||
once_cell = { version = "1.15.0", features = ["parking_lot"] }
|
||||
once_cell = { version = "1.16.0", features = ["parking_lot"] }
|
||||
parking_lot = "0.12"
|
||||
regex = "1"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
|
|
|
@ -22,7 +22,7 @@ use arrow::record_batch::RecordBatch;
|
|||
use data_types::StatValues;
|
||||
use hashbrown::HashMap;
|
||||
use iox_time::Time;
|
||||
use schema::selection::Selection;
|
||||
use schema::Projection;
|
||||
use schema::{builder::SchemaBuilder, Schema, TIME_COLUMN_NAME};
|
||||
use snafu::{OptionExt, ResultExt, Snafu};
|
||||
use std::{collections::BTreeSet, ops::Range};
|
||||
|
@ -85,10 +85,10 @@ impl MutableBatch {
|
|||
/// Returns the schema for a given selection
|
||||
///
|
||||
/// If Selection::All the returned columns are sorted by name
|
||||
pub fn schema(&self, selection: Selection<'_>) -> Result<Schema> {
|
||||
pub fn schema(&self, selection: Projection<'_>) -> Result<Schema> {
|
||||
let mut schema_builder = SchemaBuilder::new();
|
||||
let schema = match selection {
|
||||
Selection::All => {
|
||||
Projection::All => {
|
||||
for (column_name, column_idx) in self.column_names.iter() {
|
||||
let column = &self.columns[*column_idx];
|
||||
schema_builder.influx_column(column_name, column.influx_type());
|
||||
|
@ -99,7 +99,7 @@ impl MutableBatch {
|
|||
.context(InternalSchemaSnafu)?
|
||||
.sort_fields_by_name()
|
||||
}
|
||||
Selection::Some(cols) => {
|
||||
Projection::Some(cols) => {
|
||||
for col in cols {
|
||||
let column = self.column(col)?;
|
||||
schema_builder.influx_column(col, column.influx_type());
|
||||
|
@ -112,7 +112,7 @@ impl MutableBatch {
|
|||
}
|
||||
|
||||
/// Convert all the data in this `MutableBatch` into a `RecordBatch`
|
||||
pub fn to_arrow(&self, selection: Selection<'_>) -> Result<RecordBatch> {
|
||||
pub fn to_arrow(&self, selection: Projection<'_>) -> Result<RecordBatch> {
|
||||
let schema = self.schema(selection)?;
|
||||
let columns = schema
|
||||
.iter()
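
For readers following the `Selection` → `Projection` rename that runs through the rest of this diff, here is a short hedged usage sketch of the renamed API. The crate and function names are taken from the test code further down; the line-protocol input is made up for illustration.

```rust
use mutable_batch_lp::lines_to_batches;
use schema::Projection;

fn main() {
    // Parse one line of line protocol into a per-table MutableBatch.
    let mut batches = lines_to_batches("cpu,host=a usage=1 10", 0).unwrap();
    let batch = batches.remove("cpu").unwrap();

    // All columns (sorted by name) ...
    let all = batch.to_arrow(Projection::All).unwrap();
    // ... or an explicit subset.
    let some = batch.to_arrow(Projection::Some(&["host", "time"])).unwrap();

    assert_eq!(all.num_rows(), some.num_rows());
}
```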
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use arrow_util::assert_batches_eq;
|
||||
use data_types::{StatValues, Statistics};
|
||||
use mutable_batch::{writer::Writer, MutableBatch};
|
||||
use schema::selection::Selection;
|
||||
use schema::Projection;
|
||||
use std::{collections::BTreeMap, num::NonZeroU64};
|
||||
|
||||
#[test]
|
||||
|
@ -56,7 +56,7 @@ fn test_extend() {
|
|||
|
||||
writer.commit();
|
||||
|
||||
let a_before = a.to_arrow(Selection::All).unwrap();
|
||||
let a_before = a.to_arrow(Projection::All).unwrap();
|
||||
|
||||
a.extend_from(&b).unwrap();
|
||||
|
||||
|
@ -90,7 +90,7 @@ fn test_extend() {
|
|||
"| v1 | v5 | 1970-01-01T00:00:00.000000012Z |",
|
||||
"+------+------+--------------------------------+",
|
||||
],
|
||||
&[b.to_arrow(Selection::All).unwrap()]
|
||||
&[b.to_arrow(Projection::All).unwrap()]
|
||||
);
|
||||
|
||||
assert_batches_eq!(
|
||||
|
@ -113,7 +113,7 @@ fn test_extend() {
|
|||
"| v1 | | v5 | 1970-01-01T00:00:00.000000012Z |",
|
||||
"+------+------+------+--------------------------------+",
|
||||
],
|
||||
&[a.to_arrow(Selection::All).unwrap()]
|
||||
&[a.to_arrow(Projection::All).unwrap()]
|
||||
);
|
||||
|
||||
let stats: BTreeMap<_, _> = a.columns().map(|(k, v)| (k.as_str(), v.stats())).collect();
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use arrow_util::assert_batches_eq;
|
||||
use data_types::{StatValues, Statistics};
|
||||
use mutable_batch::{writer::Writer, MutableBatch};
|
||||
use schema::selection::Selection;
|
||||
use schema::Projection;
|
||||
use std::{collections::BTreeMap, num::NonZeroU64};
|
||||
|
||||
#[test]
|
||||
|
@ -68,7 +68,7 @@ fn test_extend_range() {
|
|||
"| | v2 | 1970-01-01T00:00:00.000000004Z |",
|
||||
"+-----+------+--------------------------------+",
|
||||
],
|
||||
&[a.to_arrow(Selection::All).unwrap()]
|
||||
&[a.to_arrow(Projection::All).unwrap()]
|
||||
);
|
||||
|
||||
assert_batches_eq!(
|
||||
|
@ -86,7 +86,7 @@ fn test_extend_range() {
|
|||
"| | v1 | v2 | 1970-01-01T00:00:00.000000012Z |",
|
||||
"+-------+------+------+--------------------------------+",
|
||||
],
|
||||
&[b.to_arrow(Selection::All).unwrap()]
|
||||
&[b.to_arrow(Projection::All).unwrap()]
|
||||
);
|
||||
|
||||
a.extend_from_range(&b, 1..4).unwrap();
|
||||
|
@ -106,7 +106,7 @@ fn test_extend_range() {
|
|||
"| | | | v1 | 1970-01-01T00:00:00.000000008Z |",
|
||||
"+-------+-----+------+------+--------------------------------+",
|
||||
],
|
||||
&[a.to_arrow(Selection::All).unwrap()]
|
||||
&[a.to_arrow(Projection::All).unwrap()]
|
||||
);
|
||||
|
||||
let stats: BTreeMap<_, _> = a.columns().map(|(k, v)| (k.as_str(), v.stats())).collect();
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use arrow_util::assert_batches_eq;
|
||||
use data_types::{StatValues, Statistics};
|
||||
use mutable_batch::{writer::Writer, MutableBatch, TimestampSummary};
|
||||
use schema::selection::Selection;
|
||||
use schema::Projection;
|
||||
use std::num::NonZeroU64;
|
||||
|
||||
fn get_stats(batch: &MutableBatch) -> Vec<(&str, Statistics)> {
|
||||
|
@ -158,7 +158,7 @@ fn test_basic() {
|
|||
),
|
||||
];
|
||||
|
||||
assert_batches_eq!(expected_data, &[batch.to_arrow(Selection::All).unwrap()]);
|
||||
assert_batches_eq!(expected_data, &[batch.to_arrow(Projection::All).unwrap()]);
|
||||
assert_eq!(stats, expected_stats);
|
||||
|
||||
let mut writer = Writer::new(&mut batch, 4);
|
||||
|
@ -175,7 +175,7 @@ fn test_basic() {
|
|||
let stats: Vec<_> = get_stats(&batch);
|
||||
|
||||
// Writer dropped, should not impact stats or data
|
||||
assert_batches_eq!(expected_data, &[batch.to_arrow(Selection::All).unwrap()]);
|
||||
assert_batches_eq!(expected_data, &[batch.to_arrow(Projection::All).unwrap()]);
|
||||
assert_eq!(stats, expected_stats);
|
||||
|
||||
let err = Writer::new(&mut batch, 1)
|
||||
|
@ -208,7 +208,7 @@ fn test_basic() {
|
|||
let stats: Vec<_> = get_stats(&batch);
|
||||
|
||||
// Writer not committed, should not impact stats or data
|
||||
assert_batches_eq!(expected_data, &[batch.to_arrow(Selection::All).unwrap()]);
|
||||
assert_batches_eq!(expected_data, &[batch.to_arrow(Projection::All).unwrap()]);
|
||||
assert_eq!(stats, expected_stats);
|
||||
|
||||
let mut writer = Writer::new(&mut batch, 17);
|
||||
|
@ -330,7 +330,7 @@ fn test_basic() {
|
|||
),
|
||||
];
|
||||
|
||||
assert_batches_eq!(expected_data, &[batch.to_arrow(Selection::All).unwrap()]);
|
||||
assert_batches_eq!(expected_data, &[batch.to_arrow(Projection::All).unwrap()]);
|
||||
assert_eq!(stats, expected_stats);
|
||||
|
||||
let mut expected_timestamps = TimestampSummary::default();
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use arrow_util::assert_batches_eq;
|
||||
use mutable_batch::writer::Writer;
|
||||
use mutable_batch::MutableBatch;
|
||||
use schema::selection::Selection;
|
||||
use schema::Projection;
|
||||
|
||||
#[test]
|
||||
fn test_new_column() {
|
||||
|
@ -23,7 +23,7 @@ fn test_new_column() {
|
|||
"+-------+",
|
||||
];
|
||||
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Selection::All).unwrap()]);
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Projection::All).unwrap()]);
|
||||
|
||||
let mut writer = Writer::new(&mut batch, 1);
|
||||
writer
|
||||
|
@ -33,5 +33,5 @@ fn test_new_column() {
|
|||
std::mem::drop(writer);
|
||||
|
||||
// Should not include tag1 column
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Selection::All).unwrap()]);
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Projection::All).unwrap()]);
|
||||
}
|
||||
|
|
|
@ -18,7 +18,7 @@ use data_types::{IsNan, PartitionTemplate, StatValues, Statistics, TemplatePart}
|
|||
use hashbrown::HashSet;
|
||||
use mutable_batch::{writer::Writer, MutableBatch, PartitionWrite, WritePayload};
|
||||
use rand::prelude::*;
|
||||
use schema::selection::Selection;
|
||||
use schema::Projection;
|
||||
use std::{collections::BTreeMap, num::NonZeroU64, ops::Range, sync::Arc};
|
||||
|
||||
fn make_rng() -> StdRng {
|
||||
|
@ -391,7 +391,7 @@ fn test_writer_fuzz() {
|
|||
expected.concat(&ret.filter(&ranges));
|
||||
}
|
||||
|
||||
let actual = batch.to_arrow(Selection::All).unwrap();
|
||||
let actual = batch.to_arrow(Projection::All).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
arrow_util::display::pretty_format_batches(&[actual]).unwrap(),
|
||||
|
|
|
@ -327,7 +327,7 @@ mod tests {
|
|||
use super::*;
|
||||
use arrow_util::assert_batches_eq;
|
||||
use assert_matches::assert_matches;
|
||||
use schema::selection::Selection;
|
||||
use schema::Projection;
|
||||
|
||||
#[test]
|
||||
fn test_basic() {
|
||||
|
@ -353,7 +353,7 @@ mod tests {
|
|||
"| 2 | v1 | v2 | 1970-01-01T00:00:00.000000005Z | |",
|
||||
"+------+------+------+--------------------------------+-----+",
|
||||
],
|
||||
&[batches["cpu"].to_arrow(Selection::All).unwrap()]
|
||||
&[batches["cpu"].to_arrow(Projection::All).unwrap()]
|
||||
);
|
||||
|
||||
assert_batches_eq!(
|
||||
|
@ -365,7 +365,7 @@ mod tests {
|
|||
"| 2 | v5 | 1970-01-01T00:00:00.000000001Z |",
|
||||
"+------+------+--------------------------------+",
|
||||
],
|
||||
&[batches["mem"].to_arrow(Selection::All).unwrap()]
|
||||
&[batches["mem"].to_arrow(Projection::All).unwrap()]
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -378,7 +378,7 @@ m f1=10i 1639612800000000000
|
|||
let batches = lines_to_batches(lp, 5).unwrap();
|
||||
assert_eq!(batches.len(), 1);
|
||||
|
||||
let batch = batches["m"].to_arrow(Selection::All).unwrap();
|
||||
let batch = batches["m"].to_arrow(Projection::All).unwrap();
|
||||
assert_batches_eq!(
|
||||
&[
|
||||
"+-----+----+----------------------+",
|
||||
|
@ -412,7 +412,7 @@ m b=t 1639612800000000000
|
|||
let batches = lines_to_batches(lp, 5).unwrap();
|
||||
assert_eq!(batches.len(), 1);
|
||||
|
||||
let batch = batches["m"].to_arrow(Selection::All).unwrap();
|
||||
let batch = batches["m"].to_arrow(Projection::All).unwrap();
|
||||
assert_batches_eq!(
|
||||
&[
|
||||
"+------+---+----------------------+---+",
|
||||
|
@ -464,7 +464,7 @@ m b=t 1639612800000000000
|
|||
"| 1970-01-01T00:00:00Z | 2 |",
|
||||
"+----------------------+-----+",
|
||||
],
|
||||
&[batches["m1"].to_arrow(Selection::All).unwrap()]
|
||||
&[batches["m1"].to_arrow(Projection::All).unwrap()]
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -484,7 +484,7 @@ m b=t 1639612800000000000
|
|||
"| 1970-01-01T00:00:00Z | 2 |",
|
||||
"+----------------------+-----+",
|
||||
],
|
||||
&[batches["m1"].to_arrow(Selection::All).unwrap()]
|
||||
&[batches["m1"].to_arrow(Projection::All).unwrap()]
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
@@ -60,16 +60,21 @@ pub type Result<T, E = Error> = std::result::Result<T, E>;
/// Decodes a [`DatabaseBatch`] to a map of [`MutableBatch`] keyed by table name,
/// plus a map from table ID to table name
pub fn decode_database_batch(
database_batch: &DatabaseBatch,
) -> Result<HashMap<String, MutableBatch>> {
let mut ret = HashMap::with_capacity(database_batch.table_batches.len());
) -> Result<(HashMap<String, MutableBatch>, HashMap<i64, String>)> {
let mut name_to_data = HashMap::with_capacity(database_batch.table_batches.len());
let mut id_to_name = HashMap::with_capacity(database_batch.table_batches.len());

for table_batch in &database_batch.table_batches {
let (_, batch) = ret
let (_, batch) = name_to_data
.raw_entry_mut()
.from_key(table_batch.table_name.as_str())
.or_insert_with(|| (table_batch.table_name.clone(), MutableBatch::new()));

id_to_name.insert(table_batch.table_id, table_batch.table_name.clone());

write_table_batch(batch, table_batch)?;
}
Ok(ret)
Ok((name_to_data, id_to_name))
}
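
A hedged sketch of the new consumer-side shape: the function name and return type are exactly the ones shown above, but the module path `mutable_batch_pb::decode` and the error handling are my assumptions.

```rust
use generated_types::influxdata::pbdata::v1::DatabaseBatch;
use mutable_batch_pb::decode::decode_database_batch; // assumed module path

fn summarize(database_batch: &DatabaseBatch) {
    // The decoder now hands back the per-table data *and* a table ID -> name map.
    let (name_to_data, id_to_name) =
        decode_database_batch(database_batch).expect("valid database batch");

    for (table_id, table_name) in &id_to_name {
        let rows = name_to_data[table_name].rows();
        println!("table {table_name} (id {table_id}) has {rows} rows");
    }
}
```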
|
||||
|
||||
/// Writes the provided [`TableBatch`] to a [`MutableBatch`] on error any changes made
|
||||
|
@ -432,7 +437,7 @@ fn pb_value_type(column: &str, values: &PbValues) -> Result<InfluxFieldType> {
|
|||
mod tests {
|
||||
use arrow_util::assert_batches_eq;
|
||||
use generated_types::influxdata::pbdata::v1::InternedStrings;
|
||||
use schema::selection::Selection;
|
||||
use schema::Projection;
|
||||
|
||||
use super::*;
|
||||
|
||||
|
@ -620,6 +625,7 @@ mod tests {
|
|||
),
|
||||
],
|
||||
row_count: 5,
|
||||
table_id: 42,
|
||||
};
|
||||
|
||||
let mut batch = MutableBatch::new();
|
||||
|
@ -638,7 +644,7 @@ mod tests {
|
|||
"+-----+-----+------+------+--------------------------------+-----+",
|
||||
];
|
||||
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Selection::All).unwrap()]);
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Projection::All).unwrap()]);
|
||||
|
||||
table_batch.columns.push(table_batch.columns[0].clone());
|
||||
|
||||
|
@ -658,7 +664,7 @@ mod tests {
|
|||
.to_string();
|
||||
assert_eq!(err, "table batch must contain time column");
|
||||
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Selection::All).unwrap()]);
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Projection::All).unwrap()]);
|
||||
|
||||
// Nulls in time column -> error
|
||||
time.null_mask = vec![1];
|
||||
|
@ -669,7 +675,7 @@ mod tests {
|
|||
.to_string();
|
||||
assert_eq!(err, "time column must not contain nulls");
|
||||
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Selection::All).unwrap()]);
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Projection::All).unwrap()]);
|
||||
|
||||
// Missing values -> error
|
||||
table_batch.columns[0].values.take().unwrap();
|
||||
|
@ -679,7 +685,7 @@ mod tests {
|
|||
.to_string();
|
||||
assert_eq!(err, "column with no values: tag1");
|
||||
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Selection::All).unwrap()]);
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Projection::All).unwrap()]);
|
||||
|
||||
// No data -> error
|
||||
table_batch.columns[0].values = Some(PbValues {
|
||||
|
@ -698,7 +704,7 @@ mod tests {
|
|||
.to_string();
|
||||
assert_eq!(err, "column with no values: tag1");
|
||||
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Selection::All).unwrap()]);
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Projection::All).unwrap()]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -759,6 +765,7 @@ mod tests {
|
|||
),
|
||||
],
|
||||
row_count: 6,
|
||||
table_id: 42,
|
||||
};
|
||||
|
||||
let mut batch = MutableBatch::new();
|
||||
|
@ -777,7 +784,7 @@ mod tests {
|
|||
"+----------+----+--------+-------+------+--------------------------------+",
|
||||
];
|
||||
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Selection::All).unwrap()]);
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Projection::All).unwrap()]);
|
||||
|
||||
// Try to write 6 rows expecting an error
|
||||
let mut try_write = |other: PbColumn, expected_err: &str| {
|
||||
|
@ -792,6 +799,7 @@ mod tests {
|
|||
other,
|
||||
],
|
||||
row_count: 6,
|
||||
table_id: 42,
|
||||
};
|
||||
|
||||
let err = write_table_batch(&mut batch, &table_batch)
|
||||
|
@ -799,7 +807,7 @@ mod tests {
|
|||
.to_string();
|
||||
|
||||
assert_eq!(err, expected_err);
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Selection::All).unwrap()]);
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Projection::All).unwrap()]);
|
||||
};
|
||||
|
||||
try_write(
|
||||
|
@ -899,6 +907,7 @@ mod tests {
|
|||
),
|
||||
],
|
||||
row_count: 10,
|
||||
table_id: 42,
|
||||
};
|
||||
|
||||
let mut batch = MutableBatch::new();
|
||||
|
@ -922,7 +931,7 @@ mod tests {
|
|||
"+-----+--------------------------------+",
|
||||
];
|
||||
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Selection::All).unwrap()]);
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Projection::All).unwrap()]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -936,6 +945,7 @@ mod tests {
|
|||
vec![],
|
||||
)],
|
||||
row_count: 9,
|
||||
table_id: 42,
|
||||
};
|
||||
|
||||
let mut batch = MutableBatch::new();
|
||||
|
@ -958,7 +968,7 @@ mod tests {
|
|||
"+--------------------------------+",
|
||||
];
|
||||
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Selection::All).unwrap()]);
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Projection::All).unwrap()]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -1038,6 +1048,7 @@ mod tests {
|
|||
with_i64(column("time", SemanticType::Time), vec![1, 2, 3], vec![]),
|
||||
],
|
||||
row_count: 9,
|
||||
table_id: 42,
|
||||
};
|
||||
|
||||
let mut batch = MutableBatch::new();
|
||||
|
@ -1060,13 +1071,14 @@ mod tests {
|
|||
"+-------+-----+-----+-----+-----+-----+-----+-----+-----+--------------------------------+-----+",
|
||||
];
|
||||
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Selection::All).unwrap()]);
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Projection::All).unwrap()]);
|
||||
|
||||
// we need at least one value though
|
||||
let table_batch = TableBatch {
|
||||
table_name: "table".to_string(),
|
||||
columns: vec![with_i64(column("time", SemanticType::Time), vec![], vec![])],
|
||||
row_count: 9,
|
||||
table_id: 42,
|
||||
};
|
||||
|
||||
let mut batch = MutableBatch::new();
|
||||
|
|
|
@@ -12,19 +12,38 @@ use mutable_batch::MutableBatch;
use schema::InfluxColumnType;

/// Convert a [`DmlWrite`] to a [`DatabaseBatch`]
pub fn encode_write(db_name: &str, write: &DmlWrite) -> DatabaseBatch {
pub fn encode_write(db_name: &str, database_id: i64, write: &DmlWrite) -> DatabaseBatch {
DatabaseBatch {
database_name: db_name.to_string(),
table_batches: write
.tables()
.map(|(table_name, batch)| encode_batch(table_name, batch))
.map(|(table_name, batch)| {
// Temporary code.
//
// Once only IDs are pushed over the network this extra lookup
// can be removed.
//
// Safety: this code path is invoked only in the producer, and
// therefore accessing the table IDs is acceptable. See
// DmlWrite for context.
let table_id = unsafe {
write.table_id(table_name).unwrap_or_else(|| {
panic!(
"no table ID mapping found for {} table {}",
db_name, table_name
)
})
};
encode_batch(table_name, table_id.get(), batch)
})
.collect(),
partition_key: write.partition_key().to_string(),
database_id,
}
}

/// Convert a [`MutableBatch`] to [`TableBatch`]
pub fn encode_batch(table_name: &str, batch: &MutableBatch) -> TableBatch {
pub fn encode_batch(table_name: &str, table_id: i64, batch: &MutableBatch) -> TableBatch {
TableBatch {
table_name: table_name.to_string(),
columns: batch

@@ -45,6 +64,7 @@ pub fn encode_batch(table_name: &str, batch: &MutableBatch) -> TableBatch {
})
.collect(),
row_count: batch.rows() as u32,
table_id,
}
}
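
A hedged producer-side sketch of the new calling convention, mirroring the benchmark further down in this diff. The `DmlWrite::new` argument order, the table ID mapping, and the namespace ID of 42 are taken from that benchmark rather than verified against the crates here.

```rust
use data_types::{NamespaceId, TableId};
use dml::DmlWrite;
use mutable_batch_lp::lines_to_batches;
use mutable_batch_pb::encode::encode_write;

fn main() {
    let batches = lines_to_batches("cpu,host=a usage=1 10", 0).unwrap();

    // The write now has to carry a table name -> TableId mapping ...
    let ids = batches
        .keys()
        .enumerate()
        .map(|(i, name)| (name.clone(), TableId::new(i as _)))
        .collect();
    let write = DmlWrite::new(
        "test_db",
        NamespaceId::new(42),
        batches,
        ids,
        "bananas".into(),
        Default::default(),
    );

    // ... and the encoder additionally takes the database (namespace) ID.
    let database_batch = encode_write("test_db", 42, &write);
    assert_eq!(database_batch.database_id, 42);
}
```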
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@ use arrow_util::assert_batches_eq;
|
|||
use data_types::{PartitionTemplate, TemplatePart};
|
||||
use mutable_batch::{writer::Writer, MutableBatch, PartitionWrite, WritePayload};
|
||||
use mutable_batch_pb::{decode::write_table_batch, encode::encode_batch};
|
||||
use schema::selection::Selection;
|
||||
use schema::Projection;
|
||||
|
||||
#[test]
|
||||
fn test_encode_decode() {
|
||||
|
@ -28,14 +28,15 @@ fn test_encode_decode() {
|
|||
"+-------+------+-------+-----+------+--------------------------------+-----+",
|
||||
];
|
||||
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Selection::All).unwrap()]);
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Projection::All).unwrap()]);
|
||||
|
||||
let encoded = encode_batch("foo", &batch);
|
||||
let encoded = encode_batch("foo", 42, &batch);
|
||||
assert_eq!(encoded.table_id, 42);
|
||||
|
||||
let mut batch = MutableBatch::new();
|
||||
write_table_batch(&mut batch, &encoded).unwrap();
|
||||
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Selection::All).unwrap()]);
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Projection::All).unwrap()]);
|
||||
}
|
||||
|
||||
// This test asserts columns containing no values do not prevent an encoded
|
||||
|
@ -139,7 +140,9 @@ fn test_encode_decode_null_columns_issue_4272() {
|
|||
.write_to_batch(&mut got)
|
||||
.expect("should write");
|
||||
|
||||
let encoded = encode_batch("bananas", &got);
|
||||
let encoded = encode_batch("bananas", 24, &got);
|
||||
assert_eq!(encoded.table_id, 24);
|
||||
|
||||
let mut batch = MutableBatch::new();
|
||||
// Without the fix for #4272 this deserialisation call would fail.
|
||||
write_table_batch(&mut batch, &encoded).unwrap();
|
||||
|
@ -151,7 +154,7 @@ fn test_encode_decode_null_columns_issue_4272() {
|
|||
"| 1 | 1970-01-01T00:00:00.000000160Z |",
|
||||
"+---+--------------------------------+",
|
||||
];
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Selection::All).unwrap()]);
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Projection::All).unwrap()]);
|
||||
|
||||
// And finally assert the "1970-07-05" round-trip
|
||||
let mut got = MutableBatch::default();
|
||||
|
@ -161,7 +164,9 @@ fn test_encode_decode_null_columns_issue_4272() {
|
|||
.write_to_batch(&mut got)
|
||||
.expect("should write");
|
||||
|
||||
let encoded = encode_batch("bananas", &got);
|
||||
let encoded = encode_batch("bananas", 42, &got);
|
||||
assert_eq!(encoded.table_id, 42);
|
||||
|
||||
let mut batch = MutableBatch::new();
|
||||
// Without the fix for #4272 this deserialisation call would fail.
|
||||
write_table_batch(&mut batch, &encoded).unwrap();
|
||||
|
@ -173,5 +178,5 @@ fn test_encode_decode_null_columns_issue_4272() {
|
|||
"| 1 | 1970-07-05T06:32:41.568756160Z |",
|
||||
"+---+--------------------------------+",
|
||||
];
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Selection::All).unwrap()]);
|
||||
assert_batches_eq!(expected, &[batch.to_arrow(Projection::All).unwrap()]);
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@ prost = "0.11"
|
|||
[dev-dependencies]
|
||||
bytes = "1.2"
|
||||
criterion = { version = "0.4", default-features = false, features = ["rayon"]}
|
||||
data_types = { path = "../data_types", default-features = false }
|
||||
|
||||
[[bench]]
|
||||
name = "write_lp"
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
use bytes::{Bytes, BytesMut};
|
||||
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
|
||||
use data_types::{NamespaceId, TableId};
|
||||
use dml::DmlWrite;
|
||||
use generated_types::influxdata::pbdata::v1::DatabaseBatch;
|
||||
use mutable_batch::MutableBatch;
|
||||
|
@ -12,8 +13,21 @@ fn generate_pbdata_bytes() -> Vec<(String, (usize, Bytes))> {
|
|||
.into_iter()
|
||||
.map(|(bench, lp)| {
|
||||
let batches = lines_to_batches(&lp, 0).unwrap();
|
||||
let write = DmlWrite::new("test_db", batches, "bananas".into(), Default::default());
|
||||
let database_batch = mutable_batch_pb::encode::encode_write("db", &write);
|
||||
let ids = batches
|
||||
.keys()
|
||||
.enumerate()
|
||||
.map(|(i, name)| (name.clone(), TableId::new(i as _)))
|
||||
.collect();
|
||||
|
||||
let write = DmlWrite::new(
|
||||
"test_db",
|
||||
NamespaceId::new(42),
|
||||
batches,
|
||||
ids,
|
||||
"bananas".into(),
|
||||
Default::default(),
|
||||
);
|
||||
let database_batch = mutable_batch_pb::encode::encode_write("db", 42, &write);
|
||||
|
||||
let mut bytes = BytesMut::new();
|
||||
database_batch.encode(&mut bytes).unwrap();
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
//! A metadata summary of a Parquet file in object storage, with the ability to
|
||||
//! download & execute a scan.
|
||||
|
||||
use crate::{storage::ParquetStorage, ParquetFilePath};
|
||||
use crate::{
|
||||
storage::{ParquetExecInput, ParquetStorage},
|
||||
ParquetFilePath,
|
||||
};
|
||||
use data_types::{ParquetFile, TimestampMinMax};
|
||||
use datafusion::{physical_plan::SendableRecordBatchStream, prelude::SessionContext};
|
||||
use predicate::Predicate;
|
||||
use schema::{selection::Selection, Schema};
|
||||
use schema::{Projection, Schema};
|
||||
use std::{collections::BTreeSet, mem, sync::Arc};
|
||||
use uuid::Uuid;
|
||||
|
||||
|
@ -60,11 +61,11 @@ impl ParquetChunk {
|
|||
}
|
||||
|
||||
/// Return the columns names that belong to the given column selection
|
||||
pub fn column_names(&self, selection: Selection<'_>) -> Option<BTreeSet<String>> {
|
||||
pub fn column_names(&self, selection: Projection<'_>) -> Option<BTreeSet<String>> {
|
||||
let fields = self.schema.inner().fields().iter();
|
||||
|
||||
Some(match selection {
|
||||
Selection::Some(cols) => fields
|
||||
Projection::Some(cols) => fields
|
||||
.filter_map(|x| {
|
||||
if cols.contains(&x.name().as_str()) {
|
||||
Some(x.name().clone())
|
||||
|
@ -73,26 +74,19 @@ impl ParquetChunk {
|
|||
}
|
||||
})
|
||||
.collect(),
|
||||
Selection::All => fields.map(|x| x.name().clone()).collect(),
|
||||
Projection::All => fields.map(|x| x.name().clone()).collect(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Return stream of data read from parquet file
|
||||
pub fn read_filter(
|
||||
&self,
|
||||
predicate: &Predicate,
|
||||
selection: Selection<'_>,
|
||||
session_ctx: &SessionContext,
|
||||
) -> Result<SendableRecordBatchStream, crate::storage::ReadError> {
|
||||
/// Inputs for [`ParquetExec`].
|
||||
///
|
||||
/// See [`ParquetExecInput`] for more information.
|
||||
///
|
||||
/// [`ParquetExec`]: datafusion::physical_plan::file_format::ParquetExec
|
||||
pub fn parquet_exec_input(&self) -> ParquetExecInput {
|
||||
let path: ParquetFilePath = self.parquet_file.as_ref().into();
|
||||
self.store.read_filter(
|
||||
predicate,
|
||||
selection,
|
||||
Arc::clone(&self.schema.as_arrow()),
|
||||
&path,
|
||||
self.file_size_bytes(),
|
||||
session_ctx,
|
||||
)
|
||||
self.store.parquet_exec_input(&path, self.file_size_bytes())
|
||||
}
|
||||
|
||||
/// The total number of rows in all row groups in this chunk.
|
||||
|
|
|
@ -6,6 +6,7 @@ use std::{io::Write, sync::Arc};
|
|||
|
||||
use arrow::error::ArrowError;
|
||||
use datafusion::physical_plan::SendableRecordBatchStream;
|
||||
use datafusion_util::config::BATCH_SIZE;
|
||||
use futures::{pin_mut, TryStreamExt};
|
||||
use observability_deps::tracing::{debug, trace, warn};
|
||||
use parquet::{
|
||||
|
@ -21,6 +22,11 @@ use crate::metadata::{IoxMetadata, METADATA_KEY};
|
|||
/// Parquet row group write size
|
||||
pub const ROW_GROUP_WRITE_SIZE: usize = 1024 * 1024;
|
||||
|
||||
/// ensure read and write work well together
|
||||
/// Skip clippy due to <https://github.com/rust-lang/rust-clippy/issues/8159>.
|
||||
#[allow(clippy::assertions_on_constants)]
|
||||
const _: () = assert!(ROW_GROUP_WRITE_SIZE % BATCH_SIZE == 0);
|
||||
|
||||
/// [`RecordBatch`] to Parquet serialisation errors.
|
||||
///
|
||||
/// [`RecordBatch`]: arrow::record_batch::RecordBatch
|
||||
|
|
|
@ -6,26 +6,26 @@ use crate::{
|
|||
serialize::{self, CodecError},
|
||||
ParquetFilePath,
|
||||
};
|
||||
use arrow::datatypes::{Field, SchemaRef};
|
||||
use arrow::{
|
||||
datatypes::{Field, SchemaRef},
|
||||
record_batch::RecordBatch,
|
||||
};
|
||||
use bytes::Bytes;
|
||||
use datafusion::{
|
||||
datasource::{listing::PartitionedFile, object_store::ObjectStoreUrl},
|
||||
error::DataFusionError,
|
||||
execution::context::TaskContext,
|
||||
physical_plan::{
|
||||
execute_stream,
|
||||
file_format::{FileScanConfig, ParquetExec},
|
||||
stream::RecordBatchStreamAdapter,
|
||||
SendableRecordBatchStream, Statistics,
|
||||
ExecutionPlan, SendableRecordBatchStream, Statistics,
|
||||
},
|
||||
prelude::SessionContext,
|
||||
};
|
||||
use futures::TryStreamExt;
|
||||
use datafusion_util::config::iox_session_config;
|
||||
use object_store::{DynObjectStore, ObjectMeta};
|
||||
use observability_deps::tracing::*;
|
||||
use predicate::Predicate;
|
||||
use schema::selection::{select_schema, Selection};
|
||||
use schema::Projection;
|
||||
use std::{
|
||||
num::TryFromIntError,
|
||||
sync::Arc,
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
|
@ -52,38 +52,6 @@ pub enum UploadError {
|
|||
Upload(#[from] object_store::Error),
|
||||
}
|
||||
|
||||
/// Errors during Parquet file download & scan.
|
||||
#[derive(Debug, Error)]
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub enum ReadError {
|
||||
/// Error writing the bytes fetched from object store to the temporary
|
||||
/// parquet file on disk.
|
||||
#[error("i/o error writing downloaded parquet: {0}")]
|
||||
IO(#[from] std::io::Error),
|
||||
|
||||
/// An error fetching Parquet file bytes from object store.
|
||||
#[error("failed to read data from object store: {0}")]
|
||||
ObjectStore(#[from] object_store::Error),
|
||||
|
||||
/// An error reading the downloaded Parquet file.
|
||||
#[error("invalid parquet file: {0}")]
|
||||
Parquet(#[from] parquet::errors::ParquetError),
|
||||
|
||||
/// Schema mismatch
|
||||
#[error("Schema mismatch (expected VS actual parquet file) for file '{path}': {source}")]
|
||||
SchemaMismatch {
|
||||
/// Path of the affected parquet file.
|
||||
path: object_store::path::Path,
|
||||
|
||||
/// Source error
|
||||
source: ProjectionError,
|
||||
},
|
||||
|
||||
/// Malformed integer data for row count
|
||||
#[error("Malformed row count integer")]
|
||||
MalformedRowCount(#[from] TryFromIntError),
|
||||
}
|
||||
|
||||
/// ID for an object store hooked up into DataFusion.
|
||||
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
|
||||
pub struct StorageId(&'static str);
|
||||
|
@ -106,6 +74,69 @@ impl std::fmt::Display for StorageId {
|
|||
}
|
||||
}
|
||||
|
||||
/// Inputs required to build a [`ParquetExec`] for one or multiple files.
///
/// The files shall be grouped by [`object_store_url`](Self::object_store_url). For each object store, you shall
/// create one [`ParquetExec`] and put each file into its own "file group".
///
/// [`ParquetExec`]: datafusion::physical_plan::file_format::ParquetExec
#[derive(Debug)]
pub struct ParquetExecInput {
/// Store where the file is located.
pub object_store_url: ObjectStoreUrl,

/// Object metadata.
pub object_meta: ObjectMeta,
}
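
The grouping rule in the doc comment can be illustrated with a hedged helper: given several `ParquetExecInput`s that are already known to share one object store, build a single `ParquetExec` with one file group per file. The `FileScanConfig` fields and the three-argument `ParquetExec::new` call mirror the code elsewhere in this diff; the crate path `parquet_file::storage::ParquetExecInput` and the helper itself are my assumptions.

```rust
use arrow::datatypes::SchemaRef;
use datafusion::{
    datasource::listing::PartitionedFile,
    physical_plan::{
        file_format::{FileScanConfig, ParquetExec},
        Statistics,
    },
    prelude::SessionContext,
};
use parquet_file::storage::ParquetExecInput; // assumed path

/// Build one `ParquetExec` over several files that share the same object
/// store, putting each file into its own file group.
fn parquet_exec_for_store(
    inputs: &[ParquetExecInput],
    file_schema: SchemaRef,
    session_ctx: &SessionContext,
) -> ParquetExec {
    // Assumes a non-empty slice that has already been grouped by store URL.
    let object_store_url = inputs[0].object_store_url.clone();

    let base_config = FileScanConfig {
        object_store_url,
        file_schema,
        // one file group per file, mirroring the single-file case above
        file_groups: inputs
            .iter()
            .map(|input| {
                vec![PartitionedFile {
                    object_meta: input.object_meta.clone(),
                    partition_values: vec![],
                    range: None,
                    extensions: None,
                }]
            })
            .collect(),
        statistics: Statistics::default(),
        projection: None,
        limit: None,
        table_partition_cols: vec![],
        config_options: session_ctx.copied_config().config_options(),
    };

    // No predicate push-down and no metadata size hint in this sketch.
    ParquetExec::new(base_config, None, None)
}
```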
|
||||
|
||||
impl ParquetExecInput {
|
||||
/// Read parquet file into [`RecordBatch`]es.
|
||||
///
|
||||
/// This should only be used for testing purposes.
|
||||
pub async fn read_to_batches(
|
||||
&self,
|
||||
schema: SchemaRef,
|
||||
projection: Projection<'_>,
|
||||
session_ctx: &SessionContext,
|
||||
) -> Result<Vec<RecordBatch>, DataFusionError> {
|
||||
// Compute final (output) schema after selection
|
||||
let schema = Arc::new(
|
||||
projection
|
||||
.project_schema(&schema)
|
||||
.as_ref()
|
||||
.clone()
|
||||
.with_metadata(Default::default()),
|
||||
);
|
||||
|
||||
let base_config = FileScanConfig {
|
||||
object_store_url: self.object_store_url.clone(),
|
||||
file_schema: schema,
|
||||
file_groups: vec![vec![PartitionedFile {
|
||||
object_meta: self.object_meta.clone(),
|
||||
partition_values: vec![],
|
||||
range: None,
|
||||
extensions: None,
|
||||
}]],
|
||||
statistics: Statistics::default(),
|
||||
projection: None,
|
||||
limit: None,
|
||||
table_partition_cols: vec![],
|
||||
// TODO avoid this `copied_config` when config_options are directly available on context
|
||||
config_options: session_ctx.copied_config().config_options(),
|
||||
};
|
||||
let exec = ParquetExec::new(base_config, None, None);
|
||||
let exec_schema = exec.schema();
|
||||
datafusion::physical_plan::collect(Arc::new(exec), session_ctx.task_ctx())
|
||||
.await
|
||||
.map(|batches| {
|
||||
for batch in &batches {
|
||||
assert_eq!(batch.schema(), exec_schema);
|
||||
}
|
||||
batches
|
||||
})
|
||||
}
|
||||
}
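
For completeness, a hedged test-side sketch of the path used by the `download` helper later in this diff: build the exec input for one known file and materialize it into `RecordBatch`es. The wrapper function and its name are mine; the method calls are the ones shown in this file.

```rust
use arrow::{datatypes::SchemaRef, record_batch::RecordBatch};
use parquet_file::{storage::ParquetStorage, ParquetFilePath};
use schema::Projection;

/// Test-only helper: read one persisted parquet file back into RecordBatches.
async fn read_back(
    store: &ParquetStorage,
    path: &ParquetFilePath,
    file_size: usize,
    schema: SchemaRef,
) -> Vec<RecordBatch> {
    store
        .parquet_exec_input(path, file_size)
        .read_to_batches(schema, Projection::All, &store.test_df_context())
        .await
        .expect("should read record batches from object store")
}
```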
|
||||
|
||||
/// The [`ParquetStorage`] type encapsulates [`RecordBatch`] persistence to an
|
||||
/// underlying [`ObjectStore`].
|
||||
///
|
||||
|
@ -147,7 +178,7 @@ impl ParquetStorage {
|
|||
pub fn test_df_context(&self) -> SessionContext {
|
||||
// set up "fake" DataFusion session
|
||||
let object_store = Arc::clone(&self.object_store);
|
||||
let session_ctx = SessionContext::new();
|
||||
let session_ctx = SessionContext::with_config(iox_session_config());
|
||||
let task_ctx = Arc::new(TaskContext::from(&session_ctx));
|
||||
task_ctx
|
||||
.runtime_env()
|
||||
|
@ -219,72 +250,22 @@ impl ParquetStorage {
|
|||
Ok((parquet_meta, file_size))
|
||||
}
|
||||
|
||||
/// Pull the Parquet-encoded [`RecordBatch`] at the file path derived from
|
||||
/// the provided [`ParquetFilePath`].
|
||||
/// Inputs for [`ParquetExec`].
|
||||
///
|
||||
/// The `selection` projection is pushed down to the Parquet deserializer.
|
||||
/// See [`ParquetExecInput`] for more information.
|
||||
///
|
||||
/// This impl fetches the associated Parquet file bytes from object storage,
|
||||
/// temporarily persisting them to a local temp file to feed to the arrow
|
||||
/// reader.
|
||||
///
|
||||
/// No caching is performed by `read_filter()`, and each call to
|
||||
/// `read_filter()` will re-download the parquet file unless the underlying
|
||||
/// object store impl caches the fetched bytes.
|
||||
///
|
||||
/// [`RecordBatch`]: arrow::record_batch::RecordBatch
|
||||
pub fn read_filter(
|
||||
&self,
|
||||
predicate: &Predicate,
|
||||
selection: Selection<'_>,
|
||||
schema: SchemaRef,
|
||||
path: &ParquetFilePath,
|
||||
file_size: usize,
|
||||
session_ctx: &SessionContext,
|
||||
) -> Result<SendableRecordBatchStream, ReadError> {
|
||||
let path = path.object_store_path();
|
||||
trace!(path=?path, "fetching parquet data for filtered read");
|
||||
|
||||
// Compute final (output) schema after selection
|
||||
let schema = Arc::new(
|
||||
select_schema(selection, &schema)
|
||||
.as_ref()
|
||||
.clone()
|
||||
.with_metadata(Default::default()),
|
||||
);
|
||||
|
||||
// create ParquetExec node
|
||||
let object_meta = ObjectMeta {
|
||||
location: path,
|
||||
// we don't care about the "last modified" field
|
||||
last_modified: Default::default(),
|
||||
size: file_size,
|
||||
};
|
||||
let expr = predicate.filter_expr();
|
||||
let base_config = FileScanConfig {
|
||||
/// [`ParquetExec`]: datafusion::physical_plan::file_format::ParquetExec
|
||||
pub fn parquet_exec_input(&self, path: &ParquetFilePath, file_size: usize) -> ParquetExecInput {
|
||||
ParquetExecInput {
|
||||
object_store_url: ObjectStoreUrl::parse(format!("iox://{}/", self.id))
|
||||
.expect("valid object store URL"),
|
||||
file_schema: Arc::clone(&schema),
|
||||
file_groups: vec![vec![PartitionedFile {
|
||||
object_meta,
|
||||
partition_values: vec![],
|
||||
range: None,
|
||||
extensions: None,
|
||||
}]],
|
||||
statistics: Statistics::default(),
|
||||
projection: None,
|
||||
limit: None,
|
||||
table_partition_cols: vec![],
|
||||
// TODO avoid this `copied_config` when config_options are directly available on context
|
||||
config_options: session_ctx.copied_config().config_options(),
|
||||
};
|
||||
let exec = ParquetExec::new(base_config, expr, None);
|
||||
|
||||
Ok(Box::pin(RecordBatchStreamAdapter::new(
|
||||
Arc::clone(&schema),
|
||||
futures::stream::once(execute_stream(Arc::new(exec), session_ctx.task_ctx()))
|
||||
.try_flatten(),
|
||||
)))
|
||||
object_meta: ObjectMeta {
|
||||
location: path.object_store_path(),
|
||||
// we don't care about the "last modified" field
|
||||
last_modified: Default::default(),
|
||||
size: file_size,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -348,7 +329,7 @@ mod tests {
|
|||
let batch = RecordBatch::try_from_iter([("a", to_string_array(&["value"]))]).unwrap();
|
||||
let schema = batch.schema();
|
||||
|
||||
assert_roundtrip(batch.clone(), Selection::All, schema, batch).await;
|
||||
assert_roundtrip(batch.clone(), Projection::All, schema, batch).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
@ -367,7 +348,7 @@ mod tests {
|
|||
("c", to_string_array(&["foo"])),
|
||||
])
|
||||
.unwrap();
|
||||
assert_roundtrip(batch, Selection::Some(&["d", "c"]), schema, expected_batch).await;
|
||||
assert_roundtrip(batch, Projection::Some(&["d", "c"]), schema, expected_batch).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
@ -380,7 +361,7 @@ mod tests {
|
|||
let schema = batch.schema();
|
||||
|
||||
let expected_batch = RecordBatch::try_from_iter([("b", to_int_array(&[1]))]).unwrap();
|
||||
assert_roundtrip(batch, Selection::Some(&["b", "c"]), schema, expected_batch).await;
|
||||
assert_roundtrip(batch, Projection::Some(&["b", "c"]), schema, expected_batch).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
@ -396,7 +377,7 @@ mod tests {
|
|||
])
|
||||
.unwrap();
|
||||
let schema = schema_batch.schema();
|
||||
assert_roundtrip(file_batch, Selection::All, schema, schema_batch).await;
|
||||
assert_roundtrip(file_batch, Projection::All, schema, schema_batch).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
@ -422,7 +403,7 @@ mod tests {
|
|||
("c", to_string_array(&["foo"])),
|
||||
])
|
||||
.unwrap();
|
||||
assert_roundtrip(batch, Selection::Some(&["d", "c"]), schema, expected_batch).await;
|
||||
assert_roundtrip(batch, Projection::Some(&["d", "c"]), schema, expected_batch).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
@ -485,7 +466,7 @@ mod tests {
|
|||
.clone()
|
||||
.with_metadata(HashMap::from([(String::from("foo"), String::from("bar"))])),
|
||||
);
|
||||
download(&store, &meta, Selection::All, schema, file_size)
|
||||
download(&store, &meta, Projection::All, schema, file_size)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
@ -514,7 +495,7 @@ mod tests {
|
|||
// Serialize & upload the record batches.
|
||||
let (_iox_md, file_size) = upload(&store, &meta, batch).await;
|
||||
|
||||
download(&store, &meta, Selection::All, schema, file_size)
|
||||
download(&store, &meta, Projection::All, schema, file_size)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
@ -529,7 +510,7 @@ mod tests {
|
|||
let expected_batch =
|
||||
RecordBatch::try_from_iter([("a", to_string_array(&["value"]))]).unwrap();
|
||||
let schema = expected_batch.schema();
|
||||
assert_roundtrip(file_batch, Selection::All, schema, expected_batch).await;
|
||||
assert_roundtrip(file_batch, Projection::All, schema, expected_batch).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
@ -547,7 +528,7 @@ mod tests {
|
|||
let schema = schema_batch.schema();
|
||||
let expected_batch =
|
||||
RecordBatch::try_from_iter([("a", to_string_array(&["value"]))]).unwrap();
|
||||
assert_roundtrip(file_batch, Selection::Some(&["a"]), schema, expected_batch).await;
|
||||
assert_roundtrip(file_batch, Projection::Some(&["a"]), schema, expected_batch).await;
|
||||
}
|
||||
|
||||
fn to_string_array(strs: &[&str]) -> ArrayRef {
|
||||
|
@ -592,35 +573,24 @@ mod tests {
|
|||
async fn download<'a>(
|
||||
store: &ParquetStorage,
|
||||
meta: &IoxMetadata,
|
||||
selection: Selection<'_>,
|
||||
selection: Projection<'_>,
|
||||
expected_schema: SchemaRef,
|
||||
file_size: usize,
|
||||
) -> Result<RecordBatch, DataFusionError> {
|
||||
let path: ParquetFilePath = meta.into();
|
||||
let rx = store
|
||||
.read_filter(
|
||||
&Predicate::default(),
|
||||
selection,
|
||||
expected_schema,
|
||||
&path,
|
||||
file_size,
|
||||
&store.test_df_context(),
|
||||
)
|
||||
.expect("should read record batches from object store");
|
||||
let schema = rx.schema();
|
||||
datafusion::physical_plan::common::collect(rx)
|
||||
store
|
||||
.parquet_exec_input(&path, file_size)
|
||||
.read_to_batches(expected_schema, selection, &store.test_df_context())
|
||||
.await
|
||||
.map(|mut batches| {
|
||||
assert_eq!(batches.len(), 1);
|
||||
let batch = batches.remove(0);
|
||||
assert_eq!(batch.schema(), schema);
|
||||
batch
|
||||
batches.remove(0)
|
||||
})
|
||||
}
|
||||
|
||||
async fn assert_roundtrip(
|
||||
upload_batch: RecordBatch,
|
||||
selection: Selection<'_>,
|
||||
selection: Projection<'_>,
|
||||
expected_schema: SchemaRef,
|
||||
expected_batch: RecordBatch,
|
||||
) {
|
||||
|
@ -651,7 +621,7 @@ mod tests {
|
|||
let meta = meta();
|
||||
let (_iox_md, file_size) = upload(&store, &meta, persisted_batch).await;
|
||||
|
||||
let err = download(&store, &meta, Selection::All, expected_schema, file_size)
|
||||
let err = download(&store, &meta, Projection::All, expected_schema, file_size)
|
||||
.await
|
||||
.unwrap_err();
|
||||
|
||||
|
|
|
@ -180,7 +180,7 @@ fn timestamp_value<'a>(
|
|||
mod tests {
|
||||
use super::*;
|
||||
use mutable_batch_lp::lines_to_batches;
|
||||
use schema::selection::Selection;
|
||||
use schema::Projection;
|
||||
|
||||
#[test]
|
||||
fn basic() {
|
||||
|
@ -228,7 +228,7 @@ m,tag2=multi_field bool_field=false,str_field="blargh" 610
|
|||
);
|
||||
let (table_name, mutable_batch) = mutable_batches.into_iter().next().unwrap();
|
||||
|
||||
let selection = Selection::All;
|
||||
let selection = Projection::All;
|
||||
let record_batch = mutable_batch.to_arrow(selection).unwrap();
|
||||
let iox_schema = mutable_batch.schema(selection).unwrap();
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@ pub(crate) fn expr_to_df(expr: DeleteExpr) -> Expr {
|
|||
}
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub enum DataFusionToExprError {
|
||||
#[snafu(display("unsupported expression: {:?}", expr))]
|
||||
UnsupportedExpression { expr: Expr },
|
||||
|
|
|
@ -11,7 +11,7 @@ use datafusion::error::{DataFusionError, Result as DataFusionResult};
|
|||
use datafusion::execution::context::ExecutionProps;
|
||||
use datafusion::logical_expr::expr_rewriter::ExprRewritable;
|
||||
use datafusion::logical_expr::ExprSchemable;
|
||||
use datafusion::optimizer::expr_simplifier::{ExprSimplifier, SimplifyInfo};
|
||||
use datafusion::optimizer::simplify_expressions::{ExprSimplifier, SimplifyInfo};
|
||||
use datafusion::prelude::{lit, Column, Expr};
|
||||
use observability_deps::tracing::{debug, trace};
|
||||
use schema::Schema;
|
||||
|
|
|
@ -32,6 +32,7 @@ rand = "0.8.3"
|
|||
service_common = { path = "../service_common" }
|
||||
service_grpc_catalog = { path = "../service_grpc_catalog"}
|
||||
service_grpc_schema = { path = "../service_grpc_schema" }
|
||||
service_grpc_object_store = { path = "../service_grpc_object_store" }
|
||||
schema = { path = "../schema" }
|
||||
sharder = { path = "../sharder" }
|
||||
snafu = "0.7"
|
||||
|
|
|
@@ -340,14 +340,13 @@ pub mod tests {
     use arrow::{datatypes::DataType, record_batch::RecordBatch};
     use arrow_util::assert_batches_eq;
     use data_types::{ColumnType, NamespaceSchema};
-    use futures::StreamExt;
     use iox_query::{
         exec::{ExecutorType, IOxSessionContext},
         QueryChunk, QueryChunkMeta,
     };
     use iox_tests::util::{TestCatalog, TestNamespace, TestParquetFileBuilder};
     use metric::{Attributes, Observation, RawReporter};
-    use schema::{builder::SchemaBuilder, selection::Selection, sort::SortKeyBuilder};
+    use schema::{builder::SchemaBuilder, sort::SortKeyBuilder};
     use test_helpers::maybe_start_logging;
     use tokio::runtime::Handle;

@@ -373,7 +372,7 @@ pub mod tests {
         assert_sort_key(&chunk);

         // back up table summary
-        let table_summary_1 = chunk.summary().unwrap();
+        let table_summary_1 = chunk.summary();

         // check if chunk can be queried
         assert_content(&chunk, &test_data).await;

@@ -382,7 +381,7 @@ pub mod tests {
         assert_eq!(chunk.chunk_type(), "parquet");

         // summary has NOT changed
-        let table_summary_2 = chunk.summary().unwrap();
+        let table_summary_2 = chunk.summary();
         assert_eq!(table_summary_1, table_summary_2);

         // retrieving the chunk again should not require any catalog requests

@@ -397,13 +396,9 @@ pub mod tests {
         ctx: IOxSessionContext,
     ) -> Vec<RecordBatch> {
         chunk
-            .read_filter(ctx, &Default::default(), Selection::All)
-            .unwrap()
-            .collect::<Vec<_>>()
+            .data()
+            .read_to_batches(chunk.schema(), ctx.inner())
             .await
-            .into_iter()
-            .map(Result::unwrap)
-            .collect()
     }

     struct TestData {
@@ -1,39 +1,17 @@
 use crate::chunk::QuerierChunk;
 use data_types::{ChunkId, ChunkOrder, DeletePredicate, PartitionId, TableSummary};
-use datafusion::{error::DataFusionError, physical_plan::SendableRecordBatchStream};
+use datafusion::error::DataFusionError;
 use iox_query::{
     exec::{stringset::StringSet, IOxSessionContext},
-    QueryChunk, QueryChunkMeta,
+    QueryChunk, QueryChunkData, QueryChunkMeta,
 };
-use observability_deps::tracing::debug;
 use predicate::Predicate;
-use schema::{selection::Selection, sort::SortKey, Schema};
-use snafu::{ResultExt, Snafu};
+use schema::{sort::SortKey, Projection, Schema};
 use std::{any::Any, sync::Arc};
-use trace::span::SpanRecorder;
-
-#[derive(Debug, Snafu)]
-pub enum Error {
-    #[snafu(display("Parquet File Error in chunk {}: {}", chunk_id, source))]
-    ParquetFileChunk {
-        source: Box<parquet_file::storage::ReadError>,
-        chunk_id: ChunkId,
-    },
-
-    #[snafu(display(
-        "Could not find column name '{}' in read buffer column_values results for chunk {}",
-        column_name,
-        chunk_id,
-    ))]
-    ColumnNameNotFound {
-        column_name: String,
-        chunk_id: ChunkId,
-    },
-}

 impl QueryChunkMeta for QuerierChunk {
-    fn summary(&self) -> Option<Arc<TableSummary>> {
-        Some(Arc::clone(&self.table_summary))
+    fn summary(&self) -> Arc<TableSummary> {
+        Arc::clone(&self.table_summary)
     }

     fn schema(&self) -> Arc<Schema> {

@@ -74,7 +52,7 @@ impl QueryChunk for QuerierChunk {
         &self,
         mut ctx: IOxSessionContext,
         predicate: &Predicate,
-        columns: Selection<'_>,
+        columns: Projection<'_>,
     ) -> Result<Option<StringSet>, DataFusionError> {
         ctx.set_metadata("projection", format!("{}", columns));
         ctx.set_metadata("predicate", format!("{}", &predicate));

@@ -103,42 +81,8 @@ impl QueryChunk for QuerierChunk {
         Ok(None)
     }

-    fn read_filter(
-        &self,
-        mut ctx: IOxSessionContext,
-        predicate: &Predicate,
-        selection: Selection<'_>,
-    ) -> Result<SendableRecordBatchStream, DataFusionError> {
-        let span_recorder = SpanRecorder::new(
-            ctx.span()
-                .map(|span| span.child("QuerierChunk::read_filter")),
-        );
-        let delete_predicates: Vec<_> = self
-            .delete_predicates()
-            .iter()
-            .map(|pred| Arc::new(pred.as_ref().clone().into()))
-            .collect();
-        ctx.set_metadata("delete_predicates", delete_predicates.len() as i64);
-
-        // merge the negated delete predicates into the select predicate
-        let pred_with_deleted_exprs = predicate.clone().with_delete_predicates(&delete_predicates);
-        debug!(?pred_with_deleted_exprs, "Merged negated predicate");
-
-        ctx.set_metadata("predicate", format!("{}", &pred_with_deleted_exprs));
-        ctx.set_metadata("projection", format!("{}", selection));
-        ctx.set_metadata("storage", "parquet");
-
-        let chunk_id = self.id();
-        debug!(?predicate, "parquet read_filter");
-
-        // TODO(marco): propagate span all the way down to the object store cache access
-        let _span_recorder = span_recorder;
-
-        self.parquet_chunk
-            .read_filter(&pred_with_deleted_exprs, selection, ctx.inner())
-            .map_err(Box::new)
-            .context(ParquetFileChunkSnafu { chunk_id })
-            .map_err(|e| DataFusionError::External(Box::new(e)))
+    fn data(&self) -> QueryChunkData {
+        QueryChunkData::Parquet(self.parquet_chunk.parquet_exec_input())
     }

     fn chunk_type(&self) -> &str {
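For orientation: the `read_filter` to `data` change replaces a per-chunk record-batch stream with a `QueryChunkData` value that the query layer consumes. The sketch below is illustrative only and uses simplified stand-in types; `RecordBatch` and `ParquetExecInput` here are placeholders, not the real `arrow`/`iox_query` definitions.

```rust
use std::sync::Arc;

/// Placeholder for arrow::record_batch::RecordBatch.
#[derive(Debug, Clone)]
pub struct RecordBatch;
/// Placeholder for the parquet scan description handed to DataFusion.
#[derive(Debug, Clone)]
pub struct ParquetExecInput;

/// The data a chunk hands to the query engine instead of a stream.
#[derive(Debug, Clone)]
pub enum QueryChunkData {
    /// Data already materialized in memory (e.g. ingester chunks).
    RecordBatches(Vec<RecordBatch>),
    /// A description of parquet files for a ParquetExec-style scan.
    Parquet(ParquetExecInput),
}

/// A consumer now matches on the variant instead of polling a stream.
fn describe(data: &QueryChunkData) -> String {
    match data {
        QueryChunkData::RecordBatches(batches) => {
            format!("{} in-memory batch(es)", batches.len())
        }
        QueryChunkData::Parquet(_) => "a parquet scan".to_string(),
    }
}

fn main() {
    let in_mem = QueryChunkData::RecordBatches(vec![RecordBatch]);
    let parquet = QueryChunkData::Parquet(ParquetExecInput);
    println!("{} / {}", describe(&in_mem), describe(&parquet));
    let _ = Arc::new(in_mem); // chunks are typically shared behind Arc
}
```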
@@ -9,10 +9,13 @@ use futures::{
 use influxdb_iox_client::{
     catalog::generated_types::catalog_service_server::CatalogServiceServer,
     schema::generated_types::schema_service_server::SchemaServiceServer,
+    store::generated_types::object_store_service_server::ObjectStoreServiceServer,
 };
 use iox_catalog::interface::Catalog;
+use object_store::ObjectStore;
 use observability_deps::tracing::warn;
 use service_grpc_catalog::CatalogService;
+use service_grpc_object_store::ObjectStoreService;
 use service_grpc_schema::SchemaService;
 use std::sync::Arc;
 use thiserror::Error;

@@ -28,16 +31,15 @@ pub enum Error {}
 /// The [`QuerierHandler`] does nothing at this point
 #[async_trait]
 pub trait QuerierHandler: Send + Sync {
-    /// Acquire a [`SchemaService`] gRPC service implementation.
-    ///
-    /// [`SchemaService`]: generated_types::influxdata::iox::schema::v1::schema_service_server::SchemaService.
+    /// Acquire a [`SchemaServiceServer`] gRPC service implementation.
     fn schema_service(&self) -> SchemaServiceServer<SchemaService>;

-    /// Acquire a [`CatalogService`] gRPC service implementation.
-    ///
-    /// [`CatalogService`]: generated_types::influxdata::iox::catalog::v1::catalog_service_server::CatalogService.
+    /// Acquire a [`CatalogServiceServer`] gRPC service implementation.
     fn catalog_service(&self) -> CatalogServiceServer<CatalogService>;

+    /// Acquire an [`ObjectStoreServiceServer`] gRPC service implementation.
+    fn object_store_service(&self) -> ObjectStoreServiceServer<ObjectStoreService>;
+
     /// Wait until the handler finished to shutdown.
     ///
     /// Use [`shutdown`](Self::shutdown) to trigger a shutdown.

@@ -65,6 +67,9 @@ pub struct QuerierHandlerImpl {
     /// Database that handles query operation
     database: Arc<QuerierDatabase>,

+    /// The object store
+    object_store: Arc<dyn ObjectStore>,
+
     /// Future that resolves when the background worker exits
     join_handles: Vec<(String, SharedJoinHandle)>,

@@ -78,7 +83,11 @@ pub struct QuerierHandlerImpl {

 impl QuerierHandlerImpl {
     /// Initialize the Querier
-    pub fn new(catalog: Arc<dyn Catalog>, database: Arc<QuerierDatabase>) -> Self {
+    pub fn new(
+        catalog: Arc<dyn Catalog>,
+        database: Arc<QuerierDatabase>,
+        object_store: Arc<dyn ObjectStore>,
+    ) -> Self {
         let shutdown = CancellationToken::new();
         let poison_cabinet = Arc::new(PoisonCabinet::new());

@@ -86,6 +95,7 @@ impl QuerierHandlerImpl {
         Self {
             catalog,
             database,
+            object_store,
             join_handles,
             shutdown,
             poison_cabinet,

@@ -103,6 +113,13 @@ impl QuerierHandler for QuerierHandlerImpl {
         CatalogServiceServer::new(CatalogService::new(Arc::clone(&self.catalog)))
     }

+    fn object_store_service(&self) -> ObjectStoreServiceServer<ObjectStoreService> {
+        ObjectStoreServiceServer::new(ObjectStoreService::new(
+            Arc::clone(&self.catalog),
+            Arc::clone(&self.object_store),
+        ))
+    }
+
     async fn join(&self) {
         // Need to poll handlers unordered to detect early exists of any worker in the list.
         let mut unordered: FuturesUnordered<_> = self

@@ -176,14 +193,15 @@ mod tests {
         async fn new() -> Self {
             let metric_registry = Arc::new(metric::Registry::new());
             let catalog = Arc::new(MemCatalog::new(Arc::clone(&metric_registry))) as _;
-            let object_store = Arc::new(InMemory::new());
+            let object_store = Arc::new(InMemory::new()) as _;

             let time_provider = Arc::new(MockProvider::new(Time::from_timestamp_nanos(0)));
             let exec = Arc::new(Executor::new(1));
             let catalog_cache = Arc::new(CatalogCache::new_testing(
                 Arc::clone(&catalog),
                 time_provider,
                 Arc::clone(&metric_registry),
-                Arc::clone(&object_store) as _,
+                Arc::clone(&object_store),
                 &Handle::current(),
             ));
             // QuerierDatabase::new returns an error if there are no shards in the catalog

@@ -211,7 +229,7 @@ mod tests {
             .await
             .unwrap(),
         );
-        let querier = QuerierHandlerImpl::new(catalog, database);
+        let querier = QuerierHandlerImpl::new(catalog, database, object_store);

         Self { querier }
     }
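The handler now threads an `Arc<dyn ObjectStore>` from construction through to the new object-store gRPC service. A minimal sketch of that wiring pattern, using local stand-in traits and structs rather than the real `object_store`, tonic, or IOx types:

```rust
use std::sync::Arc;

/// Stand-in for the `object_store::ObjectStore` trait object.
trait ObjectStore: Send + Sync {
    fn name(&self) -> &'static str;
}

struct InMemory;
impl ObjectStore for InMemory {
    fn name(&self) -> &'static str {
        "memory"
    }
}

/// Stand-in for the gRPC service that needs access to the store.
struct ObjectStoreService {
    object_store: Arc<dyn ObjectStore>,
}

/// Stand-in for QuerierHandlerImpl: the store is injected once and shared
/// cheaply via Arc wherever a service needs it.
struct Handler {
    object_store: Arc<dyn ObjectStore>,
}

impl Handler {
    fn new(object_store: Arc<dyn ObjectStore>) -> Self {
        Self { object_store }
    }

    fn object_store_service(&self) -> ObjectStoreService {
        ObjectStoreService {
            object_store: Arc::clone(&self.object_store),
        }
    }
}

fn main() {
    let handler = Handler::new(Arc::new(InMemory) as _);
    let svc = handler.object_store_service();
    println!("service backed by {} store", svc.object_store.name());
}
```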
@@ -5,7 +5,7 @@ use influxdb_iox_client::flight::{
     generated_types as proto,
     low_level::{Client as LowLevelFlightClient, LowLevelMessage, PerformQuery},
 };
-use observability_deps::tracing::debug;
+use observability_deps::tracing::{debug, warn};
 use snafu::{ResultExt, Snafu};
 use std::{collections::HashMap, fmt::Debug, ops::DerefMut, sync::Arc};
 use trace::ctx::SpanContext;

@@ -100,14 +100,35 @@ impl FlightClient for FlightClientImpl {
             LowLevelFlightClient::<proto::IngesterQueryRequest>::new(connection, span_context);

         debug!(%ingester_addr, ?request, "Sending request to ingester");
-        let request: proto::IngesterQueryRequest =
-            request.try_into().context(CreatingRequestSnafu)?;
+        let request = serialize_ingester_query_request(request)?;

         let perform_query = client.perform_query(request).await.context(FlightSnafu)?;
         Ok(Box::new(perform_query))
     }
 }

+/// Tries to serialize the request to the ingester
+///
+/// Note if the predicate is too "complicated" to be serialized simply
+/// ask for all the data from the ingester. More details:
+/// <https://github.com/apache/arrow-datafusion/issues/3968>
+fn serialize_ingester_query_request(
+    mut request: IngesterQueryRequest,
+) -> Result<proto::IngesterQueryRequest, Error> {
+    match request.clone().try_into() {
+        Ok(proto) => Ok(proto),
+        Err(e) if (e.field == "exprs") && (e.description.contains("recursion limit reached")) => {
+            warn!(
+                predicate=?request.predicate,
+                "Cannot serialize predicate due to recursion limit, stripping it",
+            );
+            request.predicate = None;
+            request.try_into().context(CreatingRequestSnafu)
+        }
+        Err(e) => Err(Error::CreatingRequest { source: e }),
+    }
+}
+
 /// Data that is returned by an ingester gRPC query.
 ///
 /// This is mostly the same as [`PerformQuery`] but allows some easier mocking.

@@ -189,3 +210,48 @@ impl CachedConnection {
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use datafusion::prelude::{col, lit};
+    use predicate::Predicate;
+
+    use super::*;
+
+    #[test]
+    fn serialize_deeply_nested_predicate() {
+        // see https://github.com/influxdata/influxdb_iox/issues/5974
+
+        // we need more stack space so this doesn't overflow in dev builds
+        std::thread::Builder::new().stack_size(10_000_000).spawn(|| {
+            // don't know what "too much" is, so let's slowly try to increase complexity
+            let n_max = 100;
+
+            for n in [1, 2, n_max] {
+                println!("testing: {n}");
+
+                let expr_base = col("a").lt(lit(5i32));
+                let expr = (0..n).fold(expr_base.clone(), |expr, _| expr.and(expr_base.clone()));
+
+                let predicate = Predicate {exprs: vec![expr], ..Default::default()};
+
+                let request = IngesterQueryRequest {
+                    namespace: String::from("ns"),
+                    table: String::from("table"),
+                    columns: vec![String::from("col1"), String::from("col2")],
+                    predicate: Some(predicate),
+                };
+
+                let proto = serialize_ingester_query_request(request.clone()).expect("serialization");
+                let request2 = IngesterQueryRequest::try_from(proto).expect("deserialization");
+
+                if request2.predicate.is_none() {
+                    assert!(n > 2, "not really deeply nested");
+                    return;
+                }
+            }
+
+            panic!("did not find a 'too deeply nested' expression, tested up to a depth of {n_max}")
+        }).expect("spawning thread").join().expect("joining thread");
+    }
+}
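The new `serialize_ingester_query_request` falls back to an unfiltered request when the predicate is too deeply nested to encode. A stripped-down sketch of that "drop the optional field and retry" pattern, with hypothetical `Request`/`encode` stand-ins rather than the real proto types:

```rust
/// Hypothetical request with an optional, possibly unserializable predicate.
#[derive(Debug, Clone)]
struct Request {
    table: String,
    predicate: Option<String>,
}

#[derive(Debug)]
enum EncodeError {
    RecursionLimit,
}

/// Stand-in encoder: pretend very long predicates hit a recursion limit.
fn encode(req: &Request) -> Result<Vec<u8>, EncodeError> {
    if req.predicate.as_deref().map_or(0, str::len) > 1_000 {
        return Err(EncodeError::RecursionLimit);
    }
    Ok(format!("{req:?}").into_bytes())
}

/// Try to encode; on a recursion-limit failure, strip the predicate and retry,
/// which asks the server for all the data instead of a filtered subset.
fn encode_with_fallback(mut req: Request) -> Result<Vec<u8>, EncodeError> {
    match encode(&req) {
        Ok(bytes) => Ok(bytes),
        Err(EncodeError::RecursionLimit) => {
            eprintln!("predicate too complex, stripping it");
            req.predicate = None;
            encode(&req)
        }
    }
}

fn main() {
    let req = Request {
        table: "table".to_string(),
        predicate: Some("a < 5 AND ".repeat(200)),
    };
    let bytes = encode_with_fallback(req).expect("fallback succeeds");
    println!("encoded {} bytes", bytes.len());
}
```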
@@ -12,7 +12,6 @@ use data_types::{
     TableSummary, TimestampMinMax,
 };
 use datafusion::error::DataFusionError;
-use datafusion_util::MemoryStream;
 use futures::{stream::FuturesUnordered, TryStreamExt};
 use generated_types::{
     influxdata::iox::ingester::v1::GetWriteInfoResponse,

@@ -25,13 +24,13 @@ use influxdb_iox_client::flight::{
 use iox_query::{
     exec::{stringset::StringSet, IOxSessionContext},
     util::{compute_timenanosecond_min_max, create_basic_summary},
-    QueryChunk, QueryChunkMeta,
+    QueryChunk, QueryChunkData, QueryChunkMeta,
 };
 use iox_time::{Time, TimeProvider};
 use metric::{DurationHistogram, Metric};
 use observability_deps::tracing::{debug, trace, warn};
 use predicate::Predicate;
-use schema::{selection::Selection, sort::SortKey, Schema};
+use schema::{sort::SortKey, Projection, Schema};
 use snafu::{ensure, OptionExt, ResultExt, Snafu};
 use std::{
     any::Any,

@@ -1050,8 +1049,8 @@ impl IngesterChunk {
 }

 impl QueryChunkMeta for IngesterChunk {
-    fn summary(&self) -> Option<Arc<TableSummary>> {
-        Some(Arc::clone(&self.summary))
+    fn summary(&self) -> Arc<TableSummary> {
+        Arc::clone(&self.summary)
     }

     fn schema(&self) -> Arc<Schema> {

@@ -1095,7 +1094,7 @@ impl QueryChunk for IngesterChunk {
         &self,
         _ctx: IOxSessionContext,
         _predicate: &Predicate,
-        _columns: Selection<'_>,
+        _columns: Projection<'_>,
     ) -> Result<Option<StringSet>, DataFusionError> {
         // TODO maybe some special handling?
         Ok(None)

@@ -1111,30 +1110,8 @@ impl QueryChunk for IngesterChunk {
         Ok(None)
     }

-    fn read_filter(
-        &self,
-        _ctx: IOxSessionContext,
-        predicate: &Predicate,
-        selection: Selection<'_>,
-    ) -> Result<datafusion::physical_plan::SendableRecordBatchStream, DataFusionError> {
-        trace!(?predicate, ?selection, input_batches=?self.batches, "Reading data");
-
-        // Apply selection to in-memory batch
-        let batches = match self
-            .schema
-            .df_projection(selection)
-            .map_err(|e| DataFusionError::External(Box::new(e)))?
-        {
-            None => self.batches.clone(),
-            Some(projection) => self
-                .batches
-                .iter()
-                .map(|batch| batch.project(&projection))
-                .collect::<std::result::Result<Vec<_>, ArrowError>>()?,
-        };
-        trace!(?predicate, ?selection, output_batches=?batches, input_batches=?self.batches, "Reading data");
-
-        Ok(Box::pin(MemoryStream::new(batches)))
+    fn data(&self) -> QueryChunkData {
+        QueryChunkData::RecordBatches(self.batches.clone())
     }

     fn chunk_type(&self) -> &str {

@@ -1806,7 +1783,7 @@ mod tests {
     }

     fn lp_to_record_batch(lp: &str) -> RecordBatch {
-        lp_to_mutable_batch(lp).1.to_arrow(Selection::All).unwrap()
+        lp_to_mutable_batch(lp).1.to_arrow(Projection::All).unwrap()
     }

     #[derive(Debug)]
@@ -1,12 +1,10 @@
 use super::IngesterConnection;
-use arrow::record_batch::RecordBatch;
 use async_trait::async_trait;
 use data_types::ShardIndex;
-use futures::StreamExt;
 use generated_types::influxdata::iox::ingester::v1::GetWriteInfoResponse;
-use iox_query::{exec::IOxSessionContext, util::create_basic_summary, QueryChunk};
+use iox_query::util::create_basic_summary;
 use parking_lot::Mutex;
-use schema::selection::Selection;
+use schema::Projection;
 use schema::Schema as IOxSchema;
 use std::{any::Any, sync::Arc};
 use trace::span::Span;

@@ -38,17 +36,17 @@ impl IngesterConnection for MockIngesterConnection {
         _namespace_name: Arc<str>,
         _table_name: Arc<str>,
         columns: Vec<String>,
-        predicate: &predicate::Predicate,
+        _predicate: &predicate::Predicate,
         _expected_schema: Arc<schema::Schema>,
         _span: Option<Span>,
     ) -> super::Result<Vec<super::IngesterPartition>> {
         // see if we want to do projection pushdown
         let mut prune_columns = true;
         let cols: Vec<&str> = columns.iter().map(|s| s.as_str()).collect();
-        let selection = Selection::Some(&cols);
+        let selection = Projection::Some(&cols);
         match selection {
-            Selection::All => prune_columns = false,
-            Selection::Some(val) => {
+            Projection::All => prune_columns = false,
+            Projection::Some(val) => {
                 if val.is_empty() {
                     prune_columns = false;
                 }

@@ -77,14 +75,14 @@ impl IngesterConnection for MockIngesterConnection {
             .chunks
             .into_iter()
             .map(|ic| async move {
-                let mut batches: Vec<RecordBatch> = vec![];
-                let mut stream = ic
-                    .read_filter(IOxSessionContext::with_testing(), predicate, selection)
-                    .expect("Error in read_filter");
-                while let Some(b) = stream.next().await {
-                    let b = b.expect("Error in stream");
-                    batches.push(b)
-                }
+                let batches: Vec<_> = ic
+                    .batches
+                    .iter()
+                    .map(|batch| match ic.schema.df_projection(selection).unwrap() {
+                        Some(projection) => batch.project(&projection).unwrap(),
+                        None => batch.clone(),
+                    })
+                    .collect();

                 assert!(!batches.is_empty(), "Error: empty batches");
                 let new_schema = IOxSchema::try_from(batches[0].schema()).unwrap();
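The mock connection now prunes columns by projecting the stored `RecordBatch`es directly instead of going through `read_filter`. A small, self-contained example of the same column-index projection using the arrow crate (assuming `RecordBatch::project` as provided by recent arrow-rs releases):

```rust
use std::sync::Arc;

use arrow::array::{Int64Array, StringArray};
use arrow::datatypes::{DataType, Field, Schema};
use arrow::error::ArrowError;
use arrow::record_batch::RecordBatch;

fn main() -> Result<(), ArrowError> {
    // A tiny batch with three columns.
    let schema = Arc::new(Schema::new(vec![
        Field::new("tag", DataType::Utf8, false),
        Field::new("value", DataType::Int64, false),
        Field::new("time", DataType::Int64, false),
    ]));
    let batch = RecordBatch::try_new(
        schema,
        vec![
            Arc::new(StringArray::from(vec!["a", "b"])),
            Arc::new(Int64Array::from(vec![1, 2])),
            Arc::new(Int64Array::from(vec![10, 20])),
        ],
    )?;

    // Keep only "time" and "value" (column indices 2 and 1), mirroring how a
    // Projection::Some(..) is resolved to indices before projecting a batch.
    let projected = batch.project(&[2, 1])?;
    assert_eq!(projected.num_columns(), 2);
    assert_eq!(projected.schema().field(0).name(), "time");
    Ok(())
}
```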
@@ -13,9 +13,10 @@ use datafusion::{
     datasource::TableProvider,
     error::DataFusionError,
 };
+use datafusion_util::config::DEFAULT_SCHEMA;
 use iox_query::{
     exec::{ExecutionContextProvider, ExecutorType, IOxSessionContext},
-    QueryChunk, QueryCompletedToken, QueryDatabase, QueryText, DEFAULT_SCHEMA,
+    QueryChunk, QueryCompletedToken, QueryDatabase, QueryText,
 };
 use observability_deps::tracing::{debug, trace};
 use predicate::{rpc_predicate::QueryDatabaseMeta, Predicate};

@@ -312,7 +312,6 @@ impl QuerierTable {
                 },
             ))
         })
-        .map(Some)
         .collect();

         // Prune on the most basic summary data (timestamps and column names) before trying to fully load the chunks

@@ -521,7 +520,7 @@ mod tests {
     use iox_query::exec::IOxSessionContext;
     use iox_tests::util::{TestCatalog, TestParquetFileBuilder, TestTable};
     use predicate::Predicate;
-    use schema::{builder::SchemaBuilder, selection::Selection, InfluxFieldType};
+    use schema::{builder::SchemaBuilder, InfluxFieldType};
     use std::sync::Arc;
     use test_helpers::maybe_start_logging;
     use trace::{span::SpanStatus, RingBufferTraceCollector};

@@ -712,8 +711,8 @@ mod tests {
         .await
         .unwrap();
         assert_eq!(chunks.len(), 1);

         let chunk = &chunks[0];
         assert_eq!(chunk.chunk_type(), "IngesterPartition");

         // verify chunk schema
         let schema = chunk.schema();

@@ -740,17 +739,9 @@ mod tests {

         // verify chunk data
         let batches = chunk
-            .read_filter(
-                IOxSessionContext::with_testing(),
-                &Default::default(),
-                Selection::All,
-            )
-            .unwrap()
-            .collect::<Vec<_>>()
-            .await
-            .into_iter()
-            .map(Result::unwrap)
-            .collect::<Vec<_>>();
+            .data()
+            .read_to_batches(chunk.schema(), IOxSessionContext::with_testing().inner())
+            .await;
         let expected = vec![
             "+-----+------+------+--------------------------------+",
             "| foo | tag1 | tag2 | time |",
@@ -8,7 +8,7 @@ use data_types::{ChunkId, SequenceNumber, ShardIndex};
 use iox_catalog::interface::get_schema_by_name;
 use iox_tests::util::{TestCatalog, TestPartition, TestShard, TestTable};
 use mutable_batch_lp::test_helpers::lp_to_mutable_batch;
-use schema::{selection::Selection, sort::SortKey, Schema};
+use schema::{sort::SortKey, Projection, Schema};
 use sharder::JumpHash;
 use std::sync::Arc;
 use tokio::runtime::Handle;

@@ -49,7 +49,7 @@ pub async fn querier_table(catalog: &Arc<TestCatalog>, table: &Arc<TestTable>) -

 /// Convert the line protocol in `lp `to a RecordBatch
 pub(crate) fn lp_to_record_batch(lp: &str) -> RecordBatch {
-    lp_to_mutable_batch(lp).1.to_arrow(Selection::All).unwrap()
+    lp_to_mutable_batch(lp).1.to_arrow(Projection::All).unwrap()
 }

 /// Helper for creating IngesterPartitions
@@ -25,7 +25,7 @@ iox_tests = { path = "../iox_tests" }
 itertools = "0.10"
 mutable_batch = { path = "../mutable_batch" }
 mutable_batch_lp = { path = "../mutable_batch_lp" }
-once_cell = { version = "1.15.0", features = ["parking_lot"] }
+once_cell = { version = "1.16.0", features = ["parking_lot"] }
 parquet_file = { version = "0.1.0", path = "../parquet_file" }
 predicate = { path = "../predicate" }
 querier = { path = "../querier" }
@@ -15,6 +15,8 @@ mod runner;
 #[cfg(test)]
 pub mod sql;
+#[cfg(test)]
+pub mod sql_metrics;
 #[cfg(test)]
 pub mod table_schema;

 pub mod db;
@@ -30,7 +30,7 @@ use querier::{
     IngesterConnectionImpl, IngesterFlightClient, IngesterFlightClientError,
     IngesterFlightClientQueryData, QuerierCatalogCache, QuerierNamespace,
 };
-use schema::selection::Selection;
+use schema::Projection;
 use sharder::JumpHash;
 use std::{
     cmp::Ordering,

@@ -806,11 +806,17 @@ impl MockIngester {
                 .await;
             partition_ids.push(partition.partition.id);
         }
+
+        let ids = tables
+            .iter()
+            .map(|v| (v.table.name.clone(), v.table.id))
+            .collect();
+
         for table in tables {
             let schema = mutable_batches
                 .get(&table.table.name)
                 .unwrap()
-                .schema(Selection::All)
+                .schema(Projection::All)
                 .unwrap();

             for (t, field) in schema.iter() {

@@ -829,7 +835,9 @@ impl MockIngester {
         );
         let op = DmlOperation::Write(DmlWrite::new(
             self.ns.namespace.name.clone(),
+            self.ns.namespace.id,
             mutable_batches,
+            ids,
             PartitionKey::from(partition_key),
             meta,
         ));
@@ -0,0 +1,99 @@
+use std::sync::Arc;
+
+use crate::scenarios::{DbScenario, DbSetup, OneMeasurementFourChunksWithDuplicatesParquetOnly};
+use arrow::record_batch::RecordBatch;
+use arrow_util::assert_batches_sorted_eq;
+use datafusion::physical_plan::{
+    display::DisplayableExecutionPlan,
+    metrics::{MetricValue, MetricsSet},
+};
+use iox_query::{frontend::sql::SqlQueryPlanner, provider::parquet_metrics};
+
+#[tokio::test]
+async fn sql_predicate_pushdown() {
+    test_helpers::maybe_start_logging();
+
+    // parquet pushdown is only relevant for parquet
+    let db_setup = OneMeasurementFourChunksWithDuplicatesParquetOnly {};
+
+    // This predicate should result in rows being pruned, and we verify this with metrics
+    let sql = "SELECT * from h2o where state = 'MA'".to_string();
+
+    let expected = vec![
+        "+------+---------+----------+----------+-------+--------------------------------+",
+        "| area | city | max_temp | min_temp | state | time |",
+        "+------+---------+----------+----------+-------+--------------------------------+",
+        "| | Andover | 69.2 | | MA | 1970-01-01T00:00:00.000000250Z |",
+        "| | Boston | | 67.4 | MA | 1970-01-01T00:00:00.000000600Z |",
+        "| | Boston | | 70.4 | MA | 1970-01-01T00:00:00.000000050Z |",
+        "| | Boston | 75.4 | 65.4 | MA | 1970-01-01T00:00:00.000000250Z |",
+        "| | Boston | 82.67 | 65.4 | MA | 1970-01-01T00:00:00.000000400Z |",
+        "| | Reading | | 53.4 | MA | 1970-01-01T00:00:00.000000250Z |",
+        "| | Reading | | 60.4 | MA | 1970-01-01T00:00:00.000000600Z |",
+        "| 742 | Bedford | 78.75 | 71.59 | MA | 1970-01-01T00:00:00.000000150Z |",
+        "| 742 | Bedford | 88.75 | | MA | 1970-01-01T00:00:00.000000600Z |",
+        "| 750 | Bedford | 80.75 | 65.22 | MA | 1970-01-01T00:00:00.000000400Z |",
+        "+------+---------+----------+----------+-------+--------------------------------+",
+    ];
+
+    for scenario in db_setup.make().await {
+        let DbScenario {
+            scenario_name, db, ..
+        } = scenario;
+
+        println!("Running scenario '{}'", scenario_name);
+        println!("SQL: '{:#?}'", sql);
+        let planner = SqlQueryPlanner::default();
+        let ctx = db.new_query_context(None);
+
+        let physical_plan = planner
+            .query(&sql, &ctx)
+            .await
+            .expect("built plan successfully");
+
+        let results: Vec<RecordBatch> = ctx
+            .collect(Arc::clone(&physical_plan))
+            .await
+            .expect("Running plan");
+        assert_batches_sorted_eq!(expected, &results);
+
+        println!(
+            "Physical plan:\n\n{}",
+            DisplayableExecutionPlan::new(physical_plan.as_ref()).indent()
+        );
+
+        // verify that pushdown was enabled and that it filtered rows
+        let metrics = parquet_metrics(physical_plan);
+        assert_eq!(
+            metric_value_sum(&metrics, "pushdown_rows_filtered"),
+            8,
+            "Unexpected number of rows filtered in:\n\n{:#?}",
+            metrics
+        );
+    }
+}
+
+/// returns the sum of all the metrics with the specified name
+/// the returned set.
+///
+/// Count: returns value
+///
+/// Panics if no such metric.
+fn metric_value_sum(metrics: &[MetricsSet], metric_name: &str) -> usize {
+    metrics.iter().map(|m| metric_value(m, metric_name)).sum()
+}
+
+fn metric_value(metrics: &MetricsSet, metric_name: &str) -> usize {
+    let sum = metrics
+        .sum(|m| matches!(m.value(), MetricValue::Count { name, .. } if name == metric_name));
+
+    match sum {
+        Some(MetricValue::Count { count, .. }) => count.value(),
+        _ => {
+            panic!(
+                "Expected metric not found. Looking for '{}' in\n\n{:#?}",
+                metric_name, metrics
+            );
+        }
+    }
+}
@@ -2,7 +2,7 @@

 use arrow::datatypes::DataType;
 use iox_query::QueryChunk;
-use schema::selection::Selection;
+use schema::Projection;
 use schema::{builder::SchemaBuilder, sort::SortKey, Schema, TIME_COLUMN_NAME};

 use super::scenarios::*;

@@ -14,7 +14,7 @@ use super::scenarios::*;
 /// output
 async fn run_table_schema_test_case<D>(
     db_setup: D,
-    selection: Selection<'_>,
+    selection: Projection<'_>,
     table_name: &str,
     expected_schema: Schema,
     expected_sort_key: Option<&SortKey>,

@@ -91,7 +91,7 @@ async fn list_schema_cpu_all() {

     run_table_schema_test_case(
         TwoMeasurements {},
-        Selection::All,
+        Projection::All,
         "cpu",
         expected_schema,
         Some(&sort_key),

@@ -114,7 +114,7 @@ async fn list_schema_cpu_all_set_sort_key() {

     run_table_schema_test_case(
         TwoMeasurements {},
-        Selection::All,
+        Projection::All,
         "cpu",
         expected_schema,
         Some(&sort_key),

@@ -137,7 +137,7 @@ async fn list_schema_disk_all() {

     run_table_schema_test_case(
         TwoMeasurements {},
-        Selection::All,
+        Projection::All,
         "disk",
         expected_schema,
         None,

@@ -155,7 +155,7 @@ async fn list_schema_cpu_selection() {
         .unwrap();

     // Pick an order that is not lexographic
-    let selection = Selection::Some(&["user", "region"]);
+    let selection = Projection::Some(&["user", "region"]);

     run_table_schema_test_case(TwoMeasurements {}, selection, "cpu", expected_schema, None).await;
 }

@@ -171,7 +171,7 @@ async fn list_schema_disk_selection() {
         .unwrap();

     // Pick an order that is not lexicographic
-    let selection = Selection::Some(&["time", "bytes"]);
+    let selection = Projection::Some(&["time", "bytes"]);

     run_table_schema_test_case(TwoMeasurements {}, selection, "disk", expected_schema, None).await;
 }

@@ -189,7 +189,7 @@ async fn list_schema_location_all() {

     run_table_schema_test_case(
         TwoMeasurementsUnsignedType {},
-        Selection::All,
+        Projection::All,
         "restaurant",
         expected_schema,
         None,
@@ -13,7 +13,7 @@ use router::{
     dml_handlers::{DmlHandler, SchemaValidator},
     namespace_cache::{MemoryNamespaceCache, ShardedCache},
 };
-use schema::selection::Selection;
+use schema::Projection;
 use tokio::runtime::Runtime;

 static NAMESPACE: Lazy<DatabaseName<'static>> = Lazy::new(|| "bananas".try_into().unwrap());

@@ -55,7 +55,7 @@ fn bench(group: &mut BenchmarkGroup<WallTime>, tables: usize, columns_per_table:
     let write = lp_to_writes(&generate_lp(tables, columns_per_table));
     let column_count = write
         .values()
-        .fold(0, |acc, b| acc + b.schema(Selection::All).unwrap().len());
+        .fold(0, |acc, b| acc + b.schema(Projection::All).unwrap().len());

     group.throughput(Throughput::Elements(column_count as _));
     group.bench_function(format!("{tables}x{columns_per_table}"), |b| {
@@ -79,17 +79,24 @@ where
     async fn delete(
         &self,
         namespace: &DatabaseName<'static>,
+        namespace_id: NamespaceId,
         table_name: &str,
         predicate: &DeletePredicate,
         span_ctx: Option<SpanContext>,
     ) -> Result<(), Self::DeleteError> {
         self.first
-            .delete(namespace, table_name, predicate, span_ctx.clone())
+            .delete(
+                namespace,
+                namespace_id,
+                table_name,
+                predicate,
+                span_ctx.clone(),
+            )
             .await
             .map_err(Into::into)?;

         self.second
-            .delete(namespace, table_name, predicate, span_ctx)
+            .delete(namespace, namespace_id, table_name, predicate, span_ctx)
             .await
             .map_err(Into::into)
     }

@@ -75,12 +75,13 @@ where
     async fn delete(
         &self,
         namespace: &DatabaseName<'static>,
+        namespace_id: NamespaceId,
         table_name: &str,
         predicate: &DeletePredicate,
         span_ctx: Option<SpanContext>,
     ) -> Result<(), Self::DeleteError> {
         self.inner
-            .delete(namespace, table_name, predicate, span_ctx)
+            .delete(namespace, namespace_id, table_name, predicate, span_ctx)
             .await
     }
 }

@@ -105,6 +105,7 @@ where
     async fn delete(
         &self,
         namespace: &DatabaseName<'static>,
+        namespace_id: NamespaceId,
         table_name: &str,
         predicate: &DeletePredicate,
         span_ctx: Option<SpanContext>,

@@ -116,7 +117,7 @@ where

         let res = self
             .inner
-            .delete(namespace, table_name, predicate, span_ctx)
+            .delete(namespace, namespace_id, table_name, predicate, span_ctx)
             .await;

         // Avoid exploding if time goes backwards - simply drop the measurement

@@ -256,7 +257,7 @@ mod tests {
         };

         decorator
-            .delete(&ns, "a table", &pred, Some(span))
+            .delete(&ns, NamespaceId::new(42), "a table", &pred, Some(span))
             .await
             .expect("inner handler configured to succeed");

@@ -284,7 +285,7 @@ mod tests {
         };

         decorator
-            .delete(&ns, "a table", &pred, Some(span))
+            .delete(&ns, NamespaceId::new(42), "a table", &pred, Some(span))
            .await
            .expect_err("inner handler configured to fail");

@@ -19,6 +19,7 @@ pub enum MockDmlHandlerCall<W> {
     },
     Delete {
         namespace: String,
+        namespace_id: NamespaceId,
         table: String,
         predicate: DeletePredicate,
     },

@@ -121,6 +122,7 @@ where
     async fn delete(
         &self,
         namespace: &DatabaseName<'static>,
+        namespace_id: NamespaceId,
         table_name: &str,
         predicate: &DeletePredicate,
         _span_ctx: Option<SpanContext>,

@@ -129,6 +131,7 @@ where
             self,
             MockDmlHandlerCall::Delete {
                 namespace: namespace.into(),
+                namespace_id,
                 table: table_name.to_owned(),
                 predicate: predicate.clone(),
             },

@@ -43,11 +43,12 @@ where
     async fn delete(
         &self,
         namespace: &DatabaseName<'static>,
+        namespace_id: NamespaceId,
         table_name: &str,
         predicate: &DeletePredicate,
         _span_ctx: Option<SpanContext>,
     ) -> Result<(), Self::DeleteError> {
-        info!(%namespace, %table_name, ?predicate, "dropping delete operation");
+        info!(%namespace, %namespace_id, %table_name, ?predicate, "dropping delete operation");
         Ok(())
     }
 }
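All of the `delete` hunks above make the same change: the DML handler chain now receives the resolved `NamespaceId` alongside the namespace name. A condensed, synchronous sketch of the updated call shape, with simplified stand-in types rather than the real router traits:

```rust
/// Stand-in newtype mirroring data_types::NamespaceId.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct NamespaceId(i64);

impl NamespaceId {
    fn new(v: i64) -> Self {
        Self(v)
    }
}

/// Simplified handler trait: namespace_id is now threaded through every
/// layer (fan-out, instrumentation, mock, logging) next to the name.
trait DmlHandler {
    fn delete(
        &self,
        namespace: &str,
        namespace_id: NamespaceId,
        table_name: &str,
        predicate: &str,
    );
}

struct LoggingHandler;

impl DmlHandler for LoggingHandler {
    fn delete(&self, namespace: &str, namespace_id: NamespaceId, table_name: &str, predicate: &str) {
        // Mirrors the nop handler: log the request, including the id, and drop it.
        println!(
            "dropping delete: ns={namespace} ns_id={namespace_id:?} table={table_name} pred={predicate}"
        );
    }
}

fn main() {
    LoggingHandler.delete("bananas", NamespaceId::new(42), "a table", "x > 1");
}
```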
@@ -1,5 +1,7 @@
 use async_trait::async_trait;
-use data_types::{DatabaseName, DeletePredicate, NamespaceId, PartitionKey, PartitionTemplate};
+use data_types::{
+    DatabaseName, DeletePredicate, NamespaceId, PartitionKey, PartitionTemplate, TableId,
+};
 use hashbrown::HashMap;
 use mutable_batch::{MutableBatch, PartitionWrite, WritePayload};
 use observability_deps::tracing::*;

@@ -64,7 +66,7 @@ impl DmlHandler for Partitioner {
     type WriteError = PartitionError;
     type DeleteError = PartitionError;

-    type WriteInput = HashMap<String, MutableBatch>;
+    type WriteInput = HashMap<TableId, (String, MutableBatch)>;
     type WriteOutput = Vec<Partitioned<Self::WriteInput>>;

     /// Partition the per-table [`MutableBatch`].

@@ -76,9 +78,10 @@ impl DmlHandler for Partitioner {
         _span_ctx: Option<SpanContext>,
     ) -> Result<Self::WriteOutput, Self::WriteError> {
         // A collection of partition-keyed, per-table MutableBatch instances.
-        let mut partitions: HashMap<PartitionKey, HashMap<_, MutableBatch>> = HashMap::default();
+        let mut partitions: HashMap<PartitionKey, HashMap<_, (String, MutableBatch)>> =
+            HashMap::default();

-        for (table_name, batch) in batch {
+        for (table_id, (table_name, batch)) in batch {
             // Partition the table batch according to the configured partition
             // template and write it into the partition-keyed map.
             for (partition_key, partition_payload) in

@@ -87,10 +90,12 @@ impl DmlHandler for Partitioner {
                 let partition = partitions.entry(partition_key).or_default();
                 let table_batch = partition
                     .raw_entry_mut()
-                    .from_key(&table_name)
-                    .or_insert_with(|| (table_name.to_owned(), MutableBatch::default()));
+                    .from_key(&table_id)
+                    .or_insert_with(|| {
+                        (table_id, (table_name.to_owned(), MutableBatch::default()))
+                    });

-                partition_payload.write_to_batch(table_batch.1)?;
+                partition_payload.write_to_batch(&mut table_batch.1 .1)?;
             }
         }

@@ -104,6 +109,7 @@ impl DmlHandler for Partitioner {
     async fn delete(
         &self,
         _namespace: &DatabaseName<'static>,
+        _namespace_id: NamespaceId,
         _table_name: &str,
         _predicate: &DeletePredicate,
         _span_ctx: Option<SpanContext>,

@@ -119,9 +125,17 @@ mod tests {

     use super::*;

-    /// The default timestamp applied to test LP if the write does not specify
-    /// one.
-    const DEFAULT_TIMESTAMP_NANOS: i64 = 42000000000000000;
+    // Parse `lp` into a table-keyed MutableBatch map.
+    fn lp_to_writes(lp: &str) -> HashMap<TableId, (String, MutableBatch)> {
+        let (writes, _) = mutable_batch_lp::lines_to_batches_stats(lp, 42)
+            .expect("failed to build test writes from LP");
+
+        writes
+            .into_iter()
+            .enumerate()
+            .map(|(i, (name, data))| (TableId::new(i as _), (name, data)))
+            .collect()
+    }
+
     // Generate a test case that partitions "lp".
     //

@@ -144,7 +158,7 @@ mod tests {
                 let partitioner = Partitioner::new(partition_template);
                 let ns = DatabaseName::new("bananas").expect("valid db name");

-                let (writes, _) = mutable_batch_lp::lines_to_batches_stats($lp, DEFAULT_TIMESTAMP_NANOS).expect("failed to parse test LP");
+                let writes = lp_to_writes($lp);

                 let handler_ret = partitioner.write(&ns, NamespaceId::new(42), writes, None).await;
                 assert_matches!(handler_ret, $($want_handler_ret)+);

@@ -156,8 +170,7 @@ mod tests {
                     // Extract the table names in this partition
                     let mut tables = partition
                         .payload
-                        .keys()
-                        .cloned()
+                        .values().map(|v| v.0.clone())
                        .collect::<Vec<String>>();

                    tables.sort();
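The partitioner's `WriteInput` is now keyed by `TableId`, with the table name carried alongside the batch. A toy sketch of grouping such a map by partition key while preserving the `(TableId, (name, batch))` shape; plain `std::collections::HashMap` and string "batches" stand in for the real hashbrown map and `MutableBatch`:

```rust
use std::collections::HashMap;

/// Stand-in for data_types::TableId.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct TableId(i64);

/// Group per-table payloads by a partition key, keeping the map keyed by
/// TableId and the human-readable name next to the payload, as the new
/// WriteInput = HashMap<TableId, (String, MutableBatch)> does.
fn partition(
    writes: HashMap<TableId, (String, String)>,
    key_of: impl Fn(&str) -> String,
) -> HashMap<String, HashMap<TableId, (String, String)>> {
    let mut partitions: HashMap<String, HashMap<TableId, (String, String)>> = HashMap::new();

    for (table_id, (table_name, payload)) in writes {
        let partition_key = key_of(&payload);
        partitions
            .entry(partition_key)
            .or_default()
            .insert(table_id, (table_name, payload));
    }

    partitions
}

fn main() {
    let mut writes = HashMap::new();
    writes.insert(TableId(1), ("cpu".to_string(), "2022-10-31 ...".to_string()));
    writes.insert(TableId(2), ("mem".to_string(), "2022-11-01 ...".to_string()));

    // Pretend the partition key is the date prefix of the payload.
    let parts = partition(writes, |payload| payload[..10].to_string());
    assert_eq!(parts.len(), 2);
    println!("{} partition(s)", parts.len());
}
```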