refactor: move SessionConfig creation into datafusion_utils (#6011)
Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com>
pull/24376/head
parent
00953460fb
commit
9c1f0a3644
|
|
@ -0,0 +1,24 @@
|
|||
use datafusion::{config::OPT_COALESCE_TARGET_BATCH_SIZE, prelude::SessionConfig};
|
||||
|
||||
/// The default catalog name: the catalog SQL queries use when none is specified.
|
||||
pub const DEFAULT_CATALOG: &str = "public";
|
||||
/// The default schema name: the schema SQL queries use when none is specified.
|
||||
pub const DEFAULT_SCHEMA: &str = "iox";
|
||||
|
||||
/// The maximum number of rows that DataFusion should create in each RecordBatch
|
||||
pub const BATCH_SIZE: usize = 8 * 1024;
|
||||
|
||||
/// Value that `iox_session_config` sets for `OPT_COALESCE_TARGET_BATCH_SIZE`:
/// half of [`BATCH_SIZE`].
const COALESCE_BATCH_SIZE: usize = BATCH_SIZE / 2;
|
||||
|
||||
/// Return a SessionConfig object configured for IOx
|
||||
pub fn iox_session_config() -> SessionConfig {
|
||||
SessionConfig::new()
|
||||
.with_batch_size(BATCH_SIZE)
|
||||
.set_u64(
|
||||
OPT_COALESCE_TARGET_BATCH_SIZE,
|
||||
COALESCE_BATCH_SIZE.try_into().unwrap(),
|
||||
)
|
||||
.create_default_catalog_and_schema(true)
|
||||
.with_information_schema(true)
|
||||
.with_default_catalog_and_schema(DEFAULT_CATALOG, DEFAULT_SCHEMA)
|
||||
}
|
||||
|
|
@ -10,6 +10,7 @@
|
|||
//! [datafusion_optimizer::utils](https://docs.rs/datafusion-optimizer/13.0.0/datafusion_optimizer/utils/index.html)
|
||||
//! for expression manipulation functions.
|
||||
|
||||
pub mod config;
|
||||
pub mod sender;
|
||||
pub mod watch;
|
||||
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@ mod schema_pivot;
|
|||
pub mod seriesset;
|
||||
pub(crate) mod split;
|
||||
pub mod stringset;
|
||||
pub use context::{DEFAULT_CATALOG, DEFAULT_SCHEMA};
|
||||
use executor::DedicatedExecutor;
|
||||
use object_store::DynObjectStore;
|
||||
use parquet_file::storage::StorageId;
|
||||
|
|
|
|||
|
|
@ -27,7 +27,6 @@ use arrow::record_batch::RecordBatch;
|
|||
use async_trait::async_trait;
|
||||
use datafusion::{
|
||||
catalog::catalog::CatalogProvider,
|
||||
config::OPT_COALESCE_TARGET_BATCH_SIZE,
|
||||
execution::{
|
||||
context::{QueryPlanner, SessionState, TaskContext},
|
||||
runtime_env::RuntimeEnv,
|
||||
|
|
@ -41,10 +40,10 @@ use datafusion::{
|
|||
},
|
||||
prelude::*,
|
||||
};
|
||||
use datafusion_util::config::{iox_session_config, DEFAULT_CATALOG};
|
||||
use executor::DedicatedExecutor;
|
||||
use futures::TryStreamExt;
|
||||
use observability_deps::tracing::debug;
|
||||
use parquet_file::serialize::ROW_GROUP_WRITE_SIZE;
|
||||
use query_functions::selectors::register_selector_aggregates;
|
||||
use std::{convert::TryInto, fmt, sync::Arc};
|
||||
use trace::{
|
||||
|
|
@ -55,11 +54,6 @@ use trace::{
|
|||
// Reuse DataFusion error and Result types for this module
|
||||
pub use datafusion::error::{DataFusionError as Error, Result};
|
||||
|
||||
// The default catalog name - this impacts what SQL queries use if not specified
|
||||
pub const DEFAULT_CATALOG: &str = "public";
|
||||
// The default schema name - this impacts what SQL queries use if not specified
|
||||
pub const DEFAULT_SCHEMA: &str = "iox";
|
||||
|
||||
/// This structure implements the DataFusion notion of "query planner"
|
||||
/// and is needed to create plans with the IOx extension nodes.
|
||||
struct IOxQueryPlanner {}
|
||||
|
|
@ -175,26 +169,9 @@ impl fmt::Debug for IOxSessionConfig {
|
|||
}
|
||||
}
|
||||
|
||||
const BATCH_SIZE: usize = 8 * 1024;
|
||||
const COALESCE_BATCH_SIZE: usize = BATCH_SIZE / 2;
|
||||
|
||||
// ensure read and write work well together
|
||||
// Skip clippy due to <https://github.com/rust-lang/rust-clippy/issues/8159>.
|
||||
#[allow(clippy::assertions_on_constants)]
|
||||
const _: () = assert!(ROW_GROUP_WRITE_SIZE % BATCH_SIZE == 0);
|
||||
|
||||
impl IOxSessionConfig {
|
||||
pub(super) fn new(exec: DedicatedExecutor, runtime: Arc<RuntimeEnv>) -> Self {
|
||||
let session_config = SessionConfig::new()
|
||||
.with_batch_size(BATCH_SIZE)
|
||||
// TODO add function in SessionConfig
|
||||
.set_u64(
|
||||
OPT_COALESCE_TARGET_BATCH_SIZE,
|
||||
COALESCE_BATCH_SIZE.try_into().unwrap(),
|
||||
)
|
||||
.create_default_catalog_and_schema(true)
|
||||
.with_information_schema(true)
|
||||
.with_default_catalog_and_schema(DEFAULT_CATALOG, DEFAULT_SCHEMA);
|
||||
let session_config = iox_session_config();
|
||||
|
||||
Self {
|
||||
exec,
|
||||
|
|
|
|||
|
|
@ -32,7 +32,6 @@ pub mod pruning;
|
|||
pub mod statistics;
|
||||
pub mod util;
|
||||
|
||||
pub use exec::context::{DEFAULT_CATALOG, DEFAULT_SCHEMA};
|
||||
pub use frontend::common::ScanPlanBuilder;
|
||||
pub use query_functions::group_by::{Aggregate, WindowDuration};
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ use std::{io::Write, sync::Arc};
|
|||
|
||||
use arrow::error::ArrowError;
|
||||
use datafusion::physical_plan::SendableRecordBatchStream;
|
||||
use datafusion_util::config::BATCH_SIZE;
|
||||
use futures::{pin_mut, TryStreamExt};
|
||||
use observability_deps::tracing::{debug, trace, warn};
|
||||
use parquet::{
|
||||
|
|
@ -21,6 +22,11 @@ use crate::metadata::{IoxMetadata, METADATA_KEY};
|
|||
/// Parquet row group write size
|
||||
pub const ROW_GROUP_WRITE_SIZE: usize = 1024 * 1024;
|
||||
|
||||
/// Compile-time check that `ROW_GROUP_WRITE_SIZE` is an exact multiple of
/// `BATCH_SIZE`, to ensure read and write work well together.
|
||||
/// The lint is allowed because of <https://github.com/rust-lang/rust-clippy/issues/8159>.
|
||||
#[allow(clippy::assertions_on_constants)]
|
||||
const _: () = assert!(ROW_GROUP_WRITE_SIZE % BATCH_SIZE == 0);
|
||||
|
||||
/// [`RecordBatch`] to Parquet serialisation errors.
|
||||
///
|
||||
/// [`RecordBatch`]: arrow::record_batch::RecordBatch
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ use datafusion::{
|
|||
},
|
||||
prelude::SessionContext,
|
||||
};
|
||||
use datafusion_util::config::iox_session_config;
|
||||
use futures::TryStreamExt;
|
||||
use object_store::{DynObjectStore, ObjectMeta};
|
||||
use observability_deps::tracing::*;
|
||||
|
|
@ -147,7 +148,7 @@ impl ParquetStorage {
|
|||
pub fn test_df_context(&self) -> SessionContext {
|
||||
// set up "fake" DataFusion session
|
||||
let object_store = Arc::clone(&self.object_store);
|
||||
let session_ctx = SessionContext::new();
|
||||
let session_ctx = SessionContext::with_config(iox_session_config());
|
||||
let task_ctx = Arc::new(TaskContext::from(&session_ctx));
|
||||
task_ctx
|
||||
.runtime_env()
|
||||
|
|
|
|||
|
|
@ -13,9 +13,10 @@ use datafusion::{
|
|||
datasource::TableProvider,
|
||||
error::DataFusionError,
|
||||
};
|
||||
use datafusion_util::config::DEFAULT_SCHEMA;
|
||||
use iox_query::{
|
||||
exec::{ExecutionContextProvider, ExecutorType, IOxSessionContext},
|
||||
QueryChunk, QueryCompletedToken, QueryDatabase, QueryText, DEFAULT_SCHEMA,
|
||||
QueryChunk, QueryCompletedToken, QueryDatabase, QueryText,
|
||||
};
|
||||
use observability_deps::tracing::{debug, trace};
|
||||
use predicate::{rpc_predicate::QueryDatabaseMeta, Predicate};
|
||||
|
|
|
|||
Loading…
Reference in New Issue