Merge branch 'main' into dom/write-rpc-client
commit
cc55ab384c
|
@ -247,16 +247,11 @@ impl TableProvider for ChunkTableProvider {
|
|||
// This debug shows the self.arrow_schema() includes all columns in all chunks
|
||||
// which means the schemas of all chunks are merged before invoking this scan
|
||||
debug!(schema=?self.arrow_schema(), "All chunks schema");
|
||||
// However, the schema of each chunk is still in its original form which does not
|
||||
// include the merged columns of other chunks. The code below (put in comments on purpose) proves it
|
||||
// for chunk in chunks.clone() {
|
||||
// trace!("Schema of chunk {}: {:#?}", chunk.id(), chunk.schema());
|
||||
// }
|
||||
|
||||
// Note that `filters` don't actually need to be evaluated in
|
||||
// the scan for the plans to be correct, they are an extra
|
||||
// optimization for providers which can offer them
|
||||
let predicate = Predicate::default().with_pushdown_exprs(filters);
|
||||
let predicate = Predicate::default().with_exprs(filters.to_vec());
|
||||
let deduplicate = Deduplicater::new(self.ctx.child_ctx("deduplicator"))
|
||||
.enable_deduplication(self.deduplication());
|
||||
|
||||
|
|
|
@ -27,7 +27,7 @@ use datafusion::{
|
|||
binary_expr,
|
||||
expr_visitor::{ExprVisitable, ExpressionVisitor, Recursion},
|
||||
utils::expr_to_columns,
|
||||
BinaryExpr, Operator,
|
||||
BinaryExpr,
|
||||
},
|
||||
optimizer::utils::split_conjunction,
|
||||
physical_optimizer::pruning::{PruningPredicate, PruningStatistics},
|
||||
|
@ -443,9 +443,8 @@ impl Predicate {
|
|||
}
|
||||
|
||||
/// Adds an expression to the list of general purpose predicates
|
||||
pub fn with_expr(mut self, expr: Expr) -> Self {
|
||||
self.exprs.push(expr);
|
||||
self
|
||||
pub fn with_expr(self, expr: Expr) -> Self {
|
||||
self.with_exprs([expr])
|
||||
}
|
||||
|
||||
/// Adds a ValueExpr to the list of value expressions
|
||||
|
@ -489,6 +488,12 @@ impl Predicate {
|
|||
self
|
||||
}
|
||||
|
||||
/// Adds all expressions to the list of general purpose predicates
|
||||
pub fn with_exprs(mut self, filters: impl IntoIterator<Item = Expr>) -> Self {
|
||||
self.exprs.extend(filters.into_iter());
|
||||
self
|
||||
}
|
||||
|
||||
/// Remove any clauses of this predicate that can not be run before deduplication.
|
||||
///
|
||||
/// See <https://github.com/influxdata/influxdb_iox/issues/6066> for more details.
|
||||
|
@ -536,60 +541,6 @@ impl Predicate {
|
|||
value_expr: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
/// Adds only the expressions from `filters` that can be pushed down to
|
||||
/// execution engines.
|
||||
pub fn with_pushdown_exprs(mut self, filters: &[Expr]) -> Self {
|
||||
// For each expression of the filters, recursively split it if it is an AND conjunction
|
||||
// For example, expression (x AND y) will be split into a vector of 2 expressions [x, y]
|
||||
let mut exprs = filters.iter().flat_map(split_conjunction);
|
||||
|
||||
// Only keep single_column and primitive binary expressions
|
||||
let mut pushdown_exprs: Vec<Expr> = vec![];
|
||||
let exprs_result = exprs.try_for_each::<_, Result<_, DataFusionError>>(|expr| {
|
||||
let mut columns = HashSet::new();
|
||||
expr_to_columns(expr, &mut columns)?;
|
||||
|
||||
if columns.len() == 1 && Self::primitive_binary_expr(expr) {
|
||||
pushdown_exprs.push(expr.clone());
|
||||
}
|
||||
Ok(())
|
||||
});
|
||||
|
||||
match exprs_result {
|
||||
Ok(()) => {
|
||||
// Return the builder with only the pushdownable expressions on it.
|
||||
self.exprs.append(&mut pushdown_exprs);
|
||||
}
|
||||
Err(e) => {
|
||||
debug!("Error, {}, building push-down predicates for filters: {:#?}. No predicates are pushed down", e, filters);
|
||||
}
|
||||
}
|
||||
|
||||
self
|
||||
}
|
||||
|
||||
/// Return true if the given expression is a primitive binary expression of the form: `column op constant`
|
||||
/// and `op` must be a comparison operator
|
||||
pub fn primitive_binary_expr(expr: &Expr) -> bool {
|
||||
match expr {
|
||||
Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
|
||||
matches!(
|
||||
(&**left, &**right),
|
||||
(Expr::Column(_), Expr::Literal(_)) | (Expr::Literal(_), Expr::Column(_))
|
||||
) && matches!(
|
||||
op,
|
||||
Operator::Eq
|
||||
| Operator::NotEq
|
||||
| Operator::Lt
|
||||
| Operator::LtEq
|
||||
| Operator::Gt
|
||||
| Operator::GtEq
|
||||
)
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Wrapper around `Expr::BinaryExpr` where left input is known to be
|
||||
|
@ -719,94 +670,6 @@ mod tests {
|
|||
assert!(!p.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_pushdown_predicates() {
|
||||
let mut filters = vec![];
|
||||
|
||||
// state = CA
|
||||
let expr1 = col("state").eq(lit("CA"));
|
||||
filters.push(expr1);
|
||||
|
||||
// "price > 10"
|
||||
let expr2 = col("price").gt(lit(10));
|
||||
filters.push(expr2);
|
||||
|
||||
// a < 10 AND b >= 50 --> will be split to [a < 10, b >= 50]
|
||||
let expr3 = col("a").lt(lit(10)).and(col("b").gt_eq(lit(50)));
|
||||
filters.push(expr3);
|
||||
|
||||
// c != 3 OR d = 8 --> won't be pushed down
|
||||
let expr4 = col("c").not_eq(lit(3)).or(col("d").eq(lit(8)));
|
||||
filters.push(expr4);
|
||||
|
||||
// e is null --> won't be pushed down
|
||||
let expr5 = col("e").is_null();
|
||||
filters.push(expr5);
|
||||
|
||||
// f <= 60
|
||||
let expr6 = col("f").lt_eq(lit(60));
|
||||
filters.push(expr6);
|
||||
|
||||
// g is not null --> won't be pushed down
|
||||
let expr7 = col("g").is_not_null();
|
||||
filters.push(expr7);
|
||||
|
||||
// h + i --> won't be pushed down
|
||||
let expr8 = col("h") + col("i");
|
||||
filters.push(expr8);
|
||||
|
||||
// city = Boston
|
||||
let expr9 = col("city").eq(lit("Boston"));
|
||||
filters.push(expr9);
|
||||
|
||||
// city != Braintree
|
||||
let expr9 = col("city").not_eq(lit("Braintree"));
|
||||
filters.push(expr9);
|
||||
|
||||
// city != state --> won't be pushed down
|
||||
let expr10 = col("city").not_eq(col("state"));
|
||||
filters.push(expr10);
|
||||
|
||||
// city = state --> won't be pushed down
|
||||
let expr11 = col("city").eq(col("state"));
|
||||
filters.push(expr11);
|
||||
|
||||
// city_state = city + state --> won't be pushed down
|
||||
let expr12 = col("city_sate").eq(col("city") + col("state"));
|
||||
filters.push(expr12);
|
||||
|
||||
// city = city + 5 --> won't be pushed down
|
||||
let expr13 = col("city").eq(col("city") + lit(5));
|
||||
filters.push(expr13);
|
||||
|
||||
// city = city --> won't be pushed down
|
||||
let expr14 = col("city").eq(col("city"));
|
||||
filters.push(expr14);
|
||||
|
||||
// city + 5 = city --> won't be pushed down
|
||||
let expr15 = (col("city") + lit(5)).eq(col("city"));
|
||||
filters.push(expr15);
|
||||
|
||||
// 5 = city
|
||||
let expr16 = lit(5).eq(col("city"));
|
||||
filters.push(expr16);
|
||||
|
||||
println!(" --------------- Filters: {:#?}", filters);
|
||||
|
||||
// Expected pushdown predicates: [state = CA, price > 10, a < 10, b >= 50, f <= 60, city = Boston, city != Braintree, 5 = city]
|
||||
let predicate = Predicate::default().with_pushdown_exprs(&filters);
|
||||
|
||||
println!(" ------------- Predicates: {:#?}", predicate);
|
||||
assert_eq!(predicate.exprs.len(), 8);
|
||||
assert_eq!(predicate.exprs[0], col("state").eq(lit("CA")));
|
||||
assert_eq!(predicate.exprs[1], col("price").gt(lit(10)));
|
||||
assert_eq!(predicate.exprs[2], col("a").lt(lit(10)));
|
||||
assert_eq!(predicate.exprs[3], col("b").gt_eq(lit(50)));
|
||||
assert_eq!(predicate.exprs[4], col("f").lt_eq(lit(60)));
|
||||
assert_eq!(predicate.exprs[5], col("city").eq(lit("Boston")));
|
||||
assert_eq!(predicate.exprs[6], col("city").not_eq(lit("Braintree")));
|
||||
assert_eq!(predicate.exprs[7], lit(5).eq(col("city")));
|
||||
}
|
||||
#[test]
|
||||
fn predicate_display_ts() {
|
||||
// TODO make this a doc example?
|
||||
|
|
|
@ -37,9 +37,9 @@
|
|||
+-------+--------+--------------------------------+-----------+
|
||||
-- SQL: EXPLAIN SELECT * from restaurant where count > 200;
|
||||
-- Results After Normalizing UUIDs
|
||||
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| plan_type | plan |
|
||||
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
|
||||
| | Filter: CAST(restaurant.count AS Int64) > Int64(200) |
|
||||
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200)] |
|
||||
|
@ -47,14 +47,14 @@
|
|||
| | CoalesceBatchesExec: target_batch_size=4096 |
|
||||
| | FilterExec: CAST(count@0 AS Int64) > 200 |
|
||||
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
|
||||
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[count, system, time, town] |
|
||||
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=true, projection=[count, system, time, town] |
|
||||
| | |
|
||||
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
-- SQL: EXPLAIN SELECT * from restaurant where count > 200.0;
|
||||
-- Results After Normalizing UUIDs
|
||||
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| plan_type | plan |
|
||||
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
|
||||
| | Filter: CAST(restaurant.count AS Float64) > Float64(200) |
|
||||
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Float64) > Float64(200)] |
|
||||
|
@ -62,9 +62,9 @@
|
|||
| | CoalesceBatchesExec: target_batch_size=4096 |
|
||||
| | FilterExec: CAST(count@0 AS Float64) > 200 |
|
||||
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
|
||||
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[count, system, time, town] |
|
||||
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=true, projection=[count, system, time, town] |
|
||||
| | |
|
||||
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
-- SQL: EXPLAIN SELECT * from restaurant where system > 4.0;
|
||||
-- Results After Normalizing UUIDs
|
||||
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
|
@ -93,9 +93,9 @@
|
|||
+-------+--------+--------------------------------+-----------+
|
||||
-- SQL: EXPLAIN SELECT * from restaurant where count > 200 and town != 'tewsbury';
|
||||
-- Results After Normalizing UUIDs
|
||||
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| plan_type | plan |
|
||||
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
|
||||
| | Filter: CAST(restaurant.count AS Int64) > Int64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) |
|
||||
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury"))] |
|
||||
|
@ -103,9 +103,9 @@
|
|||
| | CoalesceBatchesExec: target_batch_size=4096 |
|
||||
| | FilterExec: CAST(count@0 AS Int64) > 200 AND town@3 != tewsbury |
|
||||
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
|
||||
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=town_min@0 != tewsbury OR tewsbury != town_max@1, projection=[count, system, time, town] |
|
||||
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=true AND town_min@0 != tewsbury OR tewsbury != town_max@1, projection=[count, system, time, town] |
|
||||
| | |
|
||||
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
-- SQL: SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence');
|
||||
-- Results After Sorting
|
||||
+-------+--------+--------------------------------+-----------+
|
||||
|
@ -118,9 +118,9 @@
|
|||
+-------+--------+--------------------------------+-----------+
|
||||
-- SQL: EXPLAIN SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence');
|
||||
-- Results After Normalizing UUIDs
|
||||
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| plan_type | plan |
|
||||
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
|
||||
| | Filter: CAST(restaurant.count AS Int64) > Int64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND (restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))) |
|
||||
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))] |
|
||||
|
@ -128,9 +128,9 @@
|
|||
| | CoalesceBatchesExec: target_batch_size=4096 |
|
||||
| | FilterExec: CAST(count@0 AS Int64) > 200 AND town@3 != tewsbury AND system@1 = 5 OR town@3 = lawrence |
|
||||
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
|
||||
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=town_min@0 != tewsbury OR tewsbury != town_max@1, projection=[count, system, time, town] |
|
||||
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=true AND town_min@0 != tewsbury OR tewsbury != town_max@1 AND system_min@2 <= 5 AND 5 <= system_max@3 OR town_min@0 <= lawrence AND lawrence <= town_max@1, projection=[count, system, time, town] |
|
||||
| | |
|
||||
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
-- SQL: SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence') and count < 40000;
|
||||
-- Results After Sorting
|
||||
+-------+--------+--------------------------------+-----------+
|
||||
|
@ -154,7 +154,7 @@
|
|||
| | FilterExec: CAST(restaurant.count AS Int64)restaurant.count@0 > 200 AND town@4 != tewsbury AND system@2 = 5 OR town@4 = lawrence AND CAST(restaurant.count AS Int64)restaurant.count@0 < 40000 |
|
||||
| | ProjectionExec: expr=[CAST(count@0 AS Int64) as CAST(restaurant.count AS Int64)restaurant.count, count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
|
||||
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
|
||||
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=town_min@0 != tewsbury OR tewsbury != town_max@1, projection=[count, system, time, town] |
|
||||
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=true AND town_min@0 != tewsbury OR tewsbury != town_max@1 AND system_min@2 <= 5 AND 5 <= system_max@3 OR town_min@0 <= lawrence AND lawrence <= town_max@1 AND true, projection=[count, system, time, town] |
|
||||
| | |
|
||||
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
-- SQL: SELECT * from restaurant where count > 200 and count < 40000;
|
||||
|
@ -182,7 +182,7 @@
|
|||
| | FilterExec: CAST(restaurant.count AS Int64)restaurant.count@0 > 200 AND CAST(restaurant.count AS Int64)restaurant.count@0 < 40000 |
|
||||
| | ProjectionExec: expr=[CAST(count@0 AS Int64) as CAST(restaurant.count AS Int64)restaurant.count, count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
|
||||
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
|
||||
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[count, system, time, town] |
|
||||
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=true AND true, projection=[count, system, time, town] |
|
||||
| | |
|
||||
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
-- SQL: SELECT * from restaurant where system > 4.0 and system < 7.0;
|
||||
|
@ -278,7 +278,7 @@
|
|||
| | CoalesceBatchesExec: target_batch_size=4096 |
|
||||
| | FilterExec: system@1 > 5 AND tewsbury != town@3 AND system@1 < 7 AND CAST(count@0 AS Int64) = 632 OR town@3 = reading |
|
||||
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
|
||||
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7, projection=[count, system, time, town] |
|
||||
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7 AND true OR town_min@1 <= reading AND reading <= town_max@2, projection=[count, system, time, town] |
|
||||
| | |
|
||||
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
-- SQL: SELECT * from restaurant where 5.0 < system and town != 'tewsbury' and system < 7.0 and (count = 632 or town = 'reading') and time > to_timestamp('1970-01-01T00:00:00.000000130+00:00');
|
||||
|
|
Loading…
Reference in New Issue