Merge branch 'main' into dom/write-rpc-client

pull/24376/head
kodiakhq[bot] 2022-11-18 16:22:19 +00:00 committed by GitHub
commit cc55ab384c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 65 additions and 207 deletions

View File

@ -247,16 +247,11 @@ impl TableProvider for ChunkTableProvider {
// This debug output shows that self.arrow_schema() includes all columns of all chunks,
// which means the schemas of all chunks are merged before this scan is invoked
debug!(schema=?self.arrow_schema(), "All chunks schema");
// However, the schema of each chunk is still in its original form which does not
// include the merged columns of other chunks. The code below (put in comments on purpose) proves it
// for chunk in chunks.clone() {
// trace!("Schema of chunk {}: {:#?}", chunk.id(), chunk.schema());
// }
// Note that `filters` don't actually need to be evaluated in
// the scan for the plans to be correct, they are an extra
// optimization for providers which can offer them
let predicate = Predicate::default().with_pushdown_exprs(filters);
let predicate = Predicate::default().with_exprs(filters.to_vec());
let deduplicate = Deduplicater::new(self.ctx.child_ctx("deduplicator"))
.enable_deduplication(self.deduplication());

View File

@ -27,7 +27,7 @@ use datafusion::{
binary_expr,
expr_visitor::{ExprVisitable, ExpressionVisitor, Recursion},
utils::expr_to_columns,
BinaryExpr, Operator,
BinaryExpr,
},
optimizer::utils::split_conjunction,
physical_optimizer::pruning::{PruningPredicate, PruningStatistics},
@ -443,9 +443,8 @@ impl Predicate {
}
/// Adds an expression to the list of general purpose predicates
pub fn with_expr(mut self, expr: Expr) -> Self {
self.exprs.push(expr);
self
pub fn with_expr(self, expr: Expr) -> Self {
self.with_exprs([expr])
}
/// Adds a ValueExpr to the list of value expressions
@ -489,6 +488,12 @@ impl Predicate {
self
}
/// Adds every expression yielded by `filters` to the list of general purpose
/// predicates, in iteration order.
pub fn with_exprs(mut self, filters: impl IntoIterator<Item = Expr>) -> Self {
    // `extend` accepts any `IntoIterator`, so an explicit `.into_iter()` is redundant.
    self.exprs.extend(filters);
    self
}
/// Remove any clauses of this predicate that can not be run before deduplication.
///
/// See <https://github.com/influxdata/influxdb_iox/issues/6066> for more details.
@ -536,60 +541,6 @@ impl Predicate {
value_expr: vec![],
}
}
/// Adds to this predicate only those expressions from `filters` that can be
/// pushed down to execution engines.
///
/// Every filter is first split on `AND`, so `(x AND y)` contributes `x` and `y`
/// as separate candidates. A candidate is kept only when it references exactly
/// one column and [`Self::primitive_binary_expr`] accepts it.
///
/// If collecting the columns of any candidate fails, the error is logged at
/// debug level and none of the expressions from `filters` are added.
pub fn with_pushdown_exprs(mut self, filters: &[Expr]) -> Self {
    let mut accepted: Vec<Expr> = Vec::new();
    let mut failure: Option<DataFusionError> = None;

    // Walk the AND-split candidates, stopping at the first column-collection error.
    for candidate in filters.iter().flat_map(split_conjunction) {
        let mut referenced = HashSet::new();
        if let Err(e) = expr_to_columns(candidate, &mut referenced) {
            failure = Some(e);
            break;
        }
        if referenced.len() == 1 && Self::primitive_binary_expr(candidate) {
            accepted.push(candidate.clone());
        }
    }

    if let Some(e) = failure {
        // All-or-nothing: candidates accepted before the error are discarded.
        debug!("Error, {}, building push-down predicates for filters: {:#?}. No predicates are pushed down", e, filters);
    } else {
        self.exprs.extend(accepted);
    }
    self
}
/// Returns true if `expr` is a primitive binary expression: one of the form
/// `column op constant` or `constant op column`, where `op` is a comparison
/// operator (`Eq`, `NotEq`, `Lt`, `LtEq`, `Gt` or `GtEq`).
pub fn primitive_binary_expr(expr: &Expr) -> bool {
    if let Expr::BinaryExpr(BinaryExpr { left, op, right }) = expr {
        let lhs: &Expr = left;
        let rhs: &Expr = right;

        // Exactly one side is a column and the other a literal, in either order.
        let column_with_literal = matches!(
            (lhs, rhs),
            (Expr::Column(_), Expr::Literal(_)) | (Expr::Literal(_), Expr::Column(_))
        );
        let is_comparison = matches!(
            op,
            Operator::Eq
                | Operator::NotEq
                | Operator::Lt
                | Operator::LtEq
                | Operator::Gt
                | Operator::GtEq
        );

        column_with_literal && is_comparison
    } else {
        false
    }
}
}
// Wrapper around `Expr::BinaryExpr` where left input is known to be
@ -719,94 +670,6 @@ mod tests {
assert!(!p.is_empty());
}
#[test]
fn test_pushdown_predicates() {
    let filters = vec![
        // state = CA --> pushed down
        col("state").eq(lit("CA")),
        // price > 10 --> pushed down
        col("price").gt(lit(10)),
        // a < 10 AND b >= 50 --> split into [a < 10, b >= 50], both pushed down
        col("a").lt(lit(10)).and(col("b").gt_eq(lit(50))),
        // c != 3 OR d = 8 --> won't be pushed down
        col("c").not_eq(lit(3)).or(col("d").eq(lit(8))),
        // e is null --> won't be pushed down
        col("e").is_null(),
        // f <= 60 --> pushed down
        col("f").lt_eq(lit(60)),
        // g is not null --> won't be pushed down
        col("g").is_not_null(),
        // h + i --> won't be pushed down
        col("h") + col("i"),
        // city = Boston --> pushed down
        col("city").eq(lit("Boston")),
        // city != Braintree --> pushed down
        col("city").not_eq(lit("Braintree")),
        // city != state --> won't be pushed down
        col("city").not_eq(col("state")),
        // city = state --> won't be pushed down
        col("city").eq(col("state")),
        // city_sate = city + state --> won't be pushed down (more than one column)
        col("city_sate").eq(col("city") + col("state")),
        // city = city + 5 --> won't be pushed down
        col("city").eq(col("city") + lit(5)),
        // city = city --> won't be pushed down
        col("city").eq(col("city")),
        // city + 5 = city --> won't be pushed down
        (col("city") + lit(5)).eq(col("city")),
        // 5 = city --> pushed down (literal on the left is accepted)
        lit(5).eq(col("city")),
    ];
    println!(" --------------- Filters: {:#?}", filters);

    let predicate = Predicate::default().with_pushdown_exprs(&filters);
    println!(" ------------- Predicates: {:#?}", predicate);

    // The pushed-down expressions, in the order they appear in `filters`.
    let expected = [
        col("state").eq(lit("CA")),
        col("price").gt(lit(10)),
        col("a").lt(lit(10)),
        col("b").gt_eq(lit(50)),
        col("f").lt_eq(lit(60)),
        col("city").eq(lit("Boston")),
        col("city").not_eq(lit("Braintree")),
        lit(5).eq(col("city")),
    ];
    assert_eq!(predicate.exprs.len(), expected.len());
    for (actual, wanted) in predicate.exprs.iter().zip(expected.iter()) {
        assert_eq!(actual, wanted);
    }
}
#[test]
fn predicate_display_ts() {
// TODO make this a doc example?

View File

@ -37,9 +37,9 @@
+-------+--------+--------------------------------+-----------+
-- SQL: EXPLAIN SELECT * from restaurant where count > 200;
-- Results After Normalizing UUIDs
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: CAST(restaurant.count AS Int64) > Int64(200) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200)] |
@ -47,14 +47,14 @@
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: CAST(count@0 AS Int64) > 200 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[count, system, time, town] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=true, projection=[count, system, time, town] |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: EXPLAIN SELECT * from restaurant where count > 200.0;
-- Results After Normalizing UUIDs
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: CAST(restaurant.count AS Float64) > Float64(200) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Float64) > Float64(200)] |
@ -62,9 +62,9 @@
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: CAST(count@0 AS Float64) > 200 |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[count, system, time, town] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=true, projection=[count, system, time, town] |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: EXPLAIN SELECT * from restaurant where system > 4.0;
-- Results After Normalizing UUIDs
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
@ -93,9 +93,9 @@
+-------+--------+--------------------------------+-----------+
-- SQL: EXPLAIN SELECT * from restaurant where count > 200 and town != 'tewsbury';
-- Results After Normalizing UUIDs
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: CAST(restaurant.count AS Int64) > Int64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury"))] |
@ -103,9 +103,9 @@
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: CAST(count@0 AS Int64) > 200 AND town@3 != tewsbury |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=town_min@0 != tewsbury OR tewsbury != town_max@1, projection=[count, system, time, town] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=true AND town_min@0 != tewsbury OR tewsbury != town_max@1, projection=[count, system, time, town] |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence');
-- Results After Sorting
+-------+--------+--------------------------------+-----------+
@ -118,9 +118,9 @@
+-------+--------+--------------------------------+-----------+
-- SQL: EXPLAIN SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence');
-- Results After Normalizing UUIDs
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: restaurant.count, restaurant.system, restaurant.time, restaurant.town |
| | Filter: CAST(restaurant.count AS Int64) > Int64(200) AND restaurant.town != Dictionary(Int32, Utf8("tewsbury")) AND (restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))) |
| | TableScan: restaurant projection=[count, system, time, town], partial_filters=[CAST(restaurant.count AS Int64) > Int64(200), restaurant.town != Dictionary(Int32, Utf8("tewsbury")), restaurant.system = Float64(5) OR restaurant.town = Dictionary(Int32, Utf8("lawrence"))] |
@ -128,9 +128,9 @@
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: CAST(count@0 AS Int64) > 200 AND town@3 != tewsbury AND system@1 = 5 OR town@3 = lawrence |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=town_min@0 != tewsbury OR tewsbury != town_max@1, projection=[count, system, time, town] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=true AND town_min@0 != tewsbury OR tewsbury != town_max@1 AND system_min@2 <= 5 AND 5 <= system_max@3 OR town_min@0 <= lawrence AND lawrence <= town_max@1, projection=[count, system, time, town] |
| | |
+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where count > 200 and town != 'tewsbury' and (system =5 or town = 'lawrence') and count < 40000;
-- Results After Sorting
+-------+--------+--------------------------------+-----------+
@ -154,7 +154,7 @@
| | FilterExec: CAST(restaurant.count AS Int64)restaurant.count@0 > 200 AND town@4 != tewsbury AND system@2 = 5 OR town@4 = lawrence AND CAST(restaurant.count AS Int64)restaurant.count@0 < 40000 |
| | ProjectionExec: expr=[CAST(count@0 AS Int64) as CAST(restaurant.count AS Int64)restaurant.count, count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=town_min@0 != tewsbury OR tewsbury != town_max@1, projection=[count, system, time, town] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=true AND town_min@0 != tewsbury OR tewsbury != town_max@1 AND system_min@2 <= 5 AND 5 <= system_max@3 OR town_min@0 <= lawrence AND lawrence <= town_max@1 AND true, projection=[count, system, time, town] |
| | |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where count > 200 and count < 40000;
@ -182,7 +182,7 @@
| | FilterExec: CAST(restaurant.count AS Int64)restaurant.count@0 > 200 AND CAST(restaurant.count AS Int64)restaurant.count@0 < 40000 |
| | ProjectionExec: expr=[CAST(count@0 AS Int64) as CAST(restaurant.count AS Int64)restaurant.count, count@0 as count, system@1 as system, time@2 as time, town@3 as town] |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], projection=[count, system, time, town] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=true AND true, projection=[count, system, time, town] |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where system > 4.0 and system < 7.0;
@ -278,7 +278,7 @@
| | CoalesceBatchesExec: target_batch_size=4096 |
| | FilterExec: system@1 > 5 AND tewsbury != town@3 AND system@1 < 7 AND CAST(count@0 AS Int64) = 632 OR town@3 = reading |
| | RepartitionExec: partitioning=RoundRobinBatch(4) |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7, projection=[count, system, time, town] |
| | ParquetExec: limit=None, partitions=[1/1/1/1/00000000-0000-0000-0000-000000000000.parquet], predicate=system_max@0 > 5 AND town_min@1 != tewsbury OR tewsbury != town_max@2 AND system_min@3 < 7 AND true OR town_min@1 <= reading AND reading <= town_max@2, projection=[count, system, time, town] |
| | |
+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- SQL: SELECT * from restaurant where 5.0 < system and town != 'tewsbury' and system < 7.0 and (count = 632 or town = 'reading') and time > to_timestamp('1970-01-01T00:00:00.000000130+00:00');