feat: outer GROUP BY pushed down to subqueries; more Cloud 2 examples
parent
e75a95bca7
commit
7ba619a32b
|
@ -548,7 +548,56 @@ SELECT * FROM (SELECT usage_idle FROM cpu GROUP BY cpu) GROUP BY cpu;
|
||||||
-- GROUP BY should be pushed to subquery
|
-- GROUP BY should be pushed to subquery
|
||||||
SELECT * FROM (SELECT usage_idle FROM cpu) GROUP BY cpu;
|
SELECT * FROM (SELECT usage_idle FROM cpu) GROUP BY cpu;
|
||||||
|
|
||||||
|
-- GROUP BY should be pushed through multiple levels of subqueries
|
||||||
SELECT * FROM (SELECT MAX(value) FROM (SELECT DISTINCT(usage_idle) AS value FROM cpu)) GROUP BY cpu;
|
SELECT * FROM (SELECT MAX(value) FROM (SELECT DISTINCT(usage_idle) AS value FROM cpu)) GROUP BY cpu;
|
||||||
|
|
||||||
-- GROUP BY time with default FILL(none) for subquery
|
-- GROUP BY time with default FILL(none) for subquery
|
||||||
SELECT value FROM (SELECT mean(usage_idle) AS value FROM cpu GROUP BY TIME(10s));
|
SELECT value FROM (SELECT mean(usage_idle) AS value FROM cpu GROUP BY TIME(10s));
|
||||||
|
|
||||||
|
-- Multiple subqueries from same measurement, rows with same timestamp are not combined
|
||||||
|
SELECT sum_idle, sum_system FROM (SELECT sum(usage_idle) AS sum_idle FROM cpu WHERE cpu = 'cpu-total' GROUP BY time(10s)), (SELECT sum(usage_system) AS sum_system FROM cpu WHERE cpu = 'cpu-total' GROUP BY time(10s));
|
||||||
|
|
||||||
|
-- Similar query as prior, rows with same timestamp are combined (as expected)
|
||||||
|
SELECT sum_idle, sum_system FROM (SELECT sum(usage_idle) AS sum_idle, sum(usage_system) AS sum_system FROM cpu WHERE cpu = 'cpu-total' GROUP BY time(10s));
|
||||||
|
|
||||||
|
-- TODO(sgc): Incorrect output
|
||||||
|
-- Multiple subqueries from same measurement
|
||||||
|
-- * Outer query projects aggregates, values at same timestamp should be combined
|
||||||
|
-- Source: Cloud 2
|
||||||
|
-- Expected output:
|
||||||
|
-- name: cpu
|
||||||
|
-- +---------------------+------+--------+
|
||||||
|
-- | time | last | last_1 |
|
||||||
|
-- +---------------------+------+--------+
|
||||||
|
-- | 1970-01-01T00:00:00 | 2.99 | 2.1 |
|
||||||
|
-- +---------------------+------+--------+
|
||||||
|
-- SELECT last(sum_idle), last(sum_system) FROM (SELECT sum(usage_idle) AS sum_idle FROM cpu WHERE cpu = 'cpu-total' GROUP BY time(10s)), (SELECT sum(usage_system) AS sum_system FROM cpu WHERE cpu = 'cpu-total' GROUP BY time(10s));
|
||||||
|
|
||||||
|
-- Similar query produces expected output of prior query
|
||||||
|
SELECT last(sum_idle), last(sum_system) FROM (SELECT sum(usage_idle) AS sum_idle, sum(usage_system) AS sum_system FROM cpu WHERE cpu = 'cpu-total' GROUP BY time(10s));
|
||||||
|
|
||||||
|
|
||||||
|
-- TODO(sgc): Incorrect output
|
||||||
|
-- Multiple subqueries from same measurement, GROUP BY TIME
|
||||||
|
-- * Outer query projects aggregates, values at same timestamp should be combined
|
||||||
|
-- Source: Cloud 2
|
||||||
|
-- Expected output:
|
||||||
|
-- name: cpu
|
||||||
|
-- +---------------------+------+--------+
|
||||||
|
-- | time | last | last_1 |
|
||||||
|
-- +---------------------+------+--------+
|
||||||
|
-- | 2022-10-31T02:00:00 | 2.98 | 2.2 |
|
||||||
|
-- | 2022-10-31T02:00:10 | 2.99 | 2.1 |
|
||||||
|
-- +---------------------+------+--------+
|
||||||
|
-- SELECT last(sum_idle), last(sum_system) FROM (SELECT sum(usage_idle) AS sum_idle FROM cpu WHERE cpu = 'cpu-total' GROUP BY time(10s)), (SELECT sum(usage_system) AS sum_system FROM cpu WHERE cpu = 'cpu-total' GROUP BY time(10s)) GROUP BY time(10s) FILL(none);
|
||||||
|
|
||||||
|
-- Similar query produces expected output of prior query
|
||||||
|
SELECT last(sum_idle), last(sum_system) FROM (SELECT sum(usage_idle) AS sum_idle, sum(usage_system) AS sum_system FROM cpu WHERE cpu = 'cpu-total' GROUP BY time(10s)) GROUP BY time(10s) FILL(none);
|
||||||
|
|
||||||
|
-- Conditions on fields projected of subquery
|
||||||
|
-- Source: Cloud 2
|
||||||
|
SELECT P / 99.99 FROM (SELECT max(usage_system) AS P, mean(usage_idle) AS S FROM cpu GROUP BY time(10s)) WHERE S > 0.50 AND S < 1.50 AND P > 0.0 AND P < 1.0 GROUP BY cpu FILL(null);
|
||||||
|
|
||||||
|
-- DISTINCT tag value from subquery
|
||||||
|
-- Source: Cloud 2
|
||||||
|
SELECT distinct(cpu) FROM (SELECT usage_idle, cpu FROM cpu);
|
|
@ -2851,3 +2851,54 @@ name: cpu
|
||||||
| 2022-10-31T02:00:00 | 1.9799999999999998 |
|
| 2022-10-31T02:00:00 | 1.9799999999999998 |
|
||||||
| 2022-10-31T02:00:10 | 1.9900000000000002 |
|
| 2022-10-31T02:00:10 | 1.9900000000000002 |
|
||||||
+---------------------+--------------------+
|
+---------------------+--------------------+
|
||||||
|
-- InfluxQL: SELECT sum_idle, sum_system FROM (SELECT sum(usage_idle) AS sum_idle FROM cpu WHERE cpu = 'cpu-total' GROUP BY time(10s)), (SELECT sum(usage_system) AS sum_system FROM cpu WHERE cpu = 'cpu-total' GROUP BY time(10s));
|
||||||
|
name: cpu
|
||||||
|
+---------------------+----------+------------+
|
||||||
|
| time | sum_idle | sum_system |
|
||||||
|
+---------------------+----------+------------+
|
||||||
|
| 2022-10-31T02:00:00 | 2.98 | |
|
||||||
|
| 2022-10-31T02:00:00 | | 2.2 |
|
||||||
|
| 2022-10-31T02:00:10 | 2.99 | |
|
||||||
|
| 2022-10-31T02:00:10 | | 2.1 |
|
||||||
|
+---------------------+----------+------------+
|
||||||
|
-- InfluxQL: SELECT sum_idle, sum_system FROM (SELECT sum(usage_idle) AS sum_idle, sum(usage_system) AS sum_system FROM cpu WHERE cpu = 'cpu-total' GROUP BY time(10s));
|
||||||
|
name: cpu
|
||||||
|
+---------------------+----------+------------+
|
||||||
|
| time | sum_idle | sum_system |
|
||||||
|
+---------------------+----------+------------+
|
||||||
|
| 2022-10-31T02:00:00 | 2.98 | 2.2 |
|
||||||
|
| 2022-10-31T02:00:10 | 2.99 | 2.1 |
|
||||||
|
+---------------------+----------+------------+
|
||||||
|
-- InfluxQL: SELECT last(sum_idle), last(sum_system) FROM (SELECT sum(usage_idle) AS sum_idle, sum(usage_system) AS sum_system FROM cpu WHERE cpu = 'cpu-total' GROUP BY time(10s));
|
||||||
|
name: cpu
|
||||||
|
+---------------------+------+--------+
|
||||||
|
| time | last | last_1 |
|
||||||
|
+---------------------+------+--------+
|
||||||
|
| 1970-01-01T00:00:00 | 2.99 | 2.1 |
|
||||||
|
+---------------------+------+--------+
|
||||||
|
-- InfluxQL: SELECT last(sum_idle), last(sum_system) FROM (SELECT sum(usage_idle) AS sum_idle, sum(usage_system) AS sum_system FROM cpu WHERE cpu = 'cpu-total' GROUP BY time(10s)) GROUP BY time(10s) FILL(none);
|
||||||
|
name: cpu
|
||||||
|
+---------------------+------+--------+
|
||||||
|
| time | last | last_1 |
|
||||||
|
+---------------------+------+--------+
|
||||||
|
| 2022-10-31T02:00:00 | 2.98 | 2.2 |
|
||||||
|
| 2022-10-31T02:00:10 | 2.99 | 2.1 |
|
||||||
|
+---------------------+------+--------+
|
||||||
|
-- InfluxQL: SELECT P / 99.99 FROM (SELECT max(usage_system) AS P, mean(usage_idle) AS S FROM cpu GROUP BY time(10s)) WHERE S > 0.50 AND S < 1.50 AND P > 0.0 AND P < 1.0 GROUP BY cpu FILL(null);
|
||||||
|
name: cpu
|
||||||
|
tags: cpu=cpu0
|
||||||
|
+---------------------+-------------------+
|
||||||
|
| time | P |
|
||||||
|
+---------------------+-------------------+
|
||||||
|
| 2022-10-31T02:00:00 | 0.002000200020002 |
|
||||||
|
| 2022-10-31T02:00:10 | 0.001000100010001 |
|
||||||
|
+---------------------+-------------------+
|
||||||
|
-- InfluxQL: SELECT distinct(cpu) FROM (SELECT usage_idle, cpu FROM cpu);
|
||||||
|
name: cpu
|
||||||
|
+---------------------+-----------+
|
||||||
|
| time | distinct |
|
||||||
|
+---------------------+-----------+
|
||||||
|
| 1970-01-01T00:00:00 | cpu-total |
|
||||||
|
| 1970-01-01T00:00:00 | cpu0 |
|
||||||
|
| 1970-01-01T00:00:00 | cpu1 |
|
||||||
|
+---------------------+-----------+
|
|
@ -31,8 +31,11 @@ pub(super) struct SelectQuery {
|
||||||
|
|
||||||
#[derive(Debug, Default, Clone)]
|
#[derive(Debug, Default, Clone)]
|
||||||
pub(super) struct Select {
|
pub(super) struct Select {
|
||||||
/// The schema of the selection.
|
/// The depth of the selection, where a value > 0 indicates
|
||||||
// pub(super) schema: Todo,
|
/// this is a subquery.
|
||||||
|
pub(super) depth: u32,
|
||||||
|
|
||||||
|
/// The projection type of the selection.
|
||||||
pub(super) projection_type: ProjectionType,
|
pub(super) projection_type: ProjectionType,
|
||||||
|
|
||||||
/// Projection clause of the selection.
|
/// Projection clause of the selection.
|
||||||
|
|
|
@ -325,153 +325,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
||||||
.with_has_multiple_measurements(query.has_multiple_measurements)
|
.with_has_multiple_measurements(query.has_multiple_measurements)
|
||||||
.with_root_group_by_tags(&group_by_tags);
|
.with_root_group_by_tags(&group_by_tags);
|
||||||
|
|
||||||
// Skip the `time` column
|
self.select_to_plan(&ctx, select)
|
||||||
let fields_no_time = &select.fields[1..];
|
|
||||||
// always start with the time column
|
|
||||||
let mut fields = vec![select.fields.first().cloned().unwrap()];
|
|
||||||
|
|
||||||
// group_by_tag_set : a list of tag columns specified in the GROUP BY clause
|
|
||||||
// projection_tag_set : a list of tag columns specified exclusively in the SELECT projection
|
|
||||||
// is_projected : a list of booleans indicating whether matching elements in the
|
|
||||||
// group_by_tag_set are also projected in the query
|
|
||||||
let (group_by_tag_set, projection_tag_set, is_projected) =
|
|
||||||
if let Some(group_by) = &select.group_by {
|
|
||||||
let mut tag_columns =
|
|
||||||
find_tag_and_unknown_columns(fields_no_time).collect::<HashSet<_>>();
|
|
||||||
|
|
||||||
// Find the list of tag keys specified in the `GROUP BY` clause, and
|
|
||||||
// whether any of the tag keys are also projected in the SELECT list.
|
|
||||||
let (tag_set, is_projected): (Vec<_>, Vec<_>) = group_by
|
|
||||||
.tags()
|
|
||||||
.map(|t| t.deref().as_str())
|
|
||||||
.map(|s| (s, tag_columns.contains(s)))
|
|
||||||
// We sort the tag set, to ensure correct ordering of the results. The tag columns
|
|
||||||
// referenced in the `tag_set` variable are added to the sort operator in
|
|
||||||
// lexicographically ascending order.
|
|
||||||
.sorted_by(|a, b| a.0.cmp(b.0))
|
|
||||||
.unzip();
|
|
||||||
|
|
||||||
// Tags specified in the `GROUP BY` clause that are not already added to the
|
|
||||||
// projection must be projected, so they can be used in the group key.
|
|
||||||
//
|
|
||||||
// At the end of the loop, the `tag_columns` set will contain the tag columns that
|
|
||||||
// exist in the projection and not in the `GROUP BY`.
|
|
||||||
fields.extend(
|
|
||||||
tag_set
|
|
||||||
.iter()
|
|
||||||
.filter_map(|col| match tag_columns.remove(*col) {
|
|
||||||
true => None,
|
|
||||||
false => Some(Field {
|
|
||||||
expr: IQLExpr::VarRef(VarRef {
|
|
||||||
name: (*col).into(),
|
|
||||||
data_type: Some(VarRefDataType::Tag),
|
|
||||||
}),
|
|
||||||
name: col.to_string(),
|
|
||||||
data_type: Some(InfluxColumnType::Tag),
|
|
||||||
}),
|
|
||||||
}),
|
|
||||||
);
|
|
||||||
|
|
||||||
(
|
|
||||||
tag_set,
|
|
||||||
tag_columns.into_iter().sorted().collect::<Vec<_>>(),
|
|
||||||
is_projected,
|
|
||||||
)
|
|
||||||
} else {
|
|
||||||
let tag_columns = find_tag_and_unknown_columns(fields_no_time)
|
|
||||||
.sorted()
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
(vec![], tag_columns, vec![])
|
|
||||||
};
|
|
||||||
|
|
||||||
fields.extend(fields_no_time.iter().cloned());
|
|
||||||
|
|
||||||
let plan = {
|
|
||||||
let mut iter = select.from.iter();
|
|
||||||
let plan = match iter.next() {
|
|
||||||
Some(ds) => self.project_select(&ctx, ds, select, &fields, &group_by_tag_set),
|
|
||||||
None => {
|
|
||||||
// empty result, but let's at least have all the strictly necessary metadata
|
|
||||||
let schema = Arc::new(ArrowSchema::new(vec![ArrowField::new(
|
|
||||||
INFLUXQL_MEASUREMENT_COLUMN_NAME,
|
|
||||||
(&InfluxColumnType::Tag).into(),
|
|
||||||
false,
|
|
||||||
)]));
|
|
||||||
let plan = LogicalPlan::EmptyRelation(EmptyRelation {
|
|
||||||
produce_one_row: false,
|
|
||||||
schema: schema.to_dfschema_ref()?,
|
|
||||||
});
|
|
||||||
let plan = plan_with_metadata(
|
|
||||||
plan,
|
|
||||||
&InfluxQlMetadata {
|
|
||||||
measurement_column_index: MEASUREMENT_COLUMN_INDEX,
|
|
||||||
tag_key_columns: vec![],
|
|
||||||
},
|
|
||||||
)?;
|
|
||||||
return Ok(plan);
|
|
||||||
}
|
|
||||||
}?;
|
|
||||||
|
|
||||||
iter.try_fold(plan, |prev, ds| {
|
|
||||||
let next = self.project_select(&ctx, ds, select, &fields, &group_by_tag_set)?;
|
|
||||||
LogicalPlanBuilder::from(prev).union(next)?.build()
|
|
||||||
})?
|
|
||||||
};
|
|
||||||
|
|
||||||
let plan = plan_with_metadata(
|
|
||||||
plan,
|
|
||||||
&InfluxQlMetadata {
|
|
||||||
measurement_column_index: MEASUREMENT_COLUMN_INDEX,
|
|
||||||
tag_key_columns: make_tag_key_column_meta(
|
|
||||||
&fields,
|
|
||||||
&group_by_tag_set,
|
|
||||||
&is_projected,
|
|
||||||
),
|
|
||||||
},
|
|
||||||
)?;
|
|
||||||
|
|
||||||
// the sort planner node must refer to the time column using
|
|
||||||
// the alias that was specified
|
|
||||||
let time_alias = fields[0].name.as_str();
|
|
||||||
|
|
||||||
let time_sort_expr = time_alias.as_expr().sort(
|
|
||||||
match select.order_by {
|
|
||||||
// Default behaviour is to sort by time in ascending order if there is no ORDER BY
|
|
||||||
None | Some(OrderByClause::Ascending) => true,
|
|
||||||
Some(OrderByClause::Descending) => false,
|
|
||||||
},
|
|
||||||
false,
|
|
||||||
);
|
|
||||||
|
|
||||||
let plan = plan_with_sort(
|
|
||||||
plan,
|
|
||||||
vec![time_sort_expr.clone()],
|
|
||||||
ctx.has_multiple_measurements,
|
|
||||||
&group_by_tag_set,
|
|
||||||
&projection_tag_set,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
let plan = self.limit(
|
|
||||||
plan,
|
|
||||||
select.offset,
|
|
||||||
select.limit,
|
|
||||||
vec![time_sort_expr],
|
|
||||||
ctx.has_multiple_measurements,
|
|
||||||
&group_by_tag_set,
|
|
||||||
&projection_tag_set,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
Ok(plan)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn select_to_plan(&self, ctx: &Context<'_>, select: &Select) -> Result<LogicalPlan> {
|
fn select_to_plan(&self, ctx: &Context<'_>, select: &Select) -> Result<LogicalPlan> {
|
||||||
let ctx = Context::new()
|
|
||||||
.with_projection_type(select.projection_type)
|
|
||||||
.with_timezone(select.timezone)
|
|
||||||
.with_group_by_fill(select)
|
|
||||||
.with_has_multiple_measurements(ctx.has_multiple_measurements)
|
|
||||||
.with_root_group_by_tags(ctx.root_group_by_tags);
|
|
||||||
|
|
||||||
// Skip the `time` column
|
// Skip the `time` column
|
||||||
let fields_no_time = &select.fields[1..];
|
let fields_no_time = &select.fields[1..];
|
||||||
// always start with the time column
|
// always start with the time column
|
||||||
|
@ -561,6 +418,23 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
||||||
})?
|
})?
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let plan = if select.depth == 0 {
|
||||||
|
// Add the metadata to the root SELECT query
|
||||||
|
plan_with_metadata(
|
||||||
|
plan,
|
||||||
|
&InfluxQlMetadata {
|
||||||
|
measurement_column_index: MEASUREMENT_COLUMN_INDEX,
|
||||||
|
tag_key_columns: make_tag_key_column_meta(
|
||||||
|
&fields,
|
||||||
|
&group_by_tag_set,
|
||||||
|
&is_projected,
|
||||||
|
),
|
||||||
|
},
|
||||||
|
)?
|
||||||
|
} else {
|
||||||
|
plan
|
||||||
|
};
|
||||||
|
|
||||||
// the sort planner node must refer to the time column using
|
// the sort planner node must refer to the time column using
|
||||||
// the alias that was specified
|
// the alias that was specified
|
||||||
let time_alias = fields[0].name.as_str();
|
let time_alias = fields[0].name.as_str();
|
||||||
|
@ -1369,10 +1243,19 @@ impl<'a> InfluxQLToLogicalPlan<'a> {
|
||||||
vec![lit_dict(table_name).alias(INFLUXQL_MEASUREMENT_COLUMN_NAME)],
|
vec![lit_dict(table_name).alias(INFLUXQL_MEASUREMENT_COLUMN_NAME)],
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
DataSource::Subquery(select) => Ok((
|
DataSource::Subquery(select) => {
|
||||||
self.select_to_plan(ctx, select)?,
|
let ctx = Context::new()
|
||||||
|
.with_projection_type(select.projection_type)
|
||||||
|
.with_timezone(select.timezone)
|
||||||
|
.with_group_by_fill(select)
|
||||||
|
.with_has_multiple_measurements(ctx.has_multiple_measurements)
|
||||||
|
.with_root_group_by_tags(ctx.root_group_by_tags);
|
||||||
|
|
||||||
|
Ok((
|
||||||
|
self.select_to_plan(&ctx, select)?,
|
||||||
vec![INFLUXQL_MEASUREMENT_COLUMN_NAME.as_expr()],
|
vec![INFLUXQL_MEASUREMENT_COLUMN_NAME.as_expr()],
|
||||||
)),
|
))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue