Merge pull request #2836 from influxdata/er/fix/storage_grpc/2697

fix: use correct timestamps for selectors in read_window_aggregate

commit 0829711fe3
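Fixes https://github.com/influxdata/influxdb_iox/issues/2697.

Selector aggregates (MIN, MAX, FIRST, LAST) pick a single point per series and
must report the timestamp of that point. Before this change the
read_window_aggregate planner built plain aggregate expressions for every
aggregate kind, which is only correct for SUM/COUNT/MEAN: selector results were
stamped with the window-bound time instead of the time at which the selected
value occurred. The planner now emits a distinct `time_<field>` column for each
selected field.

A minimal sketch of the distinction, in plain standalone Rust rather than IOx
code (the data and names here are illustrative only):

// One series, points (time, value) falling into the window [10, 20)
fn main() {
    let points = [(11_i64, 5.0_f64), (15, 6.0), (12, 1.2)];

    // Non-selector aggregate (SUM/COUNT/MEAN): the output row is stamped
    // with the window bound; no per-point time needs to survive.
    let window_bound = 20_i64;
    let sum: f64 = points.iter().map(|(_, v)| v).sum();

    // Selector aggregate (MIN/MAX/FIRST/LAST): the value *and* the time it
    // occurred at must both be carried through the plan.
    let (time_min, min_val) = points
        .iter()
        .copied()
        .min_by(|a, b| a.1.partial_cmp(&b.1).unwrap())
        .unwrap();

    println!("sum = {sum}, stamped with window bound {window_bound}");
    println!("min = {min_val}, stamped with its own time {time_min}");
}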
@@ -1070,7 +1070,7 @@ impl InfluxRpcPlanner {
         let AggExprs {
             agg_exprs,
             field_columns,
-        } = AggExprs::try_new(agg, &schema, &predicate)?;
+        } = AggExprs::try_new_for_read_group(agg, &schema, &predicate)?;
 
         let plan_builder = plan_builder
             .aggregate(group_exprs, agg_exprs)
@@ -1190,10 +1190,10 @@ impl InfluxRpcPlanner {
             .chain(std::iter::once(window_bound))
             .collect::<Vec<_>>();
 
-        // aggregate each field
-        let agg_exprs = filtered_fields_iter(&schema, &predicate)
-            .map(|field| make_agg_expr(agg, field.name()))
-            .collect::<Result<Vec<_>>>()?;
+        let AggExprs {
+            agg_exprs,
+            field_columns,
+        } = AggExprs::try_new_for_read_window_aggregate(agg, &schema, &predicate)?;
 
         // sort by the group by expressions as well
         let sort_exprs = group_exprs
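This hunk is the core of the fix: the read_window_aggregate path previously
built plain `make_agg_expr` aggregates for every aggregate kind, which is only
correct for SUM/COUNT/MEAN. Routing through
`AggExprs::try_new_for_read_window_aggregate` lets the selector kinds
(FIRST/LAST/MIN/MAX) fall through to `selector_aggregates`, defined later in
this diff, which emits a paired `time_<field>` column per field.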
@@ -1215,21 +1215,12 @@ impl InfluxRpcPlanner {
             .map(|field| Arc::from(field.name().as_str()))
             .collect();
 
-        let field_columns = filtered_fields_iter(&schema, &predicate)
-            .map(|field| Arc::from(field.name().as_str()))
-            .collect();
-
-        // TODO: remove the use of tag_columns and field_column names
-        // and instead use the schema directly)
-
-        let ss_plan = SeriesSetPlan::new_from_shared_timestamp(
+        Ok(Some(SeriesSetPlan::new(
             Arc::from(table_name),
             plan,
             tag_columns,
             field_columns,
-        );
-
-        Ok(Some(ss_plan))
+        )))
     }
 
     /// Create a plan that scans the specified table, and applies any
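With per-field time columns now possible, the plan can no longer always be
built via `SeriesSetPlan::new_from_shared_timestamp`; the general
`SeriesSetPlan::new` constructor accepts whatever `field_columns` the aggregate
step produced. A rough sketch of the shape this implies — the type and variant
names below are illustrative guesses, not the actual IOx definitions:

enum FieldColumns {
    // every field shares the single "time" column
    SharedTimestamp(Vec<String>),
    // each field carries its own selector-produced time column,
    // e.g. ("bar", "time_bar")
    AliasedFields(Vec<(String, String)>),
}

fn main() {
    // both shapes would flow into SeriesSetPlan::new via `field_columns`
    let shared = FieldColumns::SharedTimestamp(vec!["bar".into(), "foo".into()]);
    let aliased = FieldColumns::AliasedFields(vec![
        ("bar".into(), "time_bar".into()),
        ("foo".into(), "time_foo".into()),
    ]);
    let _ = (shared, aliased);
}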
@@ -1461,75 +1452,144 @@ fn filtered_fields_iter<'a>(
 /// the rules explained on `read_group_plan`
 impl AggExprs {
     /// Create the appropriate aggregate expressions, based on the type of the
-    /// field
-    pub fn try_new(agg: Aggregate, schema: &Schema, predicate: &Predicate) -> Result<Self> {
+    /// field for a `read_group` plan.
+    pub fn try_new_for_read_group(
+        agg: Aggregate,
+        schema: &Schema,
+        predicate: &Predicate,
+    ) -> Result<Self> {
         match agg {
             Aggregate::Sum | Aggregate::Count | Aggregate::Mean => {
-                // agg_function(_val1) as _value1
-                // ...
-                // agg_function(_valN) as _valueN
-                // agg_function(time) as time
-
-                let agg_exprs = filtered_fields_iter(schema, predicate)
-                    .chain(schema.time_iter())
-                    .map(|field| make_agg_expr(agg, field.name()))
-                    .collect::<Result<Vec<_>>>()?;
-
-                let field_columns = filtered_fields_iter(schema, predicate)
-                    .map(|field| Arc::from(field.name().as_str()))
-                    .collect::<Vec<_>>()
-                    .into();
-
-                Ok(Self {
-                    agg_exprs,
-                    field_columns,
-                })
+                Self::agg_for_read_group(agg, schema, predicate)
             }
             Aggregate::First | Aggregate::Last | Aggregate::Min | Aggregate::Max => {
-                // agg_function(_val1) as _value1
-                // agg_function(time) as time1
-                // ..
-                // agg_function(_valN) as _valueN
-                // agg_function(time) as timeN
-
-                // might be nice to use a more functional style here
-                let mut agg_exprs = Vec::new();
-                let mut field_list = Vec::new();
-
-                for field in filtered_fields_iter(schema, predicate) {
-                    agg_exprs.push(make_selector_expr(
-                        agg,
-                        SelectorOutput::Value,
-                        field.name(),
-                        field.data_type(),
-                        field.name(),
-                    )?);
-
-                    let time_column_name = format!("{}_{}", TIME_COLUMN_NAME, field.name());
-
-                    agg_exprs.push(make_selector_expr(
-                        agg,
-                        SelectorOutput::Time,
-                        field.name(),
-                        field.data_type(),
-                        &time_column_name,
-                    )?);
-
-                    field_list.push((
-                        Arc::from(field.name().as_str()), // value name
-                        Arc::from(time_column_name.as_str()),
-                    ));
-                }
-
-                let field_columns = field_list.into();
-                Ok(Self {
-                    agg_exprs,
-                    field_columns,
-                })
+                Self::selector_aggregates(agg, schema, predicate)
             }
             Aggregate::None => InternalUnexpectedNoneAggregate.fail(),
         }
     }
+
+    /// Create the appropriate aggregate expressions, based on the type of the
+    /// field for a `read_window_aggregate` plan.
+    pub fn try_new_for_read_window_aggregate(
+        agg: Aggregate,
+        schema: &Schema,
+        predicate: &Predicate,
+    ) -> Result<Self> {
+        match agg {
+            Aggregate::Sum | Aggregate::Count | Aggregate::Mean => {
+                Self::agg_for_read_window_aggregate(agg, schema, predicate)
+            }
+            Aggregate::First | Aggregate::Last | Aggregate::Min | Aggregate::Max => {
+                Self::selector_aggregates(agg, schema, predicate)
+            }
+            Aggregate::None => InternalUnexpectedNoneAggregate.fail(),
+        }
+    }
+
+    // Creates special aggregate "selector" expressions for the fields in the
+    // provided schema. Selectors ensure that the relevant aggregate functions
+    // are also provided to a distinct time column for each field column.
+    //
+    // Equivalent SQL would look like:
+    //
+    // agg_function(_val1) as _value1
+    // agg_function(time) as time1
+    // ..
+    // agg_function(_valN) as _valueN
+    // agg_function(time) as timeN
+    fn selector_aggregates(agg: Aggregate, schema: &Schema, predicate: &Predicate) -> Result<Self> {
+        // might be nice to use a more functional style here
+        let mut agg_exprs = Vec::new();
+        let mut field_list = Vec::new();
+
+        for field in filtered_fields_iter(schema, predicate) {
+            agg_exprs.push(make_selector_expr(
+                agg,
+                SelectorOutput::Value,
+                field.name(),
+                field.data_type(),
+                field.name(),
+            )?);
+
+            let time_column_name = format!("{}_{}", TIME_COLUMN_NAME, field.name());
+
+            agg_exprs.push(make_selector_expr(
+                agg,
+                SelectorOutput::Time,
+                field.name(),
+                field.data_type(),
+                &time_column_name,
+            )?);
+
+            field_list.push((
+                Arc::from(field.name().as_str()), // value name
+                Arc::from(time_column_name.as_str()),
+            ));
+        }
+
+        let field_columns = field_list.into();
+        Ok(Self {
+            agg_exprs,
+            field_columns,
+        })
+    }
+
+    // Creates aggregate expressions for use in a read_group plan, which
+    // includes the time column.
+    //
+    // Equivalent SQL would look like this:
+    //
+    // agg_function(_val1) as _value1
+    // ...
+    // agg_function(_valN) as _valueN
+    // agg_function(time) as time
+    fn agg_for_read_group(agg: Aggregate, schema: &Schema, predicate: &Predicate) -> Result<Self> {
+        let agg_exprs = filtered_fields_iter(schema, predicate)
+            .chain(schema.time_iter())
+            .map(|field| make_agg_expr(agg, field.name()))
+            .collect::<Result<Vec<_>>>()?;
+
+        let field_columns = filtered_fields_iter(schema, predicate)
+            .map(|field| Arc::from(field.name().as_str()))
+            .collect::<Vec<_>>()
+            .into();
+
+        Ok(Self {
+            agg_exprs,
+            field_columns,
+        })
+    }
+
+    // Creates aggregate expressions for use in a read_window_aggregate plan. No
+    // aggregates are created for the time column because the
+    // `read_window_aggregate` uses a time column calculated using window
+    // bounds.
+    //
+    // Equivalent SQL would look like this:
+    //
+    // agg_function(_val1) as _value1
+    // ...
+    // agg_function(_valN) as _valueN
+    fn agg_for_read_window_aggregate(
+        agg: Aggregate,
+        schema: &Schema,
+        predicate: &Predicate,
+    ) -> Result<Self> {
+        let agg_exprs = filtered_fields_iter(schema, predicate)
+            .map(|field| make_agg_expr(agg, field.name()))
+            .collect::<Result<Vec<_>>>()?;
+
+        let field_columns = filtered_fields_iter(schema, predicate)
+            .map(|field| Arc::from(field.name().as_str()))
+            .collect::<Vec<_>>()
+            .into();
+
+        Ok(Self {
+            agg_exprs,
+            field_columns,
+        })
+    }
 }
 
 /// Creates a DataFusion expression suitable for calculating an aggregate:
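The selector time-column names come from
`format!("{}_{}", TIME_COLUMN_NAME, field.name())` above. A tiny runnable
sketch of the value/time pairing this produces; `bar` and `foo` are the field
names from the test data below, used here purely for illustration:

fn main() {
    const TIME_COLUMN_NAME: &str = "time";
    let fields = ["bar", "foo"];

    // one (value column, time column) pair per selected field
    let field_list: Vec<(String, String)> = fields
        .iter()
        .map(|f| (f.to_string(), format!("{}_{}", TIME_COLUMN_NAME, f)))
        .collect();

    // prints [("bar", "time_bar"), ("foo", "time_foo")] -- the column pairs
    // visible in the expected tables of the defect-2697 tests below
    println!("{:?}", field_list);
}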
@@ -1,5 +1,5 @@
 //! Tests for the Influx gRPC queries
-use crate::scenarios::*;
+use crate::scenarios::{util::all_scenarios_for_one_chunk, *};
 
 use server::{db::test_helpers::write_lp, utils::make_db};
 
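The fixture below writes InfluxDB line protocol,
`measurement,tag=value field=value timestamp`: for example
`mm,section=1a bar=5.0 1609459201000000011` is measurement `mm`, tag
`section=1a`, field `bar=5.0`, at a nanosecond timestamp 11 ns after
2021-01-01T00:00:01Z. The points are deliberately spread across several of the
10 ns windows the tests aggregate over.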
@@ -272,3 +272,106 @@ async fn test_read_window_aggregate_months() {
     )
     .await;
 }
+
+// Test data to validate fix for:
+// https://github.com/influxdata/influxdb_iox/issues/2697
+struct MeasurementForDefect2697 {}
+#[async_trait]
+impl DbSetup for MeasurementForDefect2697 {
+    async fn make(&self) -> Vec<DbScenario> {
+        let partition_key = "2021-01-01T00";
+
+        let lp = vec![
+            "mm,section=1a bar=5.0 1609459201000000011",
+            "mm,section=1a bar=0.28 1609459201000000031",
+            "mm,section=2b bar=4.0 1609459201000000009",
+            "mm,section=2b bar=6.0 1609459201000000015",
+            "mm,section=2b bar=1.2 1609459201000000022",
+            "mm,section=1a foo=1.0 1609459201000000001",
+            "mm,section=1a foo=3.0 1609459201000000005",
+            "mm,section=1a foo=11.24 1609459201000000024",
+            "mm,section=2b foo=2.0 1609459201000000002",
+        ];
+
+        all_scenarios_for_one_chunk(vec![], vec![], lp, "mm", partition_key).await
+    }
+}
+
+// See https://github.com/influxdata/influxdb_iox/issues/2697
+#[tokio::test]
+async fn test_grouped_series_set_plan_group_aggregate_min_defect_2697() {
+    let predicate = PredicateBuilder::default()
+        // time >= '2021-01-01T00:00:01.000000001Z' AND time <= '2021-01-01T00:00:01.000000031Z'
+        .timestamp_range(1609459201000000001, 1609459201000000031)
+        .build();
+
+    let agg = Aggregate::Min;
+    let every = WindowDuration::from_nanoseconds(10);
+    let offset = WindowDuration::from_nanoseconds(0);
+
+    // Because the windowed aggregate is using a selector aggregate (one of MIN,
+    // MAX, FIRST, LAST) we need to run a plan that brings along the timestamps
+    // for the chosen aggregate in the window.
+    //
+    // The window is defined by the `time` column
+    let expected_results = vec![
+        "+---------+--------------------------------+-----+--------------------------------+-------+--------------------------------+",
+        "| section | time                           | bar | time_bar                       | foo   | time_foo                       |",
+        "+---------+--------------------------------+-----+--------------------------------+-------+--------------------------------+",
+        "| 1a      | 2021-01-01T00:00:01.000000010Z |     |                                | 1     | 2021-01-01T00:00:01.000000001Z |",
+        "| 1a      | 2021-01-01T00:00:01.000000020Z | 5   | 2021-01-01T00:00:01.000000011Z |       |                                |",
+        "| 1a      | 2021-01-01T00:00:01.000000030Z |     |                                | 11.24 | 2021-01-01T00:00:01.000000024Z |",
+        "| 2b      | 2021-01-01T00:00:01.000000010Z | 4   | 2021-01-01T00:00:01.000000009Z | 2     | 2021-01-01T00:00:01.000000002Z |",
+        "| 2b      | 2021-01-01T00:00:01.000000020Z | 6   | 2021-01-01T00:00:01.000000015Z |       |                                |",
+        "| 2b      | 2021-01-01T00:00:01.000000030Z | 1.2 | 2021-01-01T00:00:01.000000022Z |       |                                |",
+        "+---------+--------------------------------+-----+--------------------------------+-------+--------------------------------+",
+    ];
+
+    run_read_window_aggregate_test_case(
+        MeasurementForDefect2697 {},
+        predicate,
+        agg,
+        every,
+        offset,
+        expected_results,
+    )
+    .await;
+}
+
+// See https://github.com/influxdata/influxdb_iox/issues/2697
+#[tokio::test]
+async fn test_grouped_series_set_plan_group_aggregate_sum_defect_2697() {
+    let predicate = PredicateBuilder::default()
+        // time >= '2021-01-01T00:00:01.000000001Z' AND time <= '2021-01-01T00:00:01.000000031Z'
+        .timestamp_range(1609459201000000001, 1609459201000000031)
+        .build();
+
+    let agg = Aggregate::Sum;
+    let every = WindowDuration::from_nanoseconds(10);
+    let offset = WindowDuration::from_nanoseconds(0);
+
+    // The windowed aggregate is using a non-selector aggregate (SUM, COUNT, MEAN).
+    // For each distinct series the window defines the `time` column
+    let expected_results = vec![
+        "+---------+--------------------------------+-----+-------+",
+        "| section | time                           | bar | foo   |",
+        "+---------+--------------------------------+-----+-------+",
+        "| 1a      | 2021-01-01T00:00:01.000000010Z |     | 4     |",
+        "| 1a      | 2021-01-01T00:00:01.000000020Z | 5   |       |",
+        "| 1a      | 2021-01-01T00:00:01.000000030Z |     | 11.24 |",
+        "| 2b      | 2021-01-01T00:00:01.000000010Z | 4   | 2     |",
+        "| 2b      | 2021-01-01T00:00:01.000000020Z | 6   |       |",
+        "| 2b      | 2021-01-01T00:00:01.000000030Z | 1.2 |       |",
+        "+---------+--------------------------------+-----+-------+",
+    ];
+
+    run_read_window_aggregate_test_case(
+        MeasurementForDefect2697 {},
+        predicate,
+        agg,
+        every,
+        offset,
+        expected_results,
+    )
+    .await;
+}
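To reproduce the defect and verify the fix, the two tests can be run by name
filter; this assumes the influxdb_iox workspace layout where these scenarios
live in the query_tests crate:

    cargo test -p query_tests defect_2697

Before the fix, the `time_bar`/`time_foo` values in the MIN test came back as
window bounds rather than the timestamps of the selected points.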