Fix incorrect grouping when multiple aggregates are used with sparse data

When a query used a grouping with two different aggregates, it was possible for one of the aggregates to return a value from a different series key than the other aggregate. When these series keys didn't match, the returned grouping was incorrect because the emitter sorted by time before checking the name and tags. This did not happen when the aggregates returned values for the same series keys, because the iterators were then aligned with each other.
2016-11-02 13:35:22 -05:00 · 2016-11-02 13:35:22 -05:00 · e7d4a601a6
parent e4e5edcbea
commit e7d4a601a6
3 changed files with 45 additions and 2 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -75,6 +75,7 @@ The query language has been extended with a few new features:
 - [#7494](https://github.com/influxdata/influxdb/issues/7494): influx_inspect: export does not escape field keys.
 - [#7526](https://github.com/influxdata/influxdb/issues/7526): Truncate the version string when linking to the documentation.
 - [#7548](https://github.com/influxdata/influxdb/issues/7548): Fix output duration units for SHOW QUERIES.
+- [#7564](https://github.com/influxdata/influxdb/issues/7564): Fix incorrect grouping when multiple aggregates are used with sparse data.

 ## v1.0.2 [2016-10-05]

--- a/cmd/influxd/run/server_test.go
+++ b/cmd/influxd/run/server_test.go
@ -2670,6 +2670,48 @@ cpu value=20 1278010021000000000
 	}
 }

+// TestServer_Query_SelectGroupByTime_MultipleAggregates writes two series of
+// the same measurement that carry disjoint fields (t=a has only x, t=b has
+// only y) and checks that a query with two aggregates grouped by tag and time
+// reports each series separately, with null for the field that series lacks.
+func TestServer_Query_SelectGroupByTime_MultipleAggregates(t *testing.T) {
+	t.Parallel()
+	s := OpenServer(NewConfig())
+	defer s.Close()
+
+	test := NewTest("db0", "rp0")
+	test.writes = Writes{
+		// The payload is a constant with no format verbs, so it is used as-is.
+		&Write{data: `test,t=a x=1i 1000000000
+test,t=b y=1i 1000000000
+test,t=a x=2i 2000000000
+test,t=b y=2i 2000000000
+test,t=a x=3i 3000000000
+test,t=b y=3i 3000000000
+`},
+	}
+
+	test.addQueries([]*Query{
+		&Query{
+			name:    "two aggregates with a group by tag and time",
+			command: `SELECT mean(x) as x, mean(y) as y from db0.rp0.test where time >= 1s and time < 4s group by t, time(1s)`,
+			exp:     `{"results":[{"series":[{"name":"test","tags":{"t":"a"},"columns":["time","x","y"],"values":[["1970-01-01T00:00:01Z",1,null],["1970-01-01T00:00:02Z",2,null],["1970-01-01T00:00:03Z",3,null]]},{"name":"test","tags":{"t":"b"},"columns":["time","x","y"],"values":[["1970-01-01T00:00:01Z",null,1],["1970-01-01T00:00:02Z",null,2],["1970-01-01T00:00:03Z",null,3]]}]}]}`,
+		},
+	}...)
+
+	for i, query := range test.queries {
+		// Initialise the test fixture once, just before the first query runs.
+		if i == 0 {
+			if err := test.init(s); err != nil {
+				t.Fatalf("test init failed: %s", err)
+			}
+		}
+		if query.skip {
+			t.Logf("SKIP:: %s", query.name)
+			continue
+		}
+		if err := query.Execute(s); err != nil {
+			t.Error(query.Error(err))
+		} else if !query.success() {
+			t.Error(query.failureMessage())
+		}
+	}
+}
+
 func TestServer_Query_MathWithFill(t *testing.T) {
 	t.Parallel()
 	s := OpenServer(NewConfig())
--- a/influxql/emitter.go
+++ b/influxql/emitter.go
@ -113,14 +113,14 @@ func (e *Emitter) loadBuf() (t int64, name string, tags Tags, err error) {

 		// Update range values if lower and emitter is in time ascending order.
 		if e.ascending {
-			if (itrTime < t) || (itrTime == t && itrName < name) || (itrTime == t && itrName == name && itrTags.ID() < tags.ID()) {
+			if (itrName < name) || (itrName == name && itrTags.ID() < tags.ID()) || (itrName == name && itrTags.ID() == tags.ID() && itrTime < t) {
 				t, name, tags = itrTime, itrName, itrTags
 			}
 			continue
 		}

 		// Update range values if higher and emitter is in time descending order.
-		if (itrTime > t) || (itrTime == t && itrName > name) || (itrTime == t && itrName == name && itrTags.ID() > tags.ID()) {
+		if (itrName > name) || (itrName == name && itrTags.ID() > tags.ID()) || (itrName == name && itrTags.ID() == tags.ID() && itrTime > t) {
 			t, name, tags = itrTime, itrName, itrTags
 		}
 	}