From cb7baa6d9ed44c79c57d7afa1d91d0ce0be74757 Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Sat, 20 Jun 2015 11:18:33 -0700 Subject: [PATCH 1/3] Don't group TagSets when tag values are identical Fixes issue #3059 --- tsdb/meta.go | 60 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 24 deletions(-) diff --git a/tsdb/meta.go b/tsdb/meta.go index e196277de1..5b8e568991 100644 --- a/tsdb/meta.go +++ b/tsdb/meta.go @@ -469,43 +469,55 @@ func (m *Measurement) TagSets(stmt *influxql.SelectStatement, dimensions []strin return nil, err } - // build the tag sets - var tagStrings []string + // For every series, get the tag values for the requested tag keys i.e. dimensions. This is the + // TagSet for that series. Series with the same TagSet are then grouped together, because for the + // purpose of GROUP BY they are part of the same composite series. tagSets := make(map[string]*influxql.TagSet) for id, filter := range filters { - // get the series and set the tag values for the dimensions we care about s := m.seriesByID[id] - tags := make([]string, len(dimensions)) - for i, dim := range dimensions { - tags[i] = s.Tags[dim] + tags := make(map[string]string) + + // Build the TagSet for this series. + for _, dim := range dimensions { + tags[dim] = s.Tags[dim] } - // marshal it into a string and put this series and its expr into the tagSets map - t := strings.Join(tags, "") - set, ok := tagSets[t] + // Convert the TagSet to a string, so it can be added to a map allowing TagSets to be handled + // as a set. + tagsAsKey := string(marshalTags(tags)) + tagSet, ok := tagSets[tagsAsKey] if !ok { - tagStrings = append(tagStrings, t) - set = &influxql.TagSet{} - // set the tags for this set + // This TagSet is new, create a new entry for it. + tagSet = &influxql.TagSet{} tagsForSet := make(map[string]string) - for i, dim := range dimensions { - tagsForSet[dim] = tags[i] + for k, v := range tags { + tagsForSet[k] = v } - set.Tags = tagsForSet - set.Key = marshalTags(tagsForSet) + tagSet.Tags = tagsForSet + tagSet.Key = marshalTags(tagsForSet) } - set.AddFilter(m.seriesByID[id].Key, filter) - tagSets[t] = set + + // Associate the series and filter with the Tagset. + tagSet.AddFilter(m.seriesByID[id].Key, filter) + + // Ensure it's back in the map. + tagSets[tagsAsKey] = tagSet } - // return the tag sets in sorted order - a := make([]*influxql.TagSet, 0, len(tagSets)) - sort.Strings(tagStrings) - for _, s := range tagStrings { - a = append(a, tagSets[s]) + // The TagSets have been created, as a map of TagSets. Just send + // the values back as a slice, sorting for consistency. + sortedTagSetKeys := make([]string, 0, len(tagSets)) + for k, _ := range tagSets { + sortedTagSetKeys = append(sortedTagSetKeys, k) + } + sort.Strings(sortedTagSetKeys) + + sortedTagsSets := make([]*influxql.TagSet, 0, len(sortedTagSetKeys)) + for _, k := range sortedTagSetKeys { + sortedTagsSets = append(sortedTagsSets, tagSets[k]) } - return a, nil + return sortedTagsSets, nil } // mergeSeriesFilters merges two sets of filter expressions and culls series IDs. From e010daf74cf57c186fedbef40954824868f85c15 Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Sat, 20 Jun 2015 11:25:37 -0700 Subject: [PATCH 2/3] Test GROUP BY when different tags have same value --- cmd/influxd/run/server_test.go | 53 ++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/cmd/influxd/run/server_test.go b/cmd/influxd/run/server_test.go index 29688b4759..d372e2d861 100644 --- a/cmd/influxd/run/server_test.go +++ b/cmd/influxd/run/server_test.go @@ -582,6 +582,59 @@ func TestServer_Query_Multiple_Measurements(t *testing.T) { } } +// Ensure the server correctly supports data with identical tag values. +func TestServer_Query_IdenticalTagValues(t *testing.T) { + t.Parallel() + s := OpenServer(NewConfig(), "") + defer s.Close() + + if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicyInfo("rp0", 1, 1*time.Hour)); err != nil { + t.Fatal(err) + } + + writes := []string{ + fmt.Sprintf("cpu,t1=val1 value=1 %d", mustParseTime(time.RFC3339Nano, "2000-01-01T00:00:00Z").UnixNano()), + fmt.Sprintf("cpu,t2=val2 value=2 %d", mustParseTime(time.RFC3339Nano, "2000-01-01T00:01:00Z").UnixNano()), + fmt.Sprintf("cpu,t1=val2 value=3 %d", mustParseTime(time.RFC3339Nano, "2000-01-01T00:02:00Z").UnixNano()), + } + test := NewTest("db0", "rp0") + test.write = strings.Join(writes, "\n") + + test.addQueries([]*Query{ + &Query{ + name: "measurements with identical tag values - SELECT *, no GROUP BY", + command: `SELECT * FROM db0.rp0.cpu`, + exp: `{"results":[{"series":[{"name":"cpu","tags":{"t1":"val1","t2":""},"columns":["time","value"],"values":[["2000-01-01T00:00:00Z",1]]},{"name":"cpu","tags":{"t1":"val2","t2":""},"columns":["time","value"],"values":[["2000-01-01T00:02:00Z",3]]},{"name":"cpu","tags":{"t1":"","t2":"val2"},"columns":["time","value"],"values":[["2000-01-01T00:01:00Z",2]]}]}]}`, + }, + &Query{ + name: "measurements with identical tag values - SELECT *, with GROUP BY", + command: `SELECT value FROM db0.rp0.cpu GROUP BY t1,t2`, + exp: `{"results":[{"series":[{"name":"cpu","tags":{"t1":"val1","t2":""},"columns":["time","value"],"values":[["2000-01-01T00:00:00Z",1]]},{"name":"cpu","tags":{"t1":"val2","t2":""},"columns":["time","value"],"values":[["2000-01-01T00:02:00Z",3]]},{"name":"cpu","tags":{"t1":"","t2":"val2"},"columns":["time","value"],"values":[["2000-01-01T00:01:00Z",2]]}]}]}`, + }, + &Query{ + name: "measurements with identical tag values - SELECT value no GROUP BY", + command: `SELECT value FROM db0.rp0.cpu`, + exp: `{"results":[{"series":[{"name":"cpu","columns":["time","value"],"values":[["2000-01-01T00:00:00Z",1],["2000-01-01T00:01:00Z",2],["2000-01-01T00:02:00Z",3]]}]}]}`, + }, + }...) + + if err := test.init(s); err != nil { + t.Fatalf("test init failed: %s", err) + } + + for _, query := range test.queries { + if query.skip { + t.Logf("SKIP:: %s", query.name) + continue + } + if err := query.Execute(s); err != nil { + t.Error(query.Error(err)) + } else if !query.success() { + t.Error(query.failureMessage()) + } + } +} + // Ensure the server can query with the count aggregate function func TestServer_Query_Count(t *testing.T) { t.Parallel() From 4b0871e2931f8c0f37d6e579190115e904d4e386 Mon Sep 17 00:00:00 2001 From: Philip O'Toole Date: Sat, 20 Jun 2015 11:26:53 -0700 Subject: [PATCH 3/3] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c8285fb573..3b4a54740f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ - [#3033](https://github.com/influxdb/influxdb/pull/3033): Add support for marshaling `uint64` in client. - [#3090](https://github.com/influxdb/influxdb/pull/3090): Remove database from TSDB index on DROP DATABASE. - [#2944](https://github.com/influxdb/influxdb/issues/2944): Don't require "WHERE time" when creating continuous queries. +- [#3075](https://github.com/influxdb/influxdb/pull/3075): GROUP BY correctly when different tags have same value. ## v0.9.0 [2015-06-11]