From 35e137e1f6faee70d6eb2593db79a054bf232820 Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Fri, 19 Apr 2019 16:20:45 -0700 Subject: [PATCH] feat(storage): TagKeysSet is a set of tag keys derived from models.Tags This type will be used by the storage TagKeys API to quickly determine the distinct set of tag keys over a set of series keys. It is also intended to replace the implementation of keyMerger in the reads package in a separate PR. --- models/tagkeysset.go | 156 ++++++++++++++++++ models/tagkeysset_test.go | 325 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 481 insertions(+) create mode 100644 models/tagkeysset.go create mode 100644 models/tagkeysset_test.go diff --git a/models/tagkeysset.go b/models/tagkeysset.go new file mode 100644 index 0000000000..d165bdce33 --- /dev/null +++ b/models/tagkeysset.go @@ -0,0 +1,156 @@ +package models + +import ( + "bytes" + "strings" +) + +// TagKeysSet provides set operations for combining Tags. +type TagKeysSet struct { + i int + keys [2][][]byte + tmp [][]byte +} + +// Clear removes all the elements of TagKeysSet and ensures all internal +// buffers are reset. +func (set *TagKeysSet) Clear() { + set.clear(set.keys[0]) + set.clear(set.keys[1]) + set.clear(set.tmp) + set.i = 0 + set.keys[0] = set.keys[0][:0] +} + +func (set *TagKeysSet) clear(b [][]byte) { + b = b[:cap(b)] + for i := range b { + b[i] = nil + } +} + +// KeysBytes returns the merged keys in lexicographical order. +// The slice is valid until the next call to UnionKeys, UnionBytes or Clear. +func (set *TagKeysSet) KeysBytes() [][]byte { + return set.keys[set.i&1] +} + +// Keys returns a copy of the merged keys in lexicographical order. 
+func (set *TagKeysSet) Keys() []string { + keys := set.KeysBytes() + s := make([]string, 0, len(keys)) + for i := range keys { + s = append(s, string(keys[i])) + } + return s +} + +func (set *TagKeysSet) String() string { + var s []string + for _, k := range set.KeysBytes() { + s = append(s, string(k)) + } + return strings.Join(s, ",") +} + +// IsSupersetKeys returns true if the TagKeysSet is a superset of all the keys +// contained in other. +func (set *TagKeysSet) IsSupersetKeys(other Tags) bool { + keys := set.keys[set.i&1] + i, j := 0, 0 + for i < len(keys) && j < len(other) { + if cmp := bytes.Compare(keys[i], other[j].Key); cmp > 0 { + return false + } else if cmp == 0 { + j++ + } + i++ + } + + return j == len(other) +} + +// IsSupersetBytes returns true if the TagKeysSet is a superset of all the keys +// in other. +// Other must be lexicographically sorted or the results are undefined. +func (set *TagKeysSet) IsSupersetBytes(other [][]byte) bool { + keys := set.keys[set.i&1] + i, j := 0, 0 + for i < len(keys) && j < len(other) { + if cmp := bytes.Compare(keys[i], other[j]); cmp > 0 { + return false + } else if cmp == 0 { + j++ + } + i++ + } + + return j == len(other) +} + +// UnionKeys updates the set so that it is the union of itself and all the +// keys contained in other. +func (set *TagKeysSet) UnionKeys(other Tags) { + if set.IsSupersetKeys(other) { + return + } + + if l := len(other); cap(set.tmp) < l { + set.tmp = make([][]byte, l) + } else { + set.tmp = set.tmp[:l] + } + + for i := range other { + set.tmp[i] = other[i].Key + } + + set.merge(set.tmp) +} + +// UnionBytes updates the set so that it is the union of itself and all the +// keys contained in other. +// Other must be lexicographically sorted or the results are undefined. 
+func (set *TagKeysSet) UnionBytes(other [][]byte) { + if set.IsSupersetBytes(other) { + return + } + + set.merge(other) +} + +func (set *TagKeysSet) merge(in [][]byte) { + keys := set.keys[set.i&1] + l := len(keys) + len(in) + set.i = (set.i + 1) & 1 + keya := set.keys[set.i&1] + if cap(keya) < l { + keya = make([][]byte, 0, l) + } else { + keya = keya[:0] + } + + i, j := 0, 0 + for i < len(keys) && j < len(in) { + ki, kj := keys[i], in[j] + if cmp := bytes.Compare(ki, kj); cmp < 0 { + i++ + } else if cmp > 0 { + ki = kj + j++ + } else { + i++ + j++ + } + + keya = append(keya, ki) + } + + if i < len(keys) { + keya = append(keya, keys[i:]...) + } else if j < len(in) { + keya = append(keya, in[j:]...) + } + + set.keys[set.i&1] = keya +} diff --git a/models/tagkeysset_test.go b/models/tagkeysset_test.go new file mode 100644 index 0000000000..7ef4910d8c --- /dev/null +++ b/models/tagkeysset_test.go @@ -0,0 +1,325 @@ +package models_test + +import ( + "bytes" + "math/rand" + "strconv" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/influxdata/influxdb/models" +) + +func TestTagKeysSet_UnionKeys(t *testing.T) { + tests := []struct { + name string + tags []models.Tags + exp string + }{ + { + name: "mixed", + tags: []models.Tags{ + models.ParseTags([]byte("foo,tag0=v0,tag1=v0,tag2=v0")), + models.ParseTags([]byte("foo,tag0=v0,tag1=v0,tag2=v1")), + models.ParseTags([]byte("foo,tag0=v0")), + models.ParseTags([]byte("foo,tag0=v0,tag3=v0")), + }, + exp: "tag0,tag1,tag2,tag3", + }, + { + name: "mixed 2", + tags: []models.Tags{ + models.ParseTags([]byte("foo,tag0=v0")), + models.ParseTags([]byte("foo,tag0=v0,tag3=v0")), + models.ParseTags([]byte("foo,tag0=v0,tag1=v0,tag2=v0")), + models.ParseTags([]byte("foo,tag0=v0,tag1=v0,tag2=v1")), + }, + exp: "tag0,tag1,tag2,tag3", + }, + { + name: "all different", + tags: []models.Tags{ + models.ParseTags([]byte("foo,tag0=v0")), + models.ParseTags([]byte("foo,tag1=v0")), + models.ParseTags([]byte("foo,tag2=v1")), + 
models.ParseTags([]byte("foo,tag3=v0")), + }, + exp: "tag0,tag1,tag2,tag3", + }, + { + name: "new tags,verify clear", + tags: []models.Tags{ + models.ParseTags([]byte("foo,tag9=v0")), + models.ParseTags([]byte("foo,tag8=v0")), + }, + exp: "tag8,tag9", + }, + } + + var km models.TagKeysSet + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + km.Clear() + for _, tags := range tt.tags { + km.UnionKeys(tags) + } + + if got := km.String(); !cmp.Equal(got, tt.exp) { + t.Errorf("unexpected keys -got/+exp\n%s", cmp.Diff(got, tt.exp)) + } + }) + } +} + +func TestTagKeysSet_IsSuperset(t *testing.T) { + var km models.TagKeysSet + km.UnionBytes(bytes.Split([]byte("tag0,tag3,tag5,tag7"), commaB)) + + tests := []struct { + name string + tags models.Tags + exp bool + }{ + { + tags: models.ParseTags([]byte("foo,tag0=v,tag3=v")), + exp: true, + }, + { + tags: models.ParseTags([]byte("foo,tag3=v")), + exp: true, + }, + { + tags: models.ParseTags([]byte("foo,tag7=v")), + exp: true, + }, + { + tags: models.ParseTags([]byte("foo,tag3=v,tag7=v")), + exp: true, + }, + { + tags: models.ParseTags([]byte("foo,tag0=v,tag3=v,tag5=v,tag7=v")), + exp: true, + }, + { + tags: models.ParseTags([]byte("foo")), + exp: true, + }, + { + tags: models.ParseTags([]byte("foo,tag0=v,tag2=v")), + exp: false, + }, + { + tags: models.ParseTags([]byte("foo,tag1=v")), + exp: false, + }, + { + tags: models.ParseTags([]byte("foo,tag6=v")), + exp: false, + }, + { + tags: models.ParseTags([]byte("foo,tag8=v")), + exp: false, + }, + { + tags: models.ParseTags([]byte("foo,tag0=v,tag3=v,tag5=v,tag8=v")), + exp: false, + }, + { + tags: models.ParseTags([]byte("foo,tag0=v,tag3=v,tag5=v,tag6=v")), + exp: false, + }, + { + tags: models.ParseTags([]byte("foo,tag0=v,tag3=v,tag5=v,tag7=v,tag8=v")), + exp: false, + }, + } + + for _, tt := range tests { + t.Run("tags/" + tt.name, func(t *testing.T) { + if got := km.IsSupersetKeys(tt.tags); got != tt.exp { + t.Errorf("unexpected IsSuperset -got/+exp\n%s", 
cmp.Diff(got, tt.exp)) + } + }) + } + + for _, tt := range tests { + t.Run("bytes/" + tt.name, func(t *testing.T) { + var keys [][]byte + for i := range tt.tags { + keys = append(keys, tt.tags[i].Key) + } + if got := km.IsSupersetBytes(keys); got != tt.exp { + t.Errorf("unexpected IsSupersetBytes -got/+exp\n%s", cmp.Diff(got, tt.exp)) + } + }) + } +} + +var commaB = []byte(",") + +func TestTagKeysSet_UnionBytes(t *testing.T) { + + tests := []struct { + name string + keys [][][]byte + exp string + }{ + { + name: "mixed", + keys: [][][]byte{ + bytes.Split([]byte("tag0,tag1,tag2"), commaB), + bytes.Split([]byte("tag0,tag1,tag2"), commaB), + bytes.Split([]byte("tag0"), commaB), + bytes.Split([]byte("tag0,tag3"), commaB), + }, + exp: "tag0,tag1,tag2,tag3", + }, + { + name: "mixed 2", + keys: [][][]byte{ + bytes.Split([]byte("tag0"), commaB), + bytes.Split([]byte("tag0,tag3"), commaB), + bytes.Split([]byte("tag0,tag1,tag2"), commaB), + bytes.Split([]byte("tag0,tag1,tag2"), commaB), + }, + exp: "tag0,tag1,tag2,tag3", + }, + { + name: "all different", + keys: [][][]byte{ + bytes.Split([]byte("tag0"), commaB), + bytes.Split([]byte("tag3"), commaB), + bytes.Split([]byte("tag1"), commaB), + bytes.Split([]byte("tag2"), commaB), + }, + exp: "tag0,tag1,tag2,tag3", + }, + { + name: "new tags,verify clear", + keys: [][][]byte{ + bytes.Split([]byte("tag9"), commaB), + bytes.Split([]byte("tag8"), commaB), + }, + exp: "tag8,tag9", + }, + } + + var km models.TagKeysSet + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + km.Clear() + for _, keys := range tt.keys { + km.UnionBytes(keys) + } + + if got := km.String(); !cmp.Equal(got, tt.exp) { + t.Errorf("unexpected keys -got/+exp\n%s", cmp.Diff(got, tt.exp)) + } + }) + } +} + +func BenchmarkTagKeysSet_UnionBytes(b *testing.B) { + keys := [][][]byte{ + bytes.Split([]byte("tag00,tag01,tag02"), commaB), + bytes.Split([]byte("tag00,tag01,tag02"), commaB), + 
bytes.Split([]byte("tag00,tag01,tag05,tag06,tag10,tag11,tag12,tag13,tag14,tag15"), commaB), + bytes.Split([]byte("tag00"), commaB), + bytes.Split([]byte("tag00,tag03"), commaB), + bytes.Split([]byte("tag01,tag03,tag13,tag14,tag15"), commaB), + bytes.Split([]byte("tag04,tag05"), commaB), + } + + rand.Seed(20040409) + + tests := []int{ + 10, + 1000, + 1000000, + } + + for _, n := range tests { + b.Run(strconv.Itoa(n), func(b *testing.B) { + b.ResetTimer() + + var km models.TagKeysSet + for i := 0; i < b.N; i++ { + for j := 0; j < n; j++ { + km.UnionBytes(keys[rand.Int()%len(keys)]) + } + km.Clear() + } + }) + } +} + +type XorShift64Star struct { + state uint64 +} + +func (x *XorShift64Star) Next() uint64 { + x.state ^= x.state >> 12 + x.state ^= x.state << 25 + x.state ^= x.state >> 27 + return x.state * 2685821657736338717 +} + +func BenchmarkTagKeysSet_UnionKeys(b *testing.B) { + tags := []models.Tags{ + models.ParseTags([]byte("foo,tag00=v0,tag01=v0,tag02=v0")), + models.ParseTags([]byte("foo,tag00=v0,tag01=v0,tag02=v0")), + models.ParseTags([]byte("foo,tag00=v0,tag01=v0,tag05=v0,tag06=v0,tag10=v0,tag11=v0,tag12=v0,tag13=v0,tag14=v0,tag15=v0")), + models.ParseTags([]byte("foo,tag00=v0")), + models.ParseTags([]byte("foo,tag00=v0,tag03=v0")), + models.ParseTags([]byte("foo,tag01=v0,tag03=v0,tag13=v0,tag14=v0,tag15=v0")), + models.ParseTags([]byte("foo,tag04=v0,tag05=v0")), + } + + rnd := XorShift64Star{state: 20040409} + + tests := []int{ + 10, + 1000, + 1000000, + } + + for _, n := range tests { + b.Run(strconv.Itoa(n), func(b *testing.B) { + b.ResetTimer() + + var km models.TagKeysSet + for i := 0; i < b.N; i++ { + for j := 0; j < n; j++ { + km.UnionKeys(tags[rnd.Next()%uint64(len(tags))]) + } + km.Clear() + } + }) + } +} + +func BenchmarkTagKeysSet_IsSuperset(b *testing.B) { + var km models.TagKeysSet + km.UnionBytes(bytes.Split([]byte("tag0,tag3,tag5,tag7"), commaB)) + + tests := []struct { + name string + tags models.Tags + }{ + {name: "last/true", tags: 
models.ParseTags([]byte("foo,tag7=v"))}, + {name: "last/false", tags: models.ParseTags([]byte("foo,tag8=v"))}, + {name: "first_last/true", tags: models.ParseTags([]byte("foo,tag0=v,tag7=v"))}, + {name: "all/true", tags: models.ParseTags([]byte("foo,tag0=v,tag3=v,tag5=v,tag7=v"))}, + {name: "first not last/false", tags: models.ParseTags([]byte("foo,tag0=v,tag8=v"))}, + {name: "all but last/false", tags: models.ParseTags([]byte("foo,tag0=v,tag3=v,tag5=v,tag7=v,tag8=v"))}, + } + + for _, n := range tests { + b.Run(n.name, func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + km.IsSupersetKeys(n.tags) + } + }) + } +}