feat(storage): TagKeysSet is a set of tag keys derived from models.Tags

This type will be used by the storage TagKeys API to quickly determine
the distinct set of tag keys over a set of series keys.

It is also intended to replace the implementation of keyMerger in
the reads package in a separate PR.
pull/13527/head
Stuart Carnie 2019-04-19 16:20:45 -07:00
parent 49230619e6
commit 35e137e1f6
No known key found for this signature in database
GPG Key ID: 848D9C9718D78B4F
2 changed files with 481 additions and 0 deletions

156
models/tagkeysset.go Normal file
View File

@ -0,0 +1,156 @@
package models
import (
"bytes"
"strings"
)
// TagKeysSet provides set operations for combining Tags.
//
// The keys are kept in lexicographical order in one of two internal buffers.
// Every merge reads the active buffer and writes its result into the other
// one, which then becomes active, so repeated unions can reuse the same two
// allocations. Keys are stored by reference: the byte slices handed to
// UnionKeys and UnionBytes are not copied.
//
// The zero value is an empty set that is ready to use.
type TagKeysSet struct {
	i    int         // index (0 or 1) of the active buffer in keys
	keys [2][][]byte // double buffer holding the sorted keys
	tmp  [][]byte    // scratch buffer used by UnionKeys to collect the keys of a Tags value
}

// Clear removes all the elements of TagKeysSet and ensures all internal
// buffers are reset.
//
// Every slot of every buffer, up to its capacity, is set to nil so the set no
// longer references any key it was given. The buffers themselves are kept so
// that later unions can reuse them.
func (set *TagKeysSet) Clear() {
	set.clear(set.keys[0])
	set.clear(set.keys[1])
	set.clear(set.tmp)

	set.i = 0
	set.keys[0] = set.keys[0][:0]
	// Truncate the remaining buffers as well so that no buffer is left with a
	// stale length; merge and UnionKeys re-slice them before use either way.
	set.keys[1] = set.keys[1][:0]
	set.tmp = set.tmp[:0]
}

// clear sets every element of b, up to its full capacity, to nil.
func (set *TagKeysSet) clear(b [][]byte) {
	b = b[:cap(b)]
	for i := range b {
		b[i] = nil
	}
}

// KeysBytes returns the merged keys in lexicographical order.
// The returned slice is the set's active internal buffer, not a copy; it is
// valid until the next call to UnionKeys, UnionBytes or Clear.
func (set *TagKeysSet) KeysBytes() [][]byte {
	return set.keys[set.i&1]
}

// Keys returns a copy of the merged keys in lexicographical order.
// The result is always non-nil, even when the set is empty.
func (set *TagKeysSet) Keys() []string {
	keys := set.KeysBytes()
	s := make([]string, len(keys))
	for i, k := range keys {
		s[i] = string(k)
	}
	return s
}

// String returns the merged keys joined by commas, in lexicographical order.
// An empty set yields the empty string.
func (set *TagKeysSet) String() string {
	return strings.Join(set.Keys(), ",")
}

// IsSupersetKeys returns true if the TagKeysSet is a superset of all the keys
// contained in other.
// The keys of other are expected to be in lexicographical order; the
// single-pass comparison below relies on it.
func (set *TagKeysSet) IsSupersetKeys(other Tags) bool {
	keys := set.keys[set.i&1]
	i, j := 0, 0
	for i < len(keys) && j < len(other) {
		switch cmp := bytes.Compare(keys[i], other[j].Key); {
		case cmp > 0:
			// The set has already moved past other[j], so that key is missing.
			return false
		case cmp == 0:
			j++
		}
		i++
	}
	// Any key of other that was not consumed has no match in the set.
	return j == len(other)
}

// IsSupersetBytes returns true if the TagKeysSet is a superset of all the keys
// in other.
// Other must be lexicographically sorted or the results are undefined.
func (set *TagKeysSet) IsSupersetBytes(other [][]byte) bool {
	keys := set.keys[set.i&1]
	i, j := 0, 0
	for i < len(keys) && j < len(other) {
		switch cmp := bytes.Compare(keys[i], other[j]); {
		case cmp > 0:
			// The set has already moved past other[j], so that key is missing.
			return false
		case cmp == 0:
			j++
		}
		i++
	}
	// Any key of other that was not consumed has no match in the set.
	return j == len(other)
}

// UnionKeys updates the set so that it is the union of itself and all the
// keys contained in other.
// The keys of other are referenced, not copied.
func (set *TagKeysSet) UnionKeys(other Tags) {
	// Nothing to merge when every key is already present.
	if set.IsSupersetKeys(other) {
		return
	}

	// Collect the keys of other into the reusable scratch buffer.
	if l := len(other); cap(set.tmp) < l {
		set.tmp = make([][]byte, l)
	} else {
		set.tmp = set.tmp[:l]
	}
	for i := range other {
		set.tmp[i] = other[i].Key
	}

	set.merge(set.tmp)
}

// UnionBytes updates the set so that it is the union of itself and all the
// keys contained in other.
// Other must be lexicographically sorted or the results are undefined.
// The elements of other are referenced, not copied.
func (set *TagKeysSet) UnionBytes(other [][]byte) {
	// Nothing to merge when every key is already present.
	if set.IsSupersetBytes(other) {
		return
	}
	set.merge(other)
}

// merge combines the active buffer with the sorted keys of in, writes the
// sorted, de-duplicated result into the inactive buffer and makes that
// buffer the active one.
func (set *TagKeysSet) merge(in [][]byte) {
	keys := set.keys[set.i&1]
	l := len(keys) + len(in) // upper bound on the size of the result

	// Switch to the other buffer, growing it only when it is too small.
	set.i = (set.i + 1) & 1
	keya := set.keys[set.i&1]
	if cap(keya) < l {
		keya = make([][]byte, 0, l)
	} else {
		keya = keya[:0]
	}

	i, j := 0, 0
	for i < len(keys) && j < len(in) {
		switch cmp := bytes.Compare(keys[i], in[j]); {
		case cmp < 0:
			keya = append(keya, keys[i])
			i++
		case cmp > 0:
			keya = append(keya, in[j])
			j++
		default:
			// Same key on both sides: keep the one already in the set.
			keya = append(keya, keys[i])
			i++
			j++
		}
	}

	// At most one of the inputs still has elements; the other append is a no-op.
	keya = append(keya, keys[i:]...)
	keya = append(keya, in[j:]...)

	set.keys[set.i&1] = keya
}

325
models/tagkeysset_test.go Normal file
View File

@ -0,0 +1,325 @@
package models_test
import (
"bytes"
"math/rand"
"strconv"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/influxdata/influxdb/models"
)
// TestTagKeysSet_UnionKeys checks that repeatedly calling UnionKeys with the
// tags of several series yields the sorted, distinct set of tag keys.
//
// A single TagKeysSet is shared by all cases and cleared at the start of each
// one, so the final case also confirms that Clear drops the keys left behind
// by the earlier cases.
func TestTagKeysSet_UnionKeys(t *testing.T) {
	// parse converts each series key into its parsed tags.
	parse := func(series ...string) []models.Tags {
		out := make([]models.Tags, 0, len(series))
		for _, s := range series {
			out = append(out, models.ParseTags([]byte(s)))
		}
		return out
	}

	cases := []struct {
		name string
		tags []models.Tags
		exp  string
	}{
		{
			name: "mixed",
			tags: parse(
				"foo,tag0=v0,tag1=v0,tag2=v0",
				"foo,tag0=v0,tag1=v0,tag2=v1",
				"foo,tag0=v0",
				"foo,tag0=v0,tag3=v0",
			),
			exp: "tag0,tag1,tag2,tag3",
		},
		{
			name: "mixed 2",
			tags: parse(
				"foo,tag0=v0",
				"foo,tag0=v0,tag3=v0",
				"foo,tag0=v0,tag1=v0,tag2=v0",
				"foo,tag0=v0,tag1=v0,tag2=v1",
			),
			exp: "tag0,tag1,tag2,tag3",
		},
		{
			name: "all different",
			tags: parse(
				"foo,tag0=v0",
				"foo,tag1=v0",
				"foo,tag2=v1",
				"foo,tag3=v0",
			),
			exp: "tag0,tag1,tag2,tag3",
		},
		{
			name: "new tags,verify clear",
			tags: parse(
				"foo,tag9=v0",
				"foo,tag8=v0",
			),
			exp: "tag8,tag9",
		},
	}

	var set models.TagKeysSet
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			set.Clear()
			for i := range tc.tags {
				set.UnionKeys(tc.tags[i])
			}

			got := set.String()
			if cmp.Equal(got, tc.exp) {
				return
			}
			t.Errorf("unexpected keys -got/+exp\n%s", cmp.Diff(got, tc.exp))
		})
	}
}
// TestTagKeysSet_IsSuperset checks IsSupersetKeys and IsSupersetBytes against
// a set holding tag0, tag3, tag5 and tag7.
//
// Each case is run twice: once passing the parsed tags to IsSupersetKeys
// ("tags/<name>") and once passing only their keys to IsSupersetBytes
// ("bytes/<name>"). Every case is named after the tag keys it probes so that
// a failing subtest can be identified and selected with -run.
func TestTagKeysSet_IsSuperset(t *testing.T) {
	var km models.TagKeysSet
	km.UnionBytes(bytes.Split([]byte("tag0,tag3,tag5,tag7"), commaB))

	tests := []struct {
		name string
		tags models.Tags
		exp  bool
	}{
		// Probes made up only of keys that are in the set.
		{
			name: "tag0,tag3",
			tags: models.ParseTags([]byte("foo,tag0=v,tag3=v")),
			exp:  true,
		},
		{
			name: "tag3",
			tags: models.ParseTags([]byte("foo,tag3=v")),
			exp:  true,
		},
		{
			name: "tag7",
			tags: models.ParseTags([]byte("foo,tag7=v")),
			exp:  true,
		},
		{
			name: "tag3,tag7",
			tags: models.ParseTags([]byte("foo,tag3=v,tag7=v")),
			exp:  true,
		},
		{
			name: "tag0,tag3,tag5,tag7",
			tags: models.ParseTags([]byte("foo,tag0=v,tag3=v,tag5=v,tag7=v")),
			exp:  true,
		},
		{
			name: "none",
			tags: models.ParseTags([]byte("foo")),
			exp:  true,
		},

		// Probes containing at least one key that is not in the set.
		{
			name: "tag0,tag2",
			tags: models.ParseTags([]byte("foo,tag0=v,tag2=v")),
			exp:  false,
		},
		{
			name: "tag1",
			tags: models.ParseTags([]byte("foo,tag1=v")),
			exp:  false,
		},
		{
			name: "tag6",
			tags: models.ParseTags([]byte("foo,tag6=v")),
			exp:  false,
		},
		{
			name: "tag8",
			tags: models.ParseTags([]byte("foo,tag8=v")),
			exp:  false,
		},
		{
			name: "tag0,tag3,tag5,tag8",
			tags: models.ParseTags([]byte("foo,tag0=v,tag3=v,tag5=v,tag8=v")),
			exp:  false,
		},
		{
			name: "tag0,tag3,tag5,tag6",
			tags: models.ParseTags([]byte("foo,tag0=v,tag3=v,tag5=v,tag6=v")),
			exp:  false,
		},
		{
			name: "tag0,tag3,tag5,tag7,tag8",
			tags: models.ParseTags([]byte("foo,tag0=v,tag3=v,tag5=v,tag7=v,tag8=v")),
			exp:  false,
		},
	}

	for _, tt := range tests {
		t.Run("tags/"+tt.name, func(t *testing.T) {
			if got := km.IsSupersetKeys(tt.tags); got != tt.exp {
				t.Errorf("unexpected IsSuperset -got/+exp\n%s", cmp.Diff(got, tt.exp))
			}
		})
	}

	for _, tt := range tests {
		t.Run("bytes/"+tt.name, func(t *testing.T) {
			// Reduce the parsed tags to their keys for the [][]byte variant.
			var keys [][]byte
			for i := range tt.tags {
				keys = append(keys, tt.tags[i].Key)
			}
			if got := km.IsSupersetBytes(keys); got != tt.exp {
				t.Errorf("unexpected IsSupersetBytes -got/+exp\n%s", cmp.Diff(got, tt.exp))
			}
		})
	}
}
// commaB is the separator passed to bytes.Split by the tests and benchmarks
// in this file to turn a comma-separated list of tag keys into a [][]byte.
var commaB = []byte(",")
// TestTagKeysSet_UnionBytes checks that repeatedly calling UnionBytes with
// sorted key lists yields the sorted, distinct set of keys.
//
// A single TagKeysSet is shared by all cases and cleared at the start of each
// one, so the final case also confirms that Clear drops the keys left behind
// by the earlier cases.
func TestTagKeysSet_UnionBytes(t *testing.T) {
	// split turns each comma-separated list into its own slice of keys.
	split := func(lists ...string) [][][]byte {
		out := make([][][]byte, 0, len(lists))
		for _, l := range lists {
			out = append(out, bytes.Split([]byte(l), commaB))
		}
		return out
	}

	cases := []struct {
		name string
		keys [][][]byte
		exp  string
	}{
		{
			name: "mixed",
			keys: split(
				"tag0,tag1,tag2",
				"tag0,tag1,tag2",
				"tag0",
				"tag0,tag3",
			),
			exp: "tag0,tag1,tag2,tag3",
		},
		{
			name: "mixed 2",
			keys: split(
				"tag0",
				"tag0,tag3",
				"tag0,tag1,tag2",
				"tag0,tag1,tag2",
			),
			exp: "tag0,tag1,tag2,tag3",
		},
		{
			name: "all different",
			keys: split(
				"tag0",
				"tag3",
				"tag1",
				"tag2",
			),
			exp: "tag0,tag1,tag2,tag3",
		},
		{
			name: "new tags,verify clear",
			keys: split(
				"tag9",
				"tag8",
			),
			exp: "tag8,tag9",
		},
	}

	var set models.TagKeysSet
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			set.Clear()
			for i := range tc.keys {
				set.UnionBytes(tc.keys[i])
			}

			got := set.String()
			if cmp.Equal(got, tc.exp) {
				return
			}
			t.Errorf("unexpected keys -got/+exp\n%s", cmp.Diff(got, tc.exp))
		})
	}
}
// BenchmarkTagKeysSet_UnionBytes measures UnionBytes when fed a pseudo-random
// sequence of sorted key lists.
//
// Each sub-benchmark is named after n, the number of UnionBytes calls made
// between two calls to Clear.
func BenchmarkTagKeysSet_UnionBytes(b *testing.B) {
	keys := [][][]byte{
		bytes.Split([]byte("tag00,tag01,tag02"), commaB),
		bytes.Split([]byte("tag00,tag01,tag02"), commaB),
		bytes.Split([]byte("tag00,tag01,tag05,tag06,tag10,tag11,tag12,tag13,tag14,tag15"), commaB),
		bytes.Split([]byte("tag00"), commaB),
		bytes.Split([]byte("tag00,tag03"), commaB),
		bytes.Split([]byte("tag01,tag03,tag13,tag14,tag15"), commaB),
		bytes.Split([]byte("tag04,tag05"), commaB),
	}

	// Use a private generator with a fixed seed instead of seeding the
	// package-level one: the input order stays reproducible, the benchmark
	// does not change global state, and the measured loop does not go through
	// the locking of the shared source.
	rnd := rand.New(rand.NewSource(20040409))

	tests := []int{
		10,
		1000,
		1000000,
	}

	for _, n := range tests {
		b.Run(strconv.Itoa(n), func(b *testing.B) {
			var km models.TagKeysSet

			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				for j := 0; j < n; j++ {
					km.UnionBytes(keys[rnd.Int()%len(keys)])
				}
				km.Clear()
			}
		})
	}
}
// XorShift64Star is a small deterministic pseudo-random number generator
// using the xorshift64* scheme: three xor-shift steps on a 64-bit state
// followed by a multiplication of the output.
//
// It is not safe for concurrent use.
type XorShift64Star struct {
	state uint64 // current generator state; the seed before the first call to Next
}

// Next advances the generator and returns the next value of the sequence.
//
// A state of zero is a fixed point of the xor-shift steps and would make
// every call return 0, so a zero state (including the zero value of
// XorShift64Star) is replaced by a fixed non-zero seed first. Sequences
// produced from a non-zero seed are not affected.
func (x *XorShift64Star) Next() uint64 {
	const fallbackSeed = 0x9E3779B97F4A7C15

	if x.state == 0 {
		x.state = fallbackSeed
	}

	x.state ^= x.state >> 12
	x.state ^= x.state << 25
	x.state ^= x.state >> 27
	return x.state * 2685821657736338717
}
// BenchmarkTagKeysSet_UnionKeys measures UnionKeys when fed a pseudo-random
// sequence of parsed tag sets, picked with a fixed-seed XorShift64Star.
//
// Each sub-benchmark is named after the number of UnionKeys calls made
// between two calls to Clear.
func BenchmarkTagKeysSet_UnionKeys(b *testing.B) {
	series := []string{
		"foo,tag00=v0,tag01=v0,tag02=v0",
		"foo,tag00=v0,tag01=v0,tag02=v0",
		"foo,tag00=v0,tag01=v0,tag05=v0,tag06=v0,tag10=v0,tag11=v0,tag12=v0,tag13=v0,tag14=v0,tag15=v0",
		"foo,tag00=v0",
		"foo,tag00=v0,tag03=v0",
		"foo,tag01=v0,tag03=v0,tag13=v0,tag14=v0,tag15=v0",
		"foo,tag04=v0,tag05=v0",
	}
	tags := make([]models.Tags, 0, len(series))
	for _, s := range series {
		tags = append(tags, models.ParseTags([]byte(s)))
	}

	rnd := XorShift64Star{state: 20040409}

	for _, unions := range []int{10, 1000, 1000000} {
		b.Run(strconv.Itoa(unions), func(b *testing.B) {
			b.ResetTimer()

			var set models.TagKeysSet
			for iter := 0; iter < b.N; iter++ {
				for k := 0; k < unions; k++ {
					pick := rnd.Next() % uint64(len(tags))
					set.UnionKeys(tags[pick])
				}
				set.Clear()
			}
		})
	}
}
// BenchmarkTagKeysSet_IsSuperset measures IsSupersetKeys against a set
// holding tag0, tag3, tag5 and tag7, using probes that match or miss at
// different positions. The suffix of each sub-benchmark name is the result
// the probe is expected to produce.
func BenchmarkTagKeysSet_IsSuperset(b *testing.B) {
	var set models.TagKeysSet
	set.UnionBytes(bytes.Split([]byte("tag0,tag3,tag5,tag7"), commaB))

	type probe struct {
		name string
		tags models.Tags
	}
	probes := []probe{
		{"last/true", models.ParseTags([]byte("foo,tag7=v"))},
		{"last/false", models.ParseTags([]byte("foo,tag8=v"))},
		{"first_last/true", models.ParseTags([]byte("foo,tag0=v,tag7=v"))},
		{"all/true", models.ParseTags([]byte("foo,tag0=v,tag3=v,tag5=v,tag7=v"))},
		{"first not last/false", models.ParseTags([]byte("foo,tag0=v,tag8=v"))},
		{"all but last/false", models.ParseTags([]byte("foo,tag0=v,tag3=v,tag5=v,tag7=v,tag8=v"))},
	}

	for _, p := range probes {
		b.Run(p.name, func(b *testing.B) {
			b.ResetTimer()
			for iter := 0; iter < b.N; iter++ {
				set.IsSupersetKeys(p.tags)
			}
		})
	}
}