feat(storage): TagKeysSet is a set of tag keys derived from models.Tags
This type will be used by the storage TagKeys API to quickly determine the distinct set of tag keys over a set of series keys. It is also intended to replace the implementation of keyMerger in the reads package in a separate PR.

branch pull/13527/head
parent 49230619e6
commit 35e137e1f6
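For context, the sketch below shows the intended usage pattern described above: union the tag keys of each series and read back the sorted result. The distinctTagKeys helper and its series-key inputs are hypothetical and not part of this change.

package models_test // illustrative sketch, not part of this commit

import "github.com/influxdata/influxdb/models"

// distinctTagKeys is a hypothetical helper that collects the distinct tag
// keys across a set of series keys using the TagKeysSet added in this commit.
func distinctTagKeys(seriesKeys [][]byte) []string {
	var set models.TagKeysSet
	for _, sk := range seriesKeys {
		// ParseTags extracts the tag key/value pairs from a series key such as
		// "cpu,host=a,region=b"; tag keys in a series key are already sorted.
		set.UnionKeys(models.ParseTags(sk))
	}
	return set.Keys() // lexicographically sorted copy of the merged keys
}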
@@ -0,0 +1,156 @@
package models

import (
	"bytes"
	"strings"
)

// TagKeysSet provides set operations for combining Tags.
type TagKeysSet struct {
	i    int
	keys [2][][]byte
	tmp  [][]byte
}

// Clear removes all the elements of TagKeysSet and ensures all internal
// buffers are reset.
func (set *TagKeysSet) Clear() {
	set.clear(set.keys[0])
	set.clear(set.keys[1])
	set.clear(set.tmp)
	set.i = 0
	set.keys[0] = set.keys[0][:0]
}

func (set *TagKeysSet) clear(b [][]byte) {
	b = b[:cap(b)]
	for i := range b {
		b[i] = nil
	}
}

// KeysBytes returns the merged keys in lexicographical order.
// The slice is valid until the next call to UnionKeys, UnionBytes or Clear.
func (set *TagKeysSet) KeysBytes() [][]byte {
	return set.keys[set.i&1]
}

// Keys returns a copy of the merged keys in lexicographical order.
func (set *TagKeysSet) Keys() []string {
	keys := set.KeysBytes()
	s := make([]string, 0, len(keys))
	for i := range keys {
		s = append(s, string(keys[i]))
	}
	return s
}

func (set *TagKeysSet) String() string {
	var s []string
	for _, k := range set.KeysBytes() {
		s = append(s, string(k))
	}
	return strings.Join(s, ",")
}

// IsSupersetKeys returns true if the TagKeysSet is a superset of all the keys
// contained in other.
func (set *TagKeysSet) IsSupersetKeys(other Tags) bool {
	keys := set.keys[set.i&1]
	i, j := 0, 0
	for i < len(keys) && j < len(other) {
		if cmp := bytes.Compare(keys[i], other[j].Key); cmp > 0 {
			return false
		} else if cmp == 0 {
			j++
		}
		i++
	}

	return j == len(other)
}

// IsSupersetBytes returns true if the TagKeysSet is a superset of all the keys
// in other.
// Other must be lexicographically sorted or the results are undefined.
func (set *TagKeysSet) IsSupersetBytes(other [][]byte) bool {
	keys := set.keys[set.i&1]
	i, j := 0, 0
	for i < len(keys) && j < len(other) {
		if cmp := bytes.Compare(keys[i], other[j]); cmp > 0 {
			return false
		} else if cmp == 0 {
			j++
		}
		i++
	}

	return j == len(other)
}

// UnionKeys updates the set so that it is the union of itself and all the
// keys contained in other.
func (set *TagKeysSet) UnionKeys(other Tags) {
	if set.IsSupersetKeys(other) {
		return
	}

	if l := len(other); cap(set.tmp) < l {
		set.tmp = make([][]byte, l)
	} else {
		set.tmp = set.tmp[:l]
	}

	for i := range other {
		set.tmp[i] = other[i].Key
	}

	set.merge(set.tmp)
}

// UnionBytes updates the set so that it is the union of itself and all the
// keys contained in other.
// Other must be lexicographically sorted or the results are undefined.
func (set *TagKeysSet) UnionBytes(other [][]byte) {
	if set.IsSupersetBytes(other) {
		return
	}

	set.merge(other)
}

func (set *TagKeysSet) merge(in [][]byte) {
	keys := set.keys[set.i&1]
	l := len(keys) + len(in)
	set.i = (set.i + 1) & 1
	keya := set.keys[set.i&1]
	if cap(keya) < l {
		keya = make([][]byte, 0, l)
	} else {
		keya = keya[:0]
	}

	i, j := 0, 0
	for i < len(keys) && j < len(in) {
		ki, kj := keys[i], in[j]
		if cmp := bytes.Compare(ki, kj); cmp < 0 {
			i++
		} else if cmp > 0 {
			ki = kj
			j++
		} else {
			i++
			j++
		}

		keya = append(keya, ki)
	}

	if i < len(keys) {
		keya = append(keya, keys[i:]...)
	} else if j < len(in) {
		keya = append(keya, in[j:]...)
	}

	set.keys[set.i&1] = keya
}
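A note on the merge strategy above: keys holds two buffers and set.i&1 selects the active one, so merge builds the union into the other buffer and flips the index rather than shifting elements in place, and UnionKeys/UnionBytes return early via the superset checks when the incoming keys add nothing new. The example below is a hypothetical sketch of the resulting semantics and is not part of this commit.

package models_test // illustrative sketch only

import (
	"fmt"

	"github.com/influxdata/influxdb/models"
)

// ExampleTagKeysSet demonstrates the union and superset semantics.
func ExampleTagKeysSet() {
	var set models.TagKeysSet
	set.UnionBytes([][]byte{[]byte("host"), []byte("region")}) // input must be sorted
	set.UnionKeys(models.ParseTags([]byte("cpu,host=a,zone=b")))

	fmt.Println(set.String())                                  // merged, sorted keys
	fmt.Println(set.IsSupersetBytes([][]byte{[]byte("host")})) // present
	fmt.Println(set.IsSupersetBytes([][]byte{[]byte("dc")}))   // absent

	// Output:
	// host,region,zone
	// true
	// false
}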
@@ -0,0 +1,325 @@
package models_test

import (
	"bytes"
	"math/rand"
	"strconv"
	"testing"

	"github.com/google/go-cmp/cmp"
	"github.com/influxdata/influxdb/models"
)

func TestTagKeysSet_UnionKeys(t *testing.T) {
	tests := []struct {
		name string
		tags []models.Tags
		exp  string
	}{
		{
			name: "mixed",
			tags: []models.Tags{
				models.ParseTags([]byte("foo,tag0=v0,tag1=v0,tag2=v0")),
				models.ParseTags([]byte("foo,tag0=v0,tag1=v0,tag2=v1")),
				models.ParseTags([]byte("foo,tag0=v0")),
				models.ParseTags([]byte("foo,tag0=v0,tag3=v0")),
			},
			exp: "tag0,tag1,tag2,tag3",
		},
		{
			name: "mixed 2",
			tags: []models.Tags{
				models.ParseTags([]byte("foo,tag0=v0")),
				models.ParseTags([]byte("foo,tag0=v0,tag3=v0")),
				models.ParseTags([]byte("foo,tag0=v0,tag1=v0,tag2=v0")),
				models.ParseTags([]byte("foo,tag0=v0,tag1=v0,tag2=v1")),
			},
			exp: "tag0,tag1,tag2,tag3",
		},
		{
			name: "all different",
			tags: []models.Tags{
				models.ParseTags([]byte("foo,tag0=v0")),
				models.ParseTags([]byte("foo,tag1=v0")),
				models.ParseTags([]byte("foo,tag2=v1")),
				models.ParseTags([]byte("foo,tag3=v0")),
			},
			exp: "tag0,tag1,tag2,tag3",
		},
		{
			name: "new tags,verify clear",
			tags: []models.Tags{
				models.ParseTags([]byte("foo,tag9=v0")),
				models.ParseTags([]byte("foo,tag8=v0")),
			},
			exp: "tag8,tag9",
		},
	}

	var km models.TagKeysSet
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			km.Clear()
			for _, tags := range tt.tags {
				km.UnionKeys(tags)
			}

			if got := km.String(); !cmp.Equal(got, tt.exp) {
				t.Errorf("unexpected keys -got/+exp\n%s", cmp.Diff(got, tt.exp))
			}
		})
	}
}

func TestTagKeysSet_IsSuperset(t *testing.T) {
	var km models.TagKeysSet
	km.UnionBytes(bytes.Split([]byte("tag0,tag3,tag5,tag7"), commaB))

	tests := []struct {
		name string
		tags models.Tags
		exp  bool
	}{
		{
			tags: models.ParseTags([]byte("foo,tag0=v,tag3=v")),
			exp:  true,
		},
		{
			tags: models.ParseTags([]byte("foo,tag3=v")),
			exp:  true,
		},
		{
			tags: models.ParseTags([]byte("foo,tag7=v")),
			exp:  true,
		},
		{
			tags: models.ParseTags([]byte("foo,tag3=v,tag7=v")),
			exp:  true,
		},
		{
			tags: models.ParseTags([]byte("foo,tag0=v,tag3=v,tag5=v,tag7=v")),
			exp:  true,
		},
		{
			tags: models.ParseTags([]byte("foo")),
			exp:  true,
		},
		{
			tags: models.ParseTags([]byte("foo,tag0=v,tag2=v")),
			exp:  false,
		},
		{
			tags: models.ParseTags([]byte("foo,tag1=v")),
			exp:  false,
		},
		{
			tags: models.ParseTags([]byte("foo,tag6=v")),
			exp:  false,
		},
		{
			tags: models.ParseTags([]byte("foo,tag8=v")),
			exp:  false,
		},
		{
			tags: models.ParseTags([]byte("foo,tag0=v,tag3=v,tag5=v,tag8=v")),
			exp:  false,
		},
		{
			tags: models.ParseTags([]byte("foo,tag0=v,tag3=v,tag5=v,tag6=v")),
			exp:  false,
		},
		{
			tags: models.ParseTags([]byte("foo,tag0=v,tag3=v,tag5=v,tag7=v,tag8=v")),
			exp:  false,
		},
	}

	for _, tt := range tests {
		t.Run("tags/"+tt.name, func(t *testing.T) {
			if got := km.IsSupersetKeys(tt.tags); got != tt.exp {
				t.Errorf("unexpected IsSuperset -got/+exp\n%s", cmp.Diff(got, tt.exp))
			}
		})
	}

	for _, tt := range tests {
		t.Run("bytes/"+tt.name, func(t *testing.T) {
			var keys [][]byte
			for i := range tt.tags {
				keys = append(keys, tt.tags[i].Key)
			}
			if got := km.IsSupersetBytes(keys); got != tt.exp {
				t.Errorf("unexpected IsSupersetBytes -got/+exp\n%s", cmp.Diff(got, tt.exp))
			}
		})
	}
}

var commaB = []byte(",")

func TestTagKeysSet_UnionBytes(t *testing.T) {
	tests := []struct {
		name string
		keys [][][]byte
		exp  string
	}{
		{
			name: "mixed",
			keys: [][][]byte{
				bytes.Split([]byte("tag0,tag1,tag2"), commaB),
				bytes.Split([]byte("tag0,tag1,tag2"), commaB),
				bytes.Split([]byte("tag0"), commaB),
				bytes.Split([]byte("tag0,tag3"), commaB),
			},
			exp: "tag0,tag1,tag2,tag3",
		},
		{
			name: "mixed 2",
			keys: [][][]byte{
				bytes.Split([]byte("tag0"), commaB),
				bytes.Split([]byte("tag0,tag3"), commaB),
				bytes.Split([]byte("tag0,tag1,tag2"), commaB),
				bytes.Split([]byte("tag0,tag1,tag2"), commaB),
			},
			exp: "tag0,tag1,tag2,tag3",
		},
		{
			name: "all different",
			keys: [][][]byte{
				bytes.Split([]byte("tag0"), commaB),
				bytes.Split([]byte("tag3"), commaB),
				bytes.Split([]byte("tag1"), commaB),
				bytes.Split([]byte("tag2"), commaB),
			},
			exp: "tag0,tag1,tag2,tag3",
		},
		{
			name: "new tags,verify clear",
			keys: [][][]byte{
				bytes.Split([]byte("tag9"), commaB),
				bytes.Split([]byte("tag8"), commaB),
			},
			exp: "tag8,tag9",
		},
	}

	var km models.TagKeysSet
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			km.Clear()
			for _, keys := range tt.keys {
				km.UnionBytes(keys)
			}

			if got := km.String(); !cmp.Equal(got, tt.exp) {
				t.Errorf("unexpected keys -got/+exp\n%s", cmp.Diff(got, tt.exp))
			}
		})
	}
}

func BenchmarkTagKeysSet_UnionBytes(b *testing.B) {
	keys := [][][]byte{
		bytes.Split([]byte("tag00,tag01,tag02"), commaB),
		bytes.Split([]byte("tag00,tag01,tag02"), commaB),
		bytes.Split([]byte("tag00,tag01,tag05,tag06,tag10,tag11,tag12,tag13,tag14,tag15"), commaB),
		bytes.Split([]byte("tag00"), commaB),
		bytes.Split([]byte("tag00,tag03"), commaB),
		bytes.Split([]byte("tag01,tag03,tag13,tag14,tag15"), commaB),
		bytes.Split([]byte("tag04,tag05"), commaB),
	}

	rand.Seed(20040409)

	tests := []int{
		10,
		1000,
		1000000,
	}

	for _, n := range tests {
		b.Run(strconv.Itoa(n), func(b *testing.B) {
			b.ResetTimer()

			var km models.TagKeysSet
			for i := 0; i < b.N; i++ {
				for j := 0; j < n; j++ {
					km.UnionBytes(keys[rand.Int()%len(keys)])
				}
				km.Clear()
			}
		})
	}
}

type XorShift64Star struct {
	state uint64
}

func (x *XorShift64Star) Next() uint64 {
	x.state ^= x.state >> 12
	x.state ^= x.state << 25
	x.state ^= x.state >> 27
	return x.state * 2685821657736338717
}

func BenchmarkTagKeysSet_UnionKeys(b *testing.B) {
	tags := []models.Tags{
		models.ParseTags([]byte("foo,tag00=v0,tag01=v0,tag02=v0")),
		models.ParseTags([]byte("foo,tag00=v0,tag01=v0,tag02=v0")),
		models.ParseTags([]byte("foo,tag00=v0,tag01=v0,tag05=v0,tag06=v0,tag10=v0,tag11=v0,tag12=v0,tag13=v0,tag14=v0,tag15=v0")),
		models.ParseTags([]byte("foo,tag00=v0")),
		models.ParseTags([]byte("foo,tag00=v0,tag03=v0")),
		models.ParseTags([]byte("foo,tag01=v0,tag03=v0,tag13=v0,tag14=v0,tag15=v0")),
		models.ParseTags([]byte("foo,tag04=v0,tag05=v0")),
	}

	rnd := XorShift64Star{state: 20040409}

	tests := []int{
		10,
		1000,
		1000000,
	}

	for _, n := range tests {
		b.Run(strconv.Itoa(n), func(b *testing.B) {
			b.ResetTimer()

			var km models.TagKeysSet
			for i := 0; i < b.N; i++ {
				for j := 0; j < n; j++ {
					km.UnionKeys(tags[rnd.Next()%uint64(len(tags))])
				}
				km.Clear()
			}
		})
	}
}

func BenchmarkTagKeysSet_IsSuperset(b *testing.B) {
	var km models.TagKeysSet
	km.UnionBytes(bytes.Split([]byte("tag0,tag3,tag5,tag7"), commaB))

	tests := []struct {
		name string
		tags models.Tags
	}{
		{name: "last/true", tags: models.ParseTags([]byte("foo,tag7=v"))},
		{name: "last/false", tags: models.ParseTags([]byte("foo,tag8=v"))},
		{name: "first_last/true", tags: models.ParseTags([]byte("foo,tag0=v,tag7=v"))},
		{name: "all/true", tags: models.ParseTags([]byte("foo,tag0=v,tag3=v,tag5=v,tag7=v"))},
		{name: "first not last/false", tags: models.ParseTags([]byte("foo,tag0=v,tag8=v"))},
		{name: "all but last/false", tags: models.ParseTags([]byte("foo,tag0=v,tag3=v,tag5=v,tag7=v,tag8=v"))},
	}

	for _, n := range tests {
		b.Run(n.name, func(b *testing.B) {
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				km.IsSupersetKeys(n.tags)
			}
		})
	}
}
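One note on the XorShift64Star helper above: xorshift-style generators have zero as a fixed point (a zero state stays zero forever), which is presumably why the benchmark seeds it with a non-zero constant. The snippet below is an illustrative sketch, not part of this commit.

package models_test // illustrative note only

import "fmt"

// demoXorShiftZeroState shows that an unseeded (zero) XorShift64Star never
// advances, so callers must seed it with a non-zero state before use.
func demoXorShiftZeroState() {
	x := XorShift64Star{state: 0}
	fmt.Println(x.Next(), x.Next(), x.Next()) // prints: 0 0 0
}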