Clarify best method of set difference

pull/11042/head
Edd Robinson 2019-01-11 19:27:53 +00:00
parent c7d26d8950
commit 7ee4f499e1
4 changed files with 143 additions and 9 deletions

View File

@ -609,7 +609,7 @@ func DifferenceSeriesIDIterators(itr0, itr1 SeriesIDIterator) SeriesIDIterator {
if a := NewSeriesIDSetIterators([]SeriesIDIterator{itr0, itr1}); a != nil {
itr0.Close()
itr1.Close()
return NewSeriesIDSetIterator(a[0].SeriesIDSet().AndNot(a[1].SeriesIDSet()))
return NewSeriesIDSetIterator(NewSeriesIDSetNegate(a[0].SeriesIDSet(), a[1].SeriesIDSet()))
}
return &seriesIDDifferenceIterator{itrs: [2]SeriesIDIterator{itr0, itr1}}

View File

@ -27,6 +27,17 @@ func NewSeriesIDSet(a ...SeriesID) *SeriesIDSet {
return ss
}
// NewSeriesIDSetNegate computes the set difference a \ b and returns it as a
// new SeriesIDSet: the result holds every element of a that is absent from b.
// Both a and b are read-locked until the function returns.
func NewSeriesIDSetNegate(a, b *SeriesIDSet) *SeriesIDSet {
	a.RLock()
	defer a.RUnlock()
	b.RLock()
	defer b.RUnlock()

	difference := roaring.AndNot(a.bitmap, b.bitmap)
	return &SeriesIDSet{bitmap: difference}
}
// Bytes estimates the memory footprint of this SeriesIDSet, in bytes.
func (s *SeriesIDSet) Bytes() int {
var b int
@ -170,15 +181,13 @@ func (s *SeriesIDSet) And(other *SeriesIDSet) *SeriesIDSet {
return &SeriesIDSet{bitmap: roaring.And(s.bitmap, other.bitmap)}
}
// AndNot returns a new SeriesIDSet containing elements that were present in s,
// but not present in other.
func (s *SeriesIDSet) AndNot(other *SeriesIDSet) *SeriesIDSet {
// RemoveSet removes all values in other from s, if they exist.
func (s *SeriesIDSet) RemoveSet(other *SeriesIDSet) {
s.RLock()
defer s.RUnlock()
other.RLock()
defer other.RUnlock()
return &SeriesIDSet{bitmap: roaring.AndNot(s.bitmap, other.bitmap)}
s.bitmap.AndNot(other.bitmap)
}
// ForEach calls f for each id in the set. The function is applied to the IDs

View File

@ -10,7 +10,7 @@ import (
"testing"
)
func TestSeriesIDSet_AndNot(t *testing.T) {
func TestSeriesIDSet_NewSeriesIDSetNegate(t *testing.T) {
examples := [][3][]uint64{
[3][]uint64{
{1, 10, 20, 30},
@ -55,7 +55,7 @@ func TestSeriesIDSet_AndNot(t *testing.T) {
expected.Add(NewSeriesID(v))
}
got := a.AndNot(b)
got := NewSeriesIDSetNegate(a, b)
if got.String() != expected.String() {
t.Fatalf("got %s, expected %s", got.String(), expected.String())
}
@ -63,6 +63,59 @@ func TestSeriesIDSet_AndNot(t *testing.T) {
}
}
func TestSeriesIDSet_RemoveSet(t *testing.T) {
examples := [][3][]uint64{
[3][]uint64{
{1, 10, 20, 30},
{10, 12, 13, 14, 20},
{1, 30},
},
[3][]uint64{
{},
{10},
{},
},
[3][]uint64{
{1, 10, 20, 30},
{1, 10, 20, 30},
{},
},
[3][]uint64{
{1, 10},
{1, 10, 100},
{},
},
[3][]uint64{
{1, 10},
{},
{1, 10},
},
}
for i, example := range examples {
t.Run(fmt.Sprint(i), func(t *testing.T) {
// Build sets.
a, b := NewSeriesIDSet(), NewSeriesIDSet()
for _, v := range example[0] {
a.Add(NewSeriesID(v))
}
for _, v := range example[1] {
b.Add(NewSeriesID(v))
}
expected := NewSeriesIDSet()
for _, v := range example[2] {
expected.Add(NewSeriesID(v))
}
a.RemoveSet(b)
if a.String() != expected.String() {
t.Fatalf("got %s, expected %s", a.String(), expected.String())
}
})
}
}
// Ensure that cloning is race-free.
func TestSeriesIDSet_Clone_Race(t *testing.T) {
main := NewSeriesIDSet()
@ -556,6 +609,78 @@ func BenchmarkSeriesIDSet_Remove(b *testing.B) {
})
}
// BenchmarkSeriesIDSet_MassRemove benchmarks the cost of removing a large set of values.
//
// A source set holding the IDs [0, size) is built once. Each sub-benchmark then
// removes the lower half of those IDs from a clone using a different strategy.
// Per-sub-benchmark setup is excluded from the measurement with b.ResetTimer,
// and the clone is refreshed between iterations with the timer stopped.
func BenchmarkSeriesIDSet_MassRemove(b *testing.B) {
	var size = uint64(1000000)

	// Setup: populate the source set. Note that set is assigned here, not
	// declared; it is defined elsewhere in the package.
	set = NewSeriesIDSet()
	for i := uint64(0); i < size; i++ {
		set.Add(NewSeriesID(i))
	}

	// lowerHalf builds a set holding the IDs [0, size/2), i.e. the values to remove.
	lowerHalf := func() *SeriesIDSet {
		other := NewSeriesIDSet()
		for j := uint64(0); j < size/2; j++ {
			other.AddNoLock(NewSeriesID(j))
		}
		return other
	}

	// Remove one at a time
	b.Run("cardinality_1000000_remove_each", func(b *testing.B) {
		clone := set.Clone()
		b.ResetTimer() // Do not charge the initial clone to the first iteration.
		for i := 0; i < b.N; i++ {
			for j := uint64(0); j < size/2; j++ {
				clone.RemoveNoLock(NewSeriesID(j))
			}

			b.StopTimer()
			clone = set.Clone()
			b.StartTimer()
		}
	})

	// This is the case where a target series id set exists.
	b.Run("cardinality_1000000_remove_set_exists", func(b *testing.B) {
		clone := set.Clone()
		other := lowerHalf()
		b.ResetTimer() // The removal set already exists; exclude building it.
		for i := 0; i < b.N; i++ {
			clone.RemoveSet(other)

			b.StopTimer()
			clone = set.Clone()
			b.StartTimer()
		}
	})

	// Make a target series id set and negate it
	b.Run("cardinality_1000000_remove_set", func(b *testing.B) {
		clone := set.Clone()
		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			// Building the removal set is deliberately part of the measured work.
			other := lowerHalf()
			clone.RemoveSet(other)

			b.StopTimer()
			clone = set.Clone()
			b.StartTimer()
		}
	})

	// This is the case where a new result set is created.
	b.Run("cardinality_1000000_remove_set_new", func(b *testing.B) {
		clone := set.Clone()
		other := lowerHalf()
		b.ResetTimer() // Exclude building both input sets.
		for i := 0; i < b.N; i++ {
			_ = NewSeriesIDSetNegate(clone, other)

			b.StopTimer()
			clone = set.Clone()
			b.StartTimer()
		}
	})
}
// Typical benchmarks for a laptop:
//
// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_1/shards_1-4 200000 8095 ns/op 16656 B/op 11 allocs/op

View File

@ -387,7 +387,7 @@ func (fs *FileSet) TagValueSeriesIDIterator(name, key, value []byte) (tsdb.Serie
// Remove tombstones set in previous file.
if ftss != nil && ftss.Cardinality() > 0 {
ss = ss.AndNot(ftss)
ss.RemoveSet(ftss)
}
// Fetch tag value series set for this file and merge into overall set.