Clarify best method of set difference
parent
c7d26d8950
commit
7ee4f499e1
|
|
@ -609,7 +609,7 @@ func DifferenceSeriesIDIterators(itr0, itr1 SeriesIDIterator) SeriesIDIterator {
|
|||
if a := NewSeriesIDSetIterators([]SeriesIDIterator{itr0, itr1}); a != nil {
|
||||
itr0.Close()
|
||||
itr1.Close()
|
||||
return NewSeriesIDSetIterator(a[0].SeriesIDSet().AndNot(a[1].SeriesIDSet()))
|
||||
return NewSeriesIDSetIterator(NewSeriesIDSetNegate(a[0].SeriesIDSet(), a[1].SeriesIDSet()))
|
||||
}
|
||||
|
||||
return &seriesIDDifferenceIterator{itrs: [2]SeriesIDIterator{itr0, itr1}}
|
||||
|
|
|
|||
|
|
@ -27,6 +27,17 @@ func NewSeriesIDSet(a ...SeriesID) *SeriesIDSet {
|
|||
return ss
|
||||
}
|
||||
|
||||
// NewSeriesIDSetNegate returns a new SeriesIDSet containing all the elements in a
|
||||
// that are not present in b. That is, the set difference between a and b.
|
||||
func NewSeriesIDSetNegate(a, b *SeriesIDSet) *SeriesIDSet {
|
||||
a.RLock()
|
||||
defer a.RUnlock()
|
||||
b.RLock()
|
||||
defer b.RUnlock()
|
||||
|
||||
return &SeriesIDSet{bitmap: roaring.AndNot(a.bitmap, b.bitmap)}
|
||||
}
|
||||
|
||||
// Bytes estimates the memory footprint of this SeriesIDSet, in bytes.
|
||||
func (s *SeriesIDSet) Bytes() int {
|
||||
var b int
|
||||
|
|
@ -170,15 +181,13 @@ func (s *SeriesIDSet) And(other *SeriesIDSet) *SeriesIDSet {
|
|||
return &SeriesIDSet{bitmap: roaring.And(s.bitmap, other.bitmap)}
|
||||
}
|
||||
|
||||
// AndNot returns a new SeriesIDSet containing elements that were present in s,
|
||||
// but not present in other.
|
||||
func (s *SeriesIDSet) AndNot(other *SeriesIDSet) *SeriesIDSet {
|
||||
// RemoveSet removes all values in other from s, if they exist.
|
||||
func (s *SeriesIDSet) RemoveSet(other *SeriesIDSet) {
|
||||
s.RLock()
|
||||
defer s.RUnlock()
|
||||
other.RLock()
|
||||
defer other.RUnlock()
|
||||
|
||||
return &SeriesIDSet{bitmap: roaring.AndNot(s.bitmap, other.bitmap)}
|
||||
s.bitmap.AndNot(other.bitmap)
|
||||
}
|
||||
|
||||
// ForEach calls f for each id in the set. The function is applied to the IDs
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ import (
|
|||
"testing"
|
||||
)
|
||||
|
||||
func TestSeriesIDSet_AndNot(t *testing.T) {
|
||||
func TestSeriesIDSet_NewSeriesIDSetNegate(t *testing.T) {
|
||||
examples := [][3][]uint64{
|
||||
[3][]uint64{
|
||||
{1, 10, 20, 30},
|
||||
|
|
@ -55,7 +55,7 @@ func TestSeriesIDSet_AndNot(t *testing.T) {
|
|||
expected.Add(NewSeriesID(v))
|
||||
}
|
||||
|
||||
got := a.AndNot(b)
|
||||
got := NewSeriesIDSetNegate(a, b)
|
||||
if got.String() != expected.String() {
|
||||
t.Fatalf("got %s, expected %s", got.String(), expected.String())
|
||||
}
|
||||
|
|
@ -63,6 +63,59 @@ func TestSeriesIDSet_AndNot(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestSeriesIDSet_RemoveSet(t *testing.T) {
|
||||
examples := [][3][]uint64{
|
||||
[3][]uint64{
|
||||
{1, 10, 20, 30},
|
||||
{10, 12, 13, 14, 20},
|
||||
{1, 30},
|
||||
},
|
||||
[3][]uint64{
|
||||
{},
|
||||
{10},
|
||||
{},
|
||||
},
|
||||
[3][]uint64{
|
||||
{1, 10, 20, 30},
|
||||
{1, 10, 20, 30},
|
||||
{},
|
||||
},
|
||||
[3][]uint64{
|
||||
{1, 10},
|
||||
{1, 10, 100},
|
||||
{},
|
||||
},
|
||||
[3][]uint64{
|
||||
{1, 10},
|
||||
{},
|
||||
{1, 10},
|
||||
},
|
||||
}
|
||||
|
||||
for i, example := range examples {
|
||||
t.Run(fmt.Sprint(i), func(t *testing.T) {
|
||||
// Build sets.
|
||||
a, b := NewSeriesIDSet(), NewSeriesIDSet()
|
||||
for _, v := range example[0] {
|
||||
a.Add(NewSeriesID(v))
|
||||
}
|
||||
for _, v := range example[1] {
|
||||
b.Add(NewSeriesID(v))
|
||||
}
|
||||
|
||||
expected := NewSeriesIDSet()
|
||||
for _, v := range example[2] {
|
||||
expected.Add(NewSeriesID(v))
|
||||
}
|
||||
|
||||
a.RemoveSet(b)
|
||||
if a.String() != expected.String() {
|
||||
t.Fatalf("got %s, expected %s", a.String(), expected.String())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure that cloning is race-free.
|
||||
func TestSeriesIDSet_Clone_Race(t *testing.T) {
|
||||
main := NewSeriesIDSet()
|
||||
|
|
@ -556,6 +609,78 @@ func BenchmarkSeriesIDSet_Remove(b *testing.B) {
|
|||
})
|
||||
}
|
||||
|
||||
// BenchmarkSeriesIDSet_MassRemove benchmarks the cost of removing a large set of values.
|
||||
func BenchmarkSeriesIDSet_MassRemove(b *testing.B) {
|
||||
var size = uint64(1000000)
|
||||
// Setup...
|
||||
set = NewSeriesIDSet()
|
||||
for i := uint64(0); i < size; i++ {
|
||||
set.Add(NewSeriesID(i))
|
||||
}
|
||||
|
||||
// Remove one at a time
|
||||
b.Run(fmt.Sprint("cardinality_1000000_remove_each"), func(b *testing.B) {
|
||||
clone := set.Clone()
|
||||
for i := 0; i < b.N; i++ {
|
||||
for j := uint64(0); j < size/2; j++ {
|
||||
clone.RemoveNoLock(NewSeriesID(j))
|
||||
}
|
||||
|
||||
b.StopTimer()
|
||||
clone = set.Clone()
|
||||
b.StartTimer()
|
||||
}
|
||||
})
|
||||
|
||||
// This is the case where a target series id set exists.
|
||||
b.Run(fmt.Sprint("cardinality_1000000_remove_set_exists"), func(b *testing.B) {
|
||||
clone := set.Clone()
|
||||
other := NewSeriesIDSet()
|
||||
for j := uint64(0); j < size/2; j++ {
|
||||
other.AddNoLock(NewSeriesID(j))
|
||||
}
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
clone.RemoveSet(other)
|
||||
b.StopTimer()
|
||||
clone = set.Clone()
|
||||
b.StartTimer()
|
||||
}
|
||||
})
|
||||
|
||||
// Make a target series id set and negate it
|
||||
b.Run(fmt.Sprint("cardinality_1000000_remove_set"), func(b *testing.B) {
|
||||
clone := set.Clone()
|
||||
for i := 0; i < b.N; i++ {
|
||||
other := NewSeriesIDSet()
|
||||
for j := uint64(0); j < size/2; j++ {
|
||||
other.AddNoLock(NewSeriesID(j))
|
||||
}
|
||||
|
||||
clone.RemoveSet(other)
|
||||
b.StopTimer()
|
||||
clone = set.Clone()
|
||||
b.StartTimer()
|
||||
}
|
||||
})
|
||||
|
||||
// This is the case where a new result set is created.
|
||||
b.Run(fmt.Sprint("cardinality_1000000_remove_set_new"), func(b *testing.B) {
|
||||
clone := set.Clone()
|
||||
other := NewSeriesIDSet()
|
||||
for j := uint64(0); j < size/2; j++ {
|
||||
other.AddNoLock(NewSeriesID(j))
|
||||
}
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = NewSeriesIDSetNegate(clone, other)
|
||||
b.StopTimer()
|
||||
clone = set.Clone()
|
||||
b.StartTimer()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Typical benchmarks for a laptop:
|
||||
//
|
||||
// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_1/shards_1-4 200000 8095 ns/op 16656 B/op 11 allocs/op
|
||||
|
|
|
|||
|
|
@ -387,7 +387,7 @@ func (fs *FileSet) TagValueSeriesIDIterator(name, key, value []byte) (tsdb.Serie
|
|||
|
||||
// Remove tombstones set in previous file.
|
||||
if ftss != nil && ftss.Cardinality() > 0 {
|
||||
ss = ss.AndNot(ftss)
|
||||
ss.RemoveSet(ftss)
|
||||
}
|
||||
|
||||
// Fetch tag value series set for this file and merge into overall set.
|
||||
|
|
|
|||
Loading…
Reference in New Issue