From 7ee4f499e164308a2dc9390caef489bc459b4fbb Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Fri, 11 Jan 2019 19:27:53 +0000 Subject: [PATCH] Clarify best method of set difference --- tsdb/index.go | 2 +- tsdb/series_set.go | 19 ++++-- tsdb/series_set_test.go | 129 +++++++++++++++++++++++++++++++++++++++- tsdb/tsi1/file_set.go | 2 +- 4 files changed, 143 insertions(+), 9 deletions(-) diff --git a/tsdb/index.go b/tsdb/index.go index d621150c51..a45df95696 100644 --- a/tsdb/index.go +++ b/tsdb/index.go @@ -609,7 +609,7 @@ func DifferenceSeriesIDIterators(itr0, itr1 SeriesIDIterator) SeriesIDIterator { if a := NewSeriesIDSetIterators([]SeriesIDIterator{itr0, itr1}); a != nil { itr0.Close() itr1.Close() - return NewSeriesIDSetIterator(a[0].SeriesIDSet().AndNot(a[1].SeriesIDSet())) + return NewSeriesIDSetIterator(NewSeriesIDSetNegate(a[0].SeriesIDSet(), a[1].SeriesIDSet())) } return &seriesIDDifferenceIterator{itrs: [2]SeriesIDIterator{itr0, itr1}} diff --git a/tsdb/series_set.go b/tsdb/series_set.go index c5558ebd06..8cfac4de4e 100644 --- a/tsdb/series_set.go +++ b/tsdb/series_set.go @@ -27,6 +27,17 @@ func NewSeriesIDSet(a ...SeriesID) *SeriesIDSet { return ss } +// NewSeriesIDSetNegate returns a new SeriesIDSet containing all the elements in a +// that are not present in b. That is, the set difference between a and b. +func NewSeriesIDSetNegate(a, b *SeriesIDSet) *SeriesIDSet { + a.RLock() + defer a.RUnlock() + b.RLock() + defer b.RUnlock() + + return &SeriesIDSet{bitmap: roaring.AndNot(a.bitmap, b.bitmap)} +} + // Bytes estimates the memory footprint of this SeriesIDSet, in bytes. func (s *SeriesIDSet) Bytes() int { var b int @@ -170,15 +181,13 @@ func (s *SeriesIDSet) And(other *SeriesIDSet) *SeriesIDSet { return &SeriesIDSet{bitmap: roaring.And(s.bitmap, other.bitmap)} } -// AndNot returns a new SeriesIDSet containing elements that were present in s, -// but not present in other. -func (s *SeriesIDSet) AndNot(other *SeriesIDSet) *SeriesIDSet { +// RemoveSet removes all values in other from s, if they exist. +func (s *SeriesIDSet) RemoveSet(other *SeriesIDSet) { s.RLock() defer s.RUnlock() other.RLock() defer other.RUnlock() - - return &SeriesIDSet{bitmap: roaring.AndNot(s.bitmap, other.bitmap)} + s.bitmap.AndNot(other.bitmap) } // ForEach calls f for each id in the set. The function is applied to the IDs diff --git a/tsdb/series_set_test.go b/tsdb/series_set_test.go index ebb1b3fb8d..10bd7887ce 100644 --- a/tsdb/series_set_test.go +++ b/tsdb/series_set_test.go @@ -10,7 +10,7 @@ import ( "testing" ) -func TestSeriesIDSet_AndNot(t *testing.T) { +func TestSeriesIDSet_NewSeriesIDSetNegate(t *testing.T) { examples := [][3][]uint64{ [3][]uint64{ {1, 10, 20, 30}, @@ -55,7 +55,7 @@ func TestSeriesIDSet_AndNot(t *testing.T) { expected.Add(NewSeriesID(v)) } - got := a.AndNot(b) + got := NewSeriesIDSetNegate(a, b) if got.String() != expected.String() { t.Fatalf("got %s, expected %s", got.String(), expected.String()) } @@ -63,6 +63,59 @@ func TestSeriesIDSet_AndNot(t *testing.T) { } } +func TestSeriesIDSet_RemoveSet(t *testing.T) { + examples := [][3][]uint64{ + [3][]uint64{ + {1, 10, 20, 30}, + {10, 12, 13, 14, 20}, + {1, 30}, + }, + [3][]uint64{ + {}, + {10}, + {}, + }, + [3][]uint64{ + {1, 10, 20, 30}, + {1, 10, 20, 30}, + {}, + }, + [3][]uint64{ + {1, 10}, + {1, 10, 100}, + {}, + }, + [3][]uint64{ + {1, 10}, + {}, + {1, 10}, + }, + } + + for i, example := range examples { + t.Run(fmt.Sprint(i), func(t *testing.T) { + // Build sets. + a, b := NewSeriesIDSet(), NewSeriesIDSet() + for _, v := range example[0] { + a.Add(NewSeriesID(v)) + } + for _, v := range example[1] { + b.Add(NewSeriesID(v)) + } + + expected := NewSeriesIDSet() + for _, v := range example[2] { + expected.Add(NewSeriesID(v)) + } + + a.RemoveSet(b) + if a.String() != expected.String() { + t.Fatalf("got %s, expected %s", a.String(), expected.String()) + } + }) + } +} + // Ensure that cloning is race-free. func TestSeriesIDSet_Clone_Race(t *testing.T) { main := NewSeriesIDSet() @@ -556,6 +609,78 @@ func BenchmarkSeriesIDSet_Remove(b *testing.B) { }) } +// BenchmarkSeriesIDSet_MassRemove benchmarks the cost of removing a large set of values. +func BenchmarkSeriesIDSet_MassRemove(b *testing.B) { + var size = uint64(1000000) + // Setup... + set = NewSeriesIDSet() + for i := uint64(0); i < size; i++ { + set.Add(NewSeriesID(i)) + } + + // Remove one at a time + b.Run(fmt.Sprint("cardinality_1000000_remove_each"), func(b *testing.B) { + clone := set.Clone() + for i := 0; i < b.N; i++ { + for j := uint64(0); j < size/2; j++ { + clone.RemoveNoLock(NewSeriesID(j)) + } + + b.StopTimer() + clone = set.Clone() + b.StartTimer() + } + }) + + // This is the case where a target series id set exists. + b.Run(fmt.Sprint("cardinality_1000000_remove_set_exists"), func(b *testing.B) { + clone := set.Clone() + other := NewSeriesIDSet() + for j := uint64(0); j < size/2; j++ { + other.AddNoLock(NewSeriesID(j)) + } + + for i := 0; i < b.N; i++ { + clone.RemoveSet(other) + b.StopTimer() + clone = set.Clone() + b.StartTimer() + } + }) + + // Make a target series id set and negate it + b.Run(fmt.Sprint("cardinality_1000000_remove_set"), func(b *testing.B) { + clone := set.Clone() + for i := 0; i < b.N; i++ { + other := NewSeriesIDSet() + for j := uint64(0); j < size/2; j++ { + other.AddNoLock(NewSeriesID(j)) + } + + clone.RemoveSet(other) + b.StopTimer() + clone = set.Clone() + b.StartTimer() + } + }) + + // This is the case where a new result set is created. + b.Run(fmt.Sprint("cardinality_1000000_remove_set_new"), func(b *testing.B) { + clone := set.Clone() + other := NewSeriesIDSet() + for j := uint64(0); j < size/2; j++ { + other.AddNoLock(NewSeriesID(j)) + } + + for i := 0; i < b.N; i++ { + _ = NewSeriesIDSetNegate(clone, other) + b.StopTimer() + clone = set.Clone() + b.StartTimer() + } + }) +} + // Typical benchmarks for a laptop: // // BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_1/shards_1-4 200000 8095 ns/op 16656 B/op 11 allocs/op diff --git a/tsdb/tsi1/file_set.go b/tsdb/tsi1/file_set.go index 1a20f9cb58..3e47d969a3 100644 --- a/tsdb/tsi1/file_set.go +++ b/tsdb/tsi1/file_set.go @@ -387,7 +387,7 @@ func (fs *FileSet) TagValueSeriesIDIterator(name, key, value []byte) (tsdb.Serie // Remove tombstones set in previous file. if ftss != nil && ftss.Cardinality() > 0 { - ss = ss.AndNot(ftss) + ss.RemoveSet(ftss) } // Fetch tag value series set for this file and merge into overall set.