From a7a70a920e39c1aea008ed24cd6e2cc2f1b6e47c Mon Sep 17 00:00:00 2001
From: Edd Robinson
Date: Fri, 14 Sep 2018 13:19:33 +0100
Subject: [PATCH] Batch oriented boolean encoders

This commit adds a tsm1 function for encoding a batch of booleans into a
provided buffer.

The following benchmarks compare the performance of the existing
iterator based encoders, and the new batch oriented encoders using
randomly generated sets of booleans.
---
 tsdb/engine/tsm1/batch_boolean.go      |  45 ++++++
 tsdb/engine/tsm1/batch_boolean_test.go | 183 +++++++++++++++++++++++++
 2 files changed, 228 insertions(+)

diff --git a/tsdb/engine/tsm1/batch_boolean.go b/tsdb/engine/tsm1/batch_boolean.go
index e08774c3d2..7b99390081 100644
--- a/tsdb/engine/tsm1/batch_boolean.go
+++ b/tsdb/engine/tsm1/batch_boolean.go
@@ -5,6 +5,51 @@ import (
 	"fmt"
 )
 
+// BooleanArrayEncodeAll encodes src into b, returning b and any error encountered.
+// The returned slice may be of a different length and capacity to b.
+//
+// Any existing contents of b are overwritten rather than appended to. The
+// output holds the encoding type in the high nibble of the first byte, then
+// the uvarint-encoded number of values, then the values bit packed starting
+// from the most significant bit. Unused bits of the final byte are zeroed and
+// the returned error is currently always nil.
+func BooleanArrayEncodeAll(src []bool, b []byte) ([]byte, error) {
+	sz := 1 + 8 + ((len(src) + 7) / 8) // Header + Num bools + bool data.
+	if len(b) < sz {
+		if cap(b) >= sz {
+			b = b[:sz] // Reuse spare capacity; no allocation needed.
+		} else {
+			b = append(b, make([]byte, sz-len(b))...) // Grow to exactly sz bytes.
+		}
+	}
+
+	// Store the encoding type in the 4 high bits of the first byte
+	b[0] = byte(booleanCompressedBitPacked) << 4
+	n := uint64(8) // Current bit in current byte.
+
+	// Encode the number of booleans written.
+	i := binary.PutUvarint(b[n>>3:], uint64(len(src)))
+	n += uint64(i * 8)
+
+	for _, v := range src {
+		if v {
+			b[n>>3] |= 128 >> (n & 7) // Set current bit on current byte.
+		} else {
+			b[n>>3] &^= 128 >> (n & 7) // Clear current bit on current byte.
+		}
+		n++
+	}
+
+	length := n >> 3
+	if n&7 > 0 {
+		// b may be a reused buffer, so zero the padding bits of the final
+		// byte instead of leaking stale data into the output.
+		b[length] &= byte(0xFF) << (8 - (n & 7))
+		length++ // Add an extra byte to capture overflowing bits.
+	}
+	return b[:length], nil
+}
+
 func BooleanArrayDecodeAll(b []byte, dst []bool) ([]bool, error) {
 	if len(b) == 0 {
 		return nil, nil
diff --git a/tsdb/engine/tsm1/batch_boolean_test.go b/tsdb/engine/tsm1/batch_boolean_test.go
index bfca547319..27feb227ed 100644
--- a/tsdb/engine/tsm1/batch_boolean_test.go
+++ b/tsdb/engine/tsm1/batch_boolean_test.go
@@ -1,14 +1,155 @@
 package tsm1_test
 
 import (
+	"bytes"
 	"fmt"
 	"math/rand"
+	"reflect"
 	"testing"
+	"testing/quick"
 
 	"github.com/google/go-cmp/cmp"
 	"github.com/influxdata/influxdb/tsdb/engine/tsm1"
 )
 
+func TestBooleanArrayEncodeAll_NoValues(t *testing.T) {
+	b, err := tsm1.BooleanArrayEncodeAll(nil, nil)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	var dec tsm1.BooleanDecoder
+	dec.SetBytes(b)
+	if dec.Next() {
+		t.Fatalf("unexpected next value: got true, exp false")
+	}
+}
+
+func TestBooleanArrayEncodeAll_Single(t *testing.T) {
+	src := []bool{true}
+
+	b, err := tsm1.BooleanArrayEncodeAll(src, nil)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	var dec tsm1.BooleanDecoder
+	dec.SetBytes(b)
+	if !dec.Next() {
+		t.Fatalf("unexpected next value: got false, exp true")
+	}
+
+	if src[0] != dec.Read() {
+		t.Fatalf("unexpected value: got %v, exp %v", dec.Read(), src[0])
+	}
+}
+
+func TestBooleanArrayEncodeAll_Compare(t *testing.T) {
+	// generate random values
+	input := make([]bool, 1000)
+	for i := 0; i < len(input); i++ {
+		input[i] = rand.Int63n(2) == 1
+	}
+
+	s := tsm1.NewBooleanEncoder(1000)
+	for _, v := range input {
+		s.Write(v)
+	}
+	s.Flush()
+
+	buf1, err := s.Bytes()
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	buf2 := append([]byte("this is some jibberish"), make([]byte, 100, 200)...)
+	buf2, err = tsm1.BooleanArrayEncodeAll(input, buf2)
+	if err != nil {
+		t.Fatalf("unexpected error: %v\nbuf: %db %x", err, len(buf2), buf2)
+	}
+
+	result, err := tsm1.BooleanArrayDecodeAll(buf2, nil)
+	if err != nil {
+		dumpBufs(buf1, buf2)
+		t.Fatalf("unexpected error: %v\nbuf: %db %x", err, len(buf2), buf2)
+	}
+
+	if got, exp := result, input; !reflect.DeepEqual(got, exp) {
+		dumpBufs(buf1, buf2)
+		t.Fatalf("got result %v, expected %v", got, exp)
+	}
+
+	// Check that the encoders are byte for byte the same...
+	if !bytes.Equal(buf1, buf2) {
+		dumpBufs(buf1, buf2)
+		t.Fatalf("Raw bytes differ for encoders")
+	}
+}
+
+func TestBooleanArrayEncodeAll_Multi_Compressed(t *testing.T) {
+	src := make([]bool, 10)
+	for i := range src {
+		src[i] = i%2 == 0
+	}
+
+	b, err := tsm1.BooleanArrayEncodeAll(src, nil)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	if exp := 4; len(b) != exp {
+		t.Fatalf("unexpected length: got %v, exp %v", len(b), exp)
+	}
+
+	var dec tsm1.BooleanDecoder
+	dec.SetBytes(b)
+
+	for i, v := range src {
+		if !dec.Next() {
+			t.Fatalf("unexpected next value: got false, exp true")
+		}
+		if v != dec.Read() {
+			t.Fatalf("unexpected value at pos %d: got %v, exp %v", i, dec.Read(), v)
+		}
+	}
+
+	if dec.Next() {
+		t.Fatalf("unexpected next value: got true, exp false")
+	}
+}
+
+func TestBooleanArrayEncodeAll_Quick(t *testing.T) {
+	if err := quick.Check(func(values []bool) bool {
+		src := values
+		if values == nil {
+			src = []bool{}
+		}
+
+		// Retrieve compressed bytes.
+		buf, err := tsm1.BooleanArrayEncodeAll(src, nil)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		// Read values out of decoder.
+		got := make([]bool, 0, len(values))
+		var dec tsm1.BooleanDecoder
+		dec.SetBytes(buf)
+		for dec.Next() {
+			got = append(got, dec.Read())
+		}
+
+		// Verify that input and output values match.
+		if !reflect.DeepEqual(src, got) {
+			t.Fatalf("mismatch:\n\nexp=%#v\n\ngot=%#v\n\n", src, got)
+		}
+
+		return true
+	}, nil); err != nil {
+		t.Fatal(err)
+	}
+}
+
 func Test_BooleanArrayDecodeAll_Single(t *testing.T) {
 	enc := tsm1.NewBooleanEncoder(1)
 	exp := true
@@ -86,6 +227,48 @@ func Test_BooleanBatchDecoder_Corrupt(t *testing.T) {
 	}
 }
 
+func BenchmarkEncodeBooleans(b *testing.B) {
+	var err error
+	cases := []int{10, 100, 1000}
+
+	for _, n := range cases {
+		enc := tsm1.NewBooleanEncoder(n)
+		b.Run(fmt.Sprintf("%d_ran", n), func(b *testing.B) {
+			input := make([]bool, n)
+			for i := 0; i < n; i++ {
+				input[i] = rand.Int63n(2) == 1
+			}
+
+			b.Run("itr", func(b *testing.B) {
+				b.ReportAllocs()
+				enc.Reset()
+				b.ResetTimer()
+				for n := 0; n < b.N; n++ {
+					enc.Reset()
+					for _, x := range input {
+						enc.Write(x)
+					}
+					enc.Flush()
+					if bufResult, err = enc.Bytes(); err != nil {
+						b.Fatal(err)
+					}
+				}
+			})
+
+			b.Run("batch", func(b *testing.B) {
+				b.ReportAllocs()
+				b.ResetTimer()
+				for n := 0; n < b.N; n++ {
+					if bufResult, err = tsm1.BooleanArrayEncodeAll(input, bufResult); err != nil {
+						b.Fatal(err)
+					}
+				}
+			})
+
+		})
+	}
+}
+
 func BenchmarkBooleanArrayDecodeAll(b *testing.B) {
 	benchmarks := []struct {
 		n int