Batch oriented int encoders
This commit adds a tsm1 function for encoding a batch of ints into a provided buffer.

The following benchmarks compare the performance of the existing iterator based encoders and the new batch oriented encoders. They look at a sequential input slice, a randomly generated input slice and a duplicate slice:

name                      old time/op    new time/op    delta
EncodeIntegers/10_seq      144ns ± 2%      41ns ± 1%    -71.46%  (p=0.000 n=10+10)
EncodeIntegers/10_ran      304ns ± 7%     140ns ± 2%    -53.99%  (p=0.000 n=10+10)
EncodeIntegers/10_dup      147ns ± 4%      41ns ± 2%    -72.14%  (p=0.000 n=10+9)
EncodeIntegers/100_seq     483ns ± 7%     208ns ± 1%    -56.98%  (p=0.000 n=10+9)
EncodeIntegers/100_ran    1.64µs ± 7%    1.01µs ± 1%    -38.42%  (p=0.000 n=9+9)
EncodeIntegers/100_dup     484ns ±14%     210ns ± 2%    -56.63%  (p=0.000 n=10+10)
EncodeIntegers/1000_seq   3.11µs ± 2%    1.81µs ± 2%    -41.68%  (p=0.000 n=10+10)
EncodeIntegers/1000_ran   16.9µs ±10%    11.0µs ± 2%    -34.58%  (p=0.000 n=10+10)
EncodeIntegers/1000_dup   3.05µs ± 3%    1.81µs ± 2%    -40.71%  (p=0.000 n=10+8)

name                      old alloc/op   new alloc/op   delta
EncodeIntegers/10_seq      32.0B ± 0%      0.0B         -100.00%  (p=0.000 n=10+10)
EncodeIntegers/10_ran      32.0B ± 0%      0.0B         -100.00%  (p=0.000 n=10+10)
EncodeIntegers/10_dup      32.0B ± 0%      0.0B         -100.00%  (p=0.000 n=10+10)
EncodeIntegers/100_seq     32.0B ± 0%      0.0B         -100.00%  (p=0.000 n=10+10)
EncodeIntegers/100_ran      128B ± 0%        0B         -100.00%  (p=0.000 n=10+10)
EncodeIntegers/100_dup     32.0B ± 0%      0.0B         -100.00%  (p=0.000 n=10+10)
EncodeIntegers/1000_seq    32.0B ± 0%      0.0B         -100.00%  (p=0.000 n=10+10)
EncodeIntegers/1000_ran   1.15kB ± 0%    0.00kB         -100.00%  (p=0.000 n=10+10)
EncodeIntegers/1000_dup    32.0B ± 0%      0.0B         -100.00%  (p=0.000 n=10+10)

name                      old allocs/op  new allocs/op  delta
EncodeIntegers/10_seq       1.00 ± 0%      0.00         -100.00%  (p=0.000 n=10+10)
EncodeIntegers/10_ran       1.00 ± 0%      0.00         -100.00%  (p=0.000 n=10+10)
EncodeIntegers/10_dup       1.00 ± 0%      0.00         -100.00%  (p=0.000 n=10+10)
EncodeIntegers/100_seq      1.00 ± 0%      0.00         -100.00%  (p=0.000 n=10+10)
EncodeIntegers/100_ran      1.00 ± 0%      0.00         -100.00%  (p=0.000 n=10+10)
EncodeIntegers/100_dup      1.00 ± 0%      0.00         -100.00%  (p=0.000 n=10+10)
EncodeIntegers/1000_seq     1.00 ± 0%      0.00         -100.00%  (p=0.000 n=10+10)
EncodeIntegers/1000_ran     1.00 ± 0%      0.00         -100.00%  (p=0.000 n=10+10)
EncodeIntegers/1000_dup     1.00 ± 0%      0.00         -100.00%  (p=0.000 n=10+10)

pull/10300/head
parent
6b52231a37
commit
de5ca4a108
|
@ -443,6 +443,7 @@ func BenchmarkEncodeFloats(b *testing.B) {
|
|||
|
||||
b.Run("itr", func(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
enc.Reset()
|
||||
b.ResetTimer()
|
||||
for n := 0; n < b.N; n++ {
|
||||
enc.Reset()
|
||||
|
|
|
@ -8,6 +8,112 @@ import (
|
|||
"github.com/influxdata/influxdb/pkg/encoding/simple8b"
|
||||
)
|
||||
|
||||
// IntegerArrayEncodeAll encodes src into b, returning b and any error encountered.
|
||||
// The returned slice may be of a different length and capactity to b.
|
||||
//
|
||||
// IntegerArrayEncodeAll implements batch oriented versions of the three integer
|
||||
// encoding types we support: uncompressed, simple8b and RLE.
|
||||
//
|
||||
// Important: IntegerArrayEncodeAll modifies the contents of src by using it as
|
||||
// scratch space for delta encoded values. It is NOT SAFE to use src after
|
||||
// passing it into IntegerArrayEncodeAll.
|
||||
func IntegerArrayEncodeAll(src []int64, b []byte) ([]byte, error) {
|
||||
if len(src) == 0 {
|
||||
return nil, nil // Nothing to do
|
||||
}
|
||||
|
||||
// Zigzag encode deltas of all provided values.
|
||||
var prev int64
|
||||
var rle = true
|
||||
var nopack bool
|
||||
|
||||
// To prevent an allocation of the entire block we're encoding reuse the
|
||||
// src slice to store the encoded deltas.
|
||||
deltas := reintepretInt64ToUint64Slice(src)
|
||||
|
||||
for i, v := range src {
|
||||
delta := v - prev
|
||||
prev = v
|
||||
enc := ZigZagEncode(delta)
|
||||
if i > 1 {
|
||||
rle = rle && deltas[i-1] == enc
|
||||
}
|
||||
deltas[i] = enc
|
||||
|
||||
// Check if the encoded value is too big to be simple8b encoded.
|
||||
if enc > simple8b.MaxValue {
|
||||
nopack = true
|
||||
}
|
||||
}
|
||||
|
||||
// Encode with RLE
|
||||
if rle && len(deltas) > 2 {
|
||||
// Large varints can take up to 10 bytes. We're storing 3 + 1
|
||||
// type byte.
|
||||
if len(b) < 31 && cap(b) >= 31 {
|
||||
b = b[:31]
|
||||
} else if len(b) < 31 {
|
||||
b = append(b, make([]byte, 31-len(b))...)
|
||||
}
|
||||
|
||||
// 4 high bits used for the encoding type
|
||||
b[0] = byte(intCompressedRLE) << 4
|
||||
|
||||
i := 1
|
||||
// The first value
|
||||
binary.BigEndian.PutUint64(b[i:], deltas[0])
|
||||
i += 8
|
||||
// The first delta
|
||||
i += binary.PutUvarint(b[i:], deltas[1])
|
||||
// The number of times the delta is repeated
|
||||
i += binary.PutUvarint(b[i:], uint64(len(deltas)-1))
|
||||
|
||||
return b[:i], nil
|
||||
}
|
||||
|
||||
if nopack { // There is an encoded value that's too big to simple8b encode.
|
||||
// Encode uncompressed.
|
||||
sz := 1 + len(deltas)*8
|
||||
if len(b) < sz && cap(b) >= sz {
|
||||
b = b[:sz]
|
||||
} else if len(b) < sz {
|
||||
b = append(b, make([]byte, sz-len(b))...)
|
||||
}
|
||||
|
||||
// 4 high bits of first byte store the encoding type for the block
|
||||
b[0] = byte(intUncompressed) << 4
|
||||
for i, v := range deltas {
|
||||
binary.BigEndian.PutUint64(b[1+i*8:1+i*8+8], uint64(v))
|
||||
}
|
||||
return b[:sz], nil
|
||||
}
|
||||
|
||||
// Encode with simple8b - fist value is written unencoded using 8 bytes.
|
||||
encoded, err := simple8b.EncodeAll(deltas[1:])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
sz := 1 + (len(encoded)+1)*8
|
||||
if len(b) < sz && cap(b) >= sz {
|
||||
b = b[:sz]
|
||||
} else if len(b) < sz {
|
||||
b = append(b, make([]byte, sz-len(b))...)
|
||||
}
|
||||
|
||||
// 4 high bits of first byte store the encoding type for the block
|
||||
b[0] = byte(intCompressedSimple) << 4
|
||||
|
||||
// Write the first value since it's not part of the encoded values
|
||||
binary.BigEndian.PutUint64(b[1:9], deltas[0])
|
||||
|
||||
// Write the encoded values
|
||||
for i, v := range encoded {
|
||||
binary.BigEndian.PutUint64(b[9+i*8:9+i*8+8], v)
|
||||
}
|
||||
return b[:sz], nil
|
||||
}
|
||||
|
||||
var (
|
||||
integerBatchDecoderFunc = [...]func(b []byte, dst []int64) ([]int64, error){
|
||||
integerBatchDecodeAllUncompressed,
|
||||
|
|
|
@ -1,15 +1,624 @@
|
|||
package tsm1
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"math"
|
||||
"math/rand"
|
||||
"reflect"
|
||||
"sort"
|
||||
"testing"
|
||||
"testing/quick"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
)
|
||||
|
||||
// dumpBufs prints a and b side by side to stdout, one byte per row, as a
// debugging aid. Each row shows the byte index, the bit offset (index*8), the
// byte from each buffer in binary, and whether the two rendered columns are
// equal. A buffer that is shorter than the other contributes an empty column
// for the missing rows. A blank line is printed after the last row.
func dumpBufs(a, b []byte) {
	// render returns the 8-bit binary form of buf[idx], or "" past the end.
	render := func(buf []byte, idx int) string {
		if idx >= len(buf) {
			return ""
		}
		return fmt.Sprintf("%08b", buf[idx])
	}

	rows := len(b)
	if len(a) >= rows {
		rows = len(a)
	}

	for row := 0; row < rows; row++ {
		left, right := render(a, row), render(b, row)
		fmt.Printf("%d (%d) %s - %s :: %v\n", row, row*8, left, right, left == right)
	}
	fmt.Println()
}
|
||||
|
||||
// dumpBuf prints every byte of b to stdout as "<index> <8-bit binary>", one
// per line, followed by a blank line. It is a debugging aid.
func dumpBuf(b []byte) {
	for idx := 0; idx < len(b); idx++ {
		fmt.Printf("%d %08b\n", idx, b[idx])
	}
	fmt.Println()
}
|
||||
|
||||
func TestIntegerArrayEncodeAll_NoValues(t *testing.T) {
|
||||
b, err := IntegerArrayEncodeAll(nil, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
if len(b) > 0 {
|
||||
t.Fatalf("unexpected lenght: exp 0, got %v", len(b))
|
||||
}
|
||||
|
||||
var dec IntegerDecoder
|
||||
dec.SetBytes(b)
|
||||
if dec.Next() {
|
||||
t.Fatalf("unexpected next value: got true, exp false")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntegerArrayEncodeAll_Compare(t *testing.T) {
|
||||
// generate random values (should use simple8b)
|
||||
input := make([]int64, 1000)
|
||||
for i := 0; i < len(input); i++ {
|
||||
input[i] = rand.Int63n(100000) - 50000
|
||||
}
|
||||
sort.Slice(input, func(i int, j int) bool { return input[i] < input[j] })
|
||||
testIntegerArrayEncodeAll_Compare(t, input, intCompressedSimple)
|
||||
|
||||
// Generate same values (should use RLE)
|
||||
for i := 0; i < len(input); i++ {
|
||||
input[i] = 1232342341234
|
||||
}
|
||||
testIntegerArrayEncodeAll_Compare(t, input, intCompressedRLE)
|
||||
|
||||
// Generate large random values that are not sorted. The deltas will be large
|
||||
// and the values should be stored uncompressed.
|
||||
for i := 0; i < len(input); i++ {
|
||||
input[i] = int64(rand.Uint64())
|
||||
}
|
||||
testIntegerArrayEncodeAll_Compare(t, input, intUncompressed)
|
||||
}
|
||||
|
||||
func testIntegerArrayEncodeAll_Compare(t *testing.T, input []int64, encoding byte) {
|
||||
exp := make([]int64, len(input))
|
||||
copy(exp, input)
|
||||
|
||||
s := NewIntegerEncoder(1000)
|
||||
for _, v := range input {
|
||||
s.Write(v)
|
||||
}
|
||||
|
||||
buf1, err := s.Bytes()
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
if got, exp := buf1[0]>>4, encoding; got != exp {
|
||||
t.Fatalf("got encoding %v, expected %v", got, encoding)
|
||||
}
|
||||
|
||||
var buf2 []byte
|
||||
buf2, err = IntegerArrayEncodeAll(input, buf2)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v\nbuf: %db %x", err, len(buf2), buf2)
|
||||
}
|
||||
|
||||
if got, exp := buf2[0]>>4, encoding; got != exp {
|
||||
t.Fatalf("got encoding %v, expected %v", got, encoding)
|
||||
}
|
||||
|
||||
result, err := IntegerArrayDecodeAll(buf2, nil)
|
||||
if err != nil {
|
||||
dumpBufs(buf1, buf2)
|
||||
t.Fatalf("unexpected error: %v\nbuf: %db %x", err, len(buf2), buf2)
|
||||
}
|
||||
|
||||
if got := result; !reflect.DeepEqual(got, exp) {
|
||||
t.Fatalf("got result %v, expected %v", got, exp)
|
||||
}
|
||||
|
||||
// Check that the encoders are byte for byte the same...
|
||||
if !bytes.Equal(buf1, buf2) {
|
||||
dumpBufs(buf1, buf2)
|
||||
t.Fatalf("Raw bytes differ for encoders")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntegerArrayEncodeAll_One(t *testing.T) {
|
||||
v1 := int64(1)
|
||||
|
||||
src := []int64{1}
|
||||
b, err := IntegerArrayEncodeAll(src, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
if got := b[0] >> 4; intCompressedSimple != got {
|
||||
t.Fatalf("encoding type mismatch: exp uncompressed, got %v", got)
|
||||
}
|
||||
|
||||
var dec IntegerDecoder
|
||||
dec.SetBytes(b)
|
||||
if !dec.Next() {
|
||||
t.Fatalf("unexpected next value: got true, exp false")
|
||||
}
|
||||
|
||||
if v1 != dec.Read() {
|
||||
t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v1)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntegerArrayEncodeAll_Two(t *testing.T) {
|
||||
var v1, v2 int64 = 1, 2
|
||||
|
||||
src := []int64{v1, v2}
|
||||
b, err := IntegerArrayEncodeAll(src, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
if got := b[0] >> 4; intCompressedSimple != got {
|
||||
t.Fatalf("encoding type mismatch: exp uncompressed, got %v", got)
|
||||
}
|
||||
|
||||
var dec IntegerDecoder
|
||||
dec.SetBytes(b)
|
||||
if !dec.Next() {
|
||||
t.Fatalf("unexpected next value: got true, exp false")
|
||||
}
|
||||
|
||||
if v1 != dec.Read() {
|
||||
t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v1)
|
||||
}
|
||||
|
||||
if !dec.Next() {
|
||||
t.Fatalf("unexpected next value: got true, exp false")
|
||||
}
|
||||
|
||||
if v2 != dec.Read() {
|
||||
t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v2)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntegerArrayEncodeAll_Negative(t *testing.T) {
|
||||
var v1, v2, v3 int64 = -2, 0, 1
|
||||
|
||||
src := []int64{v1, v2, v3}
|
||||
b, err := IntegerArrayEncodeAll(src, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
if got := b[0] >> 4; intCompressedSimple != got {
|
||||
t.Fatalf("encoding type mismatch: exp uncompressed, got %v", got)
|
||||
}
|
||||
|
||||
var dec IntegerDecoder
|
||||
dec.SetBytes(b)
|
||||
if !dec.Next() {
|
||||
t.Fatalf("unexpected next value: got true, exp false")
|
||||
}
|
||||
|
||||
if v1 != dec.Read() {
|
||||
t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v1)
|
||||
}
|
||||
|
||||
if !dec.Next() {
|
||||
t.Fatalf("unexpected next value: got true, exp false")
|
||||
}
|
||||
|
||||
if v2 != dec.Read() {
|
||||
t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v2)
|
||||
}
|
||||
|
||||
if !dec.Next() {
|
||||
t.Fatalf("unexpected next value: got true, exp false")
|
||||
}
|
||||
|
||||
if v3 != dec.Read() {
|
||||
t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v3)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntegerArrayEncodeAll_Large_Range(t *testing.T) {
|
||||
var v1, v2 int64 = math.MinInt64, math.MaxInt64
|
||||
|
||||
src := []int64{v1, v2}
|
||||
b, err := IntegerArrayEncodeAll(src, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
if got := b[0] >> 4; intUncompressed != got {
|
||||
t.Fatalf("encoding type mismatch: exp uncompressed, got %v", got)
|
||||
}
|
||||
|
||||
var dec IntegerDecoder
|
||||
dec.SetBytes(b)
|
||||
if !dec.Next() {
|
||||
t.Fatalf("unexpected next value: got true, exp false")
|
||||
}
|
||||
|
||||
if v1 != dec.Read() {
|
||||
t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v1)
|
||||
}
|
||||
|
||||
if !dec.Next() {
|
||||
t.Fatalf("unexpected next value: got true, exp false")
|
||||
}
|
||||
|
||||
if v2 != dec.Read() {
|
||||
t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v2)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntegerArrayEncodeAll_Uncompressed(t *testing.T) {
|
||||
var v1, v2, v3 int64 = 0, 1, 1 << 60
|
||||
|
||||
src := []int64{v1, v2, v3}
|
||||
b, err := IntegerArrayEncodeAll(src, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("expected error: %v", err)
|
||||
}
|
||||
|
||||
// 1 byte header + 3 * 8 byte values
|
||||
if exp := 25; len(b) != exp {
|
||||
t.Fatalf("length mismatch: got %v, exp %v", len(b), exp)
|
||||
}
|
||||
|
||||
if got := b[0] >> 4; intUncompressed != got {
|
||||
t.Fatalf("encoding type mismatch: exp uncompressed, got %v", got)
|
||||
}
|
||||
|
||||
var dec IntegerDecoder
|
||||
dec.SetBytes(b)
|
||||
if !dec.Next() {
|
||||
t.Fatalf("unexpected next value: got true, exp false")
|
||||
}
|
||||
|
||||
if v1 != dec.Read() {
|
||||
t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v1)
|
||||
}
|
||||
|
||||
if !dec.Next() {
|
||||
t.Fatalf("unexpected next value: got true, exp false")
|
||||
}
|
||||
|
||||
if v2 != dec.Read() {
|
||||
t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v2)
|
||||
}
|
||||
|
||||
if !dec.Next() {
|
||||
t.Fatalf("unexpected next value: got true, exp false")
|
||||
}
|
||||
|
||||
if v3 != dec.Read() {
|
||||
t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v3)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntegerArrayEncodeAll_NegativeUncompressed(t *testing.T) {
|
||||
src := []int64{
|
||||
-2352281900722994752, 1438442655375607923, -4110452567888190110,
|
||||
-1221292455668011702, -1941700286034261841, -2836753127140407751,
|
||||
1432686216250034552, 3663244026151507025, -3068113732684750258,
|
||||
-1949953187327444488, 3713374280993588804, 3226153669854871355,
|
||||
-2093273755080502606, 1006087192578600616, -2272122301622271655,
|
||||
2533238229511593671, -4450454445568858273, 2647789901083530435,
|
||||
2761419461769776844, -1324397441074946198, -680758138988210958,
|
||||
94468846694902125, -2394093124890745254, -2682139311758778198,
|
||||
}
|
||||
exp := make([]int64, len(src))
|
||||
copy(exp, src)
|
||||
|
||||
b, err := IntegerArrayEncodeAll(src, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("expected error: %v", err)
|
||||
}
|
||||
|
||||
if got := b[0] >> 4; intUncompressed != got {
|
||||
t.Fatalf("encoding type mismatch: exp uncompressed, got %v", got)
|
||||
}
|
||||
|
||||
var dec IntegerDecoder
|
||||
dec.SetBytes(b)
|
||||
|
||||
i := 0
|
||||
for dec.Next() {
|
||||
if i > len(src) {
|
||||
t.Fatalf("read too many values: got %v, exp %v", i, len(exp))
|
||||
}
|
||||
|
||||
if exp[i] != dec.Read() {
|
||||
t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), exp[i])
|
||||
}
|
||||
i += 1
|
||||
}
|
||||
|
||||
if i != len(exp) {
|
||||
t.Fatalf("failed to read enough values: got %v, exp %v", i, len(exp))
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntegerArrayEncodeAll_AllNegative(t *testing.T) {
|
||||
src := []int64{
|
||||
-10, -5, -1,
|
||||
}
|
||||
exp := make([]int64, len(src))
|
||||
copy(exp, src)
|
||||
|
||||
b, err := IntegerArrayEncodeAll(src, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
if got := b[0] >> 4; intCompressedSimple != got {
|
||||
t.Fatalf("encoding type mismatch: exp uncompressed, got %v", got)
|
||||
}
|
||||
|
||||
var dec IntegerDecoder
|
||||
dec.SetBytes(b)
|
||||
i := 0
|
||||
for dec.Next() {
|
||||
if i > len(exp) {
|
||||
t.Fatalf("read too many values: got %v, exp %v", i, len(exp))
|
||||
}
|
||||
|
||||
if exp[i] != dec.Read() {
|
||||
t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), exp[i])
|
||||
}
|
||||
i += 1
|
||||
}
|
||||
|
||||
if i != len(exp) {
|
||||
t.Fatalf("failed to read enough values: got %v, exp %v", i, len(exp))
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntegerArrayEncodeAll_CounterPacked(t *testing.T) {
|
||||
src := []int64{
|
||||
1e15, 1e15 + 1, 1e15 + 2, 1e15 + 3, 1e15 + 4, 1e15 + 6,
|
||||
}
|
||||
exp := make([]int64, len(src))
|
||||
copy(exp, src)
|
||||
|
||||
b, err := IntegerArrayEncodeAll(src, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
if b[0]>>4 != intCompressedSimple {
|
||||
t.Fatalf("unexpected encoding format: expected simple, got %v", b[0]>>4)
|
||||
}
|
||||
|
||||
// Should use 1 header byte + 2, 8 byte words if delta-encoding is used based on
|
||||
// values sizes. Without delta-encoding, we'd get 49 bytes.
|
||||
if exp := 17; len(b) != exp {
|
||||
t.Fatalf("encoded length mismatch: got %v, exp %v", len(b), exp)
|
||||
}
|
||||
|
||||
var dec IntegerDecoder
|
||||
dec.SetBytes(b)
|
||||
i := 0
|
||||
for dec.Next() {
|
||||
if i > len(exp) {
|
||||
t.Fatalf("read too many values: got %v, exp %v", i, len(exp))
|
||||
}
|
||||
|
||||
if exp[i] != dec.Read() {
|
||||
t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), exp[i])
|
||||
}
|
||||
i += 1
|
||||
}
|
||||
|
||||
if i != len(exp) {
|
||||
t.Fatalf("failed to read enough values: got %v, exp %v", i, len(exp))
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntegerArrayEncodeAll_CounterRLE(t *testing.T) {
|
||||
src := []int64{
|
||||
1e15, 1e15 + 1, 1e15 + 2, 1e15 + 3, 1e15 + 4, 1e15 + 5,
|
||||
}
|
||||
exp := make([]int64, len(src))
|
||||
copy(exp, src)
|
||||
|
||||
b, err := IntegerArrayEncodeAll(src, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
if b[0]>>4 != intCompressedRLE {
|
||||
t.Fatalf("unexpected encoding format: expected RLE, got %v", b[0]>>4)
|
||||
}
|
||||
|
||||
// Should use 1 header byte, 8 byte first value, 1 var-byte for delta and 1 var-byte for
|
||||
// count of deltas in this particular RLE.
|
||||
if exp := 11; len(b) != exp {
|
||||
t.Fatalf("encoded length mismatch: got %v, exp %v", len(b), exp)
|
||||
}
|
||||
|
||||
var dec IntegerDecoder
|
||||
dec.SetBytes(b)
|
||||
i := 0
|
||||
for dec.Next() {
|
||||
if i > len(exp) {
|
||||
t.Fatalf("read too many values: got %v, exp %v", i, len(exp))
|
||||
}
|
||||
|
||||
if exp[i] != dec.Read() {
|
||||
t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), exp[i])
|
||||
}
|
||||
i += 1
|
||||
}
|
||||
|
||||
if i != len(exp) {
|
||||
t.Fatalf("failed to read enough values: got %v, exp %v", i, len(exp))
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntegerArrayEncodeAll_Descending(t *testing.T) {
|
||||
src := []int64{
|
||||
7094, 4472, 1850,
|
||||
}
|
||||
exp := make([]int64, len(src))
|
||||
copy(exp, src)
|
||||
|
||||
b, err := IntegerArrayEncodeAll(src, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
if b[0]>>4 != intCompressedRLE {
|
||||
t.Fatalf("unexpected encoding format: expected simple, got %v", b[0]>>4)
|
||||
}
|
||||
|
||||
// Should use 1 header byte, 8 byte first value, 1 var-byte for delta and 1 var-byte for
|
||||
// count of deltas in this particular RLE.
|
||||
if exp := 12; len(b) != exp {
|
||||
t.Fatalf("encoded length mismatch: got %v, exp %v", len(b), exp)
|
||||
}
|
||||
|
||||
var dec IntegerDecoder
|
||||
dec.SetBytes(b)
|
||||
i := 0
|
||||
for dec.Next() {
|
||||
if i > len(exp) {
|
||||
t.Fatalf("read too many values: got %v, exp %v", i, len(exp))
|
||||
}
|
||||
|
||||
if exp[i] != dec.Read() {
|
||||
t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), exp[i])
|
||||
}
|
||||
i += 1
|
||||
}
|
||||
|
||||
if i != len(exp) {
|
||||
t.Fatalf("failed to read enough values: got %v, exp %v", i, len(exp))
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntegerArrayEncodeAll_Flat(t *testing.T) {
|
||||
src := []int64{
|
||||
1, 1, 1, 1,
|
||||
}
|
||||
exp := make([]int64, len(src))
|
||||
copy(exp, src)
|
||||
|
||||
b, err := IntegerArrayEncodeAll(src, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
if b[0]>>4 != intCompressedRLE {
|
||||
t.Fatalf("unexpected encoding format: expected simple, got %v", b[0]>>4)
|
||||
}
|
||||
|
||||
// Should use 1 header byte, 8 byte first value, 1 var-byte for delta and 1 var-byte for
|
||||
// count of deltas in this particular RLE.
|
||||
if exp := 11; len(b) != exp {
|
||||
t.Fatalf("encoded length mismatch: got %v, exp %v", len(b), exp)
|
||||
}
|
||||
|
||||
var dec IntegerDecoder
|
||||
dec.SetBytes(b)
|
||||
i := 0
|
||||
for dec.Next() {
|
||||
if i > len(exp) {
|
||||
t.Fatalf("read too many values: got %v, exp %v", i, len(exp))
|
||||
}
|
||||
|
||||
if exp[i] != dec.Read() {
|
||||
t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), exp[i])
|
||||
}
|
||||
i += 1
|
||||
}
|
||||
|
||||
if i != len(exp) {
|
||||
t.Fatalf("failed to read enough values: got %v, exp %v", i, len(exp))
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntegerArrayEncodeAll_MinMax(t *testing.T) {
|
||||
src := []int64{
|
||||
math.MinInt64, math.MaxInt64,
|
||||
}
|
||||
exp := make([]int64, len(src))
|
||||
copy(exp, src)
|
||||
|
||||
b, err := IntegerArrayEncodeAll(src, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
if b[0]>>4 != intUncompressed {
|
||||
t.Fatalf("unexpected encoding format: expected simple, got %v", b[0]>>4)
|
||||
}
|
||||
|
||||
if exp := 17; len(b) != exp {
|
||||
t.Fatalf("encoded length mismatch: got %v, exp %v", len(b), exp)
|
||||
}
|
||||
|
||||
var dec IntegerDecoder
|
||||
dec.SetBytes(b)
|
||||
i := 0
|
||||
for dec.Next() {
|
||||
if i > len(exp) {
|
||||
t.Fatalf("read too many values: got %v, exp %v", i, len(exp))
|
||||
}
|
||||
|
||||
if exp[i] != dec.Read() {
|
||||
t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), exp[i])
|
||||
}
|
||||
i += 1
|
||||
}
|
||||
|
||||
if i != len(exp) {
|
||||
t.Fatalf("failed to read enough values: got %v, exp %v", i, len(exp))
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntegerArrayEncodeAll_Quick(t *testing.T) {
|
||||
quick.Check(func(values []int64) bool {
|
||||
src := values
|
||||
if values == nil {
|
||||
src = []int64{} // is this really expected?
|
||||
}
|
||||
|
||||
// Copy over values to compare result—src is modified...
|
||||
exp := make([]int64, 0, len(src))
|
||||
for _, v := range src {
|
||||
exp = append(exp, v)
|
||||
}
|
||||
|
||||
// Retrieve encoded bytes from encoder.
|
||||
b, err := IntegerArrayEncodeAll(src, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Read values out of decoder.
|
||||
got := make([]int64, 0, len(src))
|
||||
var dec IntegerDecoder
|
||||
dec.SetBytes(b)
|
||||
for dec.Next() {
|
||||
if err := dec.Error(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
got = append(got, dec.Read())
|
||||
}
|
||||
|
||||
// Verify that input and output values match.
|
||||
if !reflect.DeepEqual(exp, got) {
|
||||
t.Fatalf("mismatch:\n\nexp=%#v\n\ngot=%#v\n\n", src, got)
|
||||
}
|
||||
|
||||
return true
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func TestIntegerArrayDecodeAll_NegativeUncompressed(t *testing.T) {
|
||||
exp := []int64{
|
||||
-2352281900722994752, 1438442655375607923, -4110452567888190110,
|
||||
|
@ -280,6 +889,138 @@ func TestIntegerArrayDecodeAll_Quick(t *testing.T) {
|
|||
}, nil)
|
||||
}
|
||||
|
||||
var bufResult []byte
|
||||
|
||||
func BenchmarkEncodeIntegers(b *testing.B) {
|
||||
var err error
|
||||
cases := []int{10, 100, 1000}
|
||||
|
||||
for _, n := range cases {
|
||||
enc := NewIntegerEncoder(n)
|
||||
|
||||
b.Run(fmt.Sprintf("%d_seq", n), func(b *testing.B) {
|
||||
src := make([]int64, n)
|
||||
for i := 0; i < n; i++ {
|
||||
src[i] = int64(i)
|
||||
}
|
||||
|
||||
input := make([]int64, len(src))
|
||||
copy(input, src)
|
||||
|
||||
b.Run("itr", func(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
for n := 0; n < b.N; n++ {
|
||||
enc.Reset()
|
||||
for _, x := range src {
|
||||
enc.Write(x)
|
||||
}
|
||||
enc.Flush()
|
||||
if bufResult, err = enc.Bytes(); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
// Since the batch encoder needs to do a copy to reset the
|
||||
// input, we will add a copy here too.
|
||||
copy(input, src)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("batch", func(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
for n := 0; n < b.N; n++ {
|
||||
if bufResult, err = IntegerArrayEncodeAll(input, bufResult); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
copy(input, src) // Reset input that gets modified in IntegerArrayEncodeAll
|
||||
}
|
||||
})
|
||||
|
||||
})
|
||||
|
||||
b.Run(fmt.Sprintf("%d_ran", n), func(b *testing.B) {
|
||||
src := make([]int64, n)
|
||||
for i := 0; i < n; i++ {
|
||||
src[i] = rand.Int63n(100)
|
||||
}
|
||||
|
||||
input := make([]int64, len(src))
|
||||
copy(input, src)
|
||||
|
||||
b.Run("itr", func(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
for n := 0; n < b.N; n++ {
|
||||
enc.Reset()
|
||||
for _, x := range src {
|
||||
enc.Write(x)
|
||||
}
|
||||
enc.Flush()
|
||||
if bufResult, err = enc.Bytes(); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
// Since the batch encoder needs to do a copy to reset the
|
||||
// input, we will add a copy here too.
|
||||
copy(input, src)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("batch", func(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
for n := 0; n < b.N; n++ {
|
||||
if bufResult, err = IntegerArrayEncodeAll(input, bufResult); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
copy(input, src) // Reset input that gets modified in IntegerArrayEncodeAll
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
b.Run(fmt.Sprintf("%d_dup", n), func(b *testing.B) {
|
||||
src := make([]int64, n)
|
||||
for i := 0; i < n; i++ {
|
||||
src[i] = 1233242
|
||||
}
|
||||
|
||||
input := make([]int64, len(src))
|
||||
copy(input, src)
|
||||
|
||||
b.Run("itr", func(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
for n := 0; n < b.N; n++ {
|
||||
enc.Reset()
|
||||
for _, x := range src {
|
||||
enc.Write(x)
|
||||
}
|
||||
enc.Flush()
|
||||
if bufResult, err = enc.Bytes(); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
// Since the batch encoder needs to do a copy to reset the
|
||||
// input, we will add a copy here too.
|
||||
copy(input, src)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("batch", func(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
for n := 0; n < b.N; n++ {
|
||||
if bufResult, err = IntegerArrayEncodeAll(input, bufResult); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
copy(input, src) // Reset input that gets modified in IntegerArrayEncodeAll
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkIntegerArrayDecodeAllUncompressed(b *testing.B) {
|
||||
benchmarks := []int{
|
||||
5,
|
||||
|
|
Loading…
Reference in New Issue