feat(encoding): Improve integer and simple8b encoding performance

simple8b EncodeAll improvements:

```
name                     old time/op  new time/op  delta
EncodeAll/1_bit-8        28.5µs ± 1%  28.6µs ± 1%     ~     (p=0.133 n=9+10)
EncodeAll/2_bits-8       28.9µs ± 2%  28.7µs ± 0%     ~     (p=0.068 n=10+8)
EncodeAll/3_bits-8       29.3µs ± 1%  28.8µs ± 0%   -1.70%  (p=0.000 n=10+10)
EncodeAll/4_bits-8       29.6µs ± 1%  29.1µs ± 1%   -1.85%  (p=0.000 n=10+10)
EncodeAll/5_bits-8       30.6µs ± 1%  29.8µs ± 2%   -2.70%  (p=0.000 n=10+10)
EncodeAll/6_bits-8       31.3µs ± 1%  30.0µs ± 1%   -4.08%  (p=0.000 n=9+9)
EncodeAll/7_bits-8       32.6µs ± 1%  30.8µs ± 0%   -5.49%  (p=0.000 n=9+9)
EncodeAll/8_bits-8       33.6µs ± 2%  31.0µs ± 1%   -7.77%  (p=0.000 n=10+9)
EncodeAll/10_bits-8      34.9µs ± 0%  31.9µs ± 2%   -8.55%  (p=0.000 n=9+10)
EncodeAll/12_bits-8      36.8µs ± 1%  32.6µs ± 1%  -11.35%  (p=0.000 n=9+10)
EncodeAll/15_bits-8      39.8µs ± 1%  34.1µs ± 2%  -14.40%  (p=0.000 n=10+10)
EncodeAll/20_bits-8      45.2µs ± 3%  36.2µs ± 1%  -19.97%  (p=0.000 n=10+9)
EncodeAll/30_bits-8      55.0µs ± 0%  40.9µs ± 1%  -25.62%  (p=0.000 n=9+9)
EncodeAll/60_bits-8      86.2µs ± 1%  55.2µs ± 1%  -35.92%  (p=0.000 n=10+10)
EncodeAll/combination-8   582µs ± 2%   502µs ± 1%  -13.80%  (p=0.000 n=9+9)
```

EncodeIntegers:

```
name                             old time/op    new time/op    delta
EncodeIntegers/1000_seq/batch-8    2.04µs ± 0%    1.50µs ± 1%  -26.22%  (p=0.008 n=5+5)
EncodeIntegers/1000_ran/batch-8    8.80µs ± 2%    6.10µs ± 0%  -30.73%  (p=0.008 n=5+5)
EncodeIntegers/1000_dup/batch-8    2.03µs ± 1%    1.50µs ± 1%  -26.04%  (p=0.008 n=5+5)
```

EncodeTimestamps (the `1000_ran` case is improved due to the simple8b improvements):

```
name                               old time/op    new time/op    delta
EncodeTimestamps/1000_seq/batch-8    2.64µs ± 1%    2.65µs ± 2%     ~     (p=0.310 n=5+5)
EncodeTimestamps/1000_ran/batch-8    64.0µs ± 1%    33.8µs ± 1%  -47.23%  (p=0.008 n=5+5)
EncodeTimestamps/1000_dup/batch-8    9.32µs ± 0%    9.28µs ± 1%     ~     (p=0.087 n=5+5)
```
pull/10616/head
Stuart Carnie 2018-09-28 09:44:30 -07:00 committed by Edd Robinson
parent dd83bd230f
commit a339f8f620
2 changed files with 108 additions and 73 deletions

View File

@ -347,6 +347,31 @@ func Encode(src []uint64) (value uint64, n int, err error) {
} }
} }
// Constants used when assembling a packed simple8b word.
const (
// S8B_BIT_SIZE is the shift EncodeAll applies to a selector when building a
// packed word (selector << S8B_BIT_SIZE); the packed values are OR-ed into
// the bits below it.
S8B_BIT_SIZE = 60
)
var (
// numBits is the packing table that EncodeAll scans in order. Entry i gives,
// for one packed word, how many values are stored and how many bits each
// value may occupy. EncodeAll uses the first entry whose value count does
// not exceed the remaining input and whose bit width fits every one of
// those values, and writes selector i+2 for it.
numBits = [...][2]byte{
// Each row is { number of values per word, max bits per value }.
{60, 1},
{30, 2},
{20, 3},
{15, 4},
{12, 5},
{10, 6},
{8, 7},
{7, 8},
{6, 10},
{5, 12},
{4, 15},
{3, 20},
{2, 30},
{1, 60},
}
// ErrValueOutOfBounds is returned by EncodeAll when no entry of numBits can
// hold the next value, i.e. the value is 1<<60 or larger.
ErrValueOutOfBounds = errors.New("value out of bounds")
)
// Encode returns a packed slice of the values from src. If a value is over // Encode returns a packed slice of the values from src. If a value is over
// 1 << 60, an error is returned. The input src is modified to avoid extra // 1 << 60, an error is returned. The input src is modified to avoid extra
// allocations. If you need to re-use, use a copy. // allocations. If you need to re-use, use a copy.
@ -357,64 +382,69 @@ func EncodeAll(src []uint64) ([]uint64, error) {
dst := src dst := src
j := 0 j := 0
for { NEXTVALUE:
if i >= len(src) { for i < len(src) {
break
}
remaining := src[i:] remaining := src[i:]
if canPack(remaining, 240, 0) { // try to pack run of 240 or 120 1s
dst[j] = 0 if len(remaining) >= 120 {
i += 240 var a []uint64
} else if canPack(remaining, 120, 0) { if len(remaining) >= 240 {
dst[j] = 1 << 60 a = remaining[:240]
i += 120 } else {
} else if canPack(remaining, 60, 1) { a = remaining[:120]
dst[j] = pack60(src[i : i+60]) }
i += 60
} else if canPack(remaining, 30, 2) { k := 0
dst[j] = pack30(src[i : i+30]) for k = range a {
i += 30 if a[k] != 1 {
} else if canPack(remaining, 20, 3) { break
dst[j] = pack20(src[i : i+20]) }
i += 20 }
} else if canPack(remaining, 15, 4) {
dst[j] = pack15(src[i : i+15]) v := uint64(0)
i += 15 switch {
} else if canPack(remaining, 12, 5) { case k >= 239:
dst[j] = pack12(src[i : i+12]) i += 240
i += 12 case k >= 119:
} else if canPack(remaining, 10, 6) { v = 1 << 60
dst[j] = pack10(src[i : i+10]) i += 120
i += 10
} else if canPack(remaining, 8, 7) { default:
dst[j] = pack8(src[i : i+8]) goto CODES
i += 8 }
} else if canPack(remaining, 7, 8) { dst[j] = v
dst[j] = pack7(src[i : i+7]) j++
i += 7 continue
} else if canPack(remaining, 6, 10) {
dst[j] = pack6(src[i : i+6])
i += 6
} else if canPack(remaining, 5, 12) {
dst[j] = pack5(src[i : i+5])
i += 5
} else if canPack(remaining, 4, 15) {
dst[j] = pack4(src[i : i+4])
i += 4
} else if canPack(remaining, 3, 20) {
dst[j] = pack3(src[i : i+3])
i += 3
} else if canPack(remaining, 2, 30) {
dst[j] = pack2(src[i : i+2])
i += 2
} else if canPack(remaining, 1, 60) {
dst[j] = pack1(src[i : i+1])
i += 1
} else {
return nil, fmt.Errorf("value out of bounds")
} }
j += 1
CODES:
for code := range numBits {
intN := int(numBits[code][0])
bitN := numBits[code][1]
if intN > len(remaining) {
continue
}
maxVal := uint64(1 << (bitN & 0x3f))
val := uint64(code+2) << S8B_BIT_SIZE
for k, inV := range remaining {
if k < intN {
if inV >= maxVal {
continue CODES
}
val |= inV << ((byte(k) * bitN) & 0x3f)
} else {
break
}
}
dst[j] = val
j += 1
i += intN
continue NEXTVALUE
}
return nil, ErrValueOutOfBounds
} }
return dst[:j], nil return dst[:j], nil
} }
@ -433,10 +463,7 @@ func Decode(dst *[240]uint64, v uint64) (n int, err error) {
func DecodeAll(dst, src []uint64) (value int, err error) { func DecodeAll(dst, src []uint64) (value int, err error) {
j := 0 j := 0
for _, v := range src { for _, v := range src {
sel := v >> 60 sel := (v >> 60) & 0xf
if sel >= 16 {
return 0, fmt.Errorf("invalid selector value: %b", sel)
}
selector[sel].unpack(v, (*[240]uint64)(unsafe.Pointer(&dst[j]))) selector[sel].unpack(v, (*[240]uint64)(unsafe.Pointer(&dst[j])))
j += selector[sel].n j += selector[sel].n
} }

View File

@ -25,24 +25,32 @@ func IntegerArrayEncodeAll(src []int64, b []byte) ([]byte, error) {
// Zigzag encode deltas of all provided values. // Zigzag encode deltas of all provided values.
var prev int64 var prev int64
var rle = true var rle = true
var nopack bool var canpack = true
// To prevent an allocation of the entire block we're encoding reuse the // To prevent an allocation of the entire block we're encoding reuse the
// src slice to store the encoded deltas. // src slice to store the encoded deltas.
deltas := reintepretInt64ToUint64Slice(src) deltas := reintepretInt64ToUint64Slice(src)
for i, v := range src { prev = src[0]
delta := v - prev enc := ZigZagEncode(prev)
prev = v src[0] = int64(enc)
enc := ZigZagEncode(delta) canpack = enc <= simple8b.MaxValue
if i > 1 {
rle = rle && deltas[i-1] == enc
}
deltas[i] = enc
// Check if the encoded value is too big to be simple8b encoded. if len(src) > 1 {
if enc > simple8b.MaxValue { delta := src[1] - prev
nopack = true prev = src[1]
enc = ZigZagEncode(delta)
d0 := enc
src[1] = int64(enc)
canpack = canpack && enc <= simple8b.MaxValue
for i := 2; i < len(src); i++ {
delta := src[i] - prev
prev = src[i]
enc = ZigZagEncode(delta)
src[i] = int64(enc)
rle = rle && d0 == enc
canpack = canpack && enc <= simple8b.MaxValue
} }
} }
@ -71,7 +79,7 @@ func IntegerArrayEncodeAll(src []int64, b []byte) ([]byte, error) {
return b[:i], nil return b[:i], nil
} }
if nopack { // There is an encoded value that's too big to simple8b encode. if !canpack { // There is an encoded value that's too big to simple8b encode.
// Encode uncompressed. // Encode uncompressed.
sz := 1 + len(deltas)*8 sz := 1 + len(deltas)*8
if len(b) < sz && cap(b) >= sz { if len(b) < sz && cap(b) >= sz {
@ -111,7 +119,7 @@ func IntegerArrayEncodeAll(src []int64, b []byte) ([]byte, error) {
for i, v := range encoded { for i, v := range encoded {
binary.BigEndian.PutUint64(b[9+i*8:9+i*8+8], v) binary.BigEndian.PutUint64(b[9+i*8:9+i*8+8], v)
} }
return b[:sz], nil return b, nil
} }
// UnsignedArrayEncodeAll encodes src into b, returning b and any error encountered. // UnsignedArrayEncodeAll encodes src into b, returning b and any error encountered.