feat(encoding): Improve integer and simple8b encoding performance

simple8b EncodeAll improvements:

```
name                     old time/op  new time/op  delta
EncodeAll/1_bit-8        28.5µs ± 1%  28.6µs ± 1%     ~     (p=0.133 n=9+10)
EncodeAll/2_bits-8       28.9µs ± 2%  28.7µs ± 0%     ~     (p=0.068 n=10+8)
EncodeAll/3_bits-8       29.3µs ± 1%  28.8µs ± 0%   -1.70%  (p=0.000 n=10+10)
EncodeAll/4_bits-8       29.6µs ± 1%  29.1µs ± 1%   -1.85%  (p=0.000 n=10+10)
EncodeAll/5_bits-8       30.6µs ± 1%  29.8µs ± 2%   -2.70%  (p=0.000 n=10+10)
EncodeAll/6_bits-8       31.3µs ± 1%  30.0µs ± 1%   -4.08%  (p=0.000 n=9+9)
EncodeAll/7_bits-8       32.6µs ± 1%  30.8µs ± 0%   -5.49%  (p=0.000 n=9+9)
EncodeAll/8_bits-8       33.6µs ± 2%  31.0µs ± 1%   -7.77%  (p=0.000 n=10+9)
EncodeAll/10_bits-8      34.9µs ± 0%  31.9µs ± 2%   -8.55%  (p=0.000 n=9+10)
EncodeAll/12_bits-8      36.8µs ± 1%  32.6µs ± 1%  -11.35%  (p=0.000 n=9+10)
EncodeAll/15_bits-8      39.8µs ± 1%  34.1µs ± 2%  -14.40%  (p=0.000 n=10+10)
EncodeAll/20_bits-8      45.2µs ± 3%  36.2µs ± 1%  -19.97%  (p=0.000 n=10+9)
EncodeAll/30_bits-8      55.0µs ± 0%  40.9µs ± 1%  -25.62%  (p=0.000 n=9+9)
EncodeAll/60_bits-8      86.2µs ± 1%  55.2µs ± 1%  -35.92%  (p=0.000 n=10+10)
EncodeAll/combination-8   582µs ± 2%   502µs ± 1%  -13.80%  (p=0.000 n=9+9)
```

EncodeIntegers:

```
name                             old time/op    new time/op    delta
EncodeIntegers/1000_seq/batch-8    2.04µs ± 0%    1.50µs ± 1%  -26.22%  (p=0.008 n=5+5)
EncodeIntegers/1000_ran/batch-8    8.80µs ± 2%    6.10µs ± 0%  -30.73%  (p=0.008 n=5+5)
EncodeIntegers/1000_dup/batch-8    2.03µs ± 1%    1.50µs ± 1%  -26.04%  (p=0.008 n=5+5)
```

EncodeTimestamps (the `ran` case is improved due to the simple8b improvements):

```
name                               old time/op    new time/op    delta
EncodeTimestamps/1000_seq/batch-8    2.64µs ± 1%    2.65µs ± 2%     ~     (p=0.310 n=5+5)
EncodeTimestamps/1000_ran/batch-8    64.0µs ± 1%    33.8µs ± 1%  -47.23%  (p=0.008 n=5+5)
EncodeTimestamps/1000_dup/batch-8    9.32µs ± 0%    9.28µs ± 1%     ~     (p=0.087 n=5+5)
```
pull/10616/head
Stuart Carnie 2018-09-28 09:44:30 -07:00 committed by Edd Robinson
parent dd83bd230f
commit a339f8f620
2 changed files with 108 additions and 73 deletions

View File

@ -347,6 +347,31 @@ func Encode(src []uint64) (value uint64, n int, err error) {
}
}
const (
// S8B_BIT_SIZE is the left-shift applied to a selector code when EncodeAll
// builds a packed word (`uint64(code+2) << S8B_BIT_SIZE`), i.e. the packed
// values occupy the bits below this offset.
S8B_BIT_SIZE = 60
)
var (
// numBits lists the packing layouts that EncodeAll tries, in order, for the
// values remaining in its input. The index of a row plus 2 is the selector
// code written above S8B_BIT_SIZE. For every row, count * bits <= 60.
numBits = [...][2]byte{
// { number of values, max bits per value }
{60, 1},
{30, 2},
{20, 3},
{15, 4},
{12, 5},
{10, 6},
{8, 7},
{7, 8},
{6, 10},
{5, 12},
{4, 15},
{3, 20},
{2, 30},
{1, 60},
}
// ErrValueOutOfBounds is returned by EncodeAll when no layout in numBits can
// hold the next value(s) of the input.
ErrValueOutOfBounds = errors.New("value out of bounds")
)
// EncodeAll returns a packed slice of the values from src. If a value is
// 1 << 60 or greater, an error is returned. The input src is modified to avoid
// extra allocations. If you need to re-use it, use a copy.
@ -357,64 +382,69 @@ func EncodeAll(src []uint64) ([]uint64, error) {
dst := src
j := 0
for {
if i >= len(src) {
break
}
NEXTVALUE:
for i < len(src) {
remaining := src[i:]
if canPack(remaining, 240, 0) {
dst[j] = 0
i += 240
} else if canPack(remaining, 120, 0) {
dst[j] = 1 << 60
i += 120
} else if canPack(remaining, 60, 1) {
dst[j] = pack60(src[i : i+60])
i += 60
} else if canPack(remaining, 30, 2) {
dst[j] = pack30(src[i : i+30])
i += 30
} else if canPack(remaining, 20, 3) {
dst[j] = pack20(src[i : i+20])
i += 20
} else if canPack(remaining, 15, 4) {
dst[j] = pack15(src[i : i+15])
i += 15
} else if canPack(remaining, 12, 5) {
dst[j] = pack12(src[i : i+12])
i += 12
} else if canPack(remaining, 10, 6) {
dst[j] = pack10(src[i : i+10])
i += 10
} else if canPack(remaining, 8, 7) {
dst[j] = pack8(src[i : i+8])
i += 8
} else if canPack(remaining, 7, 8) {
dst[j] = pack7(src[i : i+7])
i += 7
} else if canPack(remaining, 6, 10) {
dst[j] = pack6(src[i : i+6])
i += 6
} else if canPack(remaining, 5, 12) {
dst[j] = pack5(src[i : i+5])
i += 5
} else if canPack(remaining, 4, 15) {
dst[j] = pack4(src[i : i+4])
i += 4
} else if canPack(remaining, 3, 20) {
dst[j] = pack3(src[i : i+3])
i += 3
} else if canPack(remaining, 2, 30) {
dst[j] = pack2(src[i : i+2])
i += 2
} else if canPack(remaining, 1, 60) {
dst[j] = pack1(src[i : i+1])
i += 1
} else {
return nil, fmt.Errorf("value out of bounds")
// try to pack run of 240 or 120 1s
if len(remaining) >= 120 {
var a []uint64
if len(remaining) >= 240 {
a = remaining[:240]
} else {
a = remaining[:120]
}
k := 0
for k = range a {
if a[k] != 1 {
break
}
}
v := uint64(0)
switch {
case k >= 239:
i += 240
case k >= 119:
v = 1 << 60
i += 120
default:
goto CODES
}
dst[j] = v
j++
continue
}
j += 1
CODES:
for code := range numBits {
intN := int(numBits[code][0])
bitN := numBits[code][1]
if intN > len(remaining) {
continue
}
maxVal := uint64(1 << (bitN & 0x3f))
val := uint64(code+2) << S8B_BIT_SIZE
for k, inV := range remaining {
if k < intN {
if inV >= maxVal {
continue CODES
}
val |= inV << ((byte(k) * bitN) & 0x3f)
} else {
break
}
}
dst[j] = val
j += 1
i += intN
continue NEXTVALUE
}
return nil, ErrValueOutOfBounds
}
return dst[:j], nil
}
@ -433,10 +463,7 @@ func Decode(dst *[240]uint64, v uint64) (n int, err error) {
func DecodeAll(dst, src []uint64) (value int, err error) {
j := 0
for _, v := range src {
sel := v >> 60
if sel >= 16 {
return 0, fmt.Errorf("invalid selector value: %b", sel)
}
sel := (v >> 60) & 0xf
selector[sel].unpack(v, (*[240]uint64)(unsafe.Pointer(&dst[j])))
j += selector[sel].n
}

View File

@ -25,24 +25,32 @@ func IntegerArrayEncodeAll(src []int64, b []byte) ([]byte, error) {
// Zigzag encode deltas of all provided values.
var prev int64
var rle = true
var nopack bool
var canpack = true
// To prevent an allocation of the entire block we're encoding reuse the
// src slice to store the encoded deltas.
deltas := reintepretInt64ToUint64Slice(src)
for i, v := range src {
delta := v - prev
prev = v
enc := ZigZagEncode(delta)
if i > 1 {
rle = rle && deltas[i-1] == enc
}
deltas[i] = enc
prev = src[0]
enc := ZigZagEncode(prev)
src[0] = int64(enc)
canpack = enc <= simple8b.MaxValue
// Check if the encoded value is too big to be simple8b encoded.
if enc > simple8b.MaxValue {
nopack = true
if len(src) > 1 {
delta := src[1] - prev
prev = src[1]
enc = ZigZagEncode(delta)
d0 := enc
src[1] = int64(enc)
canpack = canpack && enc <= simple8b.MaxValue
for i := 2; i < len(src); i++ {
delta := src[i] - prev
prev = src[i]
enc = ZigZagEncode(delta)
src[i] = int64(enc)
rle = rle && d0 == enc
canpack = canpack && enc <= simple8b.MaxValue
}
}
@ -71,7 +79,7 @@ func IntegerArrayEncodeAll(src []int64, b []byte) ([]byte, error) {
return b[:i], nil
}
if nopack { // There is an encoded value that's too big to simple8b encode.
if !canpack { // There is an encoded value that's too big to simple8b encode.
// Encode uncompressed.
sz := 1 + len(deltas)*8
if len(b) < sz && cap(b) >= sz {
@ -111,7 +119,7 @@ func IntegerArrayEncodeAll(src []int64, b []byte) ([]byte, error) {
for i, v := range encoded {
binary.BigEndian.PutUint64(b[9+i*8:9+i*8+8], v)
}
return b[:sz], nil
return b, nil
}
// UnsignedArrayEncodeAll encodes src into b, returning b and any error encountered.