feat(encoding): Improve integer and simple8b encoding performance
simple8b EncodeAll improvements:

```
name                     old time/op  new time/op  delta
EncodeAll/1_bit-8        28.5µs ± 1%  28.6µs ± 1%     ~     (p=0.133 n=9+10)
EncodeAll/2_bits-8       28.9µs ± 2%  28.7µs ± 0%     ~     (p=0.068 n=10+8)
EncodeAll/3_bits-8       29.3µs ± 1%  28.8µs ± 0%   -1.70%  (p=0.000 n=10+10)
EncodeAll/4_bits-8       29.6µs ± 1%  29.1µs ± 1%   -1.85%  (p=0.000 n=10+10)
EncodeAll/5_bits-8       30.6µs ± 1%  29.8µs ± 2%   -2.70%  (p=0.000 n=10+10)
EncodeAll/6_bits-8       31.3µs ± 1%  30.0µs ± 1%   -4.08%  (p=0.000 n=9+9)
EncodeAll/7_bits-8       32.6µs ± 1%  30.8µs ± 0%   -5.49%  (p=0.000 n=9+9)
EncodeAll/8_bits-8       33.6µs ± 2%  31.0µs ± 1%   -7.77%  (p=0.000 n=10+9)
EncodeAll/10_bits-8      34.9µs ± 0%  31.9µs ± 2%   -8.55%  (p=0.000 n=9+10)
EncodeAll/12_bits-8      36.8µs ± 1%  32.6µs ± 1%  -11.35%  (p=0.000 n=9+10)
EncodeAll/15_bits-8      39.8µs ± 1%  34.1µs ± 2%  -14.40%  (p=0.000 n=10+10)
EncodeAll/20_bits-8      45.2µs ± 3%  36.2µs ± 1%  -19.97%  (p=0.000 n=10+9)
EncodeAll/30_bits-8      55.0µs ± 0%  40.9µs ± 1%  -25.62%  (p=0.000 n=9+9)
EncodeAll/60_bits-8      86.2µs ± 1%  55.2µs ± 1%  -35.92%  (p=0.000 n=10+10)
EncodeAll/combination-8   582µs ± 2%   502µs ± 1%  -13.80%  (p=0.000 n=9+9)
```

EncodeIntegers:

```
name                             old time/op  new time/op  delta
EncodeIntegers/1000_seq/batch-8  2.04µs ± 0%  1.50µs ± 1%  -26.22%  (p=0.008 n=5+5)
EncodeIntegers/1000_ran/batch-8  8.80µs ± 2%  6.10µs ± 0%  -30.73%  (p=0.008 n=5+5)
EncodeIntegers/1000_dup/batch-8  2.03µs ± 1%  1.50µs ± 1%  -26.04%  (p=0.008 n=5+5)
```

EncodeTimestamps (`ran` is improved due to the simple8b improvements):

```
name                               old time/op  new time/op  delta
EncodeTimestamps/1000_seq/batch-8  2.64µs ± 1%  2.65µs ± 2%     ~     (p=0.310 n=5+5)
EncodeTimestamps/1000_ran/batch-8  64.0µs ± 1%  33.8µs ± 1%  -47.23%  (p=0.008 n=5+5)
EncodeTimestamps/1000_dup/batch-8  9.32µs ± 0%  9.28µs ± 1%     ~     (p=0.087 n=5+5)
```

pull/10616/head
parent
dd83bd230f
commit
a339f8f620
|
@ -347,6 +347,31 @@ func Encode(src []uint64) (value uint64, n int, err error) {
|
|||
}
|
||||
}
|
||||
|
||||
const (
|
||||
S8B_BIT_SIZE = 60
|
||||
)
|
||||
|
||||
var (
|
||||
numBits = [...][2]byte{
|
||||
// { number of values, max bits per value }
|
||||
{60, 1},
|
||||
{30, 2},
|
||||
{20, 3},
|
||||
{15, 4},
|
||||
{12, 5},
|
||||
{10, 6},
|
||||
{8, 7},
|
||||
{7, 8},
|
||||
{6, 10},
|
||||
{5, 12},
|
||||
{4, 15},
|
||||
{3, 20},
|
||||
{2, 30},
|
||||
{1, 60},
|
||||
}
|
||||
ErrValueOutOfBounds = errors.New("value out of bounds")
|
||||
)
|
||||
|
||||
// EncodeAll returns a packed slice of the values from src. If a value is over
|
||||
// (1 << 60) - 1, an error is returned. The input src is modified to avoid extra
|
||||
// allocations. If you need to re-use, use a copy.
|
||||
|
@ -357,64 +382,69 @@ func EncodeAll(src []uint64) ([]uint64, error) {
|
|||
dst := src
|
||||
j := 0
|
||||
|
||||
for {
|
||||
if i >= len(src) {
|
||||
break
|
||||
}
|
||||
NEXTVALUE:
|
||||
for i < len(src) {
|
||||
remaining := src[i:]
|
||||
|
||||
if canPack(remaining, 240, 0) {
|
||||
dst[j] = 0
|
||||
i += 240
|
||||
} else if canPack(remaining, 120, 0) {
|
||||
dst[j] = 1 << 60
|
||||
i += 120
|
||||
} else if canPack(remaining, 60, 1) {
|
||||
dst[j] = pack60(src[i : i+60])
|
||||
i += 60
|
||||
} else if canPack(remaining, 30, 2) {
|
||||
dst[j] = pack30(src[i : i+30])
|
||||
i += 30
|
||||
} else if canPack(remaining, 20, 3) {
|
||||
dst[j] = pack20(src[i : i+20])
|
||||
i += 20
|
||||
} else if canPack(remaining, 15, 4) {
|
||||
dst[j] = pack15(src[i : i+15])
|
||||
i += 15
|
||||
} else if canPack(remaining, 12, 5) {
|
||||
dst[j] = pack12(src[i : i+12])
|
||||
i += 12
|
||||
} else if canPack(remaining, 10, 6) {
|
||||
dst[j] = pack10(src[i : i+10])
|
||||
i += 10
|
||||
} else if canPack(remaining, 8, 7) {
|
||||
dst[j] = pack8(src[i : i+8])
|
||||
i += 8
|
||||
} else if canPack(remaining, 7, 8) {
|
||||
dst[j] = pack7(src[i : i+7])
|
||||
i += 7
|
||||
} else if canPack(remaining, 6, 10) {
|
||||
dst[j] = pack6(src[i : i+6])
|
||||
i += 6
|
||||
} else if canPack(remaining, 5, 12) {
|
||||
dst[j] = pack5(src[i : i+5])
|
||||
i += 5
|
||||
} else if canPack(remaining, 4, 15) {
|
||||
dst[j] = pack4(src[i : i+4])
|
||||
i += 4
|
||||
} else if canPack(remaining, 3, 20) {
|
||||
dst[j] = pack3(src[i : i+3])
|
||||
i += 3
|
||||
} else if canPack(remaining, 2, 30) {
|
||||
dst[j] = pack2(src[i : i+2])
|
||||
i += 2
|
||||
} else if canPack(remaining, 1, 60) {
|
||||
dst[j] = pack1(src[i : i+1])
|
||||
i += 1
|
||||
} else {
|
||||
return nil, fmt.Errorf("value out of bounds")
|
||||
// try to pack run of 240 or 120 1s
|
||||
if len(remaining) >= 120 {
|
||||
var a []uint64
|
||||
if len(remaining) >= 240 {
|
||||
a = remaining[:240]
|
||||
} else {
|
||||
a = remaining[:120]
|
||||
}
|
||||
|
||||
k := 0
|
||||
for k = range a {
|
||||
if a[k] != 1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
v := uint64(0)
|
||||
switch {
|
||||
case k >= 239:
|
||||
i += 240
|
||||
case k >= 119:
|
||||
v = 1 << 60
|
||||
i += 120
|
||||
|
||||
default:
|
||||
goto CODES
|
||||
}
|
||||
dst[j] = v
|
||||
j++
|
||||
continue
|
||||
}
|
||||
j += 1
|
||||
|
||||
CODES:
|
||||
for code := range numBits {
|
||||
intN := int(numBits[code][0])
|
||||
bitN := numBits[code][1]
|
||||
if intN > len(remaining) {
|
||||
continue
|
||||
}
|
||||
|
||||
maxVal := uint64(1 << (bitN & 0x3f))
|
||||
val := uint64(code+2) << S8B_BIT_SIZE
|
||||
|
||||
for k, inV := range remaining {
|
||||
if k < intN {
|
||||
if inV >= maxVal {
|
||||
continue CODES
|
||||
}
|
||||
val |= inV << ((byte(k) * bitN) & 0x3f)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
dst[j] = val
|
||||
j += 1
|
||||
i += intN
|
||||
continue NEXTVALUE
|
||||
}
|
||||
return nil, ErrValueOutOfBounds
|
||||
}
|
||||
return dst[:j], nil
|
||||
}
|
||||
|
@ -433,10 +463,7 @@ func Decode(dst *[240]uint64, v uint64) (n int, err error) {
|
|||
func DecodeAll(dst, src []uint64) (value int, err error) {
|
||||
j := 0
|
||||
for _, v := range src {
|
||||
sel := v >> 60
|
||||
if sel >= 16 {
|
||||
return 0, fmt.Errorf("invalid selector value: %b", sel)
|
||||
}
|
||||
sel := (v >> 60) & 0xf
|
||||
selector[sel].unpack(v, (*[240]uint64)(unsafe.Pointer(&dst[j])))
|
||||
j += selector[sel].n
|
||||
}
|
||||
|
|
|
@ -25,24 +25,32 @@ func IntegerArrayEncodeAll(src []int64, b []byte) ([]byte, error) {
|
|||
// Zigzag encode deltas of all provided values.
|
||||
var prev int64
|
||||
var rle = true
|
||||
var nopack bool
|
||||
var canpack = true
|
||||
|
||||
// To prevent an allocation of the entire block we're encoding reuse the
|
||||
// src slice to store the encoded deltas.
|
||||
deltas := reintepretInt64ToUint64Slice(src)
|
||||
|
||||
for i, v := range src {
|
||||
delta := v - prev
|
||||
prev = v
|
||||
enc := ZigZagEncode(delta)
|
||||
if i > 1 {
|
||||
rle = rle && deltas[i-1] == enc
|
||||
}
|
||||
deltas[i] = enc
|
||||
prev = src[0]
|
||||
enc := ZigZagEncode(prev)
|
||||
src[0] = int64(enc)
|
||||
canpack = enc <= simple8b.MaxValue
|
||||
|
||||
// Check if the encoded value is too big to be simple8b encoded.
|
||||
if enc > simple8b.MaxValue {
|
||||
nopack = true
|
||||
if len(src) > 1 {
|
||||
delta := src[1] - prev
|
||||
prev = src[1]
|
||||
enc = ZigZagEncode(delta)
|
||||
d0 := enc
|
||||
src[1] = int64(enc)
|
||||
canpack = canpack && enc <= simple8b.MaxValue
|
||||
|
||||
for i := 2; i < len(src); i++ {
|
||||
delta := src[i] - prev
|
||||
prev = src[i]
|
||||
enc = ZigZagEncode(delta)
|
||||
src[i] = int64(enc)
|
||||
rle = rle && d0 == enc
|
||||
canpack = canpack && enc <= simple8b.MaxValue
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -71,7 +79,7 @@ func IntegerArrayEncodeAll(src []int64, b []byte) ([]byte, error) {
|
|||
return b[:i], nil
|
||||
}
|
||||
|
||||
if nopack { // There is an encoded value that's too big to simple8b encode.
|
||||
if !canpack { // There is an encoded value that's too big to simple8b encode.
|
||||
// Encode uncompressed.
|
||||
sz := 1 + len(deltas)*8
|
||||
if len(b) < sz && cap(b) >= sz {
|
||||
|
@ -111,7 +119,7 @@ func IntegerArrayEncodeAll(src []int64, b []byte) ([]byte, error) {
|
|||
for i, v := range encoded {
|
||||
binary.BigEndian.PutUint64(b[9+i*8:9+i*8+8], v)
|
||||
}
|
||||
return b[:sz], nil
|
||||
return b, nil
|
||||
}
|
||||
|
||||
// UnsignedArrayEncodeAll encodes src into b, returning b and any error encountered.
|
||||
|
|
Loading…
Reference in New Issue