From a339f8f620919a59915b8db8d7cd077e69658f2c Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Fri, 28 Sep 2018 09:44:30 -0700 Subject: [PATCH] feat(encoding): Improve integer and simple8b encoding performance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit simple8b EncodeAll improvements: ``` name                     old time/op  new time/op  delta EncodeAll/1_bit-8        28.5µs ± 1%  28.6µs ± 1%     ~     (p=0.133 n=9+10) EncodeAll/2_bits-8       28.9µs ± 2%  28.7µs ± 0%     ~     (p=0.068 n=10+8) EncodeAll/3_bits-8       29.3µs ± 1%  28.8µs ± 0%   -1.70%  (p=0.000 n=10+10) EncodeAll/4_bits-8       29.6µs ± 1%  29.1µs ± 1%   -1.85%  (p=0.000 n=10+10) EncodeAll/5_bits-8       30.6µs ± 1%  29.8µs ± 2%   -2.70%  (p=0.000 n=10+10) EncodeAll/6_bits-8       31.3µs ± 1%  30.0µs ± 1%   -4.08%  (p=0.000 n=9+9) EncodeAll/7_bits-8       32.6µs ± 1%  30.8µs ± 0%   -5.49%  (p=0.000 n=9+9) EncodeAll/8_bits-8       33.6µs ± 2%  31.0µs ± 1%   -7.77%  (p=0.000 n=10+9) EncodeAll/10_bits-8      34.9µs ± 0%  31.9µs ± 2%   -8.55%  (p=0.000 n=9+10) EncodeAll/12_bits-8      36.8µs ± 1%  32.6µs ± 1%  -11.35%  (p=0.000 n=9+10) EncodeAll/15_bits-8      39.8µs ± 1%  34.1µs ± 2%  -14.40%  (p=0.000 n=10+10) EncodeAll/20_bits-8      45.2µs ± 3%  36.2µs ± 1%  -19.97%  (p=0.000 n=10+9) EncodeAll/30_bits-8      55.0µs ± 0%  40.9µs ± 1%  -25.62%  (p=0.000 n=9+9) EncodeAll/60_bits-8      86.2µs ± 1%  55.2µs ± 1%  -35.92%  (p=0.000 n=10+10) EncodeAll/combination-8   582µs ± 2%   502µs ± 1%  -13.80%  (p=0.000 n=9+9) ``` EncodeIntegers: ``` name                             old time/op    new time/op    delta EncodeIntegers/1000_seq/batch-8    2.04µs ± 0%    1.50µs ± 1%  -26.22%  (p=0.008 n=5+5) EncodeIntegers/1000_ran/batch-8    8.80µs ± 2%    6.10µs ± 0%  -30.73%  (p=0.008 n=5+5) EncodeIntegers/1000_dup/batch-8    2.03µs ± 1%    1.50µs ± 1%  -26.04%  (p=0.008 n=5+5) ``` EncodeTimestamps (ran is improved due to simple8b improvements): ``` name old 
time/op new time/op delta EncodeTimestamps/1000_seq/batch-8 2.64µs ± 1% 2.65µs ± 2% ~ (p=0.310 n=5+5) EncodeTimestamps/1000_ran/batch-8 64.0µs ± 1% 33.8µs ± 1% -47.23% (p=0.008 n=5+5) EncodeTimestamps/1000_dup/batch-8 9.32µs ± 0% 9.28µs ± 1% ~ (p=0.087 n=5+5) ``` --- pkg/encoding/simple8b/encoding.go | 145 ++++++++++++++++++------------ tsdb/tsm1/batch_integer.go | 36 +++++--- 2 files changed, 108 insertions(+), 73 deletions(-) diff --git a/pkg/encoding/simple8b/encoding.go b/pkg/encoding/simple8b/encoding.go index 83177a3479..b924c28635 100644 --- a/pkg/encoding/simple8b/encoding.go +++ b/pkg/encoding/simple8b/encoding.go @@ -347,6 +347,31 @@ func Encode(src []uint64) (value uint64, n int, err error) { } } +const ( + S8B_BIT_SIZE = 60 +) + +var ( + numBits = [...][2]byte{ + // { number of values, max bits per value } + {60, 1}, + {30, 2}, + {20, 3}, + {15, 4}, + {12, 5}, + {10, 6}, + {8, 7}, + {7, 8}, + {6, 10}, + {5, 12}, + {4, 15}, + {3, 20}, + {2, 30}, + {1, 60}, + } + ErrValueOutOfBounds = errors.New("value out of bounds") +) + // Encode returns a packed slice of the values from src. If a value is over // 1 << 60, an error is returned. The input src is modified to avoid extra // allocations. If you need to re-use, use a copy. 
@@ -357,64 +382,69 @@ func EncodeAll(src []uint64) ([]uint64, error) { dst := src j := 0 - for { - if i >= len(src) { - break - } +NEXTVALUE: + for i < len(src) { remaining := src[i:] - if canPack(remaining, 240, 0) { - dst[j] = 0 - i += 240 - } else if canPack(remaining, 120, 0) { - dst[j] = 1 << 60 - i += 120 - } else if canPack(remaining, 60, 1) { - dst[j] = pack60(src[i : i+60]) - i += 60 - } else if canPack(remaining, 30, 2) { - dst[j] = pack30(src[i : i+30]) - i += 30 - } else if canPack(remaining, 20, 3) { - dst[j] = pack20(src[i : i+20]) - i += 20 - } else if canPack(remaining, 15, 4) { - dst[j] = pack15(src[i : i+15]) - i += 15 - } else if canPack(remaining, 12, 5) { - dst[j] = pack12(src[i : i+12]) - i += 12 - } else if canPack(remaining, 10, 6) { - dst[j] = pack10(src[i : i+10]) - i += 10 - } else if canPack(remaining, 8, 7) { - dst[j] = pack8(src[i : i+8]) - i += 8 - } else if canPack(remaining, 7, 8) { - dst[j] = pack7(src[i : i+7]) - i += 7 - } else if canPack(remaining, 6, 10) { - dst[j] = pack6(src[i : i+6]) - i += 6 - } else if canPack(remaining, 5, 12) { - dst[j] = pack5(src[i : i+5]) - i += 5 - } else if canPack(remaining, 4, 15) { - dst[j] = pack4(src[i : i+4]) - i += 4 - } else if canPack(remaining, 3, 20) { - dst[j] = pack3(src[i : i+3]) - i += 3 - } else if canPack(remaining, 2, 30) { - dst[j] = pack2(src[i : i+2]) - i += 2 - } else if canPack(remaining, 1, 60) { - dst[j] = pack1(src[i : i+1]) - i += 1 - } else { - return nil, fmt.Errorf("value out of bounds") + // try to pack run of 240 or 120 1s + if len(remaining) >= 120 { + var a []uint64 + if len(remaining) >= 240 { + a = remaining[:240] + } else { + a = remaining[:120] + } + + k := 0 + for k = range a { + if a[k] != 1 { + break + } + } + + v := uint64(0) + switch { + case k >= 239: + i += 240 + case k >= 119: + v = 1 << 60 + i += 120 + + default: + goto CODES + } + dst[j] = v + j++ + continue } - j += 1 + + CODES: + for code := range numBits { + intN := int(numBits[code][0]) + bitN := 
numBits[code][1] + if intN > len(remaining) { + continue + } + + maxVal := uint64(1 << (bitN & 0x3f)) + val := uint64(code+2) << S8B_BIT_SIZE + + for k, inV := range remaining { + if k < intN { + if inV >= maxVal { + continue CODES + } + val |= inV << ((byte(k) * bitN) & 0x3f) + } else { + break + } + } + dst[j] = val + j += 1 + i += intN + continue NEXTVALUE + } + return nil, ErrValueOutOfBounds } return dst[:j], nil } @@ -433,10 +463,7 @@ func Decode(dst *[240]uint64, v uint64) (n int, err error) { func DecodeAll(dst, src []uint64) (value int, err error) { j := 0 for _, v := range src { - sel := v >> 60 - if sel >= 16 { - return 0, fmt.Errorf("invalid selector value: %b", sel) - } + sel := (v >> 60) & 0xf selector[sel].unpack(v, (*[240]uint64)(unsafe.Pointer(&dst[j]))) j += selector[sel].n } diff --git a/tsdb/tsm1/batch_integer.go b/tsdb/tsm1/batch_integer.go index 1c60b55b35..60ea392397 100644 --- a/tsdb/tsm1/batch_integer.go +++ b/tsdb/tsm1/batch_integer.go @@ -25,24 +25,32 @@ func IntegerArrayEncodeAll(src []int64, b []byte) ([]byte, error) { // Zigzag encode deltas of all provided values. var prev int64 var rle = true - var nopack bool + var canpack = true // To prevent an allocation of the entire block we're encoding reuse the // src slice to store the encoded deltas. deltas := reintepretInt64ToUint64Slice(src) - for i, v := range src { - delta := v - prev - prev = v - enc := ZigZagEncode(delta) - if i > 1 { - rle = rle && deltas[i-1] == enc - } - deltas[i] = enc + prev = src[0] + enc := ZigZagEncode(prev) + src[0] = int64(enc) + canpack = enc <= simple8b.MaxValue - // Check if the encoded value is too big to be simple8b encoded. 
- if enc > simple8b.MaxValue { - nopack = true + if len(src) > 1 { + delta := src[1] - prev + prev = src[1] + enc = ZigZagEncode(delta) + d0 := enc + src[1] = int64(enc) + canpack = canpack && enc <= simple8b.MaxValue + + for i := 2; i < len(src); i++ { + delta := src[i] - prev + prev = src[i] + enc = ZigZagEncode(delta) + src[i] = int64(enc) + rle = rle && d0 == enc + canpack = canpack && enc <= simple8b.MaxValue } } @@ -71,7 +79,7 @@ func IntegerArrayEncodeAll(src []int64, b []byte) ([]byte, error) { return b[:i], nil } - if nopack { // There is an encoded value that's too big to simple8b encode. + if !canpack { // There is an encoded value that's too big to simple8b encode. // Encode uncompressed. sz := 1 + len(deltas)*8 if len(b) < sz && cap(b) >= sz { @@ -111,7 +119,7 @@ func IntegerArrayEncodeAll(src []int64, b []byte) ([]byte, error) { for i, v := range encoded { binary.BigEndian.PutUint64(b[9+i*8:9+i*8+8], v) } - return b[:sz], nil + return b, nil } // UnsignedArrayEncodeAll encodes src into b, returning b and any error encountered.