diff --git a/pkg/encoding/simple8b/encoding.go b/pkg/encoding/simple8b/encoding.go
index 83177a3479..b924c28635 100644
--- a/pkg/encoding/simple8b/encoding.go
+++ b/pkg/encoding/simple8b/encoding.go
@@ -347,6 +347,37 @@ func Encode(src []uint64) (value uint64, n int, err error) {
 	}
 }
 
+const (
+	// S8B_BIT_SIZE is the number of low bits of a packed word that hold
+	// values; the selector is stored in the bits above them.
+	S8B_BIT_SIZE = 60
+)
+
+var (
+	// numBits lists the packings EncodeAll tries, densest first. Entry i
+	// is written with selector i+2.
+	numBits = [...][2]byte{
+		// { number of values, max bits per value }
+		{60, 1},
+		{30, 2},
+		{20, 3},
+		{15, 4},
+		{12, 5},
+		{10, 6},
+		{8, 7},
+		{7, 8},
+		{6, 10},
+		{5, 12},
+		{4, 15},
+		{3, 20},
+		{2, 30},
+		{1, 60},
+	}
+	// ErrValueOutOfBounds is returned by EncodeAll when a value does not
+	// fit in 60 bits.
+	ErrValueOutOfBounds = errors.New("value out of bounds")
+)
+
 // Encode returns a packed slice of the values from src. If a value is over
 // 1 << 60, an error is returned. The input src is modified to avoid extra
 // allocations. If you need to re-use, use a copy.
@@ -357,64 +388,80 @@ func EncodeAll(src []uint64) ([]uint64, error) {
 	dst := src
 	j := 0
 
-	for {
-		if i >= len(src) {
-			break
-		}
+NEXTVALUE:
+	for i < len(src) {
 		remaining := src[i:]
 
-		if canPack(remaining, 240, 0) {
-			dst[j] = 0
-			i += 240
-		} else if canPack(remaining, 120, 0) {
-			dst[j] = 1 << 60
-			i += 120
-		} else if canPack(remaining, 60, 1) {
-			dst[j] = pack60(src[i : i+60])
-			i += 60
-		} else if canPack(remaining, 30, 2) {
-			dst[j] = pack30(src[i : i+30])
-			i += 30
-		} else if canPack(remaining, 20, 3) {
-			dst[j] = pack20(src[i : i+20])
-			i += 20
-		} else if canPack(remaining, 15, 4) {
-			dst[j] = pack15(src[i : i+15])
-			i += 15
-		} else if canPack(remaining, 12, 5) {
-			dst[j] = pack12(src[i : i+12])
-			i += 12
-		} else if canPack(remaining, 10, 6) {
-			dst[j] = pack10(src[i : i+10])
-			i += 10
-		} else if canPack(remaining, 8, 7) {
-			dst[j] = pack8(src[i : i+8])
-			i += 8
-		} else if canPack(remaining, 7, 8) {
-			dst[j] = pack7(src[i : i+7])
-			i += 7
-		} else if canPack(remaining, 6, 10) {
-			dst[j] = pack6(src[i : i+6])
-			i += 6
-		} else if canPack(remaining, 5, 12) {
-			dst[j] = pack5(src[i : i+5])
-			i += 5
-		} else if canPack(remaining, 4, 15) {
-			dst[j] = pack4(src[i : i+4])
-			i += 4
-		} else if canPack(remaining, 3, 20) {
-			dst[j] = pack3(src[i : i+3])
-			i += 3
-		} else if canPack(remaining, 2, 30) {
-			dst[j] = pack2(src[i : i+2])
-			i += 2
-		} else if canPack(remaining, 1, 60) {
-			dst[j] = pack1(src[i : i+1])
-			i += 1
-		} else {
-			return nil, fmt.Errorf("value out of bounds")
+		// try to pack run of 240 or 120 1s
+		if len(remaining) >= 120 {
+			// a is the window scanned for 1s: exactly 240 or 120 values.
+			var a []uint64
+			if len(remaining) >= 240 {
+				a = remaining[:240]
+			} else {
+				a = remaining[:120]
+			}
+
+			// Measure the leading run of 1s. Afterwards k is the index of
+			// the last 1 in that run, or -1 when a[0] != 1. The decrement
+			// is required: without it a non-1 value at index 119 or 239
+			// looks like a complete run and would be encoded as a 1.
+			k := 0
+			for k = range a {
+				if a[k] != 1 {
+					k--
+					break
+				}
+			}
+
+			v := uint64(0)
+			switch {
+			case k >= 239:
+				// 240 1s: selector 0, no payload
+				i += 240
+			case k >= 119:
+				// at least 120 1s: selector 1, no payload
+				v = 1 << 60
+				i += 120
+
+			default:
+				goto CODES
+			}
+			dst[j] = v
+			j++
+			continue
 		}
-		j += 1
+
+	CODES:
+		for code := range numBits {
+			intN := int(numBits[code][0])
+			bitN := numBits[code][1]
+			if intN > len(remaining) {
+				continue
+			}
+
+			maxVal := uint64(1 << (bitN & 0x3f))
+			// Selectors 0 and 1 are the runs of 1s handled above, so table
+			// entry code is written with selector code+2.
+			val := uint64(code+2) << S8B_BIT_SIZE
+
+			for k, inV := range remaining {
+				if k < intN {
+					if inV >= maxVal {
+						continue CODES
+					}
+					val |= inV << ((byte(k) * bitN) & 0x3f)
+				} else {
+					break
+				}
+			}
+			dst[j] = val
+			j += 1
+			i += intN
+			continue NEXTVALUE
+		}
+		// Not even a single 60-bit slot fits src[i].
+		return nil, ErrValueOutOfBounds
 	}
 	return dst[:j], nil
 }
@@ -433,10 +480,7 @@ func Decode(dst *[240]uint64, v uint64) (n int, err error) {
 func DecodeAll(dst, src []uint64) (value int, err error) {
 	j := 0
 	for _, v := range src {
-		sel := v >> 60
-		if sel >= 16 {
-			return 0, fmt.Errorf("invalid selector value: %b", sel)
-		}
+		sel := (v >> 60) & 0xf
 		selector[sel].unpack(v, (*[240]uint64)(unsafe.Pointer(&dst[j])))
 		j += selector[sel].n
 	}
diff --git a/tsdb/tsm1/batch_integer.go b/tsdb/tsm1/batch_integer.go
index 1c60b55b35..60ea392397 100644
--- a/tsdb/tsm1/batch_integer.go
+++ b/tsdb/tsm1/batch_integer.go
@@ -25,24 +25,35 @@ func IntegerArrayEncodeAll(src []int64, b []byte) ([]byte, error) {
 	// Zigzag encode deltas of all provided values.
 	var prev int64
 	var rle = true
-	var nopack bool
+	var canpack = true
 
 	// To prevent an allocation of the entire block we're encoding reuse the
 	// src slice to store the encoded deltas.
 	deltas := reintepretInt64ToUint64Slice(src)
 
-	for i, v := range src {
-		delta := v - prev
-		prev = v
-		enc := ZigZagEncode(delta)
-		if i > 1 {
-			rle = rle && deltas[i-1] == enc
-		}
-		deltas[i] = enc
+	// NOTE(review): src[0] is read unconditionally; this relies on an
+	// empty-input guard that is outside this hunk - confirm.
+	prev = src[0]
+	enc := ZigZagEncode(prev)
+	src[0] = int64(enc)
+	canpack = enc <= simple8b.MaxValue
 
-		// Check if the encoded value is too big to be simple8b encoded.
-		if enc > simple8b.MaxValue {
-			nopack = true
+	if len(src) > 1 {
+		delta := src[1] - prev
+		prev = src[1]
+		enc = ZigZagEncode(delta)
+		// d0 is the first delta; rle stays true only while later deltas equal it.
+		d0 := enc
+		src[1] = int64(enc)
+		canpack = canpack && enc <= simple8b.MaxValue
+
+		for i := 2; i < len(src); i++ {
+			delta := src[i] - prev
+			prev = src[i]
+			enc = ZigZagEncode(delta)
+			src[i] = int64(enc)
+			rle = rle && d0 == enc
+			canpack = canpack && enc <= simple8b.MaxValue
 		}
 	}
 
@@ -71,7 +82,7 @@ func IntegerArrayEncodeAll(src []int64, b []byte) ([]byte, error) {
 		return b[:i], nil
 	}
 
-	if nopack { // There is an encoded value that's too big to simple8b encode.
+	if !canpack { // There is an encoded value that's too big to simple8b encode.
 		// Encode uncompressed.
 		sz := 1 + len(deltas)*8
 		if len(b) < sz && cap(b) >= sz {