influxdb/tsdb/engine/tsm1/int.go

181 lines
4.3 KiB
Go
Raw Normal View History

package tsm1
2015-09-29 16:46:19 +00:00
// Int64 encoding uses two different strategies depending on the range of values in
// the uncompressed data. Values are first encoded using zig zag encoding.
// This interleaves positive and negative integers across a range of positive integers.
//
// For example, [-2,-1,0,1] becomes [3,1,0,2]. See
// https://developers.google.com/protocol-buffers/docs/encoding?hl=en#signed-integers
// for more information.
//
// If none of the zig zag encoded values is larger than 1 << 60 - 1, they are compressed using
// simple8b encoding. If any value is larger than 1 << 60 - 1, the values are stored uncompressed.
//
// Each encoded byte slice contains a 1 byte header followed by multiple 8 byte packed integers
// or 8 byte uncompressed integers. The 4 high bits of the first byte indicate the encoding type
// for the remaining bytes.
//
// There are currently two encoding types that can be used with room for 16 total. These additional
// encoding slots are reserved for future use. One improvement to be made is to use a patched
// encoding such as PFOR if only a small number of values exceed the max compressed value range. This
// should improve compression ratios with very large integers near the ends of the int64 range.
2015-09-29 16:46:19 +00:00
import (
"encoding/binary"
"fmt"
"github.com/jwilder/encoding/simple8b"
)
// Encoding type identifiers. The encoders in this file store one of these in
// the 4 high bits of the first byte of a block, and the decoder reads it back
// from there to choose how to decode the remaining bytes.
const (
// intUncompressed is an uncompressed format using 8 bytes per point
intUncompressed = 0
// intCompressedSimple is a bit-packed format using simple8b encoding
intCompressedSimple = 1
)
2015-10-05 18:32:29 +00:00
// Int64Encoder encodes int64 values into byte slices.
type Int64Encoder interface {
// Write adds v to the set of values to be encoded.
Write(v int64)
// Bytes returns the encoded form of the values written so far, or an error
// if they could not be encoded.
Bytes() ([]byte, error)
}
2015-10-05 18:32:29 +00:00
// Int64Decoder decodes a byte slice into int64s.
type Int64Decoder interface {
// Next advances to the next value and reports whether one is available.
Next() bool
// Read returns the current value. Call it only after Next has returned true.
Read() int64
// Error returns the error recorded while decoding, or nil if there was none.
Error() error
}
// int64Encoder is the Int64Encoder implementation returned by NewInt64Encoder.
// It buffers every written value until Bytes is called.
type int64Encoder struct {
// values holds the zig zag encoded form of each value passed to Write,
// in the order they were written.
values []uint64
}
// NewInt64Encoder returns an Int64Encoder with no buffered values.
func NewInt64Encoder() Int64Encoder {
	return new(int64Encoder)
}
// Write zig zag encodes v and appends the result to the buffered values.
func (e *int64Encoder) Write(v int64) {
	zz := ZigZagEncode(v)
	e.values = append(e.values, zz)
}
// Bytes encodes the buffered values. The packed simple8b format is used unless
// at least one value exceeds simple8b.MaxValue, in which case every value is
// written uncompressed.
func (e *int64Encoder) Bytes() ([]byte, error) {
	for i := range e.values {
		// A single oversized value forces the whole block to be uncompressed.
		if e.values[i] > simple8b.MaxValue {
			return e.encodeUncompressed()
		}
	}
	return e.encodePacked()
}
func (e *int64Encoder) encodePacked() ([]byte, error) {
2015-09-29 05:06:17 +00:00
encoded, err := simple8b.EncodeAll(e.values)
if err != nil {
return nil, err
}
b := make([]byte, 1+len(encoded)*8)
// 4 high bits of first byte store the encoding type for the block
b[0] = byte(intCompressedSimple) << 4
for i, v := range encoded {
binary.BigEndian.PutUint64(b[1+i*8:1+i*8+8], v)
}
return b, nil
}
// encodeUncompressed returns a block made of a 1 byte header followed by each
// buffered value as 8 big-endian bytes. It never returns a non-nil error.
func (e *int64Encoder) encodeUncompressed() ([]byte, error) {
	out := make([]byte, 1+8*len(e.values))

	// The encoding type lives in the 4 high bits of the header byte.
	out[0] = byte(intUncompressed) << 4

	payload := out[1:]
	for idx, val := range e.values {
		binary.BigEndian.PutUint64(payload[idx*8:], val)
	}
	return out, nil
}
// int64Decoder is the Int64Decoder implementation returned by NewInt64Decoder.
// It decodes the input lazily, one 8 byte word at a time, as Next is called.
type int64Decoder struct {
// values is the scratch buffer that receives the zig zag encoded values
// decoded from the most recently consumed 8 byte word.
values []uint64
// bytes is the input that has not been decoded yet (the header byte is
// already stripped).
bytes []byte
// i is the index into values of the current value.
i int
// n is the number of valid entries in values.
n int
// encoding is the encoding type read from the 4 high bits of the header byte.
encoding byte
// err is the error recorded while decoding, if any.
err error
}
// NewInt64Decoder returns an Int64Decoder positioned before the first value
// encoded in b.
func NewInt64Decoder(b []byte) Int64Decoder {
	// 240 is the maximum number of values that can be encoded into a single uint64 using simple8b
	scratch := make([]uint64, 240)

	dec := &int64Decoder{values: scratch}
	dec.SetBytes(b)
	return dec
}
// SetBytes points the decoder at a new encoded block and resets all decoding
// state so the decoder can be reused.
//
// The 4 high bits of b[0] select the encoding and the rest of b is the payload.
// An empty b leaves the decoder with nothing to decode: previously buffered
// input, the previous encoding type and any previously recorded error are
// cleared rather than carried over from the last block.
func (d *int64Decoder) SetBytes(b []byte) {
	if len(b) > 0 {
		d.encoding = b[0] >> 4
		d.bytes = b[1:]
	} else {
		d.encoding = 0
		d.bytes = nil
	}

	d.i = 0
	d.n = 0
	d.err = nil
}
// Next advances to the next value and reports whether one is available. When
// the current batch is used up it decodes the next 8 byte word according to the
// block's encoding type; an unrecognised encoding type is recorded as an error.
func (d *int64Decoder) Next() bool {
	batchDone := d.i >= d.n
	if batchDone && len(d.bytes) == 0 {
		// Nothing buffered and nothing left to decode.
		return false
	}

	d.i++
	if d.i < d.n {
		// Still inside the current batch.
		return true
	}

	switch d.encoding {
	case intUncompressed:
		d.decodeUncompressed()
	case intCompressedSimple:
		d.decodePacked()
	default:
		d.err = fmt.Errorf("unknown encoding %v", d.encoding)
	}
	return d.i < d.n
}
// Error returns the error recorded while decoding, or nil if none was recorded.
func (d *int64Decoder) Error() error {
return d.err
}
// Read returns the current value, reversing the zig zag encoding applied when
// it was written. It indexes the batch without a bounds check, so it must only
// be called after Next has returned true.
func (d *int64Decoder) Read() int64 {
	raw := d.values[d.i]
	return ZigZagDecode(raw)
}
// decodePacked consumes the next 8 byte big-endian word from the input and
// unpacks it with simple8b into d.values, making the unpacked values the
// current batch.
//
// It does nothing when no input is left. If fewer than 8 bytes remain, or the
// word cannot be unpacked, it records an error and leaves the batch and the
// remaining input untouched, so Next reports false.
func (d *int64Decoder) decodePacked() {
	if len(d.bytes) == 0 {
		return
	}

	// A slice expression is bounded by capacity, not length, so without this
	// check a truncated block would either panic or read past the end of the
	// input into unrelated memory.
	if len(d.bytes) < 8 {
		d.err = fmt.Errorf("int64Decoder: not enough data to decode packed value")
		return
	}

	v := binary.BigEndian.Uint64(d.bytes[:8])
	n, err := simple8b.Decode(d.values, v)
	if err != nil {
		// Should never happen, only error that could be returned is if the value to be decoded was not
		// actually encoded by simple8b encoder. The returned count is not
		// trusted in that case.
		d.err = fmt.Errorf("failed to decode value %v: %v", v, err)
		return
	}

	d.n = n
	d.i = 0
	d.bytes = d.bytes[8:]
}
// decodeUncompressed consumes the next 8 byte big-endian value from the input
// and makes it the single entry of the current batch.
//
// It does nothing when no input is left; Next calls it once more after the last
// value has been read, and that call must not touch the empty input. If between
// 1 and 7 bytes remain the block is truncated: an error is recorded and the
// batch and remaining input are left untouched.
func (d *int64Decoder) decodeUncompressed() {
	if len(d.bytes) == 0 {
		return
	}

	// A slice expression is bounded by capacity, not length, so without this
	// check a truncated block would either panic or read past the end of the
	// input into unrelated memory.
	if len(d.bytes) < 8 {
		d.err = fmt.Errorf("int64Decoder: not enough data to decode uncompressed value")
		return
	}

	d.values[0] = binary.BigEndian.Uint64(d.bytes[:8])
	d.i = 0
	d.n = 1
	d.bytes = d.bytes[8:]
}