2015-10-01 19:23:38 +00:00
|
|
|
package tsm1
|
2015-09-24 20:29:51 +00:00
|
|
|
|
2015-09-29 16:46:19 +00:00
|
|
|
// Int64 encoding uses two different strategies depending on the range of values in
|
|
|
|
// the uncompressed data. Values are first encoded using zig zag encoding.
|
|
|
|
// This interleaves positive and negative integers across a range of positive integers.
|
|
|
|
//
|
|
|
|
// For example, [-2,-1,0,1] becomes [3,1,0,2]. See
|
|
|
|
// https://developers.google.com/protocol-buffers/docs/encoding?hl=en#signed-integers
|
|
|
|
// for more information.
|
|
|
|
//
|
|
|
|
// If all the zig zag encoded values are less than 1 << 60 - 1, they are compressed using
|
|
|
|
// simple8b encoding. If any value is larger than 1 << 60 - 1, the values are stored uncompressed.
|
|
|
|
//
|
|
|
|
// Each encoded byte slice contains a 1 byte header followed by multiple 8 byte packed integers
|
|
|
|
// or 8 byte uncompressed integers. The 4 high bits of the first byte indicate the encoding type
|
|
|
|
// for the remaining bytes.
|
|
|
|
//
|
2015-10-05 18:32:29 +00:00
|
|
|
// There are currently two encoding types that can be used with room for 16 total. These additional
|
|
|
|
// encoding slots are reserved for future use. One improvement to be made is to use a patched
|
2015-09-29 16:46:19 +00:00
|
|
|
// encoding such as PFOR if only a small number of values exceed the max compressed value range. This
|
2015-10-05 18:32:29 +00:00
|
|
|
// should improve compression ratios with very large integers near the ends of the int64 range.
|
2015-09-29 16:46:19 +00:00
|
|
|
|
2015-09-24 20:29:51 +00:00
|
|
|
import (
|
|
|
|
"encoding/binary"
|
|
|
|
"fmt"
|
|
|
|
|
|
|
|
"github.com/jwilder/encoding/simple8b"
|
|
|
|
)
|
|
|
|
|
2015-10-02 16:03:20 +00:00
|
|
|
// Encoding type identifiers. The encoders in this file shift one of these
// into the 4 high bits of the first byte of every encoded block.
const (
	// intUncompressed identifies a block that stores each value as a raw
	// 8 byte integer.
	intUncompressed = 0

	// intCompressedSimple identifies a block whose values are bit-packed
	// using simple8b encoding.
	intCompressedSimple = 1
)
|
|
|
|
|
2015-10-05 18:32:29 +00:00
|
|
|
// Int64Encoder encodes a sequence of int64 values into a byte slice.
type Int64Encoder interface {
	// Write adds v to the sequence of values to be encoded.
	Write(v int64)

	// Bytes returns the encoded form of all values written so far, or an
	// error if they could not be encoded.
	Bytes() ([]byte, error)
}
|
|
|
|
|
2015-10-05 18:32:29 +00:00
|
|
|
// Int64Decoder decodes a byte slice into a sequence of int64 values.
type Int64Decoder interface {
	// Next advances to the next value and reports whether one is available
	// to be returned by Read.
	Next() bool

	// Read returns the value the decoder is currently positioned on.
	Read() int64

	// Error returns the error recorded while decoding, if any.
	Error() error
}
|
|
|
|
|
2015-09-24 20:29:51 +00:00
|
|
|
// int64Encoder is the Int64Encoder implementation returned by
// NewInt64Encoder. It buffers every written value until Bytes is called.
type int64Encoder struct {
	// values holds the written values after zig zag encoding, in write order.
	values []uint64
}
|
|
|
|
|
2015-09-25 19:30:24 +00:00
|
|
|
func NewInt64Encoder() Int64Encoder {
|
2015-09-24 20:29:51 +00:00
|
|
|
return &int64Encoder{}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (e *int64Encoder) Write(v int64) {
|
2015-09-25 19:30:24 +00:00
|
|
|
e.values = append(e.values, ZigZagEncode(v))
|
2015-09-24 20:29:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (e *int64Encoder) Bytes() ([]byte, error) {
|
|
|
|
for _, v := range e.values {
|
|
|
|
// Value is too large to encode using packed format
|
2015-09-25 19:30:24 +00:00
|
|
|
if v > simple8b.MaxValue {
|
2015-09-24 20:29:51 +00:00
|
|
|
return e.encodeUncompressed()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-09-25 19:30:24 +00:00
|
|
|
return e.encodePacked()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (e *int64Encoder) encodePacked() ([]byte, error) {
|
2015-09-29 05:06:17 +00:00
|
|
|
encoded, err := simple8b.EncodeAll(e.values)
|
2015-09-24 20:29:51 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2015-09-25 21:27:43 +00:00
|
|
|
b := make([]byte, 1+len(encoded)*8)
|
2015-09-25 19:30:24 +00:00
|
|
|
// 4 high bits of first byte store the encoding type for the block
|
2015-10-02 16:03:20 +00:00
|
|
|
b[0] = byte(intCompressedSimple) << 4
|
2015-09-25 19:30:24 +00:00
|
|
|
|
|
|
|
for i, v := range encoded {
|
2015-09-25 21:27:43 +00:00
|
|
|
binary.BigEndian.PutUint64(b[1+i*8:1+i*8+8], v)
|
2015-09-25 19:30:24 +00:00
|
|
|
}
|
|
|
|
return b, nil
|
2015-09-24 20:29:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (e *int64Encoder) encodeUncompressed() ([]byte, error) {
|
|
|
|
b := make([]byte, 1+len(e.values)*8)
|
|
|
|
// 4 high bits of first byte store the encoding type for the block
|
2015-10-02 16:03:20 +00:00
|
|
|
b[0] = byte(intUncompressed) << 4
|
2015-09-25 19:30:24 +00:00
|
|
|
|
2015-09-24 20:29:51 +00:00
|
|
|
for i, v := range e.values {
|
2015-09-25 21:27:43 +00:00
|
|
|
binary.BigEndian.PutUint64(b[1+i*8:1+i*8+8], v)
|
2015-09-24 20:29:51 +00:00
|
|
|
}
|
|
|
|
return b, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// int64Decoder is the Int64Decoder implementation returned by
// NewInt64Decoder. It decodes its input lazily, one 8 byte word at a time.
type int64Decoder struct {
	// values is the scratch buffer holding the zig zag encoded values
	// produced by the most recent decode step.
	values []uint64
	// bytes is the portion of the input that has not been decoded yet.
	bytes []byte
	// i is the index into values of the current value; n is the number of
	// valid entries in values.
	i, n int

	// encoding is the encoding type taken from the 4 high bits of the
	// input's first byte.
	encoding byte
	// err is the error recorded while decoding, if any.
	err error
}
|
|
|
|
|
2015-09-25 19:30:24 +00:00
|
|
|
func NewInt64Decoder(b []byte) Int64Decoder {
|
|
|
|
d := &int64Decoder{
|
2015-10-05 18:52:19 +00:00
|
|
|
// 240 is the maximum number of values that can be encoded into a single uint64 using simple8b
|
2015-09-25 21:27:43 +00:00
|
|
|
values: make([]uint64, 240),
|
2015-09-25 19:30:24 +00:00
|
|
|
}
|
2015-09-25 21:27:43 +00:00
|
|
|
|
|
|
|
d.SetBytes(b)
|
2015-09-24 20:29:51 +00:00
|
|
|
return d
|
|
|
|
}
|
|
|
|
|
2015-09-25 19:30:24 +00:00
|
|
|
func (d *int64Decoder) SetBytes(b []byte) {
|
2015-09-25 21:27:43 +00:00
|
|
|
if len(b) > 0 {
|
|
|
|
d.encoding = b[0] >> 4
|
|
|
|
d.bytes = b[1:]
|
|
|
|
}
|
|
|
|
d.i = 0
|
|
|
|
d.n = 0
|
2015-09-25 19:30:24 +00:00
|
|
|
}
|
|
|
|
|
2015-09-24 20:29:51 +00:00
|
|
|
func (d *int64Decoder) Next() bool {
|
2015-09-25 21:27:43 +00:00
|
|
|
if d.i >= d.n && len(d.bytes) == 0 {
|
2015-09-24 20:29:51 +00:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2015-09-25 21:27:43 +00:00
|
|
|
d.i += 1
|
2015-09-24 20:29:51 +00:00
|
|
|
|
2015-09-25 21:27:43 +00:00
|
|
|
if d.i >= d.n {
|
|
|
|
switch d.encoding {
|
2015-10-02 16:03:20 +00:00
|
|
|
case intUncompressed:
|
2015-09-25 21:27:43 +00:00
|
|
|
d.decodeUncompressed()
|
2015-10-02 16:03:20 +00:00
|
|
|
case intCompressedSimple:
|
2015-09-25 21:27:43 +00:00
|
|
|
d.decodePacked()
|
|
|
|
default:
|
2015-10-02 16:46:58 +00:00
|
|
|
d.err = fmt.Errorf("unknown encoding %v", d.encoding)
|
2015-09-25 21:27:43 +00:00
|
|
|
}
|
2015-09-24 20:29:51 +00:00
|
|
|
}
|
2015-09-25 21:27:43 +00:00
|
|
|
return d.i < d.n
|
|
|
|
}
|
2015-09-24 20:29:51 +00:00
|
|
|
|
2015-10-02 16:46:58 +00:00
|
|
|
func (d *int64Decoder) Error() error {
|
|
|
|
return d.err
|
|
|
|
}
|
|
|
|
|
2015-09-25 21:27:43 +00:00
|
|
|
func (d *int64Decoder) Read() int64 {
|
|
|
|
return ZigZagDecode(d.values[d.i])
|
2015-09-24 20:29:51 +00:00
|
|
|
}
|
|
|
|
|
2015-09-25 21:27:43 +00:00
|
|
|
func (d *int64Decoder) decodePacked() {
|
|
|
|
if len(d.bytes) == 0 {
|
2015-09-25 19:30:24 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2015-09-25 21:27:43 +00:00
|
|
|
v := binary.BigEndian.Uint64(d.bytes[0:8])
|
2015-10-02 16:46:58 +00:00
|
|
|
n, err := simple8b.Decode(d.values, v)
|
|
|
|
if err != nil {
|
|
|
|
// Should never happen, only error that could be returned is if the the value to be decoded was not
|
|
|
|
// actually encoded by simple8b encoder.
|
|
|
|
d.err = fmt.Errorf("failed to decode value %v: %v", v, err)
|
|
|
|
}
|
2015-09-25 19:30:24 +00:00
|
|
|
|
2015-09-25 21:27:43 +00:00
|
|
|
d.n = n
|
|
|
|
d.i = 0
|
|
|
|
d.bytes = d.bytes[8:]
|
2015-09-24 20:29:51 +00:00
|
|
|
}
|
|
|
|
|
2015-09-25 21:27:43 +00:00
|
|
|
func (d *int64Decoder) decodeUncompressed() {
|
|
|
|
d.values[0] = binary.BigEndian.Uint64(d.bytes[0:8])
|
|
|
|
d.i = 0
|
|
|
|
d.n = 1
|
|
|
|
d.bytes = d.bytes[8:]
|
2015-09-24 20:29:51 +00:00
|
|
|
}
|