281 lines
5.8 KiB
Go
281 lines
5.8 KiB
Go
package tsm1
|
|
|
|
/*
|
|
This code is originally from: https://github.com/dgryski/go-tsz and has been modified to remove
|
|
the timestamp compression functionality.
|
|
|
|
It implements the float compression as presented in: http://www.vldb.org/pvldb/vol8/p1816-teller.pdf.
|
|
This implementation uses a sentinel value of NaN which means that float64 NaN cannot be stored using
|
|
this version.
|
|
*/
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"math"
|
|
"math/bits"
|
|
|
|
"github.com/dgryski/go-bitstream"
|
|
)
|
|
|
|
// Note: an uncompressed format is not yet implemented.
|
|
// floatCompressedGorilla is a compressed format using the gorilla paper encoding
|
|
const floatCompressedGorilla = 1
|
|
|
|
// uvnan is the constant returned from math.NaN().
|
|
const uvnan = 0x7FF8000000000001
|
|
|
|
// FloatEncoder encodes multiple float64s into a byte slice.
|
|
type FloatEncoder struct {
|
|
val float64
|
|
err error
|
|
|
|
leading uint64
|
|
trailing uint64
|
|
|
|
buf bytes.Buffer
|
|
bw *bitstream.BitWriter
|
|
|
|
first bool
|
|
finished bool
|
|
}
|
|
|
|
// NewFloatEncoder returns a new FloatEncoder.
|
|
func NewFloatEncoder() *FloatEncoder {
|
|
s := FloatEncoder{
|
|
first: true,
|
|
leading: ^uint64(0),
|
|
}
|
|
|
|
s.bw = bitstream.NewWriter(&s.buf)
|
|
s.buf.WriteByte(floatCompressedGorilla << 4)
|
|
|
|
return &s
|
|
}
|
|
|
|
// Reset sets the encoder back to its initial state.
|
|
func (s *FloatEncoder) Reset() {
|
|
s.val = 0
|
|
s.err = nil
|
|
s.leading = ^uint64(0)
|
|
s.trailing = 0
|
|
s.buf.Reset()
|
|
s.buf.WriteByte(floatCompressedGorilla << 4)
|
|
|
|
s.bw.Resume(0x0, 8)
|
|
|
|
s.finished = false
|
|
s.first = true
|
|
}
|
|
|
|
// Bytes returns a copy of the underlying byte buffer used in the encoder.
|
|
func (s *FloatEncoder) Bytes() ([]byte, error) {
|
|
return s.buf.Bytes(), s.err
|
|
}
|
|
|
|
// Flush indicates there are no more values to encode.
|
|
func (s *FloatEncoder) Flush() {
|
|
if !s.finished {
|
|
// write an end-of-stream record
|
|
s.finished = true
|
|
s.Write(math.NaN())
|
|
s.bw.Flush(bitstream.Zero)
|
|
}
|
|
}
|
|
|
|
// Write encodes v to the underlying buffer.
|
|
func (s *FloatEncoder) Write(v float64) {
|
|
// Only allow NaN as a sentinel value
|
|
if math.IsNaN(v) && !s.finished {
|
|
s.err = fmt.Errorf("unsupported value: NaN")
|
|
return
|
|
}
|
|
if s.first {
|
|
// first point
|
|
s.val = v
|
|
s.first = false
|
|
s.bw.WriteBits(math.Float64bits(v), 64)
|
|
return
|
|
}
|
|
|
|
vDelta := math.Float64bits(v) ^ math.Float64bits(s.val)
|
|
|
|
if vDelta == 0 {
|
|
s.bw.WriteBit(bitstream.Zero)
|
|
} else {
|
|
s.bw.WriteBit(bitstream.One)
|
|
|
|
leading := uint64(bits.LeadingZeros64(vDelta))
|
|
trailing := uint64(bits.TrailingZeros64(vDelta))
|
|
|
|
// Clamp number of leading zeros to avoid overflow when encoding
|
|
leading &= 0x1F
|
|
if leading >= 32 {
|
|
leading = 31
|
|
}
|
|
|
|
// TODO(dgryski): check if it's 'cheaper' to reset the leading/trailing bits instead
|
|
if s.leading != ^uint64(0) && leading >= s.leading && trailing >= s.trailing {
|
|
s.bw.WriteBit(bitstream.Zero)
|
|
s.bw.WriteBits(vDelta>>s.trailing, 64-int(s.leading)-int(s.trailing))
|
|
} else {
|
|
s.leading, s.trailing = leading, trailing
|
|
|
|
s.bw.WriteBit(bitstream.One)
|
|
s.bw.WriteBits(leading, 5)
|
|
|
|
// Note that if leading == trailing == 0, then sigbits == 64. But that
|
|
// value doesn't actually fit into the 6 bits we have.
|
|
// Luckily, we never need to encode 0 significant bits, since that would
|
|
// put us in the other case (vdelta == 0). So instead we write out a 0 and
|
|
// adjust it back to 64 on unpacking.
|
|
sigbits := 64 - leading - trailing
|
|
s.bw.WriteBits(sigbits, 6)
|
|
s.bw.WriteBits(vDelta>>trailing, int(sigbits))
|
|
}
|
|
}
|
|
|
|
s.val = v
|
|
}
|
|
|
|
// FloatDecoder decodes a byte slice into multiple float64 values.
|
|
type FloatDecoder struct {
|
|
val uint64
|
|
|
|
leading uint64
|
|
trailing uint64
|
|
|
|
br BitReader
|
|
b []byte
|
|
|
|
first bool
|
|
finished bool
|
|
|
|
err error
|
|
}
|
|
|
|
// SetBytes initializes the decoder with b. Must call before calling Next().
|
|
func (it *FloatDecoder) SetBytes(b []byte) error {
|
|
var v uint64
|
|
if len(b) == 0 {
|
|
v = uvnan
|
|
} else {
|
|
// first byte is the compression type.
|
|
// we currently just have gorilla compression.
|
|
it.br.Reset(b[1:])
|
|
|
|
var err error
|
|
v, err = it.br.ReadBits(64)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
// Reset all fields.
|
|
it.val = v
|
|
it.leading = 0
|
|
it.trailing = 0
|
|
it.b = b
|
|
it.first = true
|
|
it.finished = false
|
|
it.err = nil
|
|
|
|
return nil
|
|
}
|
|
|
|
// Next returns true if there are remaining values to read.
|
|
func (it *FloatDecoder) Next() bool {
|
|
if it.err != nil || it.finished {
|
|
return false
|
|
}
|
|
|
|
if it.first {
|
|
it.first = false
|
|
|
|
// mark as finished if there were no values.
|
|
if it.val == uvnan { // IsNaN
|
|
it.finished = true
|
|
return false
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
// read compressed value
|
|
var bit bool
|
|
if it.br.CanReadBitFast() {
|
|
bit = it.br.ReadBitFast()
|
|
} else if v, err := it.br.ReadBit(); err != nil {
|
|
it.err = err
|
|
return false
|
|
} else {
|
|
bit = v
|
|
}
|
|
|
|
if !bit {
|
|
// it.val = it.val
|
|
} else {
|
|
var bit bool
|
|
if it.br.CanReadBitFast() {
|
|
bit = it.br.ReadBitFast()
|
|
} else if v, err := it.br.ReadBit(); err != nil {
|
|
it.err = err
|
|
return false
|
|
} else {
|
|
bit = v
|
|
}
|
|
|
|
if !bit {
|
|
// reuse leading/trailing zero bits
|
|
// it.leading, it.trailing = it.leading, it.trailing
|
|
} else {
|
|
bits, err := it.br.ReadBits(5)
|
|
if err != nil {
|
|
it.err = err
|
|
return false
|
|
}
|
|
it.leading = bits
|
|
|
|
bits, err = it.br.ReadBits(6)
|
|
if err != nil {
|
|
it.err = err
|
|
return false
|
|
}
|
|
mbits := bits
|
|
// 0 significant bits here means we overflowed and we actually need 64; see comment in encoder
|
|
if mbits == 0 {
|
|
mbits = 64
|
|
}
|
|
it.trailing = 64 - it.leading - mbits
|
|
}
|
|
|
|
mbits := uint(64 - it.leading - it.trailing)
|
|
bits, err := it.br.ReadBits(mbits)
|
|
if err != nil {
|
|
it.err = err
|
|
return false
|
|
}
|
|
|
|
vbits := it.val
|
|
vbits ^= (bits << it.trailing)
|
|
|
|
if vbits == uvnan { // IsNaN
|
|
it.finished = true
|
|
return false
|
|
}
|
|
it.val = vbits
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
// Values returns the current float64 value.
|
|
func (it *FloatDecoder) Values() float64 {
|
|
return math.Float64frombits(it.val)
|
|
}
|
|
|
|
// Error returns the current decoding error.
|
|
func (it *FloatDecoder) Error() error {
|
|
return it.err
|
|
}
|