package tsm1

import (
	"encoding/binary"
	"fmt"
	"io"
	"math"
	"math/bits"
	"unsafe"
)

// FloatArrayEncodeAll encodes src into b, returning b and any error encountered.
// The returned slice may be of a different length and capacity to b.
//
// Currently only the float compression scheme used in Facebook's Gorilla is
// supported, so this method implements a batch-oriented version of that.
func FloatArrayEncodeAll(src []float64, b []byte) ([]byte, error) {
	if cap(b) < 9 {
		b = make([]byte, 0, 9) // Enough room for the header and one value.
	}

	b = b[:1]
	b[0] = floatCompressedGorilla << 4
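	// The compression scheme is carried in the high 4 bits of the header
	// byte; this encoder leaves the low 4 bits zero.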

	var first float64
	var finished bool
	if len(src) > 0 && math.IsNaN(src[0]) {
		return nil, fmt.Errorf("unsupported value: NaN")
	} else if len(src) == 0 {
		first = math.NaN() // Write sentinel value to terminate batch.
		finished = true
	} else {
		first = src[0]
		src = src[1:]
	}

	b = b[:9]
	n := uint64(8 + 64) // Number of bits written: 8 header bits plus the 64-bit first value.
	prev := math.Float64bits(first)

	// Write first value.
	binary.BigEndian.PutUint64(b[1:], prev)

	prevLeading, prevTrailing := ^uint64(0), uint64(0)
	var leading, trailing uint64
	var mask uint64
	var sum float64

	// Encode remaining values.
	for i := 0; !finished; i++ {
		var x float64
		if i < len(src) {
			x = src[i]
			sum += x
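			// NaN propagates through addition, so accumulating sum lets the
			// encoder detect a NaN anywhere in the batch with a single
			// IsNaN(sum) check after the loop.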
		} else {
			// Encode sentinel value to terminate batch.
			x = math.NaN()
			finished = true
		}

		{
			cur := math.Float64bits(x)
			vDelta := cur ^ prev
			if vDelta == 0 {
				n++ // Write a zero bit. Nothing else to do.
				prev = cur
				continue
			}

			// First the current bit of the current byte is set to indicate we're
			// writing a delta value to the stream.
			for n>>3 >= uint64(len(b)) { // Keep growing b until we can fit all bits in.
				b = append(b, byte(0))
			}

			// n&7 - current bit in current byte.
			// n>>3 - the current byte.
			b[n>>3] |= 128 >> (n & 7) // Sets the current bit of the current byte.
			n++

			// Write the delta to b.

			// Determine the leading and trailing zeros.
			leading = uint64(bits.LeadingZeros64(vDelta))
			trailing = uint64(bits.TrailingZeros64(vDelta))

			// Clamp number of leading zeros to avoid overflow when encoding.
			leading &= 0x1F
			if leading >= 32 {
				leading = 31
			}
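			// leading is encoded in a 5-bit field below, so it must fit in
			// [0, 31]; the mask already guarantees this, making the check
			// above defensive.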

			// At least 2 further bits will be required.
			if (n+2)>>3 >= uint64(len(b)) {
				b = append(b, byte(0))
			}

			if prevLeading != ^uint64(0) && leading >= prevLeading && trailing >= prevTrailing {
				n++ // Write a zero bit.

				// Write the l least significant bits of vDelta to b, most significant
				// bit first.
				l := uint64(64 - prevLeading - prevTrailing)
				for (n+l)>>3 >= uint64(len(b)) { // Keep growing b until we can fit all bits in.
					b = append(b, byte(0))
				}

				// Full value to write.
				v := (vDelta >> prevTrailing) << (64 - l) // l least significant bits of v.
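				// For example, with prevTrailing == 2 and l == 7, the 7
				// meaningful bits of vDelta are shifted past the two trailing
				// zeros and then parked in the top 7 bits of v.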

				var m = n & 7 // Current bit in current byte.
				var written uint64
				if m > 0 { // In this case the current byte is not full.
					written = 8 - m
					if l < written {
						written = l
					}
					mask = v >> 56 // Move 8 MSB to 8 LSB.
					b[n>>3] |= byte(mask >> m)
					n += written

					if l-written == 0 {
						prev = cur
						continue
					}
				}

				vv := v << written // Move written bits out of the way.

				// TODO(edd): Optimise this. It's unlikely we actually need to write 8 bytes.
				if (n>>3)+8 >= uint64(len(b)) {
					b = append(b, 0, 0, 0, 0, 0, 0, 0, 0)
				}
				binary.BigEndian.PutUint64(b[n>>3:], vv)
				n += (l - written)
			} else {
				prevLeading, prevTrailing = leading, trailing

				// Set a single bit to indicate a value will follow.
				b[n>>3] |= 128 >> (n & 7) // Set current bit on current byte.
				n++

				// Write 5 bits of leading.
				if (n+5)>>3 >= uint64(len(b)) {
					b = append(b, byte(0))
				}

				// Enough room to write the 5 bits in the current byte?
				var m = n & 7
				l := uint64(5)
				v := leading << 59 // 5 LSB of leading.
				mask = v >> 56     // Move 5 MSB to 8 LSB.
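				// The current byte has 8-m free bits, so all 5 fit only when
				// m <= 3; otherwise the write is split across two bytes below.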
				if m <= 3 { // 5 bits fit into current byte.
					b[n>>3] |= byte(mask >> m)
					n += l
				} else { // In this case there are fewer than 5 bits available in current byte.
					// First step is to fill current byte.
					written := 8 - m
					b[n>>3] |= byte(mask >> m) // Some of mask will get lost.
					n += written

					// Second step is to write the lost part of mask into the next byte.
					mask = v << written // Move bits written in previous byte out of the way.
					mask >>= 56

					m = n & 7 // Recompute current bit.
					b[n>>3] |= byte(mask >> m)
					n += (l - written)
				}

				// Note that if leading == trailing == 0, then sigbits == 64. But that
				// value doesn't actually fit into the 6 bits we have.
				// Luckily, we never need to encode 0 significant bits, since that would
				// put us in the other case (vDelta == 0). So instead we write out a 0 and
				// adjust it back to 64 on unpacking.
				sigbits := 64 - leading - trailing

				if (n+6)>>3 >= uint64(len(b)) {
					b = append(b, byte(0))
				}

				m = n & 7
				l = uint64(6)
				v = sigbits << 58 // Move 6 LSB of sigbits to MSB.
				mask = v >> 56    // Move 6 MSB to 8 LSB.
				if m <= 2 {
					// The 6 bits fit into the current byte.
					b[n>>3] |= byte(mask >> m)
					n += l
				} else { // In this case there are fewer than 6 bits available in current byte.
					// First step is to fill the current byte.
					written := 8 - m
					b[n>>3] |= byte(mask >> m) // Write to the current bit.
					n += written

					// Second step is to write the lost part of mask into the next byte.
					mask = v << written // Move bits written in previous byte out of the way.
					mask >>= 56

					m = n & 7 // Recompute current bit.
					b[n>>3] |= byte(mask >> m)
					n += l - written
				}

				// Write final value.
				m = n & 7
				l = sigbits
				v = (vDelta >> trailing) << (64 - l) // Move l LSB into MSB.
				for (n+l)>>3 >= uint64(len(b)) { // Keep growing b until we can fit all bits in.
					b = append(b, byte(0))
				}

				var written uint64
				if m > 0 { // In this case the current byte is not full.
					written = 8 - m
					if l < written {
						written = l
					}
					mask = v >> 56 // Move 8 MSB to 8 LSB.
					b[n>>3] |= byte(mask >> m)
					n += written

					if l-written == 0 {
						prev = cur
						continue
					}
				}

				// Shift remaining bits and write out in one go.
				vv := v << written // Remove bits written in previous byte.
				// TODO(edd): Optimise this.
				if (n>>3)+8 >= uint64(len(b)) {
					b = append(b, 0, 0, 0, 0, 0, 0, 0, 0)
				}

				binary.BigEndian.PutUint64(b[n>>3:], vv)
				n += (l - written)
			}
			prev = cur
		}
	}

	if math.IsNaN(sum) {
		return nil, fmt.Errorf("unsupported value: NaN")
	}

	length := n >> 3
	if n&7 > 0 {
		length++ // Add an extra byte to capture overflowing bits.
	}
	return b[:length], nil
}
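
// encodeExample is an editorial usage sketch, not part of the original file:
// it shows the intended calling pattern, passing a reusable scratch buffer so
// repeated encodes avoid allocating. The name encodeExample is an assumption.
func encodeExample(values []float64, scratch []byte) ([]byte, error) {
	// The returned slice may alias scratch or be a newly grown buffer, so
	// callers should keep the return value rather than the original scratch.
	return FloatArrayEncodeAll(values, scratch[:0])
}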

// bitMask contains a lookup table where the index is the number of bits
// and the value is a mask. The table is always read by ANDing the index
// with 0x3f, such that if the index is 64, position 0 will be read, which
// is 0xffffffffffffffff, thus returning all bits.
//
// 00 = 0xffffffffffffffff
// 01 = 0x0000000000000001
// 02 = 0x0000000000000003
// 03 = 0x0000000000000007
// ...
// 62 = 0x3fffffffffffffff
// 63 = 0x7fffffffffffffff
var bitMask [64]uint64

func init() {
	v := uint64(1)
	for i := 1; i <= 64; i++ {
		bitMask[i&0x3f] = v
		v = v<<1 | 1
	}
}
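
// lowBits is an editorial illustration, not part of the original file: it
// shows the intended read pattern for bitMask, where masking the width with
// 0x3f maps a width of 64 onto index 0, whose value is all ones.
func lowBits(v uint64, width uint8) uint64 {
	return v & bitMask[width&0x3f] // width == 64 wraps to bitMask[0] == ^uint64(0)
}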

func FloatArrayDecodeAll(b []byte, buf []float64) ([]float64, error) {
	if len(b) < 9 {
		return []float64{}, nil
	}

	var (
		val         uint64      // current value
		trailingN   uint8       // trailing zero count
		meaningfulN uint8  = 64 // meaningful bit count
	)

	// first byte is the compression type; always Gorilla
	b = b[1:]

	val = binary.BigEndian.Uint64(b)
	if val == uvnan {
		if buf == nil {
			var tmp [1]float64
			buf = tmp[:0]
		}
		// special case: there were no values to decode
		return buf[:0], nil
	}

	buf = buf[:0]
	// convert the []float64 to []uint64 to avoid calling math.Float64Frombits,
	// which results in unnecessary moves between Xn registers before moving
	// the value into the float64 slice. This change increased performance from
	// 320 MB/s to 340 MB/s on an Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz.
	dst := *(*[]uint64)(unsafe.Pointer(&buf))
	dst = append(dst, val)

	b = b[8:]

	// The bit reader code uses brCachedVal to store up to the next 8 bytes
	// of MSB data read from b. brValidBits stores the number of remaining unread
	// bits starting from the MSB. Before N bits are read from brCachedVal,
	// they are left-rotated N bits, such that they end up in the left-most position.
	// Using bits.RotateLeft64 results in a single instruction on many CPU architectures.
	// This approach permits simple tests, such as for the two control bits:
	//
	//	brCachedVal&1 > 0
	//
	// The alternative was to leave brCachedVal alone and perform shifts and
	// masks to read specific bits. The original approach looked like the
	// following:
	//
	//	brCachedVal&(1<<(brValidBits&0x3f)) > 0
	//
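	// Concretely, reading one control bit with the rotating scheme looks like:
	//
	//	brCachedVal = bits.RotateLeft64(brCachedVal, 1) // old MSB is now bit 0
	//	if brCachedVal&1 > 0 { ... }
	//
	// (editorial illustration of the pattern used throughout the loop below)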
	var (
		brCachedVal = uint64(0) // a buffer of up to the next 8 bytes read from b in MSB order
		brValidBits = uint8(0)  // the number of unread bits remaining in brCachedVal
	)

	// Refill brCachedVal, reading up to 8 bytes from b
	if len(b) >= 8 {
		// fast path reads 8 bytes directly
		brCachedVal = binary.BigEndian.Uint64(b)
		brValidBits = 64
		b = b[8:]
	} else if len(b) > 0 {
		brCachedVal = 0
		brValidBits = uint8(len(b) * 8)
		for i := range b {
			brCachedVal = (brCachedVal << 8) | uint64(b[i])
		}
		brCachedVal = bits.RotateLeft64(brCachedVal, -int(brValidBits))
		b = b[:0]
	} else {
		goto ERROR
	}
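	// Editorial note: in the short-read path above, the remaining bytes are
	// packed into the low bits of brCachedVal and then rotated right by
	// brValidBits so they occupy the most significant positions, matching
	// the layout produced by the 8-byte fast path.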

	// The expected exit condition is for a uvnan to be decoded.
	// Any other error (EOF) indicates a truncated stream.
	for {
		if brValidBits > 0 {
			// The branch on brValidBits > 0 is effectively unpredictable, so
			// we place the most likely case inside the if and immediately
			// jump, keeping the instruction pipeline consistently full.
			// This is a similar approach to using the GCC __builtin_expect
			// intrinsic, which modifies the order of branches such that the
			// likely case follows the conditional jump.
			//
			// Writing this as if brValidBits == 0, with the refill of
			// brCachedVal inside the branch, reduces benchmarks from
			// 318 MB/s to 260 MB/s on an Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz.
			goto READ0
		}

		// Refill brCachedVal, reading up to 8 bytes from b
		if len(b) >= 8 {
			brCachedVal = binary.BigEndian.Uint64(b)
			brValidBits = 64
			b = b[8:]
		} else if len(b) > 0 {
			brCachedVal = 0
			brValidBits = uint8(len(b) * 8)
			for i := range b {
				brCachedVal = (brCachedVal << 8) | uint64(b[i])
			}
			brCachedVal = bits.RotateLeft64(brCachedVal, -int(brValidBits))
			b = b[:0]
		} else {
			goto ERROR
		}

	READ0:
		// read control bit 0
		brValidBits -= 1
		brCachedVal = bits.RotateLeft64(brCachedVal, 1)
		if brCachedVal&1 > 0 {
			if brValidBits > 0 {
				goto READ1
			}

			// Refill brCachedVal, reading up to 8 bytes from b
			if len(b) >= 8 {
				brCachedVal = binary.BigEndian.Uint64(b)
				brValidBits = 64
				b = b[8:]
			} else if len(b) > 0 {
				brCachedVal = 0
				brValidBits = uint8(len(b) * 8)
				for i := range b {
					brCachedVal = (brCachedVal << 8) | uint64(b[i])
				}
				brCachedVal = bits.RotateLeft64(brCachedVal, -int(brValidBits))
				b = b[:0]
			} else {
				goto ERROR
			}

		READ1:
			// read control bit 1
			brValidBits -= 1
			brCachedVal = bits.RotateLeft64(brCachedVal, 1)
			if brCachedVal&1 > 0 {
				// read 5 bits for the leading zero count and 6 bits for the meaningful data count
				const leadingTrailingBitCount = 11
				var lmBits uint64 // leading + meaningful data counts
				if brValidBits >= leadingTrailingBitCount {
					// decode 5 bits leading + 6 bits meaningful for a total of 11 bits
					brValidBits -= leadingTrailingBitCount
					brCachedVal = bits.RotateLeft64(brCachedVal, leadingTrailingBitCount)
					lmBits = brCachedVal
				} else {
					bits01 := uint8(11)
					if brValidBits > 0 {
						bits01 -= brValidBits
						lmBits = bits.RotateLeft64(brCachedVal, 11)
					}

					// Refill brCachedVal, reading up to 8 bytes from b
					if len(b) >= 8 {
						brCachedVal = binary.BigEndian.Uint64(b)
						brValidBits = 64
						b = b[8:]
					} else if len(b) > 0 {
						brCachedVal = 0
						brValidBits = uint8(len(b) * 8)
						for i := range b {
							brCachedVal = (brCachedVal << 8) | uint64(b[i])
						}
						brCachedVal = bits.RotateLeft64(brCachedVal, -int(brValidBits))
						b = b[:0]
					} else {
						goto ERROR
					}
					brCachedVal = bits.RotateLeft64(brCachedVal, int(bits01))
					brValidBits -= bits01
					lmBits &^= bitMask[bits01&0x3f]
					lmBits |= brCachedVal & bitMask[bits01&0x3f]
				}

				lmBits &= 0x7ff
				leadingN := uint8((lmBits >> 6) & 0x1f) // 5 bits leading
				meaningfulN = uint8(lmBits & 0x3f)      // 6 bits meaningful
				if meaningfulN > 0 {
					trailingN = 64 - leadingN - meaningfulN
				} else {
					// meaningfulN == 0 is a special case, such that all 64
					// bits are meaningful
					trailingN = 0
					meaningfulN = 64
				}
			}
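			// Editorial note: this mirrors the encoder's sigbits comment
			// above; 64 significant bits are stored as 0 and restored here.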

			var sBits uint64 // significant bits
			if brValidBits >= meaningfulN {
				brValidBits -= meaningfulN
				brCachedVal = bits.RotateLeft64(brCachedVal, int(meaningfulN))
				sBits = brCachedVal
			} else {
				mBits := meaningfulN
				if brValidBits > 0 {
					mBits -= brValidBits
					sBits = bits.RotateLeft64(brCachedVal, int(meaningfulN))
				}

				// Refill brCachedVal, reading up to 8 bytes from b
				if len(b) >= 8 {
					brCachedVal = binary.BigEndian.Uint64(b)
					brValidBits = 64
					b = b[8:]
				} else if len(b) > 0 {
					brCachedVal = 0
					brValidBits = uint8(len(b) * 8)
					for i := range b {
						brCachedVal = (brCachedVal << 8) | uint64(b[i])
					}
					brCachedVal = bits.RotateLeft64(brCachedVal, -int(brValidBits))
					b = b[:0]
				} else {
					goto ERROR
				}
				brCachedVal = bits.RotateLeft64(brCachedVal, int(mBits))
				brValidBits -= mBits
				sBits &^= bitMask[mBits&0x3f]
				sBits |= brCachedVal & bitMask[mBits&0x3f]
			}
			sBits &= bitMask[meaningfulN&0x3f]

			val ^= sBits << (trailingN & 0x3f)
			if val == uvnan {
				// The sentinel NaN marks the normal end of the stream.
				break
			}
		}

		dst = append(dst, val)
	}

	return *(*[]float64)(unsafe.Pointer(&dst)), nil

ERROR:
	return (*(*[]float64)(unsafe.Pointer(&dst)))[:0], io.EOF
}
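
// roundTrip is an editorial sketch, not part of the original file: it wires
// the encode and decode halves together. Both buffer arguments are nil here
// for brevity; production callers pass reusable buffers.
func roundTrip(values []float64) ([]float64, error) {
	b, err := FloatArrayEncodeAll(values, nil)
	if err != nil {
		return nil, err
	}
	return FloatArrayDecodeAll(b, nil)
}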