influxdb/tsdb/engine/tsm1/bool.go

170 lines
3.7 KiB
Go

package tsm1
// Boolean encoding uses 1 bit per value. Each compressed byte slice contains a 1 byte header
// indicating the compression type, followed by a variable byte encoded length indicating
// how many booleans are packed in the slice. The remaining bytes contain 1 byte for every
// 8 boolean values encoded.
import (
"encoding/binary"
"fmt"
)
// Note: an uncompressed boolean format is not yet implemented.

// booleanCompressedBitPacked identifies the bit packed format, which uses 1 bit
// per boolean. BooleanEncoder.Bytes stores this value in the 4 high bits of the
// first encoded byte.
const booleanCompressedBitPacked = 1
// BooleanEncoder packs a sequence of booleans into an in-memory buffer,
// spending a single bit on each value.
type BooleanEncoder struct {
	bytes []byte // bytes that have been completely packed so far
	b     byte   // working byte that is currently being packed
	i     int    // how many bools have been packed into b
	n     int    // how many bools have been written in total
}
// NewBooleanEncoder returns a new instance of BooleanEncoder.
//
// sz is a hint for how many booleans will be written: the internal buffer is
// created empty with capacity for that many bits (one byte per 8 values,
// rounded up). A negative sz is treated as 0 instead of reaching make with a
// negative capacity, which would panic.
func NewBooleanEncoder(sz int) BooleanEncoder {
	if sz < 0 {
		sz = 0
	}
	return BooleanEncoder{
		bytes: make([]byte, 0, (sz+7)/8),
	}
}
// Reset puts the encoder back into its initial state. The buffer is truncated
// to zero length rather than released, so its capacity stays available for
// the next series of writes.
func (e *BooleanEncoder) Reset() {
	e.bytes, e.b, e.i, e.n = e.bytes[:0], 0, 0, 0
}
// Write appends a single boolean to the encoder.
func (e *BooleanEncoder) Write(b bool) {
	// The working byte already holds 8 values; move it to the buffer first.
	if e.i >= 8 {
		e.flush()
	}

	// Each value takes one bit: make room on the right of the working byte,
	// then place the new bit in the least significant position.
	var bit byte
	if b {
		bit = 1
	}
	e.b = e.b<<1 | bit

	// Track both the fill level of the working byte and the overall count.
	e.i++
	e.n++
}
// flush moves the byte currently being packed into the buffer.
//
// Unused low-order bits are filled with 0s so that the values written so far
// end up in the most significant bits of the stored byte. When no bits are
// pending flush does nothing, so an encoder without pending values never
// appends a padding byte that represents no booleans, and flushing twice in a
// row is harmless.
func (e *BooleanEncoder) flush() {
	// Nothing pending: appending here would only add a spurious zero byte.
	if e.i == 0 {
		return
	}

	// Pad the remainder of the byte with 0s in a single shift.
	if e.i < 8 {
		e.b <<= uint(8 - e.i)
	}

	e.bytes = append(e.bytes, e.b)
	e.b, e.i = 0, 0
}
// Flush is a no-op. It does not move pending bits into the buffer; Bytes
// performs the actual flush before returning the encoded data.
func (e *BooleanEncoder) Flush() {}
// Bytes returns a new byte slice containing the encoded booleans from previous
// calls to Write.
//
// The result is laid out as: one header byte carrying the encoding type in its
// 4 high bits, the uvarint-encoded number of booleans written, and then the
// packed bytes. The returned error is always nil.
func (e *BooleanEncoder) Bytes() ([]byte, error) {
	// Ensure the current byte is flushed
	e.flush()

	// The header is at most the type byte plus a maximum-length uvarint.
	var hdr [1 + binary.MaxVarintLen64]byte

	// Store the encoding type in the 4 high bits of the first byte
	hdr[0] = byte(booleanCompressedBitPacked) << 4

	// Encode the number of booleans written
	n := 1 + binary.PutUvarint(hdr[1:], uint64(e.n))

	// Size the result exactly so header and payload are copied into a single
	// allocation instead of growing a too-small header buffer.
	out := make([]byte, 0, n+len(e.bytes))
	out = append(out, hdr[:n]...)
	out = append(out, e.bytes...)
	return out, nil
}
// BooleanDecoder reads back, one value at a time, a series of booleans that
// were packed into an in-memory buffer.
type BooleanDecoder struct {
	b   []byte // packed values, positioned after the header byte and the count
	i   int    // index of the current bit; SetBytes sets it to -1, Next advances it
	n   int    // number of booleans that can be read
	err error  // error recorded by SetBytes and reported by Error
}
// SetBytes initializes the decoder with a new set of bytes to read from.
// This must be called before calling any other methods.
//
// The expected layout is a 1 byte header holding the encoding type, a uvarint
// count of encoded booleans, and then the bit packed values. Any state left
// over from a previous call, including a previously recorded error, is
// discarded first so that a decoder can be reused. An empty b leaves the
// decoder with nothing to read; a count that cannot be decoded is reported
// through Error.
func (e *BooleanDecoder) SetBytes(b []byte) {
	// Start from a clean slate so a reused decoder never serves values from
	// an earlier buffer or stays stuck on an earlier error.
	e.b, e.i, e.n, e.err = nil, -1, 0, nil

	if len(b) == 0 {
		return
	}

	// First byte stores the encoding type. Only one bit packed format exists
	// currently, so it is ignored for now.
	b = b[1:]
	count, sz := binary.Uvarint(b)
	if sz <= 0 {
		e.err = fmt.Errorf("BooleanDecoder: invalid count")
		return
	}
	e.b = b[sz:]

	// Clamp while still unsigned: a corrupt count larger than the payload can
	// hold must neither run Read past the end of the buffer nor wrap around
	// to a negative int.
	if limit := uint64(len(e.b)) * 8; count > limit {
		// Shouldn't happen - TSM file was truncated/corrupted
		count = limit
	}
	e.n = int(count)
}
// Next advances the decoder to the following bit and reports whether one is
// available to read. Once an error has been recorded it always returns false
// without advancing; the error itself is available from the Error method.
func (e *BooleanDecoder) Next() bool {
	if e.err == nil {
		e.i++
		return e.i < e.n
	}
	return false
}
// Read returns the boolean at the decoder's current position.
func (e *BooleanDecoder) Read() bool {
	// Values are stored most significant bit first, so the bit for position i
	// sits (7 - i mod 8) places above the least significant bit.
	shift := uint(7 - (e.i & 0x7))

	// i >> 3 is the integer division by 8 that selects the packed byte.
	packed := e.b[e.i>>3]

	// Bring the wanted bit down to the lowest position and test it.
	return (packed>>shift)&1 == 1
}
// Error returns the error encountered during decoding, if one occurred.
// It is nil unless SetBytes was unable to decode the boolean count.
func (e *BooleanDecoder) Error() error {
return e.err
}