package tsm1

// Boolean encoding uses 1 bit per value. Each compressed byte slice contains a
// 1 byte header indicating the compression type, followed by a variable byte
// encoded length indicating how many booleans are packed in the slice. The
// remaining bytes contain 1 byte for every 8 boolean values encoded.

import (
	"encoding/binary"
	"fmt"
)

// Note: an uncompressed boolean format is not yet implemented.

// booleanCompressedBitPacked is a bit packed format using 1 bit per boolean.
const booleanCompressedBitPacked = 1

// BooleanEncoder encodes a series of booleans to an in-memory buffer.
type BooleanEncoder struct {
	// The encoded bytes.
	bytes []byte

	// The current byte being encoded.
	b byte

	// The number of bools packed into b.
	i int

	// The total number of bools written.
	n int
}

// NewBooleanEncoder returns a new instance of BooleanEncoder whose buffer is
// pre-sized to hold sz booleans. A negative sz is treated as zero.
func NewBooleanEncoder(sz int) BooleanEncoder {
	// A sufficiently negative sz would yield a negative capacity and make
	// would panic, so clamp it.
	if sz < 0 {
		sz = 0
	}
	return BooleanEncoder{
		bytes: make([]byte, 0, (sz+7)/8),
	}
}

// Reset sets the encoder to its initial state. The underlying buffer is kept
// so its capacity can be reused.
func (e *BooleanEncoder) Reset() {
	e.bytes = e.bytes[:0]
	e.b = 0
	e.i = 0
	e.n = 0
}

// Write encodes b to the underlying buffer.
func (e *BooleanEncoder) Write(b bool) {
	// If we have filled the current byte, flush it.
	if e.i >= 8 {
		e.flush()
	}

	// Use 1 bit for each boolean value: shift the current byte by 1 and set
	// the least significant bit accordingly.
	e.b = e.b << 1
	if b {
		e.b |= 1
	}

	// Increment the current boolean count.
	e.i++
	// Increment the total boolean count.
	e.n++
}

// flush appends the byte currently being assembled to the encoded bytes,
// padding its unused low-order bits with zeros. It does nothing when no bits
// are pending, so flushing an empty or already flushed encoder never emits a
// spurious padding byte.
func (e *BooleanEncoder) flush() {
	if e.i == 0 {
		return
	}

	// Left-align the pending bits; e.i is in [1, 8] here, so the shift is in
	// [0, 7] and is equivalent to padding with zero bits one at a time.
	e.b <<= uint(8 - e.i)

	e.bytes = append(e.bytes, e.b)
	e.b = 0
	e.i = 0
}

// Flush is a no-op.
func (e *BooleanEncoder) Flush() {}

// Bytes returns a new byte slice containing the encoded booleans from previous
// calls to Write. The returned error is always nil.
//
// Bytes pads and flushes the byte currently being assembled, so values written
// after a call to Bytes start in a fresh byte while the count keeps growing.
// Call Reset before encoding a new series.
func (e *BooleanEncoder) Bytes() ([]byte, error) {
	// Ensure the current byte is flushed.
	e.flush()

	// One header byte plus the largest possible uvarint. The capacity also
	// covers the payload so the append below does not reallocate.
	const headerMax = 1 + binary.MaxVarintLen64
	b := make([]byte, headerMax, headerMax+len(e.bytes))

	// Store the encoding type in the 4 high bits of the first byte.
	b[0] = byte(booleanCompressedBitPacked) << 4

	i := 1
	// Encode the number of booleans written.
	i += binary.PutUvarint(b[i:], uint64(e.n))

	// Append the packed booleans.
	return append(b[:i], e.bytes...), nil
}

// BooleanDecoder decodes a series of booleans from an in-memory buffer.
type BooleanDecoder struct {
	// The packed boolean bits, most significant bit first.
	b []byte
	// The index of the current bit; -1 before the first call to Next.
	i int
	// The number of booleans available.
	n   int
	err error
}

// SetBytes initializes the decoder with a new set of bytes to read from.
// This must be called before calling any other methods.
//
// Any state left over from a previous call, including a previous error, is
// discarded. An empty b yields a decoder with no values and no error.
func (d *BooleanDecoder) SetBytes(b []byte) {
	// Start from a clean slate so a reused decoder never exposes bits or an
	// error belonging to an earlier block.
	d.b, d.i, d.n, d.err = nil, -1, 0, nil

	if len(b) == 0 {
		return
	}

	// The first byte stores the encoding type. Only one bit-packed format
	// exists currently, so it is ignored for now.
	b = b[1:]
	count, n := binary.Uvarint(b)
	if n <= 0 {
		d.err = fmt.Errorf("booleanDecoder: invalid count")
		return
	}

	d.b = b[n:]

	// Clamp before narrowing to int so an oversized or corrupt count can
	// neither wrap around nor exceed the bits actually present.
	if limit := uint64(len(d.b)) * 8; count > limit {
		// Shouldn't happen - TSM file was truncated/corrupted.
		count = limit
	}
	d.n = int(count)
}

// Next returns whether there are any bits remaining in the decoder.
// It returns false if there was an error decoding.
// The error is available on the Error method.
func (d *BooleanDecoder) Next() bool {
	if d.err != nil {
		return false
	}

	d.i++
	return d.i < d.n
}

// Read returns the bit at the decoder's current position. It must only be
// called after Next has returned true; otherwise the index may be out of range.
func (d *BooleanDecoder) Read() bool {
	// Index into the byte slice.
	idx := d.i >> 3 // integer division by 8

	// Bit position; bits are stored most significant first.
	pos := 7 - (d.i & 0x7)

	// The mask to select the bit.
	mask := byte(1 << uint(pos))

	// The packed byte.
	v := d.b[idx]

	// Returns true if the bit is set.
	return v&mask == mask
}

// Error returns the error encountered during decoding, if one occurred.
func (d *BooleanDecoder) Error() error {
	return d.err
}