influxdb/pkg/estimator/hll/compressed.go

174 lines
3.5 KiB
Go

package hll
import "encoding/binary"
// Original author of this file is github.com/clarkduvall/hyperloglog
type iterable interface {
decode(i int, last uint32) (uint32, int)
Len() int
Iter() *iterator
}
type iterator struct {
i int
last uint32
v iterable
}
func (iter *iterator) Next() uint32 {
n, i := iter.v.decode(iter.i, iter.last)
iter.last = n
iter.i = i
return n
}
func (iter *iterator) Peek() uint32 {
n, _ := iter.v.decode(iter.i, iter.last)
return n
}
func (iter iterator) HasNext() bool {
return iter.i < iter.v.Len()
}
type compressedList struct {
count uint32
last uint32
b variableLengthList
}
func (v *compressedList) Clone() *compressedList {
if v == nil {
return nil
}
newV := &compressedList{
count: v.count,
last: v.last,
}
newV.b = make(variableLengthList, len(v.b))
copy(newV.b, v.b)
return newV
}
func (v *compressedList) MarshalBinary() (data []byte, err error) {
// Marshal the variableLengthList
bdata, err := v.b.MarshalBinary()
if err != nil {
return nil, err
}
// At least 4 bytes for the two fixed sized values plus the size of bdata.
data = make([]byte, 0, 4+4+len(bdata))
// Marshal the count and last values.
data = append(data, []byte{
// Number of items in the list.
byte(v.count >> 24),
byte(v.count >> 16),
byte(v.count >> 8),
byte(v.count),
// The last item in the list.
byte(v.last >> 24),
byte(v.last >> 16),
byte(v.last >> 8),
byte(v.last),
}...)
// Append the list
return append(data, bdata...), nil
}
func (v *compressedList) UnmarshalBinary(data []byte) error {
// Set the count.
v.count, data = binary.BigEndian.Uint32(data[:4]), data[4:]
// Set the last value.
v.last, data = binary.BigEndian.Uint32(data[:4]), data[4:]
// Set the list.
sz, data := binary.BigEndian.Uint32(data[:4]), data[4:]
v.b = make([]uint8, sz)
for i := uint32(0); i < sz; i++ {
v.b[i] = uint8(data[i])
}
return nil
}
func newCompressedList(size int) *compressedList {
v := &compressedList{}
v.b = make(variableLengthList, 0, size)
return v
}
func (v *compressedList) Len() int {
return len(v.b)
}
func (v *compressedList) decode(i int, last uint32) (uint32, int) {
n, i := v.b.decode(i, last)
return n + last, i
}
func (v *compressedList) Append(x uint32) {
v.count++
v.b = v.b.Append(x - v.last)
v.last = x
}
func (v *compressedList) Iter() *iterator {
return &iterator{0, 0, v}
}
type variableLengthList []uint8
func (v variableLengthList) MarshalBinary() (data []byte, err error) {
// 4 bytes for the size of the list, and a byte for each element in the
// list.
data = make([]byte, 0, 4+v.Len())
// Length of the list. We only need 32 bits because the size of the set
// couldn't exceed that on 32 bit architectures.
sz := v.Len()
data = append(data, []byte{
byte(sz >> 24),
byte(sz >> 16),
byte(sz >> 8),
byte(sz),
}...)
// Marshal each element in the list.
for i := 0; i < sz; i++ {
data = append(data, byte(v[i]))
}
return data, nil
}
func (v variableLengthList) Len() int {
return len(v)
}
func (v *variableLengthList) Iter() *iterator {
return &iterator{0, 0, v}
}
func (v variableLengthList) decode(i int, last uint32) (uint32, int) {
var x uint32
j := i
for ; v[j]&0x80 != 0; j++ {
x |= uint32(v[j]&0x7f) << (uint(j-i) * 7)
}
x |= uint32(v[j]) << (uint(j-i) * 7)
return x, j + 1
}
func (v variableLengthList) Append(x uint32) variableLengthList {
for x&0xffffff80 != 0 {
v = append(v, uint8((x&0x7f)|0x80))
x >>= 7
}
return append(v, uint8(x&0x7f))
}