174 lines
3.5 KiB
Go
174 lines
3.5 KiB
Go
package hll
|
|
|
|
import (
	"encoding/binary"
	"errors"
)
|
|
|
|
// Original author of this file is github.com/clarkduvall/hyperloglog
|
|
type iterable interface {
|
|
decode(i int, last uint32) (uint32, int)
|
|
Len() int
|
|
Iter() *iterator
|
|
}
|
|
|
|
type iterator struct {
|
|
i int
|
|
last uint32
|
|
v iterable
|
|
}
|
|
|
|
func (iter *iterator) Next() uint32 {
|
|
n, i := iter.v.decode(iter.i, iter.last)
|
|
iter.last = n
|
|
iter.i = i
|
|
return n
|
|
}
|
|
|
|
func (iter *iterator) Peek() uint32 {
|
|
n, _ := iter.v.decode(iter.i, iter.last)
|
|
return n
|
|
}
|
|
|
|
func (iter iterator) HasNext() bool {
|
|
return iter.i < iter.v.Len()
|
|
}
|
|
|
|
type compressedList struct {
|
|
count uint32
|
|
last uint32
|
|
b variableLengthList
|
|
}
|
|
|
|
func (v *compressedList) Clone() *compressedList {
|
|
if v == nil {
|
|
return nil
|
|
}
|
|
|
|
newV := &compressedList{
|
|
count: v.count,
|
|
last: v.last,
|
|
}
|
|
|
|
newV.b = make(variableLengthList, len(v.b))
|
|
copy(newV.b, v.b)
|
|
return newV
|
|
}
|
|
|
|
func (v *compressedList) MarshalBinary() (data []byte, err error) {
|
|
// Marshal the variableLengthList
|
|
bdata, err := v.b.MarshalBinary()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// At least 4 bytes for the two fixed sized values plus the size of bdata.
|
|
data = make([]byte, 0, 4+4+len(bdata))
|
|
|
|
// Marshal the count and last values.
|
|
data = append(data, []byte{
|
|
// Number of items in the list.
|
|
byte(v.count >> 24),
|
|
byte(v.count >> 16),
|
|
byte(v.count >> 8),
|
|
byte(v.count),
|
|
// The last item in the list.
|
|
byte(v.last >> 24),
|
|
byte(v.last >> 16),
|
|
byte(v.last >> 8),
|
|
byte(v.last),
|
|
}...)
|
|
|
|
// Append the list
|
|
return append(data, bdata...), nil
|
|
}
|
|
|
|
func (v *compressedList) UnmarshalBinary(data []byte) error {
|
|
// Set the count.
|
|
v.count, data = binary.BigEndian.Uint32(data[:4]), data[4:]
|
|
|
|
// Set the last value.
|
|
v.last, data = binary.BigEndian.Uint32(data[:4]), data[4:]
|
|
|
|
// Set the list.
|
|
sz, data := binary.BigEndian.Uint32(data[:4]), data[4:]
|
|
v.b = make([]uint8, sz)
|
|
for i := uint32(0); i < sz; i++ {
|
|
v.b[i] = uint8(data[i])
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func newCompressedList(size int) *compressedList {
|
|
v := &compressedList{}
|
|
v.b = make(variableLengthList, 0, size)
|
|
return v
|
|
}
|
|
|
|
func (v *compressedList) Len() int {
|
|
return len(v.b)
|
|
}
|
|
|
|
func (v *compressedList) decode(i int, last uint32) (uint32, int) {
|
|
n, i := v.b.decode(i, last)
|
|
return n + last, i
|
|
}
|
|
|
|
func (v *compressedList) Append(x uint32) {
|
|
v.count++
|
|
v.b = v.b.Append(x - v.last)
|
|
v.last = x
|
|
}
|
|
|
|
func (v *compressedList) Iter() *iterator {
|
|
return &iterator{0, 0, v}
|
|
}
|
|
|
|
// variableLengthList is a byte buffer holding a sequence of uint32 values in
// a variable-length encoding. Each value is split into 7-bit groups, lowest
// group first, one group per byte. The high bit of a byte is set when more
// groups of the same value follow.
type variableLengthList []byte
|
|
|
|
func (v variableLengthList) MarshalBinary() (data []byte, err error) {
|
|
// 4 bytes for the size of the list, and a byte for each element in the
|
|
// list.
|
|
data = make([]byte, 0, 4+v.Len())
|
|
|
|
// Length of the list. We only need 32 bits because the size of the set
|
|
// couldn't exceed that on 32 bit architectures.
|
|
sz := v.Len()
|
|
data = append(data, []byte{
|
|
byte(sz >> 24),
|
|
byte(sz >> 16),
|
|
byte(sz >> 8),
|
|
byte(sz),
|
|
}...)
|
|
|
|
// Marshal each element in the list.
|
|
for i := 0; i < sz; i++ {
|
|
data = append(data, byte(v[i]))
|
|
}
|
|
|
|
return data, nil
|
|
}
|
|
|
|
func (v variableLengthList) Len() int {
|
|
return len(v)
|
|
}
|
|
|
|
func (v *variableLengthList) Iter() *iterator {
|
|
return &iterator{0, 0, v}
|
|
}
|
|
|
|
func (v variableLengthList) decode(i int, last uint32) (uint32, int) {
|
|
var x uint32
|
|
j := i
|
|
for ; v[j]&0x80 != 0; j++ {
|
|
x |= uint32(v[j]&0x7f) << (uint(j-i) * 7)
|
|
}
|
|
x |= uint32(v[j]) << (uint(j-i) * 7)
|
|
return x, j + 1
|
|
}
|
|
|
|
func (v variableLengthList) Append(x uint32) variableLengthList {
|
|
for x&0xffffff80 != 0 {
|
|
v = append(v, uint8((x&0x7f)|0x80))
|
|
x >>= 7
|
|
}
|
|
return append(v, uint8(x&0x7f))
|
|
}
|