Fix 32-bit rhh implementation.

pull/8173/head
Ben Johnson 2017-03-21 11:44:13 -06:00
parent 58c8736ebc
commit 1e9fa7bc2c
No known key found for this signature in database
GPG Key ID: 81741CD251883081
4 changed files with 70 additions and 68 deletions

View File

@ -10,13 +10,13 @@ import (
// HashMap represents a hash map that implements Robin Hood Hashing. // HashMap represents a hash map that implements Robin Hood Hashing.
// https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf // https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf
type HashMap struct { type HashMap struct {
hashes []uint64 hashes []int64
elems []hashElem elems []hashElem
n int n int64
capacity int capacity int64
threshold int threshold int64
mask uint64 mask int64
loadFactor int loadFactor int
} }
@ -51,9 +51,9 @@ func (m *HashMap) Put(key []byte, val interface{}) {
} }
} }
func (m *HashMap) insert(hash uint64, key []byte, val interface{}) (overwritten bool) { func (m *HashMap) insert(hash int64, key []byte, val interface{}) (overwritten bool) {
pos := int(hash & m.mask) pos := hash & m.mask
dist := 0 var dist int64
// Continue searching until we find an empty slot or lower probe distance. // Continue searching until we find an empty slot or lower probe distance.
for { for {
@ -83,7 +83,7 @@ func (m *HashMap) insert(hash uint64, key []byte, val interface{}) (overwritten
} }
// Increment position, wrap around on overflow. // Increment position, wrap around on overflow.
pos = int((uint64(pos) + 1) & m.mask) pos = (pos + 1) & m.mask
dist++ dist++
} }
} }
@ -91,9 +91,9 @@ func (m *HashMap) insert(hash uint64, key []byte, val interface{}) (overwritten
// alloc elems according to currently set capacity. // alloc elems according to currently set capacity.
func (m *HashMap) alloc() { func (m *HashMap) alloc() {
m.elems = make([]hashElem, m.capacity) m.elems = make([]hashElem, m.capacity)
m.hashes = make([]uint64, m.capacity) m.hashes = make([]int64, m.capacity)
m.threshold = (m.capacity * m.loadFactor) / 100 m.threshold = (m.capacity * int64(m.loadFactor)) / 100
m.mask = uint64(m.capacity - 1) m.mask = int64(m.capacity - 1)
} }
// grow doubles the capacity and reinserts all existing hashes & elements. // grow doubles the capacity and reinserts all existing hashes & elements.
@ -107,7 +107,7 @@ func (m *HashMap) grow() {
m.alloc() m.alloc()
// Copy old elements to new hash/elem list. // Copy old elements to new hash/elem list.
for i := 0; i < capacity; i++ { for i := int64(0); i < capacity; i++ {
elem, hash := &elems[i], hashes[i] elem, hash := &elems[i], hashes[i]
if hash == 0 { if hash == 0 {
continue continue
@ -117,11 +117,11 @@ func (m *HashMap) grow() {
} }
// index returns the position of key in the hash map. // index returns the position of key in the hash map.
func (m *HashMap) index(key []byte) int { func (m *HashMap) index(key []byte) int64 {
hash := HashKey(key) hash := HashKey(key)
pos := int(hash & m.mask) pos := hash & m.mask
dist := 0 var dist int64
for { for {
if m.hashes[pos] == 0 { if m.hashes[pos] == 0 {
return -1 return -1
@ -131,14 +131,14 @@ func (m *HashMap) index(key []byte) int {
return pos return pos
} }
pos = int(uint64(pos+1) & m.mask) pos = (pos + 1) & m.mask
dist++ dist++
} }
} }
// Elem returns the i-th key/value pair of the hash map. // Elem returns the i-th key/value pair of the hash map.
func (m *HashMap) Elem(i int) (key []byte, value interface{}) { func (m *HashMap) Elem(i int64) (key []byte, value interface{}) {
if i >= len(m.elems) { if i >= int64(len(m.elems)) {
return nil, nil return nil, nil
} }
@ -147,15 +147,15 @@ func (m *HashMap) Elem(i int) (key []byte, value interface{}) {
} }
// Len returns the number of key/values set in map. // Len returns the number of key/values set in map.
func (m *HashMap) Len() int { return m.n } func (m *HashMap) Len() int64 { return m.n }
// Cap returns the capacity of the map. // Cap returns the capacity of the map.
func (m *HashMap) Cap() int { return m.capacity } func (m *HashMap) Cap() int64 { return m.capacity }
// AverageProbeCount returns the average number of probes for each element. // AverageProbeCount returns the average number of probes for each element.
func (m *HashMap) AverageProbeCount() float64 { func (m *HashMap) AverageProbeCount() float64 {
var sum float64 var sum float64
for i := 0; i < m.capacity; i++ { for i := int64(0); i < m.capacity; i++ {
hash := m.hashes[i] hash := m.hashes[i]
if hash == 0 { if hash == 0 {
continue continue
@ -168,7 +168,7 @@ func (m *HashMap) AverageProbeCount() float64 {
// Keys returns a list of sorted keys. // Keys returns a list of sorted keys.
func (m *HashMap) Keys() [][]byte { func (m *HashMap) Keys() [][]byte {
a := make([][]byte, 0, m.Len()) a := make([][]byte, 0, m.Len())
for i := 0; i < m.Cap(); i++ { for i := int64(0); i < m.Cap(); i++ {
k, v := m.Elem(i) k, v := m.Elem(i)
if v == nil { if v == nil {
continue continue
@ -182,12 +182,12 @@ func (m *HashMap) Keys() [][]byte {
type hashElem struct { type hashElem struct {
key []byte key []byte
value interface{} value interface{}
hash uint64 hash int64
} }
// Options represents initialization options that are passed to NewHashMap(). // Options represents initialization options that are passed to NewHashMap().
type Options struct { type Options struct {
Capacity int Capacity int64
LoadFactor int LoadFactor int
} }
@ -198,26 +198,28 @@ var DefaultOptions = Options{
} }
// HashKey computes a hash of key. Hash is always non-zero. // HashKey computes a hash of key. Hash is always non-zero.
func HashKey(key []byte) uint64 { func HashKey(key []byte) int64 {
h := xxhash.Sum64(key) h := int64(xxhash.Sum64(key))
if h == 0 { if h == 0 {
h = 1 h = 1
} else if h < 0 {
h = 0 - h
} }
return h return h
} }
// Dist returns the probe distance for a hash in a slot index. // Dist returns the probe distance for a hash in a slot index.
// NOTE: Capacity must be a power of 2. // NOTE: Capacity must be a power of 2.
func Dist(hash uint64, i, capacity int) int { func Dist(hash, i, capacity int64) int64 {
mask := uint64(capacity - 1) mask := capacity - 1
dist := int(uint64(i+capacity-int(hash&mask)) & mask) dist := (i + capacity - (hash & mask)) & mask
return dist return dist
} }
// pow2 returns the number that is the next highest power of 2. // pow2 returns the number that is the next highest power of 2.
// Returns v if it is a power of 2. // Returns v if it is a power of 2.
func pow2(v int) int { func pow2(v int64) int64 {
for i := 2; i < 1<<62; i *= 2 { for i := int64(2); i < 1<<62; i *= 2 {
if i >= v { if i >= v {
return i return i
} }

View File

@ -64,12 +64,12 @@ func (blk *MeasurementBlock) Version() int { return blk.version }
// Elem returns an element for a measurement. // Elem returns an element for a measurement.
func (blk *MeasurementBlock) Elem(name []byte) (e MeasurementBlockElem, ok bool) { func (blk *MeasurementBlock) Elem(name []byte) (e MeasurementBlockElem, ok bool) {
n := binary.BigEndian.Uint64(blk.hashData[:MeasurementNSize]) n := int64(binary.BigEndian.Uint64(blk.hashData[:MeasurementNSize]))
hash := rhh.HashKey(name) hash := rhh.HashKey(name)
pos := int(hash % n) pos := hash % n
// Track current distance // Track current distance
var d int var d int64
for { for {
// Find offset of measurement. // Find offset of measurement.
offset := binary.BigEndian.Uint64(blk.hashData[MeasurementNSize+(pos*MeasurementOffsetSize):]) offset := binary.BigEndian.Uint64(blk.hashData[MeasurementNSize+(pos*MeasurementOffsetSize):])
@ -89,16 +89,16 @@ func (blk *MeasurementBlock) Elem(name []byte) (e MeasurementBlockElem, ok bool)
} }
// Check if we've exceeded the probe distance. // Check if we've exceeded the probe distance.
if d > rhh.Dist(rhh.HashKey(e.name), pos, int(n)) { if d > rhh.Dist(rhh.HashKey(e.name), pos, n) {
return MeasurementBlockElem{}, false return MeasurementBlockElem{}, false
} }
} }
// Move position forward. // Move position forward.
pos = (pos + 1) % int(n) pos = (pos + 1) % n
d++ d++
if uint64(d) > n { if d > n {
return MeasurementBlockElem{}, false return MeasurementBlockElem{}, false
} }
} }
@ -436,7 +436,7 @@ func (mw *MeasurementBlockWriter) WriteTo(w io.Writer) (n int64, err error) {
// Build key hash map // Build key hash map
m := rhh.NewHashMap(rhh.Options{ m := rhh.NewHashMap(rhh.Options{
Capacity: len(names), Capacity: int64(len(names)),
LoadFactor: LoadFactor, LoadFactor: LoadFactor,
}) })
for name := range mw.mms { for name := range mw.mms {
@ -452,7 +452,7 @@ func (mw *MeasurementBlockWriter) WriteTo(w io.Writer) (n int64, err error) {
} }
// Encode hash map offset entries. // Encode hash map offset entries.
for i := 0; i < m.Cap(); i++ { for i := int64(0); i < m.Cap(); i++ {
_, v := m.Elem(i) _, v := m.Elem(i)
var offset int64 var offset int64

View File

@ -45,7 +45,7 @@ type SeriesBlock struct {
// Series data & index/capacity. // Series data & index/capacity.
seriesData []byte seriesData []byte
seriesIndex []byte seriesIndex []byte
seriesIndexN uint64 seriesIndexN int64
// Exact series counts for this block. // Exact series counts for this block.
seriesN int64 seriesN int64
@ -65,10 +65,10 @@ func (blk *SeriesBlock) HasSeries(name []byte, tags models.Tags, buf []byte) (ex
n := blk.seriesIndexN n := blk.seriesIndexN
hash := rhh.HashKey(buf) hash := rhh.HashKey(buf)
pos := int(hash % n) pos := hash % n
// Track current distance // Track current distance
var d int var d int64
for { for {
// Find offset of series. // Find offset of series.
offset := binary.BigEndian.Uint64(blk.seriesIndex[pos*SeriesIDSize:]) offset := binary.BigEndian.Uint64(blk.seriesIndex[pos*SeriesIDSize:])
@ -83,16 +83,16 @@ func (blk *SeriesBlock) HasSeries(name []byte, tags models.Tags, buf []byte) (ex
} }
// Check if we've exceeded the probe distance. // Check if we've exceeded the probe distance.
max := rhh.Dist(rhh.HashKey(key), pos, int(n)) max := rhh.Dist(rhh.HashKey(key), pos, n)
if d > max { if d > max {
return false, false return false, false
} }
// Move position forward. // Move position forward.
pos = (pos + 1) % int(n) pos = (pos + 1) % n
d++ d++
if uint64(d) > n { if d > n {
return false, false return false, false
} }
} }
@ -105,10 +105,10 @@ func (blk *SeriesBlock) Series(name []byte, tags models.Tags) SeriesElem {
n := blk.seriesIndexN n := blk.seriesIndexN
hash := rhh.HashKey(buf) hash := rhh.HashKey(buf)
pos := int(hash % n) pos := hash % n
// Track current distance // Track current distance
var d int var d int64
for { for {
// Find offset of series. // Find offset of series.
offset := binary.BigEndian.Uint64(blk.seriesIndex[pos*SeriesIDSize:]) offset := binary.BigEndian.Uint64(blk.seriesIndex[pos*SeriesIDSize:])
@ -125,15 +125,15 @@ func (blk *SeriesBlock) Series(name []byte, tags models.Tags) SeriesElem {
} }
// Check if we've exceeded the probe distance. // Check if we've exceeded the probe distance.
if d > rhh.Dist(rhh.HashKey(key), pos, int(n)) { if d > rhh.Dist(rhh.HashKey(key), pos, n) {
return nil return nil
} }
// Move position forward. // Move position forward.
pos = (pos + 1) % int(n) pos = (pos + 1) % n
d++ d++
if uint64(d) > n { if d > n {
return nil return nil
} }
} }
@ -170,7 +170,7 @@ func (blk *SeriesBlock) UnmarshalBinary(data []byte) error {
// Slice series hash index. // Slice series hash index.
blk.seriesIndex = data[t.Series.Index.Offset:] blk.seriesIndex = data[t.Series.Index.Offset:]
blk.seriesIndex = blk.seriesIndex[:t.Series.Index.Size] blk.seriesIndex = blk.seriesIndex[:t.Series.Index.Size]
blk.seriesIndexN = binary.BigEndian.Uint64(blk.seriesIndex[:8]) blk.seriesIndexN = int64(binary.BigEndian.Uint64(blk.seriesIndex[:8]))
blk.seriesIndex = blk.seriesIndex[8:] blk.seriesIndex = blk.seriesIndex[8:]
// Initialise sketches. We're currently using HLL+. // Initialise sketches. We're currently using HLL+.
@ -559,7 +559,7 @@ func (enc *SeriesBlockEncoder) writeSeriesIndex() error {
} }
// Encode hash map offset entries. // Encode hash map offset entries.
for i := 0; i < enc.offsets.Cap(); i++ { for i := int64(0); i < enc.offsets.Cap(); i++ {
_, v := enc.offsets.Elem(i) _, v := enc.offsets.Elem(i)
offset, _ := v.(uint64) offset, _ := v.(uint64)

View File

@ -90,12 +90,12 @@ func (blk *TagBlock) UnmarshalBinary(data []byte) error {
// TagKeyElem returns an element for a tag key. // TagKeyElem returns an element for a tag key.
// Returns an element with a nil key if not found. // Returns an element with a nil key if not found.
func (blk *TagBlock) TagKeyElem(key []byte) TagKeyElem { func (blk *TagBlock) TagKeyElem(key []byte) TagKeyElem {
keyN := binary.BigEndian.Uint64(blk.hashData[:TagKeyNSize]) keyN := int64(binary.BigEndian.Uint64(blk.hashData[:TagKeyNSize]))
hash := rhh.HashKey(key) hash := rhh.HashKey(key)
pos := int(hash % keyN) pos := hash % keyN
// Track current distance // Track current distance
var d int var d int64
for { for {
// Find offset of tag key. // Find offset of tag key.
offset := binary.BigEndian.Uint64(blk.hashData[TagKeyNSize+(pos*TagKeyOffsetSize):]) offset := binary.BigEndian.Uint64(blk.hashData[TagKeyNSize+(pos*TagKeyOffsetSize):])
@ -113,15 +113,15 @@ func (blk *TagBlock) TagKeyElem(key []byte) TagKeyElem {
} }
// Check if we've exceeded the probe distance. // Check if we've exceeded the probe distance.
if d > rhh.Dist(rhh.HashKey(e.key), pos, int(keyN)) { if d > rhh.Dist(rhh.HashKey(e.key), pos, keyN) {
return nil return nil
} }
// Move position forward. // Move position forward.
pos = (pos + 1) % int(keyN) pos = (pos + 1) % keyN
d++ d++
if uint64(d) > keyN { if d > keyN {
return nil return nil
} }
} }
@ -138,12 +138,12 @@ func (blk *TagBlock) TagValueElem(key, value []byte) TagValueElem {
// Slice hash index data. // Slice hash index data.
hashData := kelem.hashIndex.buf hashData := kelem.hashIndex.buf
valueN := binary.BigEndian.Uint64(hashData[:TagValueNSize]) valueN := int64(binary.BigEndian.Uint64(hashData[:TagValueNSize]))
hash := rhh.HashKey(value) hash := rhh.HashKey(value)
pos := int(hash % valueN) pos := hash % valueN
// Track current distance // Track current distance
var d int var d int64
for { for {
// Find offset of tag value. // Find offset of tag value.
offset := binary.BigEndian.Uint64(hashData[TagValueNSize+(pos*TagValueOffsetSize):]) offset := binary.BigEndian.Uint64(hashData[TagValueNSize+(pos*TagValueOffsetSize):])
@ -161,16 +161,16 @@ func (blk *TagBlock) TagValueElem(key, value []byte) TagValueElem {
} }
// Check if we've exceeded the probe distance. // Check if we've exceeded the probe distance.
max := rhh.Dist(rhh.HashKey(e.value), pos, int(valueN)) max := rhh.Dist(rhh.HashKey(e.value), pos, valueN)
if d > max { if d > max {
return nil return nil
} }
// Move position forward. // Move position forward.
pos = (pos + 1) % int(valueN) pos = (pos + 1) % valueN
d++ d++
if uint64(d) > valueN { if d > valueN {
return nil return nil
} }
} }
@ -613,7 +613,7 @@ func (enc *TagBlockEncoder) flushValueHashIndex() error {
} }
// Encode hash map offset entries. // Encode hash map offset entries.
for i := 0; i < enc.offsets.Cap(); i++ { for i := int64(0); i < enc.offsets.Cap(); i++ {
_, v := enc.offsets.Elem(i) _, v := enc.offsets.Elem(i)
offset, _ := v.(int64) offset, _ := v.(int64)
if err := writeUint64To(enc.w, uint64(offset), &enc.n); err != nil { if err := writeUint64To(enc.w, uint64(offset), &enc.n); err != nil {
@ -630,7 +630,7 @@ func (enc *TagBlockEncoder) flushValueHashIndex() error {
// encodeTagKeyBlock encodes the keys section to the writer. // encodeTagKeyBlock encodes the keys section to the writer.
func (enc *TagBlockEncoder) encodeTagKeyBlock() error { func (enc *TagBlockEncoder) encodeTagKeyBlock() error {
offsets := rhh.NewHashMap(rhh.Options{Capacity: len(enc.keys), LoadFactor: LoadFactor}) offsets := rhh.NewHashMap(rhh.Options{Capacity: int64(len(enc.keys)), LoadFactor: LoadFactor})
// Encode key list in sorted order. // Encode key list in sorted order.
enc.trailer.KeyData.Offset = enc.n enc.trailer.KeyData.Offset = enc.n
@ -674,7 +674,7 @@ func (enc *TagBlockEncoder) encodeTagKeyBlock() error {
} }
// Encode hash map offset entries. // Encode hash map offset entries.
for i := 0; i < offsets.Cap(); i++ { for i := int64(0); i < offsets.Cap(); i++ {
_, v := offsets.Elem(i) _, v := offsets.Elem(i)
offset, _ := v.(int64) offset, _ := v.(int64)
if err := writeUint64To(enc.w, uint64(offset), &enc.n); err != nil { if err := writeUint64To(enc.w, uint64(offset), &enc.n); err != nil {