Fix 32-bit rhh implementation.

pull/8173/head
Ben Johnson 2017-03-21 11:44:13 -06:00
parent 58c8736ebc
commit 1e9fa7bc2c
No known key found for this signature in database
GPG Key ID: 81741CD251883081
4 changed files with 70 additions and 68 deletions

View File

@ -10,13 +10,13 @@ import (
// HashMap represents a hash map that implements Robin Hood Hashing.
// https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf
type HashMap struct {
hashes []uint64
hashes []int64
elems []hashElem
n int
capacity int
threshold int
mask uint64
n int64
capacity int64
threshold int64
mask int64
loadFactor int
}
@ -51,9 +51,9 @@ func (m *HashMap) Put(key []byte, val interface{}) {
}
}
func (m *HashMap) insert(hash uint64, key []byte, val interface{}) (overwritten bool) {
pos := int(hash & m.mask)
dist := 0
func (m *HashMap) insert(hash int64, key []byte, val interface{}) (overwritten bool) {
pos := hash & m.mask
var dist int64
// Continue searching until we find an empty slot or lower probe distance.
for {
@ -83,7 +83,7 @@ func (m *HashMap) insert(hash uint64, key []byte, val interface{}) (overwritten
}
// Increment position, wrap around on overflow.
pos = int((uint64(pos) + 1) & m.mask)
pos = (pos + 1) & m.mask
dist++
}
}
@ -91,9 +91,9 @@ func (m *HashMap) insert(hash uint64, key []byte, val interface{}) (overwritten
// alloc allocates the elems and hashes slices according to the currently set
// capacity and recomputes the threshold and mask derived from it.
func (m *HashMap) alloc() {
m.elems = make([]hashElem, m.capacity)
m.hashes = make([]uint64, m.capacity)
m.threshold = (m.capacity * m.loadFactor) / 100
m.mask = uint64(m.capacity - 1)
m.hashes = make([]int64, m.capacity)
m.threshold = (m.capacity * int64(m.loadFactor)) / 100
m.mask = int64(m.capacity - 1)
}
// grow doubles the capacity and reinserts all existing hashes & elements.
@ -107,7 +107,7 @@ func (m *HashMap) grow() {
m.alloc()
// Copy old elements to new hash/elem list.
for i := 0; i < capacity; i++ {
for i := int64(0); i < capacity; i++ {
elem, hash := &elems[i], hashes[i]
if hash == 0 {
continue
@ -117,11 +117,11 @@ func (m *HashMap) grow() {
}
// index returns the position of key in the hash map.
func (m *HashMap) index(key []byte) int {
func (m *HashMap) index(key []byte) int64 {
hash := HashKey(key)
pos := int(hash & m.mask)
pos := hash & m.mask
dist := 0
var dist int64
for {
if m.hashes[pos] == 0 {
return -1
@ -131,14 +131,14 @@ func (m *HashMap) index(key []byte) int {
return pos
}
pos = int(uint64(pos+1) & m.mask)
pos = (pos + 1) & m.mask
dist++
}
}
// Elem returns the i-th key/value pair of the hash map.
func (m *HashMap) Elem(i int) (key []byte, value interface{}) {
if i >= len(m.elems) {
func (m *HashMap) Elem(i int64) (key []byte, value interface{}) {
if i >= int64(len(m.elems)) {
return nil, nil
}
@ -147,15 +147,15 @@ func (m *HashMap) Elem(i int) (key []byte, value interface{}) {
}
// Len returns the number of key/values set in map.
func (m *HashMap) Len() int { return m.n }
func (m *HashMap) Len() int64 { return m.n }
// Cap returns the capacity of the map, i.e. the number of allocated slots, not the number of key/values set.
func (m *HashMap) Cap() int { return m.capacity }
func (m *HashMap) Cap() int64 { return m.capacity }
// AverageProbeCount returns the average number of probes for each element.
func (m *HashMap) AverageProbeCount() float64 {
var sum float64
for i := 0; i < m.capacity; i++ {
for i := int64(0); i < m.capacity; i++ {
hash := m.hashes[i]
if hash == 0 {
continue
@ -168,7 +168,7 @@ func (m *HashMap) AverageProbeCount() float64 {
// Keys returns a list of sorted keys.
func (m *HashMap) Keys() [][]byte {
a := make([][]byte, 0, m.Len())
for i := 0; i < m.Cap(); i++ {
for i := int64(0); i < m.Cap(); i++ {
k, v := m.Elem(i)
if v == nil {
continue
@ -182,12 +182,12 @@ func (m *HashMap) Keys() [][]byte {
type hashElem struct {
key []byte
value interface{}
hash uint64
hash int64
}
// Options represents initialization options that are passed to NewHashMap().
type Options struct {
Capacity int
Capacity int64
LoadFactor int
}
@ -198,26 +198,28 @@ var DefaultOptions = Options{
}
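// HashKey computes a hash of key. Hash is always non-zero: a zero hash is
// remapped to 1, and a negative hash is negated before being returned.
// NOTE(review): negating the minimum int64 value overflows and stays negative;
// confirm callers that compute hash % n tolerate that one value.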
func HashKey(key []byte) uint64 {
h := xxhash.Sum64(key)
func HashKey(key []byte) int64 {
h := int64(xxhash.Sum64(key))
if h == 0 {
h = 1
} else if h < 0 {
h = 0 - h
}
return h
}
// Dist returns the probe distance for a hash in a slot index.
// NOTE: Capacity must be a power of 2.
func Dist(hash uint64, i, capacity int) int {
mask := uint64(capacity - 1)
dist := int(uint64(i+capacity-int(hash&mask)) & mask)
func Dist(hash, i, capacity int64) int64 {
mask := capacity - 1
dist := (i + capacity - (hash & mask)) & mask
return dist
}
// pow2 returns the number that is the next highest power of 2.
// Returns v if it is a power of 2.
func pow2(v int) int {
for i := 2; i < 1<<62; i *= 2 {
func pow2(v int64) int64 {
for i := int64(2); i < 1<<62; i *= 2 {
if i >= v {
return i
}

View File

@ -64,12 +64,12 @@ func (blk *MeasurementBlock) Version() int { return blk.version }
// Elem returns an element for a measurement.
func (blk *MeasurementBlock) Elem(name []byte) (e MeasurementBlockElem, ok bool) {
n := binary.BigEndian.Uint64(blk.hashData[:MeasurementNSize])
n := int64(binary.BigEndian.Uint64(blk.hashData[:MeasurementNSize]))
hash := rhh.HashKey(name)
pos := int(hash % n)
pos := hash % n
// Track current distance
var d int
var d int64
for {
// Find offset of measurement.
offset := binary.BigEndian.Uint64(blk.hashData[MeasurementNSize+(pos*MeasurementOffsetSize):])
@ -89,16 +89,16 @@ func (blk *MeasurementBlock) Elem(name []byte) (e MeasurementBlockElem, ok bool)
}
// Check if we've exceeded the probe distance.
if d > rhh.Dist(rhh.HashKey(e.name), pos, int(n)) {
if d > rhh.Dist(rhh.HashKey(e.name), pos, n) {
return MeasurementBlockElem{}, false
}
}
// Move position forward.
pos = (pos + 1) % int(n)
pos = (pos + 1) % n
d++
if uint64(d) > n {
if d > n {
return MeasurementBlockElem{}, false
}
}
@ -436,7 +436,7 @@ func (mw *MeasurementBlockWriter) WriteTo(w io.Writer) (n int64, err error) {
// Build key hash map
m := rhh.NewHashMap(rhh.Options{
Capacity: len(names),
Capacity: int64(len(names)),
LoadFactor: LoadFactor,
})
for name := range mw.mms {
@ -452,7 +452,7 @@ func (mw *MeasurementBlockWriter) WriteTo(w io.Writer) (n int64, err error) {
}
// Encode hash map offset entries.
for i := 0; i < m.Cap(); i++ {
for i := int64(0); i < m.Cap(); i++ {
_, v := m.Elem(i)
var offset int64

View File

@ -45,7 +45,7 @@ type SeriesBlock struct {
// Series data & index/capacity.
seriesData []byte
seriesIndex []byte
seriesIndexN uint64
seriesIndexN int64
// Exact series counts for this block.
seriesN int64
@ -65,10 +65,10 @@ func (blk *SeriesBlock) HasSeries(name []byte, tags models.Tags, buf []byte) (ex
n := blk.seriesIndexN
hash := rhh.HashKey(buf)
pos := int(hash % n)
pos := hash % n
// Track current distance
var d int
var d int64
for {
// Find offset of series.
offset := binary.BigEndian.Uint64(blk.seriesIndex[pos*SeriesIDSize:])
@ -83,16 +83,16 @@ func (blk *SeriesBlock) HasSeries(name []byte, tags models.Tags, buf []byte) (ex
}
// Check if we've exceeded the probe distance.
max := rhh.Dist(rhh.HashKey(key), pos, int(n))
max := rhh.Dist(rhh.HashKey(key), pos, n)
if d > max {
return false, false
}
// Move position forward.
pos = (pos + 1) % int(n)
pos = (pos + 1) % n
d++
if uint64(d) > n {
if d > n {
return false, false
}
}
@ -105,10 +105,10 @@ func (blk *SeriesBlock) Series(name []byte, tags models.Tags) SeriesElem {
n := blk.seriesIndexN
hash := rhh.HashKey(buf)
pos := int(hash % n)
pos := hash % n
// Track current distance
var d int
var d int64
for {
// Find offset of series.
offset := binary.BigEndian.Uint64(blk.seriesIndex[pos*SeriesIDSize:])
@ -125,15 +125,15 @@ func (blk *SeriesBlock) Series(name []byte, tags models.Tags) SeriesElem {
}
// Check if we've exceeded the probe distance.
if d > rhh.Dist(rhh.HashKey(key), pos, int(n)) {
if d > rhh.Dist(rhh.HashKey(key), pos, n) {
return nil
}
// Move position forward.
pos = (pos + 1) % int(n)
pos = (pos + 1) % n
d++
if uint64(d) > n {
if d > n {
return nil
}
}
@ -170,7 +170,7 @@ func (blk *SeriesBlock) UnmarshalBinary(data []byte) error {
// Slice series hash index.
blk.seriesIndex = data[t.Series.Index.Offset:]
blk.seriesIndex = blk.seriesIndex[:t.Series.Index.Size]
blk.seriesIndexN = binary.BigEndian.Uint64(blk.seriesIndex[:8])
blk.seriesIndexN = int64(binary.BigEndian.Uint64(blk.seriesIndex[:8]))
blk.seriesIndex = blk.seriesIndex[8:]
// Initialise sketches. We're currently using HLL+.
@ -559,7 +559,7 @@ func (enc *SeriesBlockEncoder) writeSeriesIndex() error {
}
// Encode hash map offset entries.
for i := 0; i < enc.offsets.Cap(); i++ {
for i := int64(0); i < enc.offsets.Cap(); i++ {
_, v := enc.offsets.Elem(i)
offset, _ := v.(uint64)

View File

@ -90,12 +90,12 @@ func (blk *TagBlock) UnmarshalBinary(data []byte) error {
// TagKeyElem returns an element for a tag key.
// Returns an element with a nil key if not found.
func (blk *TagBlock) TagKeyElem(key []byte) TagKeyElem {
keyN := binary.BigEndian.Uint64(blk.hashData[:TagKeyNSize])
keyN := int64(binary.BigEndian.Uint64(blk.hashData[:TagKeyNSize]))
hash := rhh.HashKey(key)
pos := int(hash % keyN)
pos := hash % keyN
// Track current distance
var d int
var d int64
for {
// Find offset of tag key.
offset := binary.BigEndian.Uint64(blk.hashData[TagKeyNSize+(pos*TagKeyOffsetSize):])
@ -113,15 +113,15 @@ func (blk *TagBlock) TagKeyElem(key []byte) TagKeyElem {
}
// Check if we've exceeded the probe distance.
if d > rhh.Dist(rhh.HashKey(e.key), pos, int(keyN)) {
if d > rhh.Dist(rhh.HashKey(e.key), pos, keyN) {
return nil
}
// Move position forward.
pos = (pos + 1) % int(keyN)
pos = (pos + 1) % keyN
d++
if uint64(d) > keyN {
if d > keyN {
return nil
}
}
@ -138,12 +138,12 @@ func (blk *TagBlock) TagValueElem(key, value []byte) TagValueElem {
// Slice hash index data.
hashData := kelem.hashIndex.buf
valueN := binary.BigEndian.Uint64(hashData[:TagValueNSize])
valueN := int64(binary.BigEndian.Uint64(hashData[:TagValueNSize]))
hash := rhh.HashKey(value)
pos := int(hash % valueN)
pos := hash % valueN
// Track current distance
var d int
var d int64
for {
// Find offset of tag value.
offset := binary.BigEndian.Uint64(hashData[TagValueNSize+(pos*TagValueOffsetSize):])
@ -161,16 +161,16 @@ func (blk *TagBlock) TagValueElem(key, value []byte) TagValueElem {
}
// Check if we've exceeded the probe distance.
max := rhh.Dist(rhh.HashKey(e.value), pos, int(valueN))
max := rhh.Dist(rhh.HashKey(e.value), pos, valueN)
if d > max {
return nil
}
// Move position forward.
pos = (pos + 1) % int(valueN)
pos = (pos + 1) % valueN
d++
if uint64(d) > valueN {
if d > valueN {
return nil
}
}
@ -613,7 +613,7 @@ func (enc *TagBlockEncoder) flushValueHashIndex() error {
}
// Encode hash map offset entries.
for i := 0; i < enc.offsets.Cap(); i++ {
for i := int64(0); i < enc.offsets.Cap(); i++ {
_, v := enc.offsets.Elem(i)
offset, _ := v.(int64)
if err := writeUint64To(enc.w, uint64(offset), &enc.n); err != nil {
@ -630,7 +630,7 @@ func (enc *TagBlockEncoder) flushValueHashIndex() error {
// encodeTagKeyBlock encodes the keys section to the writer.
func (enc *TagBlockEncoder) encodeTagKeyBlock() error {
offsets := rhh.NewHashMap(rhh.Options{Capacity: len(enc.keys), LoadFactor: LoadFactor})
offsets := rhh.NewHashMap(rhh.Options{Capacity: int64(len(enc.keys)), LoadFactor: LoadFactor})
// Encode key list in sorted order.
enc.trailer.KeyData.Offset = enc.n
@ -674,7 +674,7 @@ func (enc *TagBlockEncoder) encodeTagKeyBlock() error {
}
// Encode hash map offset entries.
for i := 0; i < offsets.Cap(); i++ {
for i := int64(0); i < offsets.Cap(); i++ {
_, v := offsets.Elem(i)
offset, _ := v.(int64)
if err := writeUint64To(enc.w, uint64(offset), &enc.n); err != nil {