Fix RHH packing.

pull/7913/head
Ben Johnson 2016-09-22 08:46:12 -06:00
parent e25d61e4bd
commit 3240af07e0
No known key found for this signature in database
GPG Key ID: 81741CD251883081
4 changed files with 78 additions and 55 deletions

View File

@ -15,12 +15,13 @@ type HashMap struct {
n int
capacity int
threshold int
mask uint32
loadFactor int
}
func NewHashMap(opt Options) *HashMap {
m := &HashMap{
capacity: opt.Capacity,
capacity: pow2(opt.Capacity),
loadFactor: opt.LoadFactor,
}
m.alloc()
@ -50,7 +51,7 @@ func (m *HashMap) Put(key []byte, val interface{}) {
}
func (m *HashMap) insert(hash uint32, key []byte, val interface{}) (overwritten bool) {
pos := int(hash) % m.capacity
pos := int(hash & m.mask)
dist := 0
// Continue searching until we find an empty slot or lower probe distance.
@ -81,7 +82,7 @@ func (m *HashMap) insert(hash uint32, key []byte, val interface{}) (overwritten
}
// Increment position, wrap around on overflow.
pos = (pos + 1) % m.capacity
pos = int(uint32(pos+1) & m.mask)
dist++
}
}
@ -91,6 +92,7 @@ func (m *HashMap) alloc() {
m.elems = make([]hashElem, m.capacity)
m.hashes = make([]uint32, m.capacity)
m.threshold = (m.capacity * m.loadFactor) / 100
m.mask = uint32(m.capacity - 1)
}
// grow doubles the capacity and reinserts all existing hashes & elements.
@ -116,7 +118,7 @@ func (m *HashMap) grow() {
// index returns the position of key in the hash map.
func (m *HashMap) index(key []byte) int {
hash := m.hashKey(key)
pos := int(hash) % m.capacity
pos := int(hash & m.mask)
dist := 0
for {
@ -128,7 +130,7 @@ func (m *HashMap) index(key []byte) int {
return pos
}
pos = (pos + 1) % m.capacity
pos = int(uint32(pos+1) & m.mask)
dist++
}
}
@ -173,7 +175,7 @@ func (m *HashMap) AverageProbeCount() float64 {
// dist returns the probe distance for a hash in a slot index.
// The distance is how far slot i lies past the slot the hash maps to,
// wrapping around the end of the table.
func (m *HashMap) dist(hash uint32, i int) int {
// Modulo form: the hash maps to slot hash % capacity.
return (i + m.capacity - (int(hash) % m.capacity)) % m.capacity
// Mask form: the hash maps to slot hash & mask, and the result is wrapped with & mask.
return int(uint32(i+m.capacity-int(hash&m.mask)) & m.mask)
}
type hashElem struct {
@ -193,3 +195,14 @@ var DefaultOptions = Options{
Capacity: 256,
LoadFactor: 90,
}
// pow2 returns the smallest power of two that is greater than or equal to v.
// The result is never less than 2, so pow2(0), pow2(1) and pow2(2) all
// return 2. It panics if no such power of two is representable as an int.
func pow2(v int) int {
	// Terminate when the shift overflows into the sign bit rather than
	// comparing against a fixed constant such as 1<<32, which does not fit
	// in an int on platforms where int is 32 bits wide.
	for i := 2; i > 0; i <<= 1 {
		if i >= v {
			return i
		}
	}
	panic("pow2: value too large")
}

View File

@ -17,16 +17,16 @@ func TestHashMap(t *testing.T) {
m.Put([]byte("baz"), []byte("bat"))
// Verify values can be retrieved.
if v := m.Get([]byte("foo")); !bytes.Equal(v, []byte("bar")) {
if v := m.Get([]byte("foo")); !bytes.Equal(v.([]byte), []byte("bar")) {
t.Fatalf("unexpected value: %s", v)
}
if v := m.Get([]byte("baz")); !bytes.Equal(v, []byte("bat")) {
if v := m.Get([]byte("baz")); !bytes.Equal(v.([]byte), []byte("bat")) {
t.Fatalf("unexpected value: %s", v)
}
// Overwrite field & verify.
m.Put([]byte("foo"), []byte("XXX"))
if v := m.Get([]byte("foo")); !bytes.Equal(v, []byte("XXX")) {
if v := m.Get([]byte("foo")); !bytes.Equal(v.([]byte), []byte("XXX")) {
t.Fatalf("unexpected value: %s", v)
}
}
@ -38,7 +38,7 @@ func TestHashMap_Quick(t *testing.T) {
}
if err := quick.Check(func(keys, values [][]byte) bool {
m := rhh.NewHashMap(rhh.Options{Capacity: 1000, LoadFactor: 100})
m := rhh.NewHashMap(rhh.Options{Capacity: 1000, LoadFactor: 90})
h := make(map[string][]byte)
// Insert all key/values into both maps.
@ -50,7 +50,7 @@ func TestHashMap_Quick(t *testing.T) {
// Verify the maps are equal.
for k, v := range h {
if mv := m.Get([]byte(k)); !bytes.Equal(mv, v) {
if mv := m.Get([]byte(k)); !bytes.Equal(mv.([]byte), v) {
t.Fatalf("value mismatch:\nkey=%x\ngot=%x\nexp=%x\n\n", []byte(k), mv, v)
}
}

View File

@ -62,7 +62,7 @@ func (ts *TagSet) Version() int { return ts.version }
// TagValueSeriesN returns the number of series ids associated with a tag value.
func (ts *TagSet) TagValueSeriesN(key, value []byte) int {
velem := ts.tagValueElem(key, value)
if velem.value == nil {
if len(velem.value) == 0 {
return 0
}
return int(velem.seriesN)
@ -72,7 +72,7 @@ func (ts *TagSet) TagValueSeriesN(key, value []byte) int {
func (ts *TagSet) TagValueSeriesIDs(key, value []byte) []uint32 {
// Find value element.
velem := ts.tagValueElem(key, value)
if velem.value == nil {
if len(velem.value) == 0 {
return nil
}
@ -98,18 +98,21 @@ func (ts *TagSet) tagKeyElem(key []byte) tagKeyElem {
// Find offset of tag key.
offset := binary.BigEndian.Uint64(ts.hashData[TagKeyNSize+(pos*TagKeyOffsetSize):])
// Parse into element.
var e tagKeyElem
e.UnmarshalBinary(ts.data[offset:])
// Evaluate key if offset is not empty.
if offset > 0 {
// Parse into element.
var e tagKeyElem
e.UnmarshalBinary(ts.data[offset:])
// Return if keys match.
if bytes.Equal(e.key, key) {
return e
}
// Return if keys match.
if bytes.Equal(e.key, key) {
return e
}
// Check if we've exceeded the probe distance.
if d > dist(hashKey(e.key), pos, int(keyN)) {
return tagKeyElem{}
// Check if we've exceeded the probe distance.
if d > dist(hashKey(e.key), pos, int(keyN)) {
return tagKeyElem{}
}
}
// Move position forward.
@ -123,7 +126,7 @@ func (ts *TagSet) tagKeyElem(key []byte) tagKeyElem {
func (ts *TagSet) tagValueElem(key, value []byte) tagValueElem {
// Find key element, exit if not found.
kelem := ts.tagKeyElem(key)
if kelem.key == nil {
if len(kelem.key) == 0 {
return tagValueElem{}
}
@ -139,18 +142,21 @@ func (ts *TagSet) tagValueElem(key, value []byte) tagValueElem {
// Find offset of tag value.
offset := binary.BigEndian.Uint64(hashData[TagValueNSize+(pos*TagValueOffsetSize):])
// Parse into element.
var e tagValueElem
e.UnmarshalBinary(ts.data[offset:])
// Evaluate value if offset is not empty.
if offset > 0 {
// Parse into element.
var e tagValueElem
e.UnmarshalBinary(ts.data[offset:])
// Return if values match.
if bytes.Equal(e.value, value) {
return e
}
// Return if values match.
if bytes.Equal(e.value, value) {
return e
}
// Check if we've exceeded the probe distance.
if d > dist(hashKey(e.value), pos, int(valueN)) {
return tagValueElem{}
// Check if we've exceeded the probe distance.
if d > dist(hashKey(e.value), pos, int(valueN)) {
return tagValueElem{}
}
}
// Move position forward.
@ -304,10 +310,15 @@ func (tsw *TagSetWriter) createTagSetIfNotExists(key []byte) tagSet {
// WriteTo encodes the tag values & tag key blocks.
func (tsw *TagSetWriter) WriteTo(w io.Writer) (n int64, err error) {
// Write padding byte so no offsets are zero.
if err := writeUint8To(w, 0, &n); err != nil {
return n, err
}
// Build key hash map sized for the number of tag sets (capacity is rounded up to a power of two).
m := rhh.NewHashMap(rhh.Options{
Capacity: len(tsw.sets),
LoadFactor: 100,
LoadFactor: 90,
})
for key := range tsw.sets {
ts := tsw.sets[key]
@ -315,10 +326,10 @@ func (tsw *TagSetWriter) WriteTo(w io.Writer) (n int64, err error) {
}
// Write value blocks in key map order.
for i := 0; i < m.Len(); i++ {
k, v := m.Elem(i)
if k == nil {
panic("rhh nil key")
for i := 0; i < m.Cap(); i++ {
_, v := m.Elem(i)
if v == nil {
continue
}
ts := v.(*tagSet)
@ -353,20 +364,17 @@ func (tsw *TagSetWriter) writeTagValueBlockTo(w io.Writer, values map[string]tag
// Build RHH map from tag values.
m := rhh.NewHashMap(rhh.Options{
Capacity: len(values),
LoadFactor: 100,
LoadFactor: 90,
})
for value, tv := range values {
m.Put([]byte(value), tv)
}
// Encode value list.
offsets := make([]int64, m.Len())
for i := 0; i < m.Len(); i++ {
offsets := make([]int64, m.Cap())
for i := 0; i < m.Cap(); i++ {
k, v := m.Elem(i)
if k == nil {
panic("rhh nil key")
}
tv := v.(tagValue)
tv, _ := v.(tagValue)
// Save current offset so we can use it in the hash index.
offsets[i] = *n
@ -381,7 +389,7 @@ func (tsw *TagSetWriter) writeTagValueBlockTo(w io.Writer, values map[string]tag
hoff = *n
// Encode hash map length.
if err := writeUint32To(w, uint32(m.Len()), n); err != nil {
if err := writeUint32To(w, uint32(m.Cap()), n); err != nil {
return hoff, err
}
@ -427,11 +435,11 @@ func (tsw *TagSetWriter) writeTagValueTo(w io.Writer, v []byte, tv tagValue, n *
// writeTagKeyBlockTo encodes keys from a tag set into w.
func (tsw *TagSetWriter) writeTagKeyBlockTo(w io.Writer, m *rhh.HashMap, n *int64) (hoff int64, err error) {
// Encode key list.
offsets := make([]int64, m.Len())
for i := 0; i < m.Len(); i++ {
offsets := make([]int64, m.Cap())
for i := 0; i < m.Cap(); i++ {
k, v := m.Elem(i)
if k == nil {
panic("rhh nil key")
if v == nil {
continue
}
ts := v.(*tagSet)
@ -448,7 +456,7 @@ func (tsw *TagSetWriter) writeTagKeyBlockTo(w io.Writer, m *rhh.HashMap, n *int6
hoff = *n
// Encode hash map length.
if err := writeUint32To(w, uint32(m.Len()), n); err != nil {
if err := writeUint32To(w, uint32(m.Cap()), n); err != nil {
return hoff, err
}

View File

@ -49,6 +49,7 @@ func TestTagSetWriter(t *testing.T) {
var benchmarkTagSet10x1000 *tsi1.TagSet
var benchmarkTagSet100x1000 *tsi1.TagSet
var benchmarkTagSet1000x1000 *tsi1.TagSet
var benchmarkTagSet1x1000000 *tsi1.TagSet
func BenchmarkTagSet_SeriesN_10_1000(b *testing.B) {
benchmarkTagSet_SeriesN(b, 10, 1000, &benchmarkTagSet10x1000)
@ -59,20 +60,21 @@ func BenchmarkTagSet_SeriesN_100_1000(b *testing.B) {
// BenchmarkTagSet_SeriesN_1000_1000 runs benchmarkTagSet_SeriesN with
// tagN=1000 and valueN=1000, passing benchmarkTagSet1000x1000 as the
// tag set pointer.
func BenchmarkTagSet_SeriesN_1000_1000(b *testing.B) {
benchmarkTagSet_SeriesN(b, 1000, 1000, &benchmarkTagSet1000x1000)
}
// BenchmarkTagSet_SeriesN_1_1000000 runs benchmarkTagSet_SeriesN with
// tagN=1 and valueN=1000000, passing benchmarkTagSet1x1000000 as the
// tag set pointer.
func BenchmarkTagSet_SeriesN_1_1000000(b *testing.B) {
benchmarkTagSet_SeriesN(b, 1, 1000000, &benchmarkTagSet1x1000000)
}
func benchmarkTagSet_SeriesN(b *testing.B, tagN, valueN int, ts **tsi1.TagSet) {
if (*ts) == nil {
tsw := tsi1.NewTagSetWriter()
// Write tagset block.
var seriesID uint32
var kbuf, vbuf [20]byte
for i := 0; i < tagN; i++ {
for j := 0; j < valueN; j++ {
k := strconv.AppendInt(kbuf[:0], int64(i), 10)
v := strconv.AppendInt(vbuf[:0], int64(j), 10)
tsw.AddTagValueSeries(k, v, seriesID)
seriesID++
tsw.AddTagValueSeries(k, v, 1)
}
}
tsw.AddSeries(map[string]string{"region": "us-east", "host": "server0"}, 1)