Fix RHH packing.
parent
e25d61e4bd
commit
3240af07e0
|
@ -15,12 +15,13 @@ type HashMap struct {
|
|||
n int
|
||||
capacity int
|
||||
threshold int
|
||||
mask uint32
|
||||
loadFactor int
|
||||
}
|
||||
|
||||
func NewHashMap(opt Options) *HashMap {
|
||||
m := &HashMap{
|
||||
capacity: opt.Capacity,
|
||||
capacity: pow2(opt.Capacity),
|
||||
loadFactor: opt.LoadFactor,
|
||||
}
|
||||
m.alloc()
|
||||
|
@ -50,7 +51,7 @@ func (m *HashMap) Put(key []byte, val interface{}) {
|
|||
}
|
||||
|
||||
func (m *HashMap) insert(hash uint32, key []byte, val interface{}) (overwritten bool) {
|
||||
pos := int(hash) % m.capacity
|
||||
pos := int(hash & m.mask)
|
||||
dist := 0
|
||||
|
||||
// Continue searching until we find an empty slot or lower probe distance.
|
||||
|
@ -81,7 +82,7 @@ func (m *HashMap) insert(hash uint32, key []byte, val interface{}) (overwritten
|
|||
}
|
||||
|
||||
// Increment position, wrap around on overflow.
|
||||
pos = (pos + 1) % m.capacity
|
||||
pos = int(uint32(pos+1) & m.mask)
|
||||
dist++
|
||||
}
|
||||
}
|
||||
|
@ -91,6 +92,7 @@ func (m *HashMap) alloc() {
|
|||
m.elems = make([]hashElem, m.capacity)
|
||||
m.hashes = make([]uint32, m.capacity)
|
||||
m.threshold = (m.capacity * m.loadFactor) / 100
|
||||
m.mask = uint32(m.capacity - 1)
|
||||
}
|
||||
|
||||
// grow doubles the capacity and reinserts all existing hashes & elements.
|
||||
|
@ -116,7 +118,7 @@ func (m *HashMap) grow() {
|
|||
// index returns the position of key in the hash map.
|
||||
func (m *HashMap) index(key []byte) int {
|
||||
hash := m.hashKey(key)
|
||||
pos := int(hash) % m.capacity
|
||||
pos := int(hash & m.mask)
|
||||
|
||||
dist := 0
|
||||
for {
|
||||
|
@ -128,7 +130,7 @@ func (m *HashMap) index(key []byte) int {
|
|||
return pos
|
||||
}
|
||||
|
||||
pos = (pos + 1) % m.capacity
|
||||
pos = int(uint32(pos+1) & m.mask)
|
||||
dist++
|
||||
}
|
||||
}
|
||||
|
@ -173,7 +175,7 @@ func (m *HashMap) AverageProbeCount() float64 {
|
|||
|
||||
// dist returns the probe distance for a hash in a slot index.
|
||||
func (m *HashMap) dist(hash uint32, i int) int {
|
||||
return (i + m.capacity - (int(hash) % m.capacity)) % m.capacity
|
||||
return int(uint32(i+m.capacity-int(hash&m.mask)) & m.mask)
|
||||
}
|
||||
|
||||
type hashElem struct {
|
||||
|
@ -193,3 +195,14 @@ var DefaultOptions = Options{
|
|||
Capacity: 256,
|
||||
LoadFactor: 90,
|
||||
}
|
||||
|
||||
// pow2 returns the smallest power of 2 that is greater than or equal to v.
// The result is never less than 2, so any v <= 2 yields 2. If v is already a
// power of 2 (and at least 2) it is returned unchanged.
//
// The largest value that can be returned is 1<<31 (1<<30 where int is 32 bits
// wide). pow2 panics if v is larger than that limit.
func pow2(v int) int {
	// The upper bound is compared as uint64 so the constant 1<<32 is
	// representable even when int is 32 bits wide. The i > 0 guard stops the
	// loop if doubling i overflows int.
	for i := 2; i > 0 && uint64(i) < 1<<32; i <<= 1 {
		if i >= v {
			return i
		}
	}

	// Reachable only when v exceeds the largest supported power of 2.
	panic("rhh: capacity too large")
}
|
||||
|
|
|
@ -17,16 +17,16 @@ func TestHashMap(t *testing.T) {
|
|||
m.Put([]byte("baz"), []byte("bat"))
|
||||
|
||||
// Verify values can be retrieved.
|
||||
if v := m.Get([]byte("foo")); !bytes.Equal(v, []byte("bar")) {
|
||||
if v := m.Get([]byte("foo")); !bytes.Equal(v.([]byte), []byte("bar")) {
|
||||
t.Fatalf("unexpected value: %s", v)
|
||||
}
|
||||
if v := m.Get([]byte("baz")); !bytes.Equal(v, []byte("bat")) {
|
||||
if v := m.Get([]byte("baz")); !bytes.Equal(v.([]byte), []byte("bat")) {
|
||||
t.Fatalf("unexpected value: %s", v)
|
||||
}
|
||||
|
||||
// Overwrite field & verify.
|
||||
m.Put([]byte("foo"), []byte("XXX"))
|
||||
if v := m.Get([]byte("foo")); !bytes.Equal(v, []byte("XXX")) {
|
||||
if v := m.Get([]byte("foo")); !bytes.Equal(v.([]byte), []byte("XXX")) {
|
||||
t.Fatalf("unexpected value: %s", v)
|
||||
}
|
||||
}
|
||||
|
@ -38,7 +38,7 @@ func TestHashMap_Quick(t *testing.T) {
|
|||
}
|
||||
|
||||
if err := quick.Check(func(keys, values [][]byte) bool {
|
||||
m := rhh.NewHashMap(rhh.Options{Capacity: 1000, LoadFactor: 100})
|
||||
m := rhh.NewHashMap(rhh.Options{Capacity: 1000, LoadFactor: 90})
|
||||
h := make(map[string][]byte)
|
||||
|
||||
// Insert all key/values into both maps.
|
||||
|
@ -50,7 +50,7 @@ func TestHashMap_Quick(t *testing.T) {
|
|||
|
||||
// Verify the maps are equal.
|
||||
for k, v := range h {
|
||||
if mv := m.Get([]byte(k)); !bytes.Equal(mv, v) {
|
||||
if mv := m.Get([]byte(k)); !bytes.Equal(mv.([]byte), v) {
|
||||
t.Fatalf("value mismatch:\nkey=%x\ngot=%x\nexp=%x\n\n", []byte(k), mv, v)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -62,7 +62,7 @@ func (ts *TagSet) Version() int { return ts.version }
|
|||
// TagValueSeriesN returns the number of series ids associated with a tag value.
|
||||
func (ts *TagSet) TagValueSeriesN(key, value []byte) int {
|
||||
velem := ts.tagValueElem(key, value)
|
||||
if velem.value == nil {
|
||||
if len(velem.value) == 0 {
|
||||
return 0
|
||||
}
|
||||
return int(velem.seriesN)
|
||||
|
@ -72,7 +72,7 @@ func (ts *TagSet) TagValueSeriesN(key, value []byte) int {
|
|||
func (ts *TagSet) TagValueSeriesIDs(key, value []byte) []uint32 {
|
||||
// Find value element.
|
||||
velem := ts.tagValueElem(key, value)
|
||||
if velem.value == nil {
|
||||
if len(velem.value) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -98,18 +98,21 @@ func (ts *TagSet) tagKeyElem(key []byte) tagKeyElem {
|
|||
// Find offset of tag key.
|
||||
offset := binary.BigEndian.Uint64(ts.hashData[TagKeyNSize+(pos*TagKeyOffsetSize):])
|
||||
|
||||
// Parse into element.
|
||||
var e tagKeyElem
|
||||
e.UnmarshalBinary(ts.data[offset:])
|
||||
// Evaluate key if offset is not empty.
|
||||
if offset > 0 {
|
||||
// Parse into element.
|
||||
var e tagKeyElem
|
||||
e.UnmarshalBinary(ts.data[offset:])
|
||||
|
||||
// Return if keys match.
|
||||
if bytes.Equal(e.key, key) {
|
||||
return e
|
||||
}
|
||||
// Return if keys match.
|
||||
if bytes.Equal(e.key, key) {
|
||||
return e
|
||||
}
|
||||
|
||||
// Check if we've exceeded the probe distance.
|
||||
if d > dist(hashKey(e.key), pos, int(keyN)) {
|
||||
return tagKeyElem{}
|
||||
// Check if we've exceeded the probe distance.
|
||||
if d > dist(hashKey(e.key), pos, int(keyN)) {
|
||||
return tagKeyElem{}
|
||||
}
|
||||
}
|
||||
|
||||
// Move position forward.
|
||||
|
@ -123,7 +126,7 @@ func (ts *TagSet) tagKeyElem(key []byte) tagKeyElem {
|
|||
func (ts *TagSet) tagValueElem(key, value []byte) tagValueElem {
|
||||
// Find key element, exit if not found.
|
||||
kelem := ts.tagKeyElem(key)
|
||||
if kelem.key == nil {
|
||||
if len(kelem.key) == 0 {
|
||||
return tagValueElem{}
|
||||
}
|
||||
|
||||
|
@ -139,18 +142,21 @@ func (ts *TagSet) tagValueElem(key, value []byte) tagValueElem {
|
|||
// Find offset of tag value.
|
||||
offset := binary.BigEndian.Uint64(hashData[TagValueNSize+(pos*TagValueOffsetSize):])
|
||||
|
||||
// Parse into element.
|
||||
var e tagValueElem
|
||||
e.UnmarshalBinary(ts.data[offset:])
|
||||
// Evaluate value if offset is not empty.
|
||||
if offset > 0 {
|
||||
// Parse into element.
|
||||
var e tagValueElem
|
||||
e.UnmarshalBinary(ts.data[offset:])
|
||||
|
||||
// Return if values match.
|
||||
if bytes.Equal(e.value, value) {
|
||||
return e
|
||||
}
|
||||
// Return if values match.
|
||||
if bytes.Equal(e.value, value) {
|
||||
return e
|
||||
}
|
||||
|
||||
// Check if we've exceeded the probe distance.
|
||||
if d > dist(hashKey(e.value), pos, int(valueN)) {
|
||||
return tagValueElem{}
|
||||
// Check if we've exceeded the probe distance.
|
||||
if d > dist(hashKey(e.value), pos, int(valueN)) {
|
||||
return tagValueElem{}
|
||||
}
|
||||
}
|
||||
|
||||
// Move position forward.
|
||||
|
@ -304,10 +310,15 @@ func (tsw *TagSetWriter) createTagSetIfNotExists(key []byte) tagSet {
|
|||
|
||||
// WriteTo encodes the tag values & tag key blocks.
|
||||
func (tsw *TagSetWriter) WriteTo(w io.Writer) (n int64, err error) {
|
||||
// Write padding byte so no offsets are zero.
|
||||
if err := writeUint8To(w, 0, &n); err != nil {
|
||||
return n, err
|
||||
}
|
||||
|
||||
// Build key hash map with an exact capacity.
|
||||
m := rhh.NewHashMap(rhh.Options{
|
||||
Capacity: len(tsw.sets),
|
||||
LoadFactor: 100,
|
||||
LoadFactor: 90,
|
||||
})
|
||||
for key := range tsw.sets {
|
||||
ts := tsw.sets[key]
|
||||
|
@ -315,10 +326,10 @@ func (tsw *TagSetWriter) WriteTo(w io.Writer) (n int64, err error) {
|
|||
}
|
||||
|
||||
// Write value blocks in key map order.
|
||||
for i := 0; i < m.Len(); i++ {
|
||||
k, v := m.Elem(i)
|
||||
if k == nil {
|
||||
panic("rhh nil key")
|
||||
for i := 0; i < m.Cap(); i++ {
|
||||
_, v := m.Elem(i)
|
||||
if v == nil {
|
||||
continue
|
||||
}
|
||||
ts := v.(*tagSet)
|
||||
|
||||
|
@ -353,20 +364,17 @@ func (tsw *TagSetWriter) writeTagValueBlockTo(w io.Writer, values map[string]tag
|
|||
// Build RHH map from tag values.
|
||||
m := rhh.NewHashMap(rhh.Options{
|
||||
Capacity: len(values),
|
||||
LoadFactor: 100,
|
||||
LoadFactor: 90,
|
||||
})
|
||||
for value, tv := range values {
|
||||
m.Put([]byte(value), tv)
|
||||
}
|
||||
|
||||
// Encode value list.
|
||||
offsets := make([]int64, m.Len())
|
||||
for i := 0; i < m.Len(); i++ {
|
||||
offsets := make([]int64, m.Cap())
|
||||
for i := 0; i < m.Cap(); i++ {
|
||||
k, v := m.Elem(i)
|
||||
if k == nil {
|
||||
panic("rhh nil key")
|
||||
}
|
||||
tv := v.(tagValue)
|
||||
tv, _ := v.(tagValue)
|
||||
|
||||
// Save current offset so we can use it in the hash index.
|
||||
offsets[i] = *n
|
||||
|
@ -381,7 +389,7 @@ func (tsw *TagSetWriter) writeTagValueBlockTo(w io.Writer, values map[string]tag
|
|||
hoff = *n
|
||||
|
||||
// Encode hash map length.
|
||||
if err := writeUint32To(w, uint32(m.Len()), n); err != nil {
|
||||
if err := writeUint32To(w, uint32(m.Cap()), n); err != nil {
|
||||
return hoff, err
|
||||
}
|
||||
|
||||
|
@ -427,11 +435,11 @@ func (tsw *TagSetWriter) writeTagValueTo(w io.Writer, v []byte, tv tagValue, n *
|
|||
// writeTagKeyBlockTo encodes keys from a tag set into w.
|
||||
func (tsw *TagSetWriter) writeTagKeyBlockTo(w io.Writer, m *rhh.HashMap, n *int64) (hoff int64, err error) {
|
||||
// Encode key list.
|
||||
offsets := make([]int64, m.Len())
|
||||
for i := 0; i < m.Len(); i++ {
|
||||
offsets := make([]int64, m.Cap())
|
||||
for i := 0; i < m.Cap(); i++ {
|
||||
k, v := m.Elem(i)
|
||||
if k == nil {
|
||||
panic("rhh nil key")
|
||||
if v == nil {
|
||||
continue
|
||||
}
|
||||
ts := v.(*tagSet)
|
||||
|
||||
|
@ -448,7 +456,7 @@ func (tsw *TagSetWriter) writeTagKeyBlockTo(w io.Writer, m *rhh.HashMap, n *int6
|
|||
hoff = *n
|
||||
|
||||
// Encode hash map length.
|
||||
if err := writeUint32To(w, uint32(m.Len()), n); err != nil {
|
||||
if err := writeUint32To(w, uint32(m.Cap()), n); err != nil {
|
||||
return hoff, err
|
||||
}
|
||||
|
||||
|
|
|
@ -49,6 +49,7 @@ func TestTagSetWriter(t *testing.T) {
|
|||
var benchmarkTagSet10x1000 *tsi1.TagSet
|
||||
var benchmarkTagSet100x1000 *tsi1.TagSet
|
||||
var benchmarkTagSet1000x1000 *tsi1.TagSet
|
||||
var benchmarkTagSet1x1000000 *tsi1.TagSet
|
||||
|
||||
func BenchmarkTagSet_SeriesN_10_1000(b *testing.B) {
|
||||
benchmarkTagSet_SeriesN(b, 10, 1000, &benchmarkTagSet10x1000)
|
||||
|
@ -59,20 +60,21 @@ func BenchmarkTagSet_SeriesN_100_1000(b *testing.B) {
|
|||
func BenchmarkTagSet_SeriesN_1000_1000(b *testing.B) {
|
||||
benchmarkTagSet_SeriesN(b, 1000, 1000, &benchmarkTagSet1000x1000)
|
||||
}
|
||||
func BenchmarkTagSet_SeriesN_1_1000000(b *testing.B) {
|
||||
benchmarkTagSet_SeriesN(b, 1, 1000000, &benchmarkTagSet1x1000000)
|
||||
}
|
||||
|
||||
func benchmarkTagSet_SeriesN(b *testing.B, tagN, valueN int, ts **tsi1.TagSet) {
|
||||
if (*ts) == nil {
|
||||
tsw := tsi1.NewTagSetWriter()
|
||||
|
||||
// Write tagset block.
|
||||
var seriesID uint32
|
||||
var kbuf, vbuf [20]byte
|
||||
for i := 0; i < tagN; i++ {
|
||||
for j := 0; j < valueN; j++ {
|
||||
k := strconv.AppendInt(kbuf[:0], int64(i), 10)
|
||||
v := strconv.AppendInt(vbuf[:0], int64(j), 10)
|
||||
tsw.AddTagValueSeries(k, v, seriesID)
|
||||
seriesID++
|
||||
tsw.AddTagValueSeries(k, v, 1)
|
||||
}
|
||||
}
|
||||
tsw.AddSeries(map[string]string{"region": "us-east", "host": "server0"}, 1)
|
||||
|
|
Loading…
Reference in New Issue