feat(encoding): Improve timestamp encoding

Timestamp improvements prior to any improvements to simple8b

```
name                               old time/op    new time/op    delta
EncodeTimestamps/1000_seq/batch-8    2.64µs ± 1%    1.36µs ± 1%  -48.25%  (p=0.008 n=5+5)
EncodeTimestamps/1000_ran/batch-8    64.0µs ± 1%    32.2µs ± 1%  -49.64%  (p=0.008 n=5+5)
EncodeTimestamps/1000_dup/batch-8    9.32µs ± 0%    1.30µs ± 1%  -86.06%  (p=0.008 n=5+5)
```
pull/10616/head
Stuart Carnie 2018-09-28 12:39:52 -07:00 committed by Edd Robinson
parent a339f8f620
commit 9fa01f7115
2 changed files with 58 additions and 38 deletions

View File

@ -27,55 +27,66 @@ func TimeArrayEncodeAll(src []int64, b []byte) ([]byte, error) {
return nil, nil // Nothing to do
}
var rle = true
var max, div = uint64(0), uint64(1e12)
// To prevent an allocation of the entire block we're encoding reuse the
// src slice to store the encoded deltas.
deltas := reintepretInt64ToUint64Slice(src)
for i := len(deltas) - 1; i > 0; i-- {
deltas[i] = deltas[i] - deltas[i-1]
v := deltas[i]
if v > max {
max = v
if len(deltas) > 1 {
for i := len(deltas) - 1; i > 0; i-- {
deltas[i] = deltas[i] - deltas[i-1]
if deltas[i] > max {
max = deltas[i]
}
}
// Shrink the divisor by a factor of 10 until it evenly divides the delta (or reaches 1).
for div > 1 && v%div != 0 {
div /= 10
var rle = true
for i := 2; i < len(deltas); i++ {
if deltas[1] != deltas[i] {
rle = false
break
}
}
// Skip the first value || see if prev = curr. The deltas can be RLE if they are all equal.
rle = i == len(deltas)-1 || rle && (deltas[i+1] == deltas[i])
}
// Deltas are the same - encode with RLE
if rle {
// Large varints can take up to 10 bytes. We're storing 3 + 1
// type byte.
if len(b) < 31 && cap(b) >= 31 {
b = b[:31]
} else if len(b) < 31 {
b = append(b, make([]byte, 31-len(b))...)
}
// Deltas are the same - encode with RLE
if rle && len(deltas) > 1 {
// Large varints can take up to 10 bytes. We're storing 3 + 1
// type byte.
if len(b) < 31 && cap(b) >= 31 {
b = b[:31]
} else if len(b) < 31 {
b = append(b, make([]byte, 31-len(b))...)
// 4 high bits used for the encoding type
b[0] = byte(timeCompressedRLE) << 4
i := 1
// The first value
binary.BigEndian.PutUint64(b[i:], deltas[0])
i += 8
// The first delta, checking the divisor
// given all deltas are the same, we can do a single check for the divisor
v := deltas[1]
for div > 1 && v%div != 0 {
div /= 10
}
if div > 1 {
// 4 low bits are the log10 divisor
b[0] |= byte(math.Log10(float64(div)))
i += binary.PutUvarint(b[i:], deltas[1]/div)
} else {
i += binary.PutUvarint(b[i:], deltas[1])
}
// The number of times the delta is repeated
i += binary.PutUvarint(b[i:], uint64(len(deltas)))
return b[:i], nil
}
// 4 high bits used for the encoding type
b[0] = byte(timeCompressedRLE) << 4
// 4 low bits are the log10 divisor
b[0] |= byte(math.Log10(float64(div)))
i := 1
// The first value
binary.BigEndian.PutUint64(b[i:], deltas[0])
i += 8
// The first delta
i += binary.PutUvarint(b[i:], deltas[1]/div)
// The number of times the delta is repeated
i += binary.PutUvarint(b[i:], uint64(len(deltas)))
return b[:i], nil
}
// We can't compress this time-range, the deltas exceed 1 << 60
@ -96,6 +107,15 @@ func TimeArrayEncodeAll(src []int64, b []byte) ([]byte, error) {
return b[:sz], nil
}
// find divisor only if we're compressing with simple8b
for i := 1; i < len(deltas) && div > 1; i++ {
// Shrink the divisor by a factor of 10 until it evenly divides this delta (or reaches 1).
v := deltas[i]
for div > 1 && v%div != 0 {
div /= 10
}
}
// Only apply the divisor if it's greater than 1 since division is expensive.
if div > 1 {
for i := 1; i < len(deltas); i++ {

View File

@ -106,7 +106,7 @@ func testTimeArrayEncodeAll_Compare(t *testing.T, input []int64, encoding byte)
}
if got := result; !reflect.DeepEqual(got, exp) {
t.Fatalf("got result %v, expected %v", got, exp)
t.Fatalf("-got/+exp\n%s", cmp.Diff(got, exp))
}
// Check that the encoders are byte for byte the same...