package tsm1 import ( "encoding/binary" "fmt" "io" "math" "math/bits" "unsafe" ) // FloatArrayEncodeAll encodes src into b, returning b and any error encountered. // The returned slice may be of a different length and capactity to b. // // Currently only the float compression scheme used in Facebook's Gorilla is // supported, so this method implements a batch oriented version of that. func FloatArrayEncodeAll(src []float64, b []byte) ([]byte, error) { if cap(b) < 9 { b = make([]byte, 0, 9) // Enough room for the header and one value. } b = b[:1] b[0] = floatCompressedGorilla << 4 var first float64 var finished bool if len(src) > 0 && math.IsNaN(src[0]) { return nil, fmt.Errorf("unsupported value: NaN") } else if len(src) == 0 { first = math.NaN() // Write sentinal value to terminate batch. finished = true } else { first = src[0] src = src[1:] } b = b[:9] n := uint64(8 + 64) // Number of bits written. prev := math.Float64bits(first) // Write first value. binary.BigEndian.PutUint64(b[1:], prev) prevLeading, prevTrailing := ^uint64(0), uint64(0) var leading, trailing uint64 var mask uint64 var sum float64 // Encode remaining values. for i := 0; !finished; i++ { var x float64 if i < len(src) { x = src[i] sum += x } else { // Encode sentinal value to terminate batch x = math.NaN() finished = true } { cur := math.Float64bits(x) vDelta := cur ^ prev if vDelta == 0 { n++ // Write a zero bit. Nothing else to do. prev = cur continue } // First the current bit of the current byte is set to indicate we're // writing a delta value to the stream. for n>>3 >= uint64(len(b)) { // Keep growing b until we can fit all bits in. b = append(b, byte(0)) } // n&7 - current bit in current byte. // n>>3 - the current byte. b[n>>3] |= 128 >> (n & 7) // Sets the current bit of the current byte. n++ // Write the delta to b. // Determine the leading and trailing zeros. leading = uint64(bits.LeadingZeros64(vDelta)) trailing = uint64(bits.TrailingZeros64(vDelta)) // Clamp number of leading zeros to avoid overflow when encoding leading &= 0x1F if leading >= 32 { leading = 31 } // At least 2 further bits will be required. if (n+2)>>3 >= uint64(len(b)) { b = append(b, byte(0)) } if prevLeading != ^uint64(0) && leading >= prevLeading && trailing >= prevTrailing { n++ // Write a zero bit. // Write the l least significant bits of vDelta to b, most significant // bit first. l := uint64(64 - prevLeading - prevTrailing) for (n+l)>>3 >= uint64(len(b)) { // Keep growing b until we can fit all bits in. b = append(b, byte(0)) } // Full value to write. v := (vDelta >> prevTrailing) << (64 - l) // l least signifciant bits of v. var m = n & 7 // Current bit in current byte. var written uint64 if m > 0 { // In this case the current byte is not full. written = 8 - m if l < written { written = l } mask = v >> 56 // Move 8 MSB to 8 LSB b[n>>3] |= byte(mask >> m) n += written if l-written == 0 { prev = cur continue } } vv := v << written // Move written bits out of the way. // TODO(edd): Optimise this. It's unlikely we actually have 8 bytes to write. if (n>>3)+8 >= uint64(len(b)) { b = append(b, 0, 0, 0, 0, 0, 0, 0, 0) } binary.BigEndian.PutUint64(b[n>>3:], vv) n += (l - written) } else { prevLeading, prevTrailing = leading, trailing // Set a single bit to indicate a value will follow. b[n>>3] |= 128 >> (n & 7) // Set current bit on current byte n++ // Write 5 bits of leading. if (n+5)>>3 >= uint64(len(b)) { b = append(b, byte(0)) } // Enough room to write the 5 bits in the current byte? var m = n & 7 l := uint64(5) v := leading << 59 // 5 LSB of leading. mask = v >> 56 // Move 5 MSB to 8 LSB if m <= 3 { // 5 bits fit into current byte. b[n>>3] |= byte(mask >> m) n += l } else { // In this case there are fewer than 5 bits available in current byte. // First step is to fill current byte written := 8 - m b[n>>3] |= byte(mask >> m) // Some of mask will get lost. n += written // Second step is to write the lost part of mask into the next byte. mask = v << written // Move written bits in previous byte out of way. mask >>= 56 m = n & 7 // Recompute current bit. b[n>>3] |= byte(mask >> m) n += (l - written) } // Note that if leading == trailing == 0, then sigbits == 64. But that // value doesn't actually fit into the 6 bits we have. // Luckily, we never need to encode 0 significant bits, since that would // put us in the other case (vdelta == 0). So instead we write out a 0 and // adjust it back to 64 on unpacking. sigbits := 64 - leading - trailing if (n+6)>>3 >= uint64(len(b)) { b = append(b, byte(0)) } m = n & 7 l = uint64(6) v = sigbits << 58 // Move 6 LSB of sigbits to MSB mask = v >> 56 // Move 6 MSB to 8 LSB if m <= 2 { // The 6 bits fit into the current byte. b[n>>3] |= byte(mask >> m) n += l } else { // In this case there are fewer than 6 bits available in current byte. // First step is to fill the current byte. written := 8 - m b[n>>3] |= byte(mask >> m) // Write to the current bit. n += written // Second step is to write the lost part of mask into the next byte. // Write l remaining bits into current byte. mask = v << written // Remove bits written in previous byte out of way. mask >>= 56 m = n & 7 // Recompute current bit. b[n>>3] |= byte(mask >> m) n += l - written } // Write final value. m = n & 7 l = sigbits v = (vDelta >> trailing) << (64 - l) // Move l LSB into MSB for (n+l)>>3 >= uint64(len(b)) { // Keep growing b until we can fit all bits in. b = append(b, byte(0)) } var written uint64 if m > 0 { // In this case the current byte is not full. written = 8 - m if l < written { written = l } mask = v >> 56 // Move 8 MSB to 8 LSB b[n>>3] |= byte(mask >> m) n += written if l-written == 0 { prev = cur continue } } // Shift remaining bits and write out in one go. vv := v << written // Remove bits written in previous byte. // TODO(edd): Optimise this. if (n>>3)+8 >= uint64(len(b)) { b = append(b, 0, 0, 0, 0, 0, 0, 0, 0) } binary.BigEndian.PutUint64(b[n>>3:], vv) n += (l - written) } prev = cur } } if math.IsNaN(sum) { return nil, fmt.Errorf("unsupported value: NaN") } length := n >> 3 if n&7 > 0 { length++ // Add an extra byte to capture overflowing bits. } return b[:length], nil } // bitMask contains a lookup table where the index is the number of bits // and the value is a mask. The table is always read by ANDing the index // with 0x3f, such that if the index is 64, position 0 will be read, which // is a 0xffffffffffffffff, thus returning all bits. // // 00 = 0xffffffffffffffff // 01 = 0x0000000000000001 // 02 = 0x0000000000000003 // 03 = 0x0000000000000007 // ... // 62 = 0x3fffffffffffffff // 63 = 0x7fffffffffffffff var bitMask [64]uint64 func init() { v := uint64(1) for i := 1; i <= 64; i++ { bitMask[i&0x3f] = v v = v<<1 | 1 } } func FloatArrayDecodeAll(b []byte, buf []float64) ([]float64, error) { if len(b) < 9 { return []float64{}, nil } var ( val uint64 // current value trailingN uint8 // trailing zero count meaningfulN uint8 = 64 // meaningful bit count ) // first byte is the compression type; always Gorilla b = b[1:] val = binary.BigEndian.Uint64(b) if val == uvnan { if buf == nil { var tmp [1]float64 buf = tmp[:0] } // special case: there were no values to decode return buf[:0], nil } buf = buf[:0] // convert the []float64 to []uint64 to avoid calling math.Float64Frombits, // which results in unnecessary moves between Xn registers before moving // the value into the float64 slice. This change increased performance from // 320 MB/s to 340 MB/s on an Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz dst := *(*[]uint64)(unsafe.Pointer(&buf)) dst = append(dst, val) b = b[8:] // The bit reader code uses brCachedVal to store up to the next 8 bytes // of MSB data read from b. brValidBits stores the number of remaining unread // bits starting from the MSB. Before N bits are read from brCachedVal, // they are left-rotated N bits, such that they end up in the left-most position. // Using bits.RotateLeft64 results in a single instruction on many CPU architectures. // This approach permits simple tests, such as for the two control bits: // // brCachedVal&1 > 0 // // The alternative was to leave brCachedValue alone and perform shifts and // masks to read specific bits. The original approach looked like the // following: // // brCachedVal&(1<<(brValidBits&0x3f)) > 0 // var ( brCachedVal = uint64(0) // a buffer of up to the next 8 bytes read from b in MSB order brValidBits = uint8(0) // the number of unread bits remaining in brCachedVal ) // Refill brCachedVal, reading up to 8 bytes from b if len(b) >= 8 { // fast path reads 8 bytes directly brCachedVal = binary.BigEndian.Uint64(b) brValidBits = 64 b = b[8:] } else if len(b) > 0 { brCachedVal = 0 brValidBits = uint8(len(b) * 8) for i := range b { brCachedVal = (brCachedVal << 8) | uint64(b[i]) } brCachedVal = bits.RotateLeft64(brCachedVal, -int(brValidBits)) b = b[:0] } else { goto ERROR } // The expected exit condition is for a uvnan to be decoded. // Any other error (EOF) indicates a truncated stream. for { if brValidBits > 0 { // brValidBits > 0 is impossible to predict, so we place the // most likely case inside the if and immediately jump, keeping // the instruction pipeline consistently full. // This is a similar approach to using the GCC __builtin_expect // intrinsic, which modifies the order of branches such that the // likely case follows the conditional jump. // // Written as if brValidBits == 0 and placing the Refill brCachedVal // code inside reduces benchmarks from 318 MB/s to 260 MB/s on an // Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz goto READ0 } // Refill brCachedVal, reading up to 8 bytes from b if len(b) >= 8 { brCachedVal = binary.BigEndian.Uint64(b) brValidBits = 64 b = b[8:] } else if len(b) > 0 { brCachedVal = 0 brValidBits = uint8(len(b) * 8) for i := range b { brCachedVal = (brCachedVal << 8) | uint64(b[i]) } brCachedVal = bits.RotateLeft64(brCachedVal, -int(brValidBits)) b = b[:0] } else { goto ERROR } READ0: // read control bit 0 brValidBits -= 1 brCachedVal = bits.RotateLeft64(brCachedVal, 1) if brCachedVal&1 > 0 { if brValidBits > 0 { goto READ1 } // Refill brCachedVal, reading up to 8 bytes from b if len(b) >= 8 { brCachedVal = binary.BigEndian.Uint64(b) brValidBits = 64 b = b[8:] } else if len(b) > 0 { brCachedVal = 0 brValidBits = uint8(len(b) * 8) for i := range b { brCachedVal = (brCachedVal << 8) | uint64(b[i]) } brCachedVal = bits.RotateLeft64(brCachedVal, -int(brValidBits)) b = b[:0] } else { goto ERROR } READ1: // read control bit 1 brValidBits -= 1 brCachedVal = bits.RotateLeft64(brCachedVal, 1) if brCachedVal&1 > 0 { // read 5 bits for leading zero count and 6 bits for the meaningful data count const leadingTrailingBitCount = 11 var lmBits uint64 // leading + meaningful data counts if brValidBits >= leadingTrailingBitCount { // decode 5 bits leading + 6 bits meaningful for a total of 11 bits brValidBits -= leadingTrailingBitCount brCachedVal = bits.RotateLeft64(brCachedVal, leadingTrailingBitCount) lmBits = brCachedVal } else { bits01 := uint8(11) if brValidBits > 0 { bits01 -= brValidBits lmBits = bits.RotateLeft64(brCachedVal, 11) } // Refill brCachedVal, reading up to 8 bytes from b if len(b) >= 8 { brCachedVal = binary.BigEndian.Uint64(b) brValidBits = 64 b = b[8:] } else if len(b) > 0 { brCachedVal = 0 brValidBits = uint8(len(b) * 8) for i := range b { brCachedVal = (brCachedVal << 8) | uint64(b[i]) } brCachedVal = bits.RotateLeft64(brCachedVal, -int(brValidBits)) b = b[:0] } else { goto ERROR } brCachedVal = bits.RotateLeft64(brCachedVal, int(bits01)) brValidBits -= bits01 lmBits &^= bitMask[bits01&0x3f] lmBits |= brCachedVal & bitMask[bits01&0x3f] } lmBits &= 0x7ff leadingN := uint8((lmBits >> 6) & 0x1f) // 5 bits leading meaningfulN = uint8(lmBits & 0x3f) // 6 bits meaningful if meaningfulN > 0 { trailingN = 64 - leadingN - meaningfulN } else { // meaningfulN == 0 is a special case, such that all bits // are meaningful trailingN = 0 meaningfulN = 64 } } var sBits uint64 // significant bits if brValidBits >= meaningfulN { brValidBits -= meaningfulN brCachedVal = bits.RotateLeft64(brCachedVal, int(meaningfulN)) sBits = brCachedVal } else { mBits := meaningfulN if brValidBits > 0 { mBits -= brValidBits sBits = bits.RotateLeft64(brCachedVal, int(meaningfulN)) } // Refill brCachedVal, reading up to 8 bytes from b if len(b) >= 8 { brCachedVal = binary.BigEndian.Uint64(b) brValidBits = 64 b = b[8:] } else if len(b) > 0 { brCachedVal = 0 brValidBits = uint8(len(b) * 8) for i := range b { brCachedVal = (brCachedVal << 8) | uint64(b[i]) } brCachedVal = bits.RotateLeft64(brCachedVal, -int(brValidBits)) b = b[:0] } else { goto ERROR } brCachedVal = bits.RotateLeft64(brCachedVal, int(mBits)) brValidBits -= mBits sBits &^= bitMask[mBits&0x3f] sBits |= brCachedVal & bitMask[mBits&0x3f] } sBits &= bitMask[meaningfulN&0x3f] val ^= sBits << (trailingN & 0x3f) if val == uvnan { // IsNaN, eof break } } dst = append(dst, val) } return *(*[]float64)(unsafe.Pointer(&dst)), nil ERROR: return (*(*[]float64)(unsafe.Pointer(&dst)))[:0], io.EOF }