influxdb/tsdb/tsm1/batch_string.go

137 lines
3.5 KiB
Go

package tsm1
import (
"encoding/binary"
"errors"
"fmt"
"unsafe"
"github.com/golang/snappy"
)
var (
	// Sentinel errors returned by StringArrayDecodeAll when the decompressed
	// payload is malformed. The messages are constant, so errors.New is used
	// rather than fmt.Errorf.
	errStringBatchDecodeInvalidStringLength = errors.New("stringArrayDecodeAll: invalid encoded string length")
	errStringBatchDecodeLengthOverflow      = errors.New("stringArrayDecodeAll: length overflow")
	errStringBatchDecodeShortBuffer         = errors.New("stringArrayDecodeAll: short buffer")

	// ErrStringArrayEncodeTooLarge reports that the encoded length of a slice of strings is too large.
	ErrStringArrayEncodeTooLarge = errors.New("StringArrayEncodeAll: source length too large")
)
// StringArrayEncodeAll encodes src into b, returning b and any error encountered.
// The returned slice may be of a different length and capacity to b.
//
// The output is a one-byte header, carrying the compression scheme in its
// high nibble, followed by the snappy-compressed concatenation of every
// string, each prefixed with its length as an unsigned varint. Snappy is
// currently the only string compression scheme in use.
//
// If the input is too large for snappy to encode, b[:0] and
// ErrStringArrayEncodeTooLarge are returned.
func StringArrayEncodeAll(src []string, b []byte) ([]byte, error) {
	srcSz := 2 + len(src)*binary.MaxVarintLen32 // strings shouldn't be longer than 64kb
	for i := range src {
		srcSz += len(src[i])
	}

	// determine the maximum possible length needed for the buffer, which
	// includes the compressed size
	var compressedSz = 0
	if len(src) > 0 {
		mle := snappy.MaxEncodedLen(srcSz)
		if mle == -1 {
			return b[:0], ErrStringArrayEncodeTooLarge
		}
		compressedSz = mle + 1 /* header */
	}

	totSz := srcSz + compressedSz
	if cap(b) < totSz {
		b = make([]byte, totSz)
	} else {
		b = b[:totSz]
	}

	// Shortcut to snappy encoding nothing.
	if len(src) == 0 {
		b[0] = stringCompressedSnappy << 4
		// The snappy encoding of an empty input is the single byte 0x00
		// (a zero uncompressed length). Write it explicitly: when b is a
		// reused buffer this byte may hold stale data, which would make the
		// returned block undecodable.
		b[1] = 0
		return b[:2], nil
	}

	// write the data to be compressed *after* the space needed for snappy
	// compression. The compressed data is at the start of the allocated buffer,
	// ensuring the entire capacity is returned and available for subsequent use.
	dta := b[compressedSz:]
	n := 0
	for i := range src {
		n += binary.PutUvarint(dta[n:], uint64(len(src[i])))
		n += copy(dta[n:], src[i])
	}
	dta = dta[:n]

	dst := b[:compressedSz]
	dst[0] = stringCompressedSnappy << 4
	res := snappy.Encode(dst[1:], dta)
	// +1 accounts for the header byte that precedes the compressed payload.
	return dst[:len(res)+1], nil
}
// StringArrayDecodeAll decodes b, a block in the format written by
// StringArrayEncodeAll, and returns the decoded strings.
//
// dst is used as the destination when it has capacity; otherwise a new slice
// is allocated. The result may therefore be backed by a different array than
// dst. An empty b yields an empty, non-nil slice and no error.
//
// The returned strings reference the decompressed buffer directly rather than
// being copied out of it.
//
// On malformed input an empty slice is returned along with an error
// describing the failure.
func StringArrayDecodeAll(b []byte, dst []string) ([]string, error) {
	if len(b) == 0 {
		return []string{}, nil
	}

	// First byte stores the encoding type, only have snappy format
	// currently so ignore for now.
	//
	// It is important to note that `snappy.Decode` always returns a newly
	// allocated slice here, as the final strings reference this slice
	// directly.
	var err error
	b, err = snappy.Decode(nil, b[1:])
	if err != nil {
		return []string{}, fmt.Errorf("failed to decode string block: %v", err.Error())
	}

	sz := cap(dst)
	if sz == 0 {
		sz = 64
		dst = make([]string, sz)
	} else {
		dst = dst[:sz]
	}

	// Largest value representable by int on this platform.
	const maxInt = int(^uint(0) >> 1)

	i, j := 0, 0
	for i < len(b) {
		length, n := binary.Uvarint(b[i:])
		if n <= 0 {
			return []string{}, errStringBatchDecodeInvalidStringLength
		}

		// lower is the offset of the first byte of the string, just past the
		// varint-encoded length. It never exceeds len(b) because Uvarint
		// consumed n bytes from b[i:].
		lower := i + n

		// Validate the length as a uint64 *before* converting it to int so a
		// corrupt value cannot be truncated (32-bit int) or wrap negative and
		// slip past the bounds check below.
		if length > uint64(maxInt-lower) {
			return []string{}, errStringBatchDecodeLengthOverflow
		}

		upper := lower + int(length)
		if upper > len(b) {
			return []string{}, errStringBatchDecodeShortBuffer
		}

		// NOTE: this optimization is critical for performance and to reduce
		// allocations. This is just as "safe" as strings.Builder, which
		// returns a string mapped to the original byte slice
		s := b[lower:upper]
		val := *(*string)(unsafe.Pointer(&s))
		if j < len(dst) {
			dst[j] = val
		} else {
			dst = append(dst, val) // force a resize
			dst = dst[:cap(dst)]   // expose the full grown capacity for indexing
		}

		// Advance past the varint prefix and the string bytes.
		i = upper
		j++
	}

	return dst[:j], nil
}