Merge pull request #6995 from influxdata/mr-fuzz-crash-repros

Check slice bounds in tsm1 to avoid panics
pull/7014/merge
Jason Wilder 2016-07-20 16:38:30 -06:00 committed by GitHub
commit 6b71474434
13 changed files with 400 additions and 28 deletions

View File

@ -5,7 +5,10 @@ package tsm1
// how many booleans are packed in the slice. The remaining bytes contain 1 byte for every
// 8 boolean values encoded.
import "encoding/binary"
import (
"encoding/binary"
"fmt"
)
const (
// booleanUncompressed is an uncompressed boolean format.
@ -97,27 +100,44 @@ type BooleanDecoder struct {
// SetBytes initializes the decoder with a new set of bytes to read from.
// This must be called before calling any other methods.
//
// All decoder state, including an error recorded by an earlier call, is
// cleared first so that the same decoder value can be reused for several
// blocks. An empty b leaves the decoder with nothing to read. If the value
// count cannot be decoded the error is recorded on the decoder and Next
// reports false.
func (e *BooleanDecoder) SetBytes(b []byte) {
	// Reset before any early return so that neither the previous block's
	// data nor a previous error can leak into this one.
	e.b, e.i, e.n, e.err = nil, -1, 0, nil

	if len(b) == 0 {
		return
	}

	// The first byte stores the encoding type. There is only one bit-packed
	// format at the moment, so it is ignored for now.
	b = b[1:]
	count, n := binary.Uvarint(b)
	if n <= 0 {
		e.err = fmt.Errorf("BooleanDecoder: invalid count")
		return
	}

	e.b = b[n:]
	e.n = int(count)

	if maxBits := len(e.b) * 8; maxBits < e.n {
		// Shouldn't happen - TSM file was truncated/corrupted. Clamp the
		// count to the number of bits that are actually present.
		e.n = maxBits
	}
}
// Next moves the decoder to the following value and reports whether one is
// available. Once an error has been recorded it returns false without
// advancing the position.
func (e *BooleanDecoder) Next() bool {
	if e.err == nil {
		e.i++
		return e.i < e.n
	}
	return false
}
func (e *BooleanDecoder) Read() bool {
// Index into the byte slice
idx := e.i / 8
idx := e.i >> 3 // integer division by 8
// Bit position
pos := (8 - e.i%8) - 1
pos := 7 - (e.i & 0x7)
// The mask to select the bit
mask := byte(1 << uint(pos))

View File

@ -113,3 +113,49 @@ func Test_BooleanEncoder_Quick(t *testing.T) {
t.Fatal(err)
}
}
// Test_BooleanDecoder_Corrupt feeds empty and malformed encodings to a
// BooleanDecoder and requires that Next reports false for each of them.
func Test_BooleanDecoder_Corrupt(t *testing.T) {
	inputs := []string{
		"",         // Empty
		"\x10\x90", // Packed: invalid count
		"\x10\x7f", // Packed: count greater than remaining bits, multiple bytes expected
		"\x10\x01", // Packed: count greater than remaining bits, one byte expected
	}

	for idx := range inputs {
		raw := inputs[idx]

		var dec tsm1.BooleanDecoder
		dec.SetBytes([]byte(raw))
		if !dec.Next() {
			continue
		}
		t.Fatalf("exp next == false, got true for case %q", raw)
	}
}
// BenchmarkBooleanDecoder_2048 measures decoding of a block of 2048 booleans.
func BenchmarkBooleanDecoder_2048(b *testing.B) {
	benchmarkBooleanDecoder(b, 2048)
}

// benchmarkBooleanDecoder encodes size alternating boolean values once and
// then, for every benchmark iteration, decodes the whole block and checks
// that exactly size values were read.
func benchmarkBooleanDecoder(b *testing.B, size int) {
	enc := tsm1.NewBooleanEncoder()
	for v := 0; v < size; v++ {
		enc.Write(v&1 == 1)
	}

	encoded, err := enc.Bytes()
	if err != nil {
		b.Fatalf("unexpected error: %v", err)
	}

	// Encoding above is setup and is kept out of the timed region.
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		var dec tsm1.BooleanDecoder
		dec.SetBytes(encoded)

		read := 0
		for dec.Next() {
			_ = dec.Read()
			read++
		}
		if read != size {
			b.Fatalf("expected to read %d booleans, but read %d", size, read)
		}
	}
}

View File

@ -122,7 +122,10 @@ func BlockCount(block []byte) int {
panic(fmt.Sprintf("count of short block: got %v, exp %v", len(block), encodedBlockHeaderSize))
}
// first byte is the block type
tb, _ := unpackBlock(block[1:])
tb, _, err := unpackBlock(block[1:])
if err != nil {
panic(fmt.Sprintf("BlockCount: error unpacking block: %s", err.Error()))
}
return CountTimestamps(tb)
}
@ -255,7 +258,10 @@ func DecodeFloatBlock(block []byte, tdec *TimeDecoder, vdec *FloatDecoder, a *[]
}
block = block[1:]
tb, vb := unpackBlock(block)
tb, vb, err := unpackBlock(block)
if err != nil {
return nil, err
}
// Setup our timestamp and value decoders
tdec.Init(tb)
@ -356,7 +362,10 @@ func DecodeBooleanBlock(block []byte, tdec *TimeDecoder, vdec *BooleanDecoder, a
}
block = block[1:]
tb, vb := unpackBlock(block)
tb, vb, err := unpackBlock(block)
if err != nil {
return nil, err
}
// Setup our timestamp and value decoders
tdec.Init(tb)
@ -443,7 +452,10 @@ func DecodeIntegerBlock(block []byte, tdec *TimeDecoder, vdec *IntegerDecoder, a
block = block[1:]
// The first 8 bytes is the minimum timestamp of the block
tb, vb := unpackBlock(block)
tb, vb, err := unpackBlock(block)
if err != nil {
return nil, err
}
// Setup our timestamp and value decoders
tdec.Init(tb)
@ -530,7 +542,10 @@ func DecodeStringBlock(block []byte, tdec *TimeDecoder, vdec *StringDecoder, a *
block = block[1:]
// The first 8 bytes is the minimum timestamp of the block
tb, vb := unpackBlock(block)
tb, vb, err := unpackBlock(block)
if err != nil {
return nil, err
}
// Setup our timestamp and value decoders
tdec.Init(tb)
@ -583,15 +598,24 @@ func packBlock(ts []byte, values []byte) []byte {
return append(block, values...)
}
// unpackBlock splits buf into its timestamp section and its value section.
//
// buf starts with a uvarint holding the length in bytes of the timestamp
// section, followed by that many timestamp bytes; everything after them is
// the value section. An error is returned, with both slices nil, when the
// length prefix cannot be decoded or when it claims more bytes than buf
// holds. The returned slices alias buf.
func unpackBlock(buf []byte) (ts, values []byte, err error) {
	// Unpack the timestamp block length
	tsLen, i := binary.Uvarint(buf)
	if i <= 0 {
		return nil, nil, fmt.Errorf("unpackBlock: unable to read timestamp block length")
	}

	// Validate the length while it is still unsigned. Converting a corrupt,
	// very large tsLen to int first can wrap to a negative number that slips
	// past an upper-bound check and then panics when slicing.
	if tsLen > uint64(len(buf)-i) {
		return nil, nil, fmt.Errorf("unpackBlock: not enough data for timestamp")
	}
	tsIdx := i + int(tsLen)

	// Timestamp bytes first, then the remaining value bytes.
	return buf[i:tsIdx], buf[tsIdx:], nil
}

View File

@ -142,13 +142,19 @@ type FloatDecoder struct {
// SetBytes initializes the decoder with b. Must call before calling Next().
func (it *FloatDecoder) SetBytes(b []byte) error {
// first byte is the compression type.
// we currently just have gorilla compression.
it.br.Reset(b[1:])
var v uint64
if len(b) == 0 {
v = uvnan
} else {
// first byte is the compression type.
// we currently just have gorilla compression.
it.br.Reset(b[1:])
v, err := it.br.ReadBits(64)
if err != nil {
return err
var err error
v, err = it.br.ReadBits(64)
if err != nil {
return err
}
}
// Reset all fields.

View File

@ -239,6 +239,17 @@ func Test_FloatEncoder_Quick(t *testing.T) {
}, nil)
}
// TestFloatDecoder_Empty checks that a FloatDecoder accepts an empty byte
// slice without error and then reports that it has no values.
func TestFloatDecoder_Empty(t *testing.T) {
	var dec tsm1.FloatDecoder

	err := dec.SetBytes([]byte{})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if hasNext := dec.Next(); hasNext {
		t.Fatalf("exp next == false, got true")
	}
}
func BenchmarkFloatEncoder(b *testing.B) {
for i := 0; i < b.N; i++ {
s := tsm1.NewFloatEncoder()

View File

@ -193,7 +193,7 @@ func (d *IntegerDecoder) Next() bool {
d.err = fmt.Errorf("unknown encoding %v", d.encoding)
}
}
return d.i < d.n
return d.err == nil && d.i < d.n
}
func (d *IntegerDecoder) Error() error {
@ -219,6 +219,11 @@ func (d *IntegerDecoder) decodeRLE() {
return
}
if len(d.bytes) < 8 {
d.err = fmt.Errorf("IntegerDecoder: not enough data to decode RLE starting value")
return
}
var i, n int
// Next 8 bytes is the starting value
@ -227,11 +232,18 @@ func (d *IntegerDecoder) decodeRLE() {
// Next 1-10 bytes is the delta value
value, n := binary.Uvarint(d.bytes[i:])
if n <= 0 {
d.err = fmt.Errorf("IntegerDecoder: invalid RLE delta value")
return
}
i += n
// Last 1-10 bytes is how many times the value repeats
count, n := binary.Uvarint(d.bytes[i:])
if n <= 0 {
d.err = fmt.Errorf("IntegerDecoder: invalid RLE repeat value")
return
}
// Store the first value and delta value so we do not need to allocate
// a large values slice. We can compute the value at position d.i on
@ -250,6 +262,11 @@ func (d *IntegerDecoder) decodePacked() {
return
}
if len(d.bytes) < 8 {
d.err = fmt.Errorf("IntegerDecoder: not enough data to decode packed value")
return
}
v := binary.BigEndian.Uint64(d.bytes[0:8])
// The first value is always unencoded
if d.first {
@ -275,6 +292,11 @@ func (d *IntegerDecoder) decodeUncompressed() {
return
}
if len(d.bytes) < 8 {
d.err = fmt.Errorf("IntegerDecoder: not enough data to decode uncompressed value")
return
}
d.values[0] = binary.BigEndian.Uint64(d.bytes[0:8])
d.i = 0
d.n = 1

View File

@ -464,6 +464,25 @@ func Test_IntegerEncoder_Quick(t *testing.T) {
}, nil)
}
// Test_IntegerDecoder_Corrupt feeds empty and truncated encodings to an
// IntegerDecoder and requires that Next reports false for each of them.
func Test_IntegerDecoder_Corrupt(t *testing.T) {
	inputs := []string{
		"",                     // Empty
		"\x00abc",              // Uncompressed: less than 8 bytes
		"\x10abc",              // Packed: less than 8 bytes
		"\x20abc",              // RLE: less than 8 bytes
		"\x2012345678\x90",     // RLE: valid starting value but invalid delta value
		"\x2012345678\x01\x90", // RLE: valid starting, valid delta value, invalid repeat value
	}

	for idx := range inputs {
		var dec IntegerDecoder
		dec.SetBytes([]byte(inputs[idx]))
		if !dec.Next() {
			continue
		}
		t.Fatalf("exp next == false, got true")
	}
}
func BenchmarkIntegerEncoderRLE(b *testing.B) {
enc := NewIntegerEncoder()
x := make([]int64, 1024)

View File

@ -596,10 +596,15 @@ func (d *indirectIndex) Key(idx int) (string, []IndexEntry) {
if idx < 0 || idx >= len(d.offsets) {
return "", nil
}
n, key, _ := readKey(d.b[d.offsets[idx]:])
n, key, err := readKey(d.b[d.offsets[idx]:])
if err != nil {
return "", nil
}
var entries indexEntries
readEntries(d.b[int(d.offsets[idx])+n:], &entries)
if _, err := readEntries(d.b[int(d.offsets[idx])+n:], &entries); err != nil {
return "", nil
}
return string(key), entries.entries
}
@ -773,6 +778,9 @@ func (d *indirectIndex) UnmarshalBinary(b []byte) error {
// Keep a reference to the actual index bytes
d.b = b
if len(b) == 0 {
return nil
}
//var minKey, maxKey []byte
var minTime, maxTime int64 = math.MaxInt64, 0
@ -783,18 +791,28 @@ func (d *indirectIndex) UnmarshalBinary(b []byte) error {
// basically skips across the slice keeping track of the counter when we are at a key
// field.
var i int32
for i < int32(len(b)) {
iMax := int32(len(b))
for i < iMax {
d.offsets = append(d.offsets, i)
// Skip to the start of the values
// key length value (2) + type (1) + length of key
if i+2 >= iMax {
return fmt.Errorf("indirectIndex: not enough data for key length value")
}
i += 3 + int32(binary.BigEndian.Uint16(b[i:i+2]))
// count of index entries
if i+indexCountSize >= iMax {
return fmt.Errorf("indirectIndex: not enough data for index entries count")
}
count := int32(binary.BigEndian.Uint16(b[i : i+indexCountSize]))
i += indexCountSize
// Find the min time for the block
if i+8 >= iMax {
return fmt.Errorf("indirectIndex: not enough data for min time")
}
minT := int64(binary.BigEndian.Uint64(b[i : i+8]))
if minT < minTime {
minTime = minT
@ -803,6 +821,9 @@ func (d *indirectIndex) UnmarshalBinary(b []byte) error {
i += (count - 1) * indexEntrySize
// Find the max time for the block
if i+16 >= iMax {
return fmt.Errorf("indirectIndex: not enough data for max time")
}
maxT := int64(binary.BigEndian.Uint64(b[i+8 : i+16]))
if maxT > maxTime {
maxTime = maxT
@ -871,9 +892,15 @@ func (m *mmapAccessor) init() (*indirectIndex, error) {
if err != nil {
return nil, err
}
if len(m.b) < 8 {
return nil, fmt.Errorf("mmapAccessor: byte slice too small for indirectIndex")
}
indexOfsPos := len(m.b) - 8
indexStart := binary.BigEndian.Uint64(m.b[indexOfsPos : indexOfsPos+8])
if indexStart >= uint64(indexOfsPos) {
return nil, fmt.Errorf("mmapAccessor: invalid indexStart")
}
m.index = NewIndirectIndex()
if err := m.index.UnmarshalBinary(m.b[indexStart:indexOfsPos]); err != nil {
@ -1106,6 +1133,10 @@ func readKey(b []byte) (n int, key []byte, err error) {
}
func readEntries(b []byte, entries *indexEntries) (n int, err error) {
if len(b) < 1+indexCountSize {
return 0, fmt.Errorf("readEntries: data too short for headers")
}
// 1 byte block type
entries.Type = b[n]
n++
@ -1117,7 +1148,12 @@ func readEntries(b []byte, entries *indexEntries) (n int, err error) {
entries.entries = make([]IndexEntry, count)
for i := 0; i < count; i++ {
var ie IndexEntry
if err := ie.UnmarshalBinary(b[i*indexEntrySize+indexCountSize+indexTypeSize : i*indexEntrySize+indexCountSize+indexEntrySize+indexTypeSize]); err != nil {
start := i*indexEntrySize + indexCountSize + indexTypeSize
end := start + indexEntrySize
if end > len(b) {
return 0, fmt.Errorf("readEntries: data too short for indexEntry %d", i)
}
if err := ie.UnmarshalBinary(b[start:end]); err != nil {
return 0, fmt.Errorf("readEntries: unmarshal error: %v", err)
}
entries.entries[i] = ie

View File

@ -2,8 +2,10 @@ package tsm1_test
import (
"fmt"
"io/ioutil"
"math"
"os"
"path/filepath"
"testing"
"github.com/influxdata/influxdb/tsdb/engine/tsm1"
@ -1171,6 +1173,75 @@ func TestTSMReader_File_ReadAll(t *testing.T) {
}
}
// TestTSMReader_FuzzCrashes replays byte sequences that are written to disk
// as TSM files and then opened and iterated. The test makes no assertions
// about what is decoded: any error from the reader ends that case quietly,
// and only a failure to set up the file fails the test. Its purpose is to
// make sure none of these inputs panics the reader.
func TestTSMReader_FuzzCrashes(t *testing.T) {
	cases := []string{
		"",
		"\x16\xd1\x16\xd1\x01\x10\x14X\xfb\x03\xac~\x80\xf0\x00\x00\x00I^K" +
			"_\xf0\x00\x00\x00D424259389w\xf0\x00\x00\x00" +
			"o\x93\bO\x10?\xf0\x00\x00\x00\x00\b\x00\xc2_\xff\xd8\x0fX^" +
			"/\xbf\xe8\x00\x00\x00\x00\x00\x01\x00\bctr#!~#n\x00" +
			"\x00\x01\x14X\xfb\xb0\x03\xac~\x80\x14X\xfb\xb1\x00\xd4ܥ\x00\x00" +
			"\x00\x00\x00\x00\x00\x05\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00E",
		"\x16\xd1\x16\xd1\x01\x80'Z\\\x00\v)\x00\x00\x00\x00;\x9a\xca\x00" +
			"\x01\x05\x10?\xf0\x00\x00\x00\x00\x00\x00\xc2_\xff\xd6\x1d\xd4&\xed\v" +
			"\xc5\xf7\xfb\xc0\x00\x00\x00\x00\x00 \x00\x06a#!~#v\x00\x00" +
			"\x01\x00\x00\x00\x00;\x9a\xca\x00\x00\x00\x00\x01*\x05\xf2\x00\x00\x00\x00" +
			"\x00\x00\x00\x00\x00\x00\x00\x00\x002",
		"\x16\xd1\x16\xd1\x01\x80\xf0\x00\x00\x00I^K_\xf0\x00\x00\x00D7" +
			"\nw\xf0\x00\x00\x00o\x93\bO\x10?\xf0\x00\x00\x00\x00\x00\x00\xc2" +
			"_\xff\x14X\xfb\xb0\x03\xac~\x80\x14X\xfb\xb1\x00\xd4ܥ\x00\x00" +
			"\x00\x00\x00\x00\x00\x05\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00E",
		"\x16\xd1\x16\xd1\x01000000000000000" +
			"00000000000000000000" +
			"0000000000\x00\x000\x00\x0100000" +
			"000\x00\x00\x00\x00\x00\x00\x002",
		"\x16\xd1\x16\xd1\x01",
		"\x16\xd1\x16\xd1\x01\x00\x00o\x93\bO\x10?\xf0\x00\x00\x00\x00X^" +
			"/\xbf\xe8\x00\x00\x00\x00\x00\x01\x00\bctr#!~#n\x00" +
			"\x00\x01\x14X\xfb\xb0\x03\xac~\x80\x14X\xfb\xb1\x00\xd4ܥ\x00\x00" +
			"\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00E",
	}

	for _, c := range cases {
		// Each case runs in its own closure so its deferred cleanup fires
		// before the next case starts.
		func() {
			dir := MustTempDir()
			// Registered once; removing the directory also removes the file
			// written into it below.
			defer os.RemoveAll(dir)

			filename := filepath.Join(dir, "x.tsm")
			if err := ioutil.WriteFile(filename, []byte(c), 0600); err != nil {
				t.Fatalf("exp no error, got %s", err)
			}

			f, err := os.Open(filename)
			if err != nil {
				t.Fatalf("exp no error, got %s", err)
			}
			defer f.Close()

			// Errors from here on are an acceptable outcome for corrupt
			// input; the case simply ends.
			r, err := tsm1.NewTSMReader(f)
			if err != nil {
				return
			}
			defer r.Close()

			iter := r.BlockIterator()
			for iter.Next() {
				key, _, _, _, _, err := iter.Read()
				if err != nil {
					return
				}

				_, _ = r.Type(key)

				if _, err = r.ReadAll(key); err != nil {
					return
				}
			}
		}()
	}
}
func TestTSMReader_File_Read(t *testing.T) {
dir := MustTempDir()
defer os.RemoveAll(dir)

View File

@ -59,9 +59,13 @@ type StringDecoder struct {
func (e *StringDecoder) SetBytes(b []byte) error {
// First byte stores the encoding type, only have snappy format
// currently so ignore for now.
data, err := snappy.Decode(nil, b[1:])
if err != nil {
return fmt.Errorf("failed to decode string block: %v", err.Error())
var data []byte
if len(b) > 0 {
var err error
data, err = snappy.Decode(nil, b[1:])
if err != nil {
return fmt.Errorf("failed to decode string block: %v", err.Error())
}
}
e.b = data
@ -73,6 +77,10 @@ func (e *StringDecoder) SetBytes(b []byte) error {
}
// Next moves the read offset forward by the size of the most recently read
// entry and reports whether the offset is still inside the decoded data.
// Once an error has been recorded it returns false without moving.
func (e *StringDecoder) Next() bool {
	if e.err == nil {
		e.i += e.l
		return e.i < len(e.b)
	}
	return false
}
@ -80,11 +88,26 @@ func (e *StringDecoder) Next() bool {
// Read returns the string stored at the current offset.
//
// Each entry is a uvarint length followed by that many bytes. On success the
// total encoded size of the entry is remembered so that the next call to
// Next can skip over it. If the length prefix is invalid, or the length
// points past the end of the decoded data, the error is recorded on the
// decoder and the empty string is returned.
func (e *StringDecoder) Read() string {
	// Read the length of the string
	length, n := binary.Uvarint(e.b[e.i:])
	if n <= 0 {
		e.err = fmt.Errorf("StringDecoder: invalid encoded string length")
		return ""
	}

	// The length of this string plus the length of the variable byte encoded length
	e.l = int(length) + n

	lower := e.i + n
	upper := lower + int(length)
	// A corrupt length can wrap around when converted to int; catch that
	// before it is used as a slice bound.
	if upper < lower {
		e.err = fmt.Errorf("StringDecoder: length overflow")
		return ""
	}
	if upper > len(e.b) {
		e.err = fmt.Errorf("StringDecoder: not enough data to represent encoded string")
		return ""
	}

	return string(e.b[lower:upper])
}
func (e *StringDecoder) Error() error {

View File

@ -125,3 +125,57 @@ func Test_StringEncoder_Quick(t *testing.T) {
return true
}, nil)
}
// Test_StringDecoder_Empty checks that a StringDecoder accepts an empty byte
// slice without error and then reports that it has no values.
func Test_StringDecoder_Empty(t *testing.T) {
	var dec StringDecoder

	err := dec.SetBytes([]byte{})
	if err != nil {
		t.Fatal(err)
	}

	if hasNext := dec.Next(); hasNext {
		t.Fatalf("exp Next() == false, got true")
	}
}
// Test_StringDecoder_CorruptInitial covers blocks that SetBytes accepts and
// for which Next reports a first value, but whose first entry is corrupt:
// Read must leave an error on the decoder.
func Test_StringDecoder_CorruptInitial(t *testing.T) {
	inputs := []string{
		"\x10\x03\b\x03Hi", // Higher length than actual data
		"\x10\x1dp\x9c\x90\x90\x90\x90\x90\x90\x90\x90\x90length overflow----",
	}

	for idx := range inputs {
		raw := inputs[idx]

		var dec StringDecoder
		err := dec.SetBytes([]byte(raw))
		if err != nil {
			t.Fatal(err)
		}

		if hasNext := dec.Next(); !hasNext {
			t.Fatalf("exp Next() to return true, got false")
		}

		_ = dec.Read()
		if dec.Error() == nil {
			t.Fatalf("exp an err, got nil: %q", raw)
		}
	}
}
// Test_StringDecoder_CorruptReadAll drains a decoder that was given corrupt
// data. It asserts nothing about the values read; it only requires that
// SetBytes succeeds and that the Next/Read loop terminates.
func Test_StringDecoder_CorruptReadAll(t *testing.T) {
	inputs := []string{
		"0t\x00\x01\x000\x00\x01\x000\x00\x01\x000\x00\x01\x000\x00\x01" +
			"\x000\x00\x01\x000\x00\x01\x000\x00\x00\x00\xff:\x01\x00\x01\x00\x01" +
			"\x00\x01\x00\x01\x00\x01\x00\x010\x010\x000\x010\x010\x010\x01" +
			"0\x010\x010\x010\x010\x010\x010\x010\x010\x010\x010", // Upper slice bounds overflows negative
	}

	for idx := range inputs {
		var dec StringDecoder
		err := dec.SetBytes([]byte(inputs[idx]))
		if err != nil {
			t.Fatal(err)
		}

		// Read every entry the decoder is willing to hand out.
		for dec.Next() {
			_ = dec.Read()
		}
	}
}

View File

@ -209,6 +209,10 @@ func (d *TimeDecoder) Init(b []byte) {
}
func (d *TimeDecoder) Next() bool {
if d.err != nil {
return false
}
if d.encoding == timeCompressedRLE {
if d.i >= d.n {
return false
@ -252,6 +256,10 @@ func (d *TimeDecoder) decode(b []byte) {
}
func (d *TimeDecoder) decodePacked(b []byte) {
if len(b) < 9 {
d.err = fmt.Errorf("TimeDecoder: not enough data to decode packed timestamps")
return
}
div := uint64(math.Pow10(int(b[0] & 0xF)))
first := uint64(binary.BigEndian.Uint64(b[1:9]))
@ -275,6 +283,11 @@ func (d *TimeDecoder) decodePacked(b []byte) {
}
func (d *TimeDecoder) decodeRLE(b []byte) {
if len(b) < 9 {
d.err = fmt.Errorf("TimeDecoder: not enough data for initial RLE timestamp")
return
}
var i, n int
// Lower 4 bits hold the 10 based exponent so we can scale the values back up
@ -287,13 +300,21 @@ func (d *TimeDecoder) decodeRLE(b []byte) {
// Next 1-10 bytes is our (scaled down by factor of 10) run length values
value, n := binary.Uvarint(b[i:])
if n <= 0 {
d.err = fmt.Errorf("TimeDecoder: invalid run length in decodeRLE")
return
}
// Scale the value back up
value *= uint64(mod)
i += n
// Last 1-10 bytes is how many times the value repeats
count, _ := binary.Uvarint(b[i:])
count, n := binary.Uvarint(b[i:])
if n <= 0 {
d.err = fmt.Errorf("TimeDecoder: invalid repeat value in decodeRLE")
return
}
d.v = int64(first - value)
d.rleDelta = int64(value)

View File

@ -525,6 +525,25 @@ func TestTimeEncoder_Count_Simple8(t *testing.T) {
}
}
// TestTimeDecoder_Corrupt feeds empty and truncated encodings to a
// TimeDecoder and requires that Next reports false for each of them.
func TestTimeDecoder_Corrupt(t *testing.T) {
	inputs := []string{
		"",                 // Empty
		"\x10\x14",         // Packed: not enough data
		"\x20\x00",         // RLE: not enough data for starting timestamp
		"\x2012345678\x90", // RLE: initial timestamp but invalid uvarint encoding
		"\x2012345678\x7f", // RLE: timestamp, RLE but invalid repeat
		"\x00123",          // Raw: data length not multiple of 8
	}

	for idx := range inputs {
		var dec TimeDecoder
		dec.Init([]byte(inputs[idx]))
		if !dec.Next() {
			continue
		}
		t.Fatalf("exp next == false, got true")
	}
}
func BenchmarkTimeEncoder(b *testing.B) {
enc := NewTimeEncoder()
x := make([]int64, 1024)