Prevent out of range ints and floats from being parsed successfully

Field values that were out of range for the type would panic the database
when being inserted because the parser would allow them as valid points.
This change prevents those invalid values from being parsed and instead
returns an error.

An alternative fix considered was to handle the error and clamp the value
to the min/max value for the type.  This would treat numeric range errors
slightly differently than other type errors, which might lead to confusion.

The simplest fix with the current parser would be to just convert each field
to the type at parse time.  Unfortunately, this adds extra memory allocations
and lowers throughput significantly.  Since out of range values are less common
than in-range values, some heuristics are used to determine when the more
expensive type parsing and range checking is performed.  Essentially, we only
do the slow path when we cannot determine that the value is in an acceptable
type range.

Fixes #3127
pull/3140/head
Jason Wilder 2015-06-25 13:20:20 -06:00
parent 4d2915b49e
commit 17432598b1
3 changed files with 142 additions and 10 deletions

View File

@ -33,6 +33,8 @@
- [#2102](https://github.com/influxdb/influxdb/issues/2102): Re-work Graphite input and metric processing - [#2102](https://github.com/influxdb/influxdb/issues/2102): Re-work Graphite input and metric processing
- [#2996](https://github.com/influxdb/influxdb/issues/2996): Graphite Input Parsing - [#2996](https://github.com/influxdb/influxdb/issues/2996): Graphite Input Parsing
- [#3136](https://github.com/influxdb/influxdb/pull/3136): Fix various issues with init.d script. Thanks @ miguelcnf. - [#3136](https://github.com/influxdb/influxdb/pull/3136): Fix various issues with init.d script. Thanks @ miguelcnf.
- [#2996](https://github.com/influxdb/influxdb/issues/2996): Graphite Input Parsing
- [#3127](https://github.com/influxdb/influxdb/issues/3127): Trying to insert a number larger than the largest signed 64-bit number kills influxd
## v0.9.0 [2015-06-11] ## v0.9.0 [2015-06-11]

View File

@ -56,17 +56,35 @@ type point struct {
data []byte data []byte
} }
// Compile the regex that detects unquoted double quote sequences const (
var quoteReplacer = regexp.MustCompile(`([^\\])"`) // the number of characters for the largest possible int64 (9223372036854775807)
maxInt64Digits = 19
var escapeCodes = map[byte][]byte{ // the number of characters for the smallest possible int64 (-9223372036854775808)
minInt64Digits = 20
// the number of characters required for the largest float64 before a range check
// would occur during parsing
maxFloat64Digits = 25
// the number of characters required for the smallest float64 before a range check
// would occur during parsing
minFloat64Digits = 27
)
var (
// Compile the regex that detects unquoted double quote sequences
quoteReplacer = regexp.MustCompile(`([^\\])"`)
escapeCodes = map[byte][]byte{
',': []byte(`\,`), ',': []byte(`\,`),
'"': []byte(`\"`), '"': []byte(`\"`),
' ': []byte(`\ `), ' ': []byte(`\ `),
'=': []byte(`\=`), '=': []byte(`\=`),
} }
var escapeCodesStr = map[string]string{} escapeCodesStr = map[string]string{}
)
func init() { func init() {
for k, v := range escapeCodes { for k, v := range escapeCodes {
@ -426,8 +444,12 @@ func scanNumber(buf []byte, i int) (int, []byte, error) {
i += 1 i += 1
} }
// how many decimal points we've seen
decimals := 0 decimals := 0
// indicates the number is float in scientific notation
scientific := false
for { for {
if i >= len(buf) { if i >= len(buf) {
break break
@ -448,6 +470,7 @@ func scanNumber(buf []byte, i int) (int, []byte, error) {
// `e` is valid for floats but not as the first char // `e` is valid for floats but not as the first char
if i > start && (buf[i] == 'e') { if i > start && (buf[i] == 'e') {
scientific = true
i += 1 i += 1
continue continue
} }
@ -464,6 +487,26 @@ func scanNumber(buf []byte, i int) (int, []byte, error) {
i += 1 i += 1
} }
// It's more common that numbers will be within min/max range for their type but we need to prevent
// out of range numbers from being parsed successfully. This uses some simple heuristics to decide
// if we should parse the number to the actual type. It does not do it all the time because it incurs
// extra allocations and we end up converting the type again when writing points to disk.
if decimals == 0 {
// Parse the int to check bounds if the number of digits could be larger than the max range
if len(buf[start:i]) >= maxInt64Digits || len(buf[start:i]) >= minInt64Digits {
if _, err := strconv.ParseInt(string(buf[start:i]), 10, 64); err != nil {
return i, buf[start:i], fmt.Errorf("invalid integer")
}
}
} else {
// Parse the float to check bounds if it's scientific or the number of digits could be larger than the max range
if scientific || len(buf[start:i]) >= maxFloat64Digits || len(buf[start:i]) >= minFloat64Digits {
if _, err := strconv.ParseFloat(string(buf[start:i]), 10); err != nil {
return i, buf[start:i], fmt.Errorf("invalid float")
}
}
}
return i, buf[start:i], nil return i, buf[start:i], nil
} }

View File

@ -2,13 +2,20 @@ package tsdb
import ( import (
"bytes" "bytes"
"fmt"
"math"
"reflect" "reflect"
"strconv"
"strings" "strings"
"testing" "testing"
"time" "time"
) )
var tags = Tags{"foo": "bar", "apple": "orange", "host": "serverA", "region": "uswest"} var (
tags = Tags{"foo": "bar", "apple": "orange", "host": "serverA", "region": "uswest"}
maxFloat64 = strconv.FormatFloat(math.MaxFloat64, 'f', 1, 64)
minFloat64 = strconv.FormatFloat(-math.MaxFloat64, 'f', 1, 64)
)
func TestMarshal(t *testing.T) { func TestMarshal(t *testing.T) {
got := tags.hashKey() got := tags.hashKey()
@ -189,6 +196,86 @@ func TestParsePointBadNumber(t *testing.T) {
} }
} }
// TestParsePointMaxInt64 checks integer field parsing around the upper int64
// bound: one past the maximum must be rejected, while the maximum itself and
// the maximum padded with leading zeros must be accepted.
func TestParsePointMaxInt64(t *testing.T) {
	cases := []struct {
		line    string
		wantErr bool
	}{
		// out of range
		{`cpu,host=serverA,region=us-west value=9223372036854775808`, true},
		// max int
		{`cpu,host=serverA,region=us-west value=9223372036854775807`, false},
		// leading zeros
		{`cpu,host=serverA,region=us-west value=0009223372036854775807`, false},
	}

	for _, c := range cases {
		_, err := ParsePointsString(c.line)
		switch {
		case c.wantErr && err == nil:
			t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, c.line)
		case !c.wantErr && err != nil:
			t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, c.line, err)
		}
	}
}
// TestParsePointMinInt64 checks integer field parsing around the lower int64
// bound: one below the minimum must be rejected, while the minimum itself and
// the minimum padded with leading zeros must be accepted.
func TestParsePointMinInt64(t *testing.T) {
	cases := []struct {
		line    string
		wantErr bool
	}{
		// out of range
		{`cpu,host=serverA,region=us-west value=-9223372036854775809`, true},
		// min int
		{`cpu,host=serverA,region=us-west value=-9223372036854775808`, false},
		// leading zeros
		{`cpu,host=serverA,region=us-west value=-0009223372036854775808`, false},
	}

	for _, c := range cases {
		_, err := ParsePointsString(c.line)
		switch {
		case c.wantErr && err == nil:
			t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, c.line)
		case !c.wantErr && err != nil:
			t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, c.line, err)
		}
	}
}
// TestParsePointMaxFloat64 checks float field parsing around the upper float64
// bound, using the decimal rendering of math.MaxFloat64 held in maxFloat64.
// Prepending a digit to that rendering must be rejected; the rendering itself
// and the rendering padded with leading zeros must be accepted.
//
// Failure messages elide the (several hundred digit) value as "..." rather
// than echoing an unrelated int64 literal.
func TestParsePointMaxFloat64(t *testing.T) {
	// out of range
	_, err := ParsePointsString(fmt.Sprintf(`cpu,host=serverA,region=us-west value=%s`, "1"+maxFloat64))
	if err == nil {
		t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west value=...`)
	}

	// max float
	_, err = ParsePointsString(fmt.Sprintf(`cpu,host=serverA,region=us-west value=%s`, maxFloat64))
	if err != nil {
		t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=...`, err)
	}

	// leading zeros
	_, err = ParsePointsString(fmt.Sprintf(`cpu,host=serverA,region=us-west value=%s`, "0000"+maxFloat64))
	if err != nil {
		t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=...`, err)
	}
}
func TestParsePointMinFloat64(t *testing.T) {
// out of range
_, err := ParsePointsString(fmt.Sprintf(`cpu,host=serverA,region=us-west value=%s`, "-1"+string(minFloat64)[1:]))
if err == nil {
t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west value=...`)
}
// min float
_, err = ParsePointsString(fmt.Sprintf(`cpu,host=serverA,region=us-west value=%s`, string(minFloat64)))
if err != nil {
t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=...`, err)
}
// leading zeros
_, err = ParsePointsString(fmt.Sprintf(`cpu,host=serverA,region=us-west value=%s`, "-0000000"+string(minFloat64)[1:]))
if err != nil {
t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=...`, err)
}
}
func TestParsePointNumberNonNumeric(t *testing.T) { func TestParsePointNumberNonNumeric(t *testing.T) {
_, err := ParsePointsString(`cpu,host=serverA,region=us-west value=.1a`) _, err := ParsePointsString(`cpu,host=serverA,region=us-west value=.1a`)
if err == nil { if err == nil {