Prevent out of range ints and floats from being parsed successfully
Field values that were out of range for the type would panic the database when being inserted because the parser would allow them as valid points. This change prevents those invalid values from being parsed and instead returns an error. An alternative fix considered was to handle the error and clamp the value to the min/max value for the type. This would treat numeric range errors slightly differently than other type errors, which might lead to confusion. The simplest fix with the current parser would be to just convert each field to the type at parse time. Unfortunately, this adds extra memory allocations and lowers throughput significantly. Since out of range values are less common than in-range values, some heuristics are used to determine when the more expensive type parsing and range checking is performed. Essentially, we only do the slow path when we cannot determine that the value is in an acceptable type range. Fixes #3127
pull/3140/head
parent
4d2915b49e
commit
17432598b1
|
@ -33,6 +33,8 @@
|
|||
- [#2102](https://github.com/influxdb/influxdb/issues/2102): Re-work Graphite input and metric processing
|
||||
- [#2996](https://github.com/influxdb/influxdb/issues/2996): Graphite Input Parsing
|
||||
- [#3136](https://github.com/influxdb/influxdb/pull/3136): Fix various issues with init.d script. Thanks @ miguelcnf.
|
||||
- [#2996](https://github.com/influxdb/influxdb/issues/2996): Graphite Input Parsing
|
||||
- [#3127](https://github.com/influxdb/influxdb/issues/3127): Trying to insert a number larger than the largest signed 64-bit number kills influxd
|
||||
|
||||
## v0.9.0 [2015-06-11]
|
||||
|
||||
|
|
|
@ -56,17 +56,35 @@ type point struct {
|
|||
data []byte
|
||||
}
|
||||
|
||||
// Compile the regex that detects unquoted double quote sequences
|
||||
var quoteReplacer = regexp.MustCompile(`([^\\])"`)
|
||||
const (
|
||||
// the number of characters for the largest possible int64 (9223372036854775807)
|
||||
maxInt64Digits = 19
|
||||
|
||||
var escapeCodes = map[byte][]byte{
|
||||
',': []byte(`\,`),
|
||||
'"': []byte(`\"`),
|
||||
' ': []byte(`\ `),
|
||||
'=': []byte(`\=`),
|
||||
}
|
||||
// the number of characters for the smallest possible int64 (-9223372036854775808)
|
||||
minInt64Digits = 20
|
||||
|
||||
var escapeCodesStr = map[string]string{}
|
||||
// the number of characters required for the largest float64 before a range check
|
||||
// would occur during parsing
|
||||
maxFloat64Digits = 25
|
||||
|
||||
// the number of characters required for the smallest float64 before a range check
|
||||
// would occur during parsing
|
||||
minFloat64Digits = 27
|
||||
)
|
||||
|
||||
var (
|
||||
// Compile the regex that detects unquoted double quote sequences
|
||||
quoteReplacer = regexp.MustCompile(`([^\\])"`)
|
||||
|
||||
escapeCodes = map[byte][]byte{
|
||||
',': []byte(`\,`),
|
||||
'"': []byte(`\"`),
|
||||
' ': []byte(`\ `),
|
||||
'=': []byte(`\=`),
|
||||
}
|
||||
|
||||
escapeCodesStr = map[string]string{}
|
||||
)
|
||||
|
||||
func init() {
|
||||
for k, v := range escapeCodes {
|
||||
|
@ -426,8 +444,12 @@ func scanNumber(buf []byte, i int) (int, []byte, error) {
|
|||
i += 1
|
||||
}
|
||||
|
||||
// how many decimal points we've seen
|
||||
decimals := 0
|
||||
|
||||
// indicates the number is float in scientific notation
|
||||
scientific := false
|
||||
|
||||
for {
|
||||
if i >= len(buf) {
|
||||
break
|
||||
|
@ -448,6 +470,7 @@ func scanNumber(buf []byte, i int) (int, []byte, error) {
|
|||
|
||||
// `e` is valid for floats but not as the first char
|
||||
if i > start && (buf[i] == 'e') {
|
||||
scientific = true
|
||||
i += 1
|
||||
continue
|
||||
}
|
||||
|
@ -464,6 +487,26 @@ func scanNumber(buf []byte, i int) (int, []byte, error) {
|
|||
i += 1
|
||||
}
|
||||
|
||||
// It's more common that numbers will be within min/max range for their type but we need to prevent
|
||||
// out of range numbers from being parsed successfully. This uses some simple heuristics to decide
|
||||
// if we should parse the number to the actual type. It does not do it all the time because it incurs
|
||||
// extra allocations and we end up converting the type again when writing points to disk.
|
||||
if decimals == 0 {
|
||||
// Parse the int to check bounds if the number of digits could be larger than the max range
|
||||
if len(buf[start:i]) >= maxInt64Digits || len(buf[start:i]) >= minInt64Digits {
|
||||
if _, err := strconv.ParseInt(string(buf[start:i]), 10, 64); err != nil {
|
||||
return i, buf[start:i], fmt.Errorf("invalid integer")
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Parse the float to check bounds if it's scientific or the number of digits could be larger than the max range
|
||||
if scientific || len(buf[start:i]) >= maxFloat64Digits || len(buf[start:i]) >= minFloat64Digits {
|
||||
if _, err := strconv.ParseFloat(string(buf[start:i]), 10); err != nil {
|
||||
return i, buf[start:i], fmt.Errorf("invalid float")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return i, buf[start:i], nil
|
||||
}
|
||||
|
||||
|
|
|
@ -2,13 +2,20 @@ package tsdb
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
var tags = Tags{"foo": "bar", "apple": "orange", "host": "serverA", "region": "uswest"}
|
||||
var (
|
||||
tags = Tags{"foo": "bar", "apple": "orange", "host": "serverA", "region": "uswest"}
|
||||
maxFloat64 = strconv.FormatFloat(math.MaxFloat64, 'f', 1, 64)
|
||||
minFloat64 = strconv.FormatFloat(-math.MaxFloat64, 'f', 1, 64)
|
||||
)
|
||||
|
||||
func TestMarshal(t *testing.T) {
|
||||
got := tags.hashKey()
|
||||
|
@ -189,6 +196,86 @@ func TestParsePointBadNumber(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestParsePointMaxInt64(t *testing.T) {
|
||||
// out of range
|
||||
_, err := ParsePointsString(`cpu,host=serverA,region=us-west value=9223372036854775808`)
|
||||
if err == nil {
|
||||
t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west value=9223372036854775808`)
|
||||
}
|
||||
|
||||
// max int
|
||||
_, err = ParsePointsString(`cpu,host=serverA,region=us-west value=9223372036854775807`)
|
||||
if err != nil {
|
||||
t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=9223372036854775807`, err)
|
||||
}
|
||||
|
||||
// leading zeros
|
||||
_, err = ParsePointsString(`cpu,host=serverA,region=us-west value=0009223372036854775807`)
|
||||
if err != nil {
|
||||
t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=0009223372036854775807`, err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParsePointMinInt64(t *testing.T) {
|
||||
// out of range
|
||||
_, err := ParsePointsString(`cpu,host=serverA,region=us-west value=-9223372036854775809`)
|
||||
if err == nil {
|
||||
t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west value=-9223372036854775809`)
|
||||
}
|
||||
|
||||
// min int
|
||||
_, err = ParsePointsString(`cpu,host=serverA,region=us-west value=-9223372036854775808`)
|
||||
if err != nil {
|
||||
t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=-9223372036854775808`, err)
|
||||
}
|
||||
|
||||
// leading zeros
|
||||
_, err = ParsePointsString(`cpu,host=serverA,region=us-west value=-0009223372036854775808`)
|
||||
if err != nil {
|
||||
t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=-0009223372036854775808`, err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParsePointMaxFloat64(t *testing.T) {
|
||||
// out of range
|
||||
_, err := ParsePointsString(fmt.Sprintf(`cpu,host=serverA,region=us-west value=%s`, "1"+string(maxFloat64)))
|
||||
if err == nil {
|
||||
t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west value=...`)
|
||||
}
|
||||
|
||||
// max float
|
||||
_, err = ParsePointsString(fmt.Sprintf(`cpu,host=serverA,region=us-west value=%s`, string(maxFloat64)))
|
||||
if err != nil {
|
||||
t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=9223372036854775807`, err)
|
||||
}
|
||||
|
||||
// leading zeros
|
||||
_, err = ParsePointsString(fmt.Sprintf(`cpu,host=serverA,region=us-west value=%s`, "0000"+string(maxFloat64)))
|
||||
if err != nil {
|
||||
t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=0009223372036854775807`, err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParsePointMinFloat64(t *testing.T) {
|
||||
// out of range
|
||||
_, err := ParsePointsString(fmt.Sprintf(`cpu,host=serverA,region=us-west value=%s`, "-1"+string(minFloat64)[1:]))
|
||||
if err == nil {
|
||||
t.Errorf(`ParsePoints("%s") mismatch. got nil, exp error`, `cpu,host=serverA,region=us-west value=...`)
|
||||
}
|
||||
|
||||
// min float
|
||||
_, err = ParsePointsString(fmt.Sprintf(`cpu,host=serverA,region=us-west value=%s`, string(minFloat64)))
|
||||
if err != nil {
|
||||
t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=...`, err)
|
||||
}
|
||||
|
||||
// leading zeros
|
||||
_, err = ParsePointsString(fmt.Sprintf(`cpu,host=serverA,region=us-west value=%s`, "-0000000"+string(minFloat64)[1:]))
|
||||
if err != nil {
|
||||
t.Errorf(`ParsePoints("%s") mismatch. got %v, exp nil`, `cpu,host=serverA,region=us-west value=...`, err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParsePointNumberNonNumeric(t *testing.T) {
|
||||
_, err := ParsePointsString(`cpu,host=serverA,region=us-west value=.1a`)
|
||||
if err == nil {
|
||||
|
|
Loading…
Reference in New Issue