Merge pull request #3716 from influxdb/jw-line

Fix escaping issues when parsing and serializing points
pull/3706/merge
Jason Wilder 2015-08-19 12:22:10 -06:00
commit b9270cb834
4 changed files with 235 additions and 39 deletions

View File

@ -67,6 +67,10 @@ Please see the *Features* section below for full details.
- [#3686](https://github.com/influxdb/influxdb/pull/3686): Ensure 'p' parameter is not logged, even on OPTIONS requests. - [#3686](https://github.com/influxdb/influxdb/pull/3686): Ensure 'p' parameter is not logged, even on OPTIONS requests.
- [#3687](https://github.com/influxdb/influxdb/issues/3687): Fix panic: runtime error: makeslice: len out of range in hinted handoff - [#3687](https://github.com/influxdb/influxdb/issues/3687): Fix panic: runtime error: makeslice: len out of range in hinted handoff
- [#3697](https://github.com/influxdb/influxdb/issues/3697): Correctly merge non-chunked results for same series. Fix issue #3242. - [#3697](https://github.com/influxdb/influxdb/issues/3697): Correctly merge non-chunked results for same series. Fix issue #3242.
- [#3708](https://github.com/influxdb/influxdb/issues/3708): Fix double escaping measurement name during cluster replication
- [#3704](https://github.com/influxdb/influxdb/issues/3704): cluster replication issue for measurement name containing backslash
- [#3681](https://github.com/influxdb/influxdb/issues/3681): Quoted measurement names fail
- [#3682](https://github.com/influxdb/influxdb/issues/3682): Fix inserting string value with backslashes
## v0.9.2 [2015-07-24] ## v0.9.2 [2015-07-24]

View File

@ -41,7 +41,7 @@ Field keys are always strings and follow the same syntactical rules as described
* _float_ - Numeric values that are not followed by a trailing i. (e.g. 1, 1.0, -3.14, 6.0+e5, 10). * _float_ - Numeric values that are not followed by a trailing i. (e.g. 1, 1.0, -3.14, 6.0+e5, 10).
* _boolean_ - A value indicating true or false. Valid boolean strings are (t, T, true, TRUE, f, F, false, and FALSE). * _boolean_ - A value indicating true or false. Valid boolean strings are (t, T, true, TRUE, f, F, false, and FALSE).
* _string_ - A text value. All string values _must_ be surrounded in double-quotes `"`. If the string contains * _string_ - A text value. All string values _must_ be surrounded in double-quotes `"`. If the string contains
a double-quote, it must be escaped with a backslash, e.g. `\"`. a double-quote or a backslash, each one must be escaped with a backslash, e.g. `\"`, `\\`.
``` ```

View File

@ -91,6 +91,17 @@ var (
} }
escapeCodesStr = map[string]string{} escapeCodesStr = map[string]string{}
measurementEscapeCodes = map[byte][]byte{
',': []byte(`\,`),
' ': []byte(`\ `),
}
tagEscapeCodes = map[byte][]byte{
',': []byte(`\,`),
' ': []byte(`\ `),
'=': []byte(`\=`),
}
) )
func init() { func init() {
@ -135,9 +146,14 @@ func ParsePointsWithPrecision(buf []byte, defaultTime time.Time, precision strin
continue continue
} }
pt, err := parsePoint(block, defaultTime, precision) // strip the newline if one is present
if block[len(block)-1] == '\n' {
block = block[:len(block)-1]
}
pt, err := parsePoint(block[start:len(block)], defaultTime, precision)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to parse '%s': %v", string(block), err) return nil, fmt.Errorf("unable to parse '%s': %v", string(block[start:len(block)]), err)
} }
points = append(points, pt) points = append(points, pt)
@ -389,6 +405,15 @@ func less(buf []byte, indices []int, i, j int) bool {
return bytes.Compare(a, b) < 0 return bytes.Compare(a, b) < 0
} }
// isFieldEscapeChar reports whether b is one of the keys of escapeCodes,
// i.e. a byte that has a registered escape sequence.
func isFieldEscapeChar(b byte) bool {
	// A direct map lookup answers the membership question without
	// walking every key of the map.
	_, ok := escapeCodes[b]
	return ok
}
// scanFields scans buf, starting at i for the fields section of a point. It returns // scanFields scans buf, starting at i for the fields section of a point. It returns
// the ending position and the byte slice of the fields within buf // the ending position and the byte slice of the fields within buf
func scanFields(buf []byte, i int) (int, []byte, error) { func scanFields(buf []byte, i int) (int, []byte, error) {
@ -408,10 +433,18 @@ func scanFields(buf []byte, i int) (int, []byte, error) {
break break
} }
// escaped character // escaped characters?
if buf[i] == '\\' { if buf[i] == '\\' && i+1 < len(buf) {
// Is this an escape char within a string field? Only " and \ are allowed.
if quoted && (buf[i+1] == '"' || buf[i+1] == '\\') {
i += 2 i += 2
continue continue
// Non-string field escaped chars
} else if !quoted && isFieldEscapeChar(buf[i+1]) {
i += 2
continue
}
} }
// If the value is quoted, scan until we get to the end quote // If the value is quoted, scan until we get to the end quote
@ -707,17 +740,12 @@ func scanLine(buf []byte, i int) (int, []byte) {
} }
// If we see a double quote, makes sure it is not escaped // If we see a double quote, makes sure it is not escaped
if buf[i] == '"' && buf[i-1] != '\\' { if buf[i] == '"' && (i-1 > 0 && buf[i-1] != '\\') {
i += 1 i += 1
quoted = !quoted quoted = !quoted
continue continue
} }
if buf[i] == '\\' {
i += 2
continue
}
if buf[i] == '\n' && !quoted { if buf[i] == '\n' && !quoted {
break break
} }
@ -807,15 +835,16 @@ func scanFieldValue(buf []byte, i int) (int, []byte) {
break break
} }
// If we see a double quote, makes sure it is not escaped // Only escape char for a field value is a double-quote
if buf[i] == '"' && buf[i-1] != '\\' { if buf[i] == '\\' && i+1 < len(buf) && buf[i+1] == '"' {
i += 1 i += 2
quoted = !quoted
continue continue
} }
if buf[i] == '\\' { // Quoted value? (e.g. string)
i += 2 if buf[i] == '"' {
i += 1
quoted = !quoted
continue continue
} }
@ -827,6 +856,34 @@ func scanFieldValue(buf []byte, i int) (int, []byte) {
return i, buf[start:i] return i, buf[start:i]
} }
// escapeMeasurement rewrites in so that every byte that is a key of
// measurementEscapeCodes is replaced by the escape sequence mapped to it.
func escapeMeasurement(in []byte) []byte {
	escaped := in
	for raw, sequence := range measurementEscapeCodes {
		// -1 replaces every occurrence of the raw byte, not just the first.
		escaped = bytes.Replace(escaped, []byte{raw}, sequence, -1)
	}
	return escaped
}
// unescapeMeasurement is the inverse of the measurement escaping step: each
// escape sequence listed in measurementEscapeCodes is replaced by the single
// byte it is keyed under.
func unescapeMeasurement(in []byte) []byte {
	plain := in
	for raw, sequence := range measurementEscapeCodes {
		// -1 replaces every occurrence of the sequence, not just the first.
		plain = bytes.Replace(plain, sequence, []byte{raw}, -1)
	}
	return plain
}
// escapeTag rewrites in so that every byte that is a key of tagEscapeCodes
// is replaced by the escape sequence mapped to it.
func escapeTag(in []byte) []byte {
	escaped := in
	for raw, sequence := range tagEscapeCodes {
		// -1 replaces every occurrence of the raw byte, not just the first.
		escaped = bytes.Replace(escaped, []byte{raw}, sequence, -1)
	}
	return escaped
}
// unescapeTag is the inverse of the tag escaping step: each escape sequence
// listed in tagEscapeCodes is replaced by the single byte it is keyed under.
func unescapeTag(in []byte) []byte {
	plain := in
	for raw, sequence := range tagEscapeCodes {
		// -1 replaces every occurrence of the sequence, not just the first.
		plain = bytes.Replace(plain, sequence, []byte{raw}, -1)
	}
	return plain
}
func escape(in []byte) []byte { func escape(in []byte) []byte {
for b, esc := range escapeCodes { for b, esc := range escapeCodes {
in = bytes.Replace(in, []byte{b}, esc, -1) in = bytes.Replace(in, []byte{b}, esc, -1)
@ -855,19 +912,62 @@ func unescapeString(in string) string {
return in return in
} }
// escapeQuoteString returns a copy of in with any double quotes that // escapeStringField returns a copy of in with any double quotes or
// have not been escaped with escaped quotes // backslashes with escaped values
func escapeQuoteString(in string) string { func escapeStringField(in string) string {
if strings.IndexAny(in, `"`) == -1 { var out []byte
return in i := 0
for {
if i >= len(in) {
break
} }
return quoteReplacer.ReplaceAllString(in, `$1\"`) // escape double-quotes
if in[i] == '\\' {
out = append(out, '\\')
out = append(out, '\\')
i += 1
continue
}
// escape double-quotes
if in[i] == '"' {
out = append(out, '\\')
out = append(out, '"')
i += 1
continue
}
out = append(out, in[i])
i += 1
}
return string(out)
} }
// unescapeQuoteString returns a copy of in with any escaped double-quotes // unescapeStringField returns a copy of in with any escaped double-quotes
// with unescaped double quotes // or backslashes unescaped
func unescapeQuoteString(in string) string { func unescapeStringField(in string) string {
return strings.Replace(in, `\"`, `"`, -1) var out []byte
i := 0
for {
if i >= len(in) {
break
}
// unescape backslashes
if in[i] == '\\' && i+1 < len(in) && in[i+1] == '\\' {
out = append(out, '\\')
i += 2
continue
}
// unescape double-quotes
if in[i] == '\\' && i+1 < len(in) && in[i+1] == '"' {
out = append(out, '"')
i += 2
continue
}
out = append(out, in[i])
i += 1
}
return string(out)
} }
// NewPoint returns a new point with the given measurement name, tags, fields and timestamp // NewPoint returns a new point with the given measurement name, tags, fields and timestamp
@ -937,7 +1037,7 @@ func (p *point) Tags() Tags {
i, key = scanTo(p.key, i, '=') i, key = scanTo(p.key, i, '=')
i, value = scanTagValue(p.key, i+1) i, value = scanTagValue(p.key, i+1)
tags[string(unescape(key))] = string(unescape(value)) tags[string(unescapeTag(key))] = string(unescapeTag(value))
i += 1 i += 1
} }
@ -946,7 +1046,9 @@ func (p *point) Tags() Tags {
} }
func MakeKey(name []byte, tags Tags) []byte { func MakeKey(name []byte, tags Tags) []byte {
return append(escape(name), tags.HashKey()...) // unescape the name and then re-escape it to avoid double escaping.
// The key should always be stored in escaped form.
return append(escapeMeasurement(unescapeMeasurement(name)), tags.HashKey()...)
} }
// SetTags replaces the tags for the point // SetTags replaces the tags for the point
@ -1040,9 +1142,9 @@ func (t Tags) HashKey() []byte {
escaped := Tags{} escaped := Tags{}
for k, v := range t { for k, v := range t {
ek := escapeString(k) ek := escapeTag([]byte(k))
ev := escapeString(v) ev := escapeTag([]byte(v))
escaped[ek] = ev escaped[string(ek)] = string(ev)
} }
// Extract keys and determine final size. // Extract keys and determine final size.
@ -1120,7 +1222,7 @@ func newFieldsFromBinary(buf []byte) Fields {
// If the first char is a double-quote, then unmarshal as string // If the first char is a double-quote, then unmarshal as string
if valueBuf[0] == '"' { if valueBuf[0] == '"' {
value = unescapeQuoteString(string(valueBuf[1 : len(valueBuf)-1])) value = unescapeStringField(string(valueBuf[1 : len(valueBuf)-1]))
// Check for numeric characters and special NaN or Inf // Check for numeric characters and special NaN or Inf
} else if (valueBuf[0] >= '0' && valueBuf[0] <= '9') || valueBuf[0] == '-' || valueBuf[0] == '+' || valueBuf[0] == '.' || } else if (valueBuf[0] >= '0' && valueBuf[0] <= '9') || valueBuf[0] == '-' || valueBuf[0] == '+' || valueBuf[0] == '.' ||
valueBuf[0] == 'N' || valueBuf[0] == 'n' || // NaN valueBuf[0] == 'N' || valueBuf[0] == 'n' || // NaN
@ -1187,14 +1289,14 @@ func (p Fields) MarshalBinary() []byte {
b = append(b, t...) b = append(b, t...)
case string: case string:
b = append(b, '"') b = append(b, '"')
b = append(b, []byte(escapeQuoteString(t))...) b = append(b, []byte(escapeStringField(t))...)
b = append(b, '"') b = append(b, '"')
case nil: case nil:
// skip // skip
default: default:
// Can't determine the type, so convert to string // Can't determine the type, so convert to string
b = append(b, '"') b = append(b, '"')
b = append(b, []byte(escapeQuoteString(fmt.Sprintf("%v", v)))...) b = append(b, []byte(escapeStringField(fmt.Sprintf("%v", v)))...)
b = append(b, '"') b = append(b, '"')
} }

View File

@ -740,7 +740,37 @@ func TestParsePointWithStringWithCommas(t *testing.T) {
}, },
time.Unix(1, 0)), time.Unix(1, 0)),
) )
}
// TestParsePointQuotedMeasurement passes a line whose measurement is wrapped
// in double quotes to the test helper, together with the point it is expected
// to correspond to.
func TestParsePointQuotedMeasurement(t *testing.T) {
// The expected measurement name keeps its surrounding double quotes: `"cpu"`.
test(t, `"cpu",host=serverA,region=us-east value=1.0 1000000000`,
tsdb.NewPoint(
`"cpu"`,
tsdb.Tags{
"host": "serverA",
"region": "us-east",
},
tsdb.Fields{
"value": 1.0,
},
time.Unix(1, 0)),
)
}
func TestParsePointQuotedTags(t *testing.T) {
test(t, `cpu,"host"="serverA",region=us-east value=1.0 1000000000`,
tsdb.NewPoint(
"cpu",
tsdb.Tags{
`"host"`: `"serverA"`,
"region": "us-east",
},
tsdb.Fields{
"value": 1.0,
},
time.Unix(1, 0)),
)
} }
func TestParsePointEscapedStringsAndCommas(t *testing.T) { func TestParsePointEscapedStringsAndCommas(t *testing.T) {
@ -771,7 +801,6 @@ func TestParsePointEscapedStringsAndCommas(t *testing.T) {
}, },
time.Unix(1, 0)), time.Unix(1, 0)),
) )
} }
func TestParsePointWithStringWithEquals(t *testing.T) { func TestParsePointWithStringWithEquals(t *testing.T) {
@ -790,6 +819,48 @@ func TestParsePointWithStringWithEquals(t *testing.T) {
) )
} }
// TestParsePointWithStringWithBackslash passes lines whose quoted string field
// contains escaped backslashes (`\\`) and escaped double-quotes (`\"`) to the
// test helper, each paired with the point carrying the expected literal value.
func TestParsePointWithStringWithBackslash(t *testing.T) {
	cases := []struct {
		line  string // line-protocol input
		value string // expected content of the "value" field
	}{
		// escaped backslash followed by an escaped double-quote
		{`cpu value="test\\\"" 1000000000`, `test\"`},
		// escaped backslash at the end of the string
		{`cpu value="test\\" 1000000000`, `test\`},
		// escaped double-quote at the end of the string
		{`cpu value="test\"" 1000000000`, `test"`},
	}

	for _, c := range cases {
		test(t, c.line,
			tsdb.NewPoint(
				"cpu",
				tsdb.Tags{},
				tsdb.Fields{
					"value": c.value,
				},
				time.Unix(1, 0)),
		)
	}
}
func TestParsePointWithBoolField(t *testing.T) { func TestParsePointWithBoolField(t *testing.T) {
test(t, `cpu,host=serverA,region=us-east true=true,t=t,T=T,TRUE=TRUE,True=True,false=false,f=f,F=F,FALSE=FALSE,False=False 1000000000`, test(t, `cpu,host=serverA,region=us-east true=true,t=t,T=T,TRUE=TRUE,True=True,false=false,f=f,F=F,FALSE=FALSE,False=False 1000000000`,
tsdb.NewPoint( tsdb.NewPoint(
@ -1193,7 +1264,7 @@ func TestNewPointEscaped(t *testing.T) {
// equals // equals
pt = tsdb.NewPoint("cpu=main", tsdb.Tags{"tag=bar": "value=foo"}, tsdb.Fields{"name=bar": 1.0}, time.Unix(0, 0)) pt = tsdb.NewPoint("cpu=main", tsdb.Tags{"tag=bar": "value=foo"}, tsdb.Fields{"name=bar": 1.0}, time.Unix(0, 0))
if exp := `cpu\=main,tag\=bar=value\=foo name\=bar=1.0 0`; pt.String() != exp { if exp := `cpu=main,tag\=bar=value\=foo name\=bar=1.0 0`; pt.String() != exp {
t.Errorf("NewPoint().String() mismatch.\ngot %v\nexp %v", pt.String(), exp) t.Errorf("NewPoint().String() mismatch.\ngot %v\nexp %v", pt.String(), exp)
} }
} }
@ -1216,3 +1287,22 @@ func TestNewPointUnhandledType(t *testing.T) {
t.Errorf("NewPoint().String() mismatch.\ngot %v\nexp %v", pt.String(), exp) t.Errorf("NewPoint().String() mismatch.\ngot %v\nexp %v", pt.String(), exp)
} }
} }
// TestMakeKeyEscaped checks that tsdb.MakeKey returns the measurement name in
// escaped form whether the name is passed in already escaped or unescaped.
func TestMakeKeyEscaped(t *testing.T) {
	cases := []struct {
		name string // measurement name handed to MakeKey
		exp  string // expected key
	}{
		{`cpu\ load`, `cpu\ load`},
		{`cpu load`, `cpu\ load`},
		{`cpu\,load`, `cpu\,load`},
		{`cpu,load`, `cpu\,load`},
	}

	for _, c := range cases {
		// Convert the key to a string once so that a failure prints readable
		// text instead of a list of numeric byte values.
		got := string(tsdb.MakeKey([]byte(c.name), tsdb.Tags{}))
		if got != c.exp {
			t.Errorf("MakeKey() mismatch.\ngot %v\nexp %v", got, c.exp)
		}
	}
}