Merge pull request #3716 from influxdb/jw-line

Fix escaping issues when parsing and serializing points
pull/3706/merge
Jason Wilder 2015-08-19 12:22:10 -06:00
commit b9270cb834
4 changed files with 235 additions and 39 deletions

View File

@ -67,6 +67,10 @@ Please see the *Features* section below for full details.
- [#3686](https://github.com/influxdb/influxdb/pull/3686): Ensure 'p' parameter is not logged, even on OPTIONS requests.
- [#3687](https://github.com/influxdb/influxdb/issues/3687): Fix panic: runtime error: makeslice: len out of range in hinted handoff
- [#3697](https://github.com/influxdb/influxdb/issues/3697): Correctly merge non-chunked results for same series. Fix issue #3242.
- [#3708](https://github.com/influxdb/influxdb/issues/3708): Fix double escaping measurement name during cluster replication
- [#3704](https://github.com/influxdb/influxdb/issues/3704): cluster replication issue for measurement name containing backslash
- [#3681](https://github.com/influxdb/influxdb/issues/3681): Quoted measurement names fail
- [#3682](https://github.com/influxdb/influxdb/issues/3682): Fix inserting string value with backslashes
## v0.9.2 [2015-07-24]

View File

@ -41,7 +41,7 @@ Field keys are always strings and follow the same syntactical rules as described
* _float_ - Numeric values that are not followed by a trailing i. (e.g. 1, 1.0, -3.14, 6.0+e5, 10).
* _boolean_ - A value indicating true or false. Valid boolean strings are (t, T, true, TRUE, f, F, false, and FALSE).
* _string_ - A text value. All string values _must_ be surrounded in double-quotes `"`. If the string contains
a double-quote, it must be escaped with a backslash, e.g. `\"`.
a double-quote or a backslash, that character must be escaped with a backslash, e.g. `\"`, `\\`.
```

View File

@ -91,6 +91,17 @@ var (
}
escapeCodesStr = map[string]string{}
measurementEscapeCodes = map[byte][]byte{
',': []byte(`\,`),
' ': []byte(`\ `),
}
tagEscapeCodes = map[byte][]byte{
',': []byte(`\,`),
' ': []byte(`\ `),
'=': []byte(`\=`),
}
)
func init() {
@ -135,9 +146,14 @@ func ParsePointsWithPrecision(buf []byte, defaultTime time.Time, precision strin
continue
}
pt, err := parsePoint(block, defaultTime, precision)
// strip the newline if one is present
if block[len(block)-1] == '\n' {
block = block[:len(block)-1]
}
pt, err := parsePoint(block[start:len(block)], defaultTime, precision)
if err != nil {
return nil, fmt.Errorf("unable to parse '%s': %v", string(block), err)
return nil, fmt.Errorf("unable to parse '%s': %v", string(block[start:len(block)]), err)
}
points = append(points, pt)
@ -389,6 +405,15 @@ func less(buf []byte, indices []int, i, j int) bool {
return bytes.Compare(a, b) < 0
}
// isFieldEscapeChar reports whether b is one of the bytes that has an entry
// in escapeCodes, i.e. a byte that may follow a backslash as an escape
// sequence outside of a quoted string field.
func isFieldEscapeChar(b byte) bool {
	// A direct map lookup answers the membership question without walking
	// every key of escapeCodes.
	_, ok := escapeCodes[b]
	return ok
}
// scanFields scans buf, starting at i for the fields section of a point. It returns
// the ending position and the byte slice of the fields within buf
func scanFields(buf []byte, i int) (int, []byte, error) {
@ -408,10 +433,18 @@ func scanFields(buf []byte, i int) (int, []byte, error) {
break
}
// escaped character
if buf[i] == '\\' {
i += 2
continue
// escaped characters?
if buf[i] == '\\' && i+1 < len(buf) {
// Is this an escape char within a string field? Only " and \ are allowed.
if quoted && (buf[i+1] == '"' || buf[i+1] == '\\') {
i += 2
continue
// Non-string field escaped chars
} else if !quoted && isFieldEscapeChar(buf[i+1]) {
i += 2
continue
}
}
// If the value is quoted, scan until we get to the end quote
@ -707,17 +740,12 @@ func scanLine(buf []byte, i int) (int, []byte) {
}
// If we see a double quote, makes sure it is not escaped
if buf[i] == '"' && buf[i-1] != '\\' {
if buf[i] == '"' && (i-1 > 0 && buf[i-1] != '\\') {
i += 1
quoted = !quoted
continue
}
if buf[i] == '\\' {
i += 2
continue
}
if buf[i] == '\n' && !quoted {
break
}
@ -807,15 +835,16 @@ func scanFieldValue(buf []byte, i int) (int, []byte) {
break
}
// If we see a double quote, makes sure it is not escaped
if buf[i] == '"' && buf[i-1] != '\\' {
i += 1
quoted = !quoted
// Only escape char for a field value is a double-quote
if buf[i] == '\\' && i+1 < len(buf) && buf[i+1] == '"' {
i += 2
continue
}
if buf[i] == '\\' {
i += 2
// Quoted value? (e.g. string)
if buf[i] == '"' {
i += 1
quoted = !quoted
continue
}
@ -827,6 +856,34 @@ func scanFieldValue(buf []byte, i int) (int, []byte) {
return i, buf[start:i]
}
// escapeMeasurement rewrites in so that every byte that has an entry in
// measurementEscapeCodes is replaced by its escape sequence.
func escapeMeasurement(in []byte) []byte {
	out := in
	for raw, seq := range measurementEscapeCodes {
		var needle [1]byte
		needle[0] = raw
		out = bytes.Replace(out, needle[:], seq, -1)
	}
	return out
}
// unescapeMeasurement rewrites in so that every escape sequence listed in
// measurementEscapeCodes is replaced by the single byte it stands for.
func unescapeMeasurement(in []byte) []byte {
	out := in
	for raw, seq := range measurementEscapeCodes {
		var plain [1]byte
		plain[0] = raw
		out = bytes.Replace(out, seq, plain[:], -1)
	}
	return out
}
// escapeTag rewrites in so that every byte that has an entry in
// tagEscapeCodes is replaced by its escape sequence.
func escapeTag(in []byte) []byte {
	out := in
	for raw, seq := range tagEscapeCodes {
		var needle [1]byte
		needle[0] = raw
		out = bytes.Replace(out, needle[:], seq, -1)
	}
	return out
}
// unescapeTag rewrites in so that every escape sequence listed in
// tagEscapeCodes is replaced by the single byte it stands for.
func unescapeTag(in []byte) []byte {
	out := in
	for raw, seq := range tagEscapeCodes {
		var plain [1]byte
		plain[0] = raw
		out = bytes.Replace(out, seq, plain[:], -1)
	}
	return out
}
func escape(in []byte) []byte {
for b, esc := range escapeCodes {
in = bytes.Replace(in, []byte{b}, esc, -1)
@ -855,19 +912,62 @@ func unescapeString(in string) string {
return in
}
// escapeQuoteString returns a copy of in with any double quotes that
// have not been escaped with escaped quotes
func escapeQuoteString(in string) string {
if strings.IndexAny(in, `"`) == -1 {
return in
// escapeStringField returns a copy of in in which every backslash and every
// double quote is preceded by an escaping backslash.
func escapeStringField(in string) string {
var out []byte
i := 0
for {
if i >= len(in) {
break
}
// escape backslashes
if in[i] == '\\' {
out = append(out, '\\')
out = append(out, '\\')
i += 1
continue
}
// escape double-quotes
if in[i] == '"' {
out = append(out, '\\')
out = append(out, '"')
i += 1
continue
}
// any other byte is copied through unchanged
out = append(out, in[i])
i += 1
}
return quoteReplacer.ReplaceAllString(in, `$1\"`)
return string(out)
}
// unescapeQuoteString returns a copy of in with any escaped double-quotes
// with unescaped double quotes
func unescapeQuoteString(in string) string {
return strings.Replace(in, `\"`, `"`, -1)
// unescapeStringField returns a copy of in with any escaped double-quotes
// or backslashes unescaped
func unescapeStringField(in string) string {
var out []byte
i := 0
for {
if i >= len(in) {
break
}
// unescape backslashes
if in[i] == '\\' && i+1 < len(in) && in[i+1] == '\\' {
out = append(out, '\\')
i += 2
continue
}
// unescape double-quotes
if in[i] == '\\' && i+1 < len(in) && in[i+1] == '"' {
out = append(out, '"')
i += 2
continue
}
// anything else, including a backslash that is not followed by a
// backslash or a double-quote, is copied through unchanged
out = append(out, in[i])
i += 1
}
return string(out)
}
// NewPoint returns a new point with the given measurement name, tags, fields and timestamp
@ -937,7 +1037,7 @@ func (p *point) Tags() Tags {
i, key = scanTo(p.key, i, '=')
i, value = scanTagValue(p.key, i+1)
tags[string(unescape(key))] = string(unescape(value))
tags[string(unescapeTag(key))] = string(unescapeTag(value))
i += 1
}
@ -946,7 +1046,9 @@ func (p *point) Tags() Tags {
}
// MakeKey builds a key by appending tags.HashKey() to the escaped form of
// the measurement name.
func MakeKey(name []byte, tags Tags) []byte {
return append(escape(name), tags.HashKey()...)
// unescape the name and then re-escape it to avoid double escaping.
// The key should always be stored in escaped form.
return append(escapeMeasurement(unescapeMeasurement(name)), tags.HashKey()...)
}
// SetTags replaces the tags for the point
@ -1040,9 +1142,9 @@ func (t Tags) HashKey() []byte {
escaped := Tags{}
for k, v := range t {
ek := escapeString(k)
ev := escapeString(v)
escaped[ek] = ev
ek := escapeTag([]byte(k))
ev := escapeTag([]byte(v))
escaped[string(ek)] = string(ev)
}
// Extract keys and determine final size.
@ -1120,7 +1222,7 @@ func newFieldsFromBinary(buf []byte) Fields {
// If the first char is a double-quote, then unmarshal as string
if valueBuf[0] == '"' {
value = unescapeQuoteString(string(valueBuf[1 : len(valueBuf)-1]))
value = unescapeStringField(string(valueBuf[1 : len(valueBuf)-1]))
// Check for numeric characters and special NaN or Inf
} else if (valueBuf[0] >= '0' && valueBuf[0] <= '9') || valueBuf[0] == '-' || valueBuf[0] == '+' || valueBuf[0] == '.' ||
valueBuf[0] == 'N' || valueBuf[0] == 'n' || // NaN
@ -1187,14 +1289,14 @@ func (p Fields) MarshalBinary() []byte {
b = append(b, t...)
case string:
b = append(b, '"')
b = append(b, []byte(escapeQuoteString(t))...)
b = append(b, []byte(escapeStringField(t))...)
b = append(b, '"')
case nil:
// skip
default:
// Can't determine the type, so convert to string
b = append(b, '"')
b = append(b, []byte(escapeQuoteString(fmt.Sprintf("%v", v)))...)
b = append(b, []byte(escapeStringField(fmt.Sprintf("%v", v)))...)
b = append(b, '"')
}

View File

@ -740,7 +740,37 @@ func TestParsePointWithStringWithCommas(t *testing.T) {
},
time.Unix(1, 0)),
)
}
func TestParsePointQuotedMeasurement(t *testing.T) {
// non-escaped comma
test(t, `"cpu",host=serverA,region=us-east value=1.0 1000000000`,
tsdb.NewPoint(
`"cpu"`,
tsdb.Tags{
"host": "serverA",
"region": "us-east",
},
tsdb.Fields{
"value": 1.0,
},
time.Unix(1, 0)),
)
}
func TestParsePointQuotedTags(t *testing.T) {
test(t, `cpu,"host"="serverA",region=us-east value=1.0 1000000000`,
tsdb.NewPoint(
"cpu",
tsdb.Tags{
`"host"`: `"serverA"`,
"region": "us-east",
},
tsdb.Fields{
"value": 1.0,
},
time.Unix(1, 0)),
)
}
func TestParsePointEscapedStringsAndCommas(t *testing.T) {
@ -771,7 +801,6 @@ func TestParsePointEscapedStringsAndCommas(t *testing.T) {
},
time.Unix(1, 0)),
)
}
func TestParsePointWithStringWithEquals(t *testing.T) {
@ -790,6 +819,48 @@ func TestParsePointWithStringWithEquals(t *testing.T) {
)
}
// TestParsePointWithStringWithBackslash checks how escaped backslashes and
// escaped double quotes inside a quoted string field are expected to parse.
// Each case pairs a line-protocol input with the string value the "value"
// field should hold afterwards.
func TestParsePointWithStringWithBackslash(t *testing.T) {
	cases := []struct {
		line  string
		value string
	}{
		// escaped backslash followed by an escaped quote
		{line: `cpu value="test\\\"" 1000000000`, value: `test\"`},
		// trailing escaped backslash
		{line: `cpu value="test\\" 1000000000`, value: `test\`},
		// trailing escaped quote
		{line: `cpu value="test\"" 1000000000`, value: `test"`},
	}

	for _, c := range cases {
		test(t, c.line,
			tsdb.NewPoint(
				"cpu",
				tsdb.Tags{},
				tsdb.Fields{
					"value": c.value,
				},
				time.Unix(1, 0)),
		)
	}
}
func TestParsePointWithBoolField(t *testing.T) {
test(t, `cpu,host=serverA,region=us-east true=true,t=t,T=T,TRUE=TRUE,True=True,false=false,f=f,F=F,FALSE=FALSE,False=False 1000000000`,
tsdb.NewPoint(
@ -1193,7 +1264,7 @@ func TestNewPointEscaped(t *testing.T) {
// equals
pt = tsdb.NewPoint("cpu=main", tsdb.Tags{"tag=bar": "value=foo"}, tsdb.Fields{"name=bar": 1.0}, time.Unix(0, 0))
if exp := `cpu\=main,tag\=bar=value\=foo name\=bar=1.0 0`; pt.String() != exp {
if exp := `cpu=main,tag\=bar=value\=foo name\=bar=1.0 0`; pt.String() != exp {
t.Errorf("NewPoint().String() mismatch.\ngot %v\nexp %v", pt.String(), exp)
}
}
@ -1216,3 +1287,22 @@ func TestNewPointUnhandledType(t *testing.T) {
t.Errorf("NewPoint().String() mismatch.\ngot %v\nexp %v", pt.String(), exp)
}
}
// TestMakeKeyEscaped checks that MakeKey produces the escaped measurement
// name both when the input is already escaped and when it is not, so that an
// already-escaped name is not escaped a second time.
func TestMakeKeyEscaped(t *testing.T) {
	cases := []struct {
		name string
		exp  string
	}{
		{name: `cpu\ load`, exp: `cpu\ load`},
		{name: `cpu load`, exp: `cpu\ load`},
		{name: `cpu\,load`, exp: `cpu\,load`},
		{name: `cpu,load`, exp: `cpu\,load`},
	}

	for _, c := range cases {
		// Convert once to a string so a failure prints the key text rather
		// than the decimal byte values of the returned slice.
		got := string(tsdb.MakeKey([]byte(c.name), tsdb.Tags{}))
		if got != c.exp {
			t.Errorf("MakeKey() mismatch.\ngot %v\nexp %v", got, c.exp)
		}
	}
}