const (
	// unPrintReplRune is written in place of invalid or non-printable runes.
	unPrintReplRune = '?'
	// unPrintMaxReplRune caps how many replacement runes are written for one
	// uninterrupted run of invalid or non-printable runes.
	unPrintMaxReplRune = 3
)

// makePrintable returns s with every invalid or non-printable rune replaced
// by unPrintReplRune. A run of consecutive replaced runes is truncated to at
// most unPrintMaxReplRune replacement runes, so long stretches of binary
// garbage do not flood the resulting message. Each run of invalid UTF-8 bytes
// counts as a single rune toward that limit. Strings that need no replacement
// are returned unchanged without building a copy.
func makePrintable(s string) string {
	// A rune is replaced when unicode.IsPrint rejects it or when it is the
	// Unicode replacement character itself.
	unprintable := func(r rune) bool {
		return r == unicode.ReplacementChar || !unicode.IsPrint(r)
	}

	// Collapse each run of invalid bytes into one replacement character so the
	// loop below only ever sees well-formed runes.
	valid := strings.ToValidUTF8(s, string(unicode.ReplacementChar))

	first := strings.IndexFunc(valid, unprintable)
	if first < 0 {
		// No replacement character is present, so s contained no invalid
		// bytes and valid is identical to s.
		return s
	}

	var b strings.Builder
	// Every replaced rune shrinks to a single byte, so the result never
	// exceeds the length of the input.
	b.Grow(len(s))
	b.WriteString(valid[:first])

	run := 0 // length of the current run of replaced runes
	for _, r := range valid[first:] {
		if !unprintable(r) {
			b.WriteRune(r)
			run = 0
			continue
		}
		if run < unPrintMaxReplRune {
			b.WriteRune(unPrintReplRune)
		}
		run++
	}
	return b.String()
}
0xFE, 't'})}, + {2, string([]byte{0, 0, 0, 0, 0xFE, '\t', '\n', '\t', 'b', 'e', 'n', 't', 'e', 's', 't', 0, 0, 0, 0, 0xFE, '\t', '\n', '\t', '\t', '\t'})}, + } + + for i, _ := range tests { + f := makePrintable(tests[i].raw) + require.True(t, models.ValidKeyToken(f)) + c := 0 + nSeq := 0 + for _, r := range f { + if r == unPrintReplRune { + c++ + if c == 1 { + nSeq++ + } + require.LessOrEqual(t, c, unPrintMaxReplRune, "too many repeated %c", unPrintReplRune) + } else { + c = 0 + } + } + require.Equalf(t, tests[i].nSeq, nSeq, "wrong number of elided sequences of replacement characters") + } +} + func TestShard_MapType(t *testing.T) { var sh *TempShard