fix: replace unprintable and invalid characters in errors (#23387)

Replace unprintable and invalid characters with '?'
in logged errors. Truncate each consecutive run of them
to at most 3 repeats of '?'.

closes https://github.com/influxdata/influxdb/issues/23386
pull/23408/head
davidby-influx 2022-06-01 13:45:24 -07:00 committed by GitHub
parent df65c01ba2
commit 0ae0bd6e2e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 56 additions and 1 deletions

View File

@ -15,6 +15,7 @@ import (
"sync"
"sync/atomic"
"time"
"unicode"
"unsafe"
"github.com/influxdata/influxdb/models"
@ -601,7 +602,7 @@ func (s *Shard) validateSeriesAndFields(points []models.Point, tracker StatsTrac
if validateKeys && !models.ValidKeyTokens(string(p.Name()), tags) {
dropped++
if reason == "" {
reason = fmt.Sprintf("key contains invalid unicode: \"%s\"", string(p.Key()))
reason = fmt.Sprintf("key contains invalid unicode: %q", makePrintable(string(p.Key())))
}
continue
}
@ -723,6 +724,28 @@ func (s *Shard) validateSeriesAndFields(points []models.Point, tracker StatsTrac
return points[:j], fieldsToCreate, err
}
// Settings that control how makePrintable masks runes that cannot be shown.
const (
	// unPrintReplRune is written in place of a masked rune.
	unPrintReplRune = '?'
	// unPrintMaxReplRune caps how many replacement runes are written for one
	// uninterrupted run of masked runes.
	unPrintMaxReplRune = 3
)

// makePrintable returns a copy of s in which every rune that is invalid UTF-8,
// is the Unicode replacement character, or is not printable according to
// unicode.IsPrint is shown as unPrintReplRune. An uninterrupted run of such
// runes produces at most unPrintMaxReplRune replacement runes; the remainder
// of the run is dropped.
func makePrintable(s string) string {
	var out strings.Builder
	out.Grow(len(s))

	// Each run of invalid bytes becomes a single U+FFFD, so it is handled by
	// the same masking path as any other rune that must be hidden.
	cleaned := strings.ToValidUTF8(s, string(unicode.ReplacementChar))

	masked := 0 // length of the current run of masked runes
	for _, r := range cleaned {
		if r != unicode.ReplacementChar && unicode.IsPrint(r) {
			out.WriteRune(r)
			masked = 0
			continue
		}
		if masked < unPrintMaxReplRune {
			out.WriteRune(unPrintReplRune)
		}
		masked++
	}
	return out.String()
}
func (s *Shard) createFieldsAndMeasurements(fieldsToCreate []*FieldCreate) error {
if len(fieldsToCreate) == 0 {
return nil

View File

@ -16,8 +16,40 @@ import (
"github.com/influxdata/influxdb/logger"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxql"
"github.com/stretchr/testify/require"
)
// TestShard_ErrorPrinting checks that makePrintable masks unprintable and
// invalid input: the output must match the expected string exactly, must be
// accepted by models.ValidKeyToken, must never contain more than
// unPrintMaxReplRune consecutive replacement runes, and must contain the
// expected number of separate replacement runs.
func TestShard_ErrorPrinting(t *testing.T) {
	tests := []struct {
		// nSeq is the expected number of separate runs of unPrintReplRune.
		nSeq int
		// raw is the input handed to makePrintable.
		raw string
		// want is the exact expected output; it guards against printable
		// characters being dropped or altered, which run counting cannot see.
		want string
	}{
		{1, string([]byte{'b', 'e', 'n', 't', 'e', 's', 't', '\t', '\n'}), "bentest??"},
		{1, string([]byte{'b', 'e', 'n', 't', 'e', 's', 0, 0, 0xFE, 0, 0xFE, 't'}), "bentes???t"},
		{2, string([]byte{0, 0, 0, 0, 0xFE, '\t', '\n', '\t', 'b', 'e', 'n', 't', 'e', 's', 't', 0, 0, 0, 0, 0xFE, '\t', '\n', '\t', '\t', '\t'}), "???bentest???"},
	}

	for _, tc := range tests {
		f := makePrintable(tc.raw)
		require.Equal(t, tc.want, f)
		require.True(t, models.ValidKeyToken(f))

		c := 0    // length of the current run of replacement runes
		nSeq := 0 // number of runs seen so far
		for _, r := range f {
			if r != unPrintReplRune {
				c = 0
				continue
			}
			c++
			if c == 1 {
				// First replacement rune after something else starts a new run.
				nSeq++
			}
			require.LessOrEqual(t, c, unPrintMaxReplRune, "too many repeated %c", unPrintReplRune)
		}
		require.Equalf(t, tc.nSeq, nSeq, "wrong number of elided sequences of replacement characters")
	}
}
func TestShard_MapType(t *testing.T) {
var sh *TempShard