Add option for unicode validation.

pull/9801/head
Ben Johnson 2018-05-02 11:16:55 -06:00
parent ef505542ad
commit 58aed93fe6
No known key found for this signature in database
GPG Key ID: 81741CD251883081
3 changed files with 19 additions and 0 deletions

View File

@@ -67,6 +67,10 @@
# log any sensitive data contained within a query.
# query-log-enabled = true
# Validates incoming writes to ensure that keys contain only valid Unicode characters.
# Enabling this setting incurs a small overhead because every key must be checked.
# validate-keys = false
# Settings for the TSM engine
# CacheMaxMemorySize is the maximum size a shard's cache can

View File

@@ -71,6 +71,9 @@ type Config struct {
// disks or when WAL write contention is seen. A value of 0 fsyncs every write to the WAL.
WALFsyncDelay toml.Duration `toml:"wal-fsync-delay"`
// ValidateKeys enables Unicode validation of series keys on write.
ValidateKeys bool `toml:"validate-keys"`
// Query logging
QueryLogEnabled bool `toml:"query-log-enabled"`

View File

@@ -528,6 +528,9 @@ func (s *Shard) validateSeriesAndFields(points []models.Point) ([]models.Point,
names := make([][]byte, len(points))
tagsSlice := make([]models.Tags, len(points))
// Check if keys should be unicode validated.
validateKeys := s.options.Config.ValidateKeys
var j int
for i, p := range points {
tags := p.Tags()
@@ -543,6 +546,15 @@ func (s *Shard) validateSeriesAndFields(points []models.Point) ([]models.Point,
continue
}
// Drop any point whose key contains invalid Unicode characters.
if validateKeys && !models.ValidKeyTokens(string(p.Name()), tags) {
dropped++
if reason == "" {
reason = fmt.Sprintf("key contains invalid unicode: \"%s\"", string(p.Key()))
}
continue
}
keys[j] = p.Key()
names[j] = p.Name()
tagsSlice[j] = tags