diff --git a/etc/config.sample.toml b/etc/config.sample.toml index dec500d9fa..4aa39acdcd 100644 --- a/etc/config.sample.toml +++ b/etc/config.sample.toml @@ -67,6 +67,10 @@ # log any sensitive data contained within a query. # query-log-enabled = true + # Validates incoming writes to ensure keys only have valid unicode characters. + # This setting will incur a small overhead because every key must be checked. + # validate-keys = false + # Settings for the TSM engine # CacheMaxMemorySize is the maximum size a shard's cache can diff --git a/tsdb/config.go b/tsdb/config.go index 3aaa0499d3..40da1e97ea 100644 --- a/tsdb/config.go +++ b/tsdb/config.go @@ -71,6 +71,9 @@ type Config struct { // disks or when WAL write contention is seen. A value of 0 fsyncs every write to the WAL. WALFsyncDelay toml.Duration `toml:"wal-fsync-delay"` + // Enables unicode validation on series keys on write. + ValidateKeys bool `toml:"validate-keys"` + // Query logging QueryLogEnabled bool `toml:"query-log-enabled"` diff --git a/tsdb/shard.go b/tsdb/shard.go index dd90ed0a03..b95b463d2f 100644 --- a/tsdb/shard.go +++ b/tsdb/shard.go @@ -528,6 +528,9 @@ func (s *Shard) validateSeriesAndFields(points []models.Point) ([]models.Point, names := make([][]byte, len(points)) tagsSlice := make([]models.Tags, len(points)) + // Check if keys should be unicode validated. + validateKeys := s.options.Config.ValidateKeys + var j int for i, p := range points { tags := p.Tags() @@ -543,6 +546,15 @@ func (s *Shard) validateSeriesAndFields(points []models.Point) ([]models.Point, continue } + // Drop any series with invalid unicode characters in the key. + if validateKeys && !models.ValidKeyTokens(string(p.Name()), tags) { + dropped++ + if reason == "" { + reason = fmt.Sprintf("key contains invalid unicode: \"%s\"", string(p.Key())) + } + continue + } + keys[j] = p.Key() names[j] = p.Name() tagsSlice[j] = tags