From 4cb36d53017ee59d5c797551c4e3cccfd8abb274 Mon Sep 17 00:00:00 2001
From: Ben Johnson
Date: Mon, 30 Apr 2018 13:06:33 -0600
Subject: [PATCH] Add key sanitization to deletetsm

This commit adds the `-sanitize` flag to `influx_inspect deletetsm`,
which deletes all keys that contain invalid UTF-8, non-printable
characters, or the Unicode replacement character.

Usage:

```sh
$ influx_inspect deletetsm -sanitize PATH
```
---
 cmd/influx_inspect/deletetsm/deletetsm.go | 14 +++++++----
 models/points.go                          | 29 +++++++++++++++++++++++
 2 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/cmd/influx_inspect/deletetsm/deletetsm.go b/cmd/influx_inspect/deletetsm/deletetsm.go
index 33af542fa3..451cc03d7d 100644
--- a/cmd/influx_inspect/deletetsm/deletetsm.go
+++ b/cmd/influx_inspect/deletetsm/deletetsm.go
@@ -21,6 +21,7 @@ type Command struct {
 	Stdout io.Writer
 
 	measurement string // measurement to delete
+	sanitize    bool   // remove all keys with non-printable unicode
 	verbose     bool   // verbose logging
 }
 
@@ -36,6 +37,7 @@ func NewCommand() *Command {
 func (cmd *Command) Run(args ...string) (err error) {
 	fs := flag.NewFlagSet("deletetsm", flag.ExitOnError)
 	fs.StringVar(&cmd.measurement, "measurement", "", "")
+	fs.BoolVar(&cmd.sanitize, "sanitize", false, "")
 	fs.BoolVar(&cmd.verbose, "v", false, "")
 	fs.SetOutput(cmd.Stdout)
 	fs.Usage = cmd.printUsage
@@ -51,9 +53,9 @@ func (cmd *Command) Run(args ...string) (err error) {
 		log.SetOutput(ioutil.Discard)
 	}
 
-	// Validate measurement flag.
-	if cmd.measurement == "" {
-		return fmt.Errorf("measurement name required")
+	// Validate measurement or sanitize flag.
+	if cmd.measurement == "" && !cmd.sanitize {
+		return fmt.Errorf("-measurement or -sanitize flag required")
 	}
 
 	// Process each TSM file.
@@ -113,8 +115,8 @@ func (cmd *Command) process(path string) error {
 
 		// Skip block if this is the measurement and time range we are deleting.
 		series, _ := tsm1.SeriesAndFieldFromCompositeKey(key)
-		measurement, _ := models.ParseKey(series)
-		if string(measurement) == cmd.measurement {
+		measurement, tags := models.ParseKey(series)
+		if string(measurement) == cmd.measurement || (cmd.sanitize && !models.ValidKeyTokens(measurement, tags)) {
 			log.Printf("deleting block: %s (%s-%s) sz=%d",
 				key,
 				time.Unix(0, minTime).UTC().Format(time.RFC3339Nano),
@@ -147,6 +149,8 @@ Usage: influx_inspect deletetsm [flags] path...
 
     -measurement NAME
             The name of the measurement to remove.
+    -sanitize
+            Remove all keys with non-printable unicode characters.
     -v
             Enable verbose logging.
 `)
diff --git a/models/points.go b/models/points.go
index da9e27674e..99c57570aa 100644
--- a/models/points.go
+++ b/models/points.go
@@ -12,6 +12,8 @@ import (
 	"strconv"
 	"strings"
 	"time"
+	"unicode"
+	"unicode/utf8"
 
 	"github.com/influxdata/influxdb/pkg/escape"
 )
@@ -2399,3 +2401,30 @@ func appendField(b []byte, k string, v interface{}) []byte {
 
 	return b
 }
+
+// ValidKeyToken returns true if the token used for measurement, tag key, or tag
+// value is a valid unicode string and only contains printable, non-replacement characters.
+func ValidKeyToken(s string) bool {
+	if !utf8.ValidString(s) {
+		return false
+	}
+	for _, r := range s {
+		if !unicode.IsPrint(r) || r == unicode.ReplacementChar {
+			return false
+		}
+	}
+	return true
+}
+
+// ValidKeyTokens returns true if the measurement name and all tags are valid.
+func ValidKeyTokens(name string, tags Tags) bool {
+	if !ValidKeyToken(name) {
+		return false
+	}
+	for _, tag := range tags {
+		if !ValidKeyToken(string(tag.Key)) || !ValidKeyToken(string(tag.Value)) {
+			return false
+		}
+	}
+	return true
+}