Merge pull request #9793 from influxdata/bj-delete-nonprintable-unicode

Add key sanitization to deletetsm
pull/9801/head
Ben Johnson 2018-04-30 15:38:08 -06:00 committed by GitHub
commit 2e470974ec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 38 additions and 5 deletions

View File

@ -21,6 +21,7 @@ type Command struct {
Stdout io.Writer Stdout io.Writer
measurement string // measurement to delete measurement string // measurement to delete
sanitize bool // remove all keys with non-printable unicode
verbose bool // verbose logging verbose bool // verbose logging
} }
@ -36,6 +37,7 @@ func NewCommand() *Command {
func (cmd *Command) Run(args ...string) (err error) { func (cmd *Command) Run(args ...string) (err error) {
fs := flag.NewFlagSet("deletetsm", flag.ExitOnError) fs := flag.NewFlagSet("deletetsm", flag.ExitOnError)
fs.StringVar(&cmd.measurement, "measurement", "", "") fs.StringVar(&cmd.measurement, "measurement", "", "")
fs.BoolVar(&cmd.sanitize, "sanitize", false, "")
fs.BoolVar(&cmd.verbose, "v", false, "") fs.BoolVar(&cmd.verbose, "v", false, "")
fs.SetOutput(cmd.Stdout) fs.SetOutput(cmd.Stdout)
fs.Usage = cmd.printUsage fs.Usage = cmd.printUsage
@ -51,9 +53,9 @@ func (cmd *Command) Run(args ...string) (err error) {
log.SetOutput(ioutil.Discard) log.SetOutput(ioutil.Discard)
} }
// Validate measurement flag. // Validate measurement or sanitize flag.
if cmd.measurement == "" { if cmd.measurement == "" && !cmd.sanitize {
return fmt.Errorf("measurement name required") return fmt.Errorf("-measurement or -sanitize flag required")
} }
// Process each TSM file. // Process each TSM file.
@ -113,8 +115,8 @@ func (cmd *Command) process(path string) error {
// Skip block if this is the measurement and time range we are deleting. // Skip block if this is the measurement and time range we are deleting.
series, _ := tsm1.SeriesAndFieldFromCompositeKey(key) series, _ := tsm1.SeriesAndFieldFromCompositeKey(key)
measurement, _ := models.ParseKey(series) measurement, tags := models.ParseKey(series)
if string(measurement) == cmd.measurement { if string(measurement) == cmd.measurement || (cmd.sanitize && !models.ValidKeyTokens(measurement, tags)) {
log.Printf("deleting block: %s (%s-%s) sz=%d", log.Printf("deleting block: %s (%s-%s) sz=%d",
key, key,
time.Unix(0, minTime).UTC().Format(time.RFC3339Nano), time.Unix(0, minTime).UTC().Format(time.RFC3339Nano),
@ -147,6 +149,8 @@ Usage: influx_inspect deletetsm [flags] path...
-measurement NAME -measurement NAME
The name of the measurement to remove. The name of the measurement to remove.
-sanitize
Remove all keys with non-printable unicode characters.
-v -v
Enable verbose logging.`) Enable verbose logging.`)
} }

View File

@ -12,6 +12,8 @@ import (
"strconv" "strconv"
"strings" "strings"
"time" "time"
"unicode"
"unicode/utf8"
"github.com/influxdata/influxdb/pkg/escape" "github.com/influxdata/influxdb/pkg/escape"
) )
@ -2399,3 +2401,30 @@ func appendField(b []byte, k string, v interface{}) []byte {
return b return b
} }
// ValidKeyToken reports whether s, a token used as a measurement name, tag
// key, or tag value, is well-formed UTF-8 made up solely of runes that
// unicode.IsPrint accepts, excluding the Unicode replacement character.
func ValidKeyToken(s string) bool {
	// Reject malformed byte sequences before inspecting individual runes.
	if !utf8.ValidString(s) {
		return false
	}

	// Consume the string one rune at a time until nothing is left.
	for rest := s; len(rest) > 0; {
		r, width := utf8.DecodeRuneInString(rest)
		// A literal U+FFFD is valid UTF-8, so it has to be rejected explicitly.
		if r == unicode.ReplacementChar || !unicode.IsPrint(r) {
			return false
		}
		rest = rest[width:]
	}
	return true
}
// ValidKeyTokens reports whether the measurement name and the key and value
// of every tag each pass ValidKeyToken.
func ValidKeyTokens(name string, tags Tags) bool {
	ok := ValidKeyToken(name)
	for _, kv := range tags {
		// Stop at the first failure; nothing further needs checking.
		if !ok {
			break
		}
		ok = ValidKeyToken(string(kv.Key)) && ValidKeyToken(string(kv.Value))
	}
	return ok
}