influxdb/cmd/influx_inspect/report/report.go

327 lines
7.9 KiB
Go

// Package report reports statistics about TSM files.
package report
import (
"flag"
"fmt"
"io"
"math"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"text/tabwriter"
"time"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/tsdb/engine/tsm1"
"github.com/retailnext/hllpp"
)
// Command represents the program execution for "influxd report".
type Command struct {
Stderr io.Writer
Stdout io.Writer
dir string
pattern string
detailed, exact bool
}
// NewCommand returns a new instance of Command.
func NewCommand() *Command {
return &Command{
Stderr: os.Stderr,
Stdout: os.Stdout,
}
}
// Run executes the command.
func (cmd *Command) Run(args ...string) error {
fs := flag.NewFlagSet("report", flag.ExitOnError)
fs.StringVar(&cmd.pattern, "pattern", "", "Include only files matching a pattern")
fs.BoolVar(&cmd.detailed, "detailed", false, "Report detailed cardinality estimates")
fs.BoolVar(&cmd.exact, "exact", false, "Report exact counts")
fs.SetOutput(cmd.Stdout)
fs.Usage = cmd.printUsage
if err := fs.Parse(args); err != nil {
return err
}
newCounterFn := newHLLCounter
estTitle := " (est)"
if cmd.exact {
estTitle = ""
newCounterFn = newExactCounter
}
cmd.dir = fs.Arg(0)
err := cmd.isShardDir(cmd.dir)
if cmd.detailed && err != nil {
return fmt.Errorf("-detailed only supported for shard dirs.")
}
totalSeries := newCounterFn()
tagCardinalities := map[string]counter{}
measCardinalities := map[string]counter{}
fieldCardinalities := map[string]counter{}
dbCardinalities := map[string]counter{}
start := time.Now()
tw := tabwriter.NewWriter(cmd.Stdout, 8, 2, 1, ' ', 0)
fmt.Fprintln(tw, strings.Join([]string{"DB", "RP", "Shard", "File", "Series", "New" + estTitle, "Min Time", "Max Time", "Load Time"}, "\t"))
minTime, maxTime := int64(math.MaxInt64), int64(math.MinInt64)
var fileCount int
if err := cmd.WalkShardDirs(cmd.dir, func(db, rp, id, path string) error {
if cmd.pattern != "" && strings.Contains(path, cmd.pattern) {
return nil
}
file, err := os.OpenFile(path, os.O_RDONLY, 0600)
if err != nil {
fmt.Fprintf(cmd.Stderr, "error: %s: %v. Skipping.\n", path, err)
return nil
}
loadStart := time.Now()
reader, err := tsm1.NewTSMReader(file)
if err != nil {
fmt.Fprintf(cmd.Stderr, "error: %s: %v. Skipping.\n", file.Name(), err)
return nil
}
loadTime := time.Since(loadStart)
fileCount++
dbCount := dbCardinalities[db]
if dbCount == nil {
dbCount = newCounterFn()
dbCardinalities[db] = dbCount
}
oldCount := dbCount.Count()
seriesCount := reader.KeyCount()
for i := 0; i < seriesCount; i++ {
key, _ := reader.KeyAt(i)
totalSeries.Add([]byte(key))
dbCount.Add([]byte(key))
if cmd.detailed {
sep := strings.Index(string(key), "#!~#")
seriesKey, field := key[:sep], key[sep+4:]
measurement, tags := models.ParseKey(seriesKey)
measCount := measCardinalities[measurement]
if measCount == nil {
measCount = newCounterFn()
measCardinalities[measurement] = measCount
}
measCount.Add([]byte(key))
fieldCount := fieldCardinalities[measurement]
if fieldCount == nil {
fieldCount = newCounterFn()
fieldCardinalities[measurement] = fieldCount
}
fieldCount.Add([]byte(field))
for _, t := range tags {
tagCount := tagCardinalities[string(t.Key)]
if tagCount == nil {
tagCount = newCounterFn()
tagCardinalities[string(t.Key)] = tagCount
}
tagCount.Add(t.Value)
}
}
}
minT, maxT := reader.TimeRange()
if minT < minTime {
minTime = minT
}
if maxT > maxTime {
maxTime = maxT
}
reader.Close()
fmt.Fprintln(tw, strings.Join([]string{
db, rp, id,
filepath.Base(file.Name()),
strconv.FormatInt(int64(seriesCount), 10),
strconv.FormatInt(int64(dbCount.Count()-oldCount), 10),
time.Unix(0, minT).UTC().Format(time.RFC3339Nano),
time.Unix(0, maxT).UTC().Format(time.RFC3339Nano),
loadTime.String(),
}, "\t"))
if cmd.detailed {
tw.Flush()
}
return nil
}); err != nil {
return err
}
tw.Flush()
println()
println("Summary:")
fmt.Printf(" Files: %d\n", fileCount)
fmt.Printf(" Time Range: %s - %s\n",
time.Unix(0, minTime).UTC().Format(time.RFC3339Nano),
time.Unix(0, maxTime).UTC().Format(time.RFC3339Nano),
)
fmt.Printf(" Duration: %s \n", time.Unix(0, maxTime).Sub(time.Unix(0, minTime)))
println()
fmt.Printf("Statistics\n")
fmt.Printf(" Series:\n")
for db, counts := range dbCardinalities {
fmt.Printf(" - %s%s: %d (%d%%)\n", db, estTitle, counts.Count(), int(float64(counts.Count())/float64(totalSeries.Count())*100))
}
fmt.Printf(" Total%s: %d\n", estTitle, totalSeries.Count())
if cmd.detailed {
fmt.Printf("\n Measurements (est):\n")
for _, t := range sortKeys(measCardinalities) {
fmt.Printf(" - %v: %d (%d%%)\n", t, measCardinalities[t].Count(), int((float64(measCardinalities[t].Count())/float64(totalSeries.Count()))*100))
}
fmt.Printf("\n Fields (est):\n")
for _, t := range sortKeys(fieldCardinalities) {
fmt.Printf(" - %v: %d\n", t, fieldCardinalities[t].Count())
}
fmt.Printf("\n Tags (est):\n")
for _, t := range sortKeys(tagCardinalities) {
fmt.Printf(" - %v: %d\n", t, tagCardinalities[t].Count())
}
}
fmt.Printf("Completed in %s\n", time.Since(start))
return nil
}
// sortKeys is a quick helper to return the sorted set of a map's keys
func sortKeys(vals map[string]counter) (keys []string) {
for k := range vals {
keys = append(keys, k)
}
sort.Strings(keys)
return keys
}
func (cmd *Command) isShardDir(dir string) error {
name := filepath.Base(dir)
if id, err := strconv.Atoi(name); err != nil || id < 1 {
return fmt.Errorf("not a valid shard dir: %v", dir)
}
return nil
}
func (cmd *Command) WalkShardDirs(root string, fn func(db, rp, id, path string) error) error {
type location struct {
db, rp, id, path string
}
var dirs []location
if err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if info.IsDir() {
return nil
}
if filepath.Ext(info.Name()) == "."+tsm1.TSMFileExtension {
shardDir := filepath.Dir(path)
if err := cmd.isShardDir(shardDir); err != nil {
return err
}
absPath, err := filepath.Abs(path)
if err != nil {
return err
}
parts := strings.Split(absPath, string(filepath.Separator))
db, rp, id := parts[len(parts)-4], parts[len(parts)-3], parts[len(parts)-2]
dirs = append(dirs, location{db: db, rp: rp, id: id, path: path})
return nil
}
return nil
}); err != nil {
return err
}
sort.Slice(dirs, func(i, j int) bool {
a, _ := strconv.Atoi(dirs[i].id)
b, _ := strconv.Atoi(dirs[j].id)
return a < b
})
for _, shard := range dirs {
if err := fn(shard.db, shard.rp, shard.id, shard.path); err != nil {
return err
}
}
return nil
}
// printUsage prints the usage message to STDERR.
func (cmd *Command) printUsage() {
usage := `Displays shard level report.
Usage: influx_inspect report [flags]
-pattern <pattern>
Include only files matching a pattern.
-exact
Report exact cardinality counts instead of estimates. Note: this can use a lot of memory.
Defaults to "false".
-detailed
Report detailed cardinality estimates.
Defaults to "false".
`
fmt.Fprintf(cmd.Stdout, usage)
}
// counter abstracts a a method of counting keys.
type counter interface {
Add(key []byte)
Count() uint64
}
// newHLLCounter returns an approximate counter using HyperLogLogs for cardinality estimation.
func newHLLCounter() counter {
return hllpp.New()
}
// exactCounter returns an exact count for keys using counting all distinct items in a set.
type exactCounter struct {
m map[string]struct{}
}
func (c *exactCounter) Add(key []byte) {
c.m[string(key)] = struct{}{}
}
func (c *exactCounter) Count() uint64 {
return uint64(len(c.m))
}
func newExactCounter() counter {
return &exactCounter{
m: make(map[string]struct{}),
}
}