From f383ec92254061d98d1ad0f0d36e69f00d8dd3c6 Mon Sep 17 00:00:00 2001 From: Edd Robinson Date: Mon, 18 Feb 2019 16:18:51 +0000 Subject: [PATCH] Add ability to use report-tsm programmatically --- cmd/influx/inspect.go | 2 +- go.sum | 2 ++ tsdb/tsm1/report.go | 75 +++++++++++++++++++++++++++++++++++-------- 3 files changed, 64 insertions(+), 15 deletions(-) diff --git a/cmd/influx/inspect.go b/cmd/influx/inspect.go index e188494338..fd2607a336 100644 --- a/cmd/influx/inspect.go +++ b/cmd/influx/inspect.go @@ -104,7 +104,7 @@ func inspectReportTSMF(cmd *cobra.Command, args []string) error { report.BucketID = bucketID } - err := report.Run() + _, err := report.Run(true) if err != nil { panic(err) } diff --git a/go.sum b/go.sum index ac7bd6c833..050a3ea928 100644 --- a/go.sum +++ b/go.sum @@ -234,6 +234,7 @@ github.com/influxdata/tdigest v0.0.0-20181121200506-bf2b5ad3c0a9 h1:MHTrDWmQpHq/ github.com/influxdata/tdigest v0.0.0-20181121200506-bf2b5ad3c0a9/go.mod h1:Js0mqiSBE6Ffsg94weZZ2c+v/ciT8QRHFOap7EKDrR0= github.com/influxdata/usage-client v0.0.0-20160829180054-6d3895376368 h1:+TUUmaFa4YD1Q+7bH9o5NCHQGPMqZCYJiNW6lIIS9z4= github.com/influxdata/usage-client v0.0.0-20160829180054-6d3895376368/go.mod h1:Wbbw6tYNvwa5dlB6304Sd+82Z3f7PmVZHVKU637d4po= +github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A= github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo= github.com/jefferai/jsonx v0.0.0-20160721235117-9cc31c3135ee h1:AQ/QmCk6x8ECPpf2pkPtA4lyncEEBbs8VFnVXPYKhIs= github.com/jefferai/jsonx v0.0.0-20160721235117-9cc31c3135ee/go.mod h1:N0t2vlmpe8nyZB5ouIbJQPDSR+mH6oe7xHB9VZHSUzM= @@ -503,6 +504,7 @@ gopkg.in/robfig/cron.v2 v2.0.0-20150107220207-be2e0b0deed5 h1:E846t8CnR+lv5nE+Vu gopkg.in/robfig/cron.v2 v2.0.0-20150107220207-be2e0b0deed5/go.mod h1:hiOFpYm0ZJbusNj2ywpbrXowU3G8U6GIQzqn2mw1UIE= gopkg.in/src-d/go-billy.v4 v4.2.1/go.mod 
h1:tm33zBoOwxjYHZIE+OV8bxTWFMJLrconzFMd38aARFk= gopkg.in/src-d/go-git-fixtures.v3 v3.1.1/go.mod h1:dLBcvytrw/TYZsNTWCnkNF2DSIlzWYqTe3rJR56Ac7g= +gopkg.in/src-d/go-git.v4 v4.8.1 h1:aAyBmkdE1QUUEHcP4YFCGKmsMQRAuRmUcPEQR7lOAa0= gopkg.in/src-d/go-git.v4 v4.8.1/go.mod h1:Vtut8izDyrM8BUVQnzJ+YvmNcem2J89EmfZYCkLokZk= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= diff --git a/tsdb/tsm1/report.go b/tsdb/tsm1/report.go index 31adc7cb9f..88ff1bc7ae 100644 --- a/tsdb/tsm1/report.go +++ b/tsdb/tsm1/report.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "io" + "io/ioutil" "math" "os" "path/filepath" @@ -32,8 +33,34 @@ type Report struct { Exact bool // Exact determines if estimation or exact methods are used to determine cardinality. } +// ReportSummary provides a summary of the cardinalities in the processed fileset. +type ReportSummary struct { + Min, Max int64 + Total uint64 // The exact or estimated unique set of series keys across all files. + Organizations map[string]uint64 // The exact or estimated unique set of series keys segmented by org. + Buckets map[string]uint64 // The exact or estimated unique set of series keys segmented by bucket. + + // These are calculated when the detailed flag is in use. + Measurements map[string]uint64 // The exact or estimated unique set of series keys segmented by the measurement tag. + FieldKeys map[string]uint64 // The exact or estimated number of unique fields, keyed by measurement name. + TagKeys map[string]uint64 // The exact or estimated number of unique tag values, keyed by tag key. +} + +func newReportSummary() *ReportSummary { + return &ReportSummary{ + Organizations: map[string]uint64{}, + Buckets: map[string]uint64{}, + Measurements: map[string]uint64{}, + FieldKeys: map[string]uint64{}, + TagKeys: map[string]uint64{}, + } +} + // Run executes the Report. 
-func (r *Report) Run() error { +// +// Calling Run with print set to true emits data about each file to the report's +// Stdout fd as it is generated. +func (r *Report) Run(print bool) (*ReportSummary, error) { if r.Stderr == nil { r.Stderr = os.Stderr } @@ -41,6 +68,10 @@ func (r *Report) Run() error { r.Stdout = os.Stdout } + if !print { + r.Stderr, r.Stdout = ioutil.Discard, ioutil.Discard + } + newCounterFn := newHLLCounter estTitle := " (est)" if r.Exact { @@ -50,9 +81,9 @@ func (r *Report) Run() error { fi, err := os.Stat(r.Dir) if err != nil { - return err + return nil, err } else if !fi.IsDir() { - return errors.New("data directory not valid") + return nil, errors.New("data directory not valid") } totalSeries := newCounterFn() // The exact or estimated unique set of series keys across all files. @@ -86,7 +117,7 @@ func (r *Report) Run() error { file, err := os.OpenFile(path, os.O_RDONLY, 0600) if err != nil { fmt.Fprintf(r.Stderr, "error: %s: %v. Exiting.\n", path, err) - return err + return nil, err } loadStart := time.Now() @@ -104,7 +135,7 @@ func (r *Report) Run() error { seriesCount := reader.KeyCount() itr := reader.Iterator(nil) if itr == nil { - return errors.New("invalid TSM file, no index iterator") + return nil, errors.New("invalid TSM file, no index iterator") } for itr.Next() { @@ -186,7 +217,7 @@ func (r *Report) Run() error { } if err := reader.Close(); err != nil { - return fmt.Errorf("error: %s: %v. Exiting", path, err) + return nil, fmt.Errorf("error: %s: %v. 
Exiting", path, err) } fmt.Fprintln(tw, strings.Join([]string{ @@ -199,14 +230,20 @@ func (r *Report) Run() error { }, "\t")) if r.Detailed { if err := tw.Flush(); err != nil { - return err + return nil, err } } } if err := tw.Flush(); err != nil { - return err + return nil, err } + + summary := newReportSummary() + summary.Min = minTime + summary.Max = maxTime + summary.Total = totalSeries.Count() + println() println("Summary:") @@ -222,35 +259,45 @@ func (r *Report) Run() error { fmt.Printf("Statistics\n") fmt.Printf(" Organizations (%d):\n", len(orgCardinalities)) for _, org := range sortKeys(orgCardinalities) { - fmt.Printf(" - %s: %d%s (%d%%)\n", org, orgCardinalities[org].Count(), estTitle, int(float64(orgCardinalities[org].Count())/float64(totalSeries.Count())*100)) + cardinality := orgCardinalities[org].Count() + summary.Organizations[org] = cardinality + fmt.Printf(" - %s: %d%s (%d%%)\n", org, cardinality, estTitle, int(float64(cardinality)/float64(totalSeries.Count())*100)) } fmt.Printf(" Total%s: %d\n", estTitle, totalSeries.Count()) fmt.Printf(" \n Buckets (%d):\n", len(bucketCardinalities)) for _, bucket := range sortKeys(bucketCardinalities) { - fmt.Printf(" - %s: %d%s (%d%%)\n", bucket, bucketCardinalities[bucket].Count(), estTitle, int(float64(bucketCardinalities[bucket].Count())/float64(totalSeries.Count())*100)) + cardinality := bucketCardinalities[bucket].Count() + summary.Buckets[bucket] = cardinality + fmt.Printf(" - %s: %d%s (%d%%)\n", bucket, cardinality, estTitle, int(float64(cardinality)/float64(totalSeries.Count())*100)) } fmt.Printf(" Total%s: %d\n", estTitle, totalSeries.Count()) if r.Detailed { fmt.Printf("\n Series By Measurements (%d):\n", len(mCardinalities)) for _, mname := range sortKeys(mCardinalities) { - fmt.Printf(" - %v: %d%s (%d%%)\n", mname, mCardinalities[mname].Count(), estTitle, int((float64(mCardinalities[mname].Count())/float64(totalSeries.Count()))*100)) + cardinality := mCardinalities[mname].Count() + 
summary.Measurements[mname] = cardinality + fmt.Printf(" - %v: %d%s (%d%%)\n", mname, cardinality, estTitle, int((float64(cardinality)/float64(totalSeries.Count()))*100)) } fmt.Printf("\n Fields By Measurements (%d):\n", len(fCardinalities)) for _, mname := range sortKeys(fCardinalities) { - fmt.Printf(" - %v: %d%s\n", mname, fCardinalities[mname].Count(), estTitle) + cardinality := fCardinalities[mname].Count() + summary.FieldKeys[mname] = cardinality + fmt.Printf(" - %v: %d%s\n", mname, cardinality, estTitle) } fmt.Printf("\n Tag Values By Tag Keys (%d):\n", len(tCardinalities)) for _, tkey := range sortKeys(tCardinalities) { - fmt.Printf(" - %v: %d%s\n", tkey, tCardinalities[tkey].Count(), estTitle) + cardinality := tCardinalities[tkey].Count() + summary.TagKeys[tkey] = cardinality + fmt.Printf(" - %v: %d%s\n", tkey, cardinality, estTitle) } } fmt.Printf("\nCompleted in %s\n", time.Since(start)) - return nil + return summary, nil } // sortKeys is a quick helper to return the sorted set of a map's keys