Add ability to use report-tsm programmatically

pull/11964/head
Edd Robinson 2019-02-18 16:18:51 +00:00
parent 117a5a98e8
commit f383ec9225
3 changed files with 64 additions and 15 deletions

View File

@ -104,7 +104,7 @@ func inspectReportTSMF(cmd *cobra.Command, args []string) error {
report.BucketID = bucketID
}
err := report.Run()
_, err := report.Run(true)
if err != nil {
panic(err)
}

2
go.sum
View File

@ -234,6 +234,7 @@ github.com/influxdata/tdigest v0.0.0-20181121200506-bf2b5ad3c0a9 h1:MHTrDWmQpHq/
github.com/influxdata/tdigest v0.0.0-20181121200506-bf2b5ad3c0a9/go.mod h1:Js0mqiSBE6Ffsg94weZZ2c+v/ciT8QRHFOap7EKDrR0=
github.com/influxdata/usage-client v0.0.0-20160829180054-6d3895376368 h1:+TUUmaFa4YD1Q+7bH9o5NCHQGPMqZCYJiNW6lIIS9z4=
github.com/influxdata/usage-client v0.0.0-20160829180054-6d3895376368/go.mod h1:Wbbw6tYNvwa5dlB6304Sd+82Z3f7PmVZHVKU637d4po=
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A=
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo=
github.com/jefferai/jsonx v0.0.0-20160721235117-9cc31c3135ee h1:AQ/QmCk6x8ECPpf2pkPtA4lyncEEBbs8VFnVXPYKhIs=
github.com/jefferai/jsonx v0.0.0-20160721235117-9cc31c3135ee/go.mod h1:N0t2vlmpe8nyZB5ouIbJQPDSR+mH6oe7xHB9VZHSUzM=
@ -503,6 +504,7 @@ gopkg.in/robfig/cron.v2 v2.0.0-20150107220207-be2e0b0deed5 h1:E846t8CnR+lv5nE+Vu
gopkg.in/robfig/cron.v2 v2.0.0-20150107220207-be2e0b0deed5/go.mod h1:hiOFpYm0ZJbusNj2ywpbrXowU3G8U6GIQzqn2mw1UIE=
gopkg.in/src-d/go-billy.v4 v4.2.1/go.mod h1:tm33zBoOwxjYHZIE+OV8bxTWFMJLrconzFMd38aARFk=
gopkg.in/src-d/go-git-fixtures.v3 v3.1.1/go.mod h1:dLBcvytrw/TYZsNTWCnkNF2DSIlzWYqTe3rJR56Ac7g=
gopkg.in/src-d/go-git.v4 v4.8.1 h1:aAyBmkdE1QUUEHcP4YFCGKmsMQRAuRmUcPEQR7lOAa0=
gopkg.in/src-d/go-git.v4 v4.8.1/go.mod h1:Vtut8izDyrM8BUVQnzJ+YvmNcem2J89EmfZYCkLokZk=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=

View File

@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"io"
"io/ioutil"
"math"
"os"
"path/filepath"
@ -32,8 +33,34 @@ type Report struct {
Exact bool // Exact determines if estimation or exact methods are used to determine cardinality.
}
// ReportSummary provides a summary of the cardinalities in the processed fileset.
type ReportSummary struct {
Min, Max int64
Total uint64 //The exact or estimated unique set of series keys across all files.
Organizations map[string]uint64 // The exact or estimated unique set of series keys segmented by org.
Buckets map[string]uint64 // The exact or estimated unique set of series keys segmented by bucket.
// These are calculated when the detailed flag is in use.
Measurements map[string]uint64 // The exact or estimated unique set of series keys segmented by the measurement tag.
FieldKeys map[string]uint64 // The exact or estimated unique set of series keys segmented by the field tag.
TagKeys map[string]uint64 // The exact or estimated unique set of series keys segmented by tag keys.
}
func newReportSummary() *ReportSummary {
return &ReportSummary{
Organizations: map[string]uint64{},
Buckets: map[string]uint64{},
Measurements: map[string]uint64{},
FieldKeys: map[string]uint64{},
TagKeys: map[string]uint64{},
}
}
// Run executes the Report.
func (r *Report) Run() error {
//
// Calling Run with print set to true emits data about each file to the report's
// Stdout fd as it is generated.
func (r *Report) Run(print bool) (*ReportSummary, error) {
if r.Stderr == nil {
r.Stderr = os.Stderr
}
@ -41,6 +68,10 @@ func (r *Report) Run() error {
r.Stdout = os.Stdout
}
if !print {
r.Stderr, r.Stdout = ioutil.Discard, ioutil.Discard
}
newCounterFn := newHLLCounter
estTitle := " (est)"
if r.Exact {
@ -50,9 +81,9 @@ func (r *Report) Run() error {
fi, err := os.Stat(r.Dir)
if err != nil {
return err
return nil, err
} else if !fi.IsDir() {
return errors.New("data directory not valid")
return nil, errors.New("data directory not valid")
}
totalSeries := newCounterFn() // The exact or estimated unique set of series keys across all files.
@ -86,7 +117,7 @@ func (r *Report) Run() error {
file, err := os.OpenFile(path, os.O_RDONLY, 0600)
if err != nil {
fmt.Fprintf(r.Stderr, "error: %s: %v. Exiting.\n", path, err)
return err
return nil, err
}
loadStart := time.Now()
@ -104,7 +135,7 @@ func (r *Report) Run() error {
seriesCount := reader.KeyCount()
itr := reader.Iterator(nil)
if itr == nil {
return errors.New("invalid TSM file, no index iterator")
return nil, errors.New("invalid TSM file, no index iterator")
}
for itr.Next() {
@ -186,7 +217,7 @@ func (r *Report) Run() error {
}
if err := reader.Close(); err != nil {
return fmt.Errorf("error: %s: %v. Exiting", path, err)
return nil, fmt.Errorf("error: %s: %v. Exiting", path, err)
}
fmt.Fprintln(tw, strings.Join([]string{
@ -199,14 +230,20 @@ func (r *Report) Run() error {
}, "\t"))
if r.Detailed {
if err := tw.Flush(); err != nil {
return err
return nil, err
}
}
}
if err := tw.Flush(); err != nil {
return err
return nil, err
}
summary := newReportSummary()
summary.Min = minTime
summary.Max = maxTime
summary.Total = totalSeries.Count()
println()
println("Summary:")
@ -222,35 +259,45 @@ func (r *Report) Run() error {
fmt.Printf("Statistics\n")
fmt.Printf(" Organizations (%d):\n", len(orgCardinalities))
for _, org := range sortKeys(orgCardinalities) {
fmt.Printf(" - %s: %d%s (%d%%)\n", org, orgCardinalities[org].Count(), estTitle, int(float64(orgCardinalities[org].Count())/float64(totalSeries.Count())*100))
cardinality := orgCardinalities[org].Count()
summary.Organizations[org] = cardinality
fmt.Printf(" - %s: %d%s (%d%%)\n", org, cardinality, estTitle, int(float64(cardinality)/float64(totalSeries.Count())*100))
}
fmt.Printf(" Total%s: %d\n", estTitle, totalSeries.Count())
fmt.Printf(" \n Buckets (%d):\n", len(bucketCardinalities))
for _, bucket := range sortKeys(bucketCardinalities) {
fmt.Printf(" - %s: %d%s (%d%%)\n", bucket, bucketCardinalities[bucket].Count(), estTitle, int(float64(bucketCardinalities[bucket].Count())/float64(totalSeries.Count())*100))
cardinality := bucketCardinalities[bucket].Count()
summary.Buckets[bucket] = cardinality
fmt.Printf(" - %s: %d%s (%d%%)\n", bucket, cardinality, estTitle, int(float64(cardinality)/float64(totalSeries.Count())*100))
}
fmt.Printf(" Total%s: %d\n", estTitle, totalSeries.Count())
if r.Detailed {
fmt.Printf("\n Series By Measurements (%d):\n", len(mCardinalities))
for _, mname := range sortKeys(mCardinalities) {
fmt.Printf(" - %v: %d%s (%d%%)\n", mname, mCardinalities[mname].Count(), estTitle, int((float64(mCardinalities[mname].Count())/float64(totalSeries.Count()))*100))
cardinality := mCardinalities[mname].Count()
summary.Measurements[mname] = cardinality
fmt.Printf(" - %v: %d%s (%d%%)\n", mname, cardinality, estTitle, int((float64(cardinality)/float64(totalSeries.Count()))*100))
}
fmt.Printf("\n Fields By Measurements (%d):\n", len(fCardinalities))
for _, mname := range sortKeys(fCardinalities) {
fmt.Printf(" - %v: %d%s\n", mname, fCardinalities[mname].Count(), estTitle)
cardinality := fCardinalities[mname].Count()
summary.FieldKeys[mname] = cardinality
fmt.Printf(" - %v: %d%s\n", mname, cardinality, estTitle)
}
fmt.Printf("\n Tag Values By Tag Keys (%d):\n", len(tCardinalities))
for _, tkey := range sortKeys(tCardinalities) {
fmt.Printf(" - %v: %d%s\n", tkey, tCardinalities[tkey].Count(), estTitle)
cardinality := tCardinalities[tkey].Count()
summary.TagKeys[tkey] = cardinality
fmt.Printf(" - %v: %d%s\n", tkey, cardinality, estTitle)
}
}
fmt.Printf("\nCompleted in %s\n", time.Since(start))
return nil
return summary, nil
}
// sortKeys is a quick helper to return the sorted set of a map's keys