From a43852958ddca06f0146202a124ff07622926bbe Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Wed, 20 Mar 2019 11:01:50 -0700 Subject: [PATCH] chore(gen): Back port improvements and changes from OSS 2.0 Specifically: * renamed files for consistency between versions * added `time-interval` schema option * updated schema example documentation Back port of improvements from #12710 --- cmd/influx_tools/generate/exec/command.go | 76 ++++++++++++++----- ...enerator.go => merged_series_generator.go} | 1 - ...est.go => merged_series_generator_test.go} | 0 pkg/data/gen/schema.go | 23 +++++- pkg/data/gen/sequence.go | 12 +++ pkg/data/gen/series.go | 8 +- ...seriesgenerator.go => series_generator.go} | 43 ++++++++++- pkg/data/gen/series_test.go | 6 +- pkg/data/gen/specs.go | 13 +--- pkg/data/gen/toml.go | 53 ++++++++++++- pkg/data/gen/toml_test.go | 13 +++- pkg/data/gen/util.go | 26 ------- 12 files changed, 204 insertions(+), 70 deletions(-) rename pkg/data/gen/{mergedseriesgenerator.go => merged_series_generator.go} (99%) rename pkg/data/gen/{mergedseriesgenerator_test.go => merged_series_generator_test.go} (100%) rename pkg/data/gen/{seriesgenerator.go => series_generator.go} (69%) diff --git a/cmd/influx_tools/generate/exec/command.go b/cmd/influx_tools/generate/exec/command.go index 9ffb9a305a..e0e8942fed 100644 --- a/cmd/influx_tools/generate/exec/command.go +++ b/cmd/influx_tools/generate/exec/command.go @@ -200,15 +200,19 @@ func (cmd *Command) exec(storagePlan *generate.StoragePlan, spec *gen.Spec) erro return g.Run(context.Background(), storagePlan.Database, storagePlan.ShardPath(), storagePlan.NodeShardGroups(), gens) } -const exampleSchema = `title = "CLI schema" +const exampleSchema = `title = "Documented schema" # limit the maximum number of series generated across all measurements # # series-limit: integer, optional (default: unlimited) -# multiple measurements are merged together -[[measurements]] -# name of measurement +[[measurements]] + +# name of 
measurement +# +# NOTE: +# Multiple definitions of the same measurement name are allowed and +# will be merged together. name = "cpu" # sample: float; where 0 < sample ≤ 1.0 (default: 0.5) @@ -216,7 +220,7 @@ name = "cpu" # # sample 25% of the tags # -# sample = 0.25 +sample = 0.25 # Keys for defining a tag # @@ -249,13 +253,16 @@ name = "cpu" # path: string # absolute path or relative path to current toml file tags = [ - # example sequence tag source. The range of values are automatically prefixed with 0s + # example sequence tag source. The range of values are automatically + # prefixed with 0s # to ensure correct sort behavior. - { name = "host", source = { type = "sequence", format = "host-%s", start = 0, count = 5 } }, + { name = "host", source = { type = "sequence", format = "host-%s", start = 0, count = 5 } }, - # tags can also be sourced from a file. The path is relative to the schema.toml. - # Each value must be on a new line. The file is also sorted, validated for UTF-8 and deduplicated. - # { name = "region", source = { type = "file", path = "files/regions.txt" } }, + # tags can also be sourced from a file. The path is relative to the + # schema.toml. + # Each value must be on a new line. The file is also sorted, deduplicated + # and UTF-8 validated. + { name = "rack", source = { type = "file", path = "files/racks.txt" } }, # Example string array source, which is also deduplicated and sorted { name = "region", source = ["us-west-01","us-west-02","us-east"] }, @@ -267,12 +274,47 @@ tags = [ # Name of field # # count: int, required -# Number of values to generate. When multiple fields have the same -# count, they will share timestamps. +# The maximum number of values to generate. When multiple fields +# have the same count and time-spec, they will share timestamps. # -# time-precision: string (default: ms) -# The precision for generated timestamps. 
-# One of ns, us, ms, s, m, h +# A time-spec can be either time-precision or time-interval, which +# determines how timestamps are generated and may also influence +# the time range and number of values generated. +# +# time-precision: string [ns, us, ms, s, m, h] (default: ms) +# Specifies the precision (rounding) for generated timestamps. +# +# If the precision results in fewer than "count" intervals for the +# given time range the number of values will be reduced. +# +# Example: +# count = 1000, start = 0s, end = 100s, time-precision = s +# 100 values will be generated at [0s, 1s, 2s, ..., 99s] +# +# If the precision results in greater than "count" intervals for the +# given time range, the interval will be rounded to the nearest multiple of +# time-precision. +# +# Example: +# count = 10, start = 0s, end = 100s, time-precision = s +# 10 values will be generated at [0s, 10s, 20s, ..., 90s] +# +# time-interval: Go duration string (eg 90s, 1h30m) +# Specifies the delta between generated timestamps. +# +# If the delta results in fewer than "count" intervals for the +# given time range the number of values will be reduced. +# +# Example: +# count = 100, start = 0s, end = 100s, time-interval = 10s +# 10 values will be generated at [0s, 10s, 20s, ..., 90s] +# +# If the delta results in greater than "count" intervals for the +# given time range, the start-time will be adjusted to ensure "count" values. 
+# +# Example: +# count = 20, start = 0s, end = 1000s, time-interval = 10s +# 20 values will be generated at [800s, 810s, ..., 900s, ..., 990s] # # source: int, float, boolean, string, array or object # @@ -321,8 +363,8 @@ tags = [ ] fields = [ # An example of a sequence of integer values - { name = "free", count = 17, source = [10,15,20,25,30,35,30], time-precision = "ms" }, - { name = "low_mem", count = 17, source = [false,true,true], time-precision = "ms" }, + { name = "free", count = 100, source = [10,15,20,25,30,35,30], time-precision = "ms" }, + { name = "low_mem", count = 100, source = [false,true,true], time-precision = "ms" }, ] ` diff --git a/pkg/data/gen/mergedseriesgenerator.go b/pkg/data/gen/merged_series_generator.go similarity index 99% rename from pkg/data/gen/mergedseriesgenerator.go rename to pkg/data/gen/merged_series_generator.go index e995e7798e..2ff4966ac0 100644 --- a/pkg/data/gen/mergedseriesgenerator.go +++ b/pkg/data/gen/merged_series_generator.go @@ -10,7 +10,6 @@ import ( type mergedSeriesGenerator struct { heap seriesGeneratorHeap last constSeries - err error n int64 first bool } diff --git a/pkg/data/gen/mergedseriesgenerator_test.go b/pkg/data/gen/merged_series_generator_test.go similarity index 100% rename from pkg/data/gen/mergedseriesgenerator_test.go rename to pkg/data/gen/merged_series_generator_test.go diff --git a/pkg/data/gen/schema.go b/pkg/data/gen/schema.go index 6334e2cb1b..6306876746 100644 --- a/pkg/data/gen/schema.go +++ b/pkg/data/gen/schema.go @@ -100,10 +100,29 @@ type FieldSource interface { type Field struct { Name string Count int64 - TimePrecision precision `toml:"time-precision"` // TimePrecision determines the precision for generated timestamp values + TimePrecision *precision `toml:"time-precision"` // TimePrecision determines the precision for generated timestamp values + TimeInterval *duration `toml:"time-interval"` // TimeInterval determines the duration between timestamp values Source FieldSource } +func 
(t *Field) TimeSequenceSpec() TimeSequenceSpec { + if t.TimeInterval != nil { + return TimeSequenceSpec{ + Count: int(t.Count), + Delta: t.TimeInterval.Duration, + } + } + + if t.TimePrecision != nil { + return TimeSequenceSpec{ + Count: int(t.Count), + Precision: t.TimePrecision.ToDuration(), + } + } + + panic("TimeInterval and TimePrecision are nil") +} + func (*Field) node() {} type FieldConstantValue struct { @@ -200,7 +219,7 @@ func walk(v Visitor, node SchemaNode, up bool) Visitor { case *Measurement: v := v v = walk(v, n.Tags, up) - v = walk(v, n.Fields, up) + walk(v, n.Fields, up) case Fields: v := v diff --git a/pkg/data/gen/sequence.go b/pkg/data/gen/sequence.go index 4fc469af85..e600471bd9 100644 --- a/pkg/data/gen/sequence.go +++ b/pkg/data/gen/sequence.go @@ -82,3 +82,15 @@ func (s *StringArraySequence) Value() string { func (s *StringArraySequence) Count() int { return len(s.vals) } + +type StringConstantSequence struct { + val string +} + +func NewStringConstantSequence(val string) *StringConstantSequence { + return &StringConstantSequence{val: val} +} + +func (s *StringConstantSequence) Next() bool { return true } +func (s *StringConstantSequence) Value() string { return s.val } +func (s *StringConstantSequence) Count() int { return 1 } diff --git a/pkg/data/gen/series.go b/pkg/data/gen/series.go index c62bbe6332..81d7703f3f 100644 --- a/pkg/data/gen/series.go +++ b/pkg/data/gen/series.go @@ -4,7 +4,7 @@ import ( "bytes" ) -type Series interface { +type seriesKeyField interface { // Key returns the series key. // The returned value may be cached. Key() []byte @@ -22,13 +22,13 @@ type constSeries struct { func (s *constSeries) Key() []byte { return s.key } func (s *constSeries) Field() []byte { return s.field } -var nilSeries Series = &constSeries{} +var nilSeries seriesKeyField = &constSeries{} // Compare returns an integer comparing two SeriesGenerator instances // lexicographically. // The result will be 0 if a==b, -1 if a < b, and +1 if a > b. 
// A nil argument is equivalent to an empty SeriesGenerator. -func CompareSeries(a, b Series) int { +func CompareSeries(a, b seriesKeyField) int { if a == nil { a = nilSeries } @@ -44,7 +44,7 @@ func CompareSeries(a, b Series) int { } } -func (s *constSeries) CopyFrom(a Series) { +func (s *constSeries) CopyFrom(a seriesKeyField) { key := a.Key() if cap(s.key) < len(key) { s.key = make([]byte, len(key)) diff --git a/pkg/data/gen/seriesgenerator.go b/pkg/data/gen/series_generator.go similarity index 69% rename from pkg/data/gen/seriesgenerator.go rename to pkg/data/gen/series_generator.go index 9ba3d0f4e1..619431f292 100644 --- a/pkg/data/gen/seriesgenerator.go +++ b/pkg/data/gen/series_generator.go @@ -32,7 +32,7 @@ type SeriesGenerator interface { } type TimeSequenceSpec struct { - // Count specifies the number of values to generate. + // Count specifies the maximum number of values to generate. Count int // Start specifies the starting time for the values. @@ -45,11 +45,52 @@ type TimeSequenceSpec struct { Precision time.Duration } +func (ts TimeSequenceSpec) ForTimeRange(tr TimeRange) TimeSequenceSpec { + // Truncate time range + if ts.Delta > 0 { + tr = tr.Truncate(ts.Delta) + } else { + tr = tr.Truncate(ts.Precision) + } + + ts.Start = tr.Start + + if ts.Delta > 0 { + intervals := int(tr.End.Sub(tr.Start) / ts.Delta) + if intervals > ts.Count { + // if the number of intervals in the specified time range exceeds + // the maximum count, move the start forward to limit the number of values + ts.Start = tr.End.Add(-time.Duration(ts.Count) * ts.Delta) + } else { + ts.Count = intervals + } + } else { + ts.Delta = tr.End.Sub(tr.Start) / time.Duration(ts.Count) + if ts.Delta < ts.Precision { + // count is too high for the range of time and precision + ts.Count = int(tr.End.Sub(tr.Start) / ts.Precision) + ts.Delta = ts.Precision + } else { + ts.Delta = ts.Delta.Round(ts.Precision) + } + ts.Precision = 0 + } + + return ts +} + type TimeRange struct { Start time.Time End 
time.Time } +func (t TimeRange) Truncate(d time.Duration) TimeRange { + return TimeRange{ + Start: t.Start.Truncate(d), + End: t.End.Truncate(d), + } +} + type TimeValuesSequence interface { Reset() Next() bool diff --git a/pkg/data/gen/series_test.go b/pkg/data/gen/series_test.go index 5f7c3d8bb3..0772cc75ca 100644 --- a/pkg/data/gen/series_test.go +++ b/pkg/data/gen/series_test.go @@ -7,14 +7,14 @@ import ( ) func TestCompareSeries(t *testing.T) { - mk := func(k, f string) Series { + mk := func(k, f string) seriesKeyField { return &constSeries{key: []byte(k), field: []byte(f)} } tests := []struct { name string - a Series - b Series + a seriesKeyField + b seriesKeyField exp int }{ { diff --git a/pkg/data/gen/specs.go b/pkg/data/gen/specs.go index c96aef50b4..7d8e6c9391 100644 --- a/pkg/data/gen/specs.go +++ b/pkg/data/gen/specs.go @@ -8,7 +8,6 @@ import ( "path" "path/filepath" "sort" - "time" "unicode/utf8" "github.com/BurntSushi/toml" @@ -97,12 +96,7 @@ type FieldValuesSpec struct { } func newTimeValuesSequenceFromFieldValuesSpec(fs *FieldValuesSpec, tr TimeRange) TimeValuesSequence { - ts := fs.TimeSequenceSpec - ts.Start = tr.Start - ts.Delta = tr.End.Sub(tr.Start) / time.Duration(ts.Count) - ts.Delta = ts.Delta.Round(ts.Precision) - - return fs.Values(ts) + return fs.Values(fs.TimeSequenceSpec.ForTimeRange(tr)) } func NewSpecFromToml(s string) (*Spec, error) { @@ -321,10 +315,7 @@ func (s *schemaToSpec) visit(node SchemaNode) bool { panic(fmt.Sprintf("unexpected type %T", fs)) } - fs.TimeSequenceSpec = TimeSequenceSpec{ - Count: int(n.Count), - Precision: n.TimePrecision.ToDuration(), - } + fs.TimeSequenceSpec = n.TimeSequenceSpec() fs.Name = n.Name case *FieldConstantValue: diff --git a/pkg/data/gen/toml.go b/pkg/data/gen/toml.go index fd227edd22..21137734d1 100644 --- a/pkg/data/gen/toml.go +++ b/pkg/data/gen/toml.go @@ -42,6 +42,41 @@ func (s *sample) UnmarshalTOML(data interface{}) error { return nil } +type duration struct { + time.Duration +} + +func (d 
*duration) UnmarshalTOML(data interface{}) error { + text, ok := data.(string) + if !ok { + return fmt.Errorf("invalid duration, expect a Go duration as a string: %T", data) + } + + return d.UnmarshalText([]byte(text)) +} + +func (d *duration) UnmarshalText(text []byte) error { + s := string(text) + + var err error + d.Duration, err = time.ParseDuration(s) + if err != nil { + return err + } + + if d.Duration == 0 { + d.Duration, err = time.ParseDuration("1" + s) + if err != nil { + return err + } + } + + if d.Duration <= 0 { + return fmt.Errorf("invalid duration, must be > 0: %s", d.Duration) + } + return nil +} + type precision byte const ( @@ -249,9 +284,25 @@ func (t *Field) UnmarshalTOML(data interface{}) error { } if n, ok := d["time-precision"]; ok { - if err := t.TimePrecision.UnmarshalTOML(n); err != nil { + var tp precision + if err := tp.UnmarshalTOML(n); err != nil { return err } + t.TimePrecision = &tp + } + + if n, ok := d["time-interval"]; ok { + var ti duration + if err := ti.UnmarshalTOML(n); err != nil { + return err + } + t.TimeInterval = &ti + t.TimePrecision = nil + } + + if t.TimePrecision == nil && t.TimeInterval == nil { + var tp precision + t.TimePrecision = &tp } // infer source diff --git a/pkg/data/gen/toml_test.go b/pkg/data/gen/toml_test.go index 63fe099635..7055ae7cd0 100644 --- a/pkg/data/gen/toml_test.go +++ b/pkg/data/gen/toml_test.go @@ -30,7 +30,11 @@ func visit(root *Schema) string { fmt.Fprintln(w, " Fields:") case *Field: - fmt.Fprintf(w, " %s: %s, count=%d, time-precision=%s\n", n.Name, n.Source, n.Count, n.TimePrecision) + if n.TimePrecision != nil { + fmt.Fprintf(w, " %s: %s, count=%d, time-precision=%s\n", n.Name, n.Source, n.Count, *n.TimePrecision) + } else { + fmt.Fprintf(w, " %s: %s, count=%d, time-interval=%s\n", n.Name, n.Source, n.Count, n.TimeInterval) + } case *Tag: fmt.Fprintf(w, " %s: %s\n", n.Name, n.Source) @@ -78,6 +82,7 @@ series-limit = 10 name = "stringC" count = 5000 source = "hello" + time-interval = 
"60s" [[measurements.fields]] name = "stringA" @@ -123,7 +128,7 @@ name = "array" name = "integerA" count = 1000 source = [5, 6, 7] - time-precision = "us" + time-interval = "90s" ` var out Schema _, err := toml.Decode(in, &out) @@ -140,7 +145,7 @@ name = "array" Fields: floatC: constant, source=0.5, count=5000, time-precision=Microsecond integerC: constant, source=3, count=5000, time-precision=Hour - stringC: constant, source="hello", count=5000, time-precision=Millisecond + stringC: constant, source="hello", count=5000, time-interval=1m0s stringA: array, source=[]string{"hello", "world"}, count=5000, time-precision=Millisecond boolf: constant, source=false, count=5000, time-precision=Millisecond @@ -156,7 +161,7 @@ name = "array" tagFile: file, path=foo.txt Fields: stringA: array, source=[]string{"this", "that"}, count=1000, time-precision=Microsecond - integerA: array, source=[]int64{5, 6, 7}, count=1000, time-precision=Microsecond + integerA: array, source=[]int64{5, 6, 7}, count=1000, time-interval=1m30s ` if got := visit(&out); !cmp.Equal(got, exp) { t.Errorf("unexpected value, -got/+exp\n%s", cmp.Diff(got, exp)) diff --git a/pkg/data/gen/util.go b/pkg/data/gen/util.go index 178d367c87..bb16508f5f 100644 --- a/pkg/data/gen/util.go +++ b/pkg/data/gen/util.go @@ -28,32 +28,6 @@ func sortDedupStrings(in []string) []string { return in[:j+1] } -func sortDedupInts(in []int) []int { - sort.Ints(in) - j := 0 - for i := 1; i < len(in); i++ { - if in[j] == in[i] { - continue - } - j++ - in[j] = in[i] - } - return in[:j+1] -} - -func sortDedupFloats(in []float64) []float64 { - sort.Float64s(in) - j := 0 - for i := 1; i < len(in); i++ { - if in[j] == in[i] { - continue - } - j++ - in[j] = in[i] - } - return in[:j+1] -} - // ToInt64SliceE casts an interface to a []int64 type. func toInt64SliceE(i interface{}) ([]int64, error) { if i == nil {