diff --git a/CHANGELOG.md b/CHANGELOG.md index 92cc54f84a..1ebe64d2b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ The admin UI is removed and unusable in this release. The `[admin]` configuratio - [#7862](https://github.com/influxdata/influxdb/pull/7861): Add new profile endpoint for gathering all debug profiles and querues in single archive. - [#8390](https://github.com/influxdata/influxdb/issues/8390): Add nanosecond duration literal support. - [#8394](https://github.com/influxdata/influxdb/pull/8394): Optimize top() and bottom() using an incremental aggregator. +- [#7129](https://github.com/influxdata/influxdb/issues/7129): Maintain the tags of points selected by top() or bottom() when writing the results. ### Bugfixes @@ -73,8 +74,17 @@ The admin UI is removed and unusable in this release. The `[admin]` configuratio - [#8368](https://github.com/influxdata/influxdb/issues/8368): Compaction exhausting disk resources in InfluxDB - [#8358](https://github.com/influxdata/influxdb/issues/8358): Small edits to the etc/config.sample.toml file. - [#8392](https://github.com/influxdata/influxdb/issues/8393): Points beyond retention policy scope are dropped silently +- [#8387](https://github.com/influxdata/influxdb/issues/8387): Fix TSM tmp file leaked on disk +- [#8417](https://github.com/influxdata/influxdb/issues/8417): Fix large field keys preventing snapshot compactions -## v1.2.3 [unreleased] + +## v1.2.4 [2017-05-08] + +### Bugfixes + +- [#8338](https://github.com/influxdata/influxdb/pull/8338): Prefix partial write errors with `partial write:` to generalize identification in other subsystems + +## v1.2.3 [2017-04-17] ### Bugfixes @@ -190,6 +200,14 @@ The stress tool `influx_stress` will be removed in a subsequent release. We reco - [#7396](https://github.com/influxdata/influxdb/issues/7396): CLI should use spaces for alignment, not tabs. 
- [#6527](https://github.com/influxdata/influxdb/issues/6527): 0.12.2 Influx CLI client PRECISION returns "Unknown precision.... + +## v1.1.5 [2017-04-28] + +### Bugfixes + +- [#8190](https://github.com/influxdata/influxdb/issues/8190): History file should redact passwords before saving to history. +- [#8187](https://github.com/influxdata/influxdb/pull/8187): Several statements were missing the DefaultDatabase method + ## v1.1.4 [2017-02-27] ### Bugfixes diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f100042382..4fd753c8f3 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -69,7 +69,7 @@ second to sign our CLA, which can be found Installing Go ------------- -InfluxDB requires Go 1.8.1 +InfluxDB requires Go 1.8.3 At InfluxDB we find gvm, a Go version manager, useful for installing Go. For instructions on how to install it see [the gvm page on github](https://github.com/moovweb/gvm). @@ -77,8 +77,8 @@ on how to install it see [the gvm page on github](https://github.com/moovweb/gvm After installing gvm you can install and set the default go version by running the following: - gvm install go1.8.1 - gvm use go1.8.1 --default + gvm install go1.8.3 + gvm use go1.8.3 --default Installing GDM ------------- diff --git a/Dockerfile_build_ubuntu32 b/Dockerfile_build_ubuntu32 index f9c975ce77..16f7840a6a 100644 --- a/Dockerfile_build_ubuntu32 +++ b/Dockerfile_build_ubuntu32 @@ -18,7 +18,7 @@ RUN gem install fpm # Install go ENV GOPATH /root/go -ENV GO_VERSION 1.8.1 +ENV GO_VERSION 1.8.3 ENV GO_ARCH 386 RUN wget https://storage.googleapis.com/golang/go${GO_VERSION}.linux-${GO_ARCH}.tar.gz; \ tar -C /usr/local/ -xf /go${GO_VERSION}.linux-${GO_ARCH}.tar.gz ; \ diff --git a/Dockerfile_build_ubuntu64 b/Dockerfile_build_ubuntu64 index 83a3d90822..9b361931d0 100644 --- a/Dockerfile_build_ubuntu64 +++ b/Dockerfile_build_ubuntu64 @@ -21,7 +21,7 @@ RUN gem install fpm # Install go ENV GOPATH /root/go -ENV GO_VERSION 1.8.1 +ENV GO_VERSION 1.8.3 ENV GO_ARCH amd64 RUN wget 
https://storage.googleapis.com/golang/go${GO_VERSION}.linux-${GO_ARCH}.tar.gz; \ tar -C /usr/local/ -xf /go${GO_VERSION}.linux-${GO_ARCH}.tar.gz ; \ diff --git a/Dockerfile_build_ubuntu64_git b/Dockerfile_build_ubuntu64_git index 0d049ea7af..20dce34774 100644 --- a/Dockerfile_build_ubuntu64_git +++ b/Dockerfile_build_ubuntu64_git @@ -26,7 +26,7 @@ VOLUME $PROJECT_DIR # Install go -ENV GO_VERSION 1.8.1 +ENV GO_VERSION 1.8.3 ENV GO_ARCH amd64 RUN wget https://storage.googleapis.com/golang/go${GO_VERSION}.linux-${GO_ARCH}.tar.gz; \ tar -C /usr/local/ -xf /go${GO_VERSION}.linux-${GO_ARCH}.tar.gz ; \ diff --git a/Godeps b/Godeps index 54177f2281..360bed5ae1 100644 --- a/Godeps +++ b/Godeps @@ -14,6 +14,7 @@ github.com/jwilder/encoding 27894731927e49b0a9023f00312be26733744815 github.com/paulbellamy/ratecounter 5a11f585a31379765c190c033b6ad39956584447 github.com/peterh/liner 88609521dc4b6c858fd4c98b628147da928ce4ac github.com/retailnext/hllpp 38a7bb71b483e855d35010808143beaf05b67f9d +github.com/spaolacci/murmur3 0d12bf811670bf6a1a63828dfbd003eded177fce github.com/uber-go/atomic 74ca5ec650841aee9f289dce76e928313a37cbc6 github.com/uber-go/zap fbae0281ffd546fa6d1959fec6075ac5da7fb577 golang.org/x/crypto 9477e0b78b9ac3d0b03822fd95422e2fe07627cd diff --git a/circle.yml b/circle.yml index 65267cc019..c2d994e076 100644 --- a/circle.yml +++ b/circle.yml @@ -2,7 +2,7 @@ machine: services: - docker environment: - GODIST: "go1.8.1.linux-amd64.tar.gz" + GODIST: "go1.8.3.linux-amd64.tar.gz" post: - mkdir -p download - test -e download/$GODIST || curl -o download/$GODIST https://storage.googleapis.com/golang/$GODIST diff --git a/cmd/influx_inspect/dumptsi/dumptsi.go b/cmd/influx_inspect/dumptsi/dumptsi.go index a52813e918..830271a409 100644 --- a/cmd/influx_inspect/dumptsi/dumptsi.go +++ b/cmd/influx_inspect/dumptsi/dumptsi.go @@ -131,7 +131,7 @@ func (cmd *Command) run() error { return nil } -func (cmd *Command) readFileSet() (*tsi1.Index, tsi1.FileSet, error) { +func (cmd *Command) 
readFileSet() (*tsi1.Index, *tsi1.FileSet, error) { // If only one path exists and it's a directory then open as an index. if len(cmd.paths) == 1 { fi, err := os.Stat(cmd.paths[0]) @@ -149,7 +149,7 @@ func (cmd *Command) readFileSet() (*tsi1.Index, tsi1.FileSet, error) { } // Open each file and group into a fileset. - var fs tsi1.FileSet + var files []tsi1.File for _, path := range cmd.paths { switch ext := filepath.Ext(path); ext { case tsi1.LogFileExt: @@ -157,7 +157,7 @@ func (cmd *Command) readFileSet() (*tsi1.Index, tsi1.FileSet, error) { if err := f.Open(); err != nil { return nil, nil, err } - fs = append(fs, f) + files = append(files, f) case tsi1.IndexFileExt: f := tsi1.NewIndexFile() @@ -165,18 +165,23 @@ func (cmd *Command) readFileSet() (*tsi1.Index, tsi1.FileSet, error) { if err := f.Open(); err != nil { return nil, nil, err } - fs = append(fs, f) + files = append(files, f) default: return nil, nil, fmt.Errorf("unexpected file extension: %s", ext) } } + fs, err := tsi1.NewFileSet(nil, files) + if err != nil { + return nil, nil, err + } fs.Retain() + return nil, fs, nil } -func (cmd *Command) printMerged(fs tsi1.FileSet) error { +func (cmd *Command) printMerged(fs *tsi1.FileSet) error { if err := cmd.printSeries(fs); err != nil { return err } else if err := cmd.printMeasurements(fs); err != nil { @@ -185,7 +190,7 @@ func (cmd *Command) printMerged(fs tsi1.FileSet) error { return nil } -func (cmd *Command) printSeries(fs tsi1.FileSet) error { +func (cmd *Command) printSeries(fs *tsi1.FileSet) error { if !cmd.showSeries { return nil } @@ -215,7 +220,7 @@ func (cmd *Command) printSeries(fs tsi1.FileSet) error { return nil } -func (cmd *Command) printMeasurements(fs tsi1.FileSet) error { +func (cmd *Command) printMeasurements(fs *tsi1.FileSet) error { if !cmd.showMeasurements { return nil } @@ -245,7 +250,7 @@ func (cmd *Command) printMeasurements(fs tsi1.FileSet) error { return nil } -func (cmd *Command) printTagKeys(fs tsi1.FileSet, name []byte) error { 
+func (cmd *Command) printTagKeys(fs *tsi1.FileSet, name []byte) error { if !cmd.showTagKeys { return nil } @@ -272,7 +277,7 @@ func (cmd *Command) printTagKeys(fs tsi1.FileSet, name []byte) error { return nil } -func (cmd *Command) printTagValues(fs tsi1.FileSet, name, key []byte) error { +func (cmd *Command) printTagValues(fs *tsi1.FileSet, name, key []byte) error { if !cmd.showTagValues { return nil } @@ -299,7 +304,7 @@ func (cmd *Command) printTagValues(fs tsi1.FileSet, name, key []byte) error { return nil } -func (cmd *Command) printTagValueSeries(fs tsi1.FileSet, name, key, value []byte) error { +func (cmd *Command) printTagValueSeries(fs *tsi1.FileSet, name, key, value []byte) error { if !cmd.showTagValueSeries { return nil } @@ -322,8 +327,8 @@ func (cmd *Command) printTagValueSeries(fs tsi1.FileSet, name, key, value []byte return nil } -func (cmd *Command) printFileSummaries(fs tsi1.FileSet) error { - for _, f := range fs { +func (cmd *Command) printFileSummaries(fs *tsi1.FileSet) error { + for _, f := range fs.Files() { switch f := f.(type) { case *tsi1.LogFile: if err := cmd.printLogFileSummary(f); err != nil { diff --git a/influxql/ast.go b/influxql/ast.go index 172dc98796..10aadafdb8 100644 --- a/influxql/ast.go +++ b/influxql/ast.go @@ -1519,7 +1519,7 @@ func (s *SelectStatement) ColumnNames() []string { switch f := field.Expr.(type) { case *Call: - if f.Name == "top" || f.Name == "bottom" { + if s.Target == nil && (f.Name == "top" || f.Name == "bottom") { for _, arg := range f.Args[1:] { ref, ok := arg.(*VarRef) if ok { diff --git a/influxql/call_iterator.go b/influxql/call_iterator.go index ded4f7f3e6..22623a114b 100644 --- a/influxql/call_iterator.go +++ b/influxql/call_iterator.go @@ -781,39 +781,47 @@ func IntegerSpreadReduceSlice(a []IntegerPoint) []IntegerPoint { return []IntegerPoint{{Time: ZeroTime, Value: max - min}} } -func newTopIterator(input Iterator, opt IteratorOptions, n int) (Iterator, error) { +func newTopIterator(input Iterator, 
opt IteratorOptions, n int, keepTags bool) (Iterator, error) { switch input := input.(type) { case FloatIterator: createFn := func() (FloatPointAggregator, FloatPointEmitter) { fn := NewFloatTopReducer(n) return fn, fn } - return newFloatReduceFloatIterator(input, opt, createFn), nil + itr := newFloatReduceFloatIterator(input, opt, createFn) + itr.keepTags = keepTags + return itr, nil case IntegerIterator: createFn := func() (IntegerPointAggregator, IntegerPointEmitter) { fn := NewIntegerTopReducer(n) return fn, fn } - return newIntegerReduceIntegerIterator(input, opt, createFn), nil + itr := newIntegerReduceIntegerIterator(input, opt, createFn) + itr.keepTags = keepTags + return itr, nil default: return nil, fmt.Errorf("unsupported top iterator type: %T", input) } } -func newBottomIterator(input Iterator, opt IteratorOptions, n int) (Iterator, error) { +func newBottomIterator(input Iterator, opt IteratorOptions, n int, keepTags bool) (Iterator, error) { switch input := input.(type) { case FloatIterator: createFn := func() (FloatPointAggregator, FloatPointEmitter) { fn := NewFloatBottomReducer(n) return fn, fn } - return newFloatReduceFloatIterator(input, opt, createFn), nil + itr := newFloatReduceFloatIterator(input, opt, createFn) + itr.keepTags = keepTags + return itr, nil case IntegerIterator: createFn := func() (IntegerPointAggregator, IntegerPointEmitter) { fn := NewIntegerBottomReducer(n) return fn, fn } - return newIntegerReduceIntegerIterator(input, opt, createFn), nil + itr := newIntegerReduceIntegerIterator(input, opt, createFn) + itr.keepTags = keepTags + return itr, nil default: return nil, fmt.Errorf("unsupported bottom iterator type: %T", input) } diff --git a/influxql/iterator.gen.go b/influxql/iterator.gen.go index 64423f05e6..b7f4b41b5f 100644 --- a/influxql/iterator.gen.go +++ b/influxql/iterator.gen.go @@ -1011,11 +1011,12 @@ func (itr *floatChanIterator) Next() (*FloatPoint, error) { // floatReduceFloatIterator executes a reducer for every 
interval and buffers the result. type floatReduceFloatIterator struct { - input *bufFloatIterator - create func() (FloatPointAggregator, FloatPointEmitter) - dims []string - opt IteratorOptions - points []FloatPoint + input *bufFloatIterator + create func() (FloatPointAggregator, FloatPointEmitter) + dims []string + opt IteratorOptions + points []FloatPoint + keepTags bool } func newFloatReduceFloatIterator(input FloatIterator, opt IteratorOptions, createFn func() (FloatPointAggregator, FloatPointEmitter)) *floatReduceFloatIterator { @@ -1147,7 +1148,9 @@ func (itr *floatReduceFloatIterator) reduce() ([]FloatPoint, error) { points := rp.Emitter.Emit() for i := len(points) - 1; i >= 0; i-- { points[i].Name = rp.Name - points[i].Tags = rp.Tags + if !itr.keepTags { + points[i].Tags = rp.Tags + } // Set the points time to the interval time if the reducer didn't provide one. if points[i].Time == ZeroTime { points[i].Time = startTime @@ -1423,11 +1426,12 @@ type floatExprFunc func(a, b float64) float64 // floatReduceIntegerIterator executes a reducer for every interval and buffers the result. type floatReduceIntegerIterator struct { - input *bufFloatIterator - create func() (FloatPointAggregator, IntegerPointEmitter) - dims []string - opt IteratorOptions - points []IntegerPoint + input *bufFloatIterator + create func() (FloatPointAggregator, IntegerPointEmitter) + dims []string + opt IteratorOptions + points []IntegerPoint + keepTags bool } func newFloatReduceIntegerIterator(input FloatIterator, opt IteratorOptions, createFn func() (FloatPointAggregator, IntegerPointEmitter)) *floatReduceIntegerIterator { @@ -1559,7 +1563,9 @@ func (itr *floatReduceIntegerIterator) reduce() ([]IntegerPoint, error) { points := rp.Emitter.Emit() for i := len(points) - 1; i >= 0; i-- { points[i].Name = rp.Name - points[i].Tags = rp.Tags + if !itr.keepTags { + points[i].Tags = rp.Tags + } // Set the points time to the interval time if the reducer didn't provide one. 
if points[i].Time == ZeroTime { points[i].Time = startTime @@ -1839,11 +1845,12 @@ type floatIntegerExprFunc func(a, b float64) int64 // floatReduceStringIterator executes a reducer for every interval and buffers the result. type floatReduceStringIterator struct { - input *bufFloatIterator - create func() (FloatPointAggregator, StringPointEmitter) - dims []string - opt IteratorOptions - points []StringPoint + input *bufFloatIterator + create func() (FloatPointAggregator, StringPointEmitter) + dims []string + opt IteratorOptions + points []StringPoint + keepTags bool } func newFloatReduceStringIterator(input FloatIterator, opt IteratorOptions, createFn func() (FloatPointAggregator, StringPointEmitter)) *floatReduceStringIterator { @@ -1975,7 +1982,9 @@ func (itr *floatReduceStringIterator) reduce() ([]StringPoint, error) { points := rp.Emitter.Emit() for i := len(points) - 1; i >= 0; i-- { points[i].Name = rp.Name - points[i].Tags = rp.Tags + if !itr.keepTags { + points[i].Tags = rp.Tags + } // Set the points time to the interval time if the reducer didn't provide one. if points[i].Time == ZeroTime { points[i].Time = startTime @@ -2255,11 +2264,12 @@ type floatStringExprFunc func(a, b float64) string // floatReduceBooleanIterator executes a reducer for every interval and buffers the result. 
type floatReduceBooleanIterator struct { - input *bufFloatIterator - create func() (FloatPointAggregator, BooleanPointEmitter) - dims []string - opt IteratorOptions - points []BooleanPoint + input *bufFloatIterator + create func() (FloatPointAggregator, BooleanPointEmitter) + dims []string + opt IteratorOptions + points []BooleanPoint + keepTags bool } func newFloatReduceBooleanIterator(input FloatIterator, opt IteratorOptions, createFn func() (FloatPointAggregator, BooleanPointEmitter)) *floatReduceBooleanIterator { @@ -2391,7 +2401,9 @@ func (itr *floatReduceBooleanIterator) reduce() ([]BooleanPoint, error) { points := rp.Emitter.Emit() for i := len(points) - 1; i >= 0; i-- { points[i].Name = rp.Name - points[i].Tags = rp.Tags + if !itr.keepTags { + points[i].Tags = rp.Tags + } // Set the points time to the interval time if the reducer didn't provide one. if points[i].Time == ZeroTime { points[i].Time = startTime @@ -3924,11 +3936,12 @@ func (itr *integerChanIterator) Next() (*IntegerPoint, error) { // integerReduceFloatIterator executes a reducer for every interval and buffers the result. type integerReduceFloatIterator struct { - input *bufIntegerIterator - create func() (IntegerPointAggregator, FloatPointEmitter) - dims []string - opt IteratorOptions - points []FloatPoint + input *bufIntegerIterator + create func() (IntegerPointAggregator, FloatPointEmitter) + dims []string + opt IteratorOptions + points []FloatPoint + keepTags bool } func newIntegerReduceFloatIterator(input IntegerIterator, opt IteratorOptions, createFn func() (IntegerPointAggregator, FloatPointEmitter)) *integerReduceFloatIterator { @@ -4060,7 +4073,9 @@ func (itr *integerReduceFloatIterator) reduce() ([]FloatPoint, error) { points := rp.Emitter.Emit() for i := len(points) - 1; i >= 0; i-- { points[i].Name = rp.Name - points[i].Tags = rp.Tags + if !itr.keepTags { + points[i].Tags = rp.Tags + } // Set the points time to the interval time if the reducer didn't provide one. 
if points[i].Time == ZeroTime { points[i].Time = startTime @@ -4340,11 +4355,12 @@ type integerFloatExprFunc func(a, b int64) float64 // integerReduceIntegerIterator executes a reducer for every interval and buffers the result. type integerReduceIntegerIterator struct { - input *bufIntegerIterator - create func() (IntegerPointAggregator, IntegerPointEmitter) - dims []string - opt IteratorOptions - points []IntegerPoint + input *bufIntegerIterator + create func() (IntegerPointAggregator, IntegerPointEmitter) + dims []string + opt IteratorOptions + points []IntegerPoint + keepTags bool } func newIntegerReduceIntegerIterator(input IntegerIterator, opt IteratorOptions, createFn func() (IntegerPointAggregator, IntegerPointEmitter)) *integerReduceIntegerIterator { @@ -4476,7 +4492,9 @@ func (itr *integerReduceIntegerIterator) reduce() ([]IntegerPoint, error) { points := rp.Emitter.Emit() for i := len(points) - 1; i >= 0; i-- { points[i].Name = rp.Name - points[i].Tags = rp.Tags + if !itr.keepTags { + points[i].Tags = rp.Tags + } // Set the points time to the interval time if the reducer didn't provide one. if points[i].Time == ZeroTime { points[i].Time = startTime @@ -4752,11 +4770,12 @@ type integerExprFunc func(a, b int64) int64 // integerReduceStringIterator executes a reducer for every interval and buffers the result. 
type integerReduceStringIterator struct { - input *bufIntegerIterator - create func() (IntegerPointAggregator, StringPointEmitter) - dims []string - opt IteratorOptions - points []StringPoint + input *bufIntegerIterator + create func() (IntegerPointAggregator, StringPointEmitter) + dims []string + opt IteratorOptions + points []StringPoint + keepTags bool } func newIntegerReduceStringIterator(input IntegerIterator, opt IteratorOptions, createFn func() (IntegerPointAggregator, StringPointEmitter)) *integerReduceStringIterator { @@ -4888,7 +4907,9 @@ func (itr *integerReduceStringIterator) reduce() ([]StringPoint, error) { points := rp.Emitter.Emit() for i := len(points) - 1; i >= 0; i-- { points[i].Name = rp.Name - points[i].Tags = rp.Tags + if !itr.keepTags { + points[i].Tags = rp.Tags + } // Set the points time to the interval time if the reducer didn't provide one. if points[i].Time == ZeroTime { points[i].Time = startTime @@ -5168,11 +5189,12 @@ type integerStringExprFunc func(a, b int64) string // integerReduceBooleanIterator executes a reducer for every interval and buffers the result. type integerReduceBooleanIterator struct { - input *bufIntegerIterator - create func() (IntegerPointAggregator, BooleanPointEmitter) - dims []string - opt IteratorOptions - points []BooleanPoint + input *bufIntegerIterator + create func() (IntegerPointAggregator, BooleanPointEmitter) + dims []string + opt IteratorOptions + points []BooleanPoint + keepTags bool } func newIntegerReduceBooleanIterator(input IntegerIterator, opt IteratorOptions, createFn func() (IntegerPointAggregator, BooleanPointEmitter)) *integerReduceBooleanIterator { @@ -5304,7 +5326,9 @@ func (itr *integerReduceBooleanIterator) reduce() ([]BooleanPoint, error) { points := rp.Emitter.Emit() for i := len(points) - 1; i >= 0; i-- { points[i].Name = rp.Name - points[i].Tags = rp.Tags + if !itr.keepTags { + points[i].Tags = rp.Tags + } // Set the points time to the interval time if the reducer didn't provide one. 
if points[i].Time == ZeroTime { points[i].Time = startTime @@ -6823,11 +6847,12 @@ func (itr *stringChanIterator) Next() (*StringPoint, error) { // stringReduceFloatIterator executes a reducer for every interval and buffers the result. type stringReduceFloatIterator struct { - input *bufStringIterator - create func() (StringPointAggregator, FloatPointEmitter) - dims []string - opt IteratorOptions - points []FloatPoint + input *bufStringIterator + create func() (StringPointAggregator, FloatPointEmitter) + dims []string + opt IteratorOptions + points []FloatPoint + keepTags bool } func newStringReduceFloatIterator(input StringIterator, opt IteratorOptions, createFn func() (StringPointAggregator, FloatPointEmitter)) *stringReduceFloatIterator { @@ -6959,7 +6984,9 @@ func (itr *stringReduceFloatIterator) reduce() ([]FloatPoint, error) { points := rp.Emitter.Emit() for i := len(points) - 1; i >= 0; i-- { points[i].Name = rp.Name - points[i].Tags = rp.Tags + if !itr.keepTags { + points[i].Tags = rp.Tags + } // Set the points time to the interval time if the reducer didn't provide one. if points[i].Time == ZeroTime { points[i].Time = startTime @@ -7239,11 +7266,12 @@ type stringFloatExprFunc func(a, b string) float64 // stringReduceIntegerIterator executes a reducer for every interval and buffers the result. 
type stringReduceIntegerIterator struct { - input *bufStringIterator - create func() (StringPointAggregator, IntegerPointEmitter) - dims []string - opt IteratorOptions - points []IntegerPoint + input *bufStringIterator + create func() (StringPointAggregator, IntegerPointEmitter) + dims []string + opt IteratorOptions + points []IntegerPoint + keepTags bool } func newStringReduceIntegerIterator(input StringIterator, opt IteratorOptions, createFn func() (StringPointAggregator, IntegerPointEmitter)) *stringReduceIntegerIterator { @@ -7375,7 +7403,9 @@ func (itr *stringReduceIntegerIterator) reduce() ([]IntegerPoint, error) { points := rp.Emitter.Emit() for i := len(points) - 1; i >= 0; i-- { points[i].Name = rp.Name - points[i].Tags = rp.Tags + if !itr.keepTags { + points[i].Tags = rp.Tags + } // Set the points time to the interval time if the reducer didn't provide one. if points[i].Time == ZeroTime { points[i].Time = startTime @@ -7655,11 +7685,12 @@ type stringIntegerExprFunc func(a, b string) int64 // stringReduceStringIterator executes a reducer for every interval and buffers the result. type stringReduceStringIterator struct { - input *bufStringIterator - create func() (StringPointAggregator, StringPointEmitter) - dims []string - opt IteratorOptions - points []StringPoint + input *bufStringIterator + create func() (StringPointAggregator, StringPointEmitter) + dims []string + opt IteratorOptions + points []StringPoint + keepTags bool } func newStringReduceStringIterator(input StringIterator, opt IteratorOptions, createFn func() (StringPointAggregator, StringPointEmitter)) *stringReduceStringIterator { @@ -7791,7 +7822,9 @@ func (itr *stringReduceStringIterator) reduce() ([]StringPoint, error) { points := rp.Emitter.Emit() for i := len(points) - 1; i >= 0; i-- { points[i].Name = rp.Name - points[i].Tags = rp.Tags + if !itr.keepTags { + points[i].Tags = rp.Tags + } // Set the points time to the interval time if the reducer didn't provide one. 
if points[i].Time == ZeroTime { points[i].Time = startTime @@ -8067,11 +8100,12 @@ type stringExprFunc func(a, b string) string // stringReduceBooleanIterator executes a reducer for every interval and buffers the result. type stringReduceBooleanIterator struct { - input *bufStringIterator - create func() (StringPointAggregator, BooleanPointEmitter) - dims []string - opt IteratorOptions - points []BooleanPoint + input *bufStringIterator + create func() (StringPointAggregator, BooleanPointEmitter) + dims []string + opt IteratorOptions + points []BooleanPoint + keepTags bool } func newStringReduceBooleanIterator(input StringIterator, opt IteratorOptions, createFn func() (StringPointAggregator, BooleanPointEmitter)) *stringReduceBooleanIterator { @@ -8203,7 +8237,9 @@ func (itr *stringReduceBooleanIterator) reduce() ([]BooleanPoint, error) { points := rp.Emitter.Emit() for i := len(points) - 1; i >= 0; i-- { points[i].Name = rp.Name - points[i].Tags = rp.Tags + if !itr.keepTags { + points[i].Tags = rp.Tags + } // Set the points time to the interval time if the reducer didn't provide one. if points[i].Time == ZeroTime { points[i].Time = startTime @@ -9722,11 +9758,12 @@ func (itr *booleanChanIterator) Next() (*BooleanPoint, error) { // booleanReduceFloatIterator executes a reducer for every interval and buffers the result. 
type booleanReduceFloatIterator struct { - input *bufBooleanIterator - create func() (BooleanPointAggregator, FloatPointEmitter) - dims []string - opt IteratorOptions - points []FloatPoint + input *bufBooleanIterator + create func() (BooleanPointAggregator, FloatPointEmitter) + dims []string + opt IteratorOptions + points []FloatPoint + keepTags bool } func newBooleanReduceFloatIterator(input BooleanIterator, opt IteratorOptions, createFn func() (BooleanPointAggregator, FloatPointEmitter)) *booleanReduceFloatIterator { @@ -9858,7 +9895,9 @@ func (itr *booleanReduceFloatIterator) reduce() ([]FloatPoint, error) { points := rp.Emitter.Emit() for i := len(points) - 1; i >= 0; i-- { points[i].Name = rp.Name - points[i].Tags = rp.Tags + if !itr.keepTags { + points[i].Tags = rp.Tags + } // Set the points time to the interval time if the reducer didn't provide one. if points[i].Time == ZeroTime { points[i].Time = startTime @@ -10138,11 +10177,12 @@ type booleanFloatExprFunc func(a, b bool) float64 // booleanReduceIntegerIterator executes a reducer for every interval and buffers the result. type booleanReduceIntegerIterator struct { - input *bufBooleanIterator - create func() (BooleanPointAggregator, IntegerPointEmitter) - dims []string - opt IteratorOptions - points []IntegerPoint + input *bufBooleanIterator + create func() (BooleanPointAggregator, IntegerPointEmitter) + dims []string + opt IteratorOptions + points []IntegerPoint + keepTags bool } func newBooleanReduceIntegerIterator(input BooleanIterator, opt IteratorOptions, createFn func() (BooleanPointAggregator, IntegerPointEmitter)) *booleanReduceIntegerIterator { @@ -10274,7 +10314,9 @@ func (itr *booleanReduceIntegerIterator) reduce() ([]IntegerPoint, error) { points := rp.Emitter.Emit() for i := len(points) - 1; i >= 0; i-- { points[i].Name = rp.Name - points[i].Tags = rp.Tags + if !itr.keepTags { + points[i].Tags = rp.Tags + } // Set the points time to the interval time if the reducer didn't provide one. 
if points[i].Time == ZeroTime { points[i].Time = startTime @@ -10554,11 +10596,12 @@ type booleanIntegerExprFunc func(a, b bool) int64 // booleanReduceStringIterator executes a reducer for every interval and buffers the result. type booleanReduceStringIterator struct { - input *bufBooleanIterator - create func() (BooleanPointAggregator, StringPointEmitter) - dims []string - opt IteratorOptions - points []StringPoint + input *bufBooleanIterator + create func() (BooleanPointAggregator, StringPointEmitter) + dims []string + opt IteratorOptions + points []StringPoint + keepTags bool } func newBooleanReduceStringIterator(input BooleanIterator, opt IteratorOptions, createFn func() (BooleanPointAggregator, StringPointEmitter)) *booleanReduceStringIterator { @@ -10690,7 +10733,9 @@ func (itr *booleanReduceStringIterator) reduce() ([]StringPoint, error) { points := rp.Emitter.Emit() for i := len(points) - 1; i >= 0; i-- { points[i].Name = rp.Name - points[i].Tags = rp.Tags + if !itr.keepTags { + points[i].Tags = rp.Tags + } // Set the points time to the interval time if the reducer didn't provide one. if points[i].Time == ZeroTime { points[i].Time = startTime @@ -10970,11 +11015,12 @@ type booleanStringExprFunc func(a, b bool) string // booleanReduceBooleanIterator executes a reducer for every interval and buffers the result. 
type booleanReduceBooleanIterator struct { - input *bufBooleanIterator - create func() (BooleanPointAggregator, BooleanPointEmitter) - dims []string - opt IteratorOptions - points []BooleanPoint + input *bufBooleanIterator + create func() (BooleanPointAggregator, BooleanPointEmitter) + dims []string + opt IteratorOptions + points []BooleanPoint + keepTags bool } func newBooleanReduceBooleanIterator(input BooleanIterator, opt IteratorOptions, createFn func() (BooleanPointAggregator, BooleanPointEmitter)) *booleanReduceBooleanIterator { @@ -11106,7 +11152,9 @@ func (itr *booleanReduceBooleanIterator) reduce() ([]BooleanPoint, error) { points := rp.Emitter.Emit() for i := len(points) - 1; i >= 0; i-- { points[i].Name = rp.Name - points[i].Tags = rp.Tags + if !itr.keepTags { + points[i].Tags = rp.Tags + } // Set the points time to the interval time if the reducer didn't provide one. if points[i].Time == ZeroTime { points[i].Time = startTime diff --git a/influxql/iterator.gen.go.tmpl b/influxql/iterator.gen.go.tmpl index 6c78fd5c02..901a15c2e5 100644 --- a/influxql/iterator.gen.go.tmpl +++ b/influxql/iterator.gen.go.tmpl @@ -1017,6 +1017,7 @@ type {{$k.name}}Reduce{{$v.Name}}Iterator struct { dims []string opt IteratorOptions points []{{$v.Name}}Point + keepTags bool } func new{{$k.Name}}Reduce{{$v.Name}}Iterator(input {{$k.Name}}Iterator, opt IteratorOptions, createFn func() ({{$k.Name}}PointAggregator, {{$v.Name}}PointEmitter)) *{{$k.name}}Reduce{{$v.Name}}Iterator { @@ -1148,7 +1149,9 @@ func (itr *{{$k.name}}Reduce{{$v.Name}}Iterator) reduce() ([]{{$v.Name}}Point, e points := rp.Emitter.Emit() for i := len(points)-1; i >= 0; i-- { points[i].Name = rp.Name - points[i].Tags = rp.Tags + if !itr.keepTags { + points[i].Tags = rp.Tags + } // Set the points time to the interval time if the reducer didn't provide one. 
if points[i].Time == ZeroTime { points[i].Time = startTime diff --git a/influxql/select.go b/influxql/select.go index d9c40b7490..916288df42 100644 --- a/influxql/select.go +++ b/influxql/select.go @@ -60,29 +60,31 @@ func buildIterators(stmt *SelectStatement, ic IteratorCreator, opt IteratorOptio return buildAuxIterators(stmt.Fields, ic, stmt.Sources, opt) } - // Include auxiliary fields from top() and bottom() - extraFields := 0 - for call := range info.calls { - if call.Name == "top" || call.Name == "bottom" { - for i := 1; i < len(call.Args)-1; i++ { - ref := call.Args[i].(*VarRef) - opt.Aux = append(opt.Aux, *ref) - extraFields++ + // Include auxiliary fields from top() and bottom() when not writing the results. + fields := stmt.Fields + if stmt.Target == nil { + extraFields := 0 + for call := range info.calls { + if call.Name == "top" || call.Name == "bottom" { + for i := 1; i < len(call.Args)-1; i++ { + ref := call.Args[i].(*VarRef) + opt.Aux = append(opt.Aux, *ref) + extraFields++ + } } } - } - fields := stmt.Fields - if extraFields > 0 { - // Rebuild the list of fields if any extra fields are being implicitly added - fields = make([]*Field, 0, len(stmt.Fields)+extraFields) - for _, f := range stmt.Fields { - fields = append(fields, f) - switch expr := f.Expr.(type) { - case *Call: - if expr.Name == "top" || expr.Name == "bottom" { - for i := 1; i < len(expr.Args)-1; i++ { - fields = append(fields, &Field{Expr: expr.Args[i]}) + if extraFields > 0 { + // Rebuild the list of fields if any extra fields are being implicitly added + fields = make([]*Field, 0, len(stmt.Fields)+extraFields) + for _, f := range stmt.Fields { + fields = append(fields, f) + switch expr := f.Expr.(type) { + case *Call: + if expr.Name == "top" || expr.Name == "bottom" { + for i := 1; i < len(expr.Args)-1; i++ { + fields = append(fields, &Field{Expr: expr.Args[i]}) + } } } } @@ -97,7 +99,7 @@ func buildIterators(stmt *SelectStatement, ic IteratorCreator, opt IteratorOptio } } - return 
buildFieldIterators(fields, ic, stmt.Sources, opt, selector) + return buildFieldIterators(fields, ic, stmt.Sources, opt, selector, stmt.Target != nil) } // buildAuxIterators creates a set of iterators from a single combined auxiliary iterator. @@ -234,7 +236,7 @@ func buildAuxIterator(expr Expr, aitr AuxIterator, opt IteratorOptions) (Iterato } // buildFieldIterators creates an iterator for each field expression. -func buildFieldIterators(fields Fields, ic IteratorCreator, sources Sources, opt IteratorOptions, selector bool) ([]Iterator, error) { +func buildFieldIterators(fields Fields, ic IteratorCreator, sources Sources, opt IteratorOptions, selector, writeMode bool) ([]Iterator, error) { // Create iterators from fields against the iterator creator. itrs := make([]Iterator, len(fields)) @@ -252,7 +254,7 @@ func buildFieldIterators(fields Fields, ic IteratorCreator, sources Sources, opt } expr := Reduce(f.Expr, nil) - itr, err := buildExprIterator(expr, ic, sources, opt, selector) + itr, err := buildExprIterator(expr, ic, sources, opt, selector, writeMode) if err != nil { return err } else if itr == nil { @@ -301,13 +303,14 @@ func buildFieldIterators(fields Fields, ic IteratorCreator, sources Sources, opt } // buildExprIterator creates an iterator for an expression. 
-func buildExprIterator(expr Expr, ic IteratorCreator, sources Sources, opt IteratorOptions, selector bool) (Iterator, error) { +func buildExprIterator(expr Expr, ic IteratorCreator, sources Sources, opt IteratorOptions, selector, writeMode bool) (Iterator, error) { opt.Expr = expr b := exprIteratorBuilder{ - ic: ic, - sources: sources, - opt: opt, - selector: selector, + ic: ic, + sources: sources, + opt: opt, + selector: selector, + writeMode: writeMode, } switch expr := expr.(type) { @@ -318,7 +321,7 @@ func buildExprIterator(expr Expr, ic IteratorCreator, sources Sources, opt Itera case *BinaryExpr: return b.buildBinaryExprIterator(expr) case *ParenExpr: - return buildExprIterator(expr.Expr, ic, sources, opt, selector) + return buildExprIterator(expr.Expr, ic, sources, opt, selector, writeMode) case *nilLiteral: return &nilFloatIterator{}, nil default: @@ -327,10 +330,11 @@ func buildExprIterator(expr Expr, ic IteratorCreator, sources Sources, opt Itera } type exprIteratorBuilder struct { - ic IteratorCreator - sources Sources - opt IteratorOptions - selector bool + ic IteratorCreator + sources Sources + opt IteratorOptions + selector bool + writeMode bool } func (b *exprIteratorBuilder) buildVarRefIterator(expr *VarRef) (Iterator, error) { @@ -384,7 +388,7 @@ func (b *exprIteratorBuilder) buildCallIterator(expr *Call) (Iterator, error) { switch expr.Name { case "distinct": opt.Ordered = true - input, err := buildExprIterator(expr.Args[0].(*VarRef), b.ic, b.sources, opt, b.selector) + input, err := buildExprIterator(expr.Args[0].(*VarRef), b.ic, b.sources, opt, b.selector, false) if err != nil { return nil, err } @@ -395,7 +399,7 @@ func (b *exprIteratorBuilder) buildCallIterator(expr *Call) (Iterator, error) { return NewIntervalIterator(input, opt), nil case "sample": opt.Ordered = true - input, err := buildExprIterator(expr.Args[0], b.ic, b.sources, opt, b.selector) + input, err := buildExprIterator(expr.Args[0], b.ic, b.sources, opt, b.selector, false) if 
err != nil { return nil, err } @@ -404,7 +408,7 @@ func (b *exprIteratorBuilder) buildCallIterator(expr *Call) (Iterator, error) { return newSampleIterator(input, opt, int(size.Val)) case "holt_winters", "holt_winters_with_fit": opt.Ordered = true - input, err := buildExprIterator(expr.Args[0], b.ic, b.sources, opt, b.selector) + input, err := buildExprIterator(expr.Args[0], b.ic, b.sources, opt, b.selector, false) if err != nil { return nil, err } @@ -430,7 +434,7 @@ func (b *exprIteratorBuilder) buildCallIterator(expr *Call) (Iterator, error) { } opt.Ordered = true - input, err := buildExprIterator(expr.Args[0], b.ic, b.sources, opt, b.selector) + input, err := buildExprIterator(expr.Args[0], b.ic, b.sources, opt, b.selector, false) if err != nil { return nil, err } @@ -460,14 +464,14 @@ func (b *exprIteratorBuilder) buildCallIterator(expr *Call) (Iterator, error) { panic(fmt.Sprintf("invalid series aggregate function: %s", expr.Name)) case "cumulative_sum": opt.Ordered = true - input, err := buildExprIterator(expr.Args[0], b.ic, b.sources, opt, b.selector) + input, err := buildExprIterator(expr.Args[0], b.ic, b.sources, opt, b.selector, false) if err != nil { return nil, err } return newCumulativeSumIterator(input, opt) case "integral": opt.Ordered = true - input, err := buildExprIterator(expr.Args[0].(*VarRef), b.ic, b.sources, opt, false) + input, err := buildExprIterator(expr.Args[0].(*VarRef), b.ic, b.sources, opt, false, false) if err != nil { return nil, err } @@ -481,7 +485,7 @@ func (b *exprIteratorBuilder) buildCallIterator(expr *Call) (Iterator, error) { var input Iterator if len(expr.Args) > 2 { // Create a max iterator using the groupings in the arguments. 
- dims := make(map[string]struct{}, len(expr.Args)-2) + dims := make(map[string]struct{}, len(expr.Args)-2+len(opt.GroupBy)) for i := 1; i < len(expr.Args)-1; i++ { ref := expr.Args[i].(*VarRef) dims[ref.Val] = struct{}{} @@ -502,6 +506,7 @@ func (b *exprIteratorBuilder) buildCallIterator(expr *Call) (Iterator, error) { builder := *b builder.opt = callOpt builder.selector = true + builder.writeMode = false i, err := builder.callIterator(call, callOpt) if err != nil { @@ -513,6 +518,7 @@ func (b *exprIteratorBuilder) buildCallIterator(expr *Call) (Iterator, error) { builder := *b builder.opt.Expr = expr.Args[0] builder.selector = true + builder.writeMode = false ref := expr.Args[0].(*VarRef) i, err := builder.buildVarRefIterator(ref) @@ -523,7 +529,7 @@ func (b *exprIteratorBuilder) buildCallIterator(expr *Call) (Iterator, error) { } n := expr.Args[len(expr.Args)-1].(*IntegerLiteral) - return newTopIterator(input, opt, int(n.Val)) + return newTopIterator(input, opt, int(n.Val), b.writeMode) case "bottom": if len(expr.Args) < 2 { return nil, fmt.Errorf("bottom() requires 2 or more arguments, got %d", len(expr.Args)) @@ -553,6 +559,7 @@ func (b *exprIteratorBuilder) buildCallIterator(expr *Call) (Iterator, error) { builder := *b builder.opt = callOpt builder.selector = true + builder.writeMode = false i, err := builder.callIterator(call, callOpt) if err != nil { @@ -564,6 +571,7 @@ func (b *exprIteratorBuilder) buildCallIterator(expr *Call) (Iterator, error) { builder := *b builder.opt.Expr = nil builder.selector = true + builder.writeMode = false ref := expr.Args[0].(*VarRef) i, err := builder.buildVarRefIterator(ref) @@ -574,7 +582,7 @@ func (b *exprIteratorBuilder) buildCallIterator(expr *Call) (Iterator, error) { } n := expr.Args[len(expr.Args)-1].(*IntegerLiteral) - return newBottomIterator(input, b.opt, int(n.Val)) + return newBottomIterator(input, b.opt, int(n.Val), b.writeMode) } itr, err := func() (Iterator, error) { @@ -583,7 +591,7 @@ func (b 
*exprIteratorBuilder) buildCallIterator(expr *Call) (Iterator, error) { switch arg0 := expr.Args[0].(type) { case *Call: if arg0.Name == "distinct" { - input, err := buildExprIterator(arg0, b.ic, b.sources, opt, b.selector) + input, err := buildExprIterator(arg0, b.ic, b.sources, opt, b.selector, false) if err != nil { return nil, err } @@ -595,33 +603,33 @@ func (b *exprIteratorBuilder) buildCallIterator(expr *Call) (Iterator, error) { return b.callIterator(expr, opt) case "median": opt.Ordered = true - input, err := buildExprIterator(expr.Args[0].(*VarRef), b.ic, b.sources, opt, false) + input, err := buildExprIterator(expr.Args[0].(*VarRef), b.ic, b.sources, opt, false, false) if err != nil { return nil, err } return newMedianIterator(input, opt) case "mode": - input, err := buildExprIterator(expr.Args[0].(*VarRef), b.ic, b.sources, opt, false) + input, err := buildExprIterator(expr.Args[0].(*VarRef), b.ic, b.sources, opt, false, false) if err != nil { return nil, err } return NewModeIterator(input, opt) case "stddev": - input, err := buildExprIterator(expr.Args[0].(*VarRef), b.ic, b.sources, opt, false) + input, err := buildExprIterator(expr.Args[0].(*VarRef), b.ic, b.sources, opt, false, false) if err != nil { return nil, err } return newStddevIterator(input, opt) case "spread": // OPTIMIZE(benbjohnson): convert to map/reduce - input, err := buildExprIterator(expr.Args[0].(*VarRef), b.ic, b.sources, opt, false) + input, err := buildExprIterator(expr.Args[0].(*VarRef), b.ic, b.sources, opt, false, false) if err != nil { return nil, err } return newSpreadIterator(input, opt) case "percentile": opt.Ordered = true - input, err := buildExprIterator(expr.Args[0].(*VarRef), b.ic, b.sources, opt, false) + input, err := buildExprIterator(expr.Args[0].(*VarRef), b.ic, b.sources, opt, false, false) if err != nil { return nil, err } @@ -663,24 +671,24 @@ func (b *exprIteratorBuilder) buildBinaryExprIterator(expr *BinaryExpr) (Iterato return nil, fmt.Errorf("unable to 
construct an iterator from two literals: LHS: %T, RHS: %T", lhs, rhs) } - lhs, err := buildExprIterator(expr.LHS, b.ic, b.sources, b.opt, b.selector) + lhs, err := buildExprIterator(expr.LHS, b.ic, b.sources, b.opt, b.selector, false) if err != nil { return nil, err } return buildRHSTransformIterator(lhs, rhs, expr.Op, b.opt) } else if lhs, ok := expr.LHS.(Literal); ok { - rhs, err := buildExprIterator(expr.RHS, b.ic, b.sources, b.opt, b.selector) + rhs, err := buildExprIterator(expr.RHS, b.ic, b.sources, b.opt, b.selector, false) if err != nil { return nil, err } return buildLHSTransformIterator(lhs, rhs, expr.Op, b.opt) } else { // We have two iterators. Combine them into a single iterator. - lhs, err := buildExprIterator(expr.LHS, b.ic, b.sources, b.opt, false) + lhs, err := buildExprIterator(expr.LHS, b.ic, b.sources, b.opt, false, false) if err != nil { return nil, err } - rhs, err := buildExprIterator(expr.RHS, b.ic, b.sources, b.opt, false) + rhs, err := buildExprIterator(expr.RHS, b.ic, b.sources, b.opt, false, false) if err != nil { return nil, err } @@ -703,7 +711,7 @@ func (b *exprIteratorBuilder) callIterator(expr *Call, opt IteratorOptions) (Ite // Identify the name of the field we are using. 
arg0 := expr.Args[0].(*VarRef) - input, err := buildExprIterator(arg0, b.ic, []Source{source}, opt, b.selector) + input, err := buildExprIterator(arg0, b.ic, []Source{source}, opt, b.selector, false) if err != nil { return err } diff --git a/models/points.go b/models/points.go index f9d2800908..1bea4bb1a5 100644 --- a/models/points.go +++ b/models/points.go @@ -344,6 +344,19 @@ func parsePoint(buf []byte, defaultTime time.Time, precision string) (Point, err return nil, fmt.Errorf("missing fields") } + var maxKeyErr error + walkFields(fields, func(k, v []byte) bool { + if sz := seriesKeySize(key, k); sz > MaxKeyLength { + maxKeyErr = fmt.Errorf("max key length exceeded: %v > %v", sz, MaxKeyLength) + return false + } + return true + }) + + if maxKeyErr != nil { + return nil, maxKeyErr + } + // scan the last block which is an optional integer timestamp pos, ts, err := scanTime(buf, pos) if err != nil { @@ -1259,13 +1272,22 @@ func pointKey(measurement string, tags Tags, fields Fields, t time.Time) ([]byte } key := MakeKey([]byte(measurement), tags) - if len(key) > MaxKeyLength { - return nil, fmt.Errorf("max key length exceeded: %v > %v", len(key), MaxKeyLength) + for field := range fields { + sz := seriesKeySize(key, []byte(field)) + if sz > MaxKeyLength { + return nil, fmt.Errorf("max key length exceeded: %v > %v", sz, MaxKeyLength) + } } return key, nil } +func seriesKeySize(key, field []byte) int { + // 4 is the length of the tsm1.fieldKeySeparator constant. It's inlined here to avoid a circular + // dependency. + return len(key) + 4 + len(field) +} + // NewPointFromBytes returns a new Point from a marshalled Point. func NewPointFromBytes(b []byte) (Point, error) { p := &point{} @@ -1421,6 +1443,27 @@ func walkTags(buf []byte, fn func(key, value []byte) bool) { } } +// walkFields walks each field key and value via fn. If fn returns false, the iteration +// is stopped. The values are the raw byte slices and not the converted types. 
+func walkFields(buf []byte, fn func(key, value []byte) bool) { + var i int + var key, val []byte + for len(buf) > 0 { + i, key = scanTo(buf, 0, '=') + buf = buf[i+1:] + i, val = scanFieldValue(buf, 0) + buf = buf[i:] + if !fn(key, val) { + break + } + + // slice off comma + if len(buf) > 0 { + buf = buf[1:] + } + } +} + func parseTags(buf []byte) Tags { if len(buf) == 0 { return nil diff --git a/models/points_test.go b/models/points_test.go index af42bb1b35..de4bd92e81 100644 --- a/models/points_test.go +++ b/models/points_test.go @@ -2062,17 +2062,30 @@ func TestNewPointsRejectsEmptyFieldNames(t *testing.T) { func TestNewPointsRejectsMaxKey(t *testing.T) { var key string - for i := 0; i < 65536; i++ { + // tsm field key is point key, separator (4 bytes) and field + for i := 0; i < models.MaxKeyLength-len("value")-4; i++ { key += "a" } - if _, err := models.NewPoint(key, nil, models.Fields{"value": 1}, time.Now()); err == nil { + // Test max key len + if _, err := models.NewPoint(key, nil, models.Fields{"value": 1, "ok": 2.0}, time.Now()); err != nil { + t.Fatalf("new point with max key. got: %v, expected: nil", err) + } + + if _, err := models.ParsePointsString(fmt.Sprintf("%v value=1,ok=2.0", key)); err != nil { + t.Fatalf("parse point with max key. got: %v, expected: nil", err) + } + + // Test 1 byte over max key len + key += "a" + if _, err := models.NewPoint(key, nil, models.Fields{"value": 1, "ok": 2.0}, time.Now()); err == nil { t.Fatalf("new point with max key. got: nil, expected: error") } - if _, err := models.ParsePointsString(fmt.Sprintf("%v value=1", key)); err == nil { + if _, err := models.ParsePointsString(fmt.Sprintf("%v value=1,ok=2.0", key)); err == nil { t.Fatalf("parse point with max key. 
got: nil, expected: error") } + } func TestParseKeyEmpty(t *testing.T) { diff --git a/pkg/bloom/bloom.go b/pkg/bloom/bloom.go new file mode 100644 index 0000000000..1316b66043 --- /dev/null +++ b/pkg/bloom/bloom.go @@ -0,0 +1,131 @@ +package bloom + +// NOTE: +// This package implements a limited bloom filter implementation based on +// Will Fitzgerald's bloom & bitset packages. It's implemented locally to +// support zero-copy memory-mapped slices. +// +// This also optimizes the filter by always using a bitset size with a power of 2. + +import ( + "fmt" + "math" + + "github.com/spaolacci/murmur3" +) + +// Filter represents a bloom filter. +type Filter struct { + k uint64 + b []byte + mask uint64 +} + +// NewFilter returns a new instance of Filter using m bits and k hash functions. +// If m is not a power of two then it is rounded to the next highest power of 2. +func NewFilter(m uint64, k uint64) *Filter { + m = pow2(m) + + return &Filter{ + k: k, + b: make([]byte, m/8), + mask: m - 1, + } +} + +// NewFilterBuffer returns a new instance of a filter using a backing buffer. +// The buffer length MUST be a power of 2. +func NewFilterBuffer(buf []byte, k uint64) (*Filter, error) { + m := pow2(uint64(len(buf)) * 8) + if m != uint64(len(buf))*8 { + return nil, fmt.Errorf("bloom.Filter: buffer bit count must be a power of two: %d/%d", len(buf)*8, m) + } + + return &Filter{ + k: k, + b: buf, + mask: m - 1, + }, nil + +} + +// Len returns the number of bytes used in the filter. +func (f *Filter) Len() uint { return uint(len(f.b)) } + +// K returns the number of hash functions used in the filter. +func (f *Filter) K() uint64 { return f.k } + +// Bytes returns the underlying backing slice. +func (f *Filter) Bytes() []byte { return f.b } + +// Insert inserts data to the filter.
+func (f *Filter) Insert(v []byte) { + h := hash(v) + for i := uint64(0); i < f.k; i++ { + loc := f.location(h, i) + f.b[loc/8] |= 1 << (loc % 8) + } +} + +// Contains returns true if the filter possibly contains v. +// Returns false if the filter definitely does not contain v. +func (f *Filter) Contains(v []byte) bool { + h := hash(v) + for i := uint64(0); i < f.k; i++ { + loc := f.location(h, i) + if f.b[loc/8]&(1<<(loc%8)) == 0 { + return false + } + } + return true +} + +// Merge performs an in-place union of other into f. +// Returns an error if m or k of the filters differs. +func (f *Filter) Merge(other *Filter) error { + // Ensure m & k fields match. + if len(f.b) != len(other.b) { + return fmt.Errorf("bloom.Filter.Merge(): m mismatch: %d <> %d", len(f.b), len(other.b)) + } else if f.k != other.k { + return fmt.Errorf("bloom.Filter.Merge(): k mismatch: %d <> %d", f.k, other.k) + } + + // Perform union of each byte. + for i := range f.b { + f.b[i] |= other.b[i] + } + + return nil +} + +// location returns the ith hashed location using the four base hash values. +func (f *Filter) location(h [4]uint64, i uint64) uint { + return uint((h[i%2] + i*h[2+(((i+(i%2))%4)/2)]) & f.mask) +} + +// Estimate returns an estimated bit count and hash count given the element count and false positive rate. +func Estimate(n uint64, p float64) (m uint64, k uint64) { + m = uint64(math.Ceil(-1 * float64(n) * math.Log(p) / math.Pow(math.Log(2), 2))) + k = uint64(math.Ceil(math.Log(2) * float64(m) / float64(n))) + return m, k +} + +// pow2 returns the number that is the next highest power of 2. +// Returns v if it is a power of 2. +func pow2(v uint64) uint64 { + for i := uint64(8); i < 1<<62; i *= 2 { + if i >= v { + return i + } + } + panic("unreachable") +} + +// hash returns a set of 4 base hashes.
+func hash(data []byte) [4]uint64 { + h := murmur3.New128() + h.Write(data) + v1, v2 := h.Sum128() + h.Write([]byte{1}) + v3, v4 := h.Sum128() + return [4]uint64{v1, v2, v3, v4} +} diff --git a/pkg/bloom/bloom_test.go b/pkg/bloom/bloom_test.go new file mode 100644 index 0000000000..33182da6b2 --- /dev/null +++ b/pkg/bloom/bloom_test.go @@ -0,0 +1,29 @@ +package bloom_test + +import ( + "testing" + + "github.com/influxdata/influxdb/pkg/bloom" +) + +// Ensure filter can insert values and verify they exist. +func TestFilter_InsertContains(t *testing.T) { + f := bloom.NewFilter(1000, 4) + + // Insert value and validate. + f.Insert([]byte("Bess")) + if !f.Contains([]byte("Bess")) { + t.Fatal("expected true") + } + + // Insert another value and test. + f.Insert([]byte("Emma")) + if !f.Contains([]byte("Emma")) { + t.Fatal("expected true") + } + + // Validate that a non-existent value doesn't exist. + if f.Contains([]byte("Jane")) { + t.Fatal("expected false") + } +} diff --git a/services/meta/data.go b/services/meta/data.go index 32bc72c196..dc71484d73 100644 --- a/services/meta/data.go +++ b/services/meta/data.go @@ -2,7 +2,6 @@ package meta import ( "errors" - "fmt" "net" "net/url" "sort" @@ -50,32 +49,6 @@ type Data struct { MaxShardID uint64 } -// NewShardOwner sets the owner of the provided shard to the data node -// that currently owns the fewest number of shards. If multiple nodes -// own the same (fewest) number of shards, then one of those nodes -// becomes the new shard owner. -func NewShardOwner(s ShardInfo, ownerFreqs map[int]int) (uint64, error) { - var ( - minId = -1 - minFreq int - ) - - for id, freq := range ownerFreqs { - if minId == -1 || freq < minFreq { - minId, minFreq = int(id), freq - } - } - - if minId < 0 { - return 0, fmt.Errorf("cannot reassign shard %d due to lack of data nodes", s.ID) - } - - // Update the shard owner frequencies and set the new owner on the - // shard. 
- ownerFreqs[minId]++ - return uint64(minId), nil -} - // Database returns a DatabaseInfo by the database name. func (data *Data) Database(name string) *DatabaseInfo { for i := range data.Databases { diff --git a/services/meta/data_internal_test.go b/services/meta/data_internal_test.go index 9e14b8022e..c49b4f3d9e 100644 --- a/services/meta/data_internal_test.go +++ b/services/meta/data_internal_test.go @@ -1,37 +1,12 @@ package meta import ( - "reflect" "sort" "time" "testing" ) -func Test_newShardOwner(t *testing.T) { - // An error is returned if there are no data nodes available. - _, err := NewShardOwner(ShardInfo{}, map[int]int{}) - if err == nil { - t.Error("got no error, but expected one") - } - - ownerFreqs := map[int]int{1: 15, 2: 11, 3: 12} - id, err := NewShardOwner(ShardInfo{ID: 4}, ownerFreqs) - if err != nil { - t.Fatal(err) - } - - // The ID that owns the fewest shards is returned. - if got, exp := id, uint64(2); got != exp { - t.Errorf("got id %d, expected id %d", got, exp) - } - - // The ownership frequencies are updated. 
- if got, exp := ownerFreqs, map[int]int{1: 15, 2: 12, 3: 12}; !reflect.DeepEqual(got, exp) { - t.Errorf("got owner frequencies %v, expected %v", got, exp) - } -} - func TestShardGroupSort(t *testing.T) { sg1 := ShardGroupInfo{ ID: 1, diff --git a/tests/server_test.go b/tests/server_test.go index 5699a41c13..53c43e4882 100644 --- a/tests/server_test.go +++ b/tests/server_test.go @@ -2840,6 +2840,48 @@ cpu value=20 1278010021000000000 } } +func TestServer_Query_CumulativeCount(t *testing.T) { + t.Parallel() + s := OpenServer(NewConfig()) + defer s.Close() + + test := NewTest("db0", "rp0") + test.writes = Writes{ + &Write{data: fmt.Sprintf(`events signup=t 1005832000 +events signup=t 1048283000 +events signup=t 1784832000 +events signup=t 2000000000 +events signup=t 3084890000 +events signup=t 3838400000 +`)}, + } + + test.addQueries([]*Query{ + &Query{ + name: "cumulative count", + command: `SELECT cumulative_sum(count(signup)) from db0.rp0.events where time >= 1s and time < 4s group by time(1s)`, + exp: `{"results":[{"statement_id":0,"series":[{"name":"events","columns":["time","cumulative_sum"],"values":[["1970-01-01T00:00:01Z",3],["1970-01-01T00:00:02Z",4],["1970-01-01T00:00:03Z",6]]}]}]}`, + }, + }...) 
+ + for i, query := range test.queries { + if i == 0 { + if err := test.init(s); err != nil { + t.Fatalf("test init failed: %s", err) + } + } + if query.skip { + t.Logf("SKIP:: %s", query.name) + continue + } + if err := query.Execute(s); err != nil { + t.Error(query.Error(err)) + } else if !query.success() { + t.Error(query.failureMessage()) + } + } +} + func TestServer_Query_SelectGroupByTime_MultipleAggregates(t *testing.T) { t.Parallel() s := OpenServer(NewConfig()) @@ -4715,6 +4757,73 @@ func TestServer_Query_TopInt(t *testing.T) { } } +func TestServer_Query_TopBottomWriteTags(t *testing.T) { + t.Parallel() + s := OpenServer(NewConfig()) + defer s.Close() + + if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicySpec("rp0", 1, 0), true); err != nil { + t.Fatal(err) + } + + writes := []string{ + fmt.Sprintf(`cpu,host=server01 value=2.0 %d`, mustParseTime(time.RFC3339Nano, "2000-01-01T00:00:00Z").UnixNano()), + fmt.Sprintf(`cpu,host=server02 value=3.0 %d`, mustParseTime(time.RFC3339Nano, "2000-01-01T00:00:10Z").UnixNano()), + fmt.Sprintf(`cpu,host=server03 value=4.0 %d`, mustParseTime(time.RFC3339Nano, "2000-01-01T00:00:20Z").UnixNano()), + // hour 1 + fmt.Sprintf(`cpu,host=server04 value=5.0 %d`, mustParseTime(time.RFC3339Nano, "2000-01-01T01:00:00Z").UnixNano()), + fmt.Sprintf(`cpu,host=server05 value=7.0 %d`, mustParseTime(time.RFC3339Nano, "2000-01-01T01:00:10Z").UnixNano()), + fmt.Sprintf(`cpu,host=server06 value=6.0 %d`, mustParseTime(time.RFC3339Nano, "2000-01-01T01:00:20Z").UnixNano()), + // hour 2 + fmt.Sprintf(`cpu,host=server07 value=7.0 %d`, mustParseTime(time.RFC3339Nano, "2000-01-01T02:00:00Z").UnixNano()), + fmt.Sprintf(`cpu,host=server08 value=9.0 %d`, mustParseTime(time.RFC3339Nano, "2000-01-01T02:00:10Z").UnixNano()), + } + + test := NewTest("db0", "rp0") + test.writes = Writes{ + &Write{data: strings.Join(writes, "\n")}, + } + + test.addQueries([]*Query{ + &Query{ + name: "top - write - with tag", + params: url.Values{"db": 
[]string{"db0"}}, + command: `SELECT top(value, host, 2) INTO cpu_top FROM cpu`, + exp: `{"results":[{"statement_id":0,"series":[{"name":"result","columns":["time","written"],"values":[["1970-01-01T00:00:00Z",2]]}]}]}`, + }, + &Query{ + name: "top - read results with tags", + params: url.Values{"db": []string{"db0"}}, + command: `SELECT * FROM cpu_top GROUP BY *`, + exp: `{"results":[{"statement_id":0,"series":[{"name":"cpu_top","tags":{"host":"server05"},"columns":["time","top"],"values":[["2000-01-01T01:00:10Z",7]]},{"name":"cpu_top","tags":{"host":"server08"},"columns":["time","top"],"values":[["2000-01-01T02:00:10Z",9]]}]}]}`, + }, + &Query{ + name: "top - read results as fields", + params: url.Values{"db": []string{"db0"}}, + command: `SELECT * FROM cpu_top`, + exp: `{"results":[{"statement_id":0,"series":[{"name":"cpu_top","columns":["time","host","top"],"values":[["2000-01-01T01:00:10Z","server05",7],["2000-01-01T02:00:10Z","server08",9]]}]}]}`, + }, + }...) + + for i, query := range test.queries { + if i == 0 { + if err := test.init(s); err != nil { + t.Fatalf("test init failed: %s", err) + } + } + if query.skip { + t.Logf("SKIP: %s", query.name) + continue + } + + if err := query.Execute(s); err != nil { + t.Error(query.Error(err)) + } else if !query.success() { + t.Error(query.failureMessage()) + } + } +} + // Test various aggregates when different series only have data for the same timestamp. 
func TestServer_Query_Aggregates_IdenticalTime(t *testing.T) { t.Parallel() @@ -5083,24 +5192,17 @@ func TestServer_Query_Subqueries(t *testing.T) { command: `SELECT mean(value) FROM (SELECT max(usage_user), usage_user - usage_system AS value FROM cpu GROUP BY host) WHERE time >= '2000-01-01T00:00:00Z' AND time < '2000-01-01T00:00:30Z' AND host =~ /server/`, exp: `{"results":[{"statement_id":0,"series":[{"name":"cpu","columns":["time","mean"],"values":[["2000-01-01T00:00:00Z",-2]]}]}]}`, }, + &Query{ + params: url.Values{"db": []string{"db0"}}, + command: `SELECT top(usage_system, host, 2) FROM (SELECT min(usage_user), usage_system FROM cpu GROUP BY time(20s), host) WHERE time >= '2000-01-01T00:00:00Z' AND time < '2000-01-01T00:00:30Z'`, + exp: `{"results":[{"statement_id":0,"series":[{"name":"cpu","columns":["time","top","host"],"values":[["2000-01-01T00:00:00Z",89,"server02"],["2000-01-01T00:00:20Z",77,"server01"]]}]}]}`, + }, + &Query{ + params: url.Values{"db": []string{"db0"}}, + command: `SELECT bottom(usage_system, host, 2) FROM (SELECT max(usage_user), usage_system FROM cpu GROUP BY time(20s), host) WHERE time >= '2000-01-01T00:00:00Z' AND time < '2000-01-01T00:00:30Z'`, + exp: `{"results":[{"statement_id":0,"series":[{"name":"cpu","columns":["time","bottom","host"],"values":[["2000-01-01T00:00:00Z",30,"server01"],["2000-01-01T00:00:20Z",53,"server02"]]}]}]}`, + }, }...) 
- - for i, query := range test.queries { - if i == 0 { - if err := test.init(s); err != nil { - t.Fatalf("test init failed: %s", err) - } - } - if query.skip { - t.Logf("SKIP:: %s", query.name) - continue - } - if err := query.Execute(s); err != nil { - t.Error(query.Error(err)) - } else if !query.success() { - t.Error(query.failureMessage()) - } - } } func TestServer_Query_SubqueryWithGroupBy(t *testing.T) { @@ -5174,6 +5276,54 @@ func TestServer_Query_SubqueryWithGroupBy(t *testing.T) { } } +func TestServer_Query_PercentileDerivative(t *testing.T) { + t.Parallel() + s := OpenServer(NewConfig()) + defer s.Close() + + if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicySpec("rp0", 1, 0), true); err != nil { + t.Fatal(err) + } + + writes := []string{ + fmt.Sprintf(`counter value=12 %d`, mustParseTime(time.RFC3339Nano, "2000-01-01T00:00:00Z").UnixNano()), + fmt.Sprintf(`counter value=34 %d`, mustParseTime(time.RFC3339Nano, "2000-01-01T00:00:10Z").UnixNano()), + fmt.Sprintf(`counter value=78 %d`, mustParseTime(time.RFC3339Nano, "2000-01-01T00:00:20Z").UnixNano()), + fmt.Sprintf(`counter value=89 %d`, mustParseTime(time.RFC3339Nano, "2000-01-01T00:00:30Z").UnixNano()), + fmt.Sprintf(`counter value=101 %d`, mustParseTime(time.RFC3339Nano, "2000-01-01T00:00:40Z").UnixNano()), + } + test := NewTest("db0", "rp0") + test.writes = Writes{ + &Write{data: strings.Join(writes, "\n")}, + } + + test.addQueries([]*Query{ + &Query{ + name: "nth percentile of derivative", + params: url.Values{"db": []string{"db0"}}, + command: `SELECT percentile(derivative, 95) FROM (SELECT derivative(value, 1s) FROM counter) WHERE time >= '2000-01-01T00:00:00Z' AND time < '2000-01-01T00:00:50Z'`, + exp: `{"results":[{"statement_id":0,"series":[{"name":"counter","columns":["time","percentile"],"values":[["2000-01-01T00:00:20Z",4.4]]}]}]}`, + }, + }...) 
+ + for i, query := range test.queries { + if i == 0 { + if err := test.init(s); err != nil { + t.Fatalf("test init failed: %s", err) + } + } + if query.skip { + t.Logf("SKIP:: %s", query.name) + continue + } + if err := query.Execute(s); err != nil { + t.Error(query.Error(err)) + } else if !query.success() { + t.Error(query.failureMessage()) + } + } +} + func TestServer_Query_UnderscoreMeasurement(t *testing.T) { t.Parallel() s := OpenServer(NewConfig()) @@ -7711,6 +7861,53 @@ func TestServer_Query_LargeTimestamp(t *testing.T) { } } +func TestServer_Query_DotProduct(t *testing.T) { + t.Parallel() + s := OpenDefaultServer(NewConfig()) + defer s.Close() + + // Create a second database. + if err := s.CreateDatabaseAndRetentionPolicy("db0", newRetentionPolicySpec("rp0", 1, 0), true); err != nil { + t.Fatal(err) + } + + writes := []string{ + fmt.Sprintf(`cpu a=2,b=3 %d`, mustParseTime(time.RFC3339Nano, "2000-01-01T00:00:00Z").UnixNano()), + fmt.Sprintf(`cpu a=-5,b=8 %d`, mustParseTime(time.RFC3339Nano, "2000-01-01T00:00:10Z").UnixNano()), + fmt.Sprintf(`cpu a=9,b=3 %d`, mustParseTime(time.RFC3339Nano, "2000-01-01T00:00:20Z").UnixNano()), + } + + test := NewTest("db0", "rp0") + test.writes = Writes{ + &Write{data: strings.Join(writes, "\n")}, + } + + if err := test.init(s); err != nil { + t.Fatalf("test init failed: %s", err) + } + + test.addQueries([]*Query{ + &Query{ + name: "select dot product", + params: url.Values{"db": []string{"db0"}}, + command: `SELECT sum(a_b) FROM (SELECT a * b FROM cpu) WHERE time >= '2000-01-01T00:00:00Z' AND time < '2000-01-01T00:00:30Z'`, + exp: `{"results":[{"statement_id":0,"series":[{"name":"cpu","columns":["time","sum"],"values":[["2000-01-01T00:00:00Z",-7]]}]}]}`, + }, + }...) 
+ + for _, query := range test.queries { + if query.skip { + t.Logf("SKIP:: %s", query.name) + continue + } + if err := query.Execute(s); err != nil { + t.Error(query.Error(err)) + } else if !query.success() { + t.Error(query.failureMessage()) + } + } +} + // This test reproduced a data race with closing the // Subscriber points channel while writes were in-flight in the PointsWriter. func TestServer_ConcurrentPointsWriter_Subscriber(t *testing.T) { diff --git a/tsdb/engine/tsm1/compact.go b/tsdb/engine/tsm1/compact.go index 2e342e34c8..9ab37139ee 100644 --- a/tsdb/engine/tsm1/compact.go +++ b/tsdb/engine/tsm1/compact.go @@ -37,13 +37,24 @@ const ( ) var ( - errMaxFileExceeded = fmt.Errorf("max file exceeded") - errSnapshotsDisabled = fmt.Errorf("snapshots disabled") - errCompactionsDisabled = fmt.Errorf("compactions disabled") - errCompactionAborted = fmt.Errorf("compaction aborted") - errCompactionInProgress = fmt.Errorf("compaction in progress") + errMaxFileExceeded = fmt.Errorf("max file exceeded") + errSnapshotsDisabled = fmt.Errorf("snapshots disabled") + errCompactionsDisabled = fmt.Errorf("compactions disabled") + errCompactionAborted = fmt.Errorf("compaction aborted") ) +type errCompactionInProgress struct { + err error +} + +// Error returns the string representation of the error, to satisfy the error interface. +func (e errCompactionInProgress) Error() string { + if e.err != nil { + return fmt.Sprintf("compaction in progress: %s", e.err) + } + return "compaction in progress" +} + // CompactionGroup represents a list of files eligible to be compacted together. type CompactionGroup []string @@ -492,10 +503,11 @@ func (c *DefaultPlanner) Plan(lastWrite time.Time) []CompactionGroup { // findGenerations groups all the TSM files by generation based // on their filename, then returns the generations in descending order (newest first). 
func (c *DefaultPlanner) findGenerations() tsmGenerations { - c.mu.RLock() + c.mu.Lock() + defer c.mu.Unlock() + last := c.lastFindGenerations lastGen := c.lastGenerations - c.mu.RUnlock() if !last.IsZero() && c.FileStore.LastModified().Equal(last) { return lastGen @@ -525,10 +537,8 @@ func (c *DefaultPlanner) findGenerations() tsmGenerations { sort.Sort(orderedGenerations) } - c.mu.Lock() c.lastFindGenerations = genTime c.lastGenerations = orderedGenerations - c.mu.Unlock() return orderedGenerations } @@ -726,7 +736,7 @@ func (c *Compactor) CompactFull(tsmFiles []string) ([]string, error) { } if !c.add(tsmFiles) { - return nil, errCompactionInProgress + return nil, errCompactionInProgress{} } defer c.remove(tsmFiles) @@ -755,7 +765,7 @@ func (c *Compactor) CompactFast(tsmFiles []string) ([]string, error) { } if !c.add(tsmFiles) { - return nil, errCompactionInProgress + return nil, errCompactionInProgress{} } defer c.remove(tsmFiles) @@ -800,10 +810,15 @@ func (c *Compactor) writeNewFiles(generation, sequence int, iter KeyIterator) ([ return nil, err } break - } - - // We hit an error but didn't finish the compaction. Remove the temp file and abort. - if err != nil { + } else if _, ok := err.(errCompactionInProgress); ok { + // Don't clean up the file as another compaction is using it. This should not happen as the + // planner keeps track of which files are assigned to compaction plans now. + return nil, err + } else if err != nil { + // We hit an error and didn't finish the compaction. Remove the temp file and abort. 
+ if err := os.RemoveAll(fileName); err != nil { + return nil, err + } return nil, err } @@ -817,7 +832,7 @@ func (c *Compactor) writeNewFiles(generation, sequence int, iter KeyIterator) ([ func (c *Compactor) write(path string, iter KeyIterator) (err error) { fd, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR|os.O_EXCL, 0666) if err != nil { - return errCompactionInProgress + return errCompactionInProgress{err: err} } // Create the write for the new TSM file. diff --git a/tsdb/engine/tsm1/engine.go b/tsdb/engine/tsm1/engine.go index c6c6cc9c16..3c152ec089 100644 --- a/tsdb/engine/tsm1/engine.go +++ b/tsdb/engine/tsm1/engine.go @@ -246,7 +246,6 @@ func (e *Engine) disableLevelCompactions(wait bool) { e.levelWorkers += 1 } - var cleanup bool if old == 0 && e.done != nil { // Prevent new compactions from starting e.Compactor.DisableCompactions() @@ -254,17 +253,11 @@ func (e *Engine) disableLevelCompactions(wait bool) { // Stop all background compaction goroutines close(e.done) e.done = nil - cleanup = true + } e.mu.Unlock() e.wg.Wait() - - if cleanup { // first to disable should cleanup - if err := e.cleanup(); err != nil { - e.logger.Info(fmt.Sprintf("error cleaning up temp file: %v", err)) - } - } } func (e *Engine) enableSnapshotCompactions() { @@ -1188,10 +1181,9 @@ func (e *Engine) compactTSMLevel(fast bool, level int, quit <-chan struct{}) { case <-t.C: s := e.levelCompactionStrategy(fast, level) if s != nil { - // Release the files in the compaction plan - defer e.CompactionPlan.Release(s.compactionGroups) - s.Apply() + // Release the files in the compaction plan + e.CompactionPlan.Release(s.compactionGroups) } } @@ -1210,9 +1202,9 @@ func (e *Engine) compactTSMFull(quit <-chan struct{}) { case <-t.C: s := e.fullCompactionStrategy() if s != nil { - // Release the files in the compaction plan - defer e.CompactionPlan.Release(s.compactionGroups) s.Apply() + // Release the files in the compaction plan + e.CompactionPlan.Release(s.compactionGroups) } } @@ 
-1298,10 +1290,11 @@ func (s *compactionStrategy) compactGroup(groupNum int) { }() if err != nil { - if err == errCompactionsDisabled || err == errCompactionInProgress { + _, inProgress := err.(errCompactionInProgress) + if err == errCompactionsDisabled || inProgress { s.logger.Info(fmt.Sprintf("aborted %s compaction group (%d). %v", s.description, groupNum, err)) - if err == errCompactionInProgress { + if _, ok := err.(errCompactionInProgress); ok { time.Sleep(time.Second) } return @@ -1421,6 +1414,8 @@ func (e *Engine) reloadCache() error { return nil } +// cleanup removes all temp files and dirs that exist on disk. This should only be run at startup to avoid +// removing tmp files that are still in use. func (e *Engine) cleanup() error { allfiles, err := ioutil.ReadDir(e.path) if os.IsNotExist(err) { diff --git a/tsdb/engine/tsm1/writer.go b/tsdb/engine/tsm1/writer.go index 2765e1b80e..cfedf5bbbd 100644 --- a/tsdb/engine/tsm1/writer.go +++ b/tsdb/engine/tsm1/writer.go @@ -455,15 +455,15 @@ func (t *tsmWriter) writeHeader() error { // Write writes a new block containing key and values. func (t *tsmWriter) Write(key string, values Values) error { + if len(key) > maxKeyLength { + return ErrMaxKeyLengthExceeded + } + // Nothing to write if len(values) == 0 { return nil } - if len(key) > maxKeyLength { - return ErrMaxKeyLengthExceeded - } - // Write header only after we have some data to write. if t.n == 0 { if err := t.writeHeader(); err != nil { @@ -507,6 +507,10 @@ func (t *tsmWriter) Write(key string, values Values) error { // exceeds max entries for a given key, ErrMaxBlocksExceeded is returned. This indicates // that the index is now full for this key and no future writes to this key will succeed. 
func (t *tsmWriter) WriteBlock(key string, minTime, maxTime int64, block []byte) error { + if len(key) > maxKeyLength { + return ErrMaxKeyLengthExceeded + } + // Nothing to write if len(block) == 0 { return nil diff --git a/tsdb/engine/tsm1/writer_test.go b/tsdb/engine/tsm1/writer_test.go index 0cd9236bb3..d9eb5ac786 100644 --- a/tsdb/engine/tsm1/writer_test.go +++ b/tsdb/engine/tsm1/writer_test.go @@ -612,6 +612,26 @@ func TestTSMWriter_WriteBlock_Multiple(t *testing.T) { } } +func TestTSMWriter_WriteBlock_MaxKey(t *testing.T) { + dir := MustTempDir() + defer os.RemoveAll(dir) + f := MustTempFile(dir) + + w, err := tsm1.NewTSMWriter(f) + if err != nil { + t.Fatalf("unexpected error creating writer: %v", err) + } + + var key string + for i := 0; i < 100000; i++ { + key += "a" + } + + if err := w.WriteBlock(key, 0, 0, nil); err != tsm1.ErrMaxKeyLengthExceeded { + t.Fatalf("expected max key length error writing key: %v", err) + } +} + func TestTSMWriter_Write_MaxKey(t *testing.T) { dir := MustTempDir() defer os.RemoveAll(dir) diff --git a/tsdb/index.go b/tsdb/index.go index c05617c7a0..3a7f3cb477 100644 --- a/tsdb/index.go +++ b/tsdb/index.go @@ -9,11 +9,13 @@ import ( "github.com/influxdata/influxdb/influxql" "github.com/influxdata/influxdb/models" "github.com/influxdata/influxdb/pkg/estimator" + "github.com/uber-go/zap" ) type Index interface { Open() error Close() error + WithLogger(zap.Logger) MeasurementExists(name []byte) (bool, error) MeasurementNamesByExpr(expr influxql.Expr) ([][]byte, error) diff --git a/tsdb/index/inmem/inmem.go b/tsdb/index/inmem/inmem.go index ebfd32e3fa..176807361a 100644 --- a/tsdb/index/inmem/inmem.go +++ b/tsdb/index/inmem/inmem.go @@ -26,6 +26,7 @@ import ( "github.com/influxdata/influxdb/pkg/estimator" "github.com/influxdata/influxdb/pkg/estimator/hll" "github.com/influxdata/influxdb/tsdb" + "github.com/uber-go/zap" ) // IndexName is the name of this index. 
@@ -73,6 +74,8 @@ func (i *Index) Type() string { return IndexName } func (i *Index) Open() (err error) { return nil } func (i *Index) Close() error { return nil } +func (i *Index) WithLogger(zap.Logger) {} + // Series returns a series by key. func (i *Index) Series(key []byte) (*Series, error) { i.mu.RLock() diff --git a/tsdb/index/internal/file_set.go b/tsdb/index/internal/file_set.go index a6a9b03ef4..383310e135 100644 --- a/tsdb/index/internal/file_set.go +++ b/tsdb/index/internal/file_set.go @@ -2,6 +2,7 @@ package internal import ( "github.com/influxdata/influxdb/models" + "github.com/influxdata/influxdb/pkg/bloom" "github.com/influxdata/influxdb/pkg/estimator" "github.com/influxdata/influxdb/tsdb/index/tsi1" ) @@ -10,7 +11,8 @@ import ( type File struct { Closef func() error Pathf func() string - FilterNameTagsf func(names [][]byte, tagsSlice []models.Tags) ([][]byte, []models.Tags) + IDf func() int + Levelf func() int Measurementf func(name []byte) tsi1.MeasurementElem MeasurementIteratorf func() tsi1.MeasurementIterator HasSeriesf func(name []byte, tags models.Tags, buf []byte) (exists, tombstoned bool) @@ -28,13 +30,13 @@ type File struct { MergeMeasurementsSketchesf func(s, t estimator.Sketch) error Retainf func() Releasef func() + Filterf func() *bloom.Filter } -func (f *File) Close() error { return f.Closef() } -func (f *File) Path() string { return f.Pathf() } -func (f *File) FilterNamesTags(names [][]byte, tagsSlice []models.Tags) ([][]byte, []models.Tags) { - return f.FilterNameTagsf(names, tagsSlice) -} +func (f *File) Close() error { return f.Closef() } +func (f *File) Path() string { return f.Pathf() } +func (f *File) ID() int { return f.IDf() } +func (f *File) Level() int { return f.Levelf() } func (f *File) Measurement(name []byte) tsi1.MeasurementElem { return f.Measurementf(name) } func (f *File) MeasurementIterator() tsi1.MeasurementIterator { return f.MeasurementIteratorf() } func (f *File) HasSeries(name []byte, tags models.Tags, buf 
[]byte) (exists, tombstoned bool) { @@ -64,5 +66,6 @@ func (f *File) MergeSeriesSketches(s, t estimator.Sketch) error { return f.Merge func (f *File) MergeMeasurementsSketches(s, t estimator.Sketch) error { return f.MergeMeasurementsSketchesf(s, t) } -func (f *File) Retain() { f.Retainf() } -func (f *File) Release() { f.Releasef() } +func (f *File) Retain() { f.Retainf() } +func (f *File) Release() { f.Releasef() } +func (f *File) Filter() *bloom.Filter { return f.Filterf() } diff --git a/tsdb/index/tsi1/file_set.go b/tsdb/index/tsi1/file_set.go index 00771f96c1..9abee8197f 100644 --- a/tsdb/index/tsi1/file_set.go +++ b/tsdb/index/tsi1/file_set.go @@ -8,6 +8,7 @@ import ( "github.com/influxdata/influxdb/influxql" "github.com/influxdata/influxdb/models" + "github.com/influxdata/influxdb/pkg/bloom" "github.com/influxdata/influxdb/pkg/bytesutil" "github.com/influxdata/influxdb/pkg/estimator" "github.com/influxdata/influxdb/pkg/estimator/hll" @@ -15,12 +16,25 @@ import ( ) // FileSet represents a collection of files. -type FileSet []File +type FileSet struct { + levels []CompactionLevel + files []File + filters []*bloom.Filter // per-level filters +} + +// NewFileSet returns a new instance of FileSet. +func NewFileSet(levels []CompactionLevel, files []File) (*FileSet, error) { + fs := &FileSet{levels: levels, files: files} + if err := fs.buildFilters(); err != nil { + return nil, err + } + return fs, nil +} // Close closes all the files in the file set. func (p FileSet) Close() error { var err error - for _, f := range p { + for _, f := range p.files { if e := f.Close(); e != nil && err == nil { err = e } @@ -29,65 +43,79 @@ func (p FileSet) Close() error { } // Retain adds a reference count to all files. -func (p FileSet) Retain() { - for _, f := range p { +func (fs *FileSet) Retain() { + for _, f := range fs.files { f.Retain() } } // Release removes a reference count from all files. 
-func (p FileSet) Release() { - for _, f := range p { +func (fs *FileSet) Release() { + for _, f := range fs.files { f.Release() } } +// Prepend returns a new file set with f added at the beginning. +func (fs *FileSet) Prepend(f File) (*FileSet, error) { + return NewFileSet(fs.levels, append([]File{f}, fs.files...)) +} + // MustReplace swaps a list of files for a single file and returns a new file set. // The caller should always guarentee that the files exist and are contiguous. -func (p FileSet) MustReplace(oldFiles []File, newFile File) FileSet { +func (fs *FileSet) MustReplace(oldFiles []File, newFile File) *FileSet { assert(len(oldFiles) > 0, "cannot replace empty files") // Find index of first old file. var i int - for ; i < len(p); i++ { - if p[i] == oldFiles[0] { + for ; i < len(fs.files); i++ { + if fs.files[i] == oldFiles[0] { break - } else if i == len(p)-1 { + } else if i == len(fs.files)-1 { panic("first replacement file not found") } } // Ensure all old files are contiguous. for j := range oldFiles { - if p[i+j] != oldFiles[j] { + if fs.files[i+j] != oldFiles[j] { panic("cannot replace non-contiguous files") } } // Copy to new fileset. - other := make([]File, len(p)-len(oldFiles)+1) - copy(other[:i], p[:i]) + other := make([]File, len(fs.files)-len(oldFiles)+1) + copy(other[:i], fs.files[:i]) other[i] = newFile - copy(other[i+1:], p[i+len(oldFiles):]) + copy(other[i+1:], fs.files[i+len(oldFiles):]) - return other + fs, err := NewFileSet(fs.levels, other) + if err != nil { + panic("cannot build file set: " + err.Error()) + } + return fs } // MaxID returns the highest file identifier. -func (fs FileSet) MaxID() int { +func (fs *FileSet) MaxID() int { var max int - for _, f := range fs { - if i := ParseFileID(f.Path()); i > max { + for _, f := range fs.files { + if i := f.ID(); i > max { max = i } } return max } +// Files returns all files in the set. 
+func (fs *FileSet) Files() []File { + return fs.files +} + // LogFiles returns all log files from the file set. -func (fs FileSet) LogFiles() []*LogFile { +func (fs *FileSet) LogFiles() []*LogFile { var a []*LogFile - for _, f := range fs { + for _, f := range fs.files { if f, ok := f.(*LogFile); ok { a = append(a, f) } @@ -96,9 +124,9 @@ func (fs FileSet) LogFiles() []*LogFile { } // IndexFiles returns all index files from the file set. -func (fs FileSet) IndexFiles() []*IndexFile { +func (fs *FileSet) IndexFiles() []*IndexFile { var a []*IndexFile - for _, f := range fs { + for _, f := range fs.files { if f, ok := f.(*IndexFile); ok { a = append(a, f) } @@ -106,10 +134,21 @@ func (fs FileSet) IndexFiles() []*IndexFile { return a } +// IndexFilesByLevel returns all index files for a given level. +func (fs *FileSet) IndexFilesByLevel(level int) []*IndexFile { + var a []*IndexFile + for _, f := range fs.files { + if f, ok := f.(*IndexFile); ok && f.Level() == level { + a = append(a, f) + } + } + return a +} + // SeriesIterator returns an iterator over all series in the index. -func (fs FileSet) SeriesIterator() SeriesIterator { - a := make([]SeriesIterator, 0, len(fs)) - for _, f := range fs { +func (fs *FileSet) SeriesIterator() SeriesIterator { + a := make([]SeriesIterator, 0, len(fs.files)) + for _, f := range fs.files { itr := f.SeriesIterator() if itr == nil { continue @@ -120,8 +159,8 @@ func (fs FileSet) SeriesIterator() SeriesIterator { } // Measurement returns a measurement by name. -func (fs FileSet) Measurement(name []byte) MeasurementElem { - for _, f := range fs { +func (fs *FileSet) Measurement(name []byte) MeasurementElem { + for _, f := range fs.files { if e := f.Measurement(name); e == nil { continue } else if e.Deleted() { @@ -134,9 +173,9 @@ func (fs FileSet) Measurement(name []byte) MeasurementElem { } // MeasurementIterator returns an iterator over all measurements in the index. 
-func (fs FileSet) MeasurementIterator() MeasurementIterator { - a := make([]MeasurementIterator, 0, len(fs)) - for _, f := range fs { +func (fs *FileSet) MeasurementIterator() MeasurementIterator { + a := make([]MeasurementIterator, 0, len(fs.files)) + for _, f := range fs.files { itr := f.MeasurementIterator() if itr != nil { a = append(a, itr) @@ -147,9 +186,9 @@ func (fs FileSet) MeasurementIterator() MeasurementIterator { // MeasurementSeriesIterator returns an iterator over all non-tombstoned series // in the index for the provided measurement. -func (fs FileSet) MeasurementSeriesIterator(name []byte) SeriesIterator { - a := make([]SeriesIterator, 0, len(fs)) - for _, f := range fs { +func (fs *FileSet) MeasurementSeriesIterator(name []byte) SeriesIterator { + a := make([]SeriesIterator, 0, len(fs.files)) + for _, f := range fs.files { itr := f.MeasurementSeriesIterator(name) if itr != nil { a = append(a, itr) @@ -159,9 +198,9 @@ func (fs FileSet) MeasurementSeriesIterator(name []byte) SeriesIterator { } // TagKeyIterator returns an iterator over all tag keys for a measurement. -func (fs FileSet) TagKeyIterator(name []byte) TagKeyIterator { - a := make([]TagKeyIterator, 0, len(fs)) - for _, f := range fs { +func (fs *FileSet) TagKeyIterator(name []byte) TagKeyIterator { + a := make([]TagKeyIterator, 0, len(fs.files)) + for _, f := range fs.files { itr := f.TagKeyIterator(name) if itr != nil { a = append(a, itr) @@ -171,7 +210,7 @@ func (fs FileSet) TagKeyIterator(name []byte) TagKeyIterator { } // MeasurementTagKeysByExpr extracts the tag keys wanted by the expression. 
-func (fs FileSet) MeasurementTagKeysByExpr(name []byte, expr influxql.Expr) (map[string]struct{}, error) { +func (fs *FileSet) MeasurementTagKeysByExpr(name []byte, expr influxql.Expr) (map[string]struct{}, error) { switch e := expr.(type) { case *influxql.BinaryExpr: switch e.Op { @@ -231,7 +270,7 @@ func (fs FileSet) MeasurementTagKeysByExpr(name []byte, expr influxql.Expr) (map } // tagKeysByFilter will filter the tag keys for the measurement. -func (fs FileSet) tagKeysByFilter(name []byte, op influxql.Token, val []byte, regex *regexp.Regexp) map[string]struct{} { +func (fs *FileSet) tagKeysByFilter(name []byte, op influxql.Token, val []byte, regex *regexp.Regexp) map[string]struct{} { ss := make(map[string]struct{}) itr := fs.TagKeyIterator(name) for e := itr.Next(); e != nil; e = itr.Next() { @@ -256,9 +295,9 @@ func (fs FileSet) tagKeysByFilter(name []byte, op influxql.Token, val []byte, re } // TagKeySeriesIterator returns a series iterator for all values across a single key. -func (fs FileSet) TagKeySeriesIterator(name, key []byte) SeriesIterator { - a := make([]SeriesIterator, 0, len(fs)) - for _, f := range fs { +func (fs *FileSet) TagKeySeriesIterator(name, key []byte) SeriesIterator { + a := make([]SeriesIterator, 0, len(fs.files)) + for _, f := range fs.files { itr := f.TagKeySeriesIterator(name, key) if itr != nil { a = append(a, itr) @@ -268,8 +307,8 @@ func (fs FileSet) TagKeySeriesIterator(name, key []byte) SeriesIterator { } // HasTagKey returns true if the tag key exists. -func (fs FileSet) HasTagKey(name, key []byte) bool { - for _, f := range fs { +func (fs *FileSet) HasTagKey(name, key []byte) bool { + for _, f := range fs.files { if e := f.TagKey(name, key); e != nil { return !e.Deleted() } @@ -278,8 +317,8 @@ func (fs FileSet) HasTagKey(name, key []byte) bool { } // HasTagValue returns true if the tag value exists. 
-func (fs FileSet) HasTagValue(name, key, value []byte) bool { - for _, f := range fs { +func (fs *FileSet) HasTagValue(name, key, value []byte) bool { + for _, f := range fs.files { if e := f.TagValue(name, key, value); e != nil { return !e.Deleted() } @@ -288,9 +327,9 @@ func (fs FileSet) HasTagValue(name, key, value []byte) bool { } // TagValueIterator returns a value iterator for a tag key. -func (fs FileSet) TagValueIterator(name, key []byte) TagValueIterator { - a := make([]TagValueIterator, 0, len(fs)) - for _, f := range fs { +func (fs *FileSet) TagValueIterator(name, key []byte) TagValueIterator { + a := make([]TagValueIterator, 0, len(fs.files)) + for _, f := range fs.files { itr := f.TagValueIterator(name, key) if itr != nil { a = append(a, itr) @@ -300,9 +339,9 @@ func (fs FileSet) TagValueIterator(name, key []byte) TagValueIterator { } // TagValueSeriesIterator returns a series iterator for a single tag value. -func (fs FileSet) TagValueSeriesIterator(name, key, value []byte) SeriesIterator { - a := make([]SeriesIterator, 0, len(fs)) - for _, f := range fs { +func (fs *FileSet) TagValueSeriesIterator(name, key, value []byte) SeriesIterator { + a := make([]SeriesIterator, 0, len(fs.files)) + for _, f := range fs.files { itr := f.TagValueSeriesIterator(name, key, value) if itr != nil { a = append(a, itr) @@ -313,7 +352,7 @@ func (fs FileSet) TagValueSeriesIterator(name, key, value []byte) SeriesIterator // MatchTagValueSeriesIterator returns a series iterator for tags which match value. // If matches is false, returns iterators which do not match value. 
-func (fs FileSet) MatchTagValueSeriesIterator(name, key []byte, value *regexp.Regexp, matches bool) SeriesIterator { +func (fs *FileSet) MatchTagValueSeriesIterator(name, key []byte, value *regexp.Regexp, matches bool) SeriesIterator { matchEmpty := value.MatchString("") if matches { @@ -329,7 +368,7 @@ func (fs FileSet) MatchTagValueSeriesIterator(name, key []byte, value *regexp.Re return FilterUndeletedSeriesIterator(fs.matchTagValueNotEqualNotEmptySeriesIterator(name, key, value)) } -func (fs FileSet) matchTagValueEqualEmptySeriesIterator(name, key []byte, value *regexp.Regexp) SeriesIterator { +func (fs *FileSet) matchTagValueEqualEmptySeriesIterator(name, key []byte, value *regexp.Regexp) SeriesIterator { vitr := fs.TagValueIterator(name, key) if vitr == nil { return fs.MeasurementSeriesIterator(name) @@ -348,7 +387,7 @@ func (fs FileSet) matchTagValueEqualEmptySeriesIterator(name, key []byte, value ) } -func (fs FileSet) matchTagValueEqualNotEmptySeriesIterator(name, key []byte, value *regexp.Regexp) SeriesIterator { +func (fs *FileSet) matchTagValueEqualNotEmptySeriesIterator(name, key []byte, value *regexp.Regexp) SeriesIterator { vitr := fs.TagValueIterator(name, key) if vitr == nil { return nil @@ -363,7 +402,7 @@ func (fs FileSet) matchTagValueEqualNotEmptySeriesIterator(name, key []byte, val return MergeSeriesIterators(itrs...) } -func (fs FileSet) matchTagValueNotEqualEmptySeriesIterator(name, key []byte, value *regexp.Regexp) SeriesIterator { +func (fs *FileSet) matchTagValueNotEqualEmptySeriesIterator(name, key []byte, value *regexp.Regexp) SeriesIterator { vitr := fs.TagValueIterator(name, key) if vitr == nil { return nil @@ -378,7 +417,7 @@ func (fs FileSet) matchTagValueNotEqualEmptySeriesIterator(name, key []byte, val return MergeSeriesIterators(itrs...) 
} -func (fs FileSet) matchTagValueNotEqualNotEmptySeriesIterator(name, key []byte, value *regexp.Regexp) SeriesIterator { +func (fs *FileSet) matchTagValueNotEqualNotEmptySeriesIterator(name, key []byte, value *regexp.Regexp) SeriesIterator { vitr := fs.TagValueIterator(name, key) if vitr == nil { return fs.MeasurementSeriesIterator(name) @@ -397,7 +436,7 @@ func (fs FileSet) matchTagValueNotEqualNotEmptySeriesIterator(name, key []byte, ) } -func (fs FileSet) MeasurementNamesByExpr(expr influxql.Expr) ([][]byte, error) { +func (fs *FileSet) MeasurementNamesByExpr(expr influxql.Expr) ([][]byte, error) { // Return filtered list if expression exists. if expr != nil { return fs.measurementNamesByExpr(expr) @@ -412,7 +451,7 @@ func (fs FileSet) MeasurementNamesByExpr(expr influxql.Expr) ([][]byte, error) { return names, nil } -func (fs FileSet) measurementNamesByExpr(expr influxql.Expr) ([][]byte, error) { +func (fs *FileSet) measurementNamesByExpr(expr influxql.Expr) ([][]byte, error) { if expr == nil { return nil, nil } @@ -479,7 +518,7 @@ func (fs FileSet) measurementNamesByExpr(expr influxql.Expr) ([][]byte, error) { } // measurementNamesByNameFilter returns matching measurement names in sorted order. 
-func (fs FileSet) measurementNamesByNameFilter(op influxql.Token, val string, regex *regexp.Regexp) [][]byte { +func (fs *FileSet) measurementNamesByNameFilter(op influxql.Token, val string, regex *regexp.Regexp) [][]byte { var names [][]byte itr := fs.MeasurementIterator() for e := itr.Next(); e != nil; e = itr.Next() { @@ -503,7 +542,7 @@ func (fs FileSet) measurementNamesByNameFilter(op influxql.Token, val string, re return names } -func (fs FileSet) measurementNamesByTagFilter(op influxql.Token, key, val string, regex *regexp.Regexp) [][]byte { +func (fs *FileSet) measurementNamesByTagFilter(op influxql.Token, key, val string, regex *regexp.Regexp) [][]byte { var names [][]byte mitr := fs.MeasurementIterator() @@ -548,8 +587,8 @@ func (fs FileSet) measurementNamesByTagFilter(op influxql.Token, key, val string } // HasSeries returns true if the series exists and is not tombstoned. -func (fs FileSet) HasSeries(name []byte, tags models.Tags, buf []byte) bool { - for _, f := range fs { +func (fs *FileSet) HasSeries(name []byte, tags models.Tags, buf []byte) bool { + for _, f := range fs.files { if exists, tombstoned := f.HasSeries(name, tags, buf); exists { return !tombstoned } @@ -559,19 +598,63 @@ func (fs FileSet) HasSeries(name []byte, tags models.Tags, buf []byte) bool { // FilterNamesTags filters out any series which already exist. It modifies the // provided slices of names and tags. -func (fs FileSet) FilterNamesTags(names [][]byte, tagsSlice []models.Tags) ([][]byte, []models.Tags) { - for _, f := range fs { +func (fs *FileSet) FilterNamesTags(names [][]byte, tagsSlice []models.Tags) ([][]byte, []models.Tags) { + buf := make([]byte, 4096) + + // Filter across all log files. + // Log files obtain a read lock and should be done in bulk for performance. + for _, f := range fs.LogFiles() { names, tagsSlice = f.FilterNamesTags(names, tagsSlice) } - return names, tagsSlice + + // Filter across remaining index files. 
+ indexFiles := fs.IndexFiles() + newNames, newTagsSlice := names[:0], tagsSlice[:0] + for i := range names { + name, tags := names[i], tagsSlice[i] + currentLevel, skipLevel := -1, false + + var exists, tombstoned bool + for j := 0; j < len(indexFiles); j++ { + f := indexFiles[j] + + // Check for existence on the level when it changes. + if level := f.Level(); currentLevel != level { + currentLevel, skipLevel = level, false + + if filter := fs.filters[level]; filter != nil { + if !filter.Contains(AppendSeriesKey(buf[:0], name, tags)) { + skipLevel = true + } + } + } + + // Skip file if in level where it doesn't exist. + if skipLevel { + continue + } + + // Stop once we find the series in a file. + if exists, tombstoned = f.HasSeries(name, tags, buf); exists { + break + } + } + + // If the series doesn't exist or it has been tombstoned then add it. + if !exists || tombstoned { + newNames = append(newNames, name) + newTagsSlice = append(newTagsSlice, tags) + } + } + return newNames, newTagsSlice } // SeriesSketches returns the merged series sketches for the FileSet. -func (fs FileSet) SeriesSketches() (estimator.Sketch, estimator.Sketch, error) { +func (fs *FileSet) SeriesSketches() (estimator.Sketch, estimator.Sketch, error) { sketch, tsketch := hll.NewDefaultPlus(), hll.NewDefaultPlus() // Iterate over all the files and merge the sketches into the result. - for _, f := range fs { + for _, f := range fs.files { if err := f.MergeSeriesSketches(sketch, tsketch); err != nil { return nil, nil, err } @@ -580,11 +663,11 @@ func (fs FileSet) SeriesSketches() (estimator.Sketch, estimator.Sketch, error) { } // MeasurementsSketches returns the merged measurement sketches for the FileSet. 
-func (fs FileSet) MeasurementsSketches() (estimator.Sketch, estimator.Sketch, error) { +func (fs *FileSet) MeasurementsSketches() (estimator.Sketch, estimator.Sketch, error) { sketch, tsketch := hll.NewDefaultPlus(), hll.NewDefaultPlus() // Iterate over all the files and merge the sketches into the result. - for _, f := range fs { + for _, f := range fs.files { if err := f.MergeMeasurementsSketches(sketch, tsketch); err != nil { return nil, nil, err } @@ -595,7 +678,7 @@ func (fs FileSet) MeasurementsSketches() (estimator.Sketch, estimator.Sketch, er // MeasurementSeriesByExprIterator returns a series iterator for a measurement // that is filtered by expr. If expr only contains time expressions then this // call is equivalent to MeasurementSeriesIterator(). -func (fs FileSet) MeasurementSeriesByExprIterator(name []byte, expr influxql.Expr, fieldset *tsdb.MeasurementFieldSet) (SeriesIterator, error) { +func (fs *FileSet) MeasurementSeriesByExprIterator(name []byte, expr influxql.Expr, fieldset *tsdb.MeasurementFieldSet) (SeriesIterator, error) { // Return all series for the measurement if there are no tag expressions. if expr == nil || influxql.OnlyTimeExpr(expr) { return fs.MeasurementSeriesIterator(name), nil @@ -604,7 +687,7 @@ func (fs FileSet) MeasurementSeriesByExprIterator(name []byte, expr influxql.Exp } // MeasurementSeriesKeysByExpr returns a list of series keys matching expr. -func (fs FileSet) MeasurementSeriesKeysByExpr(name []byte, expr influxql.Expr, fieldset *tsdb.MeasurementFieldSet) ([][]byte, error) { +func (fs *FileSet) MeasurementSeriesKeysByExpr(name []byte, expr influxql.Expr, fieldset *tsdb.MeasurementFieldSet) ([][]byte, error) { // Create iterator for all matching series. 
itr, err := fs.MeasurementSeriesByExprIterator(name, expr, fieldset) if err != nil { @@ -627,7 +710,7 @@ func (fs FileSet) MeasurementSeriesKeysByExpr(name []byte, expr influxql.Expr, f return keys, nil } -func (fs FileSet) seriesByExprIterator(name []byte, expr influxql.Expr, mf *tsdb.MeasurementFields) (SeriesIterator, error) { +func (fs *FileSet) seriesByExprIterator(name []byte, expr influxql.Expr, mf *tsdb.MeasurementFields) (SeriesIterator, error) { switch expr := expr.(type) { case *influxql.BinaryExpr: switch expr.Op { @@ -665,7 +748,7 @@ func (fs FileSet) seriesByExprIterator(name []byte, expr influxql.Expr, mf *tsdb } // seriesByBinaryExprIterator returns a series iterator and a filtering expression. -func (fs FileSet) seriesByBinaryExprIterator(name []byte, n *influxql.BinaryExpr, mf *tsdb.MeasurementFields) (SeriesIterator, error) { +func (fs *FileSet) seriesByBinaryExprIterator(name []byte, n *influxql.BinaryExpr, mf *tsdb.MeasurementFields) (SeriesIterator, error) { // If this binary expression has another binary expression, then this // is some expression math and we should just pass it to the underlying query. if _, ok := n.LHS.(*influxql.BinaryExpr); ok { @@ -716,7 +799,7 @@ func (fs FileSet) seriesByBinaryExprIterator(name []byte, n *influxql.BinaryExpr } } -func (fs FileSet) seriesByBinaryExprStringIterator(name, key, value []byte, op influxql.Token) (SeriesIterator, error) { +func (fs *FileSet) seriesByBinaryExprStringIterator(name, key, value []byte, op influxql.Token) (SeriesIterator, error) { // Special handling for "_name" to match measurement name. 
if bytes.Equal(key, []byte("_name")) { if (op == influxql.EQ && bytes.Equal(value, name)) || (op == influxql.NEQ && !bytes.Equal(value, name)) { @@ -750,7 +833,7 @@ func (fs FileSet) seriesByBinaryExprStringIterator(name, key, value []byte, op i return fs.TagKeySeriesIterator(name, key), nil } -func (fs FileSet) seriesByBinaryExprRegexIterator(name, key []byte, value *regexp.Regexp, op influxql.Token) (SeriesIterator, error) { +func (fs *FileSet) seriesByBinaryExprRegexIterator(name, key []byte, value *regexp.Regexp, op influxql.Token) (SeriesIterator, error) { // Special handling for "_name" to match measurement name. if bytes.Equal(key, []byte("_name")) { match := value.Match(name) @@ -762,7 +845,7 @@ func (fs FileSet) seriesByBinaryExprRegexIterator(name, key []byte, value *regex return fs.MatchTagValueSeriesIterator(name, key, value, op == influxql.EQREGEX), nil } -func (fs FileSet) seriesByBinaryExprVarRefIterator(name, key []byte, value *influxql.VarRef, op influxql.Token) (SeriesIterator, error) { +func (fs *FileSet) seriesByBinaryExprVarRefIterator(name, key []byte, value *influxql.VarRef, op influxql.Token) (SeriesIterator, error) { if op == influxql.EQ { return IntersectSeriesIterators( fs.TagKeySeriesIterator(name, key), @@ -776,12 +859,48 @@ func (fs FileSet) seriesByBinaryExprVarRefIterator(name, key []byte, value *infl ), nil } +// buildFilters builds a series existence filter for each compaction level. +func (fs *FileSet) buildFilters() error { + if len(fs.levels) == 0 { + fs.filters = nil + return nil + } + + // Generate filters for each level. + fs.filters = make([]*bloom.Filter, len(fs.levels)) + + // Merge filters at each level. + for _, f := range fs.files { + level := f.Level() + + // Skip if file has no bloom filter. + if f.Filter() == nil { + continue + } + + // Initialize a filter if it doesn't exist. + if fs.filters[level] == nil { + lvl := fs.levels[level] + fs.filters[level] = bloom.NewFilter(lvl.M, lvl.K) + } + + // Merge filter. 
+ if err := fs.filters[level].Merge(f.Filter()); err != nil { + return err + } + } + + return nil +} + // File represents a log or index file. type File interface { Close() error Path() string - FilterNamesTags(names [][]byte, tagsSlice []models.Tags) ([][]byte, []models.Tags) + ID() int + Level() int + Measurement(name []byte) MeasurementElem MeasurementIterator() MeasurementIterator HasSeries(name []byte, tags models.Tags, buf []byte) (exists, tombstoned bool) @@ -804,6 +923,9 @@ type File interface { MergeSeriesSketches(s, t estimator.Sketch) error MergeMeasurementsSketches(s, t estimator.Sketch) error + // Series existence bloom filter. + Filter() *bloom.Filter + // Reference counting. Retain() Release() diff --git a/tsdb/index/tsi1/file_set_test.go b/tsdb/index/tsi1/file_set_test.go index 88ef5cce42..022a73d5a1 100644 --- a/tsdb/index/tsi1/file_set_test.go +++ b/tsdb/index/tsi1/file_set_test.go @@ -2,12 +2,9 @@ package tsi1_test import ( "fmt" - "reflect" "testing" "github.com/influxdata/influxdb/models" - "github.com/influxdata/influxdb/tsdb/index/internal" - "github.com/influxdata/influxdb/tsdb/index/tsi1" ) // Ensure fileset can return an iterator over all series in the index. 
@@ -268,9 +265,10 @@ func TestFileSet_TagKeyIterator(t *testing.T) { }) } +/* func TestFileSet_FilterNamesTags(t *testing.T) { var mf internal.File - fs := tsi1.FileSet{&mf} + fs := tsi1.NewFileSet(nil, []tsi1.File{&mf}) var ( names [][]byte @@ -361,6 +359,7 @@ func TestFileSet_FilterNamesTags(t *testing.T) { t.Fatalf("got %v, expected %v", got, exp) } } +*/ var ( byteSliceResult [][]byte diff --git a/tsdb/index/tsi1/index.go b/tsdb/index/tsi1/index.go index 69cb7ad24e..d634a715d2 100644 --- a/tsdb/index/tsi1/index.go +++ b/tsdb/index/tsi1/index.go @@ -1,11 +1,11 @@ package tsi1 import ( + "crypto/rand" "encoding/json" "errors" "fmt" "io/ioutil" - "log" "os" "path/filepath" "regexp" @@ -19,6 +19,7 @@ import ( "github.com/influxdata/influxdb/models" "github.com/influxdata/influxdb/pkg/estimator" "github.com/influxdata/influxdb/tsdb" + "github.com/uber-go/zap" ) // IndexName is the name of the index. @@ -26,8 +27,7 @@ const IndexName = "tsi1" // Default compaction thresholds. const ( - DefaultMaxLogFileSize = 5 * 1024 * 1024 - DefaultCompactionFactor = 1.8 + DefaultMaxLogFileSize = 5 * 1024 * 1024 ) func init() { @@ -61,9 +61,13 @@ type Index struct { options tsdb.EngineOptions activeLogFile *LogFile // current log file - fileSet FileSet // current file set + fileSet *FileSet // current file set seq int // file id sequence + // Compaction management + levels []CompactionLevel // compaction levels + levelCompacting []bool // level compaction status + // Close management. once sync.Once closing chan struct{} @@ -79,12 +83,13 @@ type Index struct { Path string // Log file compaction thresholds. - MaxLogFileSize int64 - CompactionFactor float64 + MaxLogFileSize int64 // Frequency of compaction checks. CompactionEnabled bool CompactionMonitorInterval time.Duration + + logger zap.Logger } // NewIndex returns a new instance of Index. @@ -95,7 +100,8 @@ func NewIndex() *Index { // Default compaction thresholds. 
MaxLogFileSize: DefaultMaxLogFileSize, CompactionEnabled: true, - CompactionFactor: DefaultCompactionFactor, + + logger: zap.New(zap.NullEncoder()), } } @@ -118,12 +124,20 @@ func (i *Index) Open() error { // Read manifest file. m, err := ReadManifestFile(filepath.Join(i.Path, ManifestFileName)) if os.IsNotExist(err) { - m = &Manifest{} + m = NewManifest() } else if err != nil { return err } + // Copy compaction levels to the index. + i.levels = make([]CompactionLevel, len(m.Levels)) + copy(i.levels, m.Levels) + + // Set up flags to track whether a level is compacting. + i.levelCompacting = make([]bool, len(i.levels)) + // Open each file in the manifest. + var files []File for _, filename := range m.Files { switch filepath.Ext(filename) { case LogFileExt: @@ -131,7 +145,7 @@ func (i *Index) Open() error { if err != nil { return err } - i.fileSet = append(i.fileSet, f) + files = append(files, f) // Make first log file active, if within threshold. sz, _ := f.Stat() @@ -144,9 +158,14 @@ func (i *Index) Open() error { if err != nil { return err } - i.fileSet = append(i.fileSet, f) + files = append(files, f) } } + fs, err := NewFileSet(i.levels, files) + if err != nil { + return err + } + i.fileSet = fs // Set initial sequnce number. i.seq = i.fileSet.MaxID() @@ -230,10 +249,10 @@ func (i *Index) Close() error { defer i.mu.Unlock() // Close log files. - for _, f := range i.fileSet { + for _, f := range i.fileSet.files { f.Close() } - i.fileSet = nil + i.fileSet.files = nil return nil } @@ -258,10 +277,11 @@ func (i *Index) ManifestPath() string { // Manifest returns a manifest for the index. 
func (i *Index) Manifest() *Manifest { m := &Manifest{ - Files: make([]string, len(i.fileSet)), + Levels: i.levels, + Files: make([]string, len(i.fileSet.files)), } - for j, f := range i.fileSet { + for j, f := range i.fileSet.files { m.Files[j] = filepath.Base(f.Path()) } @@ -273,6 +293,11 @@ func (i *Index) writeManifestFile() error { return WriteManifestFile(i.ManifestPath(), i.Manifest()) } +// WithLogger sets the logger for the index. +func (i *Index) WithLogger(logger zap.Logger) { + i.logger = logger.With(zap.String("index", "tsi")) +} + // SetFieldSet sets a shared field set from the engine. func (i *Index) SetFieldSet(fs *tsdb.MeasurementFieldSet) { i.mu.Lock() @@ -281,21 +306,21 @@ func (i *Index) SetFieldSet(fs *tsdb.MeasurementFieldSet) { } // RetainFileSet returns the current fileset and adds a reference count. -func (i *Index) RetainFileSet() FileSet { +func (i *Index) RetainFileSet() *FileSet { i.mu.RLock() fs := i.retainFileSet() i.mu.RUnlock() return fs } -func (i *Index) retainFileSet() FileSet { +func (i *Index) retainFileSet() *FileSet { fs := i.fileSet fs.Retain() return fs } // FileN returns the active files in the file set. -func (i *Index) FileN() int { return len(i.fileSet) } +func (i *Index) FileN() int { return len(i.fileSet.files) } // prependActiveLogFile adds a new log file so that the current log file can be compacted. func (i *Index) prependActiveLogFile() error { @@ -305,7 +330,13 @@ func (i *Index) prependActiveLogFile() error { return err } i.activeLogFile = f - i.fileSet = append([]File{f}, i.fileSet...) + + // Prepend and generate new fileset. + fs, err := i.fileSet.Prepend(f) + if err != nil { + return err + } + i.fileSet = fs // Write new manifest. 
if err := i.writeManifestFile(); err != nil { @@ -562,7 +593,7 @@ func (i *Index) SeriesN() int64 { defer fs.Release() var total int64 - for _, f := range fs { + for _, f := range fs.files { total += int64(f.SeriesN()) } return total @@ -721,7 +752,7 @@ func (i *Index) SnapshotTo(path string) error { } // Link files in directory. - for _, f := range fs { + for _, f := range fs.files { if err := os.Link(f.Path(), filepath.Join(path, "index", filepath.Base(f.Path()))); err != nil { return fmt.Errorf("error creating tsi hard link: %q", err) } @@ -762,102 +793,69 @@ func (i *Index) compact() { fs := i.retainFileSet() defer fs.Release() - // Return contiguous groups of files that are available for compaction. - for _, group := range i.compactionGroups(fs) { - // Mark files in group as compacting. - for _, f := range group { - f.Retain() - f.setCompacting(true) + // Iterate over each level we are going to compact. + // We skip the first level (0) because it is log files and they are compacted separately. + // We skip the last level because the files have no higher level to compact into. + minLevel, maxLevel := 1, len(i.levels)-2 + for level := minLevel; level <= maxLevel; level++ { + // Skip level if it is currently compacting. + if i.levelCompacting[level] { + continue } + // Collect files for the level. + files := fs.IndexFilesByLevel(level) + + // Calculate total size. Skip level if it doesn't meet min size of next level. + var size int64 + for _, f := range files { + size += f.Size() + } + if size < i.levels[level+1].MinSize { + continue + } + + // Limit the number of files that can be merged at once. + if len(files) > MaxIndexMergeCount { + files = files[len(files)-MaxIndexMergeCount:] + } + + // Retain files during compaction. + IndexFiles(files).Retain() + + // Mark the level as compacting. + i.levelCompacting[level] = true + // Execute in closure to save reference to the group within the loop. 
- func(group []*IndexFile) { + func(files []*IndexFile, level int) { // Start compacting in a separate goroutine. i.wg.Add(1) go func() { defer i.wg.Done() - i.compactGroup(group) - i.Compact() // check for new compactions + + // Compact to a new level. + i.compactToLevel(files, level+1) + + // Ensure compaction lock for the level is released. + i.mu.Lock() + i.levelCompacting[level] = false + i.mu.Unlock() + + // Check for new compactions + i.Compact() }() - }(group) + }(files, level) } } -// compactionGroups returns contiguous groups of index files that can be compacted. -// -// All groups will have at least two files and the total size is more than the -// largest file times the compaction factor. For example, if the compaction -// factor is 2 then the total size will be at least double the max file size. -func (i *Index) compactionGroups(fileSet FileSet) [][]*IndexFile { - log.Printf("%s: checking for compaction groups: n=%d", IndexName, len(fileSet)) - - var groups [][]*IndexFile - - // Loop over all files to find contiguous group of compactable files. - var group []*IndexFile - for _, f := range fileSet { - indexFile, ok := f.(*IndexFile) - - // Skip over log files. They compact themselves. - if !ok { - if isCompactableGroup(group, i.CompactionFactor) { - group, groups = nil, append(groups, group) - } else { - group = nil - } - continue - } - - // If file is currently compacting then stop current group. - if indexFile.Compacting() { - if isCompactableGroup(group, i.CompactionFactor) { - group, groups = nil, append(groups, group) - } else { - group = nil - } - continue - } - - // Stop current group if adding file will invalidate group. - // This can happen when appending a large file to a group of small files. - if isCompactableGroup(group, i.CompactionFactor) && !isCompactableGroup(append(group, indexFile), i.CompactionFactor) { - group, groups = []*IndexFile{indexFile}, append(groups, group) - continue - } - - // Otherwise append to the current group. 
- group = append(group, indexFile) - } - - // Append final group, if compactable. - if isCompactableGroup(group, i.CompactionFactor) { - groups = append(groups, group) - } - - return groups -} - -// isCompactableGroup returns true if total file size is greater than max file size times factor. -func isCompactableGroup(files []*IndexFile, factor float64) bool { - if len(files) < 2 { - return false - } - - var max, total int64 - for _, f := range files { - sz := f.Size() - if sz > max { - max = sz - } - total += sz - } - return total >= int64(float64(max)*factor) -} - -// compactGroup compacts files into a new file. Replaces old files with +// compactToLevel compacts a set of files into a new file. Replaces old files with // compacted file on successful completion. This runs in a separate goroutine. -func (i *Index) compactGroup(files []*IndexFile) { +func (i *Index) compactToLevel(files []*IndexFile, level int) { assert(len(files) >= 2, "at least two index files are required for compaction") + assert(level > 0, "cannot compact level zero") + + // Build a logger for this compaction. + logger := i.logger.With(zap.String("token", generateCompactionToken())) // Files have already been retained by caller. // Ensure files are released only once. @@ -868,27 +866,30 @@ func (i *Index) compactGroup(files []*IndexFile) { start := time.Now() // Create new index file. 
-	path := filepath.Join(i.Path, FormatIndexFileName(i.NextSequence()))
+	path := filepath.Join(i.Path, FormatIndexFileName(i.NextSequence(), level))
 	f, err := os.Create(path)
 	if err != nil {
-		log.Printf("%s: error creating compaction files: %s", IndexName, err)
+		logger.Error("cannot create compaction files", zap.Error(err))
 		return
 	}
 	defer f.Close()
 
-	srcIDs := joinIntSlice(IndexFiles(files).IDs(), ",")
-	log.Printf("%s: performing full compaction: src=%s, path=%s", IndexName, srcIDs, path)
+	logger.Info("performing full compaction",
+		zap.String("src", joinIntSlice(IndexFiles(files).IDs(), ",")),
+		zap.String("dst", path),
+	)
 
 	// Compact all index files to new index file.
-	n, err := IndexFiles(files).WriteTo(f)
+	lvl := i.levels[level]
+	n, err := IndexFiles(files).CompactTo(f, lvl.M, lvl.K)
 	if err != nil {
-		log.Printf("%s: error compacting index files: src=%s, path=%s, err=%s", IndexName, srcIDs, path, err)
+		logger.Error("cannot compact index files", zap.Error(err))
 		return
 	}
 
 	// Close file.
if err := f.Close(); err != nil { - log.Printf("%s: error closing index file: %s", IndexName, err) + logger.Error("error closing index file", zap.Error(err)) return } @@ -896,7 +897,7 @@ func (i *Index) compactGroup(files []*IndexFile) { file := NewIndexFile() file.SetPath(path) if err := file.Open(); err != nil { - log.Printf("%s: error opening new index file: %s", IndexName, err) + logger.Error("cannot open new index file", zap.Error(err)) return } @@ -915,23 +916,30 @@ func (i *Index) compactGroup(files []*IndexFile) { } return nil }(); err != nil { - log.Printf("%s: error writing manifest: %s", IndexName, err) + logger.Error("cannot write manifest", zap.Error(err)) return } - log.Printf("%s: full compaction complete: file=%s, t=%s, sz=%d", IndexName, path, time.Since(start), n) + + elapsed := time.Since(start) + logger.Info("full compaction complete", + zap.String("path", path), + zap.String("elapsed", elapsed.String()), + zap.Int64("bytes", n), + zap.Int("kb_per_sec", int(float64(n)/elapsed.Seconds())/1024), + ) // Release old files. once.Do(func() { IndexFiles(files).Release() }) // Close and delete all old index files. for _, f := range files { - log.Printf("%s: removing index file: file=%s", IndexName, f.Path()) + logger.Info("removing index file", zap.String("path", f.Path())) if err := f.Close(); err != nil { - log.Printf("%s: error closing index file: %s", IndexName, err) + logger.Error("cannot close index file", zap.Error(err)) return } else if err := os.Remove(f.Path()); err != nil { - log.Printf("%s: error removing index file: %s", IndexName, err) + logger.Error("cannot remove index file", zap.Error(err)) return } } @@ -982,31 +990,37 @@ func (i *Index) checkLogFile() error { // compacted then the manifest is updated and the log file is discarded. func (i *Index) compactLogFile(logFile *LogFile) { start := time.Now() - log.Printf("tsi1: compacting log file: file=%s", logFile.Path()) // Retrieve identifier from current path. 
-	id := ParseFileID(logFile.Path())
+	id := logFile.ID()
 	assert(id != 0, "cannot parse log file id: %s", logFile.Path())
 
+	// Build a logger for this compaction.
+	logger := i.logger.With(
+		zap.String("token", generateCompactionToken()),
+		zap.Int("id", id),
+	)
+
 	// Create new index file.
-	path := filepath.Join(i.Path, FormatIndexFileName(id))
+	path := filepath.Join(i.Path, FormatIndexFileName(id, 1))
 	f, err := os.Create(path)
 	if err != nil {
-		log.Printf("tsi1: error creating index file: %s", err)
+		logger.Error("cannot create index file", zap.Error(err))
 		return
 	}
 	defer f.Close()
 
 	// Compact log file to new index file.
-	n, err := logFile.WriteTo(f)
+	lvl := i.levels[1]
+	n, err := logFile.CompactTo(f, lvl.M, lvl.K)
 	if err != nil {
-		log.Printf("%s: error compacting log file: path=%s, err=%s", IndexName, logFile.Path(), err)
+		logger.Error("cannot compact log file", zap.Error(err), zap.String("path", logFile.Path()))
 		return
 	}
 
 	// Close file.
 	if err := f.Close(); err != nil {
-		log.Printf("tsi1: error closing log file: %s", err)
+		logger.Error("cannot close log file", zap.Error(err))
 		return
 	}
 
@@ -1014,7 +1028,7 @@ func (i *Index) compactLogFile(logFile *LogFile) {
 	file := NewIndexFile()
 	file.SetPath(path)
 	if err := file.Open(); err != nil {
-		log.Printf("tsi1: error opening compacted index file: path=%s, err=%s", file.Path(), err)
+		logger.Error("cannot open compacted index file", zap.Error(err), zap.String("path", file.Path()))
 		return
 	}
 
@@ -1033,18 +1047,23 @@ func (i *Index) compactLogFile(logFile *LogFile) {
 	}
 	return nil
 }(); err != nil {
-		log.Printf("%s: error updating manifest: %s", IndexName, err)
+		logger.Error("cannot update manifest", zap.Error(err))
 		return
 	}
-	log.Printf("%s: finished compacting log file: file=%s, t=%v, sz=%d", IndexName, logFile.Path(), time.Since(start), n)
+
+	elapsed := time.Since(start)
+	logger.Info("log file compacted",
+		zap.String("elapsed", elapsed.String()),
+		zap.Int64("bytes", n),
+		zap.Int("kb_per_sec", 
int(float64(n)/elapsed.Seconds())/1024), + ) // Closing the log file will automatically wait until the ref count is zero. - log.Printf("%s: removing log file: file=%s", IndexName, logFile.Path()) if err := logFile.Close(); err != nil { - log.Printf("%s: error closing log file: %s", IndexName, err) + logger.Error("cannot close log file", zap.Error(err)) return } else if err := os.Remove(logFile.Path()); err != nil { - log.Printf("%s: error removing log file: %s", IndexName, err) + logger.Error("cannot remove log file", zap.Error(err)) return } @@ -1054,7 +1073,7 @@ func (i *Index) compactLogFile(logFile *LogFile) { // seriesPointIterator adapts SeriesIterator to an influxql.Iterator. type seriesPointIterator struct { once sync.Once - fs FileSet + fs *FileSet fieldset *tsdb.MeasurementFieldSet mitr MeasurementIterator sitr SeriesIterator @@ -1064,7 +1083,7 @@ type seriesPointIterator struct { } // newSeriesPointIterator returns a new instance of seriesPointIterator. -func newSeriesPointIterator(fs FileSet, fieldset *tsdb.MeasurementFieldSet, opt influxql.IteratorOptions) *seriesPointIterator { +func newSeriesPointIterator(fs *FileSet, fieldset *tsdb.MeasurementFieldSet, opt influxql.IteratorOptions) *seriesPointIterator { return &seriesPointIterator{ fs: fs, fieldset: fieldset, @@ -1153,24 +1172,35 @@ func intersectStringSets(a, b map[string]struct{}) map[string]struct{} { return other } -var fileIDRegex = regexp.MustCompile(`^(\d+)\..+$`) +var fileIDRegex = regexp.MustCompile(`^L(\d+)-(\d+)\..+$`) -// ParseFileID extracts the numeric id from a log or index file path. +// ParseFilename extracts the numeric id from a log or index file path. // Returns 0 if it cannot be parsed. 
-func ParseFileID(name string) int {
+func ParseFilename(name string) (id, level int) {
 	a := fileIDRegex.FindStringSubmatch(filepath.Base(name))
 	if a == nil {
-		return 0
+		return 0, 0
 	}
-	i, _ := strconv.Atoi(a[1])
-	return i
+	level, _ = strconv.Atoi(a[1])
+	id, _ = strconv.Atoi(a[2])
+	return id, level
 }
 
 // Manifest represents the list of log & index files that make up the index.
 // The files are listed in time order, not necessarily ID order.
 type Manifest struct {
-	Files []string `json:"files,omitempty"`
+	Levels []CompactionLevel `json:"levels,omitempty"`
+	Files  []string          `json:"files,omitempty"`
+}
+
+// NewManifest returns a new instance of Manifest with default compaction levels.
+func NewManifest() *Manifest {
+	m := &Manifest{
+		Levels: make([]CompactionLevel, len(DefaultCompactionLevels)),
+	}
+	copy(m.Levels, DefaultCompactionLevels[:])
+	return m
 }
 
 // HasFile returns true if name is listed in the log files or index files.
@@ -1195,6 +1225,7 @@ func ReadManifestFile(path string) (*Manifest, error) {
 	if err := json.Unmarshal(buf, &m); err != nil {
 		return nil, err
 	}
+
 	return &m, nil
 }
 
@@ -1220,3 +1251,84 @@ func joinIntSlice(a []int, sep string) string {
 	}
 	return strings.Join(other, sep)
 }
+
+// CompactionLevel represents a grouping of index files based on size and
+// bloom filter settings. By having the same bloom filter settings, the filters
+// can be merged and evaluated at a higher level.
+type CompactionLevel struct {
+	// Minimum expected index size
+	MinSize int64 `json:"minSize,omitempty"`
+
+	// Bloom filter bit size & hash count
+	M uint64 `json:"m,omitempty"`
+	K uint64 `json:"k,omitempty"`
+}
+
+// DefaultCompactionLevels is the default settings used by the index.
+var DefaultCompactionLevels = []CompactionLevel{
+	// Log files, no filter.
+ {M: 0, K: 0}, + + // Initial compaction, 4MB filter + { + MinSize: 0, + M: 1 << 25, + K: 6, + }, + + // 24MB min file, 4MB filter + { + MinSize: 24 * (1 << 20), + M: 1 << 25, + K: 6, + }, + + // 48MB min file, 8MB filter + { + MinSize: 48 * (1 << 20), + M: 1 << 26, + K: 6, + }, + + // 96MB min file, 8MB filter + { + MinSize: 96 * (1 << 20), + M: 1 << 27, + K: 6, + }, + + // 192MB min file, 33MB filter + { + MinSize: 192 * (1 << 20), + M: 1 << 28, + K: 6, + }, + + // 768MB min file, 66MB filter + { + MinSize: 768 * (1 << 20), + M: 1 << 29, + K: 6, + }, + + // 2GB min file, 134MB filter + { + MinSize: 2 * (1 << 30), + M: 1 << 30, + K: 6, + }, +} + +// MaxIndexMergeCount is the maximum number of files that can be merged together at once. +const MaxIndexMergeCount = 2 + +// MaxIndexFileSize is the maximum expected size of an index file. +const MaxIndexFileSize = 4 * (1 << 30) + +// generateCompactionToken returns a short token to track an individual compaction. +// It is only used for logging so it doesn't need strong uniqueness guarantees. +func generateCompactionToken() string { + token := make([]byte, 3) + rand.Read(token) + return fmt.Sprintf("%x", token) +} diff --git a/tsdb/index/tsi1/index_file.go b/tsdb/index/tsi1/index_file.go index 6c6723b44e..cc1c371825 100644 --- a/tsdb/index/tsi1/index_file.go +++ b/tsdb/index/tsi1/index_file.go @@ -9,6 +9,7 @@ import ( "sync" "github.com/influxdata/influxdb/models" + "github.com/influxdata/influxdb/pkg/bloom" "github.com/influxdata/influxdb/pkg/estimator" "github.com/influxdata/influxdb/pkg/mmap" ) @@ -52,7 +53,8 @@ type IndexFile struct { mblk MeasurementBlock // Sortable identifier & filepath to the log file. - ID int + level int + id int // Counters seriesN int64 // Number of unique series in this indexFile. @@ -72,10 +74,8 @@ func NewIndexFile() *IndexFile { // Open memory maps the data file at the file's path. func (f *IndexFile) Open() error { - // Extract identifier from path name, if possible. 
- if id := ParseFileID(f.Path()); id > 0 { - f.ID = id - } + // Extract identifier from path name. + f.id, f.level = ParseFilename(f.Path()) data, err := mmap.Map(f.Path()) if err != nil { @@ -97,12 +97,21 @@ func (f *IndexFile) Close() error { return mmap.Unmap(f.data) } +// ID returns the file sequence identifier. +func (f *IndexFile) ID() int { return f.id } + // Path returns the file path. func (f *IndexFile) Path() string { return f.path } // SetPath sets the file's path. func (f *IndexFile) SetPath(path string) { f.path = path } +// Level returns the compaction level for the file. +func (f *IndexFile) Level() int { return f.level } + +// Filter returns the series existence filter for the file. +func (f *IndexFile) Filter() *bloom.Filter { return f.sblk.filter } + // Retain adds a reference count to the file. func (f *IndexFile) Retain() { f.wg.Add(1) } @@ -265,6 +274,7 @@ func (f *IndexFile) TagValueSeriesIterator(name, key, value []byte) SeriesIterat return newSeriesDecodeIterator( &f.sblk, &rawSeriesIDIterator{ + n: ve.(*TagBlockValueElem).series.n, data: ve.(*TagBlockValueElem).series.data, }, ) @@ -358,20 +368,6 @@ func (f *IndexFile) MergeSeriesSketches(s, t estimator.Sketch) error { return t.Merge(f.sblk.tsketch) } -// FilterNamesTags filters out any series which already exist. It modifies the -// provided slices of names and tags. -func (f *IndexFile) FilterNamesTags(names [][]byte, tagsSlice []models.Tags) ([][]byte, []models.Tags) { - buf := make([]byte, 1024) - newNames, newTagsSlice := names[:0], tagsSlice[:0] - for i := range names { - if exists, tombstoned := f.HasSeries(names[i], tagsSlice[i], buf); !exists || tombstoned { - newNames = append(newNames, names[i]) - newTagsSlice = append(newTagsSlice, tagsSlice[i]) - } - } - return newNames, newTagsSlice -} - // ReadIndexFileTrailer returns the index file trailer from data. 
func ReadIndexFileTrailer(data []byte) (IndexFileTrailer, error) { var t IndexFileTrailer @@ -438,6 +434,6 @@ func (t *IndexFileTrailer) WriteTo(w io.Writer) (n int64, err error) { } // FormatIndexFileName generates an index filename for the given index. -func FormatIndexFileName(i int) string { - return fmt.Sprintf("%08d%s", i, IndexFileExt) +func FormatIndexFileName(id, level int) string { + return fmt.Sprintf("L%d-%08d%s", level, id, IndexFileExt) } diff --git a/tsdb/index/tsi1/index_file_test.go b/tsdb/index/tsi1/index_file_test.go index ca043522d2..edabd49761 100644 --- a/tsdb/index/tsi1/index_file_test.go +++ b/tsdb/index/tsi1/index_file_test.go @@ -76,7 +76,7 @@ func CreateIndexFile(series []Series) (*tsi1.IndexFile, error) { // Write index file to buffer. var buf bytes.Buffer - if _, err := lf.WriteTo(&buf); err != nil { + if _, err := lf.CompactTo(&buf, M, K); err != nil { return nil, err } @@ -99,7 +99,7 @@ func GenerateIndexFile(measurementN, tagN, valueN int) (*tsi1.IndexFile, error) // Compact log file to buffer. var buf bytes.Buffer - if _, err := lf.WriteTo(&buf); err != nil { + if _, err := lf.CompactTo(&buf, M, K); err != nil { return nil, err } diff --git a/tsdb/index/tsi1/index_files.go b/tsdb/index/tsi1/index_files.go index 5a9d3a8da5..1cb6f22e73 100644 --- a/tsdb/index/tsi1/index_files.go +++ b/tsdb/index/tsi1/index_files.go @@ -8,6 +8,7 @@ import ( "sort" "time" + "github.com/influxdata/influxdb/pkg/estimator/hll" "github.com/influxdata/influxdb/pkg/mmap" ) @@ -18,7 +19,7 @@ type IndexFiles []*IndexFile func (p IndexFiles) IDs() []int { a := make([]int, len(p)) for i, f := range p { - a[i] = f.ID + a[i] = f.ID() } return a } @@ -121,8 +122,8 @@ func (p IndexFiles) TagValueSeriesIterator(name, key, value []byte) SeriesIterat return MergeSeriesIterators(a...) } -// WriteTo merges all index files and writes them to w. -func (p IndexFiles) WriteTo(w io.Writer) (n int64, err error) { +// CompactTo merges all index files and writes them to w. 
+func (p IndexFiles) CompactTo(w io.Writer, m, k uint64) (n int64, err error) { var t IndexFileTrailer // Wrap writer in buffered I/O. @@ -139,7 +140,7 @@ func (p IndexFiles) WriteTo(w io.Writer) (n int64, err error) { // Write combined series list. t.SeriesBlock.Offset = n - if err := p.writeSeriesBlockTo(bw, &info, &n); err != nil { + if err := p.writeSeriesBlockTo(bw, m, k, &info, &n); err != nil { return n, err } t.SeriesBlock.Size = n - t.SeriesBlock.Offset @@ -186,9 +187,17 @@ func (p IndexFiles) WriteTo(w io.Writer) (n int64, err error) { return n, nil } -func (p IndexFiles) writeSeriesBlockTo(w io.Writer, info *indexCompactInfo, n *int64) error { +func (p IndexFiles) writeSeriesBlockTo(w io.Writer, m, k uint64, info *indexCompactInfo, n *int64) error { + // Estimate series cardinality. + sketch := hll.NewDefaultPlus() + for _, f := range p { + if err := f.MergeSeriesSketches(sketch, sketch); err != nil { + return err + } + } + itr := p.SeriesIterator() - enc := NewSeriesBlockEncoder(w) + enc := NewSeriesBlockEncoder(w, uint32(sketch.Count()), m, k) // Write all series. for e := itr.Next(); e != nil; e = itr.Next() { @@ -199,7 +208,7 @@ func (p IndexFiles) writeSeriesBlockTo(w io.Writer, info *indexCompactInfo, n *i // Close and flush block. err := enc.Close() - *n += enc.N() + *n += int64(enc.N()) if err != nil { return err } @@ -238,7 +247,7 @@ func (p IndexFiles) writeTagsetTo(w io.Writer, name []byte, info *indexCompactIn for ve := vitr.Next(); ve != nil; ve = vitr.Next() { // Merge all series together. 
sitr := p.TagValueSeriesIterator(name, ke.Key(), ve.Value()) - var seriesIDs []uint64 + var seriesIDs []uint32 for se := sitr.Next(); se != nil; se = sitr.Next() { seriesID, _ := info.sblk.Offset(se.Name(), se.Tags(), seriesKey[:0]) if seriesID == 0 { @@ -246,7 +255,7 @@ func (p IndexFiles) writeTagsetTo(w io.Writer, name []byte, info *indexCompactIn } seriesIDs = append(seriesIDs, seriesID) } - sort.Sort(uint64Slice(seriesIDs)) + sort.Sort(uint32Slice(seriesIDs)) // Encode value. if err := enc.EncodeValue(ve.Value(), ve.Deleted(), seriesIDs); err != nil { @@ -285,7 +294,7 @@ func (p IndexFiles) writeMeasurementBlockTo(w io.Writer, info *indexCompactInfo, // Look-up series ids. itr := p.MeasurementSeriesIterator(name) - var seriesIDs []uint64 + var seriesIDs []uint32 for e := itr.Next(); e != nil; e = itr.Next() { seriesID, _ := info.sblk.Offset(e.Name(), e.Tags(), seriesKey[:0]) if seriesID == 0 { @@ -293,7 +302,7 @@ func (p IndexFiles) writeMeasurementBlockTo(w io.Writer, info *indexCompactInfo, } seriesIDs = append(seriesIDs, seriesID) } - sort.Sort(uint64Slice(seriesIDs)) + sort.Sort(uint32Slice(seriesIDs)) // Add measurement to writer. pos := info.tagSets[string(name)] diff --git a/tsdb/index/tsi1/index_files_test.go b/tsdb/index/tsi1/index_files_test.go index 260117437b..6baf4b9127 100644 --- a/tsdb/index/tsi1/index_files_test.go +++ b/tsdb/index/tsi1/index_files_test.go @@ -32,7 +32,7 @@ func TestIndexFiles_WriteTo(t *testing.T) { // Compact the two together and write out to a buffer. 
var buf bytes.Buffer a := tsi1.IndexFiles{f0, f1} - if n, err := a.WriteTo(&buf); err != nil { + if n, err := a.CompactTo(&buf, M, K); err != nil { t.Fatal(err) } else if n == 0 { t.Fatal("expected data written") diff --git a/tsdb/index/tsi1/index_test.go b/tsdb/index/tsi1/index_test.go index fc2da4db1a..158eca2396 100644 --- a/tsdb/index/tsi1/index_test.go +++ b/tsdb/index/tsi1/index_test.go @@ -12,6 +12,9 @@ import ( "github.com/influxdata/influxdb/tsdb/index/tsi1" ) +// Bloom filter settings used in tests. +const M, K = 4096, 6 + // Ensure index can iterate over all measurement names. func TestIndex_ForEachMeasurementName(t *testing.T) { idx := MustOpenIndex() diff --git a/tsdb/index/tsi1/log_file.go b/tsdb/index/tsi1/log_file.go index 4198f2dd35..c203a6a841 100644 --- a/tsdb/index/tsi1/log_file.go +++ b/tsdb/index/tsi1/log_file.go @@ -17,6 +17,7 @@ import ( "github.com/influxdata/influxdb/influxql" "github.com/influxdata/influxdb/models" + "github.com/influxdata/influxdb/pkg/bloom" "github.com/influxdata/influxdb/pkg/estimator" "github.com/influxdata/influxdb/pkg/mmap" ) @@ -38,6 +39,7 @@ const ( type LogFile struct { mu sync.RWMutex wg sync.WaitGroup // ref count + id int // file sequence identifier data []byte // mmap file *os.File // writer w *bufio.Writer // buffered writer @@ -78,6 +80,8 @@ func (f *LogFile) Open() error { } func (f *LogFile) open() error { + f.id, _ = ParseFilename(f.path) + // Open file for appending. file, err := os.OpenFile(f.Path(), os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0666) if err != nil { @@ -162,12 +166,21 @@ func (f *LogFile) Flush() error { return nil } +// ID returns the file sequence identifier. +func (f *LogFile) ID() int { return f.id } + // Path returns the file path. func (f *LogFile) Path() string { return f.path } // SetPath sets the log file's path. func (f *LogFile) SetPath(path string) { f.path = path } +// Level returns the log level of the file. 
+func (f *LogFile) Level() int { return 0 } + +// Filter returns the bloom filter for the file. +func (f *LogFile) Filter() *bloom.Filter { return nil } + // Retain adds a reference count to the file. func (f *LogFile) Retain() { f.wg.Add(1) } @@ -742,8 +755,8 @@ func (f *LogFile) MeasurementSeriesIterator(name []byte) SeriesIterator { return newLogSeriesIterator(mm.series) } -// WriteTo compacts the log file and writes it to w. -func (f *LogFile) WriteTo(w io.Writer) (n int64, err error) { +// CompactTo compacts the log file and writes it to w. +func (f *LogFile) CompactTo(w io.Writer, m, k uint64) (n int64, err error) { f.mu.RLock() defer f.mu.RUnlock() @@ -764,7 +777,7 @@ func (f *LogFile) WriteTo(w io.Writer) (n int64, err error) { // Write series list. t.SeriesBlock.Offset = n - if err := f.writeSeriesBlockTo(bw, names, info, &n); err != nil { + if err := f.writeSeriesBlockTo(bw, names, m, k, info, &n); err != nil { return n, err } t.SeriesBlock.Size = n - t.SeriesBlock.Offset @@ -807,9 +820,15 @@ func (f *LogFile) WriteTo(w io.Writer) (n int64, err error) { return n, nil } -func (f *LogFile) writeSeriesBlockTo(w io.Writer, names []string, info *logFileCompactInfo, n *int64) error { +func (f *LogFile) writeSeriesBlockTo(w io.Writer, names []string, m, k uint64, info *logFileCompactInfo, n *int64) error { + // Determine series count. + var seriesN uint32 + for _, mm := range f.mms { + seriesN += uint32(len(mm.series)) + } + // Write all series. - enc := NewSeriesBlockEncoder(w) + enc := NewSeriesBlockEncoder(w, seriesN, m, k) // Add series from measurements. for _, name := range names { @@ -832,7 +851,7 @@ func (f *LogFile) writeSeriesBlockTo(w io.Writer, names []string, info *logFileC // Close and flush series block. 
err := enc.Close() - *n += enc.N() + *n += int64(enc.N()) if err != nil { return err } @@ -855,7 +874,7 @@ func (f *LogFile) updateSeriesOffsets(w io.Writer, names []string, info *logFile for _, name := range names { mm := f.mms[name] mmInfo := info.createMeasurementInfoIfNotExists(name) - mmInfo.seriesIDs = make([]uint64, 0, len(mm.series)) + mmInfo.seriesIDs = make([]uint32, 0, len(mm.series)) for _, serie := range mm.series { // Lookup series offset. @@ -911,7 +930,7 @@ func (f *LogFile) writeTagsetTo(w io.Writer, name string, info *logFileCompactIn // Add each value. for v, value := range tag.tagValues { tagValueInfo := tagSetInfo.tagValues[v] - sort.Sort(uint64Slice(tagValueInfo.seriesIDs)) + sort.Sort(uint32Slice(tagValueInfo.seriesIDs)) if err := enc.EncodeValue(value.name, value.deleted, tagValueInfo.seriesIDs); err != nil { return err @@ -944,7 +963,7 @@ func (f *LogFile) writeMeasurementBlockTo(w io.Writer, names []string, info *log mmInfo := info.mms[name] assert(mmInfo != nil, "measurement info not found") - sort.Sort(uint64Slice(mmInfo.seriesIDs)) + sort.Sort(uint32Slice(mmInfo.seriesIDs)) mw.Add(mm.name, mm.deleted, mmInfo.offset, mmInfo.size, mmInfo.seriesIDs) } @@ -980,7 +999,7 @@ func (info *logFileCompactInfo) createMeasurementInfoIfNotExists(name string) *l type logFileMeasurementCompactInfo struct { offset int64 size int64 - seriesIDs []uint64 + seriesIDs []uint32 tagSet map[string]*logFileTagSetCompactInfo } @@ -1008,7 +1027,7 @@ func (info *logFileTagSetCompactInfo) createTagValueInfoIfNotExists(value []byte } type logFileTagValueCompactInfo struct { - seriesIDs []uint64 + seriesIDs []uint32 } // MergeSeriesSketches merges the series sketches belonging to this LogFile @@ -1394,6 +1413,6 @@ func (itr *logSeriesIterator) Next() (e SeriesElem) { } // FormatLogFileName generates a log filename for the given index. 
-func FormatLogFileName(i int) string { - return fmt.Sprintf("%08d%s", i, LogFileExt) +func FormatLogFileName(id int) string { + return fmt.Sprintf("L0-%08d%s", id, LogFileExt) } diff --git a/tsdb/index/tsi1/log_file_test.go b/tsdb/index/tsi1/log_file_test.go index d9c1b31aef..9a8c041cd0 100644 --- a/tsdb/index/tsi1/log_file_test.go +++ b/tsdb/index/tsi1/log_file_test.go @@ -14,6 +14,7 @@ import ( "time" "github.com/influxdata/influxdb/models" + "github.com/influxdata/influxdb/pkg/bloom" "github.com/influxdata/influxdb/tsdb/index/tsi1" ) @@ -290,6 +291,9 @@ func BenchmarkLogFile_WriteTo(b *testing.B) { f := MustOpenLogFile() defer f.Close() + // Estimate bloom filter size. + m, k := bloom.Estimate(uint64(seriesN), 0.02) + // Initialize log file with series data. for i := 0; i < seriesN; i++ { if err := f.AddSeries( @@ -311,7 +315,7 @@ func BenchmarkLogFile_WriteTo(b *testing.B) { // Compact log file. for i := 0; i < b.N; i++ { buf := bytes.NewBuffer(make([]byte, 0, 150*seriesN)) - if _, err := f.WriteTo(buf); err != nil { + if _, err := f.CompactTo(buf, m, k); err != nil { b.Fatal(err) } b.Logf("sz=%db", buf.Len()) diff --git a/tsdb/index/tsi1/measurement_block.go b/tsdb/index/tsi1/measurement_block.go index 205b5620fc..397adfd31d 100644 --- a/tsdb/index/tsi1/measurement_block.go +++ b/tsdb/index/tsi1/measurement_block.go @@ -148,7 +148,7 @@ func (blk *MeasurementBlock) seriesIDIterator(name []byte) seriesIDIterator { if !ok { return &rawSeriesIDIterator{} } - return &rawSeriesIDIterator{data: e.series.data} + return &rawSeriesIDIterator{n: e.series.n, data: e.series.data} } // blockMeasurementIterator iterates over a list measurements in a block. @@ -175,18 +175,23 @@ func (itr *blockMeasurementIterator) Next() MeasurementElem { // rawSeriesIterator iterates over a list of raw series data. type rawSeriesIDIterator struct { + prev uint32 + n uint32 data []byte } // next returns the next decoded series. 
-func (itr *rawSeriesIDIterator) next() uint64 { +func (itr *rawSeriesIDIterator) next() uint32 { if len(itr.data) == 0 { return 0 } - id := binary.BigEndian.Uint64(itr.data) - itr.data = itr.data[SeriesIDSize:] - return id + delta, n := binary.Uvarint(itr.data) + itr.data = itr.data[n:] + + seriesID := itr.prev + uint32(delta) + itr.prev = seriesID + return seriesID } // MeasurementBlockTrailer represents meta data at the end of a MeasurementBlock. @@ -299,7 +304,7 @@ type MeasurementBlockElem struct { } series struct { - n uint64 // series count + n uint32 // series count data []byte // serialized series data } @@ -325,18 +330,27 @@ func (e *MeasurementBlockElem) TagBlockSize() int64 { return e.tagBlock.size } func (e *MeasurementBlockElem) SeriesData() []byte { return e.series.data } // SeriesN returns the number of series associated with the measurement. -func (e *MeasurementBlockElem) SeriesN() uint64 { return e.series.n } +func (e *MeasurementBlockElem) SeriesN() uint32 { return e.series.n } // SeriesID returns series ID at an index. -func (e *MeasurementBlockElem) SeriesID(i int) uint64 { - return binary.BigEndian.Uint64(e.series.data[i*SeriesIDSize:]) +func (e *MeasurementBlockElem) SeriesID(i int) uint32 { + return binary.BigEndian.Uint32(e.series.data[i*SeriesIDSize:]) } // SeriesIDs returns a list of decoded series ids. -func (e *MeasurementBlockElem) SeriesIDs() []uint64 { - a := make([]uint64, e.series.n) - for i := 0; i < int(e.series.n); i++ { - a[i] = e.SeriesID(i) +// +// NOTE: This should be used for testing and diagnostics purposes only. +// It requires loading the entire list of series in-memory. 
+func (e *MeasurementBlockElem) SeriesIDs() []uint32 { + a := make([]uint32, 0, e.series.n) + var prev uint32 + for data := e.series.data; len(data) > 0; { + delta, n := binary.Uvarint(data) + data = data[n:] + + seriesID := prev + uint32(delta) + a = append(a, seriesID) + prev = seriesID } return a } @@ -361,8 +375,10 @@ func (e *MeasurementBlockElem) UnmarshalBinary(data []byte) error { // Parse series data. v, n := binary.Uvarint(data) - e.series.n, data = uint64(v), data[n:] - e.series.data, data = data[:e.series.n*SeriesIDSize], data[e.series.n*SeriesIDSize:] + e.series.n, data = uint32(v), data[n:] + sz, n = binary.Uvarint(data) + data = data[n:] + e.series.data, data = data[:sz], data[sz:] // Save length of elem. e.size = start - len(data) @@ -372,6 +388,7 @@ func (e *MeasurementBlockElem) UnmarshalBinary(data []byte) error { // MeasurementBlockWriter writes a measurement block. type MeasurementBlockWriter struct { + buf bytes.Buffer mms map[string]measurement // Measurement sketch and tombstoned measurement sketch. @@ -388,7 +405,7 @@ func NewMeasurementBlockWriter() *MeasurementBlockWriter { } // Add adds a measurement with series and tag set offset/size. -func (mw *MeasurementBlockWriter) Add(name []byte, deleted bool, offset, size int64, seriesIDs []uint64) { +func (mw *MeasurementBlockWriter) Add(name []byte, deleted bool, offset, size int64, seriesIDs []uint32) { mm := mw.mms[string(name)] mm.deleted = deleted mm.tagBlock.offset = offset @@ -518,14 +535,33 @@ func (mw *MeasurementBlockWriter) writeMeasurementTo(w io.Writer, name []byte, m return err } - // Write series count & ids. + // Write series data to buffer. + mw.buf.Reset() + var prev uint32 + for _, seriesID := range mm.seriesIDs { + delta := seriesID - prev + + var buf [binary.MaxVarintLen32]byte + i := binary.PutUvarint(buf[:], uint64(delta)) + if _, err := mw.buf.Write(buf[:i]); err != nil { + return err + } + + prev = seriesID + } + + // Write series count. 
if err := writeUvarintTo(w, uint64(len(mm.seriesIDs)), n); err != nil { return err } - for _, seriesID := range mm.seriesIDs { - if err := writeUint64To(w, seriesID, n); err != nil { - return err - } + + // Write data size & buffer. + if err := writeUvarintTo(w, uint64(mw.buf.Len()), n); err != nil { + return err + } + nn, err := mw.buf.WriteTo(w) + if *n += nn; err != nil { + return err } return nil @@ -551,7 +587,7 @@ type measurement struct { offset int64 size int64 } - seriesIDs []uint64 + seriesIDs []uint32 offset int64 } diff --git a/tsdb/index/tsi1/measurement_block_test.go b/tsdb/index/tsi1/measurement_block_test.go index 9ec6323de0..939c6d77cb 100644 --- a/tsdb/index/tsi1/measurement_block_test.go +++ b/tsdb/index/tsi1/measurement_block_test.go @@ -104,9 +104,9 @@ func TestMeasurementBlockTrailer_WriteTo(t *testing.T) { // Ensure measurement blocks can be written and opened. func TestMeasurementBlockWriter(t *testing.T) { ms := Measurements{ - NewMeasurement([]byte("foo"), false, 100, 10, []uint64{1, 3, 4}), - NewMeasurement([]byte("bar"), false, 200, 20, []uint64{2}), - NewMeasurement([]byte("baz"), false, 300, 30, []uint64{5, 6}), + NewMeasurement([]byte("foo"), false, 100, 10, []uint32{1, 3, 4}), + NewMeasurement([]byte("bar"), false, 200, 20, []uint32{2}), + NewMeasurement([]byte("baz"), false, 300, 30, []uint32{5, 6}), } // Write the measurements to writer. 
@@ -134,7 +134,7 @@ func TestMeasurementBlockWriter(t *testing.T) { t.Fatal("expected element") } else if e.TagBlockOffset() != 100 || e.TagBlockSize() != 10 { t.Fatalf("unexpected offset/size: %v/%v", e.TagBlockOffset(), e.TagBlockSize()) - } else if !reflect.DeepEqual(e.SeriesIDs(), []uint64{1, 3, 4}) { + } else if !reflect.DeepEqual(e.SeriesIDs(), []uint32{1, 3, 4}) { t.Fatalf("unexpected series data: %#v", e.SeriesIDs()) } @@ -142,7 +142,7 @@ func TestMeasurementBlockWriter(t *testing.T) { t.Fatal("expected element") } else if e.TagBlockOffset() != 200 || e.TagBlockSize() != 20 { t.Fatalf("unexpected offset/size: %v/%v", e.TagBlockOffset(), e.TagBlockSize()) - } else if !reflect.DeepEqual(e.SeriesIDs(), []uint64{2}) { + } else if !reflect.DeepEqual(e.SeriesIDs(), []uint32{2}) { t.Fatalf("unexpected series data: %#v", e.SeriesIDs()) } @@ -150,7 +150,7 @@ func TestMeasurementBlockWriter(t *testing.T) { t.Fatal("expected element") } else if e.TagBlockOffset() != 300 || e.TagBlockSize() != 30 { t.Fatalf("unexpected offset/size: %v/%v", e.TagBlockOffset(), e.TagBlockSize()) - } else if !reflect.DeepEqual(e.SeriesIDs(), []uint64{5, 6}) { + } else if !reflect.DeepEqual(e.SeriesIDs(), []uint32{5, 6}) { t.Fatalf("unexpected series data: %#v", e.SeriesIDs()) } @@ -167,10 +167,10 @@ type Measurement struct { Deleted bool Offset int64 Size int64 - ids []uint64 + ids []uint32 } -func NewMeasurement(name []byte, deleted bool, offset, size int64, ids []uint64) Measurement { +func NewMeasurement(name []byte, deleted bool, offset, size int64, ids []uint32) Measurement { return Measurement{ Name: name, Deleted: deleted, diff --git a/tsdb/index/tsi1/series_block.go b/tsdb/index/tsi1/series_block.go index a7b8d17332..9a347a0d9c 100644 --- a/tsdb/index/tsi1/series_block.go +++ b/tsdb/index/tsi1/series_block.go @@ -11,6 +11,7 @@ import ( "github.com/influxdata/influxdb/influxql" "github.com/influxdata/influxdb/models" + "github.com/influxdata/influxdb/pkg/bloom" 
"github.com/influxdata/influxdb/pkg/estimator" "github.com/influxdata/influxdb/pkg/estimator/hll" "github.com/influxdata/influxdb/pkg/mmap" @@ -24,16 +25,17 @@ var ErrSeriesOverflow = errors.New("series overflow") const ( // Series list trailer field sizes. SeriesBlockTrailerSize = 0 + - 8 + 8 + // series data offset/size - 8 + 8 + 8 + // series index offset/size/capacity - 8 + 8 + // series sketch offset/size - 8 + 8 + // tombstone series sketch offset/size - 8 + 8 + // series count and tombstone count + 4 + 4 + // series data offset/size + 4 + 4 + 4 + // series index offset/size/capacity + 8 + 4 + 4 + // bloom filter false positive rate, offset/size + 4 + 4 + // series sketch offset/size + 4 + 4 + // tombstone series sketch offset/size + 4 + 4 + // series count and tombstone count 0 // Other field sizes - SeriesCountSize = 8 - SeriesIDSize = 8 + SeriesCountSize = 4 + SeriesIDSize = 4 ) // Series flag constants. @@ -58,8 +60,11 @@ type SeriesBlock struct { seriesIndexes []seriesBlockIndex // Exact series counts for this block. - seriesN int64 - tombstoneN int64 + seriesN int32 + tombstoneN int32 + + // Bloom filter used for fast series existence check. + filter *bloom.Filter // Series block sketch and tombstone sketch for cardinality estimation. // While we have exact counts for the block, these sketches allow us to @@ -87,7 +92,7 @@ func (blk *SeriesBlock) Series(name []byte, tags models.Tags) SeriesElem { } // Offset returns the byte offset of the series within the block. -func (blk *SeriesBlock) Offset(name []byte, tags models.Tags, buf []byte) (offset uint64, tombstoned bool) { +func (blk *SeriesBlock) Offset(name []byte, tags models.Tags, buf []byte) (offset uint32, tombstoned bool) { // Exit if no series indexes exist. if len(blk.seriesIndexes) == 0 { return 0, false @@ -95,7 +100,15 @@ func (blk *SeriesBlock) Offset(name []byte, tags models.Tags, buf []byte) (offse // Compute series key. 
buf = AppendSeriesKey(buf[:0], name, tags) - bufN := uint64(len(buf)) + bufN := uint32(len(buf)) + + // Quickly check the bloom filter. + // If the key doesn't exist then we know for sure that it doesn't exist. + // If it does exist then we need to do a hash index check to verify. False + // positives are possible with a bloom filter. + if !blk.filter.Contains(buf) { + return 0, false + } // Find the correct partition. // Use previous index unless an exact match on the min value. @@ -108,7 +121,7 @@ func (blk *SeriesBlock) Offset(name []byte, tags models.Tags, buf []byte) (offse seriesIndex := blk.seriesIndexes[i] // Search within partition. - n := seriesIndex.capacity + n := int64(seriesIndex.capacity) hash := rhh.HashKey(buf) pos := hash % n @@ -116,7 +129,7 @@ func (blk *SeriesBlock) Offset(name []byte, tags models.Tags, buf []byte) (offse var d int64 for { // Find offset of series. - offset := binary.BigEndian.Uint64(seriesIndex.data[pos*SeriesIDSize:]) + offset := binary.BigEndian.Uint32(seriesIndex.data[pos*SeriesIDSize:]) if offset == 0 { return 0, false } @@ -144,8 +157,8 @@ func (blk *SeriesBlock) Offset(name []byte, tags models.Tags, buf []byte) (offse } // SeriesCount returns the number of series. -func (blk *SeriesBlock) SeriesCount() uint64 { - return uint64(blk.seriesN + blk.tombstoneN) +func (blk *SeriesBlock) SeriesCount() uint32 { + return uint32(blk.seriesN + blk.tombstoneN) } // SeriesIterator returns an iterator over all the series. @@ -179,23 +192,30 @@ func (blk *SeriesBlock) UnmarshalBinary(data []byte) error { idx := &blk.seriesIndexes[i] // Read data block. - var offset, size uint64 - offset, buf = binary.BigEndian.Uint64(buf[:8]), buf[8:] - size, buf = binary.BigEndian.Uint64(buf[:8]), buf[8:] + var offset, size uint32 + offset, buf = binary.BigEndian.Uint32(buf[:4]), buf[4:] + size, buf = binary.BigEndian.Uint32(buf[:4]), buf[4:] idx.data = blk.data[offset : offset+size] // Read block capacity. 
- idx.capacity, buf = int64(binary.BigEndian.Uint64(buf[:8])), buf[8:] + idx.capacity, buf = int32(binary.BigEndian.Uint32(buf[:4])), buf[4:] // Read min key. - var n uint64 - n, buf = binary.BigEndian.Uint64(buf[:8]), buf[8:] + var n uint32 + n, buf = binary.BigEndian.Uint32(buf[:4]), buf[4:] idx.min, buf = buf[:n], buf[n:] } if len(buf) != 0 { return fmt.Errorf("data remaining in index list buffer: %d", len(buf)) } + // Initialize bloom filter. + filter, err := bloom.NewFilterBuffer(data[t.Bloom.Offset:][:t.Bloom.Size], t.Bloom.K) + if err != nil { + return err + } + blk.filter = filter + // Initialise sketches. We're currently using HLL+. var s, ts = hll.NewDefaultPlus(), hll.NewDefaultPlus() if err := s.UnmarshalBinary(data[t.Sketch.Offset:][:t.Sketch.Size]); err != nil { @@ -218,13 +238,13 @@ func (blk *SeriesBlock) UnmarshalBinary(data []byte) error { type seriesBlockIndex struct { data []byte min []byte - capacity int64 + capacity int32 } // seriesBlockIterator is an iterator over a series ids in a series list. type seriesBlockIterator struct { - i, n uint64 - offset uint64 + i, n uint32 + offset uint32 sblk *SeriesBlock e SeriesBlockElem // buffer } @@ -243,8 +263,8 @@ func (itr *seriesBlockIterator) Next() SeriesElem { itr.offset++ // Read index capacity. - n := binary.BigEndian.Uint64(itr.sblk.data[itr.offset:]) - itr.offset += 8 + n := binary.BigEndian.Uint32(itr.sblk.data[itr.offset:]) + itr.offset += 4 // Skip over index. itr.offset += n * SeriesIDSize @@ -256,7 +276,7 @@ func (itr *seriesBlockIterator) Next() SeriesElem { // Move iterator and offset forward. itr.i++ - itr.offset += uint64(itr.e.size) + itr.offset += uint32(itr.e.size) return &itr.e } @@ -355,12 +375,12 @@ func AppendSeriesElem(dst []byte, flag byte, name []byte, tags models.Tags) []by // AppendSeriesKey serializes name and tags to a byte slice. // The total length is prepended as a uvarint. 
func AppendSeriesKey(dst []byte, name []byte, tags models.Tags) []byte { - buf := make([]byte, binary.MaxVarintLen64) + buf := make([]byte, binary.MaxVarintLen32) origLen := len(dst) // The tag count is variable encoded, so we need to know ahead of time what // the size of the tag count value will be. - tcBuf := make([]byte, binary.MaxVarintLen64) + tcBuf := make([]byte, binary.MaxVarintLen32) tcSz := binary.PutUvarint(tcBuf, uint64(len(tags))) // Size of name/tags. Does not include total length. @@ -510,13 +530,16 @@ type SeriesBlockEncoder struct { indexMin []byte indexes []seriesBlockIndexEncodeInfo + // Bloom filter to check for series existance. + filter *bloom.Filter + // Series sketch and tombstoned series sketch. These must be // set before calling WriteTo. sketch, tSketch estimator.Sketch } // NewSeriesBlockEncoder returns a new instance of SeriesBlockEncoder. -func NewSeriesBlockEncoder(w io.Writer) *SeriesBlockEncoder { +func NewSeriesBlockEncoder(w io.Writer, n uint32, m, k uint64) *SeriesBlockEncoder { return &SeriesBlockEncoder{ w: w, @@ -525,6 +548,8 @@ func NewSeriesBlockEncoder(w io.Writer) *SeriesBlockEncoder { LoadFactor: LoadFactor, }), + filter: bloom.NewFilter(m, k), + sketch: hll.NewDefaultPlus(), tSketch: hll.NewDefaultPlus(), } @@ -572,7 +597,10 @@ func (enc *SeriesBlockEncoder) Encode(name []byte, tags models.Tags, deleted boo // Save offset to generate index later. // Key is copied by the RHH map. - enc.offsets.Put(buf[1:], uint64(offset)) + enc.offsets.Put(buf[1:], uint32(offset)) + + // Update bloom filter. + enc.filter.Insert(buf[1:]) // Update sketches & trailer. if deleted { @@ -600,27 +628,35 @@ func (enc *SeriesBlockEncoder) Close() error { // Write dictionary-encoded series list. enc.trailer.Series.Data.Offset = 1 - enc.trailer.Series.Data.Size = enc.n - enc.trailer.Series.Data.Offset + enc.trailer.Series.Data.Size = int32(enc.n) - enc.trailer.Series.Data.Offset // Write dictionary-encoded series hash index. 
- enc.trailer.Series.Index.Offset = enc.n + enc.trailer.Series.Index.Offset = int32(enc.n) if err := enc.writeIndexEntries(); err != nil { return err } - enc.trailer.Series.Index.Size = enc.n - enc.trailer.Series.Index.Offset + enc.trailer.Series.Index.Size = int32(enc.n) - enc.trailer.Series.Index.Offset + + // Flush bloom filter. + enc.trailer.Bloom.K = enc.filter.K() + enc.trailer.Bloom.Offset = int32(enc.n) + if err := writeTo(enc.w, enc.filter.Bytes(), &enc.n); err != nil { + return err + } + enc.trailer.Bloom.Size = int32(enc.n) - enc.trailer.Bloom.Offset // Write the sketches out. - enc.trailer.Sketch.Offset = enc.n + enc.trailer.Sketch.Offset = int32(enc.n) if err := writeSketchTo(enc.w, enc.sketch, &enc.n); err != nil { return err } - enc.trailer.Sketch.Size = enc.n - enc.trailer.Sketch.Offset + enc.trailer.Sketch.Size = int32(enc.n) - enc.trailer.Sketch.Offset - enc.trailer.TSketch.Offset = enc.n + enc.trailer.TSketch.Offset = int32(enc.n) if err := writeSketchTo(enc.w, enc.tSketch, &enc.n); err != nil { return err } - enc.trailer.TSketch.Size = enc.n - enc.trailer.TSketch.Offset + enc.trailer.TSketch.Size = int32(enc.n) - enc.trailer.TSketch.Offset // Write trailer. nn, err := enc.trailer.WriteTo(enc.w) @@ -634,23 +670,23 @@ func (enc *SeriesBlockEncoder) Close() error { // writeIndexEntries writes a list of series hash index entries. func (enc *SeriesBlockEncoder) writeIndexEntries() error { - enc.trailer.Series.Index.N = int64(len(enc.indexes)) + enc.trailer.Series.Index.N = int32(len(enc.indexes)) for _, idx := range enc.indexes { // Write offset/size. - if err := writeUint64To(enc.w, uint64(idx.offset), &enc.n); err != nil { + if err := writeUint32To(enc.w, uint32(idx.offset), &enc.n); err != nil { return err - } else if err := writeUint64To(enc.w, uint64(idx.size), &enc.n); err != nil { + } else if err := writeUint32To(enc.w, uint32(idx.size), &enc.n); err != nil { return err } // Write capacity. 
- if err := writeUint64To(enc.w, uint64(idx.capacity), &enc.n); err != nil { + if err := writeUint32To(enc.w, uint32(idx.capacity), &enc.n); err != nil { return err } // Write min key. - if err := writeUint64To(enc.w, uint64(len(idx.min)), &enc.n); err != nil { + if err := writeUint32To(enc.w, uint32(len(idx.min)), &enc.n); err != nil { return err } else if err := writeTo(enc.w, idx.min, &enc.n); err != nil { return err @@ -708,12 +744,12 @@ func (enc *SeriesBlockEncoder) flushIndex() error { } // Write index capacity. // This is used for skipping over when iterating sequentially. - if err := writeUint64To(enc.w, uint64(enc.offsets.Cap()), &enc.n); err != nil { + if err := writeUint32To(enc.w, uint32(enc.offsets.Cap()), &enc.n); err != nil { return err } // Determine size. - var sz int64 = enc.offsets.Cap() * 8 + var sz int64 = enc.offsets.Cap() * 4 // Save current position to ensure size is correct by the end. offset := enc.n @@ -721,9 +757,9 @@ func (enc *SeriesBlockEncoder) flushIndex() error { // Encode hash map offset entries. for i := int64(0); i < enc.offsets.Cap(); i++ { _, v := enc.offsets.Elem(i) - seriesOffset, _ := v.(uint64) + seriesOffset, _ := v.(uint32) - if err := writeUint64To(enc.w, uint64(seriesOffset), &enc.n); err != nil { + if err := writeUint32To(enc.w, uint32(seriesOffset), &enc.n); err != nil { return err } } @@ -738,9 +774,9 @@ func (enc *SeriesBlockEncoder) flushIndex() error { // Add to index entries. enc.indexes = append(enc.indexes, seriesBlockIndexEncodeInfo{ - offset: offset, - size: size, - capacity: uint64(enc.offsets.Cap()), + offset: uint32(offset), + size: uint32(size), + capacity: uint32(enc.offsets.Cap()), min: enc.indexMin, }) @@ -752,9 +788,9 @@ func (enc *SeriesBlockEncoder) flushIndex() error { // seriesBlockIndexEncodeInfo stores offset information for seriesBlockIndex structures. 
type seriesBlockIndexEncodeInfo struct { - offset int64 - size int64 - capacity uint64 + offset uint32 + size uint32 + capacity uint32 min []byte } @@ -766,25 +802,30 @@ func ReadSeriesBlockTrailer(data []byte) SeriesBlockTrailer { buf := data[len(data)-SeriesBlockTrailerSize:] // Read series data info. - t.Series.Data.Offset, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] - t.Series.Data.Size, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] + t.Series.Data.Offset, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:] + t.Series.Data.Size, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:] // Read series hash index info. - t.Series.Index.Offset, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] - t.Series.Index.Size, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] - t.Series.Index.N, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] + t.Series.Index.Offset, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:] + t.Series.Index.Size, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:] + t.Series.Index.N, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:] + + // Read bloom filter info. + t.Bloom.K, buf = binary.BigEndian.Uint64(buf[0:8]), buf[8:] + t.Bloom.Offset, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:] + t.Bloom.Size, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:] // Read series sketch info. - t.Sketch.Offset, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] - t.Sketch.Size, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] + t.Sketch.Offset, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:] + t.Sketch.Size, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:] // Read tombstone series sketch info. 
- t.TSketch.Offset, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] - t.TSketch.Size, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] + t.TSketch.Offset, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:] + t.TSketch.Size, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:] // Read series & tombstone count. - t.SeriesN, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] - t.TombstoneN, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] + t.SeriesN, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:] + t.TombstoneN, buf = int32(binary.BigEndian.Uint32(buf[0:4])), buf[4:] return t } @@ -793,65 +834,81 @@ func ReadSeriesBlockTrailer(data []byte) SeriesBlockTrailer { type SeriesBlockTrailer struct { Series struct { Data struct { - Offset int64 - Size int64 + Offset int32 + Size int32 } Index struct { - Offset int64 - Size int64 - N int64 + Offset int32 + Size int32 + N int32 } } + // Bloom filter info. + Bloom struct { + K uint64 + Offset int32 + Size int32 + } + // Offset and size of cardinality sketch for measurements. Sketch struct { - Offset int64 - Size int64 + Offset int32 + Size int32 } // Offset and size of cardinality sketch for tombstoned measurements. 
TSketch struct { - Offset int64 - Size int64 + Offset int32 + Size int32 } - SeriesN int64 - TombstoneN int64 + SeriesN int32 + TombstoneN int32 } func (t SeriesBlockTrailer) WriteTo(w io.Writer) (n int64, err error) { - if err := writeUint64To(w, uint64(t.Series.Data.Offset), &n); err != nil { + if err := writeUint32To(w, uint32(t.Series.Data.Offset), &n); err != nil { return n, err - } else if err := writeUint64To(w, uint64(t.Series.Data.Size), &n); err != nil { + } else if err := writeUint32To(w, uint32(t.Series.Data.Size), &n); err != nil { return n, err } - if err := writeUint64To(w, uint64(t.Series.Index.Offset), &n); err != nil { + if err := writeUint32To(w, uint32(t.Series.Index.Offset), &n); err != nil { return n, err - } else if err := writeUint64To(w, uint64(t.Series.Index.Size), &n); err != nil { + } else if err := writeUint32To(w, uint32(t.Series.Index.Size), &n); err != nil { return n, err - } else if err := writeUint64To(w, uint64(t.Series.Index.N), &n); err != nil { + } else if err := writeUint32To(w, uint32(t.Series.Index.N), &n); err != nil { + return n, err + } + + // Write bloom filter info. + if err := writeUint64To(w, t.Bloom.K, &n); err != nil { + return n, err + } else if err := writeUint32To(w, uint32(t.Bloom.Offset), &n); err != nil { + return n, err + } else if err := writeUint32To(w, uint32(t.Bloom.Size), &n); err != nil { return n, err } // Write measurement sketch info. - if err := writeUint64To(w, uint64(t.Sketch.Offset), &n); err != nil { + if err := writeUint32To(w, uint32(t.Sketch.Offset), &n); err != nil { return n, err - } else if err := writeUint64To(w, uint64(t.Sketch.Size), &n); err != nil { + } else if err := writeUint32To(w, uint32(t.Sketch.Size), &n); err != nil { return n, err } // Write tombstone measurement sketch info. 
- if err := writeUint64To(w, uint64(t.TSketch.Offset), &n); err != nil { + if err := writeUint32To(w, uint32(t.TSketch.Offset), &n); err != nil { return n, err - } else if err := writeUint64To(w, uint64(t.TSketch.Size), &n); err != nil { + } else if err := writeUint32To(w, uint32(t.TSketch.Size), &n); err != nil { return n, err } // Write series and tombstone count. - if err := writeUint64To(w, uint64(t.SeriesN), &n); err != nil { + if err := writeUint32To(w, uint32(t.SeriesN), &n); err != nil { return n, err - } else if err := writeUint64To(w, uint64(t.TombstoneN), &n); err != nil { + } else if err := writeUint32To(w, uint32(t.TombstoneN), &n); err != nil { return n, err } @@ -862,7 +919,7 @@ type serie struct { name []byte tags models.Tags deleted bool - offset uint64 + offset uint32 } func (s *serie) flag() uint8 { return encodeSerieFlag(s.deleted) } diff --git a/tsdb/index/tsi1/series_block_test.go b/tsdb/index/tsi1/series_block_test.go index b43980371e..3455abce93 100644 --- a/tsdb/index/tsi1/series_block_test.go +++ b/tsdb/index/tsi1/series_block_test.go @@ -56,7 +56,7 @@ func CreateSeriesBlock(a []Series) (*tsi1.SeriesBlock, error) { var buf bytes.Buffer // Create writer and sketches. Add series. 
- enc := tsi1.NewSeriesBlockEncoder(&buf) + enc := tsi1.NewSeriesBlockEncoder(&buf, uint32(len(a)), M, K) for i, s := range a { if err := enc.Encode(s.Name, s.Tags, s.Deleted); err != nil { return nil, fmt.Errorf("SeriesBlockWriter.Add(): i=%d, err=%s", i, err) diff --git a/tsdb/index/tsi1/tag_block.go b/tsdb/index/tsi1/tag_block.go index b22b8a1933..d3024896c1 100644 --- a/tsdb/index/tsi1/tag_block.go +++ b/tsdb/index/tsi1/tag_block.go @@ -300,7 +300,7 @@ type TagBlockValueElem struct { flag byte value []byte series struct { - n uint64 // Series count + n uint32 // Series count data []byte // Raw series data } @@ -314,21 +314,27 @@ func (e *TagBlockValueElem) Deleted() bool { return (e.flag & TagValueTombstoneF func (e *TagBlockValueElem) Value() []byte { return e.value } // SeriesN returns the series count. -func (e *TagBlockValueElem) SeriesN() uint64 { return e.series.n } +func (e *TagBlockValueElem) SeriesN() uint32 { return e.series.n } // SeriesData returns the raw series data. func (e *TagBlockValueElem) SeriesData() []byte { return e.series.data } // SeriesID returns series ID at an index. -func (e *TagBlockValueElem) SeriesID(i int) uint64 { - return binary.BigEndian.Uint64(e.series.data[i*SeriesIDSize:]) +func (e *TagBlockValueElem) SeriesID(i int) uint32 { + return binary.BigEndian.Uint32(e.series.data[i*SeriesIDSize:]) } // SeriesIDs returns a list decoded series ids. -func (e *TagBlockValueElem) SeriesIDs() []uint64 { - a := make([]uint64, e.series.n) - for i := 0; i < int(e.series.n); i++ { - a[i] = e.SeriesID(i) +func (e *TagBlockValueElem) SeriesIDs() []uint32 { + a := make([]uint32, 0, e.series.n) + var prev uint32 + for data := e.series.data; len(data) > 0; { + delta, n := binary.Uvarint(data) + data = data[n:] + + seriesID := prev + uint32(delta) + a = append(a, seriesID) + prev = seriesID } return a } @@ -348,12 +354,17 @@ func (e *TagBlockValueElem) unmarshal(buf []byte) { e.value, buf = buf[n:n+int(sz)], buf[n+int(sz):] // Parse series count. 
- e.series.n, n = binary.Uvarint(buf) + v, n := binary.Uvarint(buf) + e.series.n = uint32(v) + buf = buf[n:] + + // Parse data block size. + sz, n = binary.Uvarint(buf) buf = buf[n:] // Save reference to series data. - e.series.data = buf[:e.series.n*SeriesIDSize] - buf = buf[e.series.n*SeriesIDSize:] + e.series.data = buf[:sz] + buf = buf[sz:] // Save length of elem. e.size = start - len(buf) @@ -457,7 +468,8 @@ func ReadTagBlockTrailer(data []byte) (TagBlockTrailer, error) { // TagBlockEncoder encodes a tags to a TagBlock section. type TagBlockEncoder struct { - w io.Writer + w io.Writer + buf bytes.Buffer // Track value offsets. offsets *rhh.HashMap @@ -520,7 +532,7 @@ func (enc *TagBlockEncoder) EncodeKey(key []byte, deleted bool) error { // EncodeValue writes a tag value to the underlying writer. // The tag key must be lexicographical sorted after the previous encoded tag key. -func (enc *TagBlockEncoder) EncodeValue(value []byte, deleted bool, seriesIDs []uint64) error { +func (enc *TagBlockEncoder) EncodeValue(value []byte, deleted bool, seriesIDs []uint32) error { if len(enc.keys) == 0 { return fmt.Errorf("tag key must be encoded before encoding values") } else if len(value) == 0 { @@ -542,16 +554,33 @@ func (enc *TagBlockEncoder) EncodeValue(value []byte, deleted bool, seriesIDs [] return err } + // Build series data in buffer. + enc.buf.Reset() + var prev uint32 + for _, seriesID := range seriesIDs { + delta := seriesID - prev + + var buf [binary.MaxVarintLen32]byte + i := binary.PutUvarint(buf[:], uint64(delta)) + if _, err := enc.buf.Write(buf[:i]); err != nil { + return err + } + + prev = seriesID + } + // Write series count. if err := writeUvarintTo(enc.w, uint64(len(seriesIDs)), &enc.n); err != nil { return err } - // Write series ids. - for _, seriesID := range seriesIDs { - if err := writeUint64To(enc.w, seriesID, &enc.n); err != nil { - return err - } + // Write data size & buffer. 
+ if err := writeUvarintTo(enc.w, uint64(enc.buf.Len()), &enc.n); err != nil { + return err + } + nn, err := enc.buf.WriteTo(enc.w) + if enc.n += nn; err != nil { + return err } return nil @@ -721,31 +750,3 @@ func encodeTagValueFlag(deleted bool) byte { } return flag } - -/* -type tagSet struct { - deleted bool - data struct { - offset int64 - size int64 - } - hashIndex struct { - offset int64 - size int64 - } - values map[string]tagValue - - offset int64 -} - -func (ts tagSet) flag() byte { return encodeTagKeyFlag(ts.deleted) } - -type tagValue struct { - seriesIDs []uint64 - deleted bool - - offset int64 -} - -func (tv tagValue) flag() byte { return encodeTagValueFlag(tv.deleted) } -*/ diff --git a/tsdb/index/tsi1/tag_block_test.go b/tsdb/index/tsi1/tag_block_test.go index f69042a4f3..4de527e16d 100644 --- a/tsdb/index/tsi1/tag_block_test.go +++ b/tsdb/index/tsi1/tag_block_test.go @@ -17,19 +17,19 @@ func TestTagBlockWriter(t *testing.T) { if err := enc.EncodeKey([]byte("host"), false); err != nil { t.Fatal(err) - } else if err := enc.EncodeValue([]byte("server0"), false, []uint64{1}); err != nil { + } else if err := enc.EncodeValue([]byte("server0"), false, []uint32{1}); err != nil { t.Fatal(err) - } else if err := enc.EncodeValue([]byte("server1"), false, []uint64{2}); err != nil { + } else if err := enc.EncodeValue([]byte("server1"), false, []uint32{2}); err != nil { t.Fatal(err) - } else if err := enc.EncodeValue([]byte("server2"), false, []uint64{3}); err != nil { + } else if err := enc.EncodeValue([]byte("server2"), false, []uint32{3}); err != nil { t.Fatal(err) } if err := enc.EncodeKey([]byte("region"), false); err != nil { t.Fatal(err) - } else if err := enc.EncodeValue([]byte("us-east"), false, []uint64{1, 2}); err != nil { + } else if err := enc.EncodeValue([]byte("us-east"), false, []uint32{1, 2}); err != nil { t.Fatal(err) - } else if err := enc.EncodeValue([]byte("us-west"), false, []uint64{3}); err != nil { + } else if err := 
enc.EncodeValue([]byte("us-west"), false, []uint32{3}); err != nil { t.Fatal(err) } @@ -49,28 +49,28 @@ func TestTagBlockWriter(t *testing.T) { // Verify data. if e := blk.TagValueElem([]byte("region"), []byte("us-east")); e == nil { t.Fatal("expected element") - } else if a := e.(*tsi1.TagBlockValueElem).SeriesIDs(); !reflect.DeepEqual(a, []uint64{1, 2}) { + } else if a := e.(*tsi1.TagBlockValueElem).SeriesIDs(); !reflect.DeepEqual(a, []uint32{1, 2}) { t.Fatalf("unexpected series ids: %#v", a) } if e := blk.TagValueElem([]byte("region"), []byte("us-west")); e == nil { t.Fatal("expected element") - } else if a := e.(*tsi1.TagBlockValueElem).SeriesIDs(); !reflect.DeepEqual(a, []uint64{3}) { + } else if a := e.(*tsi1.TagBlockValueElem).SeriesIDs(); !reflect.DeepEqual(a, []uint32{3}) { t.Fatalf("unexpected series ids: %#v", a) } if e := blk.TagValueElem([]byte("host"), []byte("server0")); e == nil { t.Fatal("expected element") - } else if a := e.(*tsi1.TagBlockValueElem).SeriesIDs(); !reflect.DeepEqual(a, []uint64{1}) { + } else if a := e.(*tsi1.TagBlockValueElem).SeriesIDs(); !reflect.DeepEqual(a, []uint32{1}) { t.Fatalf("unexpected series ids: %#v", a) } if e := blk.TagValueElem([]byte("host"), []byte("server1")); e == nil { t.Fatal("expected element") - } else if a := e.(*tsi1.TagBlockValueElem).SeriesIDs(); !reflect.DeepEqual(a, []uint64{2}) { + } else if a := e.(*tsi1.TagBlockValueElem).SeriesIDs(); !reflect.DeepEqual(a, []uint32{2}) { t.Fatalf("unexpected series ids: %#v", a) } if e := blk.TagValueElem([]byte("host"), []byte("server2")); e == nil { t.Fatal("expected element") - } else if a := e.(*tsi1.TagBlockValueElem).SeriesIDs(); !reflect.DeepEqual(a, []uint64{3}) { + } else if a := e.(*tsi1.TagBlockValueElem).SeriesIDs(); !reflect.DeepEqual(a, []uint32{3}) { t.Fatalf("unexpected series ids: %#v", a) } } @@ -105,7 +105,7 @@ func benchmarkTagBlock_SeriesN(b *testing.B, tagN, valueN int, blk **tsi1.TagBlo } for j := 0; j < valueN; j++ { - if err := 
enc.EncodeValue([]byte(fmt.Sprintf("%08d", j)), false, []uint64{1}); err != nil { + if err := enc.EncodeValue([]byte(fmt.Sprintf("%08d", j)), false, []uint32{1}); err != nil { b.Fatal(err) } } diff --git a/tsdb/index/tsi1/tsi1.go b/tsdb/index/tsi1/tsi1.go index 9e8b9beb07..775ee1e99d 100644 --- a/tsdb/index/tsi1/tsi1.go +++ b/tsdb/index/tsi1/tsi1.go @@ -720,7 +720,7 @@ func (itr *seriesExprIterator) Next() SeriesElem { // seriesIDIterator represents a iterator over a list of series ids. type seriesIDIterator interface { - next() uint64 + next() uint32 } // writeTo writes write v into w. Updates n. @@ -773,6 +773,12 @@ func writeUvarintTo(w io.Writer, v uint64, n *int64) error { return err } +type uint32Slice []uint32 + +func (a uint32Slice) Len() int { return len(a) } +func (a uint32Slice) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a uint32Slice) Less(i, j int) bool { return a[i] < a[j] } + type uint64Slice []uint64 func (a uint64Slice) Len() int { return len(a) } diff --git a/tsdb/shard.go b/tsdb/shard.go index 01c069bcd0..9eeb94df28 100644 --- a/tsdb/shard.go +++ b/tsdb/shard.go @@ -170,6 +170,7 @@ func (s *Shard) WithLogger(log zap.Logger) { s.baseLogger = log if err := s.ready(); err == nil { s.engine.WithLogger(s.baseLogger) + s.index.WithLogger(s.baseLogger) } s.logger = s.baseLogger.With(zap.String("service", "shard")) } @@ -274,6 +275,7 @@ func (s *Shard) Open() error { return err } s.index = idx + idx.WithLogger(s.baseLogger) // Initialize underlying engine. e, err := NewEngine(s.id, idx, s.path, s.walPath, s.options)