diff --git a/cmd/influx/debug.go b/cmd/influx/debug.go index 164dc037d6..6e51c088f2 100644 --- a/cmd/influx/debug.go +++ b/cmd/influx/debug.go @@ -8,7 +8,7 @@ import ( "github.com/influxdata/influxdb/v2" "github.com/influxdata/influxdb/v2/internal/fs" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" + "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1" "github.com/spf13/cobra" ) diff --git a/cmd/influx_inspect/buildtsi/buildtsi.go b/cmd/influx_inspect/buildtsi/buildtsi.go index c14166b0c5..07b9aca4f8 100644 --- a/cmd/influx_inspect/buildtsi/buildtsi.go +++ b/cmd/influx_inspect/buildtsi/buildtsi.go @@ -12,10 +12,9 @@ import ( "github.com/influxdata/influxdb/v2/pkg/fs" "github.com/influxdata/influxdb/v2/storage/wal" "github.com/influxdata/influxdb/v2/toml" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" - "github.com/influxdata/influxdb/v2/tsdb/tsi1" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" + "github.com/influxdata/influxdb/v2/v1/tsdb" + "github.com/influxdata/influxdb/v2/v1/tsdb/index/tsi1" + "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1" "go.uber.org/zap" ) diff --git a/tsdb/seriesfile/series_verify.go b/cmd/influx_inspect/verify/seriesfile/verify.go similarity index 89% rename from tsdb/seriesfile/series_verify.go rename to cmd/influx_inspect/verify/seriesfile/verify.go index 31c8ce3771..68a73dc58f 100644 --- a/tsdb/seriesfile/series_verify.go +++ b/cmd/influx_inspect/verify/seriesfile/verify.go @@ -9,7 +9,7 @@ import ( "sort" "sync" - "github.com/influxdata/influxdb/v2/tsdb" + "github.com/influxdata/influxdb/v2/v1/tsdb" "go.uber.org/zap" ) @@ -125,7 +125,7 @@ func (v Verify) VerifyPartition(partitionPath string) (valid bool, err error) { return false, err } - segments := make([]*SeriesSegment, 0, len(segmentInfos)) + segments := make([]*tsdb.SeriesSegment, 0, len(segmentInfos)) ids := make(map[uint64]IDData) // check every segment @@ -137,7 +137,7 @@ func (v Verify) VerifyPartition(partitionPath string) (valid bool, err error) { } segmentPath := filepath.Join(partitionPath, segmentInfo.Name()) - segmentID, err := ParseSeriesSegmentFilename(segmentInfo.Name()) + segmentID, err := tsdb.ParseSeriesSegmentFilename(segmentInfo.Name()) if err != nil { continue } @@ -150,7 +150,7 @@ func (v Verify) VerifyPartition(partitionPath string) (valid bool, err error) { // open the segment for verifying the index. we want it to be open outside // the for loop as well, so the defer is ok. - segment := NewSeriesSegment(segmentID, segmentPath) + segment := tsdb.NewSeriesSegment(segmentID, segmentPath) if err := segment.Open(); err != nil { return false, err } @@ -186,11 +186,11 @@ func (v Verify) VerifySegment(segmentPath string, ids map[uint64]IDData) (valid v.Logger.Info("Verifying segment") // Open up the segment and grab it's data. - segmentID, err := ParseSeriesSegmentFilename(segmentName) + segmentID, err := tsdb.ParseSeriesSegmentFilename(segmentName) if err != nil { return false, err } - segment := NewSeriesSegment(segmentID, segmentPath) + segment := tsdb.NewSeriesSegment(segmentID, segmentPath) if err := segment.Open(); err != nil { v.Logger.Error("Error opening segment", zap.Error(err)) return false, nil @@ -207,7 +207,7 @@ func (v Verify) VerifySegment(segmentPath string, ids map[uint64]IDData) (valid }() // Skip the header: it has already been verified by the Open call. 
- if err := buf.advance(SeriesSegmentHeaderSize); err != nil { + if err := buf.advance(tsdb.SeriesSegmentHeaderSize); err != nil { v.Logger.Error("Unable to advance buffer", zap.Int64("offset", buf.offset), zap.Error(err)) @@ -224,39 +224,39 @@ entries: return false, nil } - flag, id, key, sz := ReadSeriesEntry(buf.data) + flag, id, key, sz := tsdb.ReadSeriesEntry(buf.data) // Check the flag is valid and for id monotonicity. hasKey := true switch flag { - case SeriesEntryInsertFlag: - if !firstID && prevID > id.RawID() { + case tsdb.SeriesEntryInsertFlag: + if !firstID && prevID > id { v.Logger.Error("ID is not monotonically increasing", zap.Uint64("prev_id", prevID), - zap.Uint64("id", id.RawID()), + zap.Uint64("id", id), zap.Int64("offset", buf.offset)) return false, nil } firstID = false - prevID = id.RawID() + prevID = id if ids != nil { keyCopy := make([]byte, len(key)) copy(keyCopy, key) - ids[id.RawID()] = IDData{ - Offset: JoinSeriesOffset(segment.ID(), uint32(buf.offset)), + ids[id] = IDData{ + Offset: tsdb.JoinSeriesOffset(segment.ID(), uint32(buf.offset)), Key: keyCopy, } } - case SeriesEntryTombstoneFlag: + case tsdb.SeriesEntryTombstoneFlag: hasKey = false if ids != nil { - data := ids[id.RawID()] + data := ids[id] data.Deleted = true - ids[id.RawID()] = data + ids[id] = data } case 0: // if zero, there are no more entries @@ -288,7 +288,7 @@ entries: zap.String("recovered", fmt.Sprint(rec))) } }() - ParseSeriesKey(key) + tsdb.ParseSeriesKey(key) parsed = true }() if !parsed { @@ -311,7 +311,7 @@ entries: // VerifyIndex performs verification on an index in a series file. The error is only returned // if there was some fatal problem with operating, not if there was a problem with the partition. // The ids map must be built from verifying the passed in segments. 
-func (v Verify) VerifyIndex(indexPath string, segments []*SeriesSegment, +func (v Verify) VerifyIndex(indexPath string, segments []*tsdb.SeriesSegment, ids map[uint64]IDData) (valid bool, err error) { v.Logger.Info("Verifying index") @@ -322,7 +322,7 @@ func (v Verify) VerifyIndex(indexPath string, segments []*SeriesSegment, } }() - index := NewSeriesIndex(indexPath) + index := tsdb.NewSeriesIndex(indexPath) if err := index.Open(); err != nil { v.Logger.Error("Error opening index", zap.Error(err)) return false, nil @@ -353,7 +353,7 @@ func (v Verify) VerifyIndex(indexPath string, segments []*SeriesSegment, IDData := ids[id] - if gotDeleted := index.IsDeleted(tsdb.NewSeriesID(id)); gotDeleted != IDData.Deleted { + if gotDeleted := index.IsDeleted(id); gotDeleted != IDData.Deleted { v.Logger.Error("Index inconsistency", zap.Uint64("id", id), zap.Bool("got_deleted", gotDeleted), @@ -367,7 +367,7 @@ func (v Verify) VerifyIndex(indexPath string, segments []*SeriesSegment, } // otherwise, check both that the offset is right and that we get the right id for the key - if gotOffset := index.FindOffsetByID(tsdb.NewSeriesID(id)); gotOffset != IDData.Offset { + if gotOffset := index.FindOffsetByID(id); gotOffset != IDData.Offset { v.Logger.Error("Index inconsistency", zap.Uint64("id", id), zap.Int64("got_offset", gotOffset), @@ -375,10 +375,10 @@ func (v Verify) VerifyIndex(indexPath string, segments []*SeriesSegment, return false, nil } - if gotID := index.FindIDBySeriesKey(segments, IDData.Key); gotID != tsdb.NewSeriesIDTyped(id) { + if gotID := index.FindIDBySeriesKey(segments, IDData.Key); gotID != id { v.Logger.Error("Index inconsistency", zap.Uint64("id", id), - zap.Uint64("got_id", gotID.RawID()), + zap.Uint64("got_id", gotID), zap.Uint64("expected_id", id)) return false, nil } diff --git a/tsdb/seriesfile/series_verify_test.go b/cmd/influx_inspect/verify/seriesfile/verify_test.go similarity index 81% rename from tsdb/seriesfile/series_verify_test.go rename to cmd/influx_inspect/verify/seriesfile/verify_test.go index fd2dde9ccf..42204b1c64 100644 --- a/tsdb/seriesfile/series_verify_test.go +++ b/cmd/influx_inspect/verify/seriesfile/verify_test.go @@ -1,7 +1,6 @@ package seriesfile_test import ( - "context" "fmt" "io" "io/ioutil" @@ -10,9 +9,9 @@ import ( "testing" "time" - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" + "github.com/influxdata/influxdb/v2/cmd/influx_inspect/verify/seriesfile" + "github.com/influxdata/influxdb/v2/v1/models" + "github.com/influxdata/influxdb/v2/v1/tsdb" "go.uber.org/zap" ) @@ -79,8 +78,8 @@ func NewTest(t *testing.T) *Test { // create a series file in the directory err = func() error { - seriesFile := seriesfile.NewSeriesFile(dir) - if err := seriesFile.Open(context.Background()); err != nil { + seriesFile := tsdb.NewSeriesFile(dir) + if err := seriesFile.Open(); err != nil { return err } defer seriesFile.Close() @@ -88,7 +87,7 @@ func NewTest(t *testing.T) *Test { const ( compactionThreshold = 100 - numSeries = 2 * seriesfile.SeriesFilePartitionN * compactionThreshold + numSeries = 2 * tsdb.SeriesFilePartitionN * compactionThreshold ) for _, partition := range seriesFile.Partitions() { @@ -103,17 +102,13 @@ func NewTest(t *testing.T) *Test { tagsSlice = append(tagsSlice, nil) } - keys := seriesfile.GenerateSeriesKeys(names, tagsSlice) - //keyPartitionIDs := seriesFile.SeriesKeysPartitionIDs(keys) - ids := make([]uint64, len(keys)) - - //ids, err := 
seriesFile.CreateSeriesListIfNotExists(names, tagsSlice)
+		ids, err := seriesFile.CreateSeriesListIfNotExists(names, tagsSlice)
 		if err != nil {
 			return err
 		}
 		// delete one series
-		if err := seriesFile.DeleteSeriesIDs([]tsdb.SeriesID{tsdb.NewSeriesID(ids[0])}); err != nil {
+		if err := seriesFile.DeleteSeriesID(ids[0]); err != nil {
 			return err
 		}
diff --git a/cmd/influx_inspect/verify/tombstone/verify.go b/cmd/influx_inspect/verify/tombstone/verify.go
new file mode 100644
index 0000000000..4c0252c801
--- /dev/null
+++ b/cmd/influx_inspect/verify/tombstone/verify.go
@@ -0,0 +1,142 @@
+// Package tombstone verifies integrity of tombstones.
+package tombstone
+
+import (
+	"errors"
+	"flag"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"time"
+
+	"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
+)
+
+// Command represents the program execution for "influx_inspect verify-tombstone".
+type Command struct {
+	Stderr io.Writer
+	Stdout io.Writer
+}
+
+// NewCommand returns a new instance of Command.
+func NewCommand() *Command {
+	return &Command{
+		Stderr: os.Stderr,
+		Stdout: os.Stdout,
+	}
+}
+
+// Run executes the command.
+func (cmd *Command) Run(args ...string) error {
+	runner := verifier{w: cmd.Stdout}
+	fs := flag.NewFlagSet("verify-tombstone", flag.ExitOnError)
+	fs.StringVar(&runner.path, "path", os.Getenv("HOME")+"/.influxdb", "path to find tombstone files")
+	v := fs.Bool("v", false, "verbose: emit periodic progress")
+	vv := fs.Bool("vv", false, "very verbose: emit every tombstone entry key and time range")
+	vvv := fs.Bool("vvv", false, "very very verbose: emit every tombstone entry key and RFC3339Nano time range")
+
+	fs.SetOutput(cmd.Stdout)
+
+	if err := fs.Parse(args); err != nil {
+		return err
+	}
+
+	if *v {
+		runner.verbosity = verbose
+	}
+	if *vv {
+		runner.verbosity = veryVerbose
+	}
+	if *vvv {
+		runner.verbosity = veryVeryVerbose
+	}
+
+	return runner.Run()
+}
+
+const (
+	quiet = iota
+	verbose
+	veryVerbose
+	veryVeryVerbose
+)
+
+type verifier struct {
+	path      string
+	verbosity int
+
+	w     io.Writer
+	files []string
+	f     string
+}
+
+func (v *verifier) loadFiles() error {
+	return filepath.Walk(v.path, func(path string, f os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+		if filepath.Ext(path) == "."+tsm1.TombstoneFileExtension {
+			v.files = append(v.files, path)
+		}
+		return nil
+	})
+}
+
+func (v *verifier) Next() bool {
+	if len(v.files) == 0 {
+		return false
+	}
+
+	v.f, v.files = v.files[0], v.files[1:]
+	return true
+}
+
+func (v *verifier) Run() error {
+	if err := v.loadFiles(); err != nil {
+		return err
+	}
+
+	var failed bool
+	start := time.Now()
+	for v.Next() {
+		if v.verbosity > quiet {
+			fmt.Fprintf(v.w, "Verifying: %q\n", v.f)
+		}
+
+		tombstoner := tsm1.NewTombstoner(v.f, nil)
+		if !tombstoner.HasTombstones() {
+			fmt.Fprintf(v.w, "%s has no tombstone entries", v.f)
+			continue
+		}
+
+		var totalEntries int64
+		err := tombstoner.Walk(func(t tsm1.Tombstone) error {
+			totalEntries++
+			if v.verbosity > quiet && totalEntries%(10*1e6) == 0 {
+				fmt.Fprintf(v.w, "Verified %d tombstone entries\n", totalEntries)
+			} else if v.verbosity > verbose {
+				var min interface{} = t.Min
+				var max interface{} = t.Max
+				if v.verbosity > veryVerbose {
+					min = time.Unix(0, t.Min)
+					max = time.Unix(0, t.Max)
+				}
+				fmt.Printf("key: %q, min: %v, max: %v\n", t.Key, min, max)
+			}
+			return nil
+		})
+		if err != nil {
+			fmt.Fprintf(v.w, "%q failed to walk tombstone entries: %v. Last okay entry: %d\n", v.f, err, totalEntries)
+			failed = true
+			continue
+		}
+
+		fmt.Fprintf(v.w, "Completed verification for %q in %v.\nVerified %d entries\n\n", v.f, time.Since(start), totalEntries)
+	}
+
+	if failed {
+		return errors.New("failed tombstone verification")
+	}
+	return nil
+}
diff --git a/cmd/influx_inspect/verify/tsm/verify.go b/cmd/influx_inspect/verify/tsm/verify.go
new file mode 100644
index 0000000000..777cbd7a33
--- /dev/null
+++ b/cmd/influx_inspect/verify/tsm/verify.go
@@ -0,0 +1,232 @@
+// Package tsm verifies integrity of TSM files.
+package tsm
+
+import (
+	"flag"
+	"fmt"
+	"hash/crc32"
+	"io"
+	"os"
+	"path/filepath"
+	"text/tabwriter"
+	"time"
+	"unicode/utf8"
+
+	"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
+	"github.com/pkg/errors"
+)
+
+// Command represents the program execution for "influx_inspect verify".
+type Command struct {
+	Stderr io.Writer
+	Stdout io.Writer
+}
+
+// NewCommand returns a new instance of Command.
+func NewCommand() *Command {
+	return &Command{
+		Stderr: os.Stderr,
+		Stdout: os.Stdout,
+	}
+}
+
+// Run executes the command.
+func (cmd *Command) Run(args ...string) error {
+	var path string
+	fs := flag.NewFlagSet("verify", flag.ExitOnError)
+	fs.StringVar(&path, "dir", os.Getenv("HOME")+"/.influxdb", "Root storage path. [$HOME/.influxdb]")
+
+	var checkUTF8 bool
+	fs.BoolVar(&checkUTF8, "check-utf8", false, "Verify series keys are valid UTF-8")
+
+	fs.SetOutput(cmd.Stdout)
+	fs.Usage = cmd.printUsage
+
+	if err := fs.Parse(args); err != nil {
+		return err
+	}
+
+	dataPath := filepath.Join(path, "data")
+	tw := tabwriter.NewWriter(cmd.Stdout, 16, 8, 0, '\t', 0)
+
+	var runner verifier
+	if checkUTF8 {
+		runner = &verifyUTF8{}
+	} else {
+		runner = &verifyChecksums{}
+	}
+	err := runner.Run(tw, dataPath)
+	tw.Flush()
+	return err
+}
+
+// printUsage prints the usage message to STDERR.
+func (cmd *Command) printUsage() {
+	usage := fmt.Sprintf(`Verifies the integrity of TSM files.
+
+Usage: influx_inspect verify [flags]
+
+    -dir
+            The root storage path.
+            Must be changed if you are using a non-default storage directory.
+            Defaults to "%[1]s/.influxdb".
+    -check-utf8
+            Verify series keys are valid UTF-8.
+            This check skips verification of block checksums.
+`, os.Getenv("HOME"))
+
+	fmt.Fprintf(cmd.Stdout, usage)
+}
+
+type verifyTSM struct {
+	files []string
+	f     string
+	start time.Time
+	err   error
+}
+
+func (v *verifyTSM) loadFiles(dataPath string) error {
+	err := filepath.Walk(dataPath, func(path string, f os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+		if filepath.Ext(path) == "."+tsm1.TSMFileExtension {
+			v.files = append(v.files, path)
+		}
+		return nil
+	})
+
+	if err != nil {
+		return errors.Wrap(err, "could not load storage files (use -dir for custom storage root)")
+	}
+
+	return nil
+}
+
+func (v *verifyTSM) Next() bool {
+	if len(v.files) == 0 {
+		return false
+	}
+
+	v.f, v.files = v.files[0], v.files[1:]
+	return true
+}
+
+func (v *verifyTSM) TSMReader() (string, *tsm1.TSMReader) {
+	file, err := os.OpenFile(v.f, os.O_RDONLY, 0600)
+	if err != nil {
+		v.err = err
+		return "", nil
+	}
+
+	reader, err := tsm1.NewTSMReader(file)
+	if err != nil {
+		file.Close()
+		v.err = err
+		return "", nil
+	}
+
+	return v.f, reader
+}
+
+func (v *verifyTSM) Start() {
+	v.start = time.Now()
+}
+
+func (v *verifyTSM) Elapsed() time.Duration {
+	return time.Since(v.start)
+}
+
+type verifyChecksums struct {
+	verifyTSM
+	totalErrors int
+	total       int
+}
+
+func (v *verifyChecksums) Run(w io.Writer, dataPath string) error {
+	if err := v.loadFiles(dataPath); err != nil {
+		return err
+	}
+
+	v.Start()
+
+	for v.Next() {
+		f, reader := v.TSMReader()
+		if reader == nil {
+			break
+		}
+
+		blockItr := reader.BlockIterator()
+		fileErrors := 0
+		count := 0
+		for blockItr.Next() {
+			v.total++
+			key, _, _, _, checksum, buf, err := blockItr.Read()
+			if err != nil {
+				v.totalErrors++
+				fileErrors++
+				fmt.Fprintf(w, "%s: could not get checksum for key %v block %d due to error: %q\n", f, key, count, err)
+			} else if expected := crc32.ChecksumIEEE(buf); checksum != expected {
+				v.totalErrors++
+				fileErrors++
+				fmt.Fprintf(w, "%s: got %d but expected %d for key %v, block %d\n", f, checksum, expected, key, count)
+			}
+			count++
+		}
+		if fileErrors == 0 {
+			fmt.Fprintf(w, "%s: healthy\n", f)
+		}
+		reader.Close()
+	}
+
+	fmt.Fprintf(w, "Broken Blocks: %d / %d, in %vs\n", v.totalErrors, v.total, v.Elapsed().Seconds())
+
+	return v.err
+}
+
+type verifyUTF8 struct {
+	verifyTSM
+	totalErrors int
+	total       int
+}
+
+func (v *verifyUTF8) Run(w io.Writer, dataPath string) error {
+	if err := v.loadFiles(dataPath); err != nil {
+		return err
+	}
+
+	v.Start()
+
+	for v.Next() {
+		f, reader := v.TSMReader()
+		if reader == nil {
+			break
+		}
+
+		n := reader.KeyCount()
+		fileErrors := 0
+		v.total += n
+		for i := 0; i < n; i++ {
+			key, _ := reader.KeyAt(i)
+			if !utf8.Valid(key) {
+				v.totalErrors++
+				fileErrors++
+				fmt.Fprintf(w, "%s: key #%d is not valid UTF-8\n", f, i)
+			}
+		}
+		if fileErrors == 0 {
+			fmt.Fprintf(w, "%s: healthy\n", f)
+		}
+	}
+
+	fmt.Fprintf(w, "Invalid Keys: %d / %d, in %vs\n", v.totalErrors, v.total, v.Elapsed().Seconds())
+	if v.totalErrors > 0 && v.err == nil {
+		v.err = errors.New("check-utf8: failed")
+	}
+
+	return v.err
+}
+
+type verifier interface {
+	Run(w io.Writer, dataPath string) error
+}
diff --git a/cmd/influx_inspect/verify/tsm/verify_test.go b/cmd/influx_inspect/verify/tsm/verify_test.go
new file mode 100644
index 0000000000..7b6a3911bc
--- /dev/null
+++ b/cmd/influx_inspect/verify/tsm/verify_test.go
@@ -0,0 +1,3 @@
+package tsm_test
+
+// TODO: write some tests
diff --git a/cmd/influxd/generate/generator.go b/cmd/influxd/generate/generator.go
index 60901eddf9..d982c5092a 100644
--- a/cmd/influxd/generate/generator.go
+++ 
b/cmd/influxd/generate/generator.go @@ -14,10 +14,9 @@ import ( "github.com/influxdata/influxdb/v2/pkg/data/gen" "github.com/influxdata/influxdb/v2/pkg/limiter" "github.com/influxdata/influxdb/v2/storage" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" - "github.com/influxdata/influxdb/v2/tsdb/tsi1" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" + "github.com/influxdata/influxdb/v2/v1/tsdb" + "github.com/influxdata/influxdb/v2/v1/tsdb/index/tsi1" + "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1" ) type Generator struct { diff --git a/cmd/influxd/generate/internal/shard/writer.go b/cmd/influxd/generate/internal/shard/writer.go index 5cbced2db6..d3af2c0b08 100644 --- a/cmd/influxd/generate/internal/shard/writer.go +++ b/cmd/influxd/generate/internal/shard/writer.go @@ -6,7 +6,7 @@ import ( "path/filepath" "github.com/influxdata/influxdb/v2/pkg/data/gen" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" + "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1" ) const ( diff --git a/cmd/influxd/inspect/build_tsi.go b/cmd/influxd/inspect/build_tsi.go index a230647ec6..32124d5b28 100644 --- a/cmd/influxd/inspect/build_tsi.go +++ b/cmd/influxd/inspect/build_tsi.go @@ -13,9 +13,9 @@ import ( "github.com/influxdata/influxdb/v2/cmd/influx_inspect/buildtsi" "github.com/influxdata/influxdb/v2/logger" "github.com/influxdata/influxdb/v2/storage" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" - "github.com/influxdata/influxdb/v2/tsdb/tsi1" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" + "github.com/influxdata/influxdb/v2/v1/tsdb" + "github.com/influxdata/influxdb/v2/v1/tsdb/index/tsi1" + "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1" "github.com/spf13/cobra" ) diff --git a/cmd/influxd/inspect/compact_series_file.go b/cmd/influxd/inspect/compact_series_file.go index c5250f2e0e..86fceb8d75 100644 --- a/cmd/influxd/inspect/compact_series_file.go +++ b/cmd/influxd/inspect/compact_series_file.go @@ -12,7 +12,6 @@ import ( "github.com/influxdata/influxdb/v2/internal/fs" "github.com/influxdata/influxdb/v2/storage" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" "github.com/spf13/cobra" "golang.org/x/sync/errgroup" ) diff --git a/cmd/influxd/inspect/dump_tsi1.go b/cmd/influxd/inspect/dump_tsi1.go index ae5e266b83..e9b8062672 100644 --- a/cmd/influxd/inspect/dump_tsi1.go +++ b/cmd/influxd/inspect/dump_tsi1.go @@ -8,7 +8,7 @@ import ( "regexp" "github.com/influxdata/influxdb/v2/internal/fs" - "github.com/influxdata/influxdb/v2/tsdb/tsi1" + "github.com/influxdata/influxdb/v2/v1/tsdb/index/tsi1" "github.com/spf13/cobra" "go.uber.org/zap" ) diff --git a/cmd/influxd/inspect/export_blocks.go b/cmd/influxd/inspect/export_blocks.go index 13963893ca..d630804157 100644 --- a/cmd/influxd/inspect/export_blocks.go +++ b/cmd/influxd/inspect/export_blocks.go @@ -3,7 +3,7 @@ package inspect import ( "os" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" + "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1" "github.com/spf13/cobra" ) diff --git a/cmd/influxd/inspect/export_index.go b/cmd/influxd/inspect/export_index.go index 814d2c6c05..38cc95802a 100644 --- a/cmd/influxd/inspect/export_index.go +++ b/cmd/influxd/inspect/export_index.go @@ -7,8 +7,8 @@ import ( "path/filepath" "github.com/influxdata/influxdb/v2/internal/fs" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" - "github.com/influxdata/influxdb/v2/tsdb/tsi1" + "github.com/influxdata/influxdb/v2/v1/tsdb" + "github.com/influxdata/influxdb/v2/v1/tsdb/index/tsi1" 
"github.com/spf13/cobra" ) @@ -32,7 +32,7 @@ SQL format for easier inspection and debugging.`, cmd.RunE = func(cmd *cobra.Command, args []string) error { // Initialize series file. - sfile := seriesfile.NewSeriesFile(seriesFilePath) + sfile := tsdb.NewSeriesFile(seriesFilePath) if err := sfile.Open(context.Background()); err != nil { return err } diff --git a/cmd/influxd/inspect/report_tsi1.go b/cmd/influxd/inspect/report_tsi1.go index 352bd2210e..82bf6b0356 100644 --- a/cmd/influxd/inspect/report_tsi1.go +++ b/cmd/influxd/inspect/report_tsi1.go @@ -6,7 +6,7 @@ import ( "os" "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/tsdb/tsi1" + "github.com/influxdata/influxdb/v2/v1/tsdb/index/tsi1" "github.com/spf13/cobra" ) diff --git a/cmd/influxd/inspect/report_tsm.go b/cmd/influxd/inspect/report_tsm.go index 9a6b24089e..4caeebbb8b 100644 --- a/cmd/influxd/inspect/report_tsm.go +++ b/cmd/influxd/inspect/report_tsm.go @@ -5,7 +5,7 @@ import ( "github.com/influxdata/influxdb/v2" "github.com/influxdata/influxdb/v2/internal/fs" "github.com/influxdata/influxdb/v2/kit/errors" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" + "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1" "github.com/spf13/cobra" "os" "path/filepath" diff --git a/cmd/influxd/inspect/verify_series.go b/cmd/influxd/inspect/verify_series.go index d36353fc91..b590476aef 100644 --- a/cmd/influxd/inspect/verify_series.go +++ b/cmd/influxd/inspect/verify_series.go @@ -4,8 +4,8 @@ import ( "os" "runtime" + "github.com/influxdata/influxdb/v2/cmd/influx_inspect/verify/seriesfile" "github.com/influxdata/influxdb/v2/logger" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" "github.com/spf13/cobra" "go.uber.org/zap/zapcore" ) diff --git a/cmd/influxd/inspect/verify_tsm.go b/cmd/influxd/inspect/verify_tsm.go index 6791ed1de5..7ab647fbd4 100644 --- a/cmd/influxd/inspect/verify_tsm.go +++ b/cmd/influxd/inspect/verify_tsm.go @@ -6,7 +6,7 @@ import ( "path/filepath" "github.com/influxdata/influxdb/v2/kit/cli" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" + "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1" "github.com/spf13/cobra" ) diff --git a/cmd/influxd/launcher/engine.go b/cmd/influxd/launcher/engine.go index 3dd8452965..1ba180c11b 100644 --- a/cmd/influxd/launcher/engine.go +++ b/cmd/influxd/launcher/engine.go @@ -13,7 +13,7 @@ import ( "github.com/influxdata/influxdb/v2/models" "github.com/influxdata/influxdb/v2/storage" "github.com/influxdata/influxdb/v2/storage/reads" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" "github.com/influxdata/influxql" "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" diff --git a/cmd/influxd/launcher/launcher.go b/cmd/influxd/launcher/launcher.go index 45af1a0ba9..d933dd7f8c 100644 --- a/cmd/influxd/launcher/launcher.go +++ b/cmd/influxd/launcher/launcher.go @@ -62,10 +62,10 @@ import ( "github.com/influxdata/influxdb/v2/task/backend/scheduler" "github.com/influxdata/influxdb/v2/telemetry" "github.com/influxdata/influxdb/v2/tenant" - _ "github.com/influxdata/influxdb/v2/tsdb/tsi1" // needed for tsi1 - _ "github.com/influxdata/influxdb/v2/tsdb/tsm1" // needed for tsm1 storage2 "github.com/influxdata/influxdb/v2/v1/services/storage" "github.com/influxdata/influxdb/v2/v1/storage/reads" + _ "github.com/influxdata/influxdb/v2/v1/tsdb/index/tsi1" // needed for tsi1 + _ "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1" // needed for tsm1 "github.com/influxdata/influxdb/v2/vault" pzap 
"github.com/influxdata/influxdb/v2/zap" "github.com/opentracing/opentracing-go" diff --git a/cmd/influxd/launcher/storage_test.go b/cmd/influxd/launcher/storage_test.go index a40dd2e05c..7c8cb5926c 100644 --- a/cmd/influxd/launcher/storage_test.go +++ b/cmd/influxd/launcher/storage_test.go @@ -13,7 +13,7 @@ import ( "github.com/influxdata/influxdb/v2/cmd/influxd/launcher" "github.com/influxdata/influxdb/v2/http" "github.com/influxdata/influxdb/v2/toml" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" + "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1" ) func TestStorage_WriteAndQuery(t *testing.T) { diff --git a/cmd/influxd/main.go b/cmd/influxd/main.go index 8a41aadd91..a08a01c15e 100644 --- a/cmd/influxd/main.go +++ b/cmd/influxd/main.go @@ -13,8 +13,8 @@ import ( "github.com/influxdata/influxdb/v2/cmd/influxd/launcher" "github.com/influxdata/influxdb/v2/cmd/influxd/restore" _ "github.com/influxdata/influxdb/v2/query/builtin" - _ "github.com/influxdata/influxdb/v2/tsdb/tsi1" - _ "github.com/influxdata/influxdb/v2/tsdb/tsm1" + _ "github.com/influxdata/influxdb/v2/v1/tsdb/index/tsi1" + _ "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1" "github.com/spf13/cobra" ) diff --git a/gather/recorder.go b/gather/recorder.go index be746874cb..614db95002 100644 --- a/gather/recorder.go +++ b/gather/recorder.go @@ -6,7 +6,7 @@ import ( "github.com/influxdata/influxdb/v2/nats" "github.com/influxdata/influxdb/v2/storage" - "github.com/influxdata/influxdb/v2/tsdb" + "github.com/influxdata/influxdb/v2/v1/tsdb" "go.uber.org/zap" ) diff --git a/kit/cli/idflag.go b/kit/cli/idflag.go index ae99ca55ad..8f4a9689b8 100644 --- a/kit/cli/idflag.go +++ b/kit/cli/idflag.go @@ -2,7 +2,7 @@ package cli import ( "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/tsdb" + "github.com/influxdata/influxdb/v2/v1/tsdb" "github.com/spf13/cobra" "github.com/spf13/pflag" ) diff --git a/mock/reads_resultset.go b/mock/reads_resultset.go index ebc20f0c0e..16afa54bad 100644 --- a/mock/reads_resultset.go +++ b/mock/reads_resultset.go @@ -4,7 +4,7 @@ import ( "github.com/influxdata/influxdb/v2/models" "github.com/influxdata/influxdb/v2/pkg/data/gen" "github.com/influxdata/influxdb/v2/storage/reads" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" ) type GeneratorResultSet struct { diff --git a/mock/reads_resultset_test.go b/mock/reads_resultset_test.go index 1d385b4ee2..443983c698 100644 --- a/mock/reads_resultset_test.go +++ b/mock/reads_resultset_test.go @@ -9,7 +9,7 @@ import ( "github.com/influxdata/influxdb/v2/mock" "github.com/influxdata/influxdb/v2/pkg/data/gen" "github.com/influxdata/influxdb/v2/storage/reads" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" ) func mustNewSpecFromToml(tb testing.TB, toml string) *gen.Spec { diff --git a/models/points_test.go b/models/points_test.go index 083f11c738..263bd1c50f 100644 --- a/models/points_test.go +++ b/models/points_test.go @@ -19,7 +19,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/influxdata/influxdb/v2" "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb" + "github.com/influxdata/influxdb/v2/v1/tsdb" "github.com/stretchr/testify/assert" ) diff --git a/pkg/data/gen/arrays.gen.go b/pkg/data/gen/arrays.gen.go index 4e111f20b4..666832c58e 100644 --- a/pkg/data/gen/arrays.gen.go +++ b/pkg/data/gen/arrays.gen.go @@ -7,8 +7,8 @@ package gen import ( - 
"github.com/influxdata/influxdb/v2/tsdb/cursors" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1" ) type FloatValues interface { diff --git a/pkg/data/gen/arrays.gen.go.tmpl b/pkg/data/gen/arrays.gen.go.tmpl index 776b1a8ec0..bd854e3400 100644 --- a/pkg/data/gen/arrays.gen.go.tmpl +++ b/pkg/data/gen/arrays.gen.go.tmpl @@ -1,8 +1,8 @@ package gen import ( - "github.com/influxdata/influxdb/v2/tsdb/cursors" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1" ) {{range .}} diff --git a/pkg/data/gen/merged_series_generator_test.go b/pkg/data/gen/merged_series_generator_test.go index 84c00e6d09..8dd3aca286 100644 --- a/pkg/data/gen/merged_series_generator_test.go +++ b/pkg/data/gen/merged_series_generator_test.go @@ -10,7 +10,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/influxdata/influxdb/v2" "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb" + "github.com/influxdata/influxdb/v2/v1/tsdb" ) var ( diff --git a/pkg/data/gen/specs.go b/pkg/data/gen/specs.go index 9f995abc76..ef3febff14 100644 --- a/pkg/data/gen/specs.go +++ b/pkg/data/gen/specs.go @@ -13,7 +13,7 @@ import ( "github.com/BurntSushi/toml" "github.com/influxdata/influxdb/v2" "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb" + "github.com/influxdata/influxdb/v2/v1/tsdb" "github.com/pkg/errors" ) diff --git a/pkg/data/gen/values_sequence.gen.go b/pkg/data/gen/values_sequence.gen.go index 152da0cabc..73a58a0b99 100644 --- a/pkg/data/gen/values_sequence.gen.go +++ b/pkg/data/gen/values_sequence.gen.go @@ -8,7 +8,7 @@ package gen import ( "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" ) type FloatValuesSequence interface { diff --git a/pkg/data/gen/values_sequence.gen.go.tmpl b/pkg/data/gen/values_sequence.gen.go.tmpl index 562704cf5b..0c890006ed 100644 --- a/pkg/data/gen/values_sequence.gen.go.tmpl +++ b/pkg/data/gen/values_sequence.gen.go.tmpl @@ -2,7 +2,7 @@ package gen import ( "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" ) {{range .}} diff --git a/predicate/predicate.go b/predicate/predicate.go index b96f3e1fdb..baa5c5bf94 100644 --- a/predicate/predicate.go +++ b/predicate/predicate.go @@ -3,7 +3,7 @@ package predicate import ( "github.com/influxdata/influxdb/v2" "github.com/influxdata/influxdb/v2/storage/reads/datatypes" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" + "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1" ) // Node is a predicate node. diff --git a/query/storage.go b/query/storage.go index c57393a707..46d060fc87 100644 --- a/query/storage.go +++ b/query/storage.go @@ -10,7 +10,7 @@ import ( "github.com/influxdata/flux/plan" "github.com/influxdata/influxdb/v2" "github.com/influxdata/influxdb/v2/storage/reads/datatypes" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" ) // StorageReader is an interface for reading tables from the storage subsystem. 
diff --git a/storage/compat/compat.go b/storage/compat/compat.go index 44ebdb4367..8192e17b50 100644 --- a/storage/compat/compat.go +++ b/storage/compat/compat.go @@ -10,7 +10,7 @@ package compat import ( "github.com/influxdata/influxdb/v2/storage" "github.com/influxdata/influxdb/v2/toml" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" + "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1" ) // Config matches the old toml layout from the influxdb repo, so that we can read diff --git a/storage/config.go b/storage/config.go index 8d7fd6d2fe..f6d6169dd1 100644 --- a/storage/config.go +++ b/storage/config.go @@ -5,9 +5,8 @@ import ( "time" "github.com/influxdata/influxdb/v2/toml" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" - "github.com/influxdata/influxdb/v2/tsdb/tsi1" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" + "github.com/influxdata/influxdb/v2/v1/tsdb/index/tsi1" + "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1" ) // Default configuration values. diff --git a/storage/engine.go b/storage/engine.go index 5a8032e68e..1450e6cc50 100644 --- a/storage/engine.go +++ b/storage/engine.go @@ -14,11 +14,11 @@ import ( "github.com/influxdata/influxdb/v2" "github.com/influxdata/influxdb/v2/kit/tracing" "github.com/influxdata/influxdb/v2/logger" - "github.com/influxdata/influxdb/v2/tsdb/cursors" "github.com/influxdata/influxdb/v2/v1/coordinator" "github.com/influxdata/influxdb/v2/v1/models" "github.com/influxdata/influxdb/v2/v1/services/meta" "github.com/influxdata/influxdb/v2/v1/tsdb" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" "github.com/influxdata/influxql" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" diff --git a/storage/engine_measurement_notime_schema.go b/storage/engine_measurement_notime_schema.go index e35475f85b..8f2dda80dd 100644 --- a/storage/engine_measurement_notime_schema.go +++ b/storage/engine_measurement_notime_schema.go @@ -4,7 +4,7 @@ import ( "context" "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" "github.com/influxdata/influxql" ) diff --git a/storage/engine_measurement_schema.go b/storage/engine_measurement_schema.go index 788c17ac3c..255a2dc87e 100644 --- a/storage/engine_measurement_schema.go +++ b/storage/engine_measurement_schema.go @@ -4,7 +4,7 @@ import ( "context" "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" "github.com/influxdata/influxql" ) diff --git a/storage/engine_schema.go b/storage/engine_schema.go index 27170860d9..33783c6420 100644 --- a/storage/engine_schema.go +++ b/storage/engine_schema.go @@ -4,7 +4,7 @@ import ( "context" "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" "github.com/influxdata/influxql" ) diff --git a/storage/engine_test.go b/storage/engine_test.go index 9cfe35bcbe..8ea4376c49 100644 --- a/storage/engine_test.go +++ b/storage/engine_test.go @@ -15,8 +15,8 @@ import ( "github.com/influxdata/influxdb/v2/models" "github.com/influxdata/influxdb/v2/storage" "github.com/influxdata/influxdb/v2/storage/reads/datatypes" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" + "github.com/influxdata/influxdb/v2/v1/tsdb" + "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1" "github.com/prometheus/client_golang/prometheus" ) diff --git a/storage/flux/reader.go 
b/storage/flux/reader.go index 9b10519d2f..b471a39c2a 100644 --- a/storage/flux/reader.go +++ b/storage/flux/reader.go @@ -16,7 +16,7 @@ import ( "github.com/influxdata/influxdb/v2/query" storage "github.com/influxdata/influxdb/v2/storage/reads" "github.com/influxdata/influxdb/v2/storage/reads/datatypes" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" ) // GroupCursorError is returned when two different cursor types diff --git a/storage/flux/table.gen.go b/storage/flux/table.gen.go index 6f9046afc5..764665873d 100644 --- a/storage/flux/table.gen.go +++ b/storage/flux/table.gen.go @@ -18,8 +18,7 @@ import ( "github.com/influxdata/influxdb/v2" "github.com/influxdata/influxdb/v2/models" storage "github.com/influxdata/influxdb/v2/storage/reads" - "github.com/influxdata/influxdb/v2/storage/reads/datatypes" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" ) // @@ -308,7 +307,7 @@ func (t *floatWindowTable) advance() bool { } else { cr.cols[startColIdx] = start cr.cols[stopColIdx] = stop - cr.cols[valueColIdxWithoutTime] = values + cr.cols[windowedValueColIdx] = values } t.appendTags(cr) return true @@ -711,93 +710,26 @@ func (t *floatGroupTable) Do(f func(flux.ColReader) error) error { } func (t *floatGroupTable) advance() bool { - if t.cur == nil { - // For group aggregates, we will try to get all the series and all table buffers within those series - // all at once and merge them into one row when this advance() function is first called. - // At the end of this process, t.advanceCursor() already returns false and t.cur becomes nil. - // But we still need to return true to indicate that there is data to be returned. - // The second time when we call this advance(), t.cur is already nil, so we directly return false. +RETRY: + a := t.cur.Next() + l := a.Len() + if l == 0 { + if t.advanceCursor() { + goto RETRY + } + return false } - var arr *cursors.FloatArray - var len int - for { - arr = t.cur.Next() - len = arr.Len() - if len > 0 { - break - } - if !t.advanceCursor() { - return false - } - } - // handle the group without aggregate case - if t.gc.Aggregate() == nil { - // Retrieve the buffer for the data to avoid allocating - // additional slices. If the buffer is still being used - // because the references were retained, then we will - // allocate a new buffer. - colReader := t.allocateBuffer(len) - colReader.cols[timeColIdx] = arrow.NewInt(arr.Timestamps, t.alloc) - colReader.cols[valueColIdx] = t.toArrowBuffer(arr.Values) - t.appendTags(colReader) - t.appendBounds(colReader) - return true - } - - // handle the group with aggregate case - var value float64 - // For group count, sum, min, and max, the timestamp here is always math.MaxInt64. - // their final result does not contain _time, so this timestamp value can be anything - // and it won't matter. - // For group first, we need to assign the initial value to math.MaxInt64 so - // we can find the row with the smallest timestamp. - // Do not worry about data with math.MaxInt64 as its real timestamp. - // In OSS we require a |> range() call in the query and a math.MaxInt64 timestamp - // cannot make it through. 
- var timestamp int64 = math.MaxInt64 - if t.gc.Aggregate().Type == datatypes.AggregateTypeLast { - timestamp = math.MinInt64 - } - for { - // note that for the group aggregate case, len here should always be 1 - for i := 0; i < len; i++ { - switch t.gc.Aggregate().Type { - case datatypes.AggregateTypeCount: - panic("unsupported for aggregate count: Float") - case datatypes.AggregateTypeSum: - value += arr.Values[i] - case datatypes.AggregateTypeFirst: - if arr.Timestamps[i] < timestamp { - timestamp = arr.Timestamps[i] - value = arr.Values[i] - } - case datatypes.AggregateTypeLast: - if arr.Timestamps[i] > timestamp { - timestamp = arr.Timestamps[i] - value = arr.Values[i] - } - } - } - arr = t.cur.Next() - len = arr.Len() - if len > 0 { - continue - } - if !t.advanceCursor() { - break - } - } - colReader := t.allocateBuffer(1) - if IsSelector(t.gc.Aggregate()) { - colReader.cols[timeColIdx] = arrow.NewInt([]int64{timestamp}, t.alloc) - colReader.cols[valueColIdx] = t.toArrowBuffer([]float64{value}) - } else { - colReader.cols[valueColIdxWithoutTime] = t.toArrowBuffer([]float64{value}) - } - t.appendTags(colReader) - t.appendBounds(colReader) + // Retrieve the buffer for the data to avoid allocating + // additional slices. If the buffer is still being used + // because the references were retained, then we will + // allocate a new buffer. + cr := t.allocateBuffer(l) + cr.cols[timeColIdx] = arrow.NewInt(a.Timestamps, t.alloc) + cr.cols[valueColIdx] = t.toArrowBuffer(a.Values) + t.appendTags(cr) + t.appendBounds(cr) return true } @@ -1129,7 +1061,7 @@ func (t *integerWindowTable) advance() bool { } else { cr.cols[startColIdx] = start cr.cols[stopColIdx] = stop - cr.cols[valueColIdxWithoutTime] = values + cr.cols[windowedValueColIdx] = values } t.appendTags(cr) return true @@ -1532,93 +1464,26 @@ func (t *integerGroupTable) Do(f func(flux.ColReader) error) error { } func (t *integerGroupTable) advance() bool { - if t.cur == nil { - // For group aggregates, we will try to get all the series and all table buffers within those series - // all at once and merge them into one row when this advance() function is first called. - // At the end of this process, t.advanceCursor() already returns false and t.cur becomes nil. - // But we still need to return true to indicate that there is data to be returned. - // The second time when we call this advance(), t.cur is already nil, so we directly return false. +RETRY: + a := t.cur.Next() + l := a.Len() + if l == 0 { + if t.advanceCursor() { + goto RETRY + } + return false } - var arr *cursors.IntegerArray - var len int - for { - arr = t.cur.Next() - len = arr.Len() - if len > 0 { - break - } - if !t.advanceCursor() { - return false - } - } - // handle the group without aggregate case - if t.gc.Aggregate() == nil { - // Retrieve the buffer for the data to avoid allocating - // additional slices. If the buffer is still being used - // because the references were retained, then we will - // allocate a new buffer. - colReader := t.allocateBuffer(len) - colReader.cols[timeColIdx] = arrow.NewInt(arr.Timestamps, t.alloc) - colReader.cols[valueColIdx] = t.toArrowBuffer(arr.Values) - t.appendTags(colReader) - t.appendBounds(colReader) - return true - } - - // handle the group with aggregate case - var value int64 - // For group count, sum, min, and max, the timestamp here is always math.MaxInt64. - // their final result does not contain _time, so this timestamp value can be anything - // and it won't matter. 
- // For group first, we need to assign the initial value to math.MaxInt64 so - // we can find the row with the smallest timestamp. - // Do not worry about data with math.MaxInt64 as its real timestamp. - // In OSS we require a |> range() call in the query and a math.MaxInt64 timestamp - // cannot make it through. - var timestamp int64 = math.MaxInt64 - if t.gc.Aggregate().Type == datatypes.AggregateTypeLast { - timestamp = math.MinInt64 - } - for { - // note that for the group aggregate case, len here should always be 1 - for i := 0; i < len; i++ { - switch t.gc.Aggregate().Type { - case datatypes.AggregateTypeCount: - fallthrough - case datatypes.AggregateTypeSum: - value += arr.Values[i] - case datatypes.AggregateTypeFirst: - if arr.Timestamps[i] < timestamp { - timestamp = arr.Timestamps[i] - value = arr.Values[i] - } - case datatypes.AggregateTypeLast: - if arr.Timestamps[i] > timestamp { - timestamp = arr.Timestamps[i] - value = arr.Values[i] - } - } - } - arr = t.cur.Next() - len = arr.Len() - if len > 0 { - continue - } - if !t.advanceCursor() { - break - } - } - colReader := t.allocateBuffer(1) - if IsSelector(t.gc.Aggregate()) { - colReader.cols[timeColIdx] = arrow.NewInt([]int64{timestamp}, t.alloc) - colReader.cols[valueColIdx] = t.toArrowBuffer([]int64{value}) - } else { - colReader.cols[valueColIdxWithoutTime] = t.toArrowBuffer([]int64{value}) - } - t.appendTags(colReader) - t.appendBounds(colReader) + // Retrieve the buffer for the data to avoid allocating + // additional slices. If the buffer is still being used + // because the references were retained, then we will + // allocate a new buffer. + cr := t.allocateBuffer(l) + cr.cols[timeColIdx] = arrow.NewInt(a.Timestamps, t.alloc) + cr.cols[valueColIdx] = t.toArrowBuffer(a.Values) + t.appendTags(cr) + t.appendBounds(cr) return true } @@ -1948,7 +1813,7 @@ func (t *unsignedWindowTable) advance() bool { } else { cr.cols[startColIdx] = start cr.cols[stopColIdx] = stop - cr.cols[valueColIdxWithoutTime] = values + cr.cols[windowedValueColIdx] = values } t.appendTags(cr) return true @@ -2351,93 +2216,26 @@ func (t *unsignedGroupTable) Do(f func(flux.ColReader) error) error { } func (t *unsignedGroupTable) advance() bool { - if t.cur == nil { - // For group aggregates, we will try to get all the series and all table buffers within those series - // all at once and merge them into one row when this advance() function is first called. - // At the end of this process, t.advanceCursor() already returns false and t.cur becomes nil. - // But we still need to return true to indicate that there is data to be returned. - // The second time when we call this advance(), t.cur is already nil, so we directly return false. +RETRY: + a := t.cur.Next() + l := a.Len() + if l == 0 { + if t.advanceCursor() { + goto RETRY + } + return false } - var arr *cursors.UnsignedArray - var len int - for { - arr = t.cur.Next() - len = arr.Len() - if len > 0 { - break - } - if !t.advanceCursor() { - return false - } - } - // handle the group without aggregate case - if t.gc.Aggregate() == nil { - // Retrieve the buffer for the data to avoid allocating - // additional slices. If the buffer is still being used - // because the references were retained, then we will - // allocate a new buffer. 
- colReader := t.allocateBuffer(len) - colReader.cols[timeColIdx] = arrow.NewInt(arr.Timestamps, t.alloc) - colReader.cols[valueColIdx] = t.toArrowBuffer(arr.Values) - t.appendTags(colReader) - t.appendBounds(colReader) - return true - } - - // handle the group with aggregate case - var value uint64 - // For group count, sum, min, and max, the timestamp here is always math.MaxInt64. - // their final result does not contain _time, so this timestamp value can be anything - // and it won't matter. - // For group first, we need to assign the initial value to math.MaxInt64 so - // we can find the row with the smallest timestamp. - // Do not worry about data with math.MaxInt64 as its real timestamp. - // In OSS we require a |> range() call in the query and a math.MaxInt64 timestamp - // cannot make it through. - var timestamp int64 = math.MaxInt64 - if t.gc.Aggregate().Type == datatypes.AggregateTypeLast { - timestamp = math.MinInt64 - } - for { - // note that for the group aggregate case, len here should always be 1 - for i := 0; i < len; i++ { - switch t.gc.Aggregate().Type { - case datatypes.AggregateTypeCount: - panic("unsupported for aggregate count: Unsigned") - case datatypes.AggregateTypeSum: - value += arr.Values[i] - case datatypes.AggregateTypeFirst: - if arr.Timestamps[i] < timestamp { - timestamp = arr.Timestamps[i] - value = arr.Values[i] - } - case datatypes.AggregateTypeLast: - if arr.Timestamps[i] > timestamp { - timestamp = arr.Timestamps[i] - value = arr.Values[i] - } - } - } - arr = t.cur.Next() - len = arr.Len() - if len > 0 { - continue - } - if !t.advanceCursor() { - break - } - } - colReader := t.allocateBuffer(1) - if IsSelector(t.gc.Aggregate()) { - colReader.cols[timeColIdx] = arrow.NewInt([]int64{timestamp}, t.alloc) - colReader.cols[valueColIdx] = t.toArrowBuffer([]uint64{value}) - } else { - colReader.cols[valueColIdxWithoutTime] = t.toArrowBuffer([]uint64{value}) - } - t.appendTags(colReader) - t.appendBounds(colReader) + // Retrieve the buffer for the data to avoid allocating + // additional slices. If the buffer is still being used + // because the references were retained, then we will + // allocate a new buffer. + cr := t.allocateBuffer(l) + cr.cols[timeColIdx] = arrow.NewInt(a.Timestamps, t.alloc) + cr.cols[valueColIdx] = t.toArrowBuffer(a.Values) + t.appendTags(cr) + t.appendBounds(cr) return true } @@ -2767,7 +2565,7 @@ func (t *stringWindowTable) advance() bool { } else { cr.cols[startColIdx] = start cr.cols[stopColIdx] = stop - cr.cols[valueColIdxWithoutTime] = values + cr.cols[windowedValueColIdx] = values } t.appendTags(cr) return true @@ -3170,93 +2968,26 @@ func (t *stringGroupTable) Do(f func(flux.ColReader) error) error { } func (t *stringGroupTable) advance() bool { - if t.cur == nil { - // For group aggregates, we will try to get all the series and all table buffers within those series - // all at once and merge them into one row when this advance() function is first called. - // At the end of this process, t.advanceCursor() already returns false and t.cur becomes nil. - // But we still need to return true to indicate that there is data to be returned. - // The second time when we call this advance(), t.cur is already nil, so we directly return false. 
+RETRY: + a := t.cur.Next() + l := a.Len() + if l == 0 { + if t.advanceCursor() { + goto RETRY + } + return false } - var arr *cursors.StringArray - var len int - for { - arr = t.cur.Next() - len = arr.Len() - if len > 0 { - break - } - if !t.advanceCursor() { - return false - } - } - // handle the group without aggregate case - if t.gc.Aggregate() == nil { - // Retrieve the buffer for the data to avoid allocating - // additional slices. If the buffer is still being used - // because the references were retained, then we will - // allocate a new buffer. - colReader := t.allocateBuffer(len) - colReader.cols[timeColIdx] = arrow.NewInt(arr.Timestamps, t.alloc) - colReader.cols[valueColIdx] = t.toArrowBuffer(arr.Values) - t.appendTags(colReader) - t.appendBounds(colReader) - return true - } - - // handle the group with aggregate case - var value string - // For group count, sum, min, and max, the timestamp here is always math.MaxInt64. - // their final result does not contain _time, so this timestamp value can be anything - // and it won't matter. - // For group first, we need to assign the initial value to math.MaxInt64 so - // we can find the row with the smallest timestamp. - // Do not worry about data with math.MaxInt64 as its real timestamp. - // In OSS we require a |> range() call in the query and a math.MaxInt64 timestamp - // cannot make it through. - var timestamp int64 = math.MaxInt64 - if t.gc.Aggregate().Type == datatypes.AggregateTypeLast { - timestamp = math.MinInt64 - } - for { - // note that for the group aggregate case, len here should always be 1 - for i := 0; i < len; i++ { - switch t.gc.Aggregate().Type { - case datatypes.AggregateTypeCount: - panic("unsupported for aggregate count: String") - case datatypes.AggregateTypeSum: - panic("unsupported for aggregate sum: String") - case datatypes.AggregateTypeFirst: - if arr.Timestamps[i] < timestamp { - timestamp = arr.Timestamps[i] - value = arr.Values[i] - } - case datatypes.AggregateTypeLast: - if arr.Timestamps[i] > timestamp { - timestamp = arr.Timestamps[i] - value = arr.Values[i] - } - } - } - arr = t.cur.Next() - len = arr.Len() - if len > 0 { - continue - } - if !t.advanceCursor() { - break - } - } - colReader := t.allocateBuffer(1) - if IsSelector(t.gc.Aggregate()) { - colReader.cols[timeColIdx] = arrow.NewInt([]int64{timestamp}, t.alloc) - colReader.cols[valueColIdx] = t.toArrowBuffer([]string{value}) - } else { - colReader.cols[valueColIdxWithoutTime] = t.toArrowBuffer([]string{value}) - } - t.appendTags(colReader) - t.appendBounds(colReader) + // Retrieve the buffer for the data to avoid allocating + // additional slices. If the buffer is still being used + // because the references were retained, then we will + // allocate a new buffer. + cr := t.allocateBuffer(l) + cr.cols[timeColIdx] = arrow.NewInt(a.Timestamps, t.alloc) + cr.cols[valueColIdx] = t.toArrowBuffer(a.Values) + t.appendTags(cr) + t.appendBounds(cr) return true } @@ -3586,7 +3317,7 @@ func (t *booleanWindowTable) advance() bool { } else { cr.cols[startColIdx] = start cr.cols[stopColIdx] = stop - cr.cols[valueColIdxWithoutTime] = values + cr.cols[windowedValueColIdx] = values } t.appendTags(cr) return true @@ -3989,93 +3720,26 @@ func (t *booleanGroupTable) Do(f func(flux.ColReader) error) error { } func (t *booleanGroupTable) advance() bool { - if t.cur == nil { - // For group aggregates, we will try to get all the series and all table buffers within those series - // all at once and merge them into one row when this advance() function is first called. 
- // At the end of this process, t.advanceCursor() already returns false and t.cur becomes nil. - // But we still need to return true to indicate that there is data to be returned. - // The second time when we call this advance(), t.cur is already nil, so we directly return false. +RETRY: + a := t.cur.Next() + l := a.Len() + if l == 0 { + if t.advanceCursor() { + goto RETRY + } + return false } - var arr *cursors.BooleanArray - var len int - for { - arr = t.cur.Next() - len = arr.Len() - if len > 0 { - break - } - if !t.advanceCursor() { - return false - } - } - // handle the group without aggregate case - if t.gc.Aggregate() == nil { - // Retrieve the buffer for the data to avoid allocating - // additional slices. If the buffer is still being used - // because the references were retained, then we will - // allocate a new buffer. - colReader := t.allocateBuffer(len) - colReader.cols[timeColIdx] = arrow.NewInt(arr.Timestamps, t.alloc) - colReader.cols[valueColIdx] = t.toArrowBuffer(arr.Values) - t.appendTags(colReader) - t.appendBounds(colReader) - return true - } - - // handle the group with aggregate case - var value bool - // For group count, sum, min, and max, the timestamp here is always math.MaxInt64. - // their final result does not contain _time, so this timestamp value can be anything - // and it won't matter. - // For group first, we need to assign the initial value to math.MaxInt64 so - // we can find the row with the smallest timestamp. - // Do not worry about data with math.MaxInt64 as its real timestamp. - // In OSS we require a |> range() call in the query and a math.MaxInt64 timestamp - // cannot make it through. - var timestamp int64 = math.MaxInt64 - if t.gc.Aggregate().Type == datatypes.AggregateTypeLast { - timestamp = math.MinInt64 - } - for { - // note that for the group aggregate case, len here should always be 1 - for i := 0; i < len; i++ { - switch t.gc.Aggregate().Type { - case datatypes.AggregateTypeCount: - panic("unsupported for aggregate count: Boolean") - case datatypes.AggregateTypeSum: - panic("unsupported for aggregate sum: Boolean") - case datatypes.AggregateTypeFirst: - if arr.Timestamps[i] < timestamp { - timestamp = arr.Timestamps[i] - value = arr.Values[i] - } - case datatypes.AggregateTypeLast: - if arr.Timestamps[i] > timestamp { - timestamp = arr.Timestamps[i] - value = arr.Values[i] - } - } - } - arr = t.cur.Next() - len = arr.Len() - if len > 0 { - continue - } - if !t.advanceCursor() { - break - } - } - colReader := t.allocateBuffer(1) - if IsSelector(t.gc.Aggregate()) { - colReader.cols[timeColIdx] = arrow.NewInt([]int64{timestamp}, t.alloc) - colReader.cols[valueColIdx] = t.toArrowBuffer([]bool{value}) - } else { - colReader.cols[valueColIdxWithoutTime] = t.toArrowBuffer([]bool{value}) - } - t.appendTags(colReader) - t.appendBounds(colReader) + // Retrieve the buffer for the data to avoid allocating + // additional slices. If the buffer is still being used + // because the references were retained, then we will + // allocate a new buffer. 
+ cr := t.allocateBuffer(l) + cr.cols[timeColIdx] = arrow.NewInt(a.Timestamps, t.alloc) + cr.cols[valueColIdx] = t.toArrowBuffer(a.Values) + t.appendTags(cr) + t.appendBounds(cr) return true } diff --git a/storage/flux/table.gen.go.tmpl b/storage/flux/table.gen.go.tmpl index 18c13bca1c..a84f0eb120 100644 --- a/storage/flux/table.gen.go.tmpl +++ b/storage/flux/table.gen.go.tmpl @@ -12,8 +12,7 @@ import ( "github.com/influxdata/influxdb/v2" "github.com/influxdata/influxdb/v2/models" storage "github.com/influxdata/influxdb/v2/storage/reads" - "github.com/influxdata/influxdb/v2/storage/reads/datatypes" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" ) {{range .}} // @@ -304,7 +303,7 @@ func (t *{{.name}}WindowTable) advance() bool { } else { cr.cols[startColIdx] = start cr.cols[stopColIdx] = stop - cr.cols[valueColIdxWithoutTime] = values + cr.cols[windowedValueColIdx] = values } t.appendTags(cr) return true @@ -707,93 +706,26 @@ func (t *{{.name}}GroupTable) Do(f func(flux.ColReader) error) error { } func (t *{{.name}}GroupTable) advance() bool { - if t.cur == nil { - // For group aggregates, we will try to get all the series and all table buffers within those series - // all at once and merge them into one row when this advance() function is first called. - // At the end of this process, t.advanceCursor() already returns false and t.cur becomes nil. - // But we still need to return true to indicate that there is data to be returned. - // The second time when we call this advance(), t.cur is already nil, so we directly return false. +RETRY: + a := t.cur.Next() + l := a.Len() + if l == 0 { + if t.advanceCursor() { + goto RETRY + } + return false } - var arr *cursors.{{.Name}}Array - var len int - for { - arr = t.cur.Next() - len = arr.Len() - if len > 0 { - break - } - if !t.advanceCursor() { - return false - } - } - // handle the group without aggregate case - if t.gc.Aggregate() == nil { - // Retrieve the buffer for the data to avoid allocating - // additional slices. If the buffer is still being used - // because the references were retained, then we will - // allocate a new buffer. - colReader := t.allocateBuffer(len) - colReader.cols[timeColIdx] = arrow.NewInt(arr.Timestamps, t.alloc) - colReader.cols[valueColIdx] = t.toArrowBuffer(arr.Values) - t.appendTags(colReader) - t.appendBounds(colReader) - return true - } - - // handle the group with aggregate case - var value {{.Type}} - // For group count, sum, min, and max, the timestamp here is always math.MaxInt64. - // their final result does not contain _time, so this timestamp value can be anything - // and it won't matter. - // For group first, we need to assign the initial value to math.MaxInt64 so - // we can find the row with the smallest timestamp. - // Do not worry about data with math.MaxInt64 as its real timestamp. - // In OSS we require a |> range() call in the query and a math.MaxInt64 timestamp - // cannot make it through. 
- var timestamp int64 = math.MaxInt64 - if t.gc.Aggregate().Type == datatypes.AggregateTypeLast { - timestamp = math.MinInt64 - } - for { - // note that for the group aggregate case, len here should always be 1 - for i := 0; i < len; i++ { - switch t.gc.Aggregate().Type { - case datatypes.AggregateTypeCount: - {{if eq .Name "Integer"}}fallthrough{{else}}panic("unsupported for aggregate count: {{.Name}}"){{end}} - case datatypes.AggregateTypeSum: - {{if or (eq .Name "String") (eq .Name "Boolean")}}panic("unsupported for aggregate sum: {{.Name}}"){{else}}value += arr.Values[i]{{end}} - case datatypes.AggregateTypeFirst: - if arr.Timestamps[i] < timestamp { - timestamp = arr.Timestamps[i] - value = arr.Values[i] - } - case datatypes.AggregateTypeLast: - if arr.Timestamps[i] > timestamp { - timestamp = arr.Timestamps[i] - value = arr.Values[i] - } - } - } - arr = t.cur.Next() - len = arr.Len() - if len > 0 { - continue - } - if !t.advanceCursor() { - break - } - } - colReader := t.allocateBuffer(1) - if IsSelector(t.gc.Aggregate()) { - colReader.cols[timeColIdx] = arrow.NewInt([]int64{timestamp}, t.alloc) - colReader.cols[valueColIdx] = t.toArrowBuffer([]{{.Type}}{value}) - } else { - colReader.cols[valueColIdxWithoutTime] = t.toArrowBuffer([]{{.Type}}{value}) - } - t.appendTags(colReader) - t.appendBounds(colReader) + // Retrieve the buffer for the data to avoid allocating + // additional slices. If the buffer is still being used + // because the references were retained, then we will + // allocate a new buffer. + cr := t.allocateBuffer(l) + cr.cols[timeColIdx] = arrow.NewInt(a.Timestamps, t.alloc) + cr.cols[valueColIdx] = t.toArrowBuffer(a.Values) + t.appendTags(cr) + t.appendBounds(cr) return true } diff --git a/storage/points_writer.go b/storage/points_writer.go index c1b7351685..66a2942d4c 100644 --- a/storage/points_writer.go +++ b/storage/points_writer.go @@ -6,7 +6,6 @@ import ( "time" "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/tsdb" "github.com/influxdata/influxdb/v2/v1/models" ) @@ -52,20 +51,16 @@ func (w *LoggingPointsWriter) WritePoints(ctx context.Context, orgID influxdb.ID } // Log error to bucket. 
- name := tsdb.EncodeName(orgID, bkts[0].ID) pt, e := models.NewPoint( - string(name[:]), - models.NewTags(map[string]string{ - models.MeasurementTagKey: "write_errors", - models.FieldKeyTagKey: "error"}, - ), + "write_errors", + nil, models.Fields{"error": err.Error()}, time.Now(), ) if e != nil { return e } - if e := w.Underlying.WritePoints(ctx, orgID, bucketID, []models.Point{pt}); e != nil { + if e := w.Underlying.WritePoints(ctx, orgID, bkts[0].ID, []models.Point{pt}); e != nil { return e } diff --git a/storage/points_writer_test.go b/storage/points_writer_test.go index 1964501d0a..642f18bba0 100644 --- a/storage/points_writer_test.go +++ b/storage/points_writer_test.go @@ -13,7 +13,7 @@ import ( "github.com/influxdata/influxdb/v2/mock" "github.com/influxdata/influxdb/v2/models" "github.com/influxdata/influxdb/v2/storage" - "github.com/influxdata/influxdb/v2/tsdb" + "github.com/influxdata/influxdb/v2/v1/tsdb" ) func TestLoggingPointsWriter(t *testing.T) { diff --git a/storage/reads/aggregate_resultset.go b/storage/reads/aggregate_resultset.go index e1b34a0035..95f4ae7c5f 100644 --- a/storage/reads/aggregate_resultset.go +++ b/storage/reads/aggregate_resultset.go @@ -8,7 +8,7 @@ import ( "github.com/influxdata/influxdb/v2/kit/tracing" "github.com/influxdata/influxdb/v2/models" "github.com/influxdata/influxdb/v2/storage/reads/datatypes" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" ) type windowAggregateResultSet struct { diff --git a/storage/reads/aggregate_resultset_test.go b/storage/reads/aggregate_resultset_test.go index 104f2dc56d..4fbb4e3692 100644 --- a/storage/reads/aggregate_resultset_test.go +++ b/storage/reads/aggregate_resultset_test.go @@ -8,7 +8,7 @@ import ( "github.com/influxdata/influxdb/v2/models" "github.com/influxdata/influxdb/v2/storage/reads" "github.com/influxdata/influxdb/v2/storage/reads/datatypes" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" ) func TestNewWindowAggregateResultSet_Tags(t *testing.T) { diff --git a/storage/reads/array_cursor.gen.go b/storage/reads/array_cursor.gen.go index 8f1c2776dc..434265878c 100644 --- a/storage/reads/array_cursor.gen.go +++ b/storage/reads/array_cursor.gen.go @@ -11,7 +11,7 @@ import ( "fmt" "math" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" ) const ( diff --git a/storage/reads/array_cursor.gen.go.tmpl b/storage/reads/array_cursor.gen.go.tmpl index 7cde140eb8..fabe354cf5 100644 --- a/storage/reads/array_cursor.gen.go.tmpl +++ b/storage/reads/array_cursor.gen.go.tmpl @@ -5,7 +5,7 @@ import ( "fmt" "math" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" ) const ( diff --git a/storage/reads/array_cursor.go b/storage/reads/array_cursor.go index fdab5e2789..b701d8108c 100644 --- a/storage/reads/array_cursor.go +++ b/storage/reads/array_cursor.go @@ -5,7 +5,7 @@ import ( "fmt" "github.com/influxdata/influxdb/v2/storage/reads/datatypes" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" ) type singleValue struct { diff --git a/storage/reads/array_cursor_test.go b/storage/reads/array_cursor_test.go index 3a6cb31a48..9b96e69109 100644 --- a/storage/reads/array_cursor_test.go +++ b/storage/reads/array_cursor_test.go @@ -6,7 +6,7 @@ import ( "time" "github.com/google/go-cmp/cmp" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + 
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors" ) func TestIntegerFilterArrayCursor(t *testing.T) { diff --git a/storage/reads/group_resultset.go b/storage/reads/group_resultset.go index a04c2a23f8..b27ebb21f0 100644 --- a/storage/reads/group_resultset.go +++ b/storage/reads/group_resultset.go @@ -9,7 +9,7 @@ import ( "github.com/influxdata/influxdb/v2/kit/tracing" "github.com/influxdata/influxdb/v2/models" "github.com/influxdata/influxdb/v2/storage/reads/datatypes" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" ) type groupResultSet struct { diff --git a/storage/reads/resultset.go b/storage/reads/resultset.go index 45dc6a2973..2506299f1e 100644 --- a/storage/reads/resultset.go +++ b/storage/reads/resultset.go @@ -5,7 +5,7 @@ import ( "github.com/influxdata/influxdb/v2/models" "github.com/influxdata/influxdb/v2/storage/reads/datatypes" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" ) type resultSet struct { diff --git a/storage/reads/resultset_lineprotocol.go b/storage/reads/resultset_lineprotocol.go index 3de073b522..2fb95b7509 100644 --- a/storage/reads/resultset_lineprotocol.go +++ b/storage/reads/resultset_lineprotocol.go @@ -6,7 +6,7 @@ import ( "strconv" "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" ) // ResultSetToLineProtocol transforms rs to line protocol and writes the diff --git a/storage/reads/series_cursor.go b/storage/reads/series_cursor.go index ffc7292c1f..bd6420e92b 100644 --- a/storage/reads/series_cursor.go +++ b/storage/reads/series_cursor.go @@ -10,7 +10,7 @@ import ( "github.com/influxdata/influxdb/v2/query" "github.com/influxdata/influxdb/v2/storage" "github.com/influxdata/influxdb/v2/storage/reads/datatypes" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" "github.com/influxdata/influxql" ) diff --git a/storage/reads/store.go b/storage/reads/store.go index 71dacc04b1..7ea4ab98aa 100644 --- a/storage/reads/store.go +++ b/storage/reads/store.go @@ -7,7 +7,7 @@ import ( "github.com/influxdata/influxdb/v2/models" "github.com/influxdata/influxdb/v2/query" "github.com/influxdata/influxdb/v2/storage/reads/datatypes" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" ) type ResultSet interface { diff --git a/storage/reads/store_test.go b/storage/reads/store_test.go index 304557a93a..03a2e3764a 100644 --- a/storage/reads/store_test.go +++ b/storage/reads/store_test.go @@ -8,7 +8,7 @@ import ( "github.com/influxdata/influxdb/v2/models" "github.com/influxdata/influxdb/v2/storage/reads" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" ) func cursorToString(wr io.Writer, cur cursors.Cursor) { diff --git a/storage/reads/viewer.go b/storage/reads/viewer.go index 6be7e1fd20..cece52f9f9 100644 --- a/storage/reads/viewer.go +++ b/storage/reads/viewer.go @@ -5,7 +5,7 @@ import ( "github.com/influxdata/influxdb/v2" "github.com/influxdata/influxdb/v2/storage" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" "github.com/influxdata/influxql" ) diff --git a/storage/readservice/store.go b/storage/readservice/store.go index e47aec4103..a9e2ee0b02 100644 --- a/storage/readservice/store.go +++ b/storage/readservice/store.go @@ -9,7 +9,7 @@ import ( 
"github.com/influxdata/influxdb/v2/models" "github.com/influxdata/influxdb/v2/storage/reads" "github.com/influxdata/influxdb/v2/storage/reads/datatypes" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" "github.com/influxdata/influxql" ) diff --git a/storage/retention.go b/storage/retention.go index d623021f91..bf7f989b59 100644 --- a/storage/retention.go +++ b/storage/retention.go @@ -9,7 +9,7 @@ import ( "github.com/influxdata/influxdb/v2" "github.com/influxdata/influxdb/v2/kit/tracing" "github.com/influxdata/influxdb/v2/logger" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" + "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1" "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" "go.uber.org/zap/zapcore" diff --git a/storage/retention_test.go b/storage/retention_test.go index b49e920341..7492ed172d 100644 --- a/storage/retention_test.go +++ b/storage/retention_test.go @@ -18,8 +18,8 @@ import ( "github.com/influxdata/influxdb/v2/kit/prom/promtest" "github.com/influxdata/influxdb/v2/logger" "github.com/influxdata/influxdb/v2/toml" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" + "github.com/influxdata/influxdb/v2/v1/tsdb" + "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1" "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" ) diff --git a/storage/series_cursor.go b/storage/series_cursor.go index 7cbf032ace..2bcce8f349 100644 --- a/storage/series_cursor.go +++ b/storage/series_cursor.go @@ -7,9 +7,8 @@ import ( "github.com/influxdata/influxdb/v2" "github.com/influxdata/influxdb/v2/models" "github.com/influxdata/influxdb/v2/pkg/lifecycle" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" - "github.com/influxdata/influxdb/v2/tsdb/tsi1" + "github.com/influxdata/influxdb/v2/v1/tsdb" + "github.com/influxdata/influxdb/v2/v1/tsdb/index/tsi1" "github.com/influxdata/influxql" ) diff --git a/storage/series_cursor_test.go b/storage/series_cursor_test.go index e3da14d1bd..e500af559c 100644 --- a/storage/series_cursor_test.go +++ b/storage/series_cursor_test.go @@ -5,8 +5,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" + "github.com/influxdata/influxdb/v2/v1/tsdb" ) func Test_NewSeriesCursor_UnexpectedOrg(t *testing.T) { diff --git a/storage/wal/dump.go b/storage/wal/dump.go index b1dd290294..9efb313d55 100644 --- a/storage/wal/dump.go +++ b/storage/wal/dump.go @@ -10,8 +10,8 @@ import ( "text/tabwriter" "github.com/influxdata/influxdb/v2/storage/reads/datatypes" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/value" + "github.com/influxdata/influxdb/v2/v1/tsdb" + "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1" ) // Command represents the program execution for "influxd inspect dumpmwal diff --git a/storage/wal/dump_test.go b/storage/wal/dump_test.go index cdf3dabf2a..b72eac8b62 100644 --- a/storage/wal/dump_test.go +++ b/storage/wal/dump_test.go @@ -12,8 +12,8 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/value" + "github.com/influxdata/influxdb/v2/v1/tsdb" + "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1" ) func TestWalDump_RunWriteEntries(t *testing.T) { diff --git 
a/storage/wal/verifier_test.go b/storage/wal/verifier_test.go index 69e10fcdd7..54eda0c04e 100644 --- a/storage/wal/verifier_test.go +++ b/storage/wal/verifier_test.go @@ -2,14 +2,15 @@ package wal import ( "context" - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" - "github.com/influxdata/influxdb/v2/kit/errors" - "github.com/influxdata/influxdb/v2/tsdb/value" "io/ioutil" "math/rand" "os" "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/influxdata/influxdb/v2/kit/errors" + "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1" ) type Test struct { diff --git a/storage/wal/wal.go b/storage/wal/wal.go index f6c90cef0a..3fc742b023 100644 --- a/storage/wal/wal.go +++ b/storage/wal/wal.go @@ -25,7 +25,7 @@ import ( "github.com/influxdata/influxdb/v2/kit/tracing" "github.com/influxdata/influxdb/v2/pkg/limiter" "github.com/influxdata/influxdb/v2/pkg/pool" - "github.com/influxdata/influxdb/v2/tsdb/value" + "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1" ) const ( diff --git a/storage/wal/wal_test.go b/storage/wal/wal_test.go index 904a599f38..a16244176a 100644 --- a/storage/wal/wal_test.go +++ b/storage/wal/wal_test.go @@ -12,7 +12,7 @@ import ( "github.com/golang/snappy" "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/tsdb/value" + "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1/value" ) func TestWALWriter_WriteMulti_Single(t *testing.T) { diff --git a/task/backend/run_recorder.go b/task/backend/run_recorder.go index bc54044ed9..f8f9587bec 100644 --- a/task/backend/run_recorder.go +++ b/task/backend/run_recorder.go @@ -9,7 +9,7 @@ import ( "github.com/influxdata/influxdb/v2" "github.com/influxdata/influxdb/v2/models" "github.com/influxdata/influxdb/v2/storage" - "github.com/influxdata/influxdb/v2/tsdb" + "github.com/influxdata/influxdb/v2/v1/tsdb" "go.uber.org/zap" ) diff --git a/tsdb/cursors/arrayvalues.gen.go b/tsdb/cursors/arrayvalues.gen.go deleted file mode 100644 index 9eaffa8e40..0000000000 --- a/tsdb/cursors/arrayvalues.gen.go +++ /dev/null @@ -1,1107 +0,0 @@ -// Generated by tmpl -// https://github.com/benbjohnson/tmpl -// -// DO NOT EDIT! -// Source: arrayvalues.gen.go.tmpl - -package cursors - -type FloatArray struct { - Timestamps []int64 - Values []float64 -} - -func NewFloatArrayLen(sz int) *FloatArray { - return &FloatArray{ - Timestamps: make([]int64, sz), - Values: make([]float64, sz), - } -} - -func (a *FloatArray) MinTime() int64 { - return a.Timestamps[0] -} - -func (a *FloatArray) MaxTime() int64 { - return a.Timestamps[len(a.Timestamps)-1] -} - -func (a *FloatArray) Len() int { - return len(a.Timestamps) -} - -// search performs a binary search for UnixNano() v in a -// and returns the position, i, where v would be inserted. -// An additional check of a.Timestamps[i] == v is necessary -// to determine if the value v exists. -func (a *FloatArray) search(v int64) int { - // Define: f(x) → a.Timestamps[x] < v - // Define: f(-1) == true, f(n) == false - // Invariant: f(lo-1) == true, f(hi) == false - lo := 0 - hi := a.Len() - for lo < hi { - mid := int(uint(lo+hi) >> 1) - if a.Timestamps[mid] < v { - lo = mid + 1 // preserves f(lo-1) == true - } else { - hi = mid // preserves f(hi) == false - } - } - - // lo == hi - return lo -} - -// FindRange returns the positions where min and max would be -// inserted into the array. If a[0].UnixNano() > max or -// a[len-1].UnixNano() < min then FindRange returns (-1, -1) -// indicating the array is outside the [min, max]. 
The values must -// be deduplicated and sorted before calling FindRange or the results -// are undefined. -func (a *FloatArray) FindRange(min, max int64) (int, int) { - if a.Len() == 0 || min > max { - return -1, -1 - } - - minVal := a.MinTime() - maxVal := a.MaxTime() - - if maxVal < min || minVal > max { - return -1, -1 - } - - return a.search(min), a.search(max) -} - -// Exclude removes the subset of values in [min, max]. The values must -// be deduplicated and sorted before calling Exclude or the results are undefined. -func (a *FloatArray) Exclude(min, max int64) { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - return - } - - // a.Timestamps[rmin] ≥ min - // a.Timestamps[rmax] ≥ max - - if rmax < a.Len() { - if a.Timestamps[rmax] == max { - rmax++ - } - rest := a.Len() - rmax - if rest > 0 { - ts := a.Timestamps[:rmin+rest] - copy(ts[rmin:], a.Timestamps[rmax:]) - a.Timestamps = ts - - vs := a.Values[:rmin+rest] - copy(vs[rmin:], a.Values[rmax:]) - a.Values = vs - return - } - } - - a.Timestamps = a.Timestamps[:rmin] - a.Values = a.Values[:rmin] -} - -// Include returns the subset values between min and max inclusive. The values must -// be deduplicated and sorted before calling Include or the results are undefined. -func (a *FloatArray) Include(min, max int64) { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - a.Timestamps = a.Timestamps[:0] - a.Values = a.Values[:0] - return - } - - // a.Timestamps[rmin] ≥ min - // a.Timestamps[rmax] ≥ max - - if rmax < a.Len() && a.Timestamps[rmax] == max { - rmax++ - } - - if rmin > -1 { - ts := a.Timestamps[:rmax-rmin] - copy(ts, a.Timestamps[rmin:rmax]) - a.Timestamps = ts - vs := a.Values[:rmax-rmin] - copy(vs, a.Values[rmin:rmax]) - a.Values = vs - } else { - a.Timestamps = a.Timestamps[:rmax] - a.Values = a.Values[:rmax] - } -} - -// Merge overlays b to top of a. If two values conflict with -// the same timestamp, b is used. Both a and b must be sorted -// in ascending order. -func (a *FloatArray) Merge(b *FloatArray) { - if a.Len() == 0 { - *a = *b - return - } - - if b.Len() == 0 { - return - } - - // Normally, both a and b should not contain duplicates. Due to a bug in older versions, it's - // possible stored blocks might contain duplicate values. Remove them if they exists before - // merging. - // a = a.Deduplicate() - // b = b.Deduplicate() - - if a.MaxTime() < b.MinTime() { - a.Timestamps = append(a.Timestamps, b.Timestamps...) - a.Values = append(a.Values, b.Values...) - return - } - - if b.MaxTime() < a.MinTime() { - var tmp FloatArray - tmp.Timestamps = append(b.Timestamps, a.Timestamps...) - tmp.Values = append(b.Values, a.Values...) 
- *a = tmp - return - } - - out := NewFloatArrayLen(a.Len() + b.Len()) - i, j, k := 0, 0, 0 - for i < len(a.Timestamps) && j < len(b.Timestamps) { - if a.Timestamps[i] < b.Timestamps[j] { - out.Timestamps[k] = a.Timestamps[i] - out.Values[k] = a.Values[i] - i++ - } else if a.Timestamps[i] == b.Timestamps[j] { - out.Timestamps[k] = b.Timestamps[j] - out.Values[k] = b.Values[j] - i++ - j++ - } else { - out.Timestamps[k] = b.Timestamps[j] - out.Values[k] = b.Values[j] - j++ - } - k++ - } - - if i < len(a.Timestamps) { - n := copy(out.Timestamps[k:], a.Timestamps[i:]) - copy(out.Values[k:], a.Values[i:]) - k += n - } else if j < len(b.Timestamps) { - n := copy(out.Timestamps[k:], b.Timestamps[j:]) - copy(out.Values[k:], b.Values[j:]) - k += n - } - - a.Timestamps = out.Timestamps[:k] - a.Values = out.Values[:k] -} - -type IntegerArray struct { - Timestamps []int64 - Values []int64 -} - -func NewIntegerArrayLen(sz int) *IntegerArray { - return &IntegerArray{ - Timestamps: make([]int64, sz), - Values: make([]int64, sz), - } -} - -func (a *IntegerArray) MinTime() int64 { - return a.Timestamps[0] -} - -func (a *IntegerArray) MaxTime() int64 { - return a.Timestamps[len(a.Timestamps)-1] -} - -func (a *IntegerArray) Len() int { - return len(a.Timestamps) -} - -// search performs a binary search for UnixNano() v in a -// and returns the position, i, where v would be inserted. -// An additional check of a.Timestamps[i] == v is necessary -// to determine if the value v exists. -func (a *IntegerArray) search(v int64) int { - // Define: f(x) → a.Timestamps[x] < v - // Define: f(-1) == true, f(n) == false - // Invariant: f(lo-1) == true, f(hi) == false - lo := 0 - hi := a.Len() - for lo < hi { - mid := int(uint(lo+hi) >> 1) - if a.Timestamps[mid] < v { - lo = mid + 1 // preserves f(lo-1) == true - } else { - hi = mid // preserves f(hi) == false - } - } - - // lo == hi - return lo -} - -// FindRange returns the positions where min and max would be -// inserted into the array. If a[0].UnixNano() > max or -// a[len-1].UnixNano() < min then FindRange returns (-1, -1) -// indicating the array is outside the [min, max]. The values must -// be deduplicated and sorted before calling FindRange or the results -// are undefined. -func (a *IntegerArray) FindRange(min, max int64) (int, int) { - if a.Len() == 0 || min > max { - return -1, -1 - } - - minVal := a.MinTime() - maxVal := a.MaxTime() - - if maxVal < min || minVal > max { - return -1, -1 - } - - return a.search(min), a.search(max) -} - -// Exclude removes the subset of values in [min, max]. The values must -// be deduplicated and sorted before calling Exclude or the results are undefined. -func (a *IntegerArray) Exclude(min, max int64) { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - return - } - - // a.Timestamps[rmin] ≥ min - // a.Timestamps[rmax] ≥ max - - if rmax < a.Len() { - if a.Timestamps[rmax] == max { - rmax++ - } - rest := a.Len() - rmax - if rest > 0 { - ts := a.Timestamps[:rmin+rest] - copy(ts[rmin:], a.Timestamps[rmax:]) - a.Timestamps = ts - - vs := a.Values[:rmin+rest] - copy(vs[rmin:], a.Values[rmax:]) - a.Values = vs - return - } - } - - a.Timestamps = a.Timestamps[:rmin] - a.Values = a.Values[:rmin] -} - -// Include returns the subset values between min and max inclusive. The values must -// be deduplicated and sorted before calling Include or the results are undefined. 
-func (a *IntegerArray) Include(min, max int64) { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - a.Timestamps = a.Timestamps[:0] - a.Values = a.Values[:0] - return - } - - // a.Timestamps[rmin] ≥ min - // a.Timestamps[rmax] ≥ max - - if rmax < a.Len() && a.Timestamps[rmax] == max { - rmax++ - } - - if rmin > -1 { - ts := a.Timestamps[:rmax-rmin] - copy(ts, a.Timestamps[rmin:rmax]) - a.Timestamps = ts - vs := a.Values[:rmax-rmin] - copy(vs, a.Values[rmin:rmax]) - a.Values = vs - } else { - a.Timestamps = a.Timestamps[:rmax] - a.Values = a.Values[:rmax] - } -} - -// Merge overlays b to top of a. If two values conflict with -// the same timestamp, b is used. Both a and b must be sorted -// in ascending order. -func (a *IntegerArray) Merge(b *IntegerArray) { - if a.Len() == 0 { - *a = *b - return - } - - if b.Len() == 0 { - return - } - - // Normally, both a and b should not contain duplicates. Due to a bug in older versions, it's - // possible stored blocks might contain duplicate values. Remove them if they exists before - // merging. - // a = a.Deduplicate() - // b = b.Deduplicate() - - if a.MaxTime() < b.MinTime() { - a.Timestamps = append(a.Timestamps, b.Timestamps...) - a.Values = append(a.Values, b.Values...) - return - } - - if b.MaxTime() < a.MinTime() { - var tmp IntegerArray - tmp.Timestamps = append(b.Timestamps, a.Timestamps...) - tmp.Values = append(b.Values, a.Values...) - *a = tmp - return - } - - out := NewIntegerArrayLen(a.Len() + b.Len()) - i, j, k := 0, 0, 0 - for i < len(a.Timestamps) && j < len(b.Timestamps) { - if a.Timestamps[i] < b.Timestamps[j] { - out.Timestamps[k] = a.Timestamps[i] - out.Values[k] = a.Values[i] - i++ - } else if a.Timestamps[i] == b.Timestamps[j] { - out.Timestamps[k] = b.Timestamps[j] - out.Values[k] = b.Values[j] - i++ - j++ - } else { - out.Timestamps[k] = b.Timestamps[j] - out.Values[k] = b.Values[j] - j++ - } - k++ - } - - if i < len(a.Timestamps) { - n := copy(out.Timestamps[k:], a.Timestamps[i:]) - copy(out.Values[k:], a.Values[i:]) - k += n - } else if j < len(b.Timestamps) { - n := copy(out.Timestamps[k:], b.Timestamps[j:]) - copy(out.Values[k:], b.Values[j:]) - k += n - } - - a.Timestamps = out.Timestamps[:k] - a.Values = out.Values[:k] -} - -type UnsignedArray struct { - Timestamps []int64 - Values []uint64 -} - -func NewUnsignedArrayLen(sz int) *UnsignedArray { - return &UnsignedArray{ - Timestamps: make([]int64, sz), - Values: make([]uint64, sz), - } -} - -func (a *UnsignedArray) MinTime() int64 { - return a.Timestamps[0] -} - -func (a *UnsignedArray) MaxTime() int64 { - return a.Timestamps[len(a.Timestamps)-1] -} - -func (a *UnsignedArray) Len() int { - return len(a.Timestamps) -} - -// search performs a binary search for UnixNano() v in a -// and returns the position, i, where v would be inserted. -// An additional check of a.Timestamps[i] == v is necessary -// to determine if the value v exists. -func (a *UnsignedArray) search(v int64) int { - // Define: f(x) → a.Timestamps[x] < v - // Define: f(-1) == true, f(n) == false - // Invariant: f(lo-1) == true, f(hi) == false - lo := 0 - hi := a.Len() - for lo < hi { - mid := int(uint(lo+hi) >> 1) - if a.Timestamps[mid] < v { - lo = mid + 1 // preserves f(lo-1) == true - } else { - hi = mid // preserves f(hi) == false - } - } - - // lo == hi - return lo -} - -// FindRange returns the positions where min and max would be -// inserted into the array. 
If a[0].UnixNano() > max or -// a[len-1].UnixNano() < min then FindRange returns (-1, -1) -// indicating the array is outside the [min, max]. The values must -// be deduplicated and sorted before calling FindRange or the results -// are undefined. -func (a *UnsignedArray) FindRange(min, max int64) (int, int) { - if a.Len() == 0 || min > max { - return -1, -1 - } - - minVal := a.MinTime() - maxVal := a.MaxTime() - - if maxVal < min || minVal > max { - return -1, -1 - } - - return a.search(min), a.search(max) -} - -// Exclude removes the subset of values in [min, max]. The values must -// be deduplicated and sorted before calling Exclude or the results are undefined. -func (a *UnsignedArray) Exclude(min, max int64) { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - return - } - - // a.Timestamps[rmin] ≥ min - // a.Timestamps[rmax] ≥ max - - if rmax < a.Len() { - if a.Timestamps[rmax] == max { - rmax++ - } - rest := a.Len() - rmax - if rest > 0 { - ts := a.Timestamps[:rmin+rest] - copy(ts[rmin:], a.Timestamps[rmax:]) - a.Timestamps = ts - - vs := a.Values[:rmin+rest] - copy(vs[rmin:], a.Values[rmax:]) - a.Values = vs - return - } - } - - a.Timestamps = a.Timestamps[:rmin] - a.Values = a.Values[:rmin] -} - -// Include returns the subset values between min and max inclusive. The values must -// be deduplicated and sorted before calling Include or the results are undefined. -func (a *UnsignedArray) Include(min, max int64) { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - a.Timestamps = a.Timestamps[:0] - a.Values = a.Values[:0] - return - } - - // a.Timestamps[rmin] ≥ min - // a.Timestamps[rmax] ≥ max - - if rmax < a.Len() && a.Timestamps[rmax] == max { - rmax++ - } - - if rmin > -1 { - ts := a.Timestamps[:rmax-rmin] - copy(ts, a.Timestamps[rmin:rmax]) - a.Timestamps = ts - vs := a.Values[:rmax-rmin] - copy(vs, a.Values[rmin:rmax]) - a.Values = vs - } else { - a.Timestamps = a.Timestamps[:rmax] - a.Values = a.Values[:rmax] - } -} - -// Merge overlays b to top of a. If two values conflict with -// the same timestamp, b is used. Both a and b must be sorted -// in ascending order. -func (a *UnsignedArray) Merge(b *UnsignedArray) { - if a.Len() == 0 { - *a = *b - return - } - - if b.Len() == 0 { - return - } - - // Normally, both a and b should not contain duplicates. Due to a bug in older versions, it's - // possible stored blocks might contain duplicate values. Remove them if they exists before - // merging. - // a = a.Deduplicate() - // b = b.Deduplicate() - - if a.MaxTime() < b.MinTime() { - a.Timestamps = append(a.Timestamps, b.Timestamps...) - a.Values = append(a.Values, b.Values...) - return - } - - if b.MaxTime() < a.MinTime() { - var tmp UnsignedArray - tmp.Timestamps = append(b.Timestamps, a.Timestamps...) - tmp.Values = append(b.Values, a.Values...) 
- *a = tmp - return - } - - out := NewUnsignedArrayLen(a.Len() + b.Len()) - i, j, k := 0, 0, 0 - for i < len(a.Timestamps) && j < len(b.Timestamps) { - if a.Timestamps[i] < b.Timestamps[j] { - out.Timestamps[k] = a.Timestamps[i] - out.Values[k] = a.Values[i] - i++ - } else if a.Timestamps[i] == b.Timestamps[j] { - out.Timestamps[k] = b.Timestamps[j] - out.Values[k] = b.Values[j] - i++ - j++ - } else { - out.Timestamps[k] = b.Timestamps[j] - out.Values[k] = b.Values[j] - j++ - } - k++ - } - - if i < len(a.Timestamps) { - n := copy(out.Timestamps[k:], a.Timestamps[i:]) - copy(out.Values[k:], a.Values[i:]) - k += n - } else if j < len(b.Timestamps) { - n := copy(out.Timestamps[k:], b.Timestamps[j:]) - copy(out.Values[k:], b.Values[j:]) - k += n - } - - a.Timestamps = out.Timestamps[:k] - a.Values = out.Values[:k] -} - -type StringArray struct { - Timestamps []int64 - Values []string -} - -func NewStringArrayLen(sz int) *StringArray { - return &StringArray{ - Timestamps: make([]int64, sz), - Values: make([]string, sz), - } -} - -func (a *StringArray) MinTime() int64 { - return a.Timestamps[0] -} - -func (a *StringArray) MaxTime() int64 { - return a.Timestamps[len(a.Timestamps)-1] -} - -func (a *StringArray) Len() int { - return len(a.Timestamps) -} - -// search performs a binary search for UnixNano() v in a -// and returns the position, i, where v would be inserted. -// An additional check of a.Timestamps[i] == v is necessary -// to determine if the value v exists. -func (a *StringArray) search(v int64) int { - // Define: f(x) → a.Timestamps[x] < v - // Define: f(-1) == true, f(n) == false - // Invariant: f(lo-1) == true, f(hi) == false - lo := 0 - hi := a.Len() - for lo < hi { - mid := int(uint(lo+hi) >> 1) - if a.Timestamps[mid] < v { - lo = mid + 1 // preserves f(lo-1) == true - } else { - hi = mid // preserves f(hi) == false - } - } - - // lo == hi - return lo -} - -// FindRange returns the positions where min and max would be -// inserted into the array. If a[0].UnixNano() > max or -// a[len-1].UnixNano() < min then FindRange returns (-1, -1) -// indicating the array is outside the [min, max]. The values must -// be deduplicated and sorted before calling FindRange or the results -// are undefined. -func (a *StringArray) FindRange(min, max int64) (int, int) { - if a.Len() == 0 || min > max { - return -1, -1 - } - - minVal := a.MinTime() - maxVal := a.MaxTime() - - if maxVal < min || minVal > max { - return -1, -1 - } - - return a.search(min), a.search(max) -} - -// Exclude removes the subset of values in [min, max]. The values must -// be deduplicated and sorted before calling Exclude or the results are undefined. -func (a *StringArray) Exclude(min, max int64) { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - return - } - - // a.Timestamps[rmin] ≥ min - // a.Timestamps[rmax] ≥ max - - if rmax < a.Len() { - if a.Timestamps[rmax] == max { - rmax++ - } - rest := a.Len() - rmax - if rest > 0 { - ts := a.Timestamps[:rmin+rest] - copy(ts[rmin:], a.Timestamps[rmax:]) - a.Timestamps = ts - - vs := a.Values[:rmin+rest] - copy(vs[rmin:], a.Values[rmax:]) - a.Values = vs - return - } - } - - a.Timestamps = a.Timestamps[:rmin] - a.Values = a.Values[:rmin] -} - -// Include returns the subset values between min and max inclusive. The values must -// be deduplicated and sorted before calling Include or the results are undefined. 
-func (a *StringArray) Include(min, max int64) { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - a.Timestamps = a.Timestamps[:0] - a.Values = a.Values[:0] - return - } - - // a.Timestamps[rmin] ≥ min - // a.Timestamps[rmax] ≥ max - - if rmax < a.Len() && a.Timestamps[rmax] == max { - rmax++ - } - - if rmin > -1 { - ts := a.Timestamps[:rmax-rmin] - copy(ts, a.Timestamps[rmin:rmax]) - a.Timestamps = ts - vs := a.Values[:rmax-rmin] - copy(vs, a.Values[rmin:rmax]) - a.Values = vs - } else { - a.Timestamps = a.Timestamps[:rmax] - a.Values = a.Values[:rmax] - } -} - -// Merge overlays b to top of a. If two values conflict with -// the same timestamp, b is used. Both a and b must be sorted -// in ascending order. -func (a *StringArray) Merge(b *StringArray) { - if a.Len() == 0 { - *a = *b - return - } - - if b.Len() == 0 { - return - } - - // Normally, both a and b should not contain duplicates. Due to a bug in older versions, it's - // possible stored blocks might contain duplicate values. Remove them if they exists before - // merging. - // a = a.Deduplicate() - // b = b.Deduplicate() - - if a.MaxTime() < b.MinTime() { - a.Timestamps = append(a.Timestamps, b.Timestamps...) - a.Values = append(a.Values, b.Values...) - return - } - - if b.MaxTime() < a.MinTime() { - var tmp StringArray - tmp.Timestamps = append(b.Timestamps, a.Timestamps...) - tmp.Values = append(b.Values, a.Values...) - *a = tmp - return - } - - out := NewStringArrayLen(a.Len() + b.Len()) - i, j, k := 0, 0, 0 - for i < len(a.Timestamps) && j < len(b.Timestamps) { - if a.Timestamps[i] < b.Timestamps[j] { - out.Timestamps[k] = a.Timestamps[i] - out.Values[k] = a.Values[i] - i++ - } else if a.Timestamps[i] == b.Timestamps[j] { - out.Timestamps[k] = b.Timestamps[j] - out.Values[k] = b.Values[j] - i++ - j++ - } else { - out.Timestamps[k] = b.Timestamps[j] - out.Values[k] = b.Values[j] - j++ - } - k++ - } - - if i < len(a.Timestamps) { - n := copy(out.Timestamps[k:], a.Timestamps[i:]) - copy(out.Values[k:], a.Values[i:]) - k += n - } else if j < len(b.Timestamps) { - n := copy(out.Timestamps[k:], b.Timestamps[j:]) - copy(out.Values[k:], b.Values[j:]) - k += n - } - - a.Timestamps = out.Timestamps[:k] - a.Values = out.Values[:k] -} - -type BooleanArray struct { - Timestamps []int64 - Values []bool -} - -func NewBooleanArrayLen(sz int) *BooleanArray { - return &BooleanArray{ - Timestamps: make([]int64, sz), - Values: make([]bool, sz), - } -} - -func (a *BooleanArray) MinTime() int64 { - return a.Timestamps[0] -} - -func (a *BooleanArray) MaxTime() int64 { - return a.Timestamps[len(a.Timestamps)-1] -} - -func (a *BooleanArray) Len() int { - return len(a.Timestamps) -} - -// search performs a binary search for UnixNano() v in a -// and returns the position, i, where v would be inserted. -// An additional check of a.Timestamps[i] == v is necessary -// to determine if the value v exists. -func (a *BooleanArray) search(v int64) int { - // Define: f(x) → a.Timestamps[x] < v - // Define: f(-1) == true, f(n) == false - // Invariant: f(lo-1) == true, f(hi) == false - lo := 0 - hi := a.Len() - for lo < hi { - mid := int(uint(lo+hi) >> 1) - if a.Timestamps[mid] < v { - lo = mid + 1 // preserves f(lo-1) == true - } else { - hi = mid // preserves f(hi) == false - } - } - - // lo == hi - return lo -} - -// FindRange returns the positions where min and max would be -// inserted into the array. 
If a[0].UnixNano() > max or -// a[len-1].UnixNano() < min then FindRange returns (-1, -1) -// indicating the array is outside the [min, max]. The values must -// be deduplicated and sorted before calling FindRange or the results -// are undefined. -func (a *BooleanArray) FindRange(min, max int64) (int, int) { - if a.Len() == 0 || min > max { - return -1, -1 - } - - minVal := a.MinTime() - maxVal := a.MaxTime() - - if maxVal < min || minVal > max { - return -1, -1 - } - - return a.search(min), a.search(max) -} - -// Exclude removes the subset of values in [min, max]. The values must -// be deduplicated and sorted before calling Exclude or the results are undefined. -func (a *BooleanArray) Exclude(min, max int64) { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - return - } - - // a.Timestamps[rmin] ≥ min - // a.Timestamps[rmax] ≥ max - - if rmax < a.Len() { - if a.Timestamps[rmax] == max { - rmax++ - } - rest := a.Len() - rmax - if rest > 0 { - ts := a.Timestamps[:rmin+rest] - copy(ts[rmin:], a.Timestamps[rmax:]) - a.Timestamps = ts - - vs := a.Values[:rmin+rest] - copy(vs[rmin:], a.Values[rmax:]) - a.Values = vs - return - } - } - - a.Timestamps = a.Timestamps[:rmin] - a.Values = a.Values[:rmin] -} - -// Include returns the subset values between min and max inclusive. The values must -// be deduplicated and sorted before calling Include or the results are undefined. -func (a *BooleanArray) Include(min, max int64) { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - a.Timestamps = a.Timestamps[:0] - a.Values = a.Values[:0] - return - } - - // a.Timestamps[rmin] ≥ min - // a.Timestamps[rmax] ≥ max - - if rmax < a.Len() && a.Timestamps[rmax] == max { - rmax++ - } - - if rmin > -1 { - ts := a.Timestamps[:rmax-rmin] - copy(ts, a.Timestamps[rmin:rmax]) - a.Timestamps = ts - vs := a.Values[:rmax-rmin] - copy(vs, a.Values[rmin:rmax]) - a.Values = vs - } else { - a.Timestamps = a.Timestamps[:rmax] - a.Values = a.Values[:rmax] - } -} - -// Merge overlays b to top of a. If two values conflict with -// the same timestamp, b is used. Both a and b must be sorted -// in ascending order. -func (a *BooleanArray) Merge(b *BooleanArray) { - if a.Len() == 0 { - *a = *b - return - } - - if b.Len() == 0 { - return - } - - // Normally, both a and b should not contain duplicates. Due to a bug in older versions, it's - // possible stored blocks might contain duplicate values. Remove them if they exists before - // merging. - // a = a.Deduplicate() - // b = b.Deduplicate() - - if a.MaxTime() < b.MinTime() { - a.Timestamps = append(a.Timestamps, b.Timestamps...) - a.Values = append(a.Values, b.Values...) - return - } - - if b.MaxTime() < a.MinTime() { - var tmp BooleanArray - tmp.Timestamps = append(b.Timestamps, a.Timestamps...) - tmp.Values = append(b.Values, a.Values...) 
- *a = tmp - return - } - - out := NewBooleanArrayLen(a.Len() + b.Len()) - i, j, k := 0, 0, 0 - for i < len(a.Timestamps) && j < len(b.Timestamps) { - if a.Timestamps[i] < b.Timestamps[j] { - out.Timestamps[k] = a.Timestamps[i] - out.Values[k] = a.Values[i] - i++ - } else if a.Timestamps[i] == b.Timestamps[j] { - out.Timestamps[k] = b.Timestamps[j] - out.Values[k] = b.Values[j] - i++ - j++ - } else { - out.Timestamps[k] = b.Timestamps[j] - out.Values[k] = b.Values[j] - j++ - } - k++ - } - - if i < len(a.Timestamps) { - n := copy(out.Timestamps[k:], a.Timestamps[i:]) - copy(out.Values[k:], a.Values[i:]) - k += n - } else if j < len(b.Timestamps) { - n := copy(out.Timestamps[k:], b.Timestamps[j:]) - copy(out.Values[k:], b.Values[j:]) - k += n - } - - a.Timestamps = out.Timestamps[:k] - a.Values = out.Values[:k] -} - -type TimestampArray struct { - Timestamps []int64 -} - -func NewTimestampArrayLen(sz int) *TimestampArray { - return &TimestampArray{ - Timestamps: make([]int64, sz), - } -} - -func (a *TimestampArray) MinTime() int64 { - return a.Timestamps[0] -} - -func (a *TimestampArray) MaxTime() int64 { - return a.Timestamps[len(a.Timestamps)-1] -} - -func (a *TimestampArray) Len() int { - return len(a.Timestamps) -} - -// search performs a binary search for UnixNano() v in a -// and returns the position, i, where v would be inserted. -// An additional check of a.Timestamps[i] == v is necessary -// to determine if the value v exists. -func (a *TimestampArray) search(v int64) int { - // Define: f(x) → a.Timestamps[x] < v - // Define: f(-1) == true, f(n) == false - // Invariant: f(lo-1) == true, f(hi) == false - lo := 0 - hi := a.Len() - for lo < hi { - mid := int(uint(lo+hi) >> 1) - if a.Timestamps[mid] < v { - lo = mid + 1 // preserves f(lo-1) == true - } else { - hi = mid // preserves f(hi) == false - } - } - - // lo == hi - return lo -} - -// FindRange returns the positions where min and max would be -// inserted into the array. If a[0].UnixNano() > max or -// a[len-1].UnixNano() < min then FindRange returns (-1, -1) -// indicating the array is outside the [min, max]. The values must -// be deduplicated and sorted before calling FindRange or the results -// are undefined. -func (a *TimestampArray) FindRange(min, max int64) (int, int) { - if a.Len() == 0 || min > max { - return -1, -1 - } - - minVal := a.MinTime() - maxVal := a.MaxTime() - - if maxVal < min || minVal > max { - return -1, -1 - } - - return a.search(min), a.search(max) -} - -// Exclude removes the subset of timestamps in [min, max]. The timestamps must -// be deduplicated and sorted before calling Exclude or the results are undefined. -func (a *TimestampArray) Exclude(min, max int64) { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - return - } - - // a.Timestamps[rmin] ≥ min - // a.Timestamps[rmax] ≥ max - - if rmax < a.Len() { - if a.Timestamps[rmax] == max { - rmax++ - } - rest := a.Len() - rmax - if rest > 0 { - ts := a.Timestamps[:rmin+rest] - copy(ts[rmin:], a.Timestamps[rmax:]) - a.Timestamps = ts - return - } - } - - a.Timestamps = a.Timestamps[:rmin] -} - -// Contains returns true if values exist between min and max inclusive. The -// values must be sorted before calling Contains or the results are undefined. 
-func (a *TimestampArray) Contains(min, max int64) bool { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - return false - } - - // a.Timestamps[rmin] ≥ min - // a.Timestamps[rmax] ≥ max - - if a.Timestamps[rmin] == min { - return true - } - - if rmax < a.Len() && a.Timestamps[rmax] == max { - return true - } - - return rmax-rmin > 0 -} diff --git a/tsdb/cursors/arrayvalues.gen.go.tmpl b/tsdb/cursors/arrayvalues.gen.go.tmpl deleted file mode 100644 index 3e7632aeeb..0000000000 --- a/tsdb/cursors/arrayvalues.gen.go.tmpl +++ /dev/null @@ -1,259 +0,0 @@ -package cursors - -{{range .}} -{{- $typename := print .Name "Array" }} -{{- $hasType := or (and .Type true) false }} - -type {{ $typename }} struct { - Timestamps []int64 -{{- if $hasType }} - Values []{{.Type}} -{{- end }} -} - -func New{{$typename}}Len(sz int) *{{$typename}} { - return &{{$typename}}{ - Timestamps: make([]int64, sz), -{{- if $hasType }} - Values: make([]{{.Type}}, sz), -{{- end }} - } -} - -func (a *{{ $typename }}) MinTime() int64 { - return a.Timestamps[0] -} - -func (a *{{ $typename }}) MaxTime() int64 { - return a.Timestamps[len(a.Timestamps)-1] -} - -func (a *{{ $typename}}) Len() int { - return len(a.Timestamps) -} - -// search performs a binary search for UnixNano() v in a -// and returns the position, i, where v would be inserted. -// An additional check of a.Timestamps[i] == v is necessary -// to determine if the value v exists. -func (a *{{ $typename }}) search(v int64) int { - // Define: f(x) → a.Timestamps[x] < v - // Define: f(-1) == true, f(n) == false - // Invariant: f(lo-1) == true, f(hi) == false - lo := 0 - hi := a.Len() - for lo < hi { - mid := int(uint(lo+hi) >> 1) - if a.Timestamps[mid] < v { - lo = mid + 1 // preserves f(lo-1) == true - } else { - hi = mid // preserves f(hi) == false - } - } - - // lo == hi - return lo -} - -// FindRange returns the positions where min and max would be -// inserted into the array. If a[0].UnixNano() > max or -// a[len-1].UnixNano() < min then FindRange returns (-1, -1) -// indicating the array is outside the [min, max]. The values must -// be deduplicated and sorted before calling FindRange or the results -// are undefined. -func (a *{{ $typename }}) FindRange(min, max int64) (int, int) { - if a.Len() == 0 || min > max { - return -1, -1 - } - - minVal := a.MinTime() - maxVal := a.MaxTime() - - if maxVal < min || minVal > max { - return -1, -1 - } - - return a.search(min), a.search(max) -} - -{{- if $hasType }} -// Exclude removes the subset of values in [min, max]. The values must -// be deduplicated and sorted before calling Exclude or the results are undefined. -func (a *{{ $typename }}) Exclude(min, max int64) { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - return - } - - // a.Timestamps[rmin] ≥ min - // a.Timestamps[rmax] ≥ max - - if rmax < a.Len() { - if a.Timestamps[rmax] == max { - rmax++ - } - rest := a.Len()-rmax - if rest > 0 { - ts := a.Timestamps[:rmin+rest] - copy(ts[rmin:], a.Timestamps[rmax:]) - a.Timestamps = ts - - vs := a.Values[:rmin+rest] - copy(vs[rmin:], a.Values[rmax:]) - a.Values = vs - return - } - } - - a.Timestamps = a.Timestamps[:rmin] - a.Values = a.Values[:rmin] -} - -// Include returns the subset values between min and max inclusive. The values must -// be deduplicated and sorted before calling Include or the results are undefined. 
-func (a *{{ $typename }}) Include(min, max int64) { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - a.Timestamps = a.Timestamps[:0] - a.Values = a.Values[:0] - return - } - - // a.Timestamps[rmin] ≥ min - // a.Timestamps[rmax] ≥ max - - if rmax < a.Len() && a.Timestamps[rmax] == max { - rmax++ - } - - if rmin > -1 { - ts := a.Timestamps[:rmax-rmin] - copy(ts, a.Timestamps[rmin:rmax]) - a.Timestamps = ts - vs := a.Values[:rmax-rmin] - copy(vs, a.Values[rmin:rmax]) - a.Values = vs - } else { - a.Timestamps = a.Timestamps[:rmax] - a.Values = a.Values[:rmax] - } -} - -// Merge overlays b to top of a. If two values conflict with -// the same timestamp, b is used. Both a and b must be sorted -// in ascending order. -func (a *{{ $typename }}) Merge(b *{{ $typename }}) { - if a.Len() == 0 { - *a = *b - return - } - - if b.Len() == 0 { - return - } - - // Normally, both a and b should not contain duplicates. Due to a bug in older versions, it's - // possible stored blocks might contain duplicate values. Remove them if they exists before - // merging. - // a = a.Deduplicate() - // b = b.Deduplicate() - - if a.MaxTime() < b.MinTime() { - a.Timestamps = append(a.Timestamps, b.Timestamps...) - a.Values = append(a.Values, b.Values...) - return - } - - if b.MaxTime() < a.MinTime() { - var tmp {{$typename}} - tmp.Timestamps = append(b.Timestamps, a.Timestamps...) - tmp.Values = append(b.Values, a.Values...) - *a = tmp - return - } - - out := New{{$typename}}Len(a.Len()+b.Len()) - i, j, k := 0, 0, 0 - for i < len(a.Timestamps) && j < len(b.Timestamps) { - if a.Timestamps[i] < b.Timestamps[j] { - out.Timestamps[k] = a.Timestamps[i] - out.Values[k] = a.Values[i] - i++ - } else if a.Timestamps[i] == b.Timestamps[j] { - out.Timestamps[k] = b.Timestamps[j] - out.Values[k] = b.Values[j] - i++ - j++ - } else { - out.Timestamps[k] = b.Timestamps[j] - out.Values[k] = b.Values[j] - j++ - } - k++ - } - - if i < len(a.Timestamps) { - n := copy(out.Timestamps[k:], a.Timestamps[i:]) - copy(out.Values[k:], a.Values[i:]) - k += n - } else if j < len(b.Timestamps) { - n := copy(out.Timestamps[k:], b.Timestamps[j:]) - copy(out.Values[k:], b.Values[j:]) - k += n - } - - a.Timestamps = out.Timestamps[:k] - a.Values = out.Values[:k] -} -{{ else }} -// Exclude removes the subset of timestamps in [min, max]. The timestamps must -// be deduplicated and sorted before calling Exclude or the results are undefined. -func (a *{{ $typename }}) Exclude(min, max int64) { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - return - } - - // a.Timestamps[rmin] ≥ min - // a.Timestamps[rmax] ≥ max - - if rmax < a.Len() { - if a.Timestamps[rmax] == max { - rmax++ - } - rest := a.Len()-rmax - if rest > 0 { - ts := a.Timestamps[:rmin+rest] - copy(ts[rmin:], a.Timestamps[rmax:]) - a.Timestamps = ts - return - } - } - - a.Timestamps = a.Timestamps[:rmin] -} - -// Contains returns true if values exist between min and max inclusive. The -// values must be sorted before calling Contains or the results are undefined. 
-func (a *{{ $typename }}) Contains(min, max int64) bool { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - return false - } - - // a.Timestamps[rmin] ≥ min - // a.Timestamps[rmax] ≥ max - - if a.Timestamps[rmin] == min { - return true - } - - if rmax < a.Len() && a.Timestamps[rmax] == max { - return true - } - - return rmax-rmin > 0 -} -{{ end }} - -{{ end }} diff --git a/tsdb/cursors/arrayvalues.gen.go.tmpldata b/tsdb/cursors/arrayvalues.gen.go.tmpldata deleted file mode 100644 index 7ebe5b94c1..0000000000 --- a/tsdb/cursors/arrayvalues.gen.go.tmpldata +++ /dev/null @@ -1,26 +0,0 @@ -[ - { - "Name":"Float", - "Type":"float64" - }, - { - "Name":"Integer", - "Type":"int64" - }, - { - "Name":"Unsigned", - "Type":"uint64" - }, - { - "Name":"String", - "Type":"string" - }, - { - "Name":"Boolean", - "Type":"bool" - }, - { - "Name":"Timestamp", - "Type": null - } -] diff --git a/tsdb/cursors/arrayvalues.gen_test.go b/tsdb/cursors/arrayvalues.gen_test.go deleted file mode 100644 index f9bf6483be..0000000000 --- a/tsdb/cursors/arrayvalues.gen_test.go +++ /dev/null @@ -1,254 +0,0 @@ -package cursors - -import ( - "fmt" - "testing" - - "github.com/google/go-cmp/cmp" -) - -func makeIntegerArray(count int, min, max int64) *IntegerArray { - vals := NewIntegerArrayLen(count) - - ts := min - inc := (max - min) / int64(count) - - for i := 0; i < count; i++ { - vals.Timestamps[i] = ts - ts += inc - } - - return vals -} - -func makeIntegerArrayFromSlice(t []int64) *IntegerArray { - iv := NewIntegerArrayLen(len(t)) - copy(iv.Timestamps, t) - return iv -} - -func TestIntegerArray_FindRangeNoValues(t *testing.T) { - var vals IntegerArray - l, r := vals.FindRange(0, 100) - if exp := -1; l != exp { - t.Errorf("invalid l; exp=%d, got=%d", exp, l) - } - if exp := -1; r != exp { - t.Errorf("invalid r; exp=%d, got=%d", exp, r) - } -} - -func TestIntegerArray_FindRange(t *testing.T) { - vals := makeIntegerArrayFromSlice([]int64{10, 11, 13, 15, 17, 20, 21}) - - cases := []struct { - min, max int64 - l, r int - }{ - {12, 20, 2, 5}, - {22, 40, -1, -1}, - {1, 9, -1, -1}, - {1, 10, 0, 0}, - {1, 11, 0, 1}, - {15, 15, 3, 3}, - } - - for _, tc := range cases { - t.Run(fmt.Sprintf("%d→%d", tc.min, tc.max), func(t *testing.T) { - l, r := vals.FindRange(tc.min, tc.max) - if l != tc.l { - t.Errorf("left: got %d, exp %d", l, tc.l) - } - if r != tc.r { - t.Errorf("right: got %d, exp %d", r, tc.r) - } - }) - } -} - -func TestIntegerArray_Exclude(t *testing.T) { - cases := []struct { - n string - min, max int64 - exp []int64 - }{ - {"excl bad range", 18, 11, []int64{10, 12, 14, 16, 18}}, - {"excl none-lo", 0, 9, []int64{10, 12, 14, 16, 18}}, - {"excl none-hi", 19, 30, []int64{10, 12, 14, 16, 18}}, - {"excl first", 0, 10, []int64{12, 14, 16, 18}}, - {"excl last", 18, 20, []int64{10, 12, 14, 16}}, - {"excl all but first and last", 12, 16, []int64{10, 18}}, - {"excl none in middle", 13, 13, []int64{10, 12, 14, 16, 18}}, - {"excl middle", 14, 14, []int64{10, 12, 16, 18}}, - {"excl suffix", 14, 18, []int64{10, 12}}, - } - - for _, tc := range cases { - t.Run(fmt.Sprintf("%s[%d,%d]", tc.n, tc.min, tc.max), func(t *testing.T) { - vals := makeIntegerArray(5, 10, 20) - vals.Exclude(tc.min, tc.max) - got := vals.Timestamps - if !cmp.Equal(got, tc.exp) { - t.Errorf("unexpected values -got/+exp\n%s", cmp.Diff(got, tc.exp)) - } - }) - } -} - -func TestIntegerArray_Include(t *testing.T) { - cases := []struct { - n string - min, max int64 - exp []int64 - }{ - {"incl none-lo", 0, 9, []int64{}}, - {"incl none-hi", 19, 30, 
[]int64{}}, - {"incl first", 0, 10, []int64{10}}, - {"incl last", 18, 20, []int64{18}}, - {"incl all but first and last", 12, 16, []int64{12, 14, 16}}, - {"incl none in middle", 13, 13, []int64{}}, - {"incl middle", 14, 14, []int64{14}}, - } - - for _, tc := range cases { - t.Run(fmt.Sprintf("%s[%d,%d]", tc.n, tc.min, tc.max), func(t *testing.T) { - vals := makeIntegerArray(5, 10, 20) - vals.Include(tc.min, tc.max) - got := vals.Timestamps - if !cmp.Equal(got, tc.exp) { - t.Errorf("unexpected values -got/+exp\n%s", cmp.Diff(got, tc.exp)) - } - }) - } -} - -func makeTimestampArray(count int, min, max int64) *TimestampArray { - vals := NewTimestampArrayLen(count) - - ts := min - inc := (max - min) / int64(count) - - for i := 0; i < count; i++ { - vals.Timestamps[i] = ts - ts += inc - } - - return vals -} - -func TestTimestampArray_Contains(t *testing.T) { - cases := []struct { - n string - min, max int64 - exp bool - }{ - {"no/lo", 0, 9, false}, - {"no/hi", 19, 30, false}, - {"no/middle", 13, 13, false}, - - {"yes/first", 0, 10, true}, - {"yes/first-eq", 10, 10, true}, - {"yes/last", 18, 20, true}, - {"yes/last-eq", 18, 18, true}, - {"yes/all but first and last", 12, 16, true}, - {"yes/middle-eq", 14, 14, true}, - {"yes/middle-overlap", 13, 15, true}, - {"yes/covers", 8, 22, true}, - } - - for _, tc := range cases { - t.Run(fmt.Sprintf("%s[%d,%d]", tc.n, tc.min, tc.max), func(t *testing.T) { - vals := makeTimestampArray(5, 10, 20) - if got := vals.Contains(tc.min, tc.max); got != tc.exp { - t.Errorf("Contains -got/+exp\n%s", cmp.Diff(got, tc.exp)) - } - }) - } -} - -func benchExclude(b *testing.B, vals *IntegerArray, min, max int64) { - b.ResetTimer() - - for i := 0; i < b.N; i++ { - vals.Exclude(min, max) - } -} - -func BenchmarkIntegerArray_ExcludeNone_1000(b *testing.B) { - benchExclude(b, makeIntegerArray(1000, 1000, 2000), 0, 500) -} - -func BenchmarkIntegerArray_ExcludeMiddleHalf_1000(b *testing.B) { - benchExclude(b, makeIntegerArray(1000, 1000, 2000), 1250, 1750) -} - -func BenchmarkIntegerArray_ExcludeFirst_1000(b *testing.B) { - benchExclude(b, makeIntegerArray(1000, 1000, 2000), 0, 1000) -} - -func BenchmarkIntegerArray_ExcludeLast_1000(b *testing.B) { - benchExclude(b, makeIntegerArray(1000, 1000, 2000), 1999, 2000) -} - -func BenchmarkIntegerArray_ExcludeNone_10000(b *testing.B) { - benchExclude(b, makeIntegerArray(10000, 10000, 20000), 00, 5000) -} - -func BenchmarkIntegerArray_ExcludeMiddleHalf_10000(b *testing.B) { - benchExclude(b, makeIntegerArray(10000, 10000, 20000), 12500, 17500) -} - -func BenchmarkIntegerArray_ExcludeFirst_10000(b *testing.B) { - benchExclude(b, makeIntegerArray(10000, 10000, 20000), 0, 10000) -} - -func BenchmarkIntegerArray_ExcludeLast_10000(b *testing.B) { - benchExclude(b, makeIntegerArray(10000, 10000, 20000), 19999, 20000) -} - -func benchInclude(b *testing.B, vals *IntegerArray, min, max int64) { - src := *vals - tmp := NewIntegerArrayLen(vals.Len()) - copy(tmp.Timestamps, vals.Timestamps) - copy(tmp.Values, vals.Values) - b.ResetTimer() - - for i := 0; i < b.N; i++ { - vals.Include(min, max) - *vals = src - copy(vals.Timestamps, tmp.Timestamps) - copy(vals.Values, tmp.Values) - } -} - -func BenchmarkIntegerArray_IncludeNone_1000(b *testing.B) { - benchInclude(b, makeIntegerArray(1000, 1000, 2000), 0, 500) -} - -func BenchmarkIntegerArray_IncludeMiddleHalf_1000(b *testing.B) { - benchInclude(b, makeIntegerArray(1000, 1000, 2000), 1250, 1750) -} - -func BenchmarkIntegerArray_IncludeFirst_1000(b *testing.B) { - benchInclude(b, 
makeIntegerArray(1000, 1000, 2000), 0, 1000) -} - -func BenchmarkIntegerArray_IncludeLast_1000(b *testing.B) { - benchInclude(b, makeIntegerArray(1000, 1000, 2000), 1999, 2000) -} - -func BenchmarkIntegerArray_IncludeNone_10000(b *testing.B) { - benchInclude(b, makeIntegerArray(10000, 10000, 20000), 00, 5000) -} - -func BenchmarkIntegerArray_IncludeMiddleHalf_10000(b *testing.B) { - benchInclude(b, makeIntegerArray(10000, 10000, 20000), 12500, 17500) -} - -func BenchmarkIntegerArray_IncludeFirst_10000(b *testing.B) { - benchInclude(b, makeIntegerArray(10000, 10000, 20000), 0, 10000) -} - -func BenchmarkIntegerArray_IncludeLast_10000(b *testing.B) { - benchInclude(b, makeIntegerArray(10000, 10000, 20000), 19999, 20000) -} diff --git a/tsdb/cursors/arrayvalues.go b/tsdb/cursors/arrayvalues.go deleted file mode 100644 index 6cf0bfbb23..0000000000 --- a/tsdb/cursors/arrayvalues.go +++ /dev/null @@ -1,41 +0,0 @@ -package cursors - -import "sort" - -func (a *FloatArray) Size() int { - // size of timestamps + values - return len(a.Timestamps)*8 + len(a.Values)*8 -} - -func (a *IntegerArray) Size() int { - // size of timestamps + values - return len(a.Timestamps)*8 + len(a.Values)*8 -} - -func (a *UnsignedArray) Size() int { - // size of timestamps + values - return len(a.Timestamps)*8 + len(a.Values)*8 -} - -func (a *StringArray) Size() int { - sz := len(a.Timestamps) * 8 - for _, s := range a.Values { - sz += len(s) - } - return sz -} - -func (a *BooleanArray) Size() int { - // size of timestamps + values - return len(a.Timestamps)*8 + len(a.Values) -} - -var _ sort.Interface = (*TimestampArray)(nil) - -func (a *TimestampArray) Less(i, j int) bool { - return a.Timestamps[i] < a.Timestamps[j] -} - -func (a *TimestampArray) Swap(i, j int) { - a.Timestamps[i], a.Timestamps[j] = a.Timestamps[j], a.Timestamps[i] -} diff --git a/tsdb/cursors/arrayvalues_test.go b/tsdb/cursors/arrayvalues_test.go deleted file mode 100644 index ac991a1ee8..0000000000 --- a/tsdb/cursors/arrayvalues_test.go +++ /dev/null @@ -1,459 +0,0 @@ -package cursors_test - -import ( - "strconv" - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/influxdata/influxdb/v2/tsdb/cursors" -) - -func makeBooleanArray(v ...interface{}) *cursors.BooleanArray { - if len(v)&1 == 1 { - panic("invalid array length") - } - a := cursors.NewBooleanArrayLen(len(v) / 2) - for i := 0; i < len(v); i += 2 { - a.Timestamps[i/2] = int64(v[i].(int)) - a.Values[i/2] = v[i+1].(bool) - } - return a -} - -func makeFloatArray(v ...interface{}) *cursors.FloatArray { - if len(v)&1 == 1 { - panic("invalid array length") - } - a := cursors.NewFloatArrayLen(len(v) / 2) - for i := 0; i < len(v); i += 2 { - a.Timestamps[i/2] = int64(v[i].(int)) - a.Values[i/2] = v[i+1].(float64) - } - return a -} - -func makeIntegerArray(v ...interface{}) *cursors.IntegerArray { - if len(v)&1 == 1 { - panic("invalid array length") - } - a := cursors.NewIntegerArrayLen(len(v) / 2) - for i := 0; i < len(v); i += 2 { - a.Timestamps[i/2] = int64(v[i].(int)) - a.Values[i/2] = int64(v[i+1].(int)) - } - return a -} - -func makeUnsignedArray(v ...interface{}) *cursors.UnsignedArray { - if len(v)&1 == 1 { - panic("invalid array length") - } - a := cursors.NewUnsignedArrayLen(len(v) / 2) - for i := 0; i < len(v); i += 2 { - a.Timestamps[i/2] = int64(v[i].(int)) - a.Values[i/2] = uint64(v[i+1].(int)) - } - return a -} - -func makeStringArray(v ...interface{}) *cursors.StringArray { - if len(v)&1 == 1 { - panic("invalid array length") - } - a := cursors.NewStringArrayLen(len(v) / 2) - for 
i := 0; i < len(v); i += 2 { - a.Timestamps[i/2] = int64(v[i].(int)) - a.Values[i/2] = strconv.Itoa(v[i+1].(int)) - } - return a -} - -func TestBooleanArray_Merge(t *testing.T) { - tests := []struct { - name string - a, b, exp *cursors.BooleanArray - }{ - { - name: "empty a", - - a: makeBooleanArray(), - b: makeBooleanArray(1, true, 2, true), - exp: makeBooleanArray(1, true, 2, true), - }, - { - name: "empty b", - - a: makeBooleanArray(1, true, 2, true), - b: makeBooleanArray(), - exp: makeBooleanArray(1, true, 2, true), - }, - { - name: "b replaces a", - - a: makeBooleanArray(1, true), - b: makeBooleanArray( - 0, false, - 1, false, // overwrites a - 2, false, - 3, false, - 4, false, - ), - exp: makeBooleanArray(0, false, 1, false, 2, false, 3, false, 4, false), - }, - { - name: "b replaces partial a", - - a: makeBooleanArray(1, true, 2, true, 3, true, 4, true), - b: makeBooleanArray( - 1, false, // overwrites a - 2, false, // overwrites a - ), - exp: makeBooleanArray( - 1, false, // overwrites a - 2, false, // overwrites a - 3, true, - 4, true, - ), - }, - { - name: "b replaces all a", - - a: makeBooleanArray(1, true, 2, true, 3, true, 4, true), - b: makeBooleanArray(1, false, 2, false, 3, false, 4, false), - exp: makeBooleanArray(1, false, 2, false, 3, false, 4, false), - }, - { - name: "b replaces a interleaved", - a: makeBooleanArray(0, true, 1, true, 2, true, 3, true, 4, true), - b: makeBooleanArray(0, false, 2, false, 4, false), - exp: makeBooleanArray(0, false, 1, true, 2, false, 3, true, 4, false), - }, - { - name: "b merges a interleaved", - a: makeBooleanArray(0, true, 2, true, 4, true), - b: makeBooleanArray(1, false, 3, false, 5, false), - exp: makeBooleanArray(0, true, 1, false, 2, true, 3, false, 4, true, 5, false), - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - test.a.Merge(test.b) - if !cmp.Equal(test.a, test.exp) { - t.Fatalf("unexpected values -got/+exp\n%s", cmp.Diff(test.a, test.exp)) - } - }) - } -} - -func TestFloatArray_Merge(t *testing.T) { - tests := []struct { - name string - a, b, exp *cursors.FloatArray - }{ - { - name: "empty a", - - a: makeFloatArray(), - b: makeFloatArray(1, 1.1, 2, 2.1), - exp: makeFloatArray(1, 1.1, 2, 2.1), - }, - { - name: "empty b", - - a: makeFloatArray(1, 1.0, 2, 2.0), - b: makeFloatArray(), - exp: makeFloatArray(1, 1.0, 2, 2.0), - }, - { - name: "b replaces a", - - a: makeFloatArray(1, 1.0), - b: makeFloatArray( - 0, 0.1, - 1, 1.1, // overwrites a - 2, 2.1, - 3, 3.1, - 4, 4.1, - ), - exp: makeFloatArray(0, 0.1, 1, 1.1, 2, 2.1, 3, 3.1, 4, 4.1), - }, - { - name: "b replaces partial a", - - a: makeFloatArray(1, 1.0, 2, 2.0, 3, 3.0, 4, 4.0), - b: makeFloatArray( - 1, 1.1, // overwrites a - 2, 2.1, // overwrites a - ), - exp: makeFloatArray( - 1, 1.1, // overwrites a - 2, 2.1, // overwrites a - 3, 3.0, - 4, 4.0, - ), - }, - { - name: "b replaces all a", - - a: makeFloatArray(1, 1.0, 2, 2.0, 3, 3.0, 4, 4.0), - b: makeFloatArray(1, 1.1, 2, 2.1, 3, 3.1, 4, 4.1), - exp: makeFloatArray(1, 1.1, 2, 2.1, 3, 3.1, 4, 4.1), - }, - { - name: "b replaces a interleaved", - a: makeFloatArray(0, 0.0, 1, 1.0, 2, 2.0, 3, 3.0, 4, 4.0), - b: makeFloatArray(0, 0.1, 2, 2.1, 4, 4.1), - exp: makeFloatArray(0, 0.1, 1, 1.0, 2, 2.1, 3, 3.0, 4, 4.1), - }, - { - name: "b merges a interleaved", - a: makeFloatArray(0, 0.0, 2, 2.0, 4, 4.0), - b: makeFloatArray(1, 1.1, 3, 3.1, 5, 5.1), - exp: makeFloatArray(0, 0.0, 1, 1.1, 2, 2.0, 3, 3.1, 4, 4.0, 5, 5.1), - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { 
- test.a.Merge(test.b) - if !cmp.Equal(test.a, test.exp) { - t.Fatalf("unexpected values -got/+exp\n%s", cmp.Diff(test.a, test.exp)) - } - }) - } -} - -func TestIntegerArray_Merge(t *testing.T) { - tests := []struct { - name string - a, b, exp *cursors.IntegerArray - }{ - { - name: "empty a", - - a: makeIntegerArray(), - b: makeIntegerArray(1, 11, 2, 21), - exp: makeIntegerArray(1, 11, 2, 21), - }, - { - name: "empty b", - - a: makeIntegerArray(1, 10, 2, 20), - b: makeIntegerArray(), - exp: makeIntegerArray(1, 10, 2, 20), - }, - { - name: "b replaces a", - - a: makeIntegerArray(1, 10), - b: makeIntegerArray( - 0, 1, - 1, 11, // overwrites a - 2, 21, - 3, 31, - 4, 41, - ), - exp: makeIntegerArray(0, 1, 1, 11, 2, 21, 3, 31, 4, 41), - }, - { - name: "b replaces partial a", - - a: makeIntegerArray(1, 10, 2, 20, 3, 30, 4, 40), - b: makeIntegerArray( - 1, 11, // overwrites a - 2, 21, // overwrites a - ), - exp: makeIntegerArray( - 1, 11, // overwrites a - 2, 21, // overwrites a - 3, 30, - 4, 40, - ), - }, - { - name: "b replaces all a", - - a: makeIntegerArray(1, 10, 2, 20, 3, 30, 4, 40), - b: makeIntegerArray(1, 11, 2, 21, 3, 31, 4, 41), - exp: makeIntegerArray(1, 11, 2, 21, 3, 31, 4, 41), - }, - { - name: "b replaces a interleaved", - a: makeIntegerArray(0, 0, 1, 10, 2, 20, 3, 30, 4, 40), - b: makeIntegerArray(0, 1, 2, 21, 4, 41), - exp: makeIntegerArray(0, 1, 1, 10, 2, 21, 3, 30, 4, 41), - }, - { - name: "b merges a interleaved", - a: makeIntegerArray(0, 00, 2, 20, 4, 40), - b: makeIntegerArray(1, 11, 3, 31, 5, 51), - exp: makeIntegerArray(0, 00, 1, 11, 2, 20, 3, 31, 4, 40, 5, 51), - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - test.a.Merge(test.b) - if !cmp.Equal(test.a, test.exp) { - t.Fatalf("unexpected values -got/+exp\n%s", cmp.Diff(test.a, test.exp)) - } - }) - } -} - -func TestUnsignedArray_Merge(t *testing.T) { - tests := []struct { - name string - a, b, exp *cursors.UnsignedArray - }{ - { - name: "empty a", - - a: makeUnsignedArray(), - b: makeUnsignedArray(1, 11, 2, 21), - exp: makeUnsignedArray(1, 11, 2, 21), - }, - { - name: "empty b", - - a: makeUnsignedArray(1, 10, 2, 20), - b: makeUnsignedArray(), - exp: makeUnsignedArray(1, 10, 2, 20), - }, - { - name: "b replaces a", - - a: makeUnsignedArray(1, 10), - b: makeUnsignedArray( - 0, 1, - 1, 11, // overwrites a - 2, 21, - 3, 31, - 4, 41, - ), - exp: makeUnsignedArray(0, 1, 1, 11, 2, 21, 3, 31, 4, 41), - }, - { - name: "b replaces partial a", - - a: makeUnsignedArray(1, 10, 2, 20, 3, 30, 4, 40), - b: makeUnsignedArray( - 1, 11, // overwrites a - 2, 21, // overwrites a - ), - exp: makeUnsignedArray( - 1, 11, // overwrites a - 2, 21, // overwrites a - 3, 30, - 4, 40, - ), - }, - { - name: "b replaces all a", - - a: makeUnsignedArray(1, 10, 2, 20, 3, 30, 4, 40), - b: makeUnsignedArray(1, 11, 2, 21, 3, 31, 4, 41), - exp: makeUnsignedArray(1, 11, 2, 21, 3, 31, 4, 41), - }, - { - name: "b replaces a interleaved", - a: makeUnsignedArray(0, 0, 1, 10, 2, 20, 3, 30, 4, 40), - b: makeUnsignedArray(0, 1, 2, 21, 4, 41), - exp: makeUnsignedArray(0, 1, 1, 10, 2, 21, 3, 30, 4, 41), - }, - { - name: "b merges a interleaved", - a: makeUnsignedArray(0, 00, 2, 20, 4, 40), - b: makeUnsignedArray(1, 11, 3, 31, 5, 51), - exp: makeUnsignedArray(0, 00, 1, 11, 2, 20, 3, 31, 4, 40, 5, 51), - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - test.a.Merge(test.b) - if !cmp.Equal(test.a, test.exp) { - t.Fatalf("unexpected values -got/+exp\n%s", cmp.Diff(test.a, test.exp)) - } - }) - } -} - 
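
The Merge tests above (and the String variant that follows) all pin down the same contract for the removed cursors arrays: both inputs are sorted by timestamp, the merged result stays sorted, and when both sides carry the same timestamp the value from b replaces the one from a. A minimal sketch of that rule for the integer case, using a hypothetical intSample type and mergeInteger helper rather than the removed generated implementation:

package main

import "fmt"

// intSample pairs a timestamp with a value, standing in for the removed
// cursors.IntegerArray struct-of-arrays layout (illustrative only).
type intSample struct {
	ts  int64
	val int64
}

// mergeInteger merges two timestamp-sorted slices. On a timestamp collision
// the sample from b overwrites the one from a, matching the Merge tests.
func mergeInteger(a, b []intSample) []intSample {
	out := make([]intSample, 0, len(a)+len(b))
	i, j := 0, 0
	for i < len(a) && j < len(b) {
		switch {
		case a[i].ts < b[j].ts:
			out = append(out, a[i])
			i++
		case a[i].ts > b[j].ts:
			out = append(out, b[j])
			j++
		default: // equal timestamps: b replaces a
			out = append(out, b[j])
			i++
			j++
		}
	}
	out = append(out, a[i:]...)
	return append(out, b[j:]...)
}

func main() {
	a := []intSample{{0, 0}, {1, 10}, {2, 20}}
	b := []intSample{{0, 1}, {2, 21}, {4, 41}}
	fmt.Println(mergeInteger(a, b)) // [{0 1} {1 10} {2 21} {4 41}]
}

The output mirrors the "b replaces a interleaved" cases in the tests: overlapping timestamps take b's value, non-overlapping samples from either side are kept in order.
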
-func TestStringArray_Merge(t *testing.T) { - tests := []struct { - name string - a, b, exp *cursors.StringArray - }{ - { - name: "empty a", - - a: makeStringArray(), - b: makeStringArray(1, 11, 2, 21), - exp: makeStringArray(1, 11, 2, 21), - }, - { - name: "empty b", - - a: makeStringArray(1, 10, 2, 20), - b: makeStringArray(), - exp: makeStringArray(1, 10, 2, 20), - }, - { - name: "b replaces a", - - a: makeStringArray(1, 10), - b: makeStringArray( - 0, 1, - 1, 11, // overwrites a - 2, 21, - 3, 31, - 4, 41, - ), - exp: makeStringArray(0, 1, 1, 11, 2, 21, 3, 31, 4, 41), - }, - { - name: "b replaces partial a", - - a: makeStringArray(1, 10, 2, 20, 3, 30, 4, 40), - b: makeStringArray( - 1, 11, // overwrites a - 2, 21, // overwrites a - ), - exp: makeStringArray( - 1, 11, // overwrites a - 2, 21, // overwrites a - 3, 30, - 4, 40, - ), - }, - { - name: "b replaces all a", - - a: makeStringArray(1, 10, 2, 20, 3, 30, 4, 40), - b: makeStringArray(1, 11, 2, 21, 3, 31, 4, 41), - exp: makeStringArray(1, 11, 2, 21, 3, 31, 4, 41), - }, - { - name: "b replaces a interleaved", - a: makeStringArray(0, 0, 1, 10, 2, 20, 3, 30, 4, 40), - b: makeStringArray(0, 1, 2, 21, 4, 41), - exp: makeStringArray(0, 1, 1, 10, 2, 21, 3, 30, 4, 41), - }, - { - name: "b merges a interleaved", - a: makeStringArray(0, 00, 2, 20, 4, 40), - b: makeStringArray(1, 11, 3, 31, 5, 51), - exp: makeStringArray(0, 00, 1, 11, 2, 20, 3, 31, 4, 40, 5, 51), - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - test.a.Merge(test.b) - if !cmp.Equal(test.a, test.exp) { - t.Fatalf("unexpected values -got/+exp\n%s", cmp.Diff(test.a, test.exp)) - } - }) - } -} diff --git a/tsdb/cursors/cursor.go b/tsdb/cursors/cursor.go deleted file mode 100644 index 5b3ce91f43..0000000000 --- a/tsdb/cursors/cursor.go +++ /dev/null @@ -1,68 +0,0 @@ -package cursors - -import ( - "context" - - "github.com/influxdata/influxdb/v2/models" -) - -const DefaultMaxPointsPerBlock = 1000 - -type Cursor interface { - Close() - Err() error - Stats() CursorStats -} - -type IntegerArrayCursor interface { - Cursor - Next() *IntegerArray -} - -type FloatArrayCursor interface { - Cursor - Next() *FloatArray -} - -type UnsignedArrayCursor interface { - Cursor - Next() *UnsignedArray -} - -type StringArrayCursor interface { - Cursor - Next() *StringArray -} - -type BooleanArrayCursor interface { - Cursor - Next() *BooleanArray -} - -type CursorRequest struct { - Name []byte - Tags models.Tags - Field string - Ascending bool - StartTime int64 - EndTime int64 -} - -type CursorIterator interface { - Next(ctx context.Context, r *CursorRequest) (Cursor, error) - Stats() CursorStats -} - -type CursorIterators []CursorIterator - -// CursorStats represents stats collected by a cursor. -type CursorStats struct { - ScannedValues int // number of values scanned - ScannedBytes int // number of uncompressed bytes scanned -} - -// Add adds other to s and updates s. 
-func (s *CursorStats) Add(other CursorStats) { - s.ScannedValues += other.ScannedValues - s.ScannedBytes += other.ScannedBytes -} diff --git a/tsdb/cursors/gen.go b/tsdb/cursors/gen.go deleted file mode 100644 index 40bcfb2a3c..0000000000 --- a/tsdb/cursors/gen.go +++ /dev/null @@ -1,4 +0,0 @@ -package cursors - -//go:generate env GO111MODULE=on go run github.com/benbjohnson/tmpl -data=@arrayvalues.gen.go.tmpldata arrayvalues.gen.go.tmpl -//go:generate stringer -type FieldType diff --git a/tsdb/cursors/string.go b/tsdb/cursors/string.go deleted file mode 100644 index 2c2b13a7af..0000000000 --- a/tsdb/cursors/string.go +++ /dev/null @@ -1,81 +0,0 @@ -package cursors - -// StringIterator describes the behavior for enumerating a sequence of -// string values. -type StringIterator interface { - // Next advances the StringIterator to the next value. It returns false - // when there are no more values. - Next() bool - - // Value returns the current value. - Value() string - - Stats() CursorStats -} - -// EmptyStringIterator is an implementation of StringIterator that returns -// no values. -var EmptyStringIterator StringIterator = &stringIterator{} - -type stringIterator struct{} - -func (*stringIterator) Next() bool { return false } -func (*stringIterator) Value() string { return "" } -func (*stringIterator) Stats() CursorStats { return CursorStats{} } - -type StringSliceIterator struct { - s []string - v string - i int - stats CursorStats -} - -func NewStringSliceIterator(s []string) *StringSliceIterator { - return &StringSliceIterator{s: s, i: 0} -} - -func NewStringSliceIteratorWithStats(s []string, stats CursorStats) *StringSliceIterator { - return &StringSliceIterator{s: s, i: 0, stats: stats} -} - -func (s *StringSliceIterator) Next() bool { - if s.i < len(s.s) { - s.v = s.s[s.i] - s.i++ - return true - } - s.v = "" - return false -} - -func (s *StringSliceIterator) Value() string { - return s.v -} - -func (s *StringSliceIterator) Stats() CursorStats { - return s.stats -} - -func (s *StringSliceIterator) toSlice() []string { - if s.i < len(s.s) { - return s.s[s.i:] - } - return nil -} - -// StringIteratorToSlice reads the remainder of i into a slice and -// returns the result. -func StringIteratorToSlice(i StringIterator) []string { - if i == nil { - return nil - } - - if si, ok := i.(*StringSliceIterator); ok { - return si.toSlice() - } - var a []string - for i.Next() { - a = append(a, i.Value()) - } - return a -} diff --git a/tsdb/errors.go b/tsdb/errors.go deleted file mode 100644 index 9c5f4cfc2e..0000000000 --- a/tsdb/errors.go +++ /dev/null @@ -1,19 +0,0 @@ -package tsdb - -import ( - "fmt" -) - -// PartialWriteError indicates a write request could only write a portion of the -// requested values. -type PartialWriteError struct { - Reason string - Dropped int - - // A sorted slice of series keys that were dropped. - DroppedKeys [][]byte -} - -func (e PartialWriteError) Error() string { - return fmt.Sprintf("partial write: %s dropped=%d", e.Reason, e.Dropped) -} diff --git a/tsdb/explode.go b/tsdb/explode.go deleted file mode 100644 index 1f6801a847..0000000000 --- a/tsdb/explode.go +++ /dev/null @@ -1,106 +0,0 @@ -package tsdb - -import ( - "encoding/binary" - - "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/models" -) - -// DecodeName converts tsdb internal serialization back to organization and bucket IDs. 
-func DecodeName(name [16]byte) (org, bucket influxdb.ID) { - org = influxdb.ID(binary.BigEndian.Uint64(name[0:8])) - bucket = influxdb.ID(binary.BigEndian.Uint64(name[8:16])) - return -} - -// DecodeNameSlice converts tsdb internal serialization back to organization and bucket IDs. -func DecodeNameSlice(name []byte) (org, bucket influxdb.ID) { - return influxdb.ID(binary.BigEndian.Uint64(name[0:8])), influxdb.ID(binary.BigEndian.Uint64(name[8:16])) -} - -// EncodeName converts org/bucket pairs to the tsdb internal serialization -func EncodeName(org, bucket influxdb.ID) [16]byte { - var nameBytes [16]byte - binary.BigEndian.PutUint64(nameBytes[0:8], uint64(org)) - binary.BigEndian.PutUint64(nameBytes[8:16], uint64(bucket)) - return nameBytes -} - -// EncodeNameSlice converts org/bucket pairs to the tsdb internal serialization but returns a byte slice. -func EncodeNameSlice(org, bucket influxdb.ID) []byte { - buf := EncodeName(org, bucket) - return buf[:] -} - -// EncodeOrgName converts org to the tsdb internal serialization that may be used -// as a prefix when searching for keys matching a specific organization. -func EncodeOrgName(org influxdb.ID) [8]byte { - var orgBytes [8]byte - binary.BigEndian.PutUint64(orgBytes[0:8], uint64(org)) - return orgBytes -} - -// EncodeNameString converts org/bucket pairs to the tsdb internal serialization -func EncodeNameString(org, bucket influxdb.ID) string { - name := EncodeName(org, bucket) - return string(name[:]) -} - -// ExplodePoints creates a list of points that only contains one field per point. It also -// moves the measurement to a tag, and changes the measurement to be the provided argument. -func ExplodePoints(org, bucket influxdb.ID, points []models.Point) ([]models.Point, error) { - out := make([]models.Point, 0, len(points)) - - // TODO(jeff): We should add a RawEncode() method or something to the influxdb.ID type - // or we should use hex encoded measurement names. Either way, we shouldn't be doing a - // decode of the encode here, and we don't want to depend on details of how the ID type - // is represented. - ob := EncodeName(org, bucket) - name := string(ob[:]) - - tags := make(models.Tags, 1) - for _, pt := range points { - tags = tags[:1] // reset buffer for next point. - - tags[0] = models.NewTag(models.MeasurementTagKeyBytes, pt.Name()) - pt.ForEachTag(func(k, v []byte) bool { - tags = append(tags, models.NewTag(k, v)) - return true - }) - - t := pt.Time() - itr := pt.FieldIterator() - tags = append(tags, models.Tag{}) // Make room for field key and value. 
- - for itr.Next() { - tags[len(tags)-1] = models.NewTag(models.FieldKeyTagKeyBytes, itr.FieldKey()) - - var err error - field := make(models.Fields, 1) - switch itr.Type() { - case models.Float: - field[string(itr.FieldKey())], err = itr.FloatValue() - case models.Integer: - field[string(itr.FieldKey())], err = itr.IntegerValue() - case models.Boolean: - field[string(itr.FieldKey())], err = itr.BooleanValue() - case models.String: - field[string(itr.FieldKey())] = itr.StringValue() - case models.Unsigned: - field[string(itr.FieldKey())], err = itr.UnsignedValue() - } - if err != nil { - return nil, err - } - - pt, err := models.NewPoint(name, tags, field, t) - if err != nil { - return nil, err - } - out = append(out, pt) - } - } - - return out, nil -} diff --git a/tsdb/explode_test.go b/tsdb/explode_test.go deleted file mode 100644 index 1061cb22c1..0000000000 --- a/tsdb/explode_test.go +++ /dev/null @@ -1,42 +0,0 @@ -package tsdb_test - -import ( - "fmt" - "testing" - - "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/tsdb" -) - -func TestNames(t *testing.T) { - goodExamples := []struct { - Org uint64 - Bucket uint64 - Name [16]byte - }{ - {Org: 12345678, Bucket: 87654321, Name: [16]byte{0, 0, 0, 0, 0, 188, 97, 78, 0, 0, 0, 0, 5, 57, 127, 177}}, - {Org: 1234567891011, Bucket: 87654321, Name: [16]byte{0, 0, 1, 31, 113, 251, 8, 67, 0, 0, 0, 0, 5, 57, 127, 177}}, - {Org: 12345678, Bucket: 8765432100000, Name: [16]byte{0, 0, 0, 0, 0, 188, 97, 78, 0, 0, 7, 248, 220, 119, 116, 160}}, - {Org: 123456789929, Bucket: 8765432100000, Name: [16]byte{0, 0, 0, 28, 190, 153, 29, 169, 0, 0, 7, 248, 220, 119, 116, 160}}, - } - - for _, example := range goodExamples { - t.Run(fmt.Sprintf("%d%d", example.Org, example.Bucket), func(t *testing.T) { - - name := tsdb.EncodeName(influxdb.ID(example.Org), influxdb.ID(example.Bucket)) - - if got, exp := name, example.Name; got != exp { - t.Errorf("got name %q, expected %q", got, exp) - } - - org, bucket := tsdb.DecodeName(name) - - if gotOrg, expOrg := org, example.Org; gotOrg != influxdb.ID(expOrg) { - t.Errorf("got organization ID %q, expected %q", gotOrg, expOrg) - } - if gotBucket, expBucket := bucket, example.Bucket; gotBucket != influxdb.ID(expBucket) { - t.Errorf("got organization ID %q, expected %q", gotBucket, expBucket) - } - }) - } -} diff --git a/tsdb/gen_test.go b/tsdb/gen_test.go deleted file mode 100644 index 7ae09c3cd4..0000000000 --- a/tsdb/gen_test.go +++ /dev/null @@ -1,14 +0,0 @@ -//go:generate sh -c "curl -L https://github.com/influxdata/testdata/raw/2020.07.20.1/tsdbtestdata.tar.gz | tar xz" -package tsdb_test - -import ( - "fmt" - "os" -) - -func init() { - if _, err := os.Stat("./testdata"); err != nil { - fmt.Println("Run go generate to download testdata directory.") - os.Exit(1) - } -} diff --git a/tsdb/meta.go b/tsdb/meta.go deleted file mode 100644 index 43755c71fa..0000000000 --- a/tsdb/meta.go +++ /dev/null @@ -1,59 +0,0 @@ -package tsdb - -import ( - "github.com/influxdata/influxdb/v2/models" -) - -// MakeTagsKey converts a tag set to bytes for use as a lookup key. -func MakeTagsKey(keys []string, tags models.Tags) []byte { - // precondition: keys is sorted - // precondition: models.Tags is sorted - - // Empty maps marshal to empty bytes. 
- if len(keys) == 0 || len(tags) == 0 { - return nil - } - - sel := make([]int, 0, len(keys)) - - sz := 0 - i, j := 0, 0 - for i < len(keys) && j < len(tags) { - if keys[i] < string(tags[j].Key) { - i++ - } else if keys[i] > string(tags[j].Key) { - j++ - } else { - sel = append(sel, j) - sz += len(keys[i]) + len(tags[j].Value) - i++ - j++ - } - } - - if len(sel) == 0 { - // no tags matched the requested keys - return nil - } - - sz += (len(sel) * 2) - 1 // selected tags, add separators - - // Generate marshaled bytes. - b := make([]byte, sz) - buf := b - for _, k := range sel { - copy(buf, tags[k].Key) - buf[len(tags[k].Key)] = '|' - buf = buf[len(tags[k].Key)+1:] - } - - for i, k := range sel { - copy(buf, tags[k].Value) - if i < len(sel)-1 { - buf[len(tags[k].Value)] = '|' - buf = buf[len(tags[k].Value)+1:] - } - } - - return b -} diff --git a/tsdb/meta_test.go b/tsdb/meta_test.go deleted file mode 100644 index ec35381439..0000000000 --- a/tsdb/meta_test.go +++ /dev/null @@ -1,89 +0,0 @@ -package tsdb_test - -import ( - "bytes" - "fmt" - "testing" - - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb" -) - -// Ensure tags can be marshaled into a byte slice. -func TestMakeTagsKey(t *testing.T) { - for i, tt := range []struct { - keys []string - tags models.Tags - result []byte - }{ - { - keys: nil, - tags: nil, - result: nil, - }, - { - keys: []string{"foo"}, - tags: models.NewTags(map[string]string{"foo": "bar"}), - result: []byte(`foo|bar`), - }, - { - keys: []string{"foo"}, - tags: models.NewTags(map[string]string{"baz": "battttt"}), - result: []byte(``), - }, - { - keys: []string{"baz", "foo"}, - tags: models.NewTags(map[string]string{"baz": "battttt"}), - result: []byte(`baz|battttt`), - }, - { - keys: []string{"baz", "foo", "zzz"}, - tags: models.NewTags(map[string]string{"foo": "bar"}), - result: []byte(`foo|bar`), - }, - { - keys: []string{"baz", "foo"}, - tags: models.NewTags(map[string]string{"foo": "bar", "baz": "battttt"}), - result: []byte(`baz|foo|battttt|bar`), - }, - { - keys: []string{"baz"}, - tags: models.NewTags(map[string]string{"baz": "battttt", "foo": "bar"}), - result: []byte(`baz|battttt`), - }, - } { - result := tsdb.MakeTagsKey(tt.keys, tt.tags) - if !bytes.Equal(result, tt.result) { - t.Fatalf("%d. unexpected result: exp=%s, got=%s", i, tt.result, result) - } - } -} - -func BenchmarkMakeTagsKey_KeyN1(b *testing.B) { benchmarkMakeTagsKey(b, 1) } -func BenchmarkMakeTagsKey_KeyN3(b *testing.B) { benchmarkMakeTagsKey(b, 3) } -func BenchmarkMakeTagsKey_KeyN5(b *testing.B) { benchmarkMakeTagsKey(b, 5) } -func BenchmarkMakeTagsKey_KeyN10(b *testing.B) { benchmarkMakeTagsKey(b, 10) } - -func makeTagsAndKeys(keyN int) ([]string, models.Tags) { - const keySize, valueSize = 8, 15 - - // Generate tag map. - keys := make([]string, keyN) - tags := make(map[string]string) - for i := 0; i < keyN; i++ { - keys[i] = fmt.Sprintf("%0*d", keySize, i) - tags[keys[i]] = fmt.Sprintf("%0*d", valueSize, i) - } - - return keys, models.NewTags(tags) -} - -func benchmarkMakeTagsKey(b *testing.B, keyN int) { - keys, tags := makeTagsAndKeys(keyN) - - // Unmarshal map into byte slice. 
- b.ReportAllocs() - for i := 0; i < b.N; i++ { - tsdb.MakeTagsKey(keys, tags) - } -} diff --git a/tsdb/series_collection.go b/tsdb/series_collection.go deleted file mode 100644 index d22ca698ec..0000000000 --- a/tsdb/series_collection.go +++ /dev/null @@ -1,333 +0,0 @@ -package tsdb - -import ( - "sync" - "sync/atomic" - "unsafe" - - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/pkg/bytesutil" -) - -// SeriesCollection is a struct of arrays representation of a collection of series that allows -// for efficient filtering. -type SeriesCollection struct { - Points []models.Point - Keys [][]byte - SeriesKeys [][]byte - Names [][]byte - Tags []models.Tags - Types []models.FieldType - SeriesIDs []SeriesID - - // Keeps track of invalid entries. - Dropped uint64 - DroppedKeys [][]byte - Reason string - - // Used by the concurrent iterators to stage drops. Inefficient, but should be - // very infrequently used. - state *seriesCollectionState -} - -// seriesCollectionState keeps track of concurrent iterator state. -type seriesCollectionState struct { - mu sync.Mutex - reason string - index map[int]struct{} -} - -// NewSeriesCollection builds a SeriesCollection from a slice of points. It does some filtering -// of invalid points. -func NewSeriesCollection(points []models.Point) *SeriesCollection { - out := &SeriesCollection{ - Points: append([]models.Point(nil), points...), - Keys: make([][]byte, 0, len(points)), - Names: make([][]byte, 0, len(points)), - Tags: make([]models.Tags, 0, len(points)), - Types: make([]models.FieldType, 0, len(points)), - } - - for _, pt := range points { - out.Keys = append(out.Keys, pt.Key()) - out.Names = append(out.Names, pt.Name()) - out.Tags = append(out.Tags, pt.Tags()) - - fi := pt.FieldIterator() - fi.Next() - out.Types = append(out.Types, fi.Type()) - } - - return out -} - -// Duplicate returns a copy of the SeriesCollection. The slices are shared. Appending to any of -// them may or may not be reflected. -func (s SeriesCollection) Duplicate() *SeriesCollection { return &s } - -// Length returns the length of the first non-nil slice in the collection, or 0 if there is no -// non-nil slice. -func (s *SeriesCollection) Length() int { - switch { - case s.Points != nil: - return len(s.Points) - case s.Keys != nil: - return len(s.Keys) - case s.SeriesKeys != nil: - return len(s.SeriesKeys) - case s.Names != nil: - return len(s.Names) - case s.Tags != nil: - return len(s.Tags) - case s.Types != nil: - return len(s.Types) - case s.SeriesIDs != nil: - return len(s.SeriesIDs) - default: - return 0 - } -} - -// Copy will copy the element at src into dst in all slices that can: x[dst] = x[src]. -func (s *SeriesCollection) Copy(dst, src int) { - if dst == src { - return - } - udst, usrc := uint(dst), uint(src) - if n := uint(len(s.Points)); udst < n && usrc < n { - s.Points[udst] = s.Points[usrc] - } - if n := uint(len(s.Keys)); udst < n && usrc < n { - s.Keys[udst] = s.Keys[usrc] - } - if n := uint(len(s.SeriesKeys)); udst < n && usrc < n { - s.SeriesKeys[udst] = s.SeriesKeys[usrc] - } - if n := uint(len(s.Names)); udst < n && usrc < n { - s.Names[udst] = s.Names[usrc] - } - if n := uint(len(s.Tags)); udst < n && usrc < n { - s.Tags[udst] = s.Tags[usrc] - } - if n := uint(len(s.Types)); udst < n && usrc < n { - s.Types[udst] = s.Types[usrc] - } - if n := uint(len(s.SeriesIDs)); udst < n && usrc < n { - s.SeriesIDs[udst] = s.SeriesIDs[usrc] - } -} - -// Swap will swap the elements at i and j in all slices that can: x[i], x[j] = x[j], x[i]. 
-func (s *SeriesCollection) Swap(i, j int) { - if i == j { - return - } - ui, uj := uint(i), uint(j) - if n := uint(len(s.Points)); ui < n && uj < n { - s.Points[ui], s.Points[uj] = s.Points[uj], s.Points[ui] - } - if n := uint(len(s.Keys)); ui < n && uj < n { - s.Keys[ui], s.Keys[uj] = s.Keys[uj], s.Keys[ui] - } - if n := uint(len(s.SeriesKeys)); ui < n && uj < n { - s.SeriesKeys[ui], s.SeriesKeys[uj] = s.SeriesKeys[uj], s.SeriesKeys[ui] - } - if n := uint(len(s.Names)); ui < n && uj < n { - s.Names[ui], s.Names[uj] = s.Names[uj], s.Names[ui] - } - if n := uint(len(s.Tags)); ui < n && uj < n { - s.Tags[ui], s.Tags[uj] = s.Tags[uj], s.Tags[ui] - } - if n := uint(len(s.Types)); ui < n && uj < n { - s.Types[ui], s.Types[uj] = s.Types[uj], s.Types[ui] - } - if n := uint(len(s.SeriesIDs)); ui < n && uj < n { - s.SeriesIDs[ui], s.SeriesIDs[uj] = s.SeriesIDs[uj], s.SeriesIDs[ui] - } -} - -// Truncate will truncate all of the slices that can down to length: x = x[:length]. -func (s *SeriesCollection) Truncate(length int) { - ulength := uint(length) - if ulength < uint(len(s.Points)) { - s.Points = s.Points[:ulength] - } - if ulength < uint(len(s.Keys)) { - s.Keys = s.Keys[:ulength] - } - if ulength < uint(len(s.SeriesKeys)) { - s.SeriesKeys = s.SeriesKeys[:ulength] - } - if ulength < uint(len(s.Names)) { - s.Names = s.Names[:ulength] - } - if ulength < uint(len(s.Tags)) { - s.Tags = s.Tags[:ulength] - } - if ulength < uint(len(s.Types)) { - s.Types = s.Types[:ulength] - } - if ulength < uint(len(s.SeriesIDs)) { - s.SeriesIDs = s.SeriesIDs[:ulength] - } -} - -// Advance will advance all of the slices that can length elements: x = x[length:]. -func (s *SeriesCollection) Advance(length int) { - ulength := uint(length) - if ulength < uint(len(s.Points)) { - s.Points = s.Points[ulength:] - } - if ulength < uint(len(s.Keys)) { - s.Keys = s.Keys[ulength:] - } - if ulength < uint(len(s.SeriesKeys)) { - s.SeriesKeys = s.SeriesKeys[ulength:] - } - if ulength < uint(len(s.Names)) { - s.Names = s.Names[ulength:] - } - if ulength < uint(len(s.Tags)) { - s.Tags = s.Tags[ulength:] - } - if ulength < uint(len(s.Types)) { - s.Types = s.Types[ulength:] - } - if ulength < uint(len(s.SeriesIDs)) { - s.SeriesIDs = s.SeriesIDs[ulength:] - } -} - -// InvalidateAll causes all of the entries to become invalid. -func (s *SeriesCollection) InvalidateAll(reason string) { - if s.Reason == "" { - s.Reason = reason - } - s.Dropped += uint64(len(s.Keys)) - s.DroppedKeys = append(s.DroppedKeys, s.Keys...) - s.Truncate(0) -} - -// ApplyConcurrentDrops will remove all of the dropped values during concurrent iteration. It should -// not be called concurrently with any calls to Invalid. -func (s *SeriesCollection) ApplyConcurrentDrops() { - state := s.getState(false) - if state == nil { - return - } - - length, j := s.Length(), 0 - for i := 0; i < length; i++ { - if _, ok := state.index[i]; ok { - s.Dropped++ - - if i < len(s.Keys) { - s.DroppedKeys = append(s.DroppedKeys, s.Keys[i]) - } - - continue - } - - s.Copy(j, i) - j++ - } - s.Truncate(j) - - if s.Reason == "" { - s.Reason = state.reason - } - - // clear concurrent state - atomic.StorePointer((*unsafe.Pointer)(unsafe.Pointer(&s.state)), nil) -} - -// getState returns the SeriesCollection's concurrent state. If alloc is true and there -// is no state, it will attempt to allocate one and set it. It is safe to call concurrently, but -// not with ApplyConcurrentDrops. 
-func (s *SeriesCollection) getState(alloc bool) *seriesCollectionState { - addr := (*unsafe.Pointer)(unsafe.Pointer(&s.state)) - - // fast path: load pointer and it already exists. always return the result if we can't alloc. - if ptr := atomic.LoadPointer(addr); ptr != nil || !alloc { - return (*seriesCollectionState)(ptr) - } - - // nothing there. make a new state and try to swap it in. - atomic.CompareAndSwapPointer(addr, nil, unsafe.Pointer(new(seriesCollectionState))) - - // reload the pointer. this way we always end up with the winner of the race. - return (*seriesCollectionState)(atomic.LoadPointer(addr)) -} - -// invalidIndex stages the index as invalid with the reason. It will be removed when -// ApplyConcurrentDrops is called. -func (s *SeriesCollection) invalidIndex(index int, reason string) { - state := s.getState(true) - - state.mu.Lock() - if state.index == nil { - state.index = make(map[int]struct{}) - } - state.index[index] = struct{}{} - if state.reason == "" { - state.reason = reason - } - state.mu.Unlock() -} - -// PartialWriteError returns a PartialWriteError if any entries have been marked as invalid. It -// returns an error to avoid `return collection.PartialWriteError()` always being non-nil. -func (s *SeriesCollection) PartialWriteError() error { - if s.Dropped == 0 { - return nil - } - droppedKeys := bytesutil.SortDedup(s.DroppedKeys) - return PartialWriteError{ - Reason: s.Reason, - Dropped: len(droppedKeys), - DroppedKeys: droppedKeys, - } -} - -// Iterator returns a new iterator over the entries in the collection. Multiple iterators -// can exist at the same time. Marking entries as invalid/skipped is more expensive, but thread -// safe. You must call ApplyConcurrentDrops after all of the iterators are finished. -func (s *SeriesCollection) Iterator() SeriesCollectionIterator { - return SeriesCollectionIterator{ - s: s, - length: s.Length(), - index: -1, - } -} - -// SeriesCollectionIterator is an iterator over the collection of series. -type SeriesCollectionIterator struct { - s *SeriesCollection - length int - index int -} - -// Next advances the iterator and returns false if it's done. -func (i *SeriesCollectionIterator) Next() bool { - i.index++ - return i.index < i.length -} - -// Helpers that return the current state of the iterator. - -func (i SeriesCollectionIterator) Index() int { return i.index } -func (i SeriesCollectionIterator) Length() int { return i.length } -func (i SeriesCollectionIterator) Point() models.Point { return i.s.Points[i.index] } -func (i SeriesCollectionIterator) Key() []byte { return i.s.Keys[i.index] } -func (i SeriesCollectionIterator) SeriesKey() []byte { return i.s.SeriesKeys[i.index] } -func (i SeriesCollectionIterator) Name() []byte { return i.s.Names[i.index] } -func (i SeriesCollectionIterator) Tags() models.Tags { return i.s.Tags[i.index] } -func (i SeriesCollectionIterator) Type() models.FieldType { return i.s.Types[i.index] } -func (i SeriesCollectionIterator) SeriesID() SeriesID { return i.s.SeriesIDs[i.index] } - -// Invalid flags the current entry as invalid, including it in the set of dropped keys and -// recording a reason. Only the first reason is kept. This is safe for concurrent callers, -// but ApplyConcurrentDrops must be called after all iterators are finished. 
-func (i *SeriesCollectionIterator) Invalid(reason string) { - i.s.invalidIndex(i.index, reason) -} diff --git a/tsdb/series_collection_test.go b/tsdb/series_collection_test.go deleted file mode 100644 index 167358a19f..0000000000 --- a/tsdb/series_collection_test.go +++ /dev/null @@ -1,149 +0,0 @@ -package tsdb - -import ( - "reflect" - "testing" - "time" - - "github.com/influxdata/influxdb/v2/models" -) - -func TestSeriesCollection(t *testing.T) { - // some helper functions. short names because local scope and frequently used. - var ( - equal = reflect.DeepEqual - b = func(s string) []byte { return []byte(s) } - bs = func(s ...string) [][]byte { - out := make([][]byte, len(s)) - for i := range s { - out[i] = b(s[i]) - } - return out - } - - assertEqual = func(t *testing.T, name string, got, wanted interface{}) { - t.Helper() - if !equal(got, wanted) { - t.Fatalf("bad %s: got: %v but wanted: %v", name, got, wanted) - } - } - ) - - t.Run("New", func(t *testing.T) { - points := []models.Point{ - models.MustNewPoint("a", models.Tags{}, models.Fields{"f": 1.0}, time.Now()), - models.MustNewPoint("b", models.Tags{}, models.Fields{"b": true}, time.Now()), - models.MustNewPoint("c", models.Tags{}, models.Fields{"i": int64(1)}, time.Now()), - } - collection := NewSeriesCollection(points) - - assertEqual(t, "length", collection.Length(), 3) - - for iter := collection.Iterator(); iter.Next(); { - ipt, spt := iter.Point(), points[iter.Index()] - fi := spt.FieldIterator() - fi.Next() - - assertEqual(t, "point", ipt, spt) - assertEqual(t, "key", iter.Key(), spt.Key()) - assertEqual(t, "name", iter.Name(), spt.Name()) - assertEqual(t, "tags", iter.Tags(), spt.Tags()) - assertEqual(t, "type", iter.Type(), fi.Type()) - } - }) - - t.Run("Copy", func(t *testing.T) { - collection := &SeriesCollection{ - Keys: bs("ka", "kb", "kc"), - Names: bs("na", "nb", "nc"), - } - - collection.Copy(0, 2) - assertEqual(t, "keys", collection.Keys, bs("kc", "kb", "kc")) - assertEqual(t, "names", collection.Names, bs("nc", "nb", "nc")) - - collection.Copy(0, 4) // out of bounds - assertEqual(t, "keys", collection.Keys, bs("kc", "kb", "kc")) - assertEqual(t, "names", collection.Names, bs("nc", "nb", "nc")) - }) - - t.Run("Swap", func(t *testing.T) { - collection := &SeriesCollection{ - Keys: bs("ka", "kb", "kc"), - Names: bs("na", "nb", "nc"), - } - - collection.Swap(0, 2) - assertEqual(t, "keys", collection.Keys, bs("kc", "kb", "ka")) - assertEqual(t, "names", collection.Names, bs("nc", "nb", "na")) - - collection.Swap(0, 4) // out of bounds - assertEqual(t, "keys", collection.Keys, bs("kc", "kb", "ka")) - assertEqual(t, "names", collection.Names, bs("nc", "nb", "na")) - }) - - t.Run("Truncate", func(t *testing.T) { - collection := &SeriesCollection{ - Keys: bs("ka", "kb", "kc"), - Names: bs("na", "nb", "nc"), - } - - collection.Truncate(1) - assertEqual(t, "keys", collection.Keys, bs("ka")) - assertEqual(t, "names", collection.Names, bs("na")) - - collection.Truncate(0) - assertEqual(t, "keys", collection.Keys, bs()) - assertEqual(t, "names", collection.Names, bs()) - }) - - t.Run("Advance", func(t *testing.T) { - collection := &SeriesCollection{ - Keys: bs("ka", "kb", "kc"), - Names: bs("na", "nb", "nc"), - } - - collection.Advance(1) - assertEqual(t, "keys", collection.Keys, bs("kb", "kc")) - assertEqual(t, "names", collection.Names, bs("nb", "nc")) - - collection.Advance(1) - assertEqual(t, "keys", collection.Keys, bs("kc")) - assertEqual(t, "names", collection.Names, bs("nc")) - }) - - t.Run("InvalidateAll", func(t 
*testing.T) { - collection := &SeriesCollection{Keys: bs("ka", "kb", "kc")} - - collection.InvalidateAll("test reason") - assertEqual(t, "length", collection.Length(), 0) - assertEqual(t, "error", collection.PartialWriteError(), PartialWriteError{ - Reason: "test reason", - Dropped: 3, - DroppedKeys: bs("ka", "kb", "kc"), - }) - }) - - t.Run("Invalid", func(t *testing.T) { - collection := &SeriesCollection{Keys: bs("ka", "kb", "kc")} - - // invalidate half the entries - for iter := collection.Iterator(); iter.Next(); { - if iter.Index()%2 == 0 { - iter.Invalid("test reason") - } - } - - // nothing happens yet: all values are staged - assertEqual(t, "length", collection.Length(), 3) - - // apply all of the invalid calls - collection.ApplyConcurrentDrops() - assertEqual(t, "length", collection.Length(), 1) - assertEqual(t, "error", collection.PartialWriteError(), PartialWriteError{ - Reason: "test reason", - Dropped: 2, - DroppedKeys: bs("ka", "kc"), - }) - }) -} diff --git a/tsdb/series_id.go b/tsdb/series_id.go deleted file mode 100644 index a3667863fb..0000000000 --- a/tsdb/series_id.go +++ /dev/null @@ -1,78 +0,0 @@ -package tsdb - -import ( - "unsafe" - - "github.com/influxdata/influxdb/v2/models" -) - -const ( - // constants describing bit layout of id and type info - seriesIDTypeFlag = 1 << 63 // a flag marking that the id contains type info - seriesIDValueMask = 0xFFFFFFFF // series ids numerically are 32 bits - seriesIDTypeShift = 32 // we put the type right after the value info - seriesIDTypeMask = 0xFF << seriesIDTypeShift // a mask for the type byte - seriesIDSize = 8 -) - -// SeriesID is the type of a series id. It is logically a uint64, but encoded as a struct so -// that we gain more type checking when changing operations on it. The field is exported only -// so that tests that use reflection based comparisons still work; no one should use the field -// directly. -type SeriesID struct{ ID uint64 } - -// NewSeriesID constructs a series id from the raw value. It discards any type information. -func NewSeriesID(id uint64) SeriesID { return SeriesID{ID: id & seriesIDValueMask} } - -// IsZero returns if the SeriesID is zero. -func (s SeriesID) IsZero() bool { return s.ID == 0 } - -// ID returns the raw id for the SeriesID. -func (s SeriesID) RawID() uint64 { return s.ID } - -// WithType constructs a SeriesIDTyped with the given type. -func (s SeriesID) WithType(typ models.FieldType) SeriesIDTyped { - return NewSeriesIDTyped(s.ID | seriesIDTypeFlag | (uint64(typ&0xFF) << seriesIDTypeShift)) -} - -// Greater returns if the SeriesID is greater than the passed in value. -func (s SeriesID) Greater(o SeriesID) bool { return s.ID > o.ID } - -// Less returns if the SeriesID is less than the passed in value. -func (s SeriesID) Less(o SeriesID) bool { return s.ID < o.ID } - -// SeriesIDType represents a series id with a type. It is logically a uint64, but encoded as -// a struct so that we gain more type checking when changing operations on it. The field is -// exported only so that tests that use reflection based comparisons still work; no one should -// use the field directly. -type SeriesIDTyped struct{ ID uint64 } - -// NewSeriesIDTyped constructs a typed series id from the raw values. -func NewSeriesIDTyped(id uint64) SeriesIDTyped { return SeriesIDTyped{ID: id} } - -// IsZero returns if the SeriesIDTyped is zero. It ignores any type information. -func (s SeriesIDTyped) IsZero() bool { return s.ID&seriesIDValueMask == 0 } - -// ID returns the raw id for the SeriesIDTyped. 
-func (s SeriesIDTyped) RawID() uint64 { return s.ID } - -// SeriesID constructs a SeriesID, discarding any type information. -func (s SeriesIDTyped) SeriesID() SeriesID { return NewSeriesID(s.ID) } - -// HasType returns if the id actually contains a type. -func (s SeriesIDTyped) HasType() bool { return s.ID&seriesIDTypeFlag > 0 } - -// Type returns the associated type. -func (s SeriesIDTyped) Type() models.FieldType { - return models.FieldType((s.ID & seriesIDTypeMask) >> seriesIDTypeShift) -} - -type ( - // some static assertions that the SeriesIDSize matches the structs we defined. - // if the values are not the same, at least one will be negative causing a compilation failure - _ [seriesIDSize - unsafe.Sizeof(SeriesID{})]byte - _ [unsafe.Sizeof(SeriesID{}) - seriesIDSize]byte - - _ [seriesIDSize - unsafe.Sizeof(SeriesIDTyped{})]byte - _ [unsafe.Sizeof(SeriesIDTyped{}) - seriesIDSize]byte -) diff --git a/tsdb/series_id_test.go b/tsdb/series_id_test.go deleted file mode 100644 index 4406d910dc..0000000000 --- a/tsdb/series_id_test.go +++ /dev/null @@ -1,31 +0,0 @@ -package tsdb - -import ( - "math/rand" - "testing" - - "github.com/influxdata/influxdb/v2/models" -) - -func TestSeriesID(t *testing.T) { - types := []models.FieldType{ - models.Integer, - models.Float, - models.Boolean, - models.String, - models.Unsigned, - } - - for i := 0; i < 1000000; i++ { - id := NewSeriesID(uint64(rand.Int31())) - for _, typ := range types { - typed := id.WithType(typ) - if got := typed.Type(); got != typ { - t.Fatalf("wanted: %v got: %v", typ, got) - } - if got := typed.SeriesID(); id != got { - t.Fatalf("wanted: %016x got: %016x", id, got) - } - } - } -} diff --git a/tsdb/series_iterators.go b/tsdb/series_iterators.go deleted file mode 100644 index 6bd1ce61d3..0000000000 --- a/tsdb/series_iterators.go +++ /dev/null @@ -1,699 +0,0 @@ -package tsdb - -import ( - "bytes" - - "github.com/influxdata/influxql" -) - -// SeriesIDElem represents a single series and optional expression. -type SeriesIDElem struct { - SeriesID SeriesID - Expr influxql.Expr -} - -// SeriesIDIterator represents a iterator over a list of series ids. -type SeriesIDIterator interface { - Next() (SeriesIDElem, error) - Close() error -} - -// SeriesIDSetIterator represents an iterator that can produce a SeriesIDSet. -type SeriesIDSetIterator interface { - SeriesIDIterator - SeriesIDSet() *SeriesIDSet -} - -type seriesIDSetIterator struct { - ss *SeriesIDSet - itr SeriesIDSetIterable -} - -func NewSeriesIDSetIterator(ss *SeriesIDSet) SeriesIDSetIterator { - if ss == nil || ss.bitmap == nil { - return nil - } - return &seriesIDSetIterator{ss: ss, itr: ss.Iterator()} -} - -func (itr *seriesIDSetIterator) Next() (SeriesIDElem, error) { - if !itr.itr.HasNext() { - return SeriesIDElem{}, nil - } - return SeriesIDElem{SeriesID: NewSeriesID(uint64(itr.itr.Next()))}, nil -} - -func (itr *seriesIDSetIterator) Close() error { return nil } - -func (itr *seriesIDSetIterator) SeriesIDSet() *SeriesIDSet { return itr.ss } - -// NewSeriesIDSetIterators returns a slice of SeriesIDSetIterator if all itrs -// can be type casted. Otherwise returns nil. -func NewSeriesIDSetIterators(itrs []SeriesIDIterator) []SeriesIDSetIterator { - if len(itrs) == 0 { - return nil - } - - a := make([]SeriesIDSetIterator, len(itrs)) - for i := range itrs { - if itr, ok := itrs[i].(SeriesIDSetIterator); ok { - a[i] = itr - } else { - return nil - } - } - return a -} - -// NewSeriesIDSliceIterator returns a SeriesIDIterator that iterates over a slice. 
-func NewSeriesIDSliceIterator(ids []SeriesID) *SeriesIDSliceIterator { - return &SeriesIDSliceIterator{ids: ids} -} - -// SeriesIDSliceIterator iterates over a slice of series ids. -type SeriesIDSliceIterator struct { - ids []SeriesID -} - -// Next returns the next series id in the slice. -func (itr *SeriesIDSliceIterator) Next() (SeriesIDElem, error) { - if len(itr.ids) == 0 { - return SeriesIDElem{}, nil - } - id := itr.ids[0] - itr.ids = itr.ids[1:] - return SeriesIDElem{SeriesID: id}, nil -} - -func (itr *SeriesIDSliceIterator) Close() error { return nil } - -// SeriesIDSet returns a set of all remaining ids. -func (itr *SeriesIDSliceIterator) SeriesIDSet() *SeriesIDSet { - s := NewSeriesIDSet() - for _, id := range itr.ids { - s.AddNoLock(id) - } - return s -} - -type SeriesIDIterators []SeriesIDIterator - -func (a SeriesIDIterators) Close() (err error) { - for i := range a { - if e := a[i].Close(); e != nil && err == nil { - err = e - } - } - return err -} - -// seriesIDExprIterator is an iterator that attaches an associated expression. -type SeriesIDExprIterator struct { - itr SeriesIDIterator - expr influxql.Expr -} - -// newSeriesIDExprIterator returns a new instance of seriesIDExprIterator. -func NewSeriesIDExprIterator(itr SeriesIDIterator, expr influxql.Expr) SeriesIDIterator { - if itr == nil { - return nil - } - - return &SeriesIDExprIterator{ - itr: itr, - expr: expr, - } -} - -func (itr *SeriesIDExprIterator) Close() error { - return itr.itr.Close() -} - -// Next returns the next element in the iterator. -func (itr *SeriesIDExprIterator) Next() (SeriesIDElem, error) { - elem, err := itr.itr.Next() - if err != nil { - return SeriesIDElem{}, err - } else if elem.SeriesID.IsZero() { - return SeriesIDElem{}, nil - } - elem.Expr = itr.expr - return elem, nil -} - -// MergeSeriesIDIterators returns an iterator that merges a set of iterators. -// Iterators that are first in the list take precedence and a deletion by those -// early iterators will invalidate elements by later iterators. -func MergeSeriesIDIterators(itrs ...SeriesIDIterator) SeriesIDIterator { - if n := len(itrs); n == 0 { - return nil - } else if n == 1 { - return itrs[0] - } - - // Merge as series id sets, if available. - if a := NewSeriesIDSetIterators(itrs); a != nil { - sets := make([]*SeriesIDSet, len(a)) - for i := range a { - sets[i] = a[i].SeriesIDSet() - } - - ss := NewSeriesIDSet() - ss.Merge(sets...) - SeriesIDIterators(itrs).Close() - return NewSeriesIDSetIterator(ss) - } - - return &seriesIDMergeIterator{ - buf: make([]SeriesIDElem, len(itrs)), - itrs: itrs, - } -} - -// seriesIDMergeIterator is an iterator that merges multiple iterators together. -type seriesIDMergeIterator struct { - buf []SeriesIDElem - itrs []SeriesIDIterator -} - -func (itr *seriesIDMergeIterator) Close() (err error) { - return SeriesIDIterators(itr.itrs).Close() -} - -// Next returns the element with the next lowest name/tags across the iterators. -func (itr *seriesIDMergeIterator) Next() (SeriesIDElem, error) { - // Find next lowest id amongst the buffers. - var elem SeriesIDElem - for i := range itr.buf { - buf := &itr.buf[i] - - // Fill buffer. - if buf.SeriesID.IsZero() { - elem, err := itr.itrs[i].Next() - if err != nil { - return SeriesIDElem{}, nil - } else if elem.SeriesID.IsZero() { - continue - } - itr.buf[i] = elem - } - - if elem.SeriesID.IsZero() || buf.SeriesID.Less(elem.SeriesID) { - elem = *buf - } - } - - // Return EOF if no elements remaining. 
- if elem.SeriesID.IsZero() { - return SeriesIDElem{}, nil - } - - // Clear matching buffers. - for i := range itr.buf { - if itr.buf[i].SeriesID == elem.SeriesID { - itr.buf[i].SeriesID = SeriesID{} - } - } - return elem, nil -} - -// IntersectSeriesIDIterators returns an iterator that only returns series which -// occur in both iterators. If both series have associated expressions then -// they are combined together. -func IntersectSeriesIDIterators(itr0, itr1 SeriesIDIterator) SeriesIDIterator { - if itr0 == nil || itr1 == nil { - if itr0 != nil { - itr0.Close() - } - if itr1 != nil { - itr1.Close() - } - return nil - } - - // Create series id set, if available. - if a := NewSeriesIDSetIterators([]SeriesIDIterator{itr0, itr1}); a != nil { - itr0.Close() - itr1.Close() - return NewSeriesIDSetIterator(a[0].SeriesIDSet().And(a[1].SeriesIDSet())) - } - - return &seriesIDIntersectIterator{itrs: [2]SeriesIDIterator{itr0, itr1}} -} - -// seriesIDIntersectIterator is an iterator that merges two iterators together. -type seriesIDIntersectIterator struct { - buf [2]SeriesIDElem - itrs [2]SeriesIDIterator -} - -func (itr *seriesIDIntersectIterator) Close() (err error) { - if e := itr.itrs[0].Close(); e != nil && err == nil { - err = e - } - if e := itr.itrs[1].Close(); e != nil && err == nil { - err = e - } - return err -} - -// Next returns the next element which occurs in both iterators. -func (itr *seriesIDIntersectIterator) Next() (_ SeriesIDElem, err error) { - for { - // Fill buffers. - if itr.buf[0].SeriesID.IsZero() { - if itr.buf[0], err = itr.itrs[0].Next(); err != nil { - return SeriesIDElem{}, err - } - } - if itr.buf[1].SeriesID.IsZero() { - if itr.buf[1], err = itr.itrs[1].Next(); err != nil { - return SeriesIDElem{}, err - } - } - - // Exit if either buffer is still empty. - if itr.buf[0].SeriesID.IsZero() || itr.buf[1].SeriesID.IsZero() { - return SeriesIDElem{}, nil - } - - // Skip if both series are not equal. - if a, b := itr.buf[0].SeriesID, itr.buf[1].SeriesID; a.Less(b) { - itr.buf[0].SeriesID = SeriesID{} - continue - } else if a.Greater(b) { - itr.buf[1].SeriesID = SeriesID{} - continue - } - - // Merge series together if equal. - elem := itr.buf[0] - - // Attach expression. - expr0 := itr.buf[0].Expr - expr1 := itr.buf[1].Expr - if expr0 == nil { - elem.Expr = expr1 - } else if expr1 == nil { - elem.Expr = expr0 - } else { - elem.Expr = influxql.Reduce(&influxql.BinaryExpr{ - Op: influxql.AND, - LHS: expr0, - RHS: expr1, - }, nil) - } - - itr.buf[0].SeriesID, itr.buf[1].SeriesID = SeriesID{}, SeriesID{} - return elem, nil - } -} - -// UnionSeriesIDIterators returns an iterator that returns series from both -// both iterators. If both series have associated expressions then they are -// combined together. -func UnionSeriesIDIterators(itr0, itr1 SeriesIDIterator) SeriesIDIterator { - // Return other iterator if either one is nil. - if itr0 == nil { - return itr1 - } else if itr1 == nil { - return itr0 - } - - // Create series id set, if available. - if a := NewSeriesIDSetIterators([]SeriesIDIterator{itr0, itr1}); a != nil { - itr0.Close() - itr1.Close() - ss := NewSeriesIDSet() - ss.Merge(a[0].SeriesIDSet(), a[1].SeriesIDSet()) - return NewSeriesIDSetIterator(ss) - } - - return &seriesIDUnionIterator{itrs: [2]SeriesIDIterator{itr0, itr1}} -} - -// seriesIDUnionIterator is an iterator that unions two iterators together. 
-type seriesIDUnionIterator struct { - buf [2]SeriesIDElem - itrs [2]SeriesIDIterator -} - -func (itr *seriesIDUnionIterator) Close() (err error) { - if e := itr.itrs[0].Close(); e != nil && err == nil { - err = e - } - if e := itr.itrs[1].Close(); e != nil && err == nil { - err = e - } - return err -} - -// Next returns the next element which occurs in both iterators. -func (itr *seriesIDUnionIterator) Next() (_ SeriesIDElem, err error) { - // Fill buffers. - if itr.buf[0].SeriesID.IsZero() { - if itr.buf[0], err = itr.itrs[0].Next(); err != nil { - return SeriesIDElem{}, err - } - } - if itr.buf[1].SeriesID.IsZero() { - if itr.buf[1], err = itr.itrs[1].Next(); err != nil { - return SeriesIDElem{}, err - } - } - - // Return non-zero or lesser series. - if a, b := itr.buf[0].SeriesID, itr.buf[1].SeriesID; a.IsZero() && b.IsZero() { - return SeriesIDElem{}, nil - } else if b.IsZero() || (!a.IsZero() && a.Less(b)) { - elem := itr.buf[0] - itr.buf[0].SeriesID = SeriesID{} - return elem, nil - } else if a.IsZero() || (!b.IsZero() && a.Greater(b)) { - elem := itr.buf[1] - itr.buf[1].SeriesID = SeriesID{} - return elem, nil - } - - // Attach element. - elem := itr.buf[0] - - // Attach expression. - expr0 := itr.buf[0].Expr - expr1 := itr.buf[1].Expr - if expr0 != nil && expr1 != nil { - elem.Expr = influxql.Reduce(&influxql.BinaryExpr{ - Op: influxql.OR, - LHS: expr0, - RHS: expr1, - }, nil) - } else { - elem.Expr = nil - } - - itr.buf[0].SeriesID, itr.buf[1].SeriesID = SeriesID{}, SeriesID{} - return elem, nil -} - -// DifferenceSeriesIDIterators returns an iterator that only returns series which -// occur the first iterator but not the second iterator. -func DifferenceSeriesIDIterators(itr0, itr1 SeriesIDIterator) SeriesIDIterator { - if itr0 == nil && itr1 == nil { - return nil - } else if itr1 == nil { - return itr0 - } else if itr0 == nil { - itr1.Close() - return nil - } - - // Create series id set, if available. - if a := NewSeriesIDSetIterators([]SeriesIDIterator{itr0, itr1}); a != nil { - itr0.Close() - itr1.Close() - return NewSeriesIDSetIterator(NewSeriesIDSetNegate(a[0].SeriesIDSet(), a[1].SeriesIDSet())) - } - - return &seriesIDDifferenceIterator{itrs: [2]SeriesIDIterator{itr0, itr1}} -} - -// seriesIDDifferenceIterator is an iterator that merges two iterators together. -type seriesIDDifferenceIterator struct { - buf [2]SeriesIDElem - itrs [2]SeriesIDIterator -} - -func (itr *seriesIDDifferenceIterator) Close() (err error) { - if e := itr.itrs[0].Close(); e != nil && err == nil { - err = e - } - if e := itr.itrs[1].Close(); e != nil && err == nil { - err = e - } - return err -} - -// Next returns the next element which occurs only in the first iterator. -func (itr *seriesIDDifferenceIterator) Next() (_ SeriesIDElem, err error) { - for { - // Fill buffers. - if itr.buf[0].SeriesID.IsZero() { - if itr.buf[0], err = itr.itrs[0].Next(); err != nil { - return SeriesIDElem{}, err - } - } - if itr.buf[1].SeriesID.IsZero() { - if itr.buf[1], err = itr.itrs[1].Next(); err != nil { - return SeriesIDElem{}, err - } - } - - // Exit if first buffer is still empty. - if itr.buf[0].SeriesID.IsZero() { - return SeriesIDElem{}, nil - } else if itr.buf[1].SeriesID.IsZero() { - elem := itr.buf[0] - itr.buf[0].SeriesID = SeriesID{} - return elem, nil - } - - // Return first series if it's less. - // If second series is less then skip it. - // If both series are equal then skip both. 
- if a, b := itr.buf[0].SeriesID, itr.buf[1].SeriesID; a.Less(b) { - elem := itr.buf[0] - itr.buf[0].SeriesID = SeriesID{} - return elem, nil - } else if a.Greater(b) { - itr.buf[1].SeriesID = SeriesID{} - continue - } else { - itr.buf[0].SeriesID, itr.buf[1].SeriesID = SeriesID{}, SeriesID{} - continue - } - } -} - -// MeasurementIterator represents a iterator over a list of measurements. -type MeasurementIterator interface { - Close() error - Next() ([]byte, error) -} - -// MergeMeasurementIterators returns an iterator that merges a set of iterators. -// Iterators that are first in the list take precedence and a deletion by those -// early iterators will invalidate elements by later iterators. -func MergeMeasurementIterators(itrs ...MeasurementIterator) MeasurementIterator { - if len(itrs) == 0 { - return nil - } else if len(itrs) == 1 { - return itrs[0] - } - - return &measurementMergeIterator{ - buf: make([][]byte, len(itrs)), - itrs: itrs, - } -} - -type measurementMergeIterator struct { - buf [][]byte - itrs []MeasurementIterator -} - -func (itr *measurementMergeIterator) Close() (err error) { - for i := range itr.itrs { - if e := itr.itrs[i].Close(); e != nil && err == nil { - err = e - } - } - return err -} - -// Next returns the element with the next lowest name across the iterators. -// -// If multiple iterators contain the same name then the first is returned -// and the remaining ones are skipped. -func (itr *measurementMergeIterator) Next() (_ []byte, err error) { - // Find next lowest name amongst the buffers. - var name []byte - for i, buf := range itr.buf { - // Fill buffer if empty. - if buf == nil { - if buf, err = itr.itrs[i].Next(); err != nil { - return nil, err - } else if buf != nil { - itr.buf[i] = buf - } else { - continue - } - } - - // Find next lowest name. - if name == nil || bytes.Compare(itr.buf[i], name) == -1 { - name = itr.buf[i] - } - } - - // Return nil if no elements remaining. - if name == nil { - return nil, nil - } - - // Merge all elements together and clear buffers. - for i, buf := range itr.buf { - if buf == nil || !bytes.Equal(buf, name) { - continue - } - itr.buf[i] = nil - } - return name, nil -} - -// TagKeyIterator represents a iterator over a list of tag keys. -type TagKeyIterator interface { - Close() error - Next() ([]byte, error) -} - -// MergeTagKeyIterators returns an iterator that merges a set of iterators. -func MergeTagKeyIterators(itrs ...TagKeyIterator) TagKeyIterator { - if len(itrs) == 0 { - return nil - } else if len(itrs) == 1 { - return itrs[0] - } - - return &tagKeyMergeIterator{ - buf: make([][]byte, len(itrs)), - itrs: itrs, - } -} - -type tagKeyMergeIterator struct { - buf [][]byte - itrs []TagKeyIterator -} - -func (itr *tagKeyMergeIterator) Close() (err error) { - for i := range itr.itrs { - if e := itr.itrs[i].Close(); e != nil && err == nil { - err = e - } - } - return err -} - -// Next returns the element with the next lowest key across the iterators. -// -// If multiple iterators contain the same key then the first is returned -// and the remaining ones are skipped. -func (itr *tagKeyMergeIterator) Next() (_ []byte, err error) { - // Find next lowest key amongst the buffers. - var key []byte - for i, buf := range itr.buf { - // Fill buffer. - if buf == nil { - if buf, err = itr.itrs[i].Next(); err != nil { - return nil, err - } else if buf != nil { - itr.buf[i] = buf - } else { - continue - } - } - - // Find next lowest key. 
- if key == nil || bytes.Compare(buf, key) == -1 { - key = buf - } - } - - // Return nil if no elements remaining. - if key == nil { - return nil, nil - } - - // Merge elements and clear buffers. - for i, buf := range itr.buf { - if buf == nil || !bytes.Equal(buf, key) { - continue - } - itr.buf[i] = nil - } - return key, nil -} - -// TagValueIterator represents a iterator over a list of tag values. -type TagValueIterator interface { - Close() error - Next() ([]byte, error) -} - -// MergeTagValueIterators returns an iterator that merges a set of iterators. -func MergeTagValueIterators(itrs ...TagValueIterator) TagValueIterator { - if len(itrs) == 0 { - return nil - } else if len(itrs) == 1 { - return itrs[0] - } - - return &tagValueMergeIterator{ - buf: make([][]byte, len(itrs)), - itrs: itrs, - } -} - -type tagValueMergeIterator struct { - buf [][]byte - itrs []TagValueIterator -} - -func (itr *tagValueMergeIterator) Close() (err error) { - for i := range itr.itrs { - if e := itr.itrs[i].Close(); e != nil && err == nil { - err = e - } - } - return err -} - -// Next returns the element with the next lowest value across the iterators. -// -// If multiple iterators contain the same value then the first is returned -// and the remaining ones are skipped. -func (itr *tagValueMergeIterator) Next() (_ []byte, err error) { - // Find next lowest value amongst the buffers. - var value []byte - for i, buf := range itr.buf { - // Fill buffer. - if buf == nil { - if buf, err = itr.itrs[i].Next(); err != nil { - return nil, err - } else if buf != nil { - itr.buf[i] = buf - } else { - continue - } - } - - // Find next lowest value. - if value == nil || bytes.Compare(buf, value) == -1 { - value = buf - } - } - - // Return nil if no elements remaining. - if value == nil { - return nil, nil - } - - // Merge elements and clear buffers. - for i, buf := range itr.buf { - if buf == nil || !bytes.Equal(buf, value) { - continue - } - itr.buf[i] = nil - } - return value, nil -} diff --git a/tsdb/series_iterators_test.go b/tsdb/series_iterators_test.go deleted file mode 100644 index d56da00b7c..0000000000 --- a/tsdb/series_iterators_test.go +++ /dev/null @@ -1,357 +0,0 @@ -package tsdb_test - -import ( - "compress/gzip" - "context" - "fmt" - "io/ioutil" - "os" - "path/filepath" - "reflect" - "sync" - "testing" - - "github.com/influxdata/influxdb/v2/logger" - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/query" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" - "github.com/influxdata/influxdb/v2/tsdb/tsi1" - "github.com/influxdata/influxql" -) - -func toSeriesIDs(ids []uint64) []tsdb.SeriesID { - sids := make([]tsdb.SeriesID, 0, len(ids)) - for _, id := range ids { - sids = append(sids, tsdb.NewSeriesID(id)) - } - return sids -} - -// Ensure iterator can merge multiple iterators together. 
-func TestMergeSeriesIDIterators(t *testing.T) { - itr := tsdb.MergeSeriesIDIterators( - tsdb.NewSeriesIDSliceIterator(toSeriesIDs([]uint64{1, 2, 3})), - tsdb.NewSeriesIDSliceIterator(nil), - tsdb.NewSeriesIDSliceIterator(toSeriesIDs([]uint64{1, 2, 3, 4})), - ) - - if e, err := itr.Next(); err != nil { - t.Fatal(err) - } else if !reflect.DeepEqual(e, tsdb.SeriesIDElem{SeriesID: tsdb.NewSeriesID(1)}) { - t.Fatalf("unexpected elem(0): %#v", e) - } - if e, err := itr.Next(); err != nil { - t.Fatal(err) - } else if !reflect.DeepEqual(e, tsdb.SeriesIDElem{SeriesID: tsdb.NewSeriesID(2)}) { - t.Fatalf("unexpected elem(1): %#v", e) - } - if e, err := itr.Next(); err != nil { - t.Fatal(err) - } else if !reflect.DeepEqual(e, tsdb.SeriesIDElem{SeriesID: tsdb.NewSeriesID(3)}) { - t.Fatalf("unexpected elem(2): %#v", e) - } - if e, err := itr.Next(); err != nil { - t.Fatal(err) - } else if !reflect.DeepEqual(e, tsdb.SeriesIDElem{SeriesID: tsdb.NewSeriesID(4)}) { - t.Fatalf("unexpected elem(3): %#v", e) - } - if e, err := itr.Next(); err != nil { - t.Fatal(err) - } else if !e.SeriesID.IsZero() { - t.Fatalf("expected nil elem: %#v", e) - } -} - -// Index wraps a series file and index. -type Index struct { - rootPath string - - config tsi1.Config - *tsi1.Index - sfile *seriesfile.SeriesFile -} - -// MustNewIndex will initialize a new index using the provide type. It creates -// everything under the same root directory so it can be cleanly removed on Close. -// -// The index will not be opened. -func MustNewIndex(c tsi1.Config) *Index { - rootPath, err := ioutil.TempDir("", "influxdb-tsdb") - if err != nil { - panic(err) - } - - seriesPath, err := ioutil.TempDir(rootPath, "_series") - if err != nil { - panic(err) - } - - sfile := seriesfile.NewSeriesFile(seriesPath) - if err := sfile.Open(context.Background()); err != nil { - panic(err) - } - - i := tsi1.NewIndex(sfile, c, tsi1.WithPath(filepath.Join(rootPath, "index"))) - - if testing.Verbose() { - i.WithLogger(logger.New(os.Stderr)) - } - - idx := &Index{ - config: c, - Index: i, - rootPath: rootPath, - sfile: sfile, - } - return idx -} - -// MustOpenNewIndex will initialize a new index using the provide type and opens -// it. -func MustOpenNewIndex(c tsi1.Config) *Index { - idx := MustNewIndex(c) - idx.MustOpen() - return idx -} - -// MustOpen opens the underlying index or panics. -func (i *Index) MustOpen() { - if err := i.Index.Open(context.Background()); err != nil { - panic(err) - } -} - -// Reopen closes and re-opens the underlying index, without removing any data. -func (i *Index) Reopen() error { - if err := i.Index.Close(); err != nil { - return err - } - - if err := i.sfile.Close(); err != nil { - return err - } - - i.sfile = seriesfile.NewSeriesFile(i.sfile.Path()) - if err := i.sfile.Open(context.Background()); err != nil { - return err - } - - i.Index = tsi1.NewIndex(i.SeriesFile(), i.config, - tsi1.WithPath(filepath.Join(i.rootPath, "index"))) - return i.Index.Open(context.Background()) -} - -// Close closes the index cleanly and removes all on-disk data. -func (i *Index) Close() error { - if err := i.Index.Close(); err != nil { - return err - } - - if err := i.sfile.Close(); err != nil { - return err - } - return os.RemoveAll(i.rootPath) -} - -// This benchmark compares the TagSets implementation across index types. -// -// In the case of the TSI index, TagSets has to merge results across all several -// index partitions. -// -// Typical results on an i7 laptop. 
-// -// BenchmarkIndex_TagSets/1M_series/tsi1-8 100 18995530 ns/op 5221180 B/op 20379 allocs/op -func BenchmarkIndex_TagSets(b *testing.B) { - // Read line-protocol and coerce into tsdb format. - // 1M series generated with: - // $inch -b 10000 -c 1 -t 10,10,10,10,10,10 -f 1 -m 5 -p 1 - fd, err := os.Open("testdata/line-protocol-1M.txt.gz") - if err != nil { - b.Fatal(err) - } - - gzr, err := gzip.NewReader(fd) - if err != nil { - fd.Close() - b.Fatal(err) - } - - data, err := ioutil.ReadAll(gzr) - if err != nil { - b.Fatal(err) - } - - if err := fd.Close(); err != nil { - b.Fatal(err) - } - - points, err := models.ParsePoints(data, []byte("mm")) - if err != nil { - b.Fatal(err) - } - - // setup writes all of the above points to the index. - setup := func(idx *Index) { - batchSize := 10000 - for j := 0; j < 1; j++ { - for i := 0; i < len(points); i += batchSize { - collection := tsdb.NewSeriesCollection(points[i : i+batchSize]) - if err := idx.CreateSeriesListIfNotExists(collection); err != nil { - b.Fatal(err) - } - } - } - } - - var errResult error - - // This benchmark will merge eight bitsets each containing ~10,000 series IDs. - b.Run("1M series", func(b *testing.B) { - idx := MustOpenNewIndex(tsi1.NewConfig()) - setup(idx) - defer idx.Close() - - name := []byte("m4") - opt := query.IteratorOptions{Condition: influxql.MustParseExpr(`"tag5"::tag = 'value0'`)} - - ts := func() ([]*query.TagSet, error) { - return idx.Index.TagSets(name, opt) - } - - b.ReportAllocs() - b.ResetTimer() - - for i := 0; i < b.N; i++ { - // Will call TagSets on the appropriate implementation. - _, errResult = ts() - if errResult != nil { - b.Fatal(err) - } - } - - if err := idx.Close(); err != nil { - b.Fatal(err) - } - }) -} - -// This benchmark concurrently writes series to the index and fetches cached bitsets. -// The idea is to emphasize the performance difference when bitset caching is on and off. -// -// Typical results for an i7 laptop -// -// BenchmarkIndex_ConcurrentWriteQuery/inmem/queries_100000/cache-8 1 5963346204 ns/op 2499655768 B/op 23964183 allocs/op -// BenchmarkIndex_ConcurrentWriteQuery/inmem/queries_100000/no_cache-8 1 5314841090 ns/op 2499495280 B/op 23963322 allocs/op -// BenchmarkIndex_ConcurrentWriteQuery/tsi1/queries_100000/cache-8 1 1645048376 ns/op 2215402840 B/op 23048978 allocs/op -// BenchmarkIndex_ConcurrentWriteQuery/tsi1/queries_100000/no_cache-8 1 22242155616 ns/op 28277544136 B/op 79620463 allocs/op -func BenchmarkIndex_ConcurrentWriteQuery(b *testing.B) { - // Read line-protocol and coerce into tsdb format. - // 1M series generated with: - // $inch -b 10000 -c 1 -t 10,10,10,10,10,10 -f 1 -m 5 -p 1 - fd, err := os.Open("testdata/line-protocol-1M.txt.gz") - if err != nil { - b.Fatal(err) - } - - gzr, err := gzip.NewReader(fd) - if err != nil { - fd.Close() - b.Fatal(err) - } - - data, err := ioutil.ReadAll(gzr) - if err != nil { - b.Fatal(err) - } - - if err := fd.Close(); err != nil { - b.Fatal(err) - } - - points, err := models.ParsePoints(data, []byte("mm")) - if err != nil { - b.Fatal(err) - } - - runBenchmark := func(b *testing.B, queryN int, cacheSize uint64) { - config := tsi1.NewConfig() - config.SeriesIDSetCacheSize = cacheSize - idx := MustOpenNewIndex(config) - var wg sync.WaitGroup - begin := make(chan struct{}) - - // Run concurrent iterator... 
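// Editor's note (not part of the original source): the closure defined next is the read
// side of this benchmark — it repeatedly opens TagValueSeriesIDIterator for a handful of
// tag/value pairs while the main goroutine keeps writing batches of series, which is what
// exercises the SeriesIDSetCacheSize setting compared across the cache/no-cache runs.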
- runIter := func() { - keys := [][]string{ - {"m0", "tag2", "value4"}, - {"m1", "tag3", "value5"}, - {"m2", "tag4", "value6"}, - {"m3", "tag0", "value8"}, - {"m4", "tag5", "value0"}, - } - - <-begin // Wait for writes to land - for i := 0; i < queryN/5; i++ { - for _, key := range keys { - itr, err := idx.TagValueSeriesIDIterator([]byte(key[0]), []byte(key[1]), []byte(key[2])) - if err != nil { - b.Fatal(err) - } - - if itr == nil { - panic("should not happen") - } - - if err := itr.Close(); err != nil { - b.Fatal(err) - } - } - } - } - - batchSize := 10000 - wg.Add(1) - go func() { defer wg.Done(); runIter() }() - var once sync.Once - for j := 0; j < b.N; j++ { - for i := 0; i < len(points); i += batchSize { - collection := tsdb.NewSeriesCollection(points[i : i+batchSize]) - if err := idx.CreateSeriesListIfNotExists(collection); err != nil { - b.Fatal(err) - } - once.Do(func() { close(begin) }) - } - - // Wait for queries to finish - wg.Wait() - - // Reset the index... - b.StopTimer() - if err := idx.Close(); err != nil { - b.Fatal(err) - } - - // Re-open everything - idx = MustOpenNewIndex(tsi1.NewConfig()) - wg.Add(1) - begin = make(chan struct{}) - once = sync.Once{} - go func() { defer wg.Done(); runIter() }() - b.StartTimer() - } - } - - queries := []int{1e5} - for _, queryN := range queries { - b.Run(fmt.Sprintf("queries %d", queryN), func(b *testing.B) { - b.Run("cache", func(b *testing.B) { - runBenchmark(b, queryN, tsi1.DefaultSeriesIDSetCacheSize) - }) - - b.Run("no cache", func(b *testing.B) { - runBenchmark(b, queryN, 0) - }) - }) - } -} diff --git a/tsdb/series_set.go b/tsdb/series_set.go deleted file mode 100644 index 7d201cc218..0000000000 --- a/tsdb/series_set.go +++ /dev/null @@ -1,293 +0,0 @@ -package tsdb - -import ( - "io" - "sync" - "unsafe" - - "github.com/RoaringBitmap/roaring" -) - -// SeriesIDSet represents a lockable bitmap of series ids. -type SeriesIDSet struct { - sync.RWMutex - bitmap *roaring.Bitmap -} - -// NewSeriesIDSet returns a new instance of SeriesIDSet. -func NewSeriesIDSet(a ...SeriesID) *SeriesIDSet { - ss := &SeriesIDSet{bitmap: roaring.NewBitmap()} - if len(a) > 0 { - a32 := make([]uint32, len(a)) - for i := range a { - a32[i] = uint32(a[i].RawID()) - } - ss.bitmap.AddMany(a32) - } - return ss -} - -// NewSeriesIDSetNegate returns a new SeriesIDSet containing all the elements in a -// that are not present in b. That is, the set difference between a and b. -func NewSeriesIDSetNegate(a, b *SeriesIDSet) *SeriesIDSet { - a.RLock() - defer a.RUnlock() - b.RLock() - defer b.RUnlock() - - return &SeriesIDSet{bitmap: roaring.AndNot(a.bitmap, b.bitmap)} -} - -// Bytes estimates the memory footprint of this SeriesIDSet, in bytes. -func (s *SeriesIDSet) Bytes() int { - var b int - s.RLock() - b += 24 // mu RWMutex is 24 bytes - b += int(unsafe.Sizeof(s.bitmap)) + int(s.bitmap.GetSizeInBytes()) - s.RUnlock() - return b -} - -// Add adds the series id to the set. -func (s *SeriesIDSet) Add(id SeriesID) { - s.Lock() - defer s.Unlock() - s.AddNoLock(id) -} - -// AddNoLock adds the series id to the set. Add is not safe for use from multiple -// goroutines. Callers must manage synchronization. -func (s *SeriesIDSet) AddNoLock(id SeriesID) { - s.bitmap.Add(uint32(id.RawID())) -} - -// AddMany adds multiple ids to the SeriesIDSet. AddMany takes a lock, so may not be -// optimal to call many times with few ids. 
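// Editor's sketch (not part of the original source): the batching trade-off the comment
// above describes. Collecting raw IDs and issuing a single AddMany call (the method
// defined just below) takes the set's write lock once, whereas calling Add in a loop
// locks per element. Assumes package tsdb and the SeriesID/SeriesIDSet API in this diff.
func addBatch(set *SeriesIDSet, raw []uint64) {
	ids := make([]SeriesID, 0, len(raw))
	for _, r := range raw {
		ids = append(ids, NewSeriesID(r))
	}
	set.AddMany(ids...) // one lock acquisition for the whole batch
}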
-func (s *SeriesIDSet) AddMany(ids ...SeriesID) { - if len(ids) == 0 { - return - } - - a32 := make([]uint32, len(ids)) - for i := range ids { - a32[i] = uint32(ids[i].RawID()) - } - - s.Lock() - defer s.Unlock() - s.bitmap.AddMany(a32) -} - -// Contains returns true if the id exists in the set. -func (s *SeriesIDSet) Contains(id SeriesID) bool { - s.RLock() - x := s.ContainsNoLock(id) - s.RUnlock() - return x -} - -// ContainsNoLock returns true if the id exists in the set. ContainsNoLock is -// not safe for use from multiple goroutines. The caller must manage synchronization. -func (s *SeriesIDSet) ContainsNoLock(id SeriesID) bool { - return s.bitmap.Contains(uint32(id.RawID())) -} - -// Remove removes the id from the set. -func (s *SeriesIDSet) Remove(id SeriesID) { - s.Lock() - defer s.Unlock() - s.RemoveNoLock(id) -} - -// RemoveNoLock removes the id from the set. RemoveNoLock is not safe for use -// from multiple goroutines. The caller must manage synchronization. -func (s *SeriesIDSet) RemoveNoLock(id SeriesID) { - s.bitmap.Remove(uint32(id.RawID())) -} - -// Cardinality returns the cardinality of the SeriesIDSet. -func (s *SeriesIDSet) Cardinality() uint64 { - s.RLock() - defer s.RUnlock() - return s.bitmap.GetCardinality() -} - -// Merge merged the contents of others into s. The caller does not need to -// provide s as an argument, and the contents of s will always be present in s -// after Merge returns. -func (s *SeriesIDSet) Merge(others ...*SeriesIDSet) { - bms := make([]*roaring.Bitmap, 0, len(others)+1) - - s.RLock() - bms = append(bms, s.bitmap) // Add ourself. - - // Add other bitsets. - for _, other := range others { - other.RLock() - defer other.RUnlock() // Hold until we have merged all the bitmaps - bms = append(bms, other.bitmap) - } - - result := roaring.FastOr(bms...) - s.RUnlock() - - s.Lock() - s.bitmap = result - s.Unlock() -} - -// MergeInPlace merges other into s, modifying s in the process. -func (s *SeriesIDSet) MergeInPlace(other *SeriesIDSet) { - if s == other { - return - } - - other.RLock() - s.Lock() - s.bitmap.Or(other.bitmap) - s.Unlock() - other.RUnlock() -} - -// Equals returns true if other and s are the same set of ids. -func (s *SeriesIDSet) Equals(other *SeriesIDSet) bool { - if s == other { - return true - } - - s.RLock() - defer s.RUnlock() - other.RLock() - defer other.RUnlock() - return s.bitmap.Equals(other.bitmap) -} - -// And returns a new SeriesIDSet containing elements that were present in s and other. -func (s *SeriesIDSet) And(other *SeriesIDSet) *SeriesIDSet { - s.RLock() - defer s.RUnlock() - other.RLock() - defer other.RUnlock() - return &SeriesIDSet{bitmap: roaring.And(s.bitmap, other.bitmap)} -} - -// RemoveSet removes all values in other from s, if they exist. -func (s *SeriesIDSet) RemoveSet(other *SeriesIDSet) { - s.RLock() - defer s.RUnlock() - other.RLock() - defer other.RUnlock() - s.bitmap.AndNot(other.bitmap) -} - -// ForEach calls f for each id in the set. The function is applied to the IDs -// in ascending order. -func (s *SeriesIDSet) ForEach(f func(id SeriesID)) { - s.RLock() - defer s.RUnlock() - itr := s.bitmap.Iterator() - for itr.HasNext() { - f(NewSeriesID(uint64(itr.Next()))) - } -} - -// ForEachNoLock calls f for each id in the set without taking a lock. 
-func (s *SeriesIDSet) ForEachNoLock(f func(id SeriesID)) { - itr := s.bitmap.Iterator() - for itr.HasNext() { - f(NewSeriesID(uint64(itr.Next()))) - } -} - -func (s *SeriesIDSet) String() string { - s.RLock() - defer s.RUnlock() - return s.bitmap.String() -} - -// Diff removes from s any elements also present in other. -func (s *SeriesIDSet) Diff(other *SeriesIDSet) { - other.RLock() - defer other.RUnlock() - - s.Lock() - defer s.Unlock() - s.bitmap = roaring.AndNot(s.bitmap, other.bitmap) -} - -// Clone returns a new SeriesIDSet with a deep copy of the underlying bitmap. -func (s *SeriesIDSet) Clone() *SeriesIDSet { - // Cloning the SeriesIDSet involves cloning s's bitmap. - // Unfortunately, if the bitmap is set to COW, the original bitmap is modified during clone, - // so we have to take a write lock rather than a read lock. - // For now, we'll just hold a write lock for clone; if this shows up as a bottleneck later, - // we can conditionally RLock if we are not COW. - s.Lock() - defer s.Unlock() - return s.CloneNoLock() -} - -// CloneNoLock calls Clone without taking a lock. -func (s *SeriesIDSet) CloneNoLock() *SeriesIDSet { - new := NewSeriesIDSet() - new.bitmap = s.bitmap.Clone() - return new -} - -// Iterator returns an iterator to the underlying bitmap. -// This iterator is not protected by a lock. -func (s *SeriesIDSet) Iterator() SeriesIDSetIterable { - return s.bitmap.Iterator() -} - -// UnmarshalBinary unmarshals data into the set. -func (s *SeriesIDSet) UnmarshalBinary(data []byte) error { - s.Lock() - defer s.Unlock() - return s.bitmap.UnmarshalBinary(data) -} - -// UnmarshalBinaryUnsafe unmarshals data into the set. -// References to the underlying data are used so data should not be reused by caller. -func (s *SeriesIDSet) UnmarshalBinaryUnsafe(data []byte) error { - s.Lock() - defer s.Unlock() - _, err := s.bitmap.FromBuffer(data) - return err -} - -// WriteTo writes the set to w. -func (s *SeriesIDSet) WriteTo(w io.Writer) (int64, error) { - s.RLock() - defer s.RUnlock() - return s.bitmap.WriteTo(w) -} - -// Clear clears the underlying bitmap for re-use. Clear is safe for use by multiple goroutines. -func (s *SeriesIDSet) Clear() { - s.Lock() - defer s.Unlock() - s.ClearNoLock() -} - -// ClearNoLock clears the underlying bitmap for re-use without taking a lock. -func (s *SeriesIDSet) ClearNoLock() { - s.bitmap.Clear() -} - -// Slice returns a slice of series ids. -func (s *SeriesIDSet) Slice() []uint64 { - s.RLock() - defer s.RUnlock() - - a := make([]uint64, 0, s.bitmap.GetCardinality()) - for _, seriesID := range s.bitmap.ToArray() { - a = append(a, uint64(seriesID)) - } - return a -} - -type SeriesIDSetIterable interface { - HasNext() bool - Next() uint32 -} diff --git a/tsdb/series_set_test.go b/tsdb/series_set_test.go deleted file mode 100644 index 0f4d91f76f..0000000000 --- a/tsdb/series_set_test.go +++ /dev/null @@ -1,778 +0,0 @@ -package tsdb - -import ( - "bytes" - "fmt" - "math" - "math/rand" - "runtime" - "sync" - "testing" -) - -func TestSeriesIDSet_NewSeriesIDSetNegate(t *testing.T) { - examples := [][3][]uint64{ - [3][]uint64{ - {1, 10, 20, 30}, - {10, 12, 13, 14, 20}, - {1, 30}, - }, - [3][]uint64{ - {}, - {10}, - {}, - }, - [3][]uint64{ - {1, 10, 20, 30}, - {1, 10, 20, 30}, - {}, - }, - [3][]uint64{ - {1, 10}, - {1, 10, 100}, - {}, - }, - [3][]uint64{ - {1, 10}, - {}, - {1, 10}, - }, - } - - for i, example := range examples { - t.Run(fmt.Sprint(i), func(t *testing.T) { - // Build sets. 
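// Editor's sketch (not part of the original source): round-tripping a set through its
// binary form using the WriteTo and UnmarshalBinary methods defined earlier in
// series_set.go above. The UnmarshalBinaryUnsafe variant would skip the copy, but then
// the buffer's bytes must not be reused afterwards. Assumes package tsdb plus the
// standard bytes package.
func roundTrip(src *SeriesIDSet) (*SeriesIDSet, error) {
	var buf bytes.Buffer
	if _, err := src.WriteTo(&buf); err != nil {
		return nil, err
	}
	dst := NewSeriesIDSet()
	if err := dst.UnmarshalBinary(buf.Bytes()); err != nil {
		return nil, err
	}
	return dst, nil
}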
- a, b := NewSeriesIDSet(), NewSeriesIDSet() - for _, v := range example[0] { - a.Add(NewSeriesID(v)) - } - for _, v := range example[1] { - b.Add(NewSeriesID(v)) - } - - expected := NewSeriesIDSet() - for _, v := range example[2] { - expected.Add(NewSeriesID(v)) - } - - got := NewSeriesIDSetNegate(a, b) - if got.String() != expected.String() { - t.Fatalf("got %s, expected %s", got.String(), expected.String()) - } - }) - } -} - -func TestSeriesIDSet_RemoveSet(t *testing.T) { - examples := [][3][]uint64{ - [3][]uint64{ - {1, 10, 20, 30}, - {10, 12, 13, 14, 20}, - {1, 30}, - }, - [3][]uint64{ - {}, - {10}, - {}, - }, - [3][]uint64{ - {1, 10, 20, 30}, - {1, 10, 20, 30}, - {}, - }, - [3][]uint64{ - {1, 10}, - {1, 10, 100}, - {}, - }, - [3][]uint64{ - {1, 10}, - {}, - {1, 10}, - }, - } - - for i, example := range examples { - t.Run(fmt.Sprint(i), func(t *testing.T) { - // Build sets. - a, b := NewSeriesIDSet(), NewSeriesIDSet() - for _, v := range example[0] { - a.Add(NewSeriesID(v)) - } - for _, v := range example[1] { - b.Add(NewSeriesID(v)) - } - - expected := NewSeriesIDSet() - for _, v := range example[2] { - expected.Add(NewSeriesID(v)) - } - - a.RemoveSet(b) - if a.String() != expected.String() { - t.Fatalf("got %s, expected %s", a.String(), expected.String()) - } - }) - } -} - -// Ensure that cloning is race-free. -func TestSeriesIDSet_Clone_Race(t *testing.T) { - main := NewSeriesIDSet() - total := NewSeriesIDSet() - for i := uint64(0); i < 1024; i++ { - id := NewSeriesID(i) - main.AddNoLock(id) - total.AddNoLock(id) - } - - // One test with a closure around the main SeriesIDSet, - // so that we can run a subtest with and without COW. - test := func(t *testing.T) { - n := 10 * (runtime.NumCPU() + 1) - clones := make([]*SeriesIDSet, n) - var wg sync.WaitGroup - wg.Add(n) - for i := 1; i <= n; i++ { - go func(i int) { - defer wg.Done() - clones[i-1] = main.Clone() - - for j := 0; j < 1000; j++ { - id := NewSeriesID(uint64(j + (100000 * i))) - total.Add(id) - clones[i-1].AddNoLock(id) - } - }(i) - } - - wg.Wait() - for _, o := range clones { - if got, exp := o.Cardinality(), uint64(2024); got != exp { - t.Errorf("got cardinality %d, expected %d", got, exp) - } - } - - // The original set should be unaffected - if got, exp := main.Cardinality(), uint64(1024); got != exp { - t.Errorf("got cardinality %d, expected %d", got, exp) - } - - // Merging the clones should result in only 1024 shared values. - union := NewSeriesIDSet() - for _, o := range clones { - o.ForEachNoLock(func(id SeriesID) { - union.AddNoLock(id) - }) - } - - if !union.Equals(total) { - t.Fatal("union not equal to total") - } - } - t.Run("clone", test) -} - -var resultBool bool - -// Contains should be typically a constant time lookup. 
Example results on a laptop: -// -// BenchmarkSeriesIDSet_Contains/1-4 20000000 68.5 ns/op 0 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Contains/2-4 20000000 70.8 ns/op 0 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Contains/10-4 20000000 70.3 ns/op 0 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Contains/100-4 20000000 71.3 ns/op 0 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Contains/1000-4 20000000 80.5 ns/op 0 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Contains/10000-4 20000000 67.3 ns/op 0 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Contains/100000-4 20000000 73.1 ns/op 0 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Contains/1000000-4 20000000 77.3 ns/op 0 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Contains/10000000-4 20000000 75.3 ns/op 0 B/op 0 allocs/op -func BenchmarkSeriesIDSet_Contains(b *testing.B) { - cardinalities := []uint64{1, 2, 10, 100, 1000, 10000, 100000, 1000000, 10000000} - - for _, cardinality := range cardinalities { - // Setup... - set := NewSeriesIDSet() - for i := uint64(0); i < cardinality; i++ { - set.Add(NewSeriesID(i)) - } - - lookup := cardinality / 2 - b.Run(fmt.Sprint(cardinality), func(b *testing.B) { - for i := 0; i < b.N; i++ { - resultBool = set.Contains(NewSeriesID(lookup)) - } - }) - } -} - -var set *SeriesIDSet - -// Adding to a larger bitset shouldn't be significantly more expensive than adding -// to a smaller one. This benchmark adds a value to different cardinality sets. -// -// Example results from a laptop: -// BenchmarkSeriesIDSet_Add/1-4 1000000 1053 ns/op 48 B/op 2 allocs/op -// BenchmarkSeriesIDSet_Add/2-4 5000000 303 ns/op 0 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Add/10-4 5000000 348 ns/op 0 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Add/100-4 5000000 373 ns/op 0 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Add/1000-4 5000000 342 ns/op 0 B/op 0 allocs/op -// -// -func BenchmarkSeriesIDSet_AddMore(b *testing.B) { - cardinalities := []uint64{1, 2, 10, 100, 1000, 10000, 100000, 1000000, 10000000} - - for _, cardinality := range cardinalities { - // Setup... - set = NewSeriesIDSet() - for i := uint64(0); i < cardinality-1; i++ { - set.Add(NewSeriesID(i)) - } - - b.Run(fmt.Sprint(cardinality), func(b *testing.B) { - for i := 0; i < b.N; i++ { - // Add next value - set.Add(NewSeriesID(cardinality)) - - b.StopTimer() - set.Remove(NewSeriesID(cardinality)) - b.StartTimer() - } - }) - } -} - -// Add benchmarks the cost of adding the same element to a set versus the -// cost of checking if it exists before adding it. 
-// -// Typical benchmarks from a laptop: -// -// BenchmarkSeriesIDSet_Add/cardinality_1000000_add/same-8 20000000 64.8 ns/op 0 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Add/cardinality_1000000_add/random-8 2000000 704 ns/op 5 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Add/cardinality_1000000_add/same_no_lock-8 50000000 40.3 ns/op 0 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Add/cardinality_1000000_add/random_no_lock-8 2000000 644 ns/op 5 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Add/cardinality_1000000_check_add/same_no_lock-8 50000000 34.0 ns/op 0 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Add/cardinality_1000000_check_add/random_no_lock-8 2000000 860 ns/op 14 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Add/cardinality_1000000_check_add/same_global_lock-8 30000000 49.8 ns/op 0 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Add/cardinality_1000000_check_add/random_global_lock-8 2000000 914 ns/op 0 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Add/cardinality_1000000_check_add/same_multi_lock-8 30000000 39.7 ns/op 0 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Add/cardinality_1000000_check_add/random_multi_lock-8 1000000 1002 ns/op 0 B/op 0 allocs/op -// -func BenchmarkSeriesIDSet_Add(b *testing.B) { - // Setup... - set = NewSeriesIDSet() - for i := uint64(0); i < 1000000; i++ { - set.Add(NewSeriesID(i)) - } - lookup := NewSeriesID(300032) - - // Add the same value over and over. - b.Run("cardinality_1000000_add", func(b *testing.B) { - b.Run("same", func(b *testing.B) { - for i := 0; i < b.N; i++ { - set.Add(lookup) - } - }) - - b.Run("random", func(b *testing.B) { - for i := 0; i < b.N; i++ { - b.StopTimer() - x := NewSeriesID(uint64(rand.Intn(math.MaxInt32))) - b.StartTimer() - set.Add(x) - } - }) - - b.Run("same no lock", func(b *testing.B) { - for i := 0; i < b.N; i++ { - set.AddNoLock(lookup) - } - }) - - b.Run("random no lock", func(b *testing.B) { - for i := 0; i < b.N; i++ { - b.StopTimer() - x := NewSeriesID(uint64(rand.Intn(math.MaxInt32))) - b.StartTimer() - set.AddNoLock(x) - } - }) - }) - - // Add the same value over and over with no lock - b.Run("cardinality_1000000_check_add", func(b *testing.B) { - b.Run("same no lock", func(b *testing.B) { - for i := 0; i < b.N; i++ { - if !set.ContainsNoLock(lookup) { - set.AddNoLock(lookup) - } - } - }) - - b.Run("random no lock", func(b *testing.B) { - for i := 0; i < b.N; i++ { - b.StopTimer() - x := NewSeriesID(uint64(rand.Intn(math.MaxInt32))) - b.StartTimer() - if !set.ContainsNoLock(x) { - set.AddNoLock(x) - } - } - }) - - b.Run("same global lock", func(b *testing.B) { - for i := 0; i < b.N; i++ { - set.Lock() - if !set.ContainsNoLock(lookup) { - set.AddNoLock(lookup) - } - set.Unlock() - } - }) - - b.Run("random global lock", func(b *testing.B) { - for i := 0; i < b.N; i++ { - b.StopTimer() - x := NewSeriesID(uint64(rand.Intn(math.MaxInt32))) - b.StartTimer() - set.Lock() - if !set.ContainsNoLock(x) { - set.AddNoLock(x) - } - set.Unlock() - } - }) - - b.Run("same multi lock", func(b *testing.B) { - for i := 0; i < b.N; i++ { - if !set.Contains(lookup) { - set.Add(lookup) - } - } - }) - - b.Run("random multi lock", func(b *testing.B) { - for i := 0; i < b.N; i++ { - b.StopTimer() - x := NewSeriesID(uint64(rand.Intn(math.MaxInt32))) - b.StartTimer() - if !set.Contains(x) { - set.Add(x) - } - } - }) - }) -} - -var ssResult *SeriesIDSet - -// Benchmark various ways of creating a copy of a bitmap. Note, Clone_COW will result -// in a bitmap where future modifications will involve copies. -// -// Typical results from an i7 laptop. 
-// BenchmarkSeriesIDSet_Clone/cardinality_1000/re-use/Clone-8 30000 44171 ns/op 47200 B/op 1737 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_1000/re-use/Merge-8 100000 17877 ns/op 39008 B/op 30 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_1000/re-use/MergeInPlace-8 200000 7367 ns/op 0 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_1000/re-use/Add-8 10000 137460 ns/op 62336 B/op 2596 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_1000/re-use/WriteTo-8 30000 52896 ns/op 35872 B/op 866 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_1000/don't_re-use/Clone-8 30000 41940 ns/op 47200 B/op 1737 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_1000/don't_re-use/Merge-8 100000 17624 ns/op 39008 B/op 30 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_1000/don't_re-use/MergeInPlace-8 100000 17320 ns/op 38880 B/op 28 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_1000/don't_re-use/Add-8 10000 167544 ns/op 101216 B/op 2624 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_1000/don't_re-use/WriteTo-8 20000 66976 ns/op 52897 B/op 869 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_10000/re-use/Clone-8 10000 179933 ns/op 177072 B/op 5895 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_10000/re-use/Merge-8 20000 77574 ns/op 210656 B/op 42 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_10000/re-use/MergeInPlace-8 100000 23645 ns/op 0 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_10000/re-use/Add-8 2000 689254 ns/op 224161 B/op 9572 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_10000/re-use/WriteTo-8 10000 199052 ns/op 118791 B/op 2945 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_10000/don't_re-use/Clone-8 10000 183137 ns/op 177073 B/op 5895 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_10000/don't_re-use/Merge-8 20000 77502 ns/op 210656 B/op 42 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_10000/don't_re-use/MergeInPlace-8 20000 72610 ns/op 210528 B/op 40 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_10000/don't_re-use/Add-8 2000 724789 ns/op 434691 B/op 9612 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_10000/don't_re-use/WriteTo-8 10000 215734 ns/op 177159 B/op 2948 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_100000/re-use/Clone-8 5000 244971 ns/op 377648 B/op 6111 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_100000/re-use/Merge-8 20000 90580 ns/op 210656 B/op 42 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_100000/re-use/MergeInPlace-8 50000 24697 ns/op 0 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_100000/re-use/Add-8 500 3274456 ns/op 758996 B/op 19853 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_100000/re-use/WriteTo-8 5000 248791 ns/op 122392 B/op 3053 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_100000/don't_re-use/Clone-8 5000 269152 ns/op 377648 B/op 6111 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_100000/don't_re-use/Merge-8 20000 85948 ns/op 210657 B/op 42 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_100000/don't_re-use/MergeInPlace-8 20000 78142 ns/op 210528 B/op 40 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_100000/don't_re-use/Add-8 500 3123753 ns/op 969529 B/op 19893 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_100000/don't_re-use/WriteTo-8 10000 230657 ns/op 180684 B/op 3056 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_1000000/re-use/Clone-8 3000 551781 ns/op 2245424 B/op 6111 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_1000000/re-use/Merge-8 20000 92104 
ns/op 210656 B/op 42 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_1000000/re-use/MergeInPlace-8 50000 27408 ns/op 0 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_1000000/re-use/Add-8 100 22573498 ns/op 6420446 B/op 30520 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_1000000/re-use/WriteTo-8 5000 284901 ns/op 123522 B/op 3053 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_1000000/don't_re-use/Clone-8 3000 679284 ns/op 2245424 B/op 6111 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_1000000/don't_re-use/Merge-8 20000 68965 ns/op 210656 B/op 42 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_1000000/don't_re-use/MergeInPlace-8 20000 64236 ns/op 210528 B/op 40 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_1000000/don't_re-use/Add-8 100 21960668 ns/op 6630979 B/op 30560 allocs/op -// BenchmarkSeriesIDSet_Clone/cardinality_1000000/don't_re-use/WriteTo-8 5000 298276 ns/op 181890 B/op 3056 allocs/op - -func BenchmarkSeriesIDSet_Clone(b *testing.B) { - toAddCardinalities := []int{1e3, 1e4, 1e5, 1e6} - - runBenchmarks := func(b *testing.B, other *SeriesIDSet, init func() *SeriesIDSet) { - b.Run("Clone", func(b *testing.B) { - for i := 0; i < b.N; i++ { - ssResult = other.Clone() - } - }) - - b.Run("Merge", func(b *testing.B) { - ssResult = init() - for i := 0; i < b.N; i++ { - ssResult.Merge(other) - b.StopTimer() - ssResult = init() - b.StartTimer() - } - }) - - b.Run("MergeInPlace", func(b *testing.B) { - ssResult = init() - for i := 0; i < b.N; i++ { - ssResult.MergeInPlace(other) - b.StopTimer() - ssResult = init() - b.StartTimer() - } - }) - - b.Run("Add", func(b *testing.B) { - ssResult = init() - for i := 0; i < b.N; i++ { - itr := other.Iterator() - ssResult.Lock() - for itr.HasNext() { - ssResult.AddNoLock(NewSeriesID(uint64(itr.Next()))) - } - ssResult.Unlock() - b.StopTimer() - ssResult = init() - b.StartTimer() - } - }) - - b.Run("WriteTo", func(b *testing.B) { - var buf bytes.Buffer - ssResult = init() - for i := 0; i < b.N; i++ { - other.WriteTo(&buf) - ssResult.UnmarshalBinaryUnsafe(buf.Bytes()) - b.StopTimer() - ssResult = init() - buf.Reset() - b.StartTimer() - } - }) - } - - for _, toAddCardinality := range toAddCardinalities { - b.Run(fmt.Sprintf("cardinality %d", toAddCardinality), func(b *testing.B) { - ids := make([]SeriesID, 0, toAddCardinality) - for i := 0; i < toAddCardinality; i++ { - ids = append(ids, NewSeriesID(uint64(rand.Intn(200000000)))) - } - other := NewSeriesIDSet(ids...) - - b.Run("re-use", func(b *testing.B) { - base := NewSeriesIDSet() - runBenchmarks(b, other, func() *SeriesIDSet { - base.Clear() - return base - }) - }) - - b.Run("don't re-use", func(b *testing.B) { - runBenchmarks(b, other, func() *SeriesIDSet { - return NewSeriesIDSet() - }) - }) - }) - } -} -func BenchmarkSeriesIDSet_AddMany(b *testing.B) { - cardinalities := []int{1, 1e3, 1e4, 1e5, 1e6} - toAddCardinalities := []int{1e3, 1e4, 1e5} - - for _, cardinality := range cardinalities { - ids := make([]SeriesID, 0, cardinality) - for i := 0; i < cardinality; i++ { - ids = append(ids, NewSeriesID(uint64(rand.Intn(200000000)))) - } - - // Setup... - set = NewSeriesIDSet(ids...) - - // Check if the value exists before adding it under two locks. 
- b.Run(fmt.Sprintf("cardinality %d", cardinality), func(b *testing.B) { - for _, toAddCardinality := range toAddCardinalities { - ids := make([]SeriesID, 0, toAddCardinality) - for i := 0; i < toAddCardinality; i++ { - ids = append(ids, NewSeriesID(uint64(rand.Intn(200000000)))) - } - - b.Run(fmt.Sprintf("adding %d", toAddCardinality), func(b *testing.B) { - b.Run("AddNoLock", func(b *testing.B) { - clone := set.Clone() - for i := 0; i < b.N; i++ { - for _, id := range ids { - clone.AddNoLock(id) - } - - b.StopTimer() - clone = set.Clone() - b.StartTimer() - } - }) - - b.Run("AddMany", func(b *testing.B) { - clone := set.Clone() - for i := 0; i < b.N; i++ { - clone.AddMany(ids...) - b.StopTimer() - clone = set.Clone() - b.StartTimer() - } - }) - - // Merge will involve a new bitmap being allocated. - b.Run("Merge", func(b *testing.B) { - clone := set.Clone() - for i := 0; i < b.N; i++ { - other := NewSeriesIDSet(ids...) - clone.Merge(other) - - b.StopTimer() - clone = set.Clone() - b.StartTimer() - } - }) - - b.Run("MergeInPlace", func(b *testing.B) { - clone := set.Clone() - for i := 0; i < b.N; i++ { - other := NewSeriesIDSet(ids...) - clone.MergeInPlace(other) - - b.StopTimer() - clone = set.Clone() - b.StartTimer() - } - }) - }) - } - }) - } -} - -// Remove benchmarks the cost of removing the same element in a set versus the -// cost of checking if it exists before removing it. -// -// Typical benchmarks from a laptop: -// -// BenchmarkSeriesIDSet_Remove/cardinality_1000000_remove_same-4 20000000 99.1 ns/op 0 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Remove/cardinality_1000000_check_remove_global_lock-4 20000000 57.7 ns/op 0 B/op 0 allocs/op -// BenchmarkSeriesIDSet_Remove/cardinality_1000000_check_remove_multi_lock-4 20000000 80.1 ns/op 0 B/op 0 allocs/op -// -func BenchmarkSeriesIDSet_Remove(b *testing.B) { - // Setup... - set = NewSeriesIDSet() - for i := uint64(0); i < 1000000; i++ { - set.Add(NewSeriesID(i)) - } - lookup := uint64(300032) - - // Remove the same value over and over. - b.Run("cardinality_1000000_remove_same", func(b *testing.B) { - for i := 0; i < b.N; i++ { - set.Remove(NewSeriesID(lookup)) - } - }) - - // Check if the value exists before adding it. Subsequent repeats of the code - // will result in contains checks. - b.Run("cardinality_1000000_check_remove_global_lock", func(b *testing.B) { - for i := 0; i < b.N; i++ { - set.Lock() - if set.ContainsNoLock(NewSeriesID(lookup)) { - set.RemoveNoLock(NewSeriesID(lookup)) - } - set.Unlock() - } - }) - - // Check if the value exists before adding it under two locks. - b.Run("cardinality_1000000_check_remove_multi_lock", func(b *testing.B) { - for i := 0; i < b.N; i++ { - if set.Contains(NewSeriesID(lookup)) { - set.Remove(NewSeriesID(lookup)) - } - } - }) -} - -// BenchmarkSeriesIDSet_MassRemove benchmarks the cost of removing a large set of values. -func BenchmarkSeriesIDSet_MassRemove(b *testing.B) { - var size = uint64(1000000) - // Setup... - set = NewSeriesIDSet() - for i := uint64(0); i < size; i++ { - set.Add(NewSeriesID(i)) - } - - // Remove one at a time - b.Run("cardinality_1000000_remove_each", func(b *testing.B) { - clone := set.Clone() - for i := 0; i < b.N; i++ { - for j := uint64(0); j < size/2; j++ { - clone.RemoveNoLock(NewSeriesID(j)) - } - - b.StopTimer() - clone = set.Clone() - b.StartTimer() - } - }) - - // This is the case where a target series id set exists. 
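// Editor's note (not part of the original source): the remaining sub-benchmarks compare
// RemoveSet with a pre-built "other" set, RemoveSet when that set must be constructed
// inside the timed loop, and NewSeriesIDSetNegate, which leaves the source set untouched
// and allocates a brand-new result instead.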
- b.Run("cardinality_1000000_remove_set_exists", func(b *testing.B) { - clone := set.Clone() - other := NewSeriesIDSet() - for j := uint64(0); j < size/2; j++ { - other.AddNoLock(NewSeriesID(j)) - } - - for i := 0; i < b.N; i++ { - clone.RemoveSet(other) - b.StopTimer() - clone = set.Clone() - b.StartTimer() - } - }) - - // Make a target series id set and negate it - b.Run("cardinality_1000000_remove_set", func(b *testing.B) { - clone := set.Clone() - for i := 0; i < b.N; i++ { - other := NewSeriesIDSet() - for j := uint64(0); j < size/2; j++ { - other.AddNoLock(NewSeriesID(j)) - } - - clone.RemoveSet(other) - b.StopTimer() - clone = set.Clone() - b.StartTimer() - } - }) - - // This is the case where a new result set is created. - b.Run("cardinality_1000000_remove_set_new", func(b *testing.B) { - clone := set.Clone() - other := NewSeriesIDSet() - for j := uint64(0); j < size/2; j++ { - other.AddNoLock(NewSeriesID(j)) - } - - for i := 0; i < b.N; i++ { - _ = NewSeriesIDSetNegate(clone, other) - b.StopTimer() - clone = set.Clone() - b.StartTimer() - } - }) -} - -// Typical benchmarks for a laptop: -// -// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_1/shards_1-4 200000 8095 ns/op 16656 B/op 11 allocs/op -// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_1/shards_10-4 200000 11755 ns/op 18032 B/op 47 allocs/op -// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_1/shards_100-4 50000 41632 ns/op 31794 B/op 407 allocs/op -// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_10000/shards_1-4 200000 6022 ns/op 8384 B/op 7 allocs/op -// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_10000/shards_10-4 100000 19674 ns/op 9760 B/op 43 allocs/op -// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_10000/shards_100-4 10000 152865 ns/op 23522 B/op 403 allocs/op -// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_1000000/shards_1-4 200000 8252 ns/op 9712 B/op 44 allocs/op -// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_1000000/shards_10-4 50000 29566 ns/op 15984 B/op 143 allocs/op -// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_1000000/shards_100-4 10000 237672 ns/op 78710 B/op 1133 allocs/op -// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_10000000/shards_1-4 100000 21559 ns/op 25968 B/op 330 allocs/op -// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_10000000/shards_10-4 20000 102326 ns/op 114325 B/op 537 allocs/op -// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_10000000/shards_100-4 2000 1042697 ns/op 997909 B/op 2608 allocs/op -func BenchmarkSeriesIDSet_Merge_Duplicates(b *testing.B) { - cardinalities := []int{1, 10000, 1000000, 10000000} - shards := []int{1, 10, 100} - - for _, cardinality := range cardinalities { - set = NewSeriesIDSet() - for i := 0; i < cardinality; i++ { - set.Add(NewSeriesID(uint64(i))) - } - - for _, shard := range shards { - others := make([]*SeriesIDSet, 0, shard) - for s := 0; s < shard; s++ { - others = append(others, &SeriesIDSet{bitmap: set.bitmap.Clone()}) - } - - b.Run(fmt.Sprintf("cardinality_%d/shards_%d", cardinality, shard), func(b *testing.B) { - base := &SeriesIDSet{bitmap: set.bitmap.Clone()} - for i := 0; i < b.N; i++ { - base.Merge(others...) 
- b.StopTimer() - base.bitmap = set.bitmap.Clone() - b.StartTimer() - } - }) - - } - } -} - -// Typical benchmarks for a laptop: -// -// BenchmarkSeriesIDSet_Merge_Unique/cardinality_1/shards_1-4 200000 7841 ns/op 16656 B/op 11 allocs/op -// BenchmarkSeriesIDSet_Merge_Unique/cardinality_1/shards_10-4 200000 13093 ns/op 18048 B/op 47 allocs/op -// BenchmarkSeriesIDSet_Merge_Unique/cardinality_1/shards_100-4 30000 57399 ns/op 31985 B/op 407 allocs/op -// BenchmarkSeriesIDSet_Merge_Unique/cardinality_10000/shards_1-4 200000 7740 ns/op 8384 B/op 7 allocs/op -// BenchmarkSeriesIDSet_Merge_Unique/cardinality_10000/shards_10-4 50000 37116 ns/op 18208 B/op 52 allocs/op -// BenchmarkSeriesIDSet_Merge_Unique/cardinality_10000/shards_100-4 5000 409487 ns/op 210563 B/op 955 allocs/op -// BenchmarkSeriesIDSet_Merge_Unique/cardinality_1000000/shards_1-4 100000 19289 ns/op 19328 B/op 79 allocs/op -// BenchmarkSeriesIDSet_Merge_Unique/cardinality_1000000/shards_10-4 10000 129048 ns/op 159716 B/op 556 allocs/op -// BenchmarkSeriesIDSet_Merge_Unique/cardinality_1000000/shards_100-4 500 3482907 ns/op 5428116 B/op 6174 allocs/op -// BenchmarkSeriesIDSet_Merge_Unique/cardinality_10000000/shards_1-4 30000 43734 ns/op 51872 B/op 641 allocs/op -// BenchmarkSeriesIDSet_Merge_Unique/cardinality_10000000/shards_10-4 3000 514412 ns/op 748678 B/op 3687 allocs/op -// BenchmarkSeriesIDSet_Merge_Unique/cardinality_10000000/shards_100-4 30 61891687 ns/op 69626539 B/op 36038 allocs/op -func BenchmarkSeriesIDSet_Merge_Unique(b *testing.B) { - cardinalities := []int{1, 10000, 1000000, 10000000} - shards := []int{1, 10, 100} - - for _, cardinality := range cardinalities { - set = NewSeriesIDSet() - for i := 0; i < cardinality; i++ { - set.Add(NewSeriesID(uint64(i))) - } - - for _, shard := range shards { - others := make([]*SeriesIDSet, 0, shard) - for s := 1; s <= shard; s++ { - other := NewSeriesIDSet() - for i := 0; i < cardinality; i++ { - other.Add(NewSeriesID(uint64(i + (s * cardinality)))) - } - others = append(others, other) - } - - b.Run(fmt.Sprintf("cardinality_%d/shards_%d", cardinality, shard), func(b *testing.B) { - base := &SeriesIDSet{bitmap: set.bitmap.Clone()} - for i := 0; i < b.N; i++ { - base.Merge(others...) - b.StopTimer() - base.bitmap = set.bitmap.Clone() - b.StartTimer() - } - }) - } - } -} diff --git a/tsdb/seriesfile/config.go b/tsdb/seriesfile/config.go deleted file mode 100644 index 925df73ec8..0000000000 --- a/tsdb/seriesfile/config.go +++ /dev/null @@ -1,21 +0,0 @@ -package seriesfile - -const ( - // DefaultLargeSeriesWriteThreshold is the number of series per write - // that requires the series index be pregrown before insert. - DefaultLargeSeriesWriteThreshold = 10000 -) - -// Config contains all of the configuration related to tsdb. -type Config struct { - // LargeSeriesWriteThreshold is the threshold before a write requires - // preallocation to improve throughput. Currently used in the series file. - LargeSeriesWriteThreshold int `toml:"large-series-write-threshold"` -} - -// NewConfig return a new instance of config with default settings. 
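// Editor's sketch (not part of the original source): how a caller might apply this
// setting — NewConfig (defined just below) supplies the default, and the value is copied
// onto SeriesFile.LargeWriteThreshold, the field shown later in this diff, before Open.
// Assumes package seriesfile as deleted here; the path argument is illustrative.
func newSeriesFileFromConfig(path string, cfg Config) *SeriesFile {
	sfile := NewSeriesFile(path)
	sfile.LargeWriteThreshold = cfg.LargeSeriesWriteThreshold
	return sfile
}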
-func NewConfig() Config { - return Config{ - LargeSeriesWriteThreshold: DefaultLargeSeriesWriteThreshold, - } -} diff --git a/tsdb/seriesfile/metrics.go b/tsdb/seriesfile/metrics.go deleted file mode 100644 index b565267407..0000000000 --- a/tsdb/seriesfile/metrics.go +++ /dev/null @@ -1,126 +0,0 @@ -package seriesfile - -import ( - "sort" - "sync" - - "github.com/influxdata/influxdb/v2/pkg/rhh" - "github.com/prometheus/client_golang/prometheus" -) - -// The following package variables act as singletons, to be shared by all -// storage.Engine instantiations. This allows multiple Series Files to be -// monitored within the same process. -var ( - sms *seriesFileMetrics // main metrics - ims *rhh.Metrics // hashmap specific metrics - mmu sync.RWMutex -) - -// PrometheusCollectors returns all the metrics associated with the tsdb package. -func PrometheusCollectors() []prometheus.Collector { - mmu.RLock() - defer mmu.RUnlock() - - var collectors []prometheus.Collector - if sms != nil { - collectors = append(collectors, sms.PrometheusCollectors()...) - } - - if ims != nil { - collectors = append(collectors, ims.PrometheusCollectors()...) - } - return collectors -} - -// namespace is the leading part of all published metrics for the Storage service. -const namespace = "storage" - -const seriesFileSubsystem = "series_file" // sub-system associated with metrics for the Series File. - -type seriesFileMetrics struct { - SeriesCreated *prometheus.CounterVec // Number of series created in Series File. - Series *prometheus.GaugeVec // Number of series. - DiskSize *prometheus.GaugeVec // Size occupied on disk. - Segments *prometheus.GaugeVec // Number of segment files. - - CompactionsActive *prometheus.GaugeVec // Number of active compactions. - CompactionDuration *prometheus.HistogramVec // Duration of compactions. - // The following metrics include a ``"status" = {ok, error}` label - Compactions *prometheus.CounterVec // Total number of compactions. -} - -// newSeriesFileMetrics initialises the prometheus metrics for tracking the Series File. -func newSeriesFileMetrics(labels prometheus.Labels) *seriesFileMetrics { - names := []string{"series_file_partition"} // All metrics have this label. 
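// Editor's note (not part of the original source): the loop below folds the caller's
// default labels (e.g. engine_id, node_id) into this slice and keeps it sorted so each
// metric vector is declared with a deterministic label order; the compaction metrics
// then extend the same list with an extra "status" or "component" label.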
- for k := range labels { - names = append(names, k) - } - sort.Strings(names) - - totalCompactions := append(append([]string(nil), names...), "status") - sort.Strings(totalCompactions) - - durationCompaction := append(append([]string(nil), names...), "component") - sort.Strings(durationCompaction) - - return &seriesFileMetrics{ - SeriesCreated: prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: seriesFileSubsystem, - Name: "series_created", - Help: "Number of series created in Series File.", - }, names), - Series: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: seriesFileSubsystem, - Name: "series_total", - Help: "Number of series in Series File.", - }, names), - DiskSize: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: seriesFileSubsystem, - Name: "disk_bytes", - Help: "Number of bytes Series File is using on disk.", - }, names), - Segments: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: seriesFileSubsystem, - Name: "segments_total", - Help: "Number of segment files in Series File.", - }, names), - CompactionsActive: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: seriesFileSubsystem, - Name: "index_compactions_active", - Help: "Number of active index compactions.", - }, durationCompaction), - CompactionDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: namespace, - Subsystem: seriesFileSubsystem, - Name: "index_compactions_duration_seconds", - Help: "Time taken for a successful compaction of index.", - // 30 buckets spaced exponentially between 5s and ~53 minutes. - Buckets: prometheus.ExponentialBuckets(5.0, 1.25, 30), - }, durationCompaction), - Compactions: prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: seriesFileSubsystem, - Name: "compactions_total", - Help: "Number of compactions.", - }, totalCompactions), - } -} - -// PrometheusCollectors satisfies the prom.PrometheusCollector interface. -func (m *seriesFileMetrics) PrometheusCollectors() []prometheus.Collector { - return []prometheus.Collector{ - m.SeriesCreated, - m.Series, - m.DiskSize, - m.Segments, - m.CompactionsActive, - m.CompactionDuration, - m.Compactions, - } -} diff --git a/tsdb/seriesfile/metrics_test.go b/tsdb/seriesfile/metrics_test.go deleted file mode 100644 index b653cc67e9..0000000000 --- a/tsdb/seriesfile/metrics_test.go +++ /dev/null @@ -1,180 +0,0 @@ -package seriesfile - -import ( - "context" - "io/ioutil" - "os" - "testing" - "time" - - "github.com/influxdata/influxdb/v2/kit/prom/promtest" - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/prometheus/client_golang/prometheus" - dto "github.com/prometheus/client_model/go" -) - -func TestMetrics_SeriesPartition(t *testing.T) { - // metrics to be shared by multiple file stores. - metrics := newSeriesFileMetrics(prometheus.Labels{"engine_id": "", "node_id": ""}) - - t1 := newSeriesPartitionTracker(metrics, prometheus.Labels{"series_file_partition": "0", "engine_id": "0", "node_id": "0"}) - t2 := newSeriesPartitionTracker(metrics, prometheus.Labels{"series_file_partition": "0", "engine_id": "1", "node_id": "0"}) - - reg := prometheus.NewRegistry() - reg.MustRegister(metrics.PrometheusCollectors()...) 
- - base := namespace + "_" + seriesFileSubsystem + "_" - - // All the metric names - gauges := []string{ - base + "series_total", - base + "disk_bytes", - base + "segments_total", - base + "index_compactions_active", - } - - counters := []string{ - base + "series_created", - base + "compactions_total", - } - - histograms := []string{ - base + "index_compactions_duration_seconds", - } - - // Generate some measurements. - for i, tracker := range []*seriesPartitionTracker{t1, t2} { - tracker.SetSeries(uint64(i + len(gauges[0]))) - tracker.SetDiskSize(uint64(i + len(gauges[1]))) - tracker.SetSegments(uint64(i + len(gauges[2]))) - - labels := tracker.Labels() - labels["component"] = "index" - tracker.metrics.CompactionsActive.With(labels).Add(float64(i + len(gauges[3]))) - - tracker.AddSeriesCreated(uint64(i + len(counters[0]))) - labels = tracker.Labels() - labels["status"] = "ok" - tracker.metrics.Compactions.With(labels).Add(float64(i + len(counters[1]))) - - labels = tracker.Labels() - labels["component"] = "index" - tracker.metrics.CompactionDuration.With(labels).Observe(float64(i + len(histograms[0]))) - } - - // Test that all the correct metrics are present. - mfs, err := reg.Gather() - if err != nil { - t.Fatal(err) - } - - // The label variants for the two caches. - labelVariants := []prometheus.Labels{ - prometheus.Labels{"engine_id": "0", "node_id": "0"}, - prometheus.Labels{"engine_id": "1", "node_id": "0"}, - } - - for i, labels := range labelVariants { - labels["series_file_partition"] = "0" - var metric *dto.Metric - - for _, name := range gauges { - exp := float64(i + len(name)) - - if name == base+"index_compactions_active" { - // Make a copy since we need to add a label - l := make(prometheus.Labels, len(labels)) - for k, v := range labels { - l[k] = v - } - l["component"] = "index" - metric = promtest.MustFindMetric(t, mfs, name, l) - } else { - metric = promtest.MustFindMetric(t, mfs, name, labels) - } - - if got := metric.GetGauge().GetValue(); got != exp { - t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) - } - } - - for _, name := range counters { - exp := float64(i + len(name)) - - if name == base+"compactions_total" { - // Make a copy since we need to add a label - l := make(prometheus.Labels, len(labels)) - for k, v := range labels { - l[k] = v - } - l["status"] = "ok" - - metric = promtest.MustFindMetric(t, mfs, name, l) - } else { - metric = promtest.MustFindMetric(t, mfs, name, labels) - } - - if got := metric.GetCounter().GetValue(); got != exp { - t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) - } - } - - for _, name := range histograms { - // Make a copy since we need to add a label - l := make(prometheus.Labels, len(labels)) - for k, v := range labels { - l[k] = v - } - l["component"] = "index" - - exp := float64(i + len(name)) - metric := promtest.MustFindMetric(t, mfs, name, l) - if got := metric.GetHistogram().GetSampleSum(); got != exp { - t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) - } - } - } -} - -// This test ensures that disabling metrics works even if a series file has been created before. -func TestMetrics_Disabled(t *testing.T) { - // This test messes with global state. Gotta fix it up otherwise other tests panic. I really - // am beginning to wonder about our metrics. - defer func() { - mmu.Lock() - sms = nil - ims = nil - mmu.Unlock() - }() - - path, err := ioutil.TempDir("", "sfile-metrics-") - if err != nil { - t.Fatal(err) - } - defer os.RemoveAll(path) - - // Step 1. 
make a series file with metrics and some labels - sfile := NewSeriesFile(path) - sfile.SetDefaultMetricLabels(prometheus.Labels{"foo": "bar"}) - if err := sfile.Open(context.Background()); err != nil { - t.Fatal(err) - } - if err := sfile.Close(); err != nil { - t.Fatal(err) - } - - // Step 2. open the series file again, but disable metrics - sfile = NewSeriesFile(path) - sfile.DisableMetrics() - if err := sfile.Open(context.Background()); err != nil { - t.Fatal(err) - } - defer sfile.Close() - - // Step 3. add a series - points := []models.Point{models.MustNewPoint("a", models.Tags{}, models.Fields{"f": 1.0}, time.Now())} - if err := sfile.CreateSeriesListIfNotExists(tsdb.NewSeriesCollection(points)); err != nil { - t.Fatal(err) - } -} diff --git a/tsdb/seriesfile/series_file.go b/tsdb/seriesfile/series_file.go deleted file mode 100644 index 5352f22565..0000000000 --- a/tsdb/seriesfile/series_file.go +++ /dev/null @@ -1,628 +0,0 @@ -package seriesfile - -import ( - "bytes" - "context" - "encoding/binary" - "errors" - "fmt" - "os" - "path/filepath" - "sort" - "sync" - - "github.com/cespare/xxhash" - "github.com/influxdata/influxdb/v2/kit/tracing" - "github.com/influxdata/influxdb/v2/logger" - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/pkg/binaryutil" - "github.com/influxdata/influxdb/v2/pkg/lifecycle" - "github.com/influxdata/influxdb/v2/pkg/mincore" - "github.com/influxdata/influxdb/v2/pkg/rhh" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/prometheus/client_golang/prometheus" - "go.uber.org/multierr" - "go.uber.org/zap" - "golang.org/x/sync/errgroup" - "golang.org/x/time/rate" -) - -var ( - ErrSeriesFileClosed = errors.New("tsdb: series file closed") - ErrInvalidSeriesPartitionID = errors.New("tsdb: invalid series partition id") -) - -const ( - // SeriesFilePartitionN is the number of partitions a series file is split into. - SeriesFilePartitionN = 8 -) - -// SeriesFile represents the section of the index that holds series data. -type SeriesFile struct { - mu sync.Mutex // protects concurrent open and close - res lifecycle.Resource - - path string - partitions []*SeriesPartition - - // N.B we have many partitions, but they must share the same metrics, so the - // metrics are managed in a single shared package variable and - // each partition decorates the same metric measurements with different - // partition id label values. - defaultMetricLabels prometheus.Labels - metricsEnabled bool - - pageFaultLimiter *rate.Limiter // Limits page faults by the series file - - LargeWriteThreshold int - - Logger *zap.Logger -} - -// NewSeriesFile returns a new instance of SeriesFile. -func NewSeriesFile(path string) *SeriesFile { - return &SeriesFile{ - path: path, - metricsEnabled: true, - Logger: zap.NewNop(), - - LargeWriteThreshold: DefaultLargeSeriesWriteThreshold, - } -} - -// WithLogger sets the logger on the SeriesFile and all underlying partitions. It must be called before Open. -func (f *SeriesFile) WithLogger(log *zap.Logger) { - f.Logger = log.With(zap.String("service", "series-file")) -} - -// SetDefaultMetricLabels sets the default labels for metrics on the Series File. -// It must be called before the SeriesFile is opened. -func (f *SeriesFile) SetDefaultMetricLabels(labels prometheus.Labels) { - f.defaultMetricLabels = make(prometheus.Labels, len(labels)) - for k, v := range labels { - f.defaultMetricLabels[k] = v - } -} - -// DisableMetrics ensures that activity is not collected via the prometheus metrics. 
-// DisableMetrics must be called before Open. -func (f *SeriesFile) DisableMetrics() { - f.metricsEnabled = false -} - -// WithPageFaultLimiter sets a limiter to restrict the number of page faults. -func (f *SeriesFile) WithPageFaultLimiter(limiter *rate.Limiter) { - f.pageFaultLimiter = limiter -} - -// Open memory maps the data file at the file's path. -func (f *SeriesFile) Open(ctx context.Context) error { - f.mu.Lock() - defer f.mu.Unlock() - - if f.res.Opened() { - return errors.New("series file already opened") - } - - span, ctx := tracing.StartSpanFromContext(ctx) - defer span.Finish() - - _, logEnd := logger.NewOperation(ctx, f.Logger, "Opening Series File", "series_file_open", zap.String("path", f.path)) - defer logEnd() - - // Create path if it doesn't exist. - if err := os.MkdirAll(filepath.Join(f.path), 0777); err != nil { - return err - } - - // Initialise metrics for trackers. - mmu.Lock() - if sms == nil && f.metricsEnabled { - sms = newSeriesFileMetrics(f.defaultMetricLabels) - } - if ims == nil && f.metricsEnabled { - // Make a copy of the default labels so that another label can be provided. - labels := make(prometheus.Labels, len(f.defaultMetricLabels)) - for k, v := range f.defaultMetricLabels { - labels[k] = v - } - labels["series_file_partition"] = "" // All partitions have this label. - ims = rhh.NewMetrics(namespace, seriesFileSubsystem+"_index", labels) - } - mmu.Unlock() - - // Open partitions. - f.partitions = make([]*SeriesPartition, 0, SeriesFilePartitionN) - for i := 0; i < SeriesFilePartitionN; i++ { - // TODO(edd): These partition initialisation should be moved up to NewSeriesFile. - p := NewSeriesPartition(i, f.SeriesPartitionPath(i)) - p.LargeWriteThreshold = f.LargeWriteThreshold - p.Logger = f.Logger.With(zap.Int("partition", p.ID())) - p.pageFaultLimiter = f.pageFaultLimiter - - // For each series file index, rhh trackers are used to track the RHH Hashmap. - // Each of the trackers needs to be given slightly different default - // labels to ensure the correct partition_ids are set as labels. - labels := make(prometheus.Labels, len(f.defaultMetricLabels)) - for k, v := range f.defaultMetricLabels { - labels[k] = v - } - labels["series_file_partition"] = fmt.Sprint(p.ID()) - - p.index.rhhMetrics = ims - p.index.rhhLabels = labels - p.index.rhhMetricsEnabled = f.metricsEnabled - - // Set the metric trackers on the partition with any injected default labels. - p.tracker = newSeriesPartitionTracker(sms, labels) - p.tracker.enabled = f.metricsEnabled - - if err := p.Open(); err != nil { - f.Logger.Error("Unable to open series file", - zap.String("path", f.path), - zap.Int("partition", p.ID()), - zap.Error(err)) - f.closeNoLock() - return err - } - f.partitions = append(f.partitions, p) - } - - // The resource is now open. - f.res.Open() - - return nil -} - -func (f *SeriesFile) closeNoLock() (err error) { - // Close the resource and wait for any outstanding references. - f.res.Close() - - var errs []error - for _, p := range f.partitions { - errs = append(errs, p.Close()) - } - return multierr.Combine(errs...) -} - -// Close unmaps the data file. -func (f *SeriesFile) Close() error { - f.mu.Lock() - defer f.mu.Unlock() - return f.closeNoLock() -} - -// Path returns the path to the file. -func (f *SeriesFile) Path() string { return f.path } - -// SeriesPartitionPath returns the path to a given partition. -func (f *SeriesFile) SeriesPartitionPath(i int) string { - return filepath.Join(f.path, fmt.Sprintf("%02x", i)) -} - -// Partitions returns all partitions. 
-func (f *SeriesFile) Partitions() []*SeriesPartition { return f.partitions } - -// Acquire ensures that the series file won't be closed until after the reference -// has been released. -func (f *SeriesFile) Acquire() (*lifecycle.Reference, error) { - return f.res.Acquire() -} - -// EnableCompactions allows compactions to run. -func (f *SeriesFile) EnableCompactions() { - for _, p := range f.partitions { - p.EnableCompactions() - } -} - -// DisableCompactions prevents new compactions from running. -func (f *SeriesFile) DisableCompactions() { - for _, p := range f.partitions { - p.DisableCompactions() - } -} - -// FileSize returns the size of all partitions, in bytes. -func (f *SeriesFile) FileSize() (n int64, err error) { - for _, p := range f.partitions { - v, err := p.FileSize() - n += v - if err != nil { - return n, err - } - } - return n, err -} - -// CreateSeriesListIfNotExists creates a list of series in bulk if they don't exist. It overwrites -// the collection's Keys and SeriesIDs fields. The collection's SeriesIDs slice will have IDs for -// every name+tags, creating new series IDs as needed. If any SeriesID is zero, then a type -// conflict has occurred for that series. -func (f *SeriesFile) CreateSeriesListIfNotExists(collection *tsdb.SeriesCollection) error { - collection.SeriesKeys = GenerateSeriesKeys(collection.Names, collection.Tags) - collection.SeriesIDs = make([]tsdb.SeriesID, len(collection.SeriesKeys)) - keyPartitionIDs := f.SeriesKeysPartitionIDs(collection.SeriesKeys) - - var g errgroup.Group - for i := range f.partitions { - p := f.partitions[i] - g.Go(func() error { - return p.CreateSeriesListIfNotExists(collection, keyPartitionIDs) - }) - } - if err := g.Wait(); err != nil { - return err - } - - collection.ApplyConcurrentDrops() - return nil -} - -// DeleteSeriesID flags a list of series as permanently deleted. -// If a series is reintroduced later then it must create a new id. -func (f *SeriesFile) DeleteSeriesIDs(ids []tsdb.SeriesID) error { - m := make(map[int][]tsdb.SeriesID) - for _, id := range ids { - partitionID := f.SeriesIDPartitionID(id) - m[partitionID] = append(m[partitionID], id) - } - - var g errgroup.Group - for partitionID, partitionIDs := range m { - partitionID, partitionIDs := partitionID, partitionIDs - g.Go(func() error { return f.partitions[partitionID].DeleteSeriesIDs(partitionIDs) }) - } - return g.Wait() -} - -// IsDeleted returns true if the ID has been deleted before. -func (f *SeriesFile) IsDeleted(id tsdb.SeriesID) bool { - p := f.SeriesIDPartition(id) - if p == nil { - return false - } - return p.IsDeleted(id) -} - -// SeriesKey returns the series key for a given id. -func (f *SeriesFile) SeriesKey(id tsdb.SeriesID) []byte { - if id.IsZero() { - return nil - } - p := f.SeriesIDPartition(id) - if p == nil { - return nil - } - return p.SeriesKey(id) -} - -// SeriesKeyName returns the measurement name for a series id. -func (f *SeriesFile) SeriesKeyName(id tsdb.SeriesID) []byte { - if id.IsZero() { - return nil - } - data := f.SeriesIDPartition(id).SeriesKey(id) - if data == nil { - return nil - } - _, data = ReadSeriesKeyLen(data) - name, _ := ReadSeriesKeyMeasurement(data) - return name -} - -// SeriesKeys returns a list of series keys from a list of ids. -func (f *SeriesFile) SeriesKeys(ids []tsdb.SeriesID) [][]byte { - keys := make([][]byte, len(ids)) - for i := range ids { - keys[i] = f.SeriesKey(ids[i]) - } - return keys -} - -// Series returns the parsed series name and tags for an offset. 
-func (f *SeriesFile) Series(id tsdb.SeriesID) ([]byte, models.Tags) { - key := f.SeriesKey(id) - if key == nil { - return nil, nil - } - return ParseSeriesKey(key) -} - -// SeriesID returns the series id for the series. -func (f *SeriesFile) SeriesID(name []byte, tags models.Tags, buf []byte) tsdb.SeriesID { - return f.SeriesIDTyped(name, tags, buf).SeriesID() -} - -// SeriesIDTyped returns the typed series id for the series. -func (f *SeriesFile) SeriesIDTyped(name []byte, tags models.Tags, buf []byte) tsdb.SeriesIDTyped { - key := AppendSeriesKey(buf[:0], name, tags) - return f.SeriesIDTypedBySeriesKey(key) -} - -// SeriesIDTypedBySeriesKey returns the typed series id for the series. -func (f *SeriesFile) SeriesIDTypedBySeriesKey(key []byte) tsdb.SeriesIDTyped { - keyPartition := f.SeriesKeyPartition(key) - if keyPartition == nil { - return tsdb.SeriesIDTyped{} - } - return keyPartition.FindIDTypedBySeriesKey(key) -} - -// HasSeries return true if the series exists. -func (f *SeriesFile) HasSeries(name []byte, tags models.Tags, buf []byte) bool { - return !f.SeriesID(name, tags, buf).IsZero() -} - -// SeriesCount returns the number of series. -func (f *SeriesFile) SeriesCount() uint64 { - var n uint64 - for _, p := range f.partitions { - n += p.SeriesCount() - } - return n -} - -// SeriesIDs returns a slice of series IDs in all partitions, sorted. -// This may return a lot of data at once, so use sparingly. -func (f *SeriesFile) SeriesIDs() []tsdb.SeriesID { - var ids []tsdb.SeriesID - for _, p := range f.partitions { - ids = p.AppendSeriesIDs(ids) - } - sort.Slice(ids, func(i, j int) bool { return ids[i].Less(ids[j]) }) - return ids -} - -func (f *SeriesFile) SeriesIDPartitionID(id tsdb.SeriesID) int { - return int((id.RawID() - 1) % SeriesFilePartitionN) -} - -func (f *SeriesFile) SeriesIDPartition(id tsdb.SeriesID) *SeriesPartition { - partitionID := f.SeriesIDPartitionID(id) - if partitionID >= len(f.partitions) { - return nil - } - return f.partitions[partitionID] -} - -func (f *SeriesFile) SeriesKeysPartitionIDs(keys [][]byte) []int { - partitionIDs := make([]int, len(keys)) - for i := range keys { - partitionIDs[i] = f.SeriesKeyPartitionID(keys[i]) - } - return partitionIDs -} - -func (f *SeriesFile) SeriesKeyPartitionID(key []byte) int { - return int(xxhash.Sum64(key) % SeriesFilePartitionN) -} - -func (f *SeriesFile) SeriesKeyPartition(key []byte) *SeriesPartition { - partitionID := f.SeriesKeyPartitionID(key) - if partitionID >= len(f.partitions) { - return nil - } - return f.partitions[partitionID] -} - -// AppendSeriesKey serializes name and tags to a byte slice. -// The total length is prepended as a uvarint. -func AppendSeriesKey(dst []byte, name []byte, tags models.Tags) []byte { - buf := make([]byte, binary.MaxVarintLen64) - origLen := len(dst) - - // The tag count is variable encoded, so we need to know ahead of time what - // the size of the tag count value will be. - tcBuf := make([]byte, binary.MaxVarintLen64) - tcSz := binary.PutUvarint(tcBuf, uint64(len(tags))) - - // Size of name/tags. Does not include total length. - size := 0 + // - 2 + // size of measurement - len(name) + // measurement - tcSz + // size of number of tags - (4 * len(tags)) + // length of each tag key and value - tags.Size() // size of tag keys/values - - // Variable encode length. - totalSz := binary.PutUvarint(buf, uint64(size)) - - // If caller doesn't provide a buffer then pre-allocate an exact one. - if dst == nil { - dst = make([]byte, 0, size+totalSz) - } - - // Append total length. 
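As a quick illustration of the partition routing implemented by SeriesKeyPartitionID and SeriesIDPartitionID above: keys are hashed with xxhash and taken modulo the partition count, while an ID maps back to its owner via (id-1) mod N. A minimal standalone sketch, assuming the same github.com/cespare/xxhash dependency; the helper names here are illustrative, not part of the package:

package main

import (
	"fmt"

	"github.com/cespare/xxhash"
)

// seriesFilePartitionN mirrors SeriesFilePartitionN in the code above.
const seriesFilePartitionN = 8

// keyPartition routes a series key to a partition: hash the key, then
// take the hash modulo the number of partitions.
func keyPartition(key []byte) int {
	return int(xxhash.Sum64(key) % seriesFilePartitionN)
}

// idPartition recovers the owning partition from a raw series ID. IDs are
// striped so partition p issues p+1, p+1+N, p+1+2N, ..., hence (id-1) mod N.
func idPartition(rawID uint64) int {
	return int((rawID - 1) % seriesFilePartitionN)
}

func main() {
	fmt.Println(keyPartition([]byte("cpu,region=east"))) // stable for a given key
	fmt.Println(idPartition(9))                          // 0: partition 0 issues 1, 9, 17, ...
}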
- dst = append(dst, buf[:totalSz]...) - - // Append name. - binary.BigEndian.PutUint16(buf, uint16(len(name))) - dst = append(dst, buf[:2]...) - dst = append(dst, name...) - - // Append tag count. - dst = append(dst, tcBuf[:tcSz]...) - - // Append tags. - for _, tag := range tags { - binary.BigEndian.PutUint16(buf, uint16(len(tag.Key))) - dst = append(dst, buf[:2]...) - dst = append(dst, tag.Key...) - - binary.BigEndian.PutUint16(buf, uint16(len(tag.Value))) - dst = append(dst, buf[:2]...) - dst = append(dst, tag.Value...) - } - - // Verify that the total length equals the encoded byte count. - if got, exp := len(dst)-origLen, size+totalSz; got != exp { - panic(fmt.Sprintf("series key encoding does not match calculated total length: actual=%d, exp=%d, key=%x", got, exp, dst)) - } - - return dst -} - -// ReadSeriesKey returns the series key from the beginning of the buffer. -func ReadSeriesKey(data []byte) (key, remainder []byte) { - sz, n := binary.Uvarint(data) - return data[:int(sz)+n], data[int(sz)+n:] -} - -func ReadSeriesKeyLen(data []byte) (sz int, remainder []byte) { - sz64, i := binary.Uvarint(data) - return int(sz64), data[i:] -} - -func ReadSeriesKeyMeasurement(data []byte) (name, remainder []byte) { - n, data := binary.BigEndian.Uint16(data), data[2:] - return data[:n], data[n:] -} - -func ReadSeriesKeyTagN(data []byte) (n int, remainder []byte) { - n64, i := binary.Uvarint(data) - return int(n64), data[i:] -} - -func ReadSeriesKeyTag(data []byte) (key, value, remainder []byte) { - n, data := binary.BigEndian.Uint16(data), data[2:] - key, data = data[:n], data[n:] - - n, data = binary.BigEndian.Uint16(data), data[2:] - value, data = data[:n], data[n:] - return key, value, data -} - -// ParseSeriesKey extracts the name & tags from a series key. -func ParseSeriesKey(data []byte) (name []byte, tags models.Tags) { - return parseSeriesKey(data, nil) -} - -// ParseSeriesKeyInto extracts the name and tags for data, parsing the tags into -// dstTags, which is then returened. -// -// The returned dstTags may have a different length and capacity. -func ParseSeriesKeyInto(data []byte, dstTags models.Tags) ([]byte, models.Tags) { - return parseSeriesKey(data, dstTags) -} - -// parseSeriesKey extracts the name and tags from data, attempting to re-use the -// provided tags value rather than allocating. The returned tags may have a -// different length and capacity to those provided. -func parseSeriesKey(data []byte, dst models.Tags) ([]byte, models.Tags) { - var name []byte - _, data = ReadSeriesKeyLen(data) - name, data = ReadSeriesKeyMeasurement(data) - tagN, data := ReadSeriesKeyTagN(data) - - dst = dst[:cap(dst)] // Grow dst to use full capacity - if got, want := len(dst), tagN; got < want { - dst = append(dst, make(models.Tags, want-got)...) - } else if got > want { - dst = dst[:want] - } - dst = dst[:tagN] - - for i := 0; i < tagN; i++ { - var key, value []byte - key, value, data = ReadSeriesKeyTag(data) - dst[i].Key, dst[i].Value = key, value - } - - return name, dst -} - -func CompareSeriesKeys(a, b []byte) int { - // Handle 'nil' keys. - if len(a) == 0 && len(b) == 0 { - return 0 - } else if len(a) == 0 { - return -1 - } else if len(b) == 0 { - return 1 - } - - // Read total size. - _, a = ReadSeriesKeyLen(a) - _, b = ReadSeriesKeyLen(b) - - // Read names. - name0, a := ReadSeriesKeyMeasurement(a) - name1, b := ReadSeriesKeyMeasurement(b) - - // Compare names, return if not equal. - if cmp := bytes.Compare(name0, name1); cmp != 0 { - return cmp - } - - // Read tag counts. 
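The AppendSeriesKey / ReadSeriesKey* helpers above define the series key wire format: a uvarint total length, a big-endian uint16 name length plus the name, a uvarint tag count, and then each tag as a length-prefixed key and value. A self-contained sketch of that layout; encodeSeriesKey is not a function in the package, and tags are assumed to be pre-sorted as models.Tags guarantees:

package main

import (
	"encoding/binary"
	"fmt"
)

// encodeSeriesKey writes name and tags using the same layout as
// AppendSeriesKey: uvarint(total size) | uint16 name len | name |
// uvarint(tag count) | {uint16 key len | key | uint16 value len | value}...
func encodeSeriesKey(name string, tags [][2]string) []byte {
	var body []byte
	var u16 [2]byte
	var uv [binary.MaxVarintLen64]byte

	// Name, prefixed with a 2-byte big-endian length.
	binary.BigEndian.PutUint16(u16[:], uint16(len(name)))
	body = append(body, u16[:]...)
	body = append(body, name...)

	// Tag count as a uvarint.
	n := binary.PutUvarint(uv[:], uint64(len(tags)))
	body = append(body, uv[:n]...)

	// Each tag key and value, 2-byte length-prefixed.
	for _, t := range tags {
		binary.BigEndian.PutUint16(u16[:], uint16(len(t[0])))
		body = append(body, u16[:]...)
		body = append(body, t[0]...)
		binary.BigEndian.PutUint16(u16[:], uint16(len(t[1])))
		body = append(body, u16[:]...)
		body = append(body, t[1]...)
	}

	// Prepend the total length; it counts only the body, not itself.
	n = binary.PutUvarint(uv[:], uint64(len(body)))
	return append(uv[:n:n], body...)
}

func main() {
	key := encodeSeriesKey("cpu", [][2]string{{"region", "east"}})
	sz, n := binary.Uvarint(key)
	fmt.Printf("body size=%d, encoded=% x\n", sz, key[n:])
}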
- tagN0, a := ReadSeriesKeyTagN(a) - tagN1, b := ReadSeriesKeyTagN(b) - - // Compare each tag in order. - for i := 0; ; i++ { - // Check for EOF. - if i == tagN0 && i == tagN1 { - return 0 - } else if i == tagN0 { - return -1 - } else if i == tagN1 { - return 1 - } - - // Read keys. - var key0, key1, value0, value1 []byte - key0, value0, a = ReadSeriesKeyTag(a) - key1, value1, b = ReadSeriesKeyTag(b) - - // Compare keys & values. - if cmp := bytes.Compare(key0, key1); cmp != 0 { - return cmp - } else if cmp := bytes.Compare(value0, value1); cmp != 0 { - return cmp - } - } -} - -// GenerateSeriesKeys generates series keys for a list of names & tags using -// a single large memory block. -func GenerateSeriesKeys(names [][]byte, tagsSlice []models.Tags) [][]byte { - buf := make([]byte, 0, SeriesKeysSize(names, tagsSlice)) - keys := make([][]byte, len(names)) - for i := range names { - offset := len(buf) - buf = AppendSeriesKey(buf, names[i], tagsSlice[i]) - keys[i] = buf[offset:] - } - return keys -} - -// SeriesKeysSize returns the number of bytes required to encode a list of name/tags. -func SeriesKeysSize(names [][]byte, tagsSlice []models.Tags) int { - var n int - for i := range names { - n += SeriesKeySize(names[i], tagsSlice[i]) - } - return n -} - -// SeriesKeySize returns the number of bytes required to encode a series key. -func SeriesKeySize(name []byte, tags models.Tags) int { - var n int - n += 2 + len(name) - n += binaryutil.UvarintSize(uint64(len(tags))) - for _, tag := range tags { - n += 2 + len(tag.Key) - n += 2 + len(tag.Value) - } - n += binaryutil.UvarintSize(uint64(n)) - return n -} - -// wait rate limits page faults to the underlying data. Skipped if limiter is not set. -func wait(limiter *mincore.Limiter, b []byte) error { - if limiter == nil { - return nil - } - return limiter.WaitRange(context.Background(), b) -} diff --git a/tsdb/seriesfile/series_file_test.go b/tsdb/seriesfile/series_file_test.go deleted file mode 100644 index a78b8cc22d..0000000000 --- a/tsdb/seriesfile/series_file_test.go +++ /dev/null @@ -1,494 +0,0 @@ -package seriesfile_test - -import ( - "bytes" - "context" - "fmt" - "io/ioutil" - "os" - "path" - "testing" - - "github.com/influxdata/influxdb/v2/logger" - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" - "golang.org/x/sync/errgroup" -) - -func TestParseSeriesKeyInto(t *testing.T) { - name := []byte("cpu") - tags := models.NewTags(map[string]string{"region": "east", "server": "a"}) - key := seriesfile.AppendSeriesKey(nil, name, tags) - - dst := make(models.Tags, 0) - gotName, gotTags := seriesfile.ParseSeriesKeyInto(key, dst) - - if !bytes.Equal(gotName, name) { - t.Fatalf("got %q, expected %q", gotName, name) - } - - if got, exp := len(gotTags), 2; got != exp { - t.Fatalf("got tags length %d, expected %d", got, exp) - } else if got, exp := gotTags, tags; !got.Equal(exp) { - t.Fatalf("got tags %v, expected %v", got, exp) - } - - dst = make(models.Tags, 0, 5) - _, gotTags = seriesfile.ParseSeriesKeyInto(key, dst) - if got, exp := len(gotTags), 2; got != exp { - t.Fatalf("got tags length %d, expected %d", got, exp) - } else if got, exp := cap(gotTags), 5; got != exp { - t.Fatalf("got tags capacity %d, expected %d", got, exp) - } else if got, exp := gotTags, tags; !got.Equal(exp) { - t.Fatalf("got tags %v, expected %v", got, exp) - } - - dst = make(models.Tags, 1) - _, gotTags = seriesfile.ParseSeriesKeyInto(key, dst) - if got, exp := len(gotTags), 2; got != 
exp { - t.Fatalf("got tags length %d, expected %d", got, exp) - } else if got, exp := gotTags, tags; !got.Equal(exp) { - t.Fatalf("got tags %v, expected %v", got, exp) - } -} - -// Ensure that broken series files are closed -func TestSeriesFile_Open_WhenFileCorrupt_ShouldReturnErr(t *testing.T) { - f := NewBrokenSeriesFile([]byte{0, 0, 0, 0, 0}) - defer f.Close() - f.Logger = logger.New(os.Stdout) - - err := f.Open(context.Background()) - if err == nil { - t.Fatalf("should report error") - } -} - -// Ensure series file contains the correct set of series. -func TestSeriesFile_Series(t *testing.T) { - sfile := MustOpenSeriesFile() - defer sfile.Close() - - series := []Series{ - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"}), Type: models.Integer}, - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"}), Type: models.Integer}, - {Name: []byte("mem"), Tags: models.NewTags(map[string]string{"region": "east"}), Type: models.Integer}, - } - for _, s := range series { - collection := &tsdb.SeriesCollection{ - Names: [][]byte{[]byte(s.Name)}, - Tags: []models.Tags{s.Tags}, - Types: []models.FieldType{s.Type}, - } - if err := sfile.CreateSeriesListIfNotExists(collection); err != nil { - t.Fatal(err) - } - } - - // Verify total number of series is correct. - if n := sfile.SeriesCount(); n != 3 { - t.Fatalf("unexpected series count: %d", n) - } - - // Verify all series exist. - for i, s := range series { - if seriesID := sfile.SeriesID(s.Name, s.Tags, nil); seriesID.IsZero() { - t.Fatalf("series does not exist: i=%d", i) - } - } - - // Verify non-existent series doesn't exist. - if sfile.HasSeries([]byte("foo"), models.NewTags(map[string]string{"region": "north"}), nil) { - t.Fatal("series should not exist") - } -} - -// Ensure series file can be compacted. -func TestSeriesFileCompactor(t *testing.T) { - sfile := MustOpenSeriesFile() - defer sfile.Close() - - // Disable automatic compactions. - for _, p := range sfile.Partitions() { - p.CompactThreshold = 0 - } - - collection := new(tsdb.SeriesCollection) - for i := 0; i < 10000; i++ { - collection.Names = append(collection.Names, []byte(fmt.Sprintf("m%d", i))) - collection.Tags = append(collection.Tags, models.NewTags(map[string]string{"foo": "bar"})) - collection.Types = append(collection.Types, models.Integer) - } - if err := sfile.CreateSeriesListIfNotExists(collection); err != nil { - t.Fatal(err) - } - if err := collection.PartialWriteError(); err != nil { - t.Fatal(err) - } - - // Verify total number of series is correct. - if n := sfile.SeriesCount(); n != uint64(len(collection.Names)) { - t.Fatalf("unexpected series count: %d", n) - } - - // Compact in-place for each partition. - for _, p := range sfile.Partitions() { - compactor := seriesfile.NewSeriesPartitionCompactor() - if _, err := compactor.Compact(p); err != nil { - t.Fatal(err) - } - } - - // Verify all series exist. - for iter := collection.Iterator(); iter.Next(); { - if seriesID := sfile.SeriesID(iter.Name(), iter.Tags(), nil); seriesID.IsZero() { - t.Fatalf("series does not exist: %s,%s", iter.Name(), iter.Tags().String()) - } - } - - // Verify total number of series is correct. - if got, exp := sfile.SeriesCount(), uint64(len(collection.Names)); got != exp { - t.Fatalf("SeriesCount()=%d, expected %d (after compaction)", got, exp) - } -} - -// Ensures that types are tracked and checked by the series file. 
-func TestSeriesFile_Type(t *testing.T) { - sfile := MustOpenSeriesFile() - defer sfile.Close() - - // Add the series with some types. - collection := &tsdb.SeriesCollection{ - Names: [][]byte{[]byte("a"), []byte("b"), []byte("c")}, - Tags: []models.Tags{{}, {}, {}}, - Types: []models.FieldType{models.Integer, models.Float, models.Boolean}, - } - if err := sfile.CreateSeriesListIfNotExists(collection); err != nil { - t.Fatal(err) - } - - // Attempt to add the series again but with different types. - collection = &tsdb.SeriesCollection{ - Names: [][]byte{[]byte("a"), []byte("b"), []byte("c"), []byte("d")}, - Tags: []models.Tags{{}, {}, {}, {}}, - Types: []models.FieldType{models.String, models.String, models.String, models.String}, - } - if err := sfile.CreateSeriesListIfNotExists(collection); err != nil { - t.Fatal(err) - } - - // All of the series except d should be dropped. - if err := collection.PartialWriteError(); err == nil { - t.Fatal("expected partial write error") - } - if collection.Length() != 1 { - t.Fatal("expected one series to remain in collection") - } - if got := string(collection.Names[0]); got != "d" { - t.Fatal("got invalid name on remaining series:", got) - } -} - -// Ensure series file deletions persist across compactions. -func TestSeriesFile_DeleteSeriesID(t *testing.T) { - sfile := MustOpenSeriesFile() - defer sfile.Close() - - if err := sfile.CreateSeriesListIfNotExists(&tsdb.SeriesCollection{ - Names: [][]byte{[]byte("m1")}, - Tags: []models.Tags{{}}, - Types: []models.FieldType{models.String}, - }); err != nil { - t.Fatal(err) - } else if err := sfile.CreateSeriesListIfNotExists(&tsdb.SeriesCollection{ - Names: [][]byte{[]byte("m2")}, - Tags: []models.Tags{{}}, - Types: []models.FieldType{models.String}, - }); err != nil { - t.Fatal(err) - } else if err := sfile.ForceCompact(); err != nil { - t.Fatal(err) - } - id := sfile.SeriesID([]byte("m1"), nil, nil) - - // Verify total number of series is correct. - if got, exp := sfile.SeriesCount(), uint64(2); got != exp { - t.Fatalf("SeriesCount()=%d, expected %d (before deleted)", got, exp) - } - - // Delete and ensure deletion. - if err := sfile.DeleteSeriesIDs([]tsdb.SeriesID{id}); err != nil { - t.Fatal(err) - } else if !sfile.IsDeleted(id) { - t.Fatal("expected deletion before compaction") - } - - // Verify total number of series is correct. - if got, exp := sfile.SeriesCount(), uint64(1); got != exp { - t.Fatalf("SeriesCount()=%d, expected %d (before compaction)", got, exp) - } - - if err := sfile.ForceCompact(); err != nil { - t.Fatal(err) - } else if !sfile.IsDeleted(id) { - t.Fatal("expected deletion after compaction") - } else if got, exp := sfile.SeriesCount(), uint64(1); got != exp { - t.Fatalf("SeriesCount()=%d, expected %d (after compaction)", got, exp) - } - - if err := sfile.Reopen(); err != nil { - t.Fatal(err) - } else if !sfile.IsDeleted(id) { - t.Fatal("expected deletion after reopen") - } else if got, exp := sfile.SeriesCount(), uint64(1); got != exp { - t.Fatalf("SeriesCount()=%d, expected %d (after reopen)", got, exp) - } - - // Recreate series with new ID. 
- if err := sfile.CreateSeriesListIfNotExists(&tsdb.SeriesCollection{ - Names: [][]byte{[]byte("m1")}, - Tags: []models.Tags{{}}, - Types: []models.FieldType{models.String}, - }); err != nil { - t.Fatal(err) - } else if got, exp := sfile.SeriesCount(), uint64(2); got != exp { - t.Fatalf("SeriesCount()=%d, expected %d (after recreate)", got, exp) - } - - if err := sfile.ForceCompact(); err != nil { - t.Fatal(err) - } else if !sfile.IsDeleted(id) { - t.Fatal("expected deletion after compaction") - } else if got, exp := sfile.SeriesCount(), uint64(2); got != exp { - t.Fatalf("SeriesCount()=%d, expected %d (after recreate & compaction)", got, exp) - } - - if err := sfile.Reopen(); err != nil { - t.Fatal(err) - } else if !sfile.IsDeleted(id) { - t.Fatal("expected deletion after reopen") - } else if got, exp := sfile.SeriesCount(), uint64(2); got != exp { - t.Fatalf("SeriesCount()=%d, expected %d (after recreate & compaction)", got, exp) - } -} - -func TestSeriesFile_Compaction(t *testing.T) { - const n = 1000 - - sfile := MustOpenSeriesFile() - defer sfile.Close() - - // Generate a bunch of keys. - var collection tsdb.SeriesCollection - for i := 0; i < n; i++ { - collection.Names = append(collection.Names, []byte("cpu")) - collection.Tags = append(collection.Tags, models.NewTags(map[string]string{"region": fmt.Sprintf("r%d", i)})) - collection.Types = append(collection.Types, models.Integer) - } - - // Add all to the series file. - err := sfile.CreateSeriesListIfNotExists(&collection) - if err != nil { - t.Fatal(err) - } - - // Delete a subset of keys. - for i := 0; i < n; i++ { - if i%10 != 0 { - continue - } - - if id := sfile.SeriesID(collection.Names[i], collection.Tags[i], nil); id.IsZero() { - t.Fatal("expected series id") - } else if err := sfile.DeleteSeriesIDs([]tsdb.SeriesID{id}); err != nil { - t.Fatal(err) - } - } - - // Compute total size of all series data. - origSize, err := sfile.FileSize() - if err != nil { - t.Fatal(err) - } - - // Compact all segments. - var paths []string - for _, p := range sfile.Partitions() { - for _, ss := range p.Segments() { - if err := ss.CompactToPath(ss.Path()+".tmp", p.Index()); err != nil { - t.Fatal(err) - } - paths = append(paths, ss.Path()) - } - } - - // Close index. - if err := sfile.SeriesFile.Close(); err != nil { - t.Fatal(err) - } - - // Overwrite files. - for _, path := range paths { - if err := os.Rename(path+".tmp", path); err != nil { - t.Fatal(err) - } - } - - // Reopen index. - sfile.SeriesFile = seriesfile.NewSeriesFile(sfile.SeriesFile.Path()) - if err := sfile.SeriesFile.Open(context.Background()); err != nil { - t.Fatal(err) - } - - // Ensure series status is correct. - for i := 0; i < n; i++ { - if id := sfile.SeriesID(collection.Names[i], collection.Tags[i], nil); id.IsZero() { - continue - } else if got, want := sfile.IsDeleted(id), (i%10) == 0; got != want { - t.Fatalf("IsDeleted(%d)=%v, want %v", id, got, want) - } - } - - // Verify new size is smaller. - newSize, err := sfile.FileSize() - if err != nil { - t.Fatal(err) - } else if newSize >= origSize { - t.Fatalf("expected new size (%d) to be smaller than original size (%d)", newSize, origSize) - } - - t.Logf("original size: %d, new size: %d", origSize, newSize) -} - -var cachedCompactionSeriesFile *SeriesFile - -func BenchmarkSeriesFile_Compaction(b *testing.B) { - const n = 1000000 - - if cachedCompactionSeriesFile == nil { - sfile := MustOpenSeriesFile() - - // Generate a bunch of keys. 
- ids := make([]tsdb.SeriesID, n) - for i := 0; i < n; i++ { - collection := &tsdb.SeriesCollection{ - Names: [][]byte{[]byte("cpu")}, - Tags: []models.Tags{models.NewTags(map[string]string{"region": fmt.Sprintf("r%d", i)})}, - Types: []models.FieldType{models.Integer}, - } - - if err := sfile.CreateSeriesListIfNotExists(collection); err != nil { - b.Fatal(err) - } else if ids[i] = sfile.SeriesID(collection.Names[0], collection.Tags[0], nil); ids[i].IsZero() { - b.Fatalf("expected series id: i=%d", i) - } - } - - // Delete a subset of keys. - for i := 0; i < len(ids); i += 10 { - if err := sfile.DeleteSeriesIDs([]tsdb.SeriesID{ids[i]}); err != nil { - b.Fatal(err) - } - } - - cachedCompactionSeriesFile = sfile - } - - b.ResetTimer() - - for i := 0; i < b.N; i++ { - // Compact all segments in parallel. - var g errgroup.Group - for _, p := range cachedCompactionSeriesFile.Partitions() { - for _, segment := range p.Segments() { - p, segment := p, segment - g.Go(func() error { - return segment.CompactToPath(segment.Path()+".tmp", p.Index()) - }) - } - } - - if err := g.Wait(); err != nil { - b.Fatal(err) - } - } -} - -// Series represents name/tagset pairs that are used in testing. -type Series struct { - Name []byte - Tags models.Tags - Type models.FieldType - Deleted bool -} - -// SeriesFile is a test wrapper for tsdb.SeriesFile. -type SeriesFile struct { - *seriesfile.SeriesFile -} - -// NewSeriesFile returns a new instance of SeriesFile with a temporary file path. -func NewSeriesFile() *SeriesFile { - dir, err := ioutil.TempDir("", "tsdb-series-file-") - if err != nil { - panic(err) - } - return &SeriesFile{SeriesFile: seriesfile.NewSeriesFile(dir)} -} - -func NewBrokenSeriesFile(content []byte) *SeriesFile { - sFile := NewSeriesFile() - fPath := sFile.Path() - if err := sFile.Open(context.Background()); err != nil { - panic(err) - } - if err := sFile.SeriesFile.Close(); err != nil { - panic(err) - } - - segPath := path.Join(fPath, "00", "0000") - if _, err := os.Stat(segPath); os.IsNotExist(err) { - panic(err) - } - err := ioutil.WriteFile(segPath, content, 0777) - if err != nil { - panic(err) - } - return sFile -} - -// MustOpenSeriesFile returns a new, open instance of SeriesFile. Panic on error. -func MustOpenSeriesFile() *SeriesFile { - f := NewSeriesFile() - f.Logger = logger.New(os.Stdout) - if err := f.Open(context.Background()); err != nil { - panic(err) - } - return f -} - -// Close closes the log file and removes it from disk. -func (f *SeriesFile) Close() error { - defer os.RemoveAll(f.Path()) - return f.SeriesFile.Close() -} - -// Reopen close & reopens the series file. -func (f *SeriesFile) Reopen() error { - if err := f.SeriesFile.Close(); err != nil { - return err - } - f.SeriesFile = seriesfile.NewSeriesFile(f.SeriesFile.Path()) - return f.SeriesFile.Open(context.Background()) -} - -// ForceCompact executes an immediate compaction across all partitions. 
-func (f *SeriesFile) ForceCompact() error { - for _, p := range f.Partitions() { - if _, err := seriesfile.NewSeriesPartitionCompactor().Compact(p); err != nil { - return err - } - } - return nil -} diff --git a/tsdb/seriesfile/series_index.go b/tsdb/seriesfile/series_index.go deleted file mode 100644 index 0c6515b6a2..0000000000 --- a/tsdb/seriesfile/series_index.go +++ /dev/null @@ -1,436 +0,0 @@ -package seriesfile - -import ( - "bytes" - "encoding/binary" - "errors" - "io" - "os" - - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/pkg/mincore" - "github.com/influxdata/influxdb/v2/pkg/mmap" - "github.com/influxdata/influxdb/v2/pkg/rhh" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/prometheus/client_golang/prometheus" - "golang.org/x/time/rate" -) - -const ( - SeriesIndexVersion = 1 - SeriesIndexMagic = "SIDX" -) - -const ( - // SeriesIDSize is the size in bytes of a series key ID. - SeriesIDSize = 8 - SeriesOffsetSize = 8 - SeriesIndexElemSize = SeriesOffsetSize + SeriesIDSize - - SeriesIndexLoadFactor = 90 // rhh load factor - - SeriesIndexHeaderSize = 0 + - 4 + 1 + // magic + version - 8 + 8 + // max series + max offset - 8 + 8 + // count + capacity - 8 + 8 + // key/id map offset & size - 8 + 8 + // id/offset map offset & size - 0 -) - -var ErrInvalidSeriesIndex = errors.New("invalid series index") - -// SeriesIndex represents an index of key-to-id & id-to-offset mappings. -type SeriesIndex struct { - path string - - count uint64 - capacity int64 - mask int64 - - maxSeriesID tsdb.SeriesID - maxOffset int64 - - // metrics stores a shard instance of some Prometheus metrics. metrics - // must be set before Open is called. - rhhMetrics *rhh.Metrics - rhhLabels prometheus.Labels - rhhMetricsEnabled bool - - data []byte // mmap data - keyIDData []byte // key/id mmap data - idOffsetData []byte // id/offset mmap data - - // In-memory data since rebuild. - keyIDMap *rhh.HashMap - idOffsetMap map[tsdb.SeriesID]int64 - tombstones map[tsdb.SeriesID]struct{} - - limiter *mincore.Limiter // Limits page faults by the partition -} - -func NewSeriesIndex(path string) *SeriesIndex { - return &SeriesIndex{ - path: path, - rhhMetricsEnabled: true, - } -} - -// Open memory-maps the index file. -func (idx *SeriesIndex) Open() (err error) { - // Map data file, if it exists. - if err := func() error { - if _, err := os.Stat(idx.path); err != nil && !os.IsNotExist(err) { - return err - } else if err == nil { - if idx.data, err = mmap.Map(idx.path, 0); err != nil { - return err - } - - hdr, err := ReadSeriesIndexHeader(idx.data) - if err != nil { - return err - } - idx.count, idx.capacity, idx.mask = hdr.Count, hdr.Capacity, hdr.Capacity-1 - idx.maxSeriesID, idx.maxOffset = hdr.MaxSeriesID, hdr.MaxOffset - - idx.keyIDData = idx.data[hdr.KeyIDMap.Offset : hdr.KeyIDMap.Offset+hdr.KeyIDMap.Size] - idx.idOffsetData = idx.data[hdr.IDOffsetMap.Offset : hdr.IDOffsetMap.Offset+hdr.IDOffsetMap.Size] - } - return nil - }(); err != nil { - idx.Close() - return err - } - - options := rhh.DefaultOptions - options.Metrics = idx.rhhMetrics - options.Labels = idx.rhhLabels - options.MetricsEnabled = idx.rhhMetricsEnabled - - idx.keyIDMap = rhh.NewHashMap(options) - idx.idOffsetMap = make(map[tsdb.SeriesID]int64) - idx.tombstones = make(map[tsdb.SeriesID]struct{}) - return nil -} - -// Close unmaps the index file. 
-func (idx *SeriesIndex) Close() (err error) { - if idx.data != nil { - err = mmap.Unmap(idx.data) - } - idx.keyIDData = nil - idx.idOffsetData = nil - - idx.keyIDMap = nil - idx.idOffsetMap = nil - idx.tombstones = nil - return err -} - -// SetPageFaultLimiter sets the limiter used for rate limiting page faults. -// Must be called after Open(). -func (idx *SeriesIndex) SetPageFaultLimiter(limiter *rate.Limiter) { - idx.limiter = mincore.NewLimiter(limiter, idx.data) -} - -// Recover rebuilds the in-memory index for all new entries. -func (idx *SeriesIndex) Recover(segments []*SeriesSegment) error { - // Allocate new in-memory maps. - options := rhh.DefaultOptions - options.Metrics = idx.rhhMetrics - options.Labels = idx.rhhLabels - options.MetricsEnabled = idx.rhhMetricsEnabled - - idx.keyIDMap = rhh.NewHashMap(options) - idx.idOffsetMap = make(map[tsdb.SeriesID]int64) - idx.tombstones = make(map[tsdb.SeriesID]struct{}) - - // Process all entries since the maximum offset in the on-disk index. - minSegmentID, _ := SplitSeriesOffset(idx.maxOffset) - for _, segment := range segments { - if segment.ID() < minSegmentID { - continue - } - - if err := segment.ForEachEntry(func(flag uint8, id tsdb.SeriesIDTyped, offset int64, key []byte) error { - if offset <= idx.maxOffset { - return nil - } - idx.execEntry(flag, id, offset, key) - return nil - }); err != nil { - return err - } - } - return nil -} - -// GrowBy preallocates the in-memory hashmap to a larger size. -func (idx *SeriesIndex) GrowBy(delta int) { - if delta < 0 { - return - } - idx.keyIDMap.Grow(((idx.keyIDMap.Len() + int64(delta)) * 100) / int64(idx.keyIDMap.LoadFactor())) -} - -// Count returns the number of series in the index. -func (idx *SeriesIndex) Count() uint64 { - n := int64(idx.OnDiskCount()+idx.InMemCount()) - int64(len(idx.tombstones)) - if n < 0 { - n = 0 - } - return uint64(n) -} - -// OnDiskCount returns the number of series in the on-disk index. -func (idx *SeriesIndex) OnDiskCount() uint64 { return idx.count } - -// InMemCount returns the number of series in the in-memory index. -func (idx *SeriesIndex) InMemCount() uint64 { return uint64(len(idx.idOffsetMap)) } - -// OnDiskSize returns the on-disk size of the index in bytes. -func (idx *SeriesIndex) OnDiskSize() uint64 { return uint64(len(idx.data)) } - -// InMemSize returns the heap size of the index in bytes. The returned value is -// an estimation and does not include include all allocated memory. -func (idx *SeriesIndex) InMemSize() uint64 { - n := len(idx.idOffsetMap) - return uint64(2*8*n) + uint64(len(idx.tombstones)*8) -} - -func (idx *SeriesIndex) Insert(key []byte, id tsdb.SeriesIDTyped, offset int64) { - idx.execEntry(SeriesEntryInsertFlag, id, offset, key) -} - -// Delete marks the series id as deleted. -func (idx *SeriesIndex) Delete(id tsdb.SeriesID) { - // NOTE: WithType(0) kinda sucks here, but we know it will be masked off. - idx.execEntry(SeriesEntryTombstoneFlag, id.WithType(0), 0, nil) -} - -// IsDeleted returns true if series id has been deleted. 
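Recover above skips segments older than the on-disk index by splitting idx.maxOffset back into a segment ID. The offsets stored in the index pack the owning segment ID into the upper 32 bits and the position within that segment into the lower 32 bits; the sketch below assumes that behaviour for the JoinSeriesOffset/SplitSeriesOffset helpers, which live in the series segment code not shown in this diff:

package main

import "fmt"

// joinSeriesOffset combines a segment ID and an in-segment byte position
// into a single int64 offset: segment in the high 32 bits, position in
// the low 32 bits (assumed layout, matching how the index uses offsets).
func joinSeriesOffset(segmentID uint16, pos uint32) int64 {
	return (int64(segmentID) << 32) | int64(pos)
}

// splitSeriesOffset reverses joinSeriesOffset.
func splitSeriesOffset(offset int64) (segmentID uint16, pos uint32) {
	return uint16(offset >> 32), uint32(offset & 0xFFFFFFFF)
}

func main() {
	off := joinSeriesOffset(3, 128)
	seg, pos := splitSeriesOffset(off)
	fmt.Println(seg, pos) // 3 128
}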
-func (idx *SeriesIndex) IsDeleted(id tsdb.SeriesID) bool { - if _, ok := idx.tombstones[id]; ok { - return true - } - return idx.FindOffsetByID(id) == 0 -} - -func (idx *SeriesIndex) execEntry(flag uint8, id tsdb.SeriesIDTyped, offset int64, key []byte) { - untypedID := id.SeriesID() - switch flag { - case SeriesEntryInsertFlag: - idx.keyIDMap.PutQuiet(key, id) - idx.idOffsetMap[untypedID] = offset - - if untypedID.Greater(idx.maxSeriesID) { - idx.maxSeriesID = untypedID - } - if offset > idx.maxOffset { - idx.maxOffset = offset - } - - case SeriesEntryTombstoneFlag: - // Only add to tombstone if it exists on disk or in-memory. - // This affects counts if a tombstone exists but the ID doesn't exist. - if idx.FindOffsetByID(untypedID) != 0 { - idx.tombstones[untypedID] = struct{}{} - } - - default: - panic("unreachable") - } -} - -func (idx *SeriesIndex) FindIDBySeriesKey(segments []*SeriesSegment, key []byte) tsdb.SeriesIDTyped { - if v := idx.keyIDMap.Get(key); v != nil { - if id, _ := v.(tsdb.SeriesIDTyped); !id.IsZero() && !idx.IsDeleted(id.SeriesID()) { - return id - } - } - if len(idx.data) == 0 { - return tsdb.SeriesIDTyped{} - } - - hash := rhh.HashKey(key) - for d, pos := int64(0), hash&idx.mask; ; d, pos = d+1, (pos+1)&idx.mask { - elem := idx.keyIDData[(pos * SeriesIndexElemSize):] - elemOffset := int64(binary.BigEndian.Uint64(elem[:SeriesOffsetSize])) - _ = wait(idx.limiter, elem[:SeriesOffsetSize]) // elem size is two uint64s - - if elemOffset == 0 { - return tsdb.SeriesIDTyped{} - } - - elemKey := ReadSeriesKeyFromSegments(segments, elemOffset+SeriesEntryHeaderSize) - elemHash := rhh.HashKey(elemKey) - if d > rhh.Dist(elemHash, pos, idx.capacity) { - return tsdb.SeriesIDTyped{} - } else if elemHash == hash && bytes.Equal(elemKey, key) { - id := tsdb.NewSeriesIDTyped(binary.BigEndian.Uint64(elem[SeriesOffsetSize:])) - if idx.IsDeleted(id.SeriesID()) { - return tsdb.SeriesIDTyped{} - } - return id - } - } -} - -func (idx *SeriesIndex) FindIDByNameTags(segments []*SeriesSegment, name []byte, tags models.Tags, buf []byte) tsdb.SeriesIDTyped { - id := idx.FindIDBySeriesKey(segments, AppendSeriesKey(buf[:0], name, tags)) - if _, ok := idx.tombstones[id.SeriesID()]; ok { - return tsdb.SeriesIDTyped{} - } - return id -} - -func (idx *SeriesIndex) FindIDListByNameTags(segments []*SeriesSegment, names [][]byte, tagsSlice []models.Tags, buf []byte) (ids []tsdb.SeriesIDTyped, ok bool) { - ids, ok = make([]tsdb.SeriesIDTyped, len(names)), true - for i := range names { - id := idx.FindIDByNameTags(segments, names[i], tagsSlice[i], buf) - if id.IsZero() { - ok = false - continue - } - ids[i] = id - } - return ids, ok -} - -func (idx *SeriesIndex) FindOffsetByID(id tsdb.SeriesID) int64 { - if offset := idx.idOffsetMap[id]; offset != 0 { - return offset - } else if len(idx.data) == 0 { - return 0 - } - - hash := rhh.HashUint64(id.RawID()) - for d, pos := int64(0), hash&idx.mask; ; d, pos = d+1, (pos+1)&idx.mask { - elem := idx.idOffsetData[(pos * SeriesIndexElemSize):] - elemID := tsdb.NewSeriesID(binary.BigEndian.Uint64(elem[:SeriesIDSize])) - _ = wait(idx.limiter, elem[:SeriesIDSize]) - - if elemID == id { - return int64(binary.BigEndian.Uint64(elem[SeriesIDSize:])) - } else if elemID.IsZero() || d > rhh.Dist(rhh.HashUint64(elemID.RawID()), pos, idx.capacity) { - return 0 - } - } -} - -// Clone returns a copy of idx for use during compaction. In-memory maps are not cloned. 
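FindOffsetByID above falls back to probing the memory-mapped id/offset map when the in-memory map misses. Each element is SeriesIndexElemSize (16) bytes: an 8-byte big-endian series ID followed by an 8-byte offset, laid out in a robin-hood hashed table whose capacity is a power of two. A condensed sketch of that probe loop, reusing the rhh helpers the deleted code already imports; the findOffset name and standalone shape are illustrative:

package sketch

import (
	"encoding/binary"

	"github.com/influxdata/influxdb/v2/pkg/rhh"
)

const elemSize = 16 // 8-byte series ID + 8-byte offset, both big-endian

// findOffset probes the on-disk id/offset table for id and returns its
// offset, or 0 if the id is absent. capacity must be a power of two.
func findOffset(idOffsetData []byte, capacity int64, id uint64) int64 {
	mask := capacity - 1
	hash := rhh.HashUint64(id)
	for d, pos := int64(0), hash&mask; ; d, pos = d+1, (pos+1)&mask {
		elem := idOffsetData[pos*elemSize:]
		elemID := binary.BigEndian.Uint64(elem[:8])

		switch {
		case elemID == id:
			// Found: the offset lives in the second 8 bytes of the element.
			return int64(binary.BigEndian.Uint64(elem[8:16]))
		case elemID == 0, d > rhh.Dist(rhh.HashUint64(elemID), pos, capacity):
			// Empty slot, or we have probed further than this slot's own
			// displacement: the id cannot be in the table.
			return 0
		}
	}
}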
-func (idx *SeriesIndex) Clone() *SeriesIndex { - tombstones := make(map[tsdb.SeriesID]struct{}, len(idx.tombstones)) - for id := range idx.tombstones { - tombstones[id] = struct{}{} - } - - idOffsetMap := make(map[tsdb.SeriesID]int64) - for k, v := range idx.idOffsetMap { - idOffsetMap[k] = v - } - - return &SeriesIndex{ - path: idx.path, - count: idx.count, - capacity: idx.capacity, - mask: idx.mask, - maxSeriesID: idx.maxSeriesID, - maxOffset: idx.maxOffset, - data: idx.data, - keyIDData: idx.keyIDData, - idOffsetData: idx.idOffsetData, - tombstones: tombstones, - idOffsetMap: idOffsetMap, - } -} - -// SeriesIndexHeader represents the header of a series index. -type SeriesIndexHeader struct { - Version uint8 - - MaxSeriesID tsdb.SeriesID - MaxOffset int64 - - Count uint64 - Capacity int64 - - KeyIDMap struct { - Offset int64 - Size int64 - } - - IDOffsetMap struct { - Offset int64 - Size int64 - } -} - -// NewSeriesIndexHeader returns a new instance of SeriesIndexHeader. -func NewSeriesIndexHeader() SeriesIndexHeader { - return SeriesIndexHeader{Version: SeriesIndexVersion} -} - -// ReadSeriesIndexHeader returns the header from data. -func ReadSeriesIndexHeader(data []byte) (hdr SeriesIndexHeader, err error) { - r := bytes.NewReader(data) - - // Read magic number. - magic := make([]byte, len(SeriesIndexMagic)) - if _, err := io.ReadFull(r, magic); err != nil { - return hdr, err - } else if !bytes.Equal([]byte(SeriesIndexMagic), magic) { - return hdr, ErrInvalidSeriesIndex - } - - // Read version. - if err := binary.Read(r, binary.BigEndian, &hdr.Version); err != nil { - return hdr, err - } - - // Read max offset. - if err := binary.Read(r, binary.BigEndian, &hdr.MaxSeriesID.ID); err != nil { - return hdr, err - } else if err := binary.Read(r, binary.BigEndian, &hdr.MaxOffset); err != nil { - return hdr, err - } - - // Read count & capacity. - if err := binary.Read(r, binary.BigEndian, &hdr.Count); err != nil { - return hdr, err - } else if err := binary.Read(r, binary.BigEndian, &hdr.Capacity); err != nil { - return hdr, err - } - - // Read key/id map position. - if err := binary.Read(r, binary.BigEndian, &hdr.KeyIDMap.Offset); err != nil { - return hdr, err - } else if err := binary.Read(r, binary.BigEndian, &hdr.KeyIDMap.Size); err != nil { - return hdr, err - } - - // Read offset/id map position. - if err := binary.Read(r, binary.BigEndian, &hdr.IDOffsetMap.Offset); err != nil { - return hdr, err - } else if err := binary.Read(r, binary.BigEndian, &hdr.IDOffsetMap.Size); err != nil { - return hdr, err - } - return hdr, nil -} - -// WriteTo writes the header to w. 
-func (hdr *SeriesIndexHeader) WriteTo(w io.Writer) (n int64, err error) { - var buf bytes.Buffer - buf.WriteString(SeriesIndexMagic) - binary.Write(&buf, binary.BigEndian, hdr.Version) - binary.Write(&buf, binary.BigEndian, hdr.MaxSeriesID) - binary.Write(&buf, binary.BigEndian, hdr.MaxOffset) - binary.Write(&buf, binary.BigEndian, hdr.Count) - binary.Write(&buf, binary.BigEndian, hdr.Capacity) - binary.Write(&buf, binary.BigEndian, hdr.KeyIDMap.Offset) - binary.Write(&buf, binary.BigEndian, hdr.KeyIDMap.Size) - binary.Write(&buf, binary.BigEndian, hdr.IDOffsetMap.Offset) - binary.Write(&buf, binary.BigEndian, hdr.IDOffsetMap.Size) - return buf.WriteTo(w) -} diff --git a/tsdb/seriesfile/series_index_test.go b/tsdb/seriesfile/series_index_test.go deleted file mode 100644 index 8548a84f6d..0000000000 --- a/tsdb/seriesfile/series_index_test.go +++ /dev/null @@ -1,142 +0,0 @@ -package seriesfile_test - -import ( - "bytes" - "path/filepath" - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" -) - -func toTypedSeriesID(id uint64) tsdb.SeriesIDTyped { - return tsdb.NewSeriesID(id).WithType(models.Empty) -} - -func TestSeriesIndex_Count(t *testing.T) { - dir, cleanup := MustTempDir() - defer cleanup() - - idx := seriesfile.NewSeriesIndex(filepath.Join(dir, "index")) - if err := idx.Open(); err != nil { - t.Fatal(err) - } - defer idx.Close() - - key0 := seriesfile.AppendSeriesKey(nil, []byte("m0"), nil) - idx.Insert(key0, toTypedSeriesID(1), 10) - key1 := seriesfile.AppendSeriesKey(nil, []byte("m1"), nil) - idx.Insert(key1, toTypedSeriesID(2), 20) - - if n := idx.Count(); n != 2 { - t.Fatalf("unexpected count: %d", n) - } -} - -func TestSeriesIndex_Delete(t *testing.T) { - dir, cleanup := MustTempDir() - defer cleanup() - - idx := seriesfile.NewSeriesIndex(filepath.Join(dir, "index")) - if err := idx.Open(); err != nil { - t.Fatal(err) - } - defer idx.Close() - - key0 := seriesfile.AppendSeriesKey(nil, []byte("m0"), nil) - idx.Insert(key0, toTypedSeriesID(1), 10) - key1 := seriesfile.AppendSeriesKey(nil, []byte("m1"), nil) - idx.Insert(key1, toTypedSeriesID(2), 20) - idx.Delete(tsdb.NewSeriesID(1)) - - if !idx.IsDeleted(tsdb.NewSeriesID(1)) { - t.Fatal("expected deletion") - } else if idx.IsDeleted(tsdb.NewSeriesID(2)) { - t.Fatal("expected series to exist") - } - - if exp, got := idx.Count(), uint64(1); exp != got { - t.Fatalf("Count()=%d, expected %d", exp, got) - } -} - -func TestSeriesIndex_FindIDBySeriesKey(t *testing.T) { - dir, cleanup := MustTempDir() - defer cleanup() - - idx := seriesfile.NewSeriesIndex(filepath.Join(dir, "index")) - if err := idx.Open(); err != nil { - t.Fatal(err) - } - defer idx.Close() - - key0 := seriesfile.AppendSeriesKey(nil, []byte("m0"), nil) - idx.Insert(key0, toTypedSeriesID(1), 10) - key1 := seriesfile.AppendSeriesKey(nil, []byte("m1"), nil) - idx.Insert(key1, toTypedSeriesID(2), 20) - badKey := seriesfile.AppendSeriesKey(nil, []byte("not_found"), nil) - - if id := idx.FindIDBySeriesKey(nil, key0); id != toTypedSeriesID(1) { - t.Fatalf("unexpected id(0): %d", id) - } else if id := idx.FindIDBySeriesKey(nil, key1); id != toTypedSeriesID(2) { - t.Fatalf("unexpected id(1): %d", id) - } else if id := idx.FindIDBySeriesKey(nil, badKey); !id.IsZero() { - t.Fatalf("unexpected id(2): %d", id) - } - - if id := idx.FindIDByNameTags(nil, []byte("m0"), nil, nil); id != toTypedSeriesID(1) { - t.Fatalf("unexpected id(0): %d", id) - } else 
if id := idx.FindIDByNameTags(nil, []byte("m1"), nil, nil); id != toTypedSeriesID(2) { - t.Fatalf("unexpected id(1): %d", id) - } else if id := idx.FindIDByNameTags(nil, []byte("not_found"), nil, nil); !id.IsZero() { - t.Fatalf("unexpected id(2): %d", id) - } -} - -func TestSeriesIndex_FindOffsetByID(t *testing.T) { - dir, cleanup := MustTempDir() - defer cleanup() - - idx := seriesfile.NewSeriesIndex(filepath.Join(dir, "index")) - if err := idx.Open(); err != nil { - t.Fatal(err) - } - defer idx.Close() - - idx.Insert(seriesfile.AppendSeriesKey(nil, []byte("m0"), nil), toTypedSeriesID(1), 10) - idx.Insert(seriesfile.AppendSeriesKey(nil, []byte("m1"), nil), toTypedSeriesID(2), 20) - - if offset := idx.FindOffsetByID(tsdb.NewSeriesID(1)); offset != 10 { - t.Fatalf("unexpected offset(0): %d", offset) - } else if offset := idx.FindOffsetByID(tsdb.NewSeriesID(2)); offset != 20 { - t.Fatalf("unexpected offset(1): %d", offset) - } else if offset := idx.FindOffsetByID(tsdb.NewSeriesID(3)); offset != 0 { - t.Fatalf("unexpected offset(2): %d", offset) - } -} - -func TestSeriesIndexHeader(t *testing.T) { - // Verify header initializes correctly. - hdr := seriesfile.NewSeriesIndexHeader() - if hdr.Version != seriesfile.SeriesIndexVersion { - t.Fatalf("unexpected version: %d", hdr.Version) - } - hdr.MaxSeriesID = tsdb.NewSeriesID(10) - hdr.MaxOffset = 20 - hdr.Count = 30 - hdr.Capacity = 40 - hdr.KeyIDMap.Offset, hdr.KeyIDMap.Size = 50, 60 - hdr.IDOffsetMap.Offset, hdr.IDOffsetMap.Size = 70, 80 - - // Marshal/unmarshal. - var buf bytes.Buffer - if _, err := hdr.WriteTo(&buf); err != nil { - t.Fatal(err) - } else if other, err := seriesfile.ReadSeriesIndexHeader(buf.Bytes()); err != nil { - t.Fatal(err) - } else if diff := cmp.Diff(hdr, other); diff != "" { - t.Fatal(diff) - } -} diff --git a/tsdb/seriesfile/series_partition.go b/tsdb/seriesfile/series_partition.go deleted file mode 100644 index 5e8c293a18..0000000000 --- a/tsdb/seriesfile/series_partition.go +++ /dev/null @@ -1,984 +0,0 @@ -package seriesfile - -import ( - "context" - "encoding/binary" - "errors" - "fmt" - "io/ioutil" - "os" - "path/filepath" - "sync" - "time" - - "github.com/influxdata/influxdb/v2/kit/tracing" - "github.com/influxdata/influxdb/v2/logger" - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/pkg/fs" - "github.com/influxdata/influxdb/v2/pkg/rhh" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/prometheus/client_golang/prometheus" - "go.uber.org/zap" - "golang.org/x/time/rate" -) - -var ( - ErrSeriesPartitionClosed = errors.New("tsdb: series partition closed") - ErrSeriesPartitionCompactionCancelled = errors.New("tsdb: series partition compaction cancelled") -) - -// DefaultSeriesPartitionCompactThreshold is the number of series IDs to hold in the in-memory -// series map before compacting and rebuilding the on-disk representation. -const DefaultSeriesPartitionCompactThreshold = 1 << 17 // 128K - -// SeriesPartition represents a subset of series file data. -type SeriesPartition struct { - mu sync.RWMutex - wg sync.WaitGroup - id int - path string - - closed bool - closing chan struct{} - once sync.Once - - segments []*SeriesSegment - index *SeriesIndex - seq uint64 // series id sequence - - compacting bool - compactionsDisabled int - - pageFaultLimiter *rate.Limiter // Limits page faults by the partition - - CompactThreshold int - LargeWriteThreshold int - - tracker *seriesPartitionTracker - Logger *zap.Logger -} - -// NewSeriesPartition returns a new instance of SeriesPartition. 
-func NewSeriesPartition(id int, path string) *SeriesPartition { - p := &SeriesPartition{ - id: id, - path: path, - closing: make(chan struct{}), - CompactThreshold: DefaultSeriesPartitionCompactThreshold, - LargeWriteThreshold: DefaultLargeSeriesWriteThreshold, - tracker: newSeriesPartitionTracker(newSeriesFileMetrics(nil), prometheus.Labels{"series_file_partition": fmt.Sprint(id)}), - Logger: zap.NewNop(), - seq: uint64(id) + 1, - } - p.index = NewSeriesIndex(p.IndexPath()) - return p -} - -// Open memory maps the data file at the partition's path. -func (p *SeriesPartition) Open() error { - if p.closed { - return errors.New("tsdb: cannot reopen series partition") - } - - // Create path if it doesn't exist. - if err := os.MkdirAll(filepath.Join(p.path), 0777); err != nil { - return err - } - - // Open components. - if err := func() (err error) { - if err := p.openSegments(); err != nil { - return err - } - // Init last segment for writes. - if err := p.activeSegment().InitForWrite(); err != nil { - return err - } - - if err := p.index.Open(); err != nil { - return err - } - p.index.SetPageFaultLimiter(p.pageFaultLimiter) - - if err = p.index.Recover(p.segments); err != nil { - return err - } - return nil - }(); err != nil { - p.Close() - return err - } - - p.tracker.SetSeries(p.index.Count()) // Set series count metric. - p.tracker.SetDiskSize(p.DiskSize()) // Set on-disk size metric. - return nil -} - -func (p *SeriesPartition) openSegments() error { - fis, err := ioutil.ReadDir(p.path) - if err != nil { - return err - } - - for _, fi := range fis { - segmentID, err := ParseSeriesSegmentFilename(fi.Name()) - if err != nil { - continue - } - - segment := NewSeriesSegment(segmentID, filepath.Join(p.path, fi.Name())) - if err := segment.Open(); err != nil { - return err - } - segment.SetPageFaultLimiter(p.pageFaultLimiter) - p.segments = append(p.segments, segment) - } - - // Find max series id by searching segments in reverse order. - for i := len(p.segments) - 1; i >= 0; i-- { - if seq := p.segments[i].MaxSeriesID(); seq.RawID() >= p.seq { - // Reset our sequence num to the next one to assign - p.seq = seq.RawID() + SeriesFilePartitionN - break - } - } - - // Create initial segment if none exist. - if len(p.segments) == 0 { - segment, err := CreateSeriesSegment(0, filepath.Join(p.path, "0000")) - if err != nil { - return err - } - segment.SetPageFaultLimiter(p.pageFaultLimiter) - p.segments = append(p.segments, segment) - } - - p.tracker.SetSegments(uint64(len(p.segments))) - return nil -} - -// Close unmaps the data files. -func (p *SeriesPartition) Close() (err error) { - p.once.Do(func() { close(p.closing) }) - p.wg.Wait() - - p.mu.Lock() - defer p.mu.Unlock() - - p.closed = true - - for _, s := range p.segments { - if e := s.Close(); e != nil && err == nil { - err = e - } - } - p.segments = nil - - if p.index != nil { - if e := p.index.Close(); e != nil && err == nil { - err = e - } - } - p.index = nil - - return err -} - -// ID returns the partition id. -func (p *SeriesPartition) ID() int { return p.id } - -// Path returns the path to the partition. -func (p *SeriesPartition) Path() string { return p.path } - -// IndexPath returns the path to the series index. -func (p *SeriesPartition) IndexPath() string { return filepath.Join(p.path, "index") } - -// Index returns the partition's index. -func (p *SeriesPartition) Index() *SeriesIndex { return p.index } - -// Segments returns the segments in the partition. 
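openSegments above discovers segments by filename: each segment is named as its 16-bit ID rendered as four hex digits (createSegment later in this file uses fmt.Sprintf("%04x", id), and the first segment is created as "0000"). A small sketch of that naming round trip; parseSegmentFilename is a stand-in for ParseSeriesSegmentFilename, whose exact implementation is not shown in this diff:

package main

import (
	"fmt"
	"strconv"
)

// segmentFilename renders a segment ID the way createSegment does.
func segmentFilename(id uint16) string {
	return fmt.Sprintf("%04x", id)
}

// parseSegmentFilename is a stand-in for ParseSeriesSegmentFilename:
// interpret the file name as a hexadecimal segment ID.
func parseSegmentFilename(name string) (uint16, error) {
	v, err := strconv.ParseUint(name, 16, 16)
	return uint16(v), err
}

func main() {
	name := segmentFilename(26) // "001a"
	id, err := parseSegmentFilename(name)
	fmt.Println(name, id, err) // 001a 26 <nil>
}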
-func (p *SeriesPartition) Segments() []*SeriesSegment { return p.segments } - -// FileSize returns the size of all partitions, in bytes. -func (p *SeriesPartition) FileSize() (n int64, err error) { - for _, ss := range p.segments { - fi, err := os.Stat(ss.Path()) - if err != nil { - return 0, err - } - n += fi.Size() - } - return n, err -} - -// CreateSeriesListIfNotExists creates a list of series in bulk if they don't exist. -// The ids parameter is modified to contain series IDs for all keys belonging to this partition. -// If the type does not match the existing type for the key, a zero id is stored. -func (p *SeriesPartition) CreateSeriesListIfNotExists(collection *tsdb.SeriesCollection, keyPartitionIDs []int) error { - p.mu.RLock() - if p.closed { - p.mu.RUnlock() - return ErrSeriesPartitionClosed - } - - span, ctx := tracing.StartSpanFromContext(context.TODO()) - defer span.Finish() - - writeRequired := 0 - for iter := collection.Iterator(); iter.Next(); { - index := iter.Index() - if keyPartitionIDs[index] != p.id { - continue - } - id := p.index.FindIDBySeriesKey(p.segments, iter.SeriesKey()) - if id.IsZero() { - writeRequired++ - continue - } - if id.HasType() && id.Type() != iter.Type() { - iter.Invalid(fmt.Sprintf( - "series type mismatch: already %s but got %s", - id.Type(), iter.Type())) - continue - } - collection.SeriesIDs[index] = id.SeriesID() - } - p.mu.RUnlock() - - // Exit if all series for this partition already exist. - if writeRequired == 0 { - return nil - } - - type keyRange struct { - key []byte - id tsdb.SeriesIDTyped - offset int64 - } - - // Preallocate the space we'll need before grabbing the lock. - newKeyRanges := make([]keyRange, 0, writeRequired) - newIDs := make(map[string]tsdb.SeriesIDTyped, writeRequired) - - // Pre-grow index for large writes. - if writeRequired >= p.LargeWriteThreshold { - p.mu.Lock() - p.index.GrowBy(writeRequired) - p.mu.Unlock() - } - - // Obtain write lock to create new series. - p.mu.Lock() - defer p.mu.Unlock() - - if p.closed { - return ErrSeriesPartitionClosed - } - - for iter := collection.Iterator(); iter.Next(); { - index := iter.Index() - - // Skip series that don't belong to the partition or have already been created. - if keyPartitionIDs[index] != p.id || !iter.SeriesID().IsZero() { - continue - } - - // Re-attempt lookup under write lock. Be sure to double check the type. If the type - // doesn't match what we found, we should not set the ids field for it, but we should - // stop processing the key. - key, typ := iter.SeriesKey(), iter.Type() - - // First check the map, then the index. - id := newIDs[string(key)] - if id.IsZero() { - id = p.index.FindIDBySeriesKey(p.segments, key) - } - - // If the id is found, we are done processing this key. We should only set the ids slice - // if the type matches. - if !id.IsZero() { - if id.HasType() && id.Type() != typ { - iter.Invalid(fmt.Sprintf( - "series type mismatch: already %s but got %s", - id.Type(), iter.Type())) - continue - } - collection.SeriesIDs[index] = id.SeriesID() - continue - } - - // Write to series log and save offset. - id, offset, err := p.insert(key, typ) - if err != nil { - return err - } - - // Append new key to be added to hash map after flush. - collection.SeriesIDs[index] = id.SeriesID() - newIDs[string(key)] = id - newKeyRanges = append(newKeyRanges, keyRange{key, id, offset}) - } - - // Flush active segment writes so we can access data in mmap. 
- if segment := p.activeSegment(); segment != nil { - if err := segment.Flush(); err != nil { - return err - } - } - - // Add keys to hash map(s). - for _, keyRange := range newKeyRanges { - p.index.Insert(keyRange.key, keyRange.id, keyRange.offset) - } - p.tracker.AddSeriesCreated(uint64(len(newKeyRanges))) // Track new series in metric. - p.tracker.AddSeries(uint64(len(newKeyRanges))) - - // Check if we've crossed the compaction threshold. - if p.compactionsEnabled() && !p.compacting && p.CompactThreshold != 0 && p.index.InMemCount() >= uint64(p.CompactThreshold) { - p.compacting = true - log, logEnd := logger.NewOperation(ctx, p.Logger, "Series partition compaction", "series_partition_compaction", zap.String("path", p.path)) - - p.wg.Add(1) - p.tracker.IncCompactionsActive() - go func() { - defer p.wg.Done() - - compactor := NewSeriesPartitionCompactor() - compactor.cancel = p.closing - duration, err := compactor.Compact(p) - if err != nil { - p.tracker.IncCompactionErr() - log.Error("Series partition compaction failed", zap.Error(err)) - } else { - p.tracker.IncCompactionOK(duration) - } - - logEnd() - - // Clear compaction flag. - p.mu.Lock() - p.compacting = false - p.mu.Unlock() - p.tracker.DecCompactionsActive() - - // Disk size may have changed due to compaction. - p.tracker.SetDiskSize(p.DiskSize()) - }() - } - - return nil -} - -// Compacting returns if the SeriesPartition is currently compacting. -func (p *SeriesPartition) Compacting() bool { - p.mu.RLock() - defer p.mu.RUnlock() - return p.compacting -} - -// DeleteSeriesID flags a list of series as permanently deleted. -// If a series is reintroduced later then it must create a new id. -func (p *SeriesPartition) DeleteSeriesIDs(ids []tsdb.SeriesID) error { - p.mu.Lock() - defer p.mu.Unlock() - - if p.closed { - return ErrSeriesPartitionClosed - } - - var n uint64 - for _, id := range ids { - // Already tombstoned, ignore. - if p.index.IsDeleted(id) { - continue - } - - // Write tombstone entries. The type is ignored in tombstones. - _, err := p.writeLogEntry(AppendSeriesEntry(nil, SeriesEntryTombstoneFlag, id.WithType(models.Empty), nil)) - if err != nil { - return err - } - n++ - } - - // Flush active segment write. - if segment := p.activeSegment(); segment != nil { - if err := segment.Flush(); err != nil { - return err - } - } - - // Mark tombstone in memory. - for _, id := range ids { - p.index.Delete(id) - } - p.tracker.SubSeries(n) - - return nil -} - -// IsDeleted returns true if the ID has been deleted before. -func (p *SeriesPartition) IsDeleted(id tsdb.SeriesID) bool { - p.mu.RLock() - if p.closed { - p.mu.RUnlock() - return false - } - v := p.index.IsDeleted(id) - p.mu.RUnlock() - return v -} - -// SeriesKey returns the series key for a given id. -func (p *SeriesPartition) SeriesKey(id tsdb.SeriesID) []byte { - if id.IsZero() { - return nil - } - p.mu.RLock() - if p.closed { - p.mu.RUnlock() - return nil - } - key := p.seriesKeyByOffset(p.index.FindOffsetByID(id)) - p.mu.RUnlock() - return key -} - -// Series returns the parsed series name and tags for an offset. -func (p *SeriesPartition) Series(id tsdb.SeriesID) ([]byte, models.Tags) { - key := p.SeriesKey(id) - if key == nil { - return nil, nil - } - return ParseSeriesKey(key) -} - -// FindIDBySeriesKey return the series id for the series key. -func (p *SeriesPartition) FindIDBySeriesKey(key []byte) tsdb.SeriesID { - return p.FindIDTypedBySeriesKey(key).SeriesID() -} - -// FindIDTypedBySeriesKey return the typed series id for the series key. 
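DeleteSeriesIDs above makes deletion a two-step operation: append a tombstone entry to the segment log first (so replay reconstructs the deletion after a crash), then mark the ID deleted in the in-memory index. A rough sketch of that ordering under assumed stand-in types; the real types here are SeriesSegment and SeriesIndex:

package main

import "fmt"

// Minimal stand-ins for the segment log and in-memory index.
type segmentLog struct{ entries []uint64 }

func (l *segmentLog) AppendTombstone(id uint64) error {
    l.entries = append(l.entries, id)
    return nil
}

type memIndex struct{ deleted map[uint64]bool }

func (m *memIndex) IsDeleted(id uint64) bool { return m.deleted[id] }
func (m *memIndex) Delete(id uint64)         { m.deleted[id] = true }

// deleteSeries mirrors the ordering above: the tombstone is durable in the
// log before the in-memory index is updated, so a crash between the two
// steps is repaired when the segments are replayed on open.
func deleteSeries(log *segmentLog, idx *memIndex, id uint64) error {
    if idx.IsDeleted(id) {
        return nil // already tombstoned, nothing to do
    }
    if err := log.AppendTombstone(id); err != nil {
        return err
    }
    idx.Delete(id)
    return nil
}

func main() {
    log, idx := &segmentLog{}, &memIndex{deleted: map[uint64]bool{}}
    _ = deleteSeries(log, idx, 42)
    fmt.Println(idx.IsDeleted(42), log.entries)
}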
-func (p *SeriesPartition) FindIDTypedBySeriesKey(key []byte) tsdb.SeriesIDTyped { - p.mu.RLock() - if p.closed { - p.mu.RUnlock() - return tsdb.SeriesIDTyped{} - } - id := p.index.FindIDBySeriesKey(p.segments, key) - p.mu.RUnlock() - return id -} - -// SeriesCount returns the number of series. -func (p *SeriesPartition) SeriesCount() uint64 { - p.mu.RLock() - if p.closed { - p.mu.RUnlock() - return 0 - } - n := p.index.Count() - p.mu.RUnlock() - return n -} - -// DiskSize returns the number of bytes taken up on disk by the partition. -func (p *SeriesPartition) DiskSize() uint64 { - p.mu.RLock() - defer p.mu.RUnlock() - return p.diskSize() -} - -func (p *SeriesPartition) diskSize() uint64 { - totalSize := p.index.OnDiskSize() - for _, segment := range p.segments { - totalSize += uint64(len(segment.Data())) - } - return totalSize -} - -func (p *SeriesPartition) DisableCompactions() { - p.mu.Lock() - defer p.mu.Unlock() - p.compactionsDisabled++ -} - -func (p *SeriesPartition) EnableCompactions() { - p.mu.Lock() - defer p.mu.Unlock() - - if p.compactionsEnabled() { - return - } - p.compactionsDisabled-- -} - -func (p *SeriesPartition) compactionsEnabled() bool { - return p.compactionsDisabled == 0 -} - -// AppendSeriesIDs returns a list of all series ids. -func (p *SeriesPartition) AppendSeriesIDs(a []tsdb.SeriesID) []tsdb.SeriesID { - for _, segment := range p.segments { - a = segment.AppendSeriesIDs(a) - } - return a -} - -// activeSegment returns the last segment. -func (p *SeriesPartition) activeSegment() *SeriesSegment { - if len(p.segments) == 0 { - return nil - } - return p.segments[len(p.segments)-1] -} - -func (p *SeriesPartition) insert(key []byte, typ models.FieldType) (id tsdb.SeriesIDTyped, offset int64, err error) { - id = tsdb.NewSeriesID(p.seq).WithType(typ) - offset, err = p.writeLogEntry(AppendSeriesEntry(nil, SeriesEntryInsertFlag, id, key)) - if err != nil { - return tsdb.SeriesIDTyped{}, 0, err - } - - p.seq += SeriesFilePartitionN - return id, offset, nil -} - -// writeLogEntry appends an entry to the end of the active segment. -// If there is no more room in the segment then a new segment is added. -func (p *SeriesPartition) writeLogEntry(data []byte) (offset int64, err error) { - segment := p.activeSegment() - if segment == nil || !segment.CanWrite(data) { - if segment, err = p.createSegment(); err != nil { - return 0, err - } - } - return segment.WriteLogEntry(data) -} - -// createSegment appends a new segment -func (p *SeriesPartition) createSegment() (*SeriesSegment, error) { - // Close writer for active segment, if one exists. - if segment := p.activeSegment(); segment != nil { - if err := segment.CloseForWrite(); err != nil { - return nil, err - } - } - - // Generate a new sequential segment identifier. - var id uint16 - if len(p.segments) > 0 { - id = p.segments[len(p.segments)-1].ID() + 1 - } - filename := fmt.Sprintf("%04x", id) - - // Generate new empty segment. - segment, err := CreateSeriesSegment(id, filepath.Join(p.path, filename)) - if err != nil { - return nil, err - } - segment.SetPageFaultLimiter(p.pageFaultLimiter) - p.segments = append(p.segments, segment) - - // Allow segment to write. - if err := segment.InitForWrite(); err != nil { - return nil, err - } - p.tracker.SetSegments(uint64(len(p.segments))) - p.tracker.SetDiskSize(p.diskSize()) // Disk size will change with new segment. 
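createSegment above derives the on-disk name from the 16-bit segment ID with %04x, and ParseSeriesSegmentFilename (shown later in this diff) parses it back as base-16. A quick round-trip sketch:

package main

import (
    "fmt"
    "strconv"
)

func main() {
    for _, id := range []uint16{0, 1, 0x0a9b} {
        name := fmt.Sprintf("%04x", id) // "0000", "0001", "0a9b"
        parsed, err := strconv.ParseUint(name, 16, 32)
        if err != nil {
            panic(err)
        }
        fmt.Println(name, uint16(parsed) == id)
    }
}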
- return segment, nil -} - -func (p *SeriesPartition) seriesKeyByOffset(offset int64) []byte { - if offset == 0 { - return nil - } - - segmentID, pos := SplitSeriesOffset(offset) - for _, segment := range p.segments { - if segment.ID() != segmentID { - continue - } - - buf := segment.Slice(pos + SeriesEntryHeaderSize) - key, _ := ReadSeriesKey(buf) - _ = wait(segment.limiter, buf[:len(key)]) - return key - } - - return nil -} - -type seriesPartitionTracker struct { - metrics *seriesFileMetrics - labels prometheus.Labels - enabled bool -} - -func newSeriesPartitionTracker(metrics *seriesFileMetrics, defaultLabels prometheus.Labels) *seriesPartitionTracker { - return &seriesPartitionTracker{ - metrics: metrics, - labels: defaultLabels, - enabled: true, - } -} - -// Labels returns a copy of labels for use with Series File metrics. -func (t *seriesPartitionTracker) Labels() prometheus.Labels { - l := make(map[string]string, len(t.labels)) - for k, v := range t.labels { - l[k] = v - } - return l -} - -// AddSeriesCreated increases the number of series created in the partition by n. -func (t *seriesPartitionTracker) AddSeriesCreated(n uint64) { - if !t.enabled { - return - } - - labels := t.Labels() - t.metrics.SeriesCreated.With(labels).Add(float64(n)) -} - -// SetSeries sets the number of series in the partition. -func (t *seriesPartitionTracker) SetSeries(n uint64) { - if !t.enabled { - return - } - - labels := t.Labels() - t.metrics.Series.With(labels).Set(float64(n)) -} - -// AddSeries increases the number of series in the partition by n. -func (t *seriesPartitionTracker) AddSeries(n uint64) { - if !t.enabled { - return - } - - labels := t.Labels() - t.metrics.Series.With(labels).Add(float64(n)) -} - -// SubSeries decreases the number of series in the partition by n. -func (t *seriesPartitionTracker) SubSeries(n uint64) { - if !t.enabled { - return - } - - labels := t.Labels() - t.metrics.Series.With(labels).Sub(float64(n)) -} - -// SetDiskSize sets the number of bytes used by files for in partition. -func (t *seriesPartitionTracker) SetDiskSize(sz uint64) { - if !t.enabled { - return - } - - labels := t.Labels() - t.metrics.DiskSize.With(labels).Set(float64(sz)) -} - -// SetSegments sets the number of segments files for the partition. -func (t *seriesPartitionTracker) SetSegments(n uint64) { - if !t.enabled { - return - } - - labels := t.Labels() - t.metrics.Segments.With(labels).Set(float64(n)) -} - -// IncCompactionsActive increments the number of active compactions for the -// components of a partition (index and segments). -func (t *seriesPartitionTracker) IncCompactionsActive() { - if !t.enabled { - return - } - - labels := t.Labels() - labels["component"] = "index" // TODO(edd): when we add segment compactions we will add a new label value. - t.metrics.CompactionsActive.With(labels).Inc() -} - -// DecCompactionsActive decrements the number of active compactions for the -// components of a partition (index and segments). -func (t *seriesPartitionTracker) DecCompactionsActive() { - if !t.enabled { - return - } - - labels := t.Labels() - labels["component"] = "index" // TODO(edd): when we add segment compactions we will add a new label value. - t.metrics.CompactionsActive.With(labels).Dec() -} - -// incCompactions increments the number of compactions for the partition. -// Callers should use IncCompactionOK and IncCompactionErr. 
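Each tracker method above copies the partition's base labels and layers an extra label such as "component" (and, just below, "status") on top before recording the observation, so a single metric family serves every partition. A hedged sketch with the Prometheus client; the metric name is made up for the example:

package main

import (
    "fmt"

    "github.com/prometheus/client_golang/prometheus"
)

func main() {
    // One counter family, keyed by the same label names used above.
    compactions := prometheus.NewCounterVec(prometheus.CounterOpts{
        Name: "series_file_compactions_total", // illustrative name
        Help: "Compactions by partition and status.",
    }, []string{"series_file_partition", "status"})

    base := prometheus.Labels{"series_file_partition": "3"}

    // Copy the base labels, then add the per-observation status label,
    // mirroring Labels() plus incCompactions.
    labels := prometheus.Labels{}
    for k, v := range base {
        labels[k] = v
    }
    labels["status"] = "ok"
    compactions.With(labels).Inc()

    fmt.Println("recorded one ok compaction for partition 3")
}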
-func (t *seriesPartitionTracker) incCompactions(status string, duration time.Duration) { - if !t.enabled { - return - } - - if duration > 0 { - labels := t.Labels() - labels["component"] = "index" - t.metrics.CompactionDuration.With(labels).Observe(duration.Seconds()) - } - - labels := t.Labels() - labels["status"] = status - t.metrics.Compactions.With(labels).Inc() -} - -// IncCompactionOK increments the number of successful compactions for the partition. -func (t *seriesPartitionTracker) IncCompactionOK(duration time.Duration) { - t.incCompactions("ok", duration) -} - -// IncCompactionErr increments the number of failed compactions for the partition. -func (t *seriesPartitionTracker) IncCompactionErr() { t.incCompactions("error", 0) } - -// SeriesPartitionCompactor represents an object reindexes a series partition and optionally compacts segments. -type SeriesPartitionCompactor struct { - cancel <-chan struct{} -} - -// NewSeriesPartitionCompactor returns a new instance of SeriesPartitionCompactor. -func NewSeriesPartitionCompactor() *SeriesPartitionCompactor { - return &SeriesPartitionCompactor{} -} - -// Compact rebuilds the series partition index. -func (c *SeriesPartitionCompactor) Compact(p *SeriesPartition) (time.Duration, error) { - // Snapshot the partitions and index so we can check tombstones and replay at the end under lock. - p.mu.RLock() - segments := CloneSeriesSegments(p.segments) - index := p.index.Clone() - seriesN := p.index.Count() - p.mu.RUnlock() - - now := time.Now() - - // Compact index to a temporary location. - indexPath := index.path + ".compacting" - if err := c.compactIndexTo(index, seriesN, segments, indexPath); err != nil { - return 0, err - } - duration := time.Since(now) - - // Swap compacted index under lock & replay since compaction. - if err := func() error { - p.mu.Lock() - defer p.mu.Unlock() - - // Reopen index with new file. - if err := p.index.Close(); err != nil { - return err - } else if err := fs.RenameFileWithReplacement(indexPath, index.path); err != nil { - return err - } - - p.index.SetPageFaultLimiter(p.pageFaultLimiter) - if err := p.index.Open(); err != nil { - return err - } - - // Replay new entries. - if err := p.index.Recover(p.segments); err != nil { - return err - } - return nil - }(); err != nil { - return 0, err - } - - return duration, nil -} - -func (c *SeriesPartitionCompactor) compactIndexTo(index *SeriesIndex, seriesN uint64, segments []*SeriesSegment, path string) error { - hdr := NewSeriesIndexHeader() - hdr.Count = seriesN - hdr.Capacity = pow2((int64(hdr.Count) * 100) / SeriesIndexLoadFactor) - - // Allocate space for maps. - keyIDMap := make([]byte, (hdr.Capacity * SeriesIndexElemSize)) - idOffsetMap := make([]byte, (hdr.Capacity * SeriesIndexElemSize)) - - // Reindex all partitions. - var entryN int - for _, segment := range segments { - errDone := errors.New("done") - - if err := segment.ForEachEntry(func(flag uint8, id tsdb.SeriesIDTyped, offset int64, key []byte) error { - // Make sure we don't go past the offset where the compaction began. - if offset > index.maxOffset { - return errDone - } - - // Check for cancellation periodically. - if entryN++; entryN%1000 == 0 { - select { - case <-c.cancel: - return ErrSeriesPartitionCompactionCancelled - default: - } - } - - // Only process insert entries. 
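Compact above builds the replacement index at indexPath + ".compacting" while readers keep using the live file, then closes the old index, renames the temporary file over it, and replays anything written during the compaction. A small sketch of the write-then-rename swap; the contents and paths are placeholders:

package main

import (
    "fmt"
    "os"
    "path/filepath"
)

// rebuildAndSwap writes the new contents next to path and then renames the
// temporary file over it, so readers never observe a partially written file.
func rebuildAndSwap(path string, contents []byte) error {
    tmp := path + ".compacting"
    if err := os.WriteFile(tmp, contents, 0666); err != nil {
        return err
    }
    // On POSIX systems rename replaces the target atomically; the code above
    // goes through a fs.RenameFileWithReplacement helper to cover Windows too.
    return os.Rename(tmp, path)
}

func main() {
    dir, _ := os.MkdirTemp("", "index-swap")
    defer os.RemoveAll(dir)

    path := filepath.Join(dir, "index")
    _ = rebuildAndSwap(path, []byte("v1"))
    _ = rebuildAndSwap(path, []byte("v2"))

    b, _ := os.ReadFile(path)
    fmt.Println(string(b)) // v2
}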
- switch flag { - case SeriesEntryInsertFlag: // fallthrough - case SeriesEntryTombstoneFlag: - return nil - default: - return fmt.Errorf("unexpected series partition log entry flag: %d", flag) - } - - untypedID := id.SeriesID() - - // Save max series identifier processed. - hdr.MaxSeriesID, hdr.MaxOffset = untypedID, offset - - // Ignore entry if tombstoned. - if index.IsDeleted(untypedID) { - return nil - } - - // Insert into maps. - c.insertIDOffsetMap(idOffsetMap, hdr.Capacity, untypedID, offset) - return c.insertKeyIDMap(keyIDMap, hdr.Capacity, segments, key, offset, id) - }); err == errDone { - break - } else if err != nil { - return err - } - } - - // Open file handler. - f, err := fs.CreateFile(path) - if err != nil { - return err - } - defer f.Close() - - // Calculate map positions. - hdr.KeyIDMap.Offset, hdr.KeyIDMap.Size = SeriesIndexHeaderSize, int64(len(keyIDMap)) - hdr.IDOffsetMap.Offset, hdr.IDOffsetMap.Size = hdr.KeyIDMap.Offset+hdr.KeyIDMap.Size, int64(len(idOffsetMap)) - - // Write header. - if _, err := hdr.WriteTo(f); err != nil { - return err - } - - // Write maps. - if _, err := f.Write(keyIDMap); err != nil { - return err - } else if _, err := f.Write(idOffsetMap); err != nil { - return err - } - - // Sync & close. - if err := f.Sync(); err != nil { - return err - } else if err := f.Close(); err != nil { - return err - } - - return nil -} - -func (c *SeriesPartitionCompactor) insertKeyIDMap(dst []byte, capacity int64, segments []*SeriesSegment, key []byte, offset int64, id tsdb.SeriesIDTyped) error { - mask := capacity - 1 - hash := rhh.HashKey(key) - - // Continue searching until we find an empty slot or lower probe distance. - for i, dist, pos := int64(0), int64(0), hash&mask; ; i, dist, pos = i+1, dist+1, (pos+1)&mask { - assert(i <= capacity, "key/id map full") - elem := dst[(pos * SeriesIndexElemSize):] - - // If empty slot found or matching offset, insert and exit. - elemOffset := int64(binary.BigEndian.Uint64(elem[:SeriesOffsetSize])) - elemID := tsdb.NewSeriesIDTyped(binary.BigEndian.Uint64(elem[SeriesOffsetSize:])) - if elemOffset == 0 || elemOffset == offset { - binary.BigEndian.PutUint64(elem[:SeriesOffsetSize], uint64(offset)) - binary.BigEndian.PutUint64(elem[SeriesOffsetSize:], id.RawID()) - return nil - } - - // Read key at position & hash. - elemKey := ReadSeriesKeyFromSegments(segments, elemOffset+SeriesEntryHeaderSize) - elemHash := rhh.HashKey(elemKey) - - // If the existing elem has probed less than us, then swap places with - // existing elem, and keep going to find another slot for that elem. - if d := rhh.Dist(elemHash, pos, capacity); d < dist { - // Insert current values. - binary.BigEndian.PutUint64(elem[:SeriesOffsetSize], uint64(offset)) - binary.BigEndian.PutUint64(elem[SeriesOffsetSize:], id.RawID()) - - // Swap with values in that position. - offset, id = elemOffset, elemID - - // Update current distance. - dist = d - } - } -} - -func (c *SeriesPartitionCompactor) insertIDOffsetMap(dst []byte, capacity int64, id tsdb.SeriesID, offset int64) { - mask := capacity - 1 - hash := rhh.HashUint64(id.RawID()) - - // Continue searching until we find an empty slot or lower probe distance. - for i, dist, pos := int64(0), int64(0), hash&mask; ; i, dist, pos = i+1, dist+1, (pos+1)&mask { - assert(i <= capacity, "id/offset map full") - elem := dst[(pos * SeriesIndexElemSize):] - - // If empty slot found or matching id, insert and exit. 
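The two insert helpers here use Robin Hood hashing: probe linearly from the home slot, and whenever the resident element has probed a shorter distance than the incoming one, swap them and keep walking with the displaced element. A minimal standalone sketch over uint64 keys; the hash and table layout are simplified from the rhh package used above, and an identity "hash" keeps the trace easy to follow:

package main

import "fmt"

// dist is the probe distance of a key hashed to hash when stored at pos.
func dist(hash, pos, capacity uint64) uint64 {
    return (pos + capacity - (hash % capacity)) % capacity
}

// insert places key into table using Robin Hood displacement. Zero means an
// empty slot here, so keys must be nonzero in this sketch.
func insert(table []uint64, key uint64) {
    capacity := uint64(len(table))
    hash := key // identity "hash" for illustration only
    pos := hash % capacity
    var d uint64
    for {
        if table[pos] == 0 {
            table[pos] = key
            return
        }
        resident := table[pos]
        if rd := dist(resident, pos, capacity); rd < d {
            // The resident is "richer" (probed less): swap, then keep going
            // with the displaced key, carrying its distance forward.
            table[pos], key = key, resident
            d = rd
        }
        pos = (pos + 1) % capacity
        d++
    }
}

func main() {
    table := make([]uint64, 8)
    for _, k := range []uint64{1, 9, 17, 2} { // 1, 9, 17 all hash to slot 1
        insert(table, k)
    }
    fmt.Println(table)
}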
- elemID := tsdb.NewSeriesID(binary.BigEndian.Uint64(elem[:SeriesIDSize])) - elemOffset := int64(binary.BigEndian.Uint64(elem[SeriesIDSize:])) - if elemOffset == 0 || elemOffset == offset { - binary.BigEndian.PutUint64(elem[:SeriesIDSize], id.RawID()) - binary.BigEndian.PutUint64(elem[SeriesIDSize:], uint64(offset)) - return - } - - // Hash key. - elemHash := rhh.HashUint64(elemID.RawID()) - - // If the existing elem has probed less than us, then swap places with - // existing elem, and keep going to find another slot for that elem. - if d := rhh.Dist(elemHash, pos, capacity); d < dist { - // Insert current values. - binary.BigEndian.PutUint64(elem[:SeriesIDSize], id.RawID()) - binary.BigEndian.PutUint64(elem[SeriesIDSize:], uint64(offset)) - - // Swap with values in that position. - id, offset = elemID, elemOffset - - // Update current distance. - dist = d - } - } -} - -// pow2 returns the number that is the next highest power of 2. -// Returns v if it is a power of 2. -func pow2(v int64) int64 { - for i := int64(2); i < 1<<62; i *= 2 { - if i >= v { - return i - } - } - panic("unreachable") -} - -// assert will panic with a given formatted message if the given condition is false. -func assert(condition bool, msg string, v ...interface{}) { - if !condition { - panic(fmt.Sprintf("assert failed: "+msg, v...)) - } -} diff --git a/tsdb/seriesfile/series_partition_test.go b/tsdb/seriesfile/series_partition_test.go deleted file mode 100644 index 5b16aea9d9..0000000000 --- a/tsdb/seriesfile/series_partition_test.go +++ /dev/null @@ -1,76 +0,0 @@ -package seriesfile_test - -import ( - "fmt" - "io/ioutil" - "os" - "strconv" - "testing" - - "github.com/influxdata/influxdb/v2/logger" - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" -) - -func BenchmarkSeriesPartition_CreateSeriesListIfNotExists(b *testing.B) { - for _, n := range []int{1000, 10000, 100000, 1000000} { - b.Run(strconv.Itoa(n), func(b *testing.B) { - var collection tsdb.SeriesCollection - for i := 0; i < n; i++ { - collection.Names = append(collection.Names, []byte("cpu")) - collection.Tags = append(collection.Tags, models.Tags{ - {Key: []byte("tag0"), Value: []byte("value0")}, - {Key: []byte("tag1"), Value: []byte("value1")}, - {Key: []byte("tag2"), Value: []byte("value2")}, - {Key: []byte("tag3"), Value: []byte("value3")}, - {Key: []byte("tag4"), Value: []byte(fmt.Sprintf("value%d", i))}, - }) - collection.Types = append(collection.Types, models.Integer) - } - collection.SeriesKeys = seriesfile.GenerateSeriesKeys(collection.Names, collection.Tags) - collection.SeriesIDs = make([]tsdb.SeriesID, len(collection.SeriesKeys)) - keyPartitionIDs := make([]int, n) - - b.ResetTimer() - for j := 0; j < b.N; j++ { - p := MustOpenSeriesPartition() - if err := p.CreateSeriesListIfNotExists(&collection, keyPartitionIDs); err != nil { - b.Fatal(err) - } else if err := p.Close(); err != nil { - b.Fatal(err) - } - } - }) - } -} - -// SeriesPartition is a test wrapper for tsdb.SeriesPartition. -type SeriesPartition struct { - *seriesfile.SeriesPartition -} - -// NewSeriesPartition returns a new instance of SeriesPartition with a temporary file path. -func NewSeriesPartition() *SeriesPartition { - dir, err := ioutil.TempDir("", "tsdb-series-partition-") - if err != nil { - panic(err) - } - return &SeriesPartition{SeriesPartition: seriesfile.NewSeriesPartition(0, dir)} -} - -// MustOpenSeriesPartition returns a new, open instance of SeriesPartition. 
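compactIndexTo (earlier in this file) sizes the rebuilt hash maps as the next power of two above count*100/SeriesIndexLoadFactor, using the pow2 helper above, so the table stays under its target load factor. A small sketch of that sizing; the 90% load factor below is an assumption for the example, not necessarily the constant the index uses:

package main

import "fmt"

// pow2 returns the smallest power of two that is >= v, matching the helper above.
func pow2(v int64) int64 {
    for i := int64(2); i < 1<<62; i *= 2 {
        if i >= v {
            return i
        }
    }
    panic("unreachable")
}

func main() {
    const loadFactor = 90 // assumed percentage for the example
    for _, count := range []int64{1000, 131072, 1000000} {
        capacity := pow2(count * 100 / loadFactor)
        fmt.Printf("count=%d capacity=%d load=%.0f%%\n",
            count, capacity, float64(count)/float64(capacity)*100)
    }
}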
Panic on error. -func MustOpenSeriesPartition() *SeriesPartition { - f := NewSeriesPartition() - f.Logger = logger.New(os.Stdout) - if err := f.Open(); err != nil { - panic(err) - } - return f -} - -// Close closes the partition and removes it from disk. -func (f *SeriesPartition) Close() error { - defer os.RemoveAll(f.Path()) - return f.SeriesPartition.Close() -} diff --git a/tsdb/seriesfile/series_segment.go b/tsdb/seriesfile/series_segment.go deleted file mode 100644 index 5d619f7e84..0000000000 --- a/tsdb/seriesfile/series_segment.go +++ /dev/null @@ -1,468 +0,0 @@ -package seriesfile - -import ( - "bufio" - "bytes" - "encoding/binary" - "errors" - "fmt" - "io" - "os" - "regexp" - "strconv" - - "github.com/influxdata/influxdb/v2/pkg/fs" - "github.com/influxdata/influxdb/v2/pkg/mincore" - "github.com/influxdata/influxdb/v2/pkg/mmap" - "github.com/influxdata/influxdb/v2/tsdb" - "golang.org/x/time/rate" -) - -const ( - SeriesSegmentVersion = 1 - SeriesSegmentMagic = "SSEG" - - SeriesSegmentHeaderSize = 4 + 1 // magic + version -) - -// Series entry constants. -const ( - SeriesEntryFlagSize = 1 - SeriesEntryHeaderSize = 1 + 8 // flag + id - - SeriesEntryInsertFlag = 0x01 - SeriesEntryTombstoneFlag = 0x02 -) - -var ( - ErrInvalidSeriesSegment = errors.New("invalid series segment") - ErrInvalidSeriesSegmentVersion = errors.New("invalid series segment version") - ErrSeriesSegmentNotWritable = errors.New("series segment not writable") -) - -// SeriesSegment represents a log of series entries. -type SeriesSegment struct { - id uint16 - path string - - data []byte // mmap file - file *os.File // write file handle - w *bufio.Writer // bufferred file handle - size uint32 // current file size - - limiter *mincore.Limiter -} - -// NewSeriesSegment returns a new instance of SeriesSegment. -func NewSeriesSegment(id uint16, path string) *SeriesSegment { - return &SeriesSegment{ - id: id, - path: path, - } -} - -// CreateSeriesSegment generates an empty segment at path. -func CreateSeriesSegment(id uint16, path string) (*SeriesSegment, error) { - // Generate segment in temp location. - f, err := fs.CreateFile(path + ".initializing") - if err != nil { - return nil, err - } - defer f.Close() - - // Write header to file and close. - hdr := NewSeriesSegmentHeader() - if _, err := hdr.WriteTo(f); err != nil { - return nil, err - } else if err := f.Truncate(int64(SeriesSegmentSize(id))); err != nil { - return nil, err - } else if err := f.Sync(); err != nil { - return nil, err - } else if err := f.Close(); err != nil { - return nil, err - } - - // Swap with target path. - if err := fs.RenameFile(f.Name(), path); err != nil { - return nil, err - } - - // Open segment at new location. - segment := NewSeriesSegment(id, path) - if err := segment.Open(); err != nil { - return nil, err - } - return segment, nil -} - -// Open memory maps the data file at the file's path. -func (s *SeriesSegment) Open() error { - if err := func() (err error) { - // Memory map file data. - if s.data, err = mmap.Map(s.path, int64(SeriesSegmentSize(s.id))); err != nil { - return err - } - - // Read header. - hdr, err := ReadSeriesSegmentHeader(s.data) - if err != nil { - return err - } else if hdr.Version != SeriesSegmentVersion { - return ErrInvalidSeriesSegmentVersion - } - - return nil - }(); err != nil { - s.Close() - return err - } - - return nil -} - -// InitForWrite initializes a write handle for the segment. -// This is only used for the last segment in the series file. 
-func (s *SeriesSegment) InitForWrite() (err error) { - // Only calculcate segment data size if writing. - for s.size = uint32(SeriesSegmentHeaderSize); s.size < uint32(len(s.data)); { - flag, _, _, sz := ReadSeriesEntry(s.data[s.size:]) - if !IsValidSeriesEntryFlag(flag) { - break - } - _ = wait(s.limiter, s.data[s.size:int64(s.size)+sz]) - s.size += uint32(sz) - } - - // Open file handler for writing & seek to end of data. - if s.file, err = os.OpenFile(s.path, os.O_WRONLY|os.O_CREATE, 0666); err != nil { - return err - } else if _, err := s.file.Seek(int64(s.size), io.SeekStart); err != nil { - return err - } - s.w = bufio.NewWriterSize(s.file, 32*1024) - - return nil -} - -// Close unmaps the segment. -func (s *SeriesSegment) Close() (err error) { - if e := s.CloseForWrite(); e != nil && err == nil { - err = e - } - - if s.data != nil { - if e := mmap.Unmap(s.data); e != nil && err == nil { - err = e - } - s.data = nil - } - - return err -} - -func (s *SeriesSegment) CloseForWrite() (err error) { - if s.w != nil { - if e := s.w.Flush(); e != nil && err == nil { - err = e - } - s.w = nil - } - - if s.file != nil { - if e := s.file.Close(); e != nil && err == nil { - err = e - } - s.file = nil - } - return err -} - -// SetPageFaultLimiter sets the limiter used for rate limiting page faults. -// Must be called after Open(). -func (s *SeriesSegment) SetPageFaultLimiter(limiter *rate.Limiter) { - s.limiter = mincore.NewLimiter(limiter, s.data) -} - -// Data returns the raw data. -func (s *SeriesSegment) Data() []byte { return s.data } - -// ID returns the id the segment was initialized with. -func (s *SeriesSegment) ID() uint16 { return s.id } - -// Size returns the size of the data in the segment. -// This is only populated once InitForWrite() is called. -func (s *SeriesSegment) Size() int64 { return int64(s.size) } - -// Slice returns a byte slice starting at pos. -func (s *SeriesSegment) Slice(pos uint32) []byte { return s.data[pos:] } - -func (s *SeriesSegment) Path() string { return s.path } - -// WriteLogEntry writes entry data into the segment. -// Returns the offset of the beginning of the entry. -func (s *SeriesSegment) WriteLogEntry(data []byte) (offset int64, err error) { - if !s.CanWrite(data) { - return 0, ErrSeriesSegmentNotWritable - } - - offset = JoinSeriesOffset(s.id, s.size) - if _, err := s.w.Write(data); err != nil { - return 0, err - } - s.size += uint32(len(data)) - - return offset, nil -} - -// CanWrite returns true if segment has space to write entry data. -func (s *SeriesSegment) CanWrite(data []byte) bool { - return s.w != nil && s.size+uint32(len(data)) <= SeriesSegmentSize(s.id) -} - -// Flush flushes the buffer to disk. -func (s *SeriesSegment) Flush() error { - if s.w == nil { - return nil - } - return s.w.Flush() -} - -// AppendSeriesIDs appends all the segments ids to a slice. Returns the new slice. -func (s *SeriesSegment) AppendSeriesIDs(a []tsdb.SeriesID) []tsdb.SeriesID { - s.ForEachEntry(func(flag uint8, id tsdb.SeriesIDTyped, _ int64, _ []byte) error { - if flag == SeriesEntryInsertFlag { - a = append(a, id.SeriesID()) - } - return nil - }) - return a -} - -// MaxSeriesID returns the highest series id in the segment. 
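CanWrite above bounds every segment by SeriesSegmentSize(id), which (as defined further down in this file) doubles from 4MB per segment up to a 256MB cap, so a partition's total capacity grows geometrically while the first segments stay small. A quick sketch of that schedule:

package main

import "fmt"

// segmentSize mirrors SeriesSegmentSize below: 4MB << id, capped at 256MB.
func segmentSize(id uint16) uint32 {
    const min, max = 22, 28 // 2^22 = 4MB, 2^28 = 256MB
    shift := id + min
    if shift >= max {
        shift = max
    }
    return 1 << shift
}

func main() {
    for id := uint16(0); id <= 7; id++ {
        fmt.Printf("segment %04x: %d MB\n", id, segmentSize(id)>>20)
    }
    // Prints 4, 8, 16, 32, 64, 128, 256, 256 MB.
}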
-func (s *SeriesSegment) MaxSeriesID() tsdb.SeriesID { - var max tsdb.SeriesID - s.ForEachEntry(func(flag uint8, id tsdb.SeriesIDTyped, _ int64, _ []byte) error { - untypedID := id.SeriesID() - if flag == SeriesEntryInsertFlag && untypedID.Greater(max) { - max = untypedID - } - return nil - }) - return max -} - -// ForEachEntry executes fn for every entry in the segment. -func (s *SeriesSegment) ForEachEntry(fn func(flag uint8, id tsdb.SeriesIDTyped, offset int64, key []byte) error) error { - for pos := uint32(SeriesSegmentHeaderSize); pos < uint32(len(s.data)); { - flag, id, key, sz := ReadSeriesEntry(s.data[pos:]) - if !IsValidSeriesEntryFlag(flag) { - break - } - _ = wait(s.limiter, s.data[pos:int64(pos)+sz]) - - offset := JoinSeriesOffset(s.id, pos) - if err := fn(flag, id, offset, key); err != nil { - return err - } - pos += uint32(sz) - } - return nil -} - -// Clone returns a copy of the segment. Excludes the write handler, if set. -func (s *SeriesSegment) Clone() *SeriesSegment { - return &SeriesSegment{ - id: s.id, - path: s.path, - data: s.data, - size: s.size, - } -} - -// CompactToPath rewrites the segment to a new file and removes tombstoned entries. -func (s *SeriesSegment) CompactToPath(path string, index *SeriesIndex) error { - dst, err := CreateSeriesSegment(s.id, path) - if err != nil { - return err - } - defer dst.Close() - - if err = dst.InitForWrite(); err != nil { - return err - } - - // Iterate through the segment and write any entries to a new segment - // that exist in the index. - var buf []byte - if err = s.ForEachEntry(func(flag uint8, id tsdb.SeriesIDTyped, _ int64, key []byte) error { - if index.IsDeleted(id.SeriesID()) { - return nil // series id has been deleted from index - } else if flag == SeriesEntryTombstoneFlag { - return fmt.Errorf("[series id %d]: tombstone entry but exists in index", id) - } - - // copy entry over to new segment - buf = AppendSeriesEntry(buf[:0], flag, id, key) - _, err := dst.WriteLogEntry(buf) - return err - }); err != nil { - return err - } - - // Close the segment and truncate it to its maximum size. - size := dst.size - if err := dst.Close(); err != nil { - return err - } else if err := os.Truncate(dst.path, int64(size)); err != nil { - return err - } - return nil -} - -// CloneSeriesSegments returns a copy of a slice of segments. -func CloneSeriesSegments(a []*SeriesSegment) []*SeriesSegment { - other := make([]*SeriesSegment, len(a)) - for i := range a { - other[i] = a[i].Clone() - } - return other -} - -// FindSegment returns a segment by id. -func FindSegment(a []*SeriesSegment, id uint16) *SeriesSegment { - for _, segment := range a { - if segment.id == id { - return segment - } - } - return nil -} - -// ReadSeriesKeyFromSegments returns a series key from an offset within a set of segments. -func ReadSeriesKeyFromSegments(a []*SeriesSegment, offset int64) []byte { - segmentID, pos := SplitSeriesOffset(offset) - segment := FindSegment(a, segmentID) - if segment == nil { - return nil - } - buf := segment.Slice(pos) - key, _ := ReadSeriesKey(buf) - _ = wait(segment.limiter, buf[:len(key)]) - return key -} - -// JoinSeriesOffset returns an offset that combines the 2-byte segmentID and 4-byte pos. -func JoinSeriesOffset(segmentID uint16, pos uint32) int64 { - return (int64(segmentID) << 32) | int64(pos) -} - -// SplitSeriesOffset splits a offset into its 2-byte segmentID and 4-byte pos parts. 
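JoinSeriesOffset above packs the 16-bit segment ID into the high bits and the 32-bit position into the low bits of one int64; that packed value is what the index stores and what lookups split back apart. A round-trip sketch of the bit layout:

package main

import "fmt"

func join(segmentID uint16, pos uint32) int64 {
    return (int64(segmentID) << 32) | int64(pos)
}

func split(offset int64) (uint16, uint32) {
    return uint16((offset >> 32) & 0xFFFF), uint32(offset & 0xFFFFFFFF)
}

func main() {
    offset := join(0x1234, 0x56789ABC)
    fmt.Printf("%#x\n", offset) // 0x123456789abc
    id, pos := split(offset)
    fmt.Printf("%#x %#x\n", id, pos) // 0x1234 0x56789abc
}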
-func SplitSeriesOffset(offset int64) (segmentID uint16, pos uint32) { - return uint16((offset >> 32) & 0xFFFF), uint32(offset & 0xFFFFFFFF) -} - -// IsValidSeriesSegmentFilename returns true if filename is a 4-character lowercase hexadecimal number. -func IsValidSeriesSegmentFilename(filename string) bool { - return seriesSegmentFilenameRegex.MatchString(filename) -} - -// ParseSeriesSegmentFilename returns the id represented by the hexadecimal filename. -func ParseSeriesSegmentFilename(filename string) (uint16, error) { - i, err := strconv.ParseUint(filename, 16, 32) - return uint16(i), err -} - -var seriesSegmentFilenameRegex = regexp.MustCompile(`^[0-9a-f]{4}$`) - -// SeriesSegmentSize returns the maximum size of the segment. -// The size goes up by powers of 2 starting from 4MB and reaching 256MB. -func SeriesSegmentSize(id uint16) uint32 { - const min = 22 // 4MB - const max = 28 // 256MB - - shift := id + min - if shift >= max { - shift = max - } - return 1 << shift -} - -// SeriesSegmentHeader represents the header of a series segment. -type SeriesSegmentHeader struct { - Version uint8 -} - -// NewSeriesSegmentHeader returns a new instance of SeriesSegmentHeader. -func NewSeriesSegmentHeader() SeriesSegmentHeader { - return SeriesSegmentHeader{Version: SeriesSegmentVersion} -} - -// ReadSeriesSegmentHeader returns the header from data. -func ReadSeriesSegmentHeader(data []byte) (hdr SeriesSegmentHeader, err error) { - r := bytes.NewReader(data) - - // Read magic number. - magic := make([]byte, len(SeriesSegmentMagic)) - if _, err := io.ReadFull(r, magic); err != nil { - return hdr, err - } else if !bytes.Equal([]byte(SeriesSegmentMagic), magic) { - return hdr, ErrInvalidSeriesSegment - } - - // Read version. - if err := binary.Read(r, binary.BigEndian, &hdr.Version); err != nil { - return hdr, err - } - - return hdr, nil -} - -// WriteTo writes the header to w. -func (hdr *SeriesSegmentHeader) WriteTo(w io.Writer) (n int64, err error) { - var buf bytes.Buffer - buf.WriteString(SeriesSegmentMagic) - binary.Write(&buf, binary.BigEndian, hdr.Version) - return buf.WriteTo(w) -} - -func ReadSeriesEntry(data []byte) (flag uint8, id tsdb.SeriesIDTyped, key []byte, sz int64) { - // If flag byte is zero then no more entries exist. - flag, data = uint8(data[0]), data[1:] - if !IsValidSeriesEntryFlag(flag) { - return 0, tsdb.SeriesIDTyped{}, nil, 1 - } - - id, data = tsdb.NewSeriesIDTyped(binary.BigEndian.Uint64(data)), data[8:] - switch flag { - case SeriesEntryInsertFlag: - key, _ = ReadSeriesKey(data) - } - return flag, id, key, int64(SeriesEntryHeaderSize + len(key)) -} - -func AppendSeriesEntry(dst []byte, flag uint8, id tsdb.SeriesIDTyped, key []byte) []byte { - buf := make([]byte, 8) - binary.BigEndian.PutUint64(buf, id.RawID()) - - dst = append(dst, flag) - dst = append(dst, buf...) - - switch flag { - case SeriesEntryInsertFlag: - dst = append(dst, key...) - case SeriesEntryTombstoneFlag: - default: - panic(fmt.Sprintf("unreachable: invalid flag: %d", flag)) - } - return dst -} - -// IsValidSeriesEntryFlag returns true if flag is valid. 
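ReadSeriesEntry and AppendSeriesEntry above define the log record layout: one flag byte, an 8-byte big-endian typed series ID, and (for inserts only) the series key bytes. A minimal encode/decode sketch of that layout; the key is treated as an opaque byte string here, whereas the real series key carries its own length-prefixed encoding:

package main

import (
    "encoding/binary"
    "fmt"
)

const (
    insertFlag    = 0x01
    tombstoneFlag = 0x02
)

// appendEntry lays out flag(1) + id(8, big endian) + key, as above.
func appendEntry(dst []byte, flag byte, id uint64, key []byte) []byte {
    dst = append(dst, flag)
    dst = binary.BigEndian.AppendUint64(dst, id)
    if flag == insertFlag {
        dst = append(dst, key...)
    }
    return dst
}

// readEntry decodes one entry; keyLen must come from the key's own encoding,
// simplified to a fixed length for this sketch.
func readEntry(data []byte, keyLen int) (flag byte, id uint64, key []byte) {
    flag = data[0]
    id = binary.BigEndian.Uint64(data[1:9])
    if flag == insertFlag {
        key = data[9 : 9+keyLen]
    }
    return flag, id, key
}

func main() {
    buf := appendEntry(nil, insertFlag, 42, []byte("cpu"))
    flag, id, key := readEntry(buf, 3)
    fmt.Println(flag, id, string(key), len(buf)) // 1 42 cpu 12
}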
-func IsValidSeriesEntryFlag(flag byte) bool { - switch flag { - case SeriesEntryInsertFlag, SeriesEntryTombstoneFlag: - return true - default: - return false - } -} diff --git a/tsdb/seriesfile/series_segment_test.go b/tsdb/seriesfile/series_segment_test.go deleted file mode 100644 index 0583a06216..0000000000 --- a/tsdb/seriesfile/series_segment_test.go +++ /dev/null @@ -1,277 +0,0 @@ -package seriesfile_test - -import ( - "bytes" - "fmt" - "io/ioutil" - "os" - "path/filepath" - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" -) - -func MustTempDir() (string, func()) { - dir, err := ioutil.TempDir("", "test-series-segment") - if err != nil { - panic(fmt.Sprintf("failed to create temp dir: %v", err)) - } - return dir, func() { os.RemoveAll(dir) } -} - -func TestSeriesSegment(t *testing.T) { - dir, cleanup := MustTempDir() - defer cleanup() - - // Create a new initial segment (4mb) and initialize for writing. - segment, err := seriesfile.CreateSeriesSegment(0, filepath.Join(dir, "0000")) - if err != nil { - t.Fatal(err) - } else if err := segment.InitForWrite(); err != nil { - t.Fatal(err) - } - defer segment.Close() - - // Write initial entry. - key1 := seriesfile.AppendSeriesKey(nil, []byte("m0"), nil) - offset, err := segment.WriteLogEntry(seriesfile.AppendSeriesEntry(nil, seriesfile.SeriesEntryInsertFlag, toTypedSeriesID(1), key1)) - if err != nil { - t.Fatal(err) - } else if offset != seriesfile.SeriesSegmentHeaderSize { - t.Fatalf("unexpected offset: %d", offset) - } - - // Write a large entry (3mb). - key2 := seriesfile.AppendSeriesKey(nil, bytes.Repeat([]byte("m"), 3*(1<<20)), nil) - if _, err := segment.WriteLogEntry(seriesfile.AppendSeriesEntry(nil, seriesfile.SeriesEntryInsertFlag, toTypedSeriesID(2), key2)); err != nil { - t.Fatal(err) - } else if offset != seriesfile.SeriesSegmentHeaderSize { - t.Fatalf("unexpected offset: %d", offset) - } - - // Write another entry that is too large for the remaining segment space. - if _, err := segment.WriteLogEntry(seriesfile.AppendSeriesEntry(nil, seriesfile.SeriesEntryInsertFlag, toTypedSeriesID(3), seriesfile.AppendSeriesKey(nil, bytes.Repeat([]byte("n"), 3*(1<<20)), nil))); err != seriesfile.ErrSeriesSegmentNotWritable { - t.Fatalf("unexpected error: %s", err) - } - - // Verify two entries exist. - var n int - segment.ForEachEntry(func(flag uint8, id tsdb.SeriesIDTyped, offset int64, key []byte) error { - switch n { - case 0: - if flag != seriesfile.SeriesEntryInsertFlag || id != toTypedSeriesID(1) || !bytes.Equal(key1, key) { - t.Fatalf("unexpected entry(0): %d, %d, %q", flag, id, key) - } - case 1: - if flag != seriesfile.SeriesEntryInsertFlag || id != toTypedSeriesID(2) || !bytes.Equal(key2, key) { - t.Fatalf("unexpected entry(1): %d, %d, %q", flag, id, key) - } - default: - t.Fatalf("too many entries") - } - n++ - return nil - }) - if n != 2 { - t.Fatalf("unexpected entry count: %d", n) - } -} - -func TestSeriesSegment_AppendSeriesIDs(t *testing.T) { - dir, cleanup := MustTempDir() - defer cleanup() - - segment, err := seriesfile.CreateSeriesSegment(0, filepath.Join(dir, "0000")) - if err != nil { - t.Fatal(err) - } else if err := segment.InitForWrite(); err != nil { - t.Fatal(err) - } - defer segment.Close() - - // Write entries. 
- if _, err := segment.WriteLogEntry(seriesfile.AppendSeriesEntry(nil, seriesfile.SeriesEntryInsertFlag, toTypedSeriesID(10), seriesfile.AppendSeriesKey(nil, []byte("m0"), nil))); err != nil { - t.Fatal(err) - } else if _, err := segment.WriteLogEntry(seriesfile.AppendSeriesEntry(nil, seriesfile.SeriesEntryInsertFlag, toTypedSeriesID(11), seriesfile.AppendSeriesKey(nil, []byte("m1"), nil))); err != nil { - t.Fatal(err) - } else if err := segment.Flush(); err != nil { - t.Fatal(err) - } - - // Collect series ids with existing set. - a := segment.AppendSeriesIDs(toSeriesIDs([]uint64{1, 2})) - if diff := cmp.Diff(a, toSeriesIDs([]uint64{1, 2, 10, 11})); diff != "" { - t.Fatal(diff) - } -} - -func toSeriesIDs(ids []uint64) []tsdb.SeriesID { - sids := make([]tsdb.SeriesID, 0, len(ids)) - for _, id := range ids { - sids = append(sids, tsdb.NewSeriesID(id)) - } - return sids -} - -func TestSeriesSegment_MaxSeriesID(t *testing.T) { - dir, cleanup := MustTempDir() - defer cleanup() - - segment, err := seriesfile.CreateSeriesSegment(0, filepath.Join(dir, "0000")) - if err != nil { - t.Fatal(err) - } else if err := segment.InitForWrite(); err != nil { - t.Fatal(err) - } - defer segment.Close() - - // Write entries. - if _, err := segment.WriteLogEntry(seriesfile.AppendSeriesEntry(nil, seriesfile.SeriesEntryInsertFlag, toTypedSeriesID(10), seriesfile.AppendSeriesKey(nil, []byte("m0"), nil))); err != nil { - t.Fatal(err) - } else if _, err := segment.WriteLogEntry(seriesfile.AppendSeriesEntry(nil, seriesfile.SeriesEntryInsertFlag, toTypedSeriesID(11), seriesfile.AppendSeriesKey(nil, []byte("m1"), nil))); err != nil { - t.Fatal(err) - } else if err := segment.Flush(); err != nil { - t.Fatal(err) - } - - // Verify maximum. - if max := segment.MaxSeriesID(); max != tsdb.NewSeriesID(11) { - t.Fatalf("unexpected max: %d", max) - } -} - -func TestSeriesSegmentHeader(t *testing.T) { - // Verify header initializes correctly. - hdr := seriesfile.NewSeriesSegmentHeader() - if hdr.Version != seriesfile.SeriesSegmentVersion { - t.Fatalf("unexpected version: %d", hdr.Version) - } - - // Marshal/unmarshal. - var buf bytes.Buffer - if _, err := hdr.WriteTo(&buf); err != nil { - t.Fatal(err) - } else if other, err := seriesfile.ReadSeriesSegmentHeader(buf.Bytes()); err != nil { - t.Fatal(err) - } else if diff := cmp.Diff(hdr, other); diff != "" { - t.Fatal(diff) - } -} - -func TestSeriesSegment_PartialWrite(t *testing.T) { - dir, cleanup := MustTempDir() - defer cleanup() - - // Create a new initial segment (4mb) and initialize for writing. - segment, err := seriesfile.CreateSeriesSegment(0, filepath.Join(dir, "0000")) - if err != nil { - t.Fatal(err) - } else if err := segment.InitForWrite(); err != nil { - t.Fatal(err) - } - defer segment.Close() - - // Write two entries. - if _, err := segment.WriteLogEntry(seriesfile.AppendSeriesEntry(nil, seriesfile.SeriesEntryInsertFlag, toTypedSeriesID(1), seriesfile.AppendSeriesKey(nil, []byte("A"), nil))); err != nil { - t.Fatal(err) - } else if _, err := segment.WriteLogEntry(seriesfile.AppendSeriesEntry(nil, seriesfile.SeriesEntryInsertFlag, toTypedSeriesID(2), seriesfile.AppendSeriesKey(nil, []byte("B"), nil))); err != nil { - t.Fatal(err) - } - sz := segment.Size() - entrySize := len(seriesfile.AppendSeriesEntry(nil, seriesfile.SeriesEntryInsertFlag, toTypedSeriesID(2), seriesfile.AppendSeriesKey(nil, []byte("B"), nil))) - - // Close segment. - if err := segment.Close(); err != nil { - t.Fatal(err) - } - - // Truncate at each point and reopen. 
- for i := entrySize; i > 0; i-- { - if err := os.Truncate(filepath.Join(dir, "0000"), sz-int64(entrySize-i)); err != nil { - t.Fatal(err) - } - segment := seriesfile.NewSeriesSegment(0, filepath.Join(dir, "0000")) - if err := segment.Open(); err != nil { - t.Fatal(err) - } else if err := segment.InitForWrite(); err != nil { - t.Fatal(err) - } else if err := segment.Close(); err != nil { - t.Fatal(err) - } - } -} - -func TestJoinSeriesOffset(t *testing.T) { - if offset := seriesfile.JoinSeriesOffset(0x1234, 0x56789ABC); offset != 0x123456789ABC { - t.Fatalf("unexpected offset: %x", offset) - } -} - -func TestSplitSeriesOffset(t *testing.T) { - if segmentID, pos := seriesfile.SplitSeriesOffset(0x123456789ABC); segmentID != 0x1234 || pos != 0x56789ABC { - t.Fatalf("unexpected segmentID/pos: %x/%x", segmentID, pos) - } -} - -func TestIsValidSeriesSegmentFilename(t *testing.T) { - if seriesfile.IsValidSeriesSegmentFilename("") { - t.Fatal("expected invalid") - } else if seriesfile.IsValidSeriesSegmentFilename("0ab") { - t.Fatal("expected invalid") - } else if !seriesfile.IsValidSeriesSegmentFilename("192a") { - t.Fatal("expected valid") - } -} - -func TestParseSeriesSegmentFilename(t *testing.T) { - if v, err := seriesfile.ParseSeriesSegmentFilename("a90b"); err != nil { - t.Fatal(err) - } else if v != 0xA90B { - t.Fatalf("unexpected value: %x", v) - } - if v, err := seriesfile.ParseSeriesSegmentFilename("0001"); err != nil { - t.Fatal(err) - } else if v != 1 { - t.Fatalf("unexpected value: %x", v) - } - if _, err := seriesfile.ParseSeriesSegmentFilename("invalid"); err == nil { - t.Fatal("expected error") - } -} - -func TestSeriesSegmentSize(t *testing.T) { - const mb = (1 << 20) - if sz := seriesfile.SeriesSegmentSize(0); sz != 4*mb { - t.Fatalf("unexpected size: %d", sz) - } else if sz := seriesfile.SeriesSegmentSize(1); sz != 8*mb { - t.Fatalf("unexpected size: %d", sz) - } else if sz := seriesfile.SeriesSegmentSize(2); sz != 16*mb { - t.Fatalf("unexpected size: %d", sz) - } else if sz := seriesfile.SeriesSegmentSize(3); sz != 32*mb { - t.Fatalf("unexpected size: %d", sz) - } else if sz := seriesfile.SeriesSegmentSize(4); sz != 64*mb { - t.Fatalf("unexpected size: %d", sz) - } else if sz := seriesfile.SeriesSegmentSize(5); sz != 128*mb { - t.Fatalf("unexpected size: %d", sz) - } else if sz := seriesfile.SeriesSegmentSize(6); sz != 256*mb { - t.Fatalf("unexpected size: %d", sz) - } else if sz := seriesfile.SeriesSegmentSize(7); sz != 256*mb { - t.Fatalf("unexpected size: %d", sz) - } -} - -func TestSeriesEntry(t *testing.T) { - seriesKey := seriesfile.AppendSeriesKey(nil, []byte("m0"), nil) - buf := seriesfile.AppendSeriesEntry(nil, 1, toTypedSeriesID(2), seriesKey) - if flag, id, key, sz := seriesfile.ReadSeriesEntry(buf); flag != 1 { - t.Fatalf("unexpected flag: %d", flag) - } else if id != toTypedSeriesID(2) { - t.Fatalf("unexpected id: %d", id) - } else if !bytes.Equal(seriesKey, key) { - t.Fatalf("unexpected key: %q", key) - } else if sz != int64(seriesfile.SeriesEntryHeaderSize+len(key)) { - t.Fatalf("unexpected size: %d", sz) - } -} diff --git a/tsdb/testdata/line-protocol-1M.txt.gz b/tsdb/testdata/line-protocol-1M.txt.gz deleted file mode 100644 index e862b24e19..0000000000 Binary files a/tsdb/testdata/line-protocol-1M.txt.gz and /dev/null differ diff --git a/tsdb/tsi1/DESIGN.md b/tsdb/tsi1/DESIGN.md deleted file mode 100644 index 9935a2bac3..0000000000 --- a/tsdb/tsi1/DESIGN.md +++ /dev/null @@ -1,83 +0,0 @@ -# Time-Series Index - -## Introduction - -## Architecture - -### index 
structures and access patterns -### series ID sets -### partitioning and file types -### compactions - -## File Format - -## Access Times - -### Insertion - -TODO - -### Retrieval - -This section provides some general idea of the typical timings one can expect to experience when accessing the index. - -#### Measurement Retrieval - -Approximate times for retrieving _all_ measurements, equivalent to executing `SHOW MEASUREMENTS`, follow. These types of query only involve materialising data held in the index. - - - Retrieve 1 measurement from TSI index: `~100µs` - - Retrieve 100 measurements from TSI index: `~200µs` - - Retrieve 10,000 measurements from TSI index: `~8ms` - - -Note: as the number of measurements gets larger, much of the time will be spent allocating and materialising the measurements into a `[][]byte` to be returned to the caller. - - -#### Tag Keys Retrieval - -Approximate times for retrieving _all_ tag keys, equivalent to executing `SHOW TAG KEYS`, follow. These types of query only involve materialising data held in the index. - - - Retrieve 1 tag key from TSI index: `~65µs` - - Retrieve 100 tag keys from TSI index: `~90µs` - - Retrieve 1,000 tag keys from TSI index: `~1.3ms` - -Note: the times here show only the TSI index access for retrieving the tag keys. In practice, the measurement retrieval times need to be added on top, since you need a measurement name to access the tag keys. - - -#### Tag Value Retrieval - -Approximate times for retrieving _all_ tag values for a _specific_ tag key, equivalent to `SHOW TAG VALUES WITH KEY = "region"`, follow. These types of query only involve materialising data held in the index. - - - Retrieve 1 tag value from TSI index: `~20µs` - - Retrieve 100 tag values from TSI index: `~240µs` - - Retrieve 10,000 tag values from TSI index: `~13ms` - - -#### Series ID Retrieval - -Approximate times for retrieving a set of matching series ids for different total cardinalities, follow. - - - Retrieve 1 series id for db with cardinality 1: `~50µs` (`10µs`) - - Retrieve 10 series ids for db with cardinality 100: `~50µs` (`10µs`) - - Retrieve 100 series ids for db with cardinality 10,000: `~80µs` (`10µs`) - - Retrieve 10,000 series ids for db with cardinality 1,000,000: `~600µs` (`10µs`) - - Retrieve 100,000 series ids for db with cardinality 10,000,000: `~22ms` (`10µs`) - - -Note: the initial time is for the first observation. The second—parenthesised—time is for subsequent observations. Subsequent observations make use of the TSI bitset cache introduced in [#10234](https://github.com/influxdata/influxdb/pull/10234). - - -## Complex Series ID Retrieval - -Approximate times for retrieving a set of matching series ids for different total cardinalities. In these cases, each retrieval is based on two tag key/value predicates, e.g., `SHOW SERIES WHERE "region" = 'west' AND "zone" = 'a'` - - - Retrieve 1,000 series ids for db with cardinality 1,000,000: `~8ms` (`15µs`) - - Retrieve 10,000 series ids for db with cardinality 10,000,000: `~7ms` (`25µs`) - - -Note: the initial time is for the first observation. The second—parenthesised—time is for subsequent observations. Subsequent observations make use of the TSI bitset cache introduced in [#10234](https://github.com/influxdata/influxdb/pull/10234). -In these more complex cases, a series ID set is retrieved for each of the predicates. The sets are then intersected to identify the final set. Cache times, then, are typically doubled since each series id set for each predicate is stored separately. 
-There will be some additional overhead for the intersection operation. - - - diff --git a/tsdb/tsi1/cache.go b/tsdb/tsi1/cache.go deleted file mode 100644 index 908a0a8205..0000000000 --- a/tsdb/tsi1/cache.go +++ /dev/null @@ -1,302 +0,0 @@ -package tsi1 - -import ( - "container/list" - "sync" - - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/prometheus/client_golang/prometheus" -) - -// TagValueSeriesIDCache is an LRU cache for series id sets associated with -// name -> key -> value mappings. The purpose of the cache is to provide -// efficient means to get sets of series ids that would otherwise involve merging -// many individual bitmaps at query time. -// -// When initialising a TagValueSeriesIDCache a capacity must be provided. When -// more than c items are added to the cache, the least recently used item is -// evicted from the cache. -// -// A TagValueSeriesIDCache comprises a linked list implementation to track the -// order by which items should be evicted from the cache, and a hashmap implementation -// to provide constant time retrievals of items from the cache. -type TagValueSeriesIDCache struct { - sync.RWMutex - cache map[string]map[string]map[string]*list.Element - evictor *list.List - - tracker *cacheTracker - capacity uint64 -} - -// NewTagValueSeriesIDCache returns a TagValueSeriesIDCache with capacity c. -func NewTagValueSeriesIDCache(c uint64) *TagValueSeriesIDCache { - return &TagValueSeriesIDCache{ - cache: map[string]map[string]map[string]*list.Element{}, - evictor: list.New(), - tracker: newCacheTracker(newCacheMetrics(nil), nil), - capacity: c, - } -} - -// Get returns the SeriesIDSet associated with the {name, key, value} tuple if it -// exists. -func (c *TagValueSeriesIDCache) Get(name, key, value []byte) *tsdb.SeriesIDSet { - c.Lock() - defer c.Unlock() - return c.get(name, key, value) -} - -func (c *TagValueSeriesIDCache) get(name, key, value []byte) *tsdb.SeriesIDSet { - if mmap, ok := c.cache[string(name)]; ok { - if tkmap, ok := mmap[string(key)]; ok { - if ele, ok := tkmap[string(value)]; ok { - c.tracker.IncGetHit() - c.evictor.MoveToFront(ele) // This now becomes most recently used. - return ele.Value.(*seriesIDCacheElement).SeriesIDSet - } - } - } - c.tracker.IncGetMiss() - return nil -} - -// exists returns true if the an item exists for the tuple {name, key, value}. -func (c *TagValueSeriesIDCache) exists(name, key, value []byte) bool { - if mmap, ok := c.cache[string(name)]; ok { - if tkmap, ok := mmap[string(key)]; ok { - _, ok := tkmap[string(value)] - return ok - } - } - return false -} - -// addToSet adds x to the SeriesIDSet associated with the tuple {name, key, value} -// if it exists. This method takes a lock on the underlying SeriesIDSet. -// -// NB this does not count as an access on the set—therefore the set is not promoted -// within the LRU cache. -func (c *TagValueSeriesIDCache) addToSet(name, key, value []byte, x tsdb.SeriesID) { - if mmap, ok := c.cache[string(name)]; ok { - if tkmap, ok := mmap[string(key)]; ok { - if ele, ok := tkmap[string(value)]; ok { - ss := ele.Value.(*seriesIDCacheElement).SeriesIDSet - if ss == nil { - ele.Value.(*seriesIDCacheElement).SeriesIDSet = tsdb.NewSeriesIDSet(x) - return - } - ele.Value.(*seriesIDCacheElement).SeriesIDSet.Add(x) - } - } - } -} - -// measurementContainsSets returns true if there are sets cached for the provided measurement. 
-func (c *TagValueSeriesIDCache) measurementContainsSets(name []byte) bool { - _, ok := c.cache[string(name)] - return ok -} - -// Put adds the SeriesIDSet to the cache under the tuple {name, key, value}. If -// the cache is at its limit, then the least recently used item is evicted. -func (c *TagValueSeriesIDCache) Put(name, key, value []byte, ss *tsdb.SeriesIDSet) { - c.Lock() - // Check under the write lock if the relevant item is now in the cache. - if c.exists(name, key, value) { - c.Unlock() - c.tracker.IncPutHit() - return - } - defer c.Unlock() - - // Ensure our SeriesIDSet is go heap backed. - if ss != nil { - ss = ss.Clone() - } - - // Create list item, and add to the front of the eviction list. - listElement := c.evictor.PushFront(&seriesIDCacheElement{ - name: string(name), - key: string(key), - value: string(value), - SeriesIDSet: ss, - }) - - // Add the listElement to the set of items. - if mmap, ok := c.cache[string(name)]; ok { - if tkmap, ok := mmap[string(key)]; ok { - if _, ok := tkmap[string(value)]; ok { - goto EVICT - } - - // Add the set to the map - tkmap[string(value)] = listElement - goto EVICT - } - - // No series set map for the tag key - first tag value for the tag key. - mmap[string(key)] = map[string]*list.Element{string(value): listElement} - goto EVICT - } - - // No map for the measurement - first tag key for the measurment. - c.cache[string(name)] = map[string]map[string]*list.Element{ - string(key): {string(value): listElement}, - } - -EVICT: - c.checkEviction() - c.tracker.IncPutMiss() -} - -// Delete removes x from the tuple {name, key, value} if it exists. -// This method takes a lock on the underlying SeriesIDSet. -func (c *TagValueSeriesIDCache) Delete(name, key, value []byte, x tsdb.SeriesID) { - c.Lock() - c.delete(name, key, value, x) - c.Unlock() -} - -// DeleteMeasurement removes all cached entries for the provided measurement name. -func (c *TagValueSeriesIDCache) DeleteMeasurement(name []byte) { - c.Lock() - delete(c.cache, string(name)) - c.Unlock() -} - -// delete removes x from the tuple {name, key, value} if it exists. -func (c *TagValueSeriesIDCache) delete(name, key, value []byte, x tsdb.SeriesID) { - if mmap, ok := c.cache[string(name)]; ok { - if tkmap, ok := mmap[string(key)]; ok { - if ele, ok := tkmap[string(value)]; ok { - if ss := ele.Value.(*seriesIDCacheElement).SeriesIDSet; ss != nil { - ele.Value.(*seriesIDCacheElement).SeriesIDSet.Remove(x) - c.tracker.IncDeletesHit() - return - } - } - } - } - c.tracker.IncDeletesMiss() -} - -// checkEviction checks if the cache is too big, and evicts the least recently used -// item if it is. -func (c *TagValueSeriesIDCache) checkEviction() { - l := uint64(c.evictor.Len()) - c.tracker.SetSize(l) - if l <= c.capacity { - return - } - - e := c.evictor.Back() // Least recently used item. - listElement := e.Value.(*seriesIDCacheElement) - name := listElement.name - key := listElement.key - value := listElement.value - - c.evictor.Remove(e) // Remove from evictor - delete(c.cache[string(name)][string(key)], string(value)) // Remove from hashmap of items. - - // Check if there are no more tag values for the tag key. - if len(c.cache[string(name)][string(key)]) == 0 { - delete(c.cache[string(name)], string(key)) - } - - // Check there are no more tag keys for the measurement. 
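The cache above pairs a doubly linked list (recency order) with a nested map (constant-time lookup): Get moves a hit to the front, Put pushes a new element at the front, and checkEviction drops the back element once capacity is exceeded. A stripped-down sketch of the same list-plus-map LRU over plain string keys, keeping the cache's behavior of never replacing an existing entry on Put:

package main

import (
    "container/list"
    "fmt"
)

type entry struct {
    key   string
    value int
}

type lru struct {
    capacity int
    items    map[string]*list.Element
    order    *list.List // front = most recently used
}

func newLRU(capacity int) *lru {
    return &lru{capacity: capacity, items: map[string]*list.Element{}, order: list.New()}
}

func (c *lru) Get(key string) (int, bool) {
    ele, ok := c.items[key]
    if !ok {
        return 0, false
    }
    c.order.MoveToFront(ele) // a hit promotes the element
    return ele.Value.(*entry).value, true
}

func (c *lru) Put(key string, value int) {
    if _, ok := c.items[key]; ok {
        return // like the cache above, Put does not replace an existing item
    }
    c.items[key] = c.order.PushFront(&entry{key: key, value: value})
    if c.order.Len() > c.capacity {
        back := c.order.Back() // least recently used
        c.order.Remove(back)
        delete(c.items, back.Value.(*entry).key)
    }
}

func main() {
    c := newLRU(2)
    c.Put("a", 1)
    c.Put("b", 2)
    c.Get("a")    // "a" is now most recently used
    c.Put("c", 3) // evicts "b"
    _, ok := c.Get("b")
    fmt.Println(ok) // false
}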
- if len(c.cache[string(name)]) == 0 { - delete(c.cache, string(name)) - } - c.tracker.IncEvictions() -} - -func (c *TagValueSeriesIDCache) PrometheusCollectors() []prometheus.Collector { - var collectors []prometheus.Collector - collectors = append(collectors, c.tracker.metrics.PrometheusCollectors()...) - return collectors -} - -// seriesIDCacheElement is an item stored within a cache. -type seriesIDCacheElement struct { - name string - key string - value string - SeriesIDSet *tsdb.SeriesIDSet -} - -type cacheTracker struct { - metrics *cacheMetrics - labels prometheus.Labels - enabled bool -} - -func newCacheTracker(metrics *cacheMetrics, defaultLabels prometheus.Labels) *cacheTracker { - return &cacheTracker{metrics: metrics, labels: defaultLabels, enabled: true} -} - -// Labels returns a copy of labels for use with index cache metrics. -func (t *cacheTracker) Labels() prometheus.Labels { - l := make(map[string]string, len(t.labels)) - for k, v := range t.labels { - l[k] = v - } - return l -} - -func (t *cacheTracker) SetSize(sz uint64) { - if !t.enabled { - return - } - - labels := t.Labels() - t.metrics.Size.With(labels).Set(float64(sz)) -} - -func (t *cacheTracker) incGet(status string) { - if !t.enabled { - return - } - - labels := t.Labels() - labels["status"] = status - t.metrics.Gets.With(labels).Inc() -} - -func (t *cacheTracker) IncGetHit() { t.incGet("hit") } -func (t *cacheTracker) IncGetMiss() { t.incGet("miss") } - -func (t *cacheTracker) incPut(status string) { - if !t.enabled { - return - } - - labels := t.Labels() - labels["status"] = status - t.metrics.Puts.With(labels).Inc() -} - -func (t *cacheTracker) IncPutHit() { t.incPut("hit") } -func (t *cacheTracker) IncPutMiss() { t.incPut("miss") } - -func (t *cacheTracker) incDeletes(status string) { - if !t.enabled { - return - } - - labels := t.Labels() - labels["status"] = status - t.metrics.Deletes.With(labels).Inc() -} - -func (t *cacheTracker) IncDeletesHit() { t.incDeletes("hit") } -func (t *cacheTracker) IncDeletesMiss() { t.incDeletes("miss") } - -func (t *cacheTracker) IncEvictions() { - if !t.enabled { - return - } - - labels := t.Labels() - t.metrics.Evictions.With(labels).Inc() -} diff --git a/tsdb/tsi1/cache_test.go b/tsdb/tsi1/cache_test.go deleted file mode 100644 index 0d10fe2604..0000000000 --- a/tsdb/tsi1/cache_test.go +++ /dev/null @@ -1,255 +0,0 @@ -package tsi1 - -import ( - "math/rand" - "sync" - "sync/atomic" - "testing" - "time" - - "github.com/influxdata/influxdb/v2/tsdb" -) - -func newSeriesIDSet(ids ...int) *tsdb.SeriesIDSet { - out := make([]tsdb.SeriesID, 0, len(ids)) - for _, v := range ids { - out = append(out, tsdb.NewSeriesID(uint64(v))) - } - return tsdb.NewSeriesIDSet(out...) -} - -func TestTagValueSeriesIDCache(t *testing.T) { - m0k0v0 := newSeriesIDSet(1, 2, 3, 4, 5) - m0k0v1 := newSeriesIDSet(10, 20, 30, 40, 50) - m0k1v2 := newSeriesIDSet() - m1k3v0 := newSeriesIDSet(900, 0, 929) - - cache := TestCache{NewTagValueSeriesIDCache(10)} - cache.Has(t, "m0", "k0", "v0", nil) - - // Putting something in the cache makes it retrievable. - cache.PutByString("m0", "k0", "v0", m0k0v0) - cache.Has(t, "m0", "k0", "v0", m0k0v0) - - // Putting something else under the same key will not replace the original item. - cache.PutByString("m0", "k0", "v0", newSeriesIDSet(100, 200)) - cache.Has(t, "m0", "k0", "v0", m0k0v0) - - // Add another item to the cache. 
- cache.PutByString("m0", "k0", "v1", m0k0v1) - cache.Has(t, "m0", "k0", "v0", m0k0v0) - cache.Has(t, "m0", "k0", "v1", m0k0v1) - - // Add some more items - cache.PutByString("m0", "k1", "v2", m0k1v2) - cache.PutByString("m1", "k3", "v0", m1k3v0) - cache.Has(t, "m0", "k0", "v0", m0k0v0) - cache.Has(t, "m0", "k0", "v1", m0k0v1) - cache.Has(t, "m0", "k1", "v2", m0k1v2) - cache.Has(t, "m1", "k3", "v0", m1k3v0) -} - -func TestTagValueSeriesIDCache_eviction(t *testing.T) { - m0k0v0 := newSeriesIDSet(1, 2, 3, 4, 5) - m0k0v1 := newSeriesIDSet(10, 20, 30, 40, 50) - m0k1v2 := newSeriesIDSet() - m1k3v0 := newSeriesIDSet(900, 0, 929) - - cache := TestCache{NewTagValueSeriesIDCache(4)} - cache.PutByString("m0", "k0", "v0", m0k0v0) - cache.PutByString("m0", "k0", "v1", m0k0v1) - cache.PutByString("m0", "k1", "v2", m0k1v2) - cache.PutByString("m1", "k3", "v0", m1k3v0) - cache.Has(t, "m0", "k0", "v0", m0k0v0) - cache.Has(t, "m0", "k0", "v1", m0k0v1) - cache.Has(t, "m0", "k1", "v2", m0k1v2) - cache.Has(t, "m1", "k3", "v0", m1k3v0) - - // Putting another item in the cache will evict m0k0v0 - m2k0v0 := newSeriesIDSet(8, 8, 8) - cache.PutByString("m2", "k0", "v0", m2k0v0) - if got, exp := cache.evictor.Len(), 4; got != exp { - t.Fatalf("cache size was %d, expected %d", got, exp) - } - cache.HasNot(t, "m0", "k0", "v0") - cache.Has(t, "m0", "k0", "v1", m0k0v1) - cache.Has(t, "m0", "k1", "v2", m0k1v2) - cache.Has(t, "m1", "k3", "v0", m1k3v0) - cache.Has(t, "m2", "k0", "v0", m2k0v0) - - // Putting another item in the cache will evict m0k0v1. That will mean - // there will be no values left under the tuple {m0, k0} - if _, ok := cache.cache[string("m0")][string("k0")]; !ok { - t.Fatalf("Map missing for key %q", "k0") - } - - m2k0v1 := newSeriesIDSet(8, 8, 8) - cache.PutByString("m2", "k0", "v1", m2k0v1) - if got, exp := cache.evictor.Len(), 4; got != exp { - t.Fatalf("cache size was %d, expected %d", got, exp) - } - cache.HasNot(t, "m0", "k0", "v0") - cache.HasNot(t, "m0", "k0", "v1") - cache.Has(t, "m0", "k1", "v2", m0k1v2) - cache.Has(t, "m1", "k3", "v0", m1k3v0) - cache.Has(t, "m2", "k0", "v0", m2k0v0) - cache.Has(t, "m2", "k0", "v1", m2k0v1) - - // Further, the map for all tag values for the tuple {m0, k0} should be removed. - if _, ok := cache.cache[string("m0")][string("k0")]; ok { - t.Fatalf("Map present for key %q, should be removed", "k0") - } - - // Putting another item in the cache will evict m0k1v2. That will mean - // there will be no values left under the tuple {m0} - if _, ok := cache.cache[string("m0")]; !ok { - t.Fatalf("Map missing for key %q", "k0") - } - m2k0v2 := newSeriesIDSet(8, 9, 9) - cache.PutByString("m2", "k0", "v2", m2k0v2) - cache.HasNot(t, "m0", "k0", "v0") - cache.HasNot(t, "m0", "k0", "v1") - cache.HasNot(t, "m0", "k1", "v2") - cache.Has(t, "m1", "k3", "v0", m1k3v0) - cache.Has(t, "m2", "k0", "v0", m2k0v0) - cache.Has(t, "m2", "k0", "v1", m2k0v1) - cache.Has(t, "m2", "k0", "v2", m2k0v2) - - // The map for all tag values for the tuple {m0} should be removed. - if _, ok := cache.cache[string("m0")]; ok { - t.Fatalf("Map present for key %q, should be removed", "k0") - } - - // Putting another item in the cache will evict m2k0v0 if we first get m1k3v0 - // because m2k0v0 will have been used less recently... - m3k0v0 := newSeriesIDSet(1000) - cache.Has(t, "m1", "k3", "v0", m1k3v0) // This makes it the most recently used rather than the least. 
- cache.PutByString("m3", "k0", "v0", m3k0v0) - - cache.HasNot(t, "m0", "k0", "v0") - cache.HasNot(t, "m0", "k0", "v1") - cache.HasNot(t, "m0", "k1", "v2") - cache.HasNot(t, "m2", "k0", "v0") // This got pushed to the back. - - cache.Has(t, "m1", "k3", "v0", m1k3v0) // This got saved because we looked at it before we added to the cache - cache.Has(t, "m2", "k0", "v1", m2k0v1) - cache.Has(t, "m2", "k0", "v2", m2k0v2) - cache.Has(t, "m3", "k0", "v0", m3k0v0) -} - -func TestTagValueSeriesIDCache_addToSet(t *testing.T) { - cache := TestCache{NewTagValueSeriesIDCache(4)} - cache.PutByString("m0", "k0", "v0", nil) // Puts a nil set in the cache. - s2 := newSeriesIDSet(100) - cache.PutByString("m0", "k0", "v1", s2) - cache.Has(t, "m0", "k0", "v0", nil) - cache.Has(t, "m0", "k0", "v1", s2) - - cache.addToSet([]byte("m0"), []byte("k0"), []byte("v0"), tsdb.NewSeriesID(20)) // No non-nil set exists so one will be created - cache.addToSet([]byte("m0"), []byte("k0"), []byte("v1"), tsdb.NewSeriesID(101)) // No non-nil set exists so one will be created - cache.Has(t, "m0", "k0", "v1", newSeriesIDSet(100, 101)) - - ss := cache.GetByString("m0", "k0", "v0") - if !newSeriesIDSet(20).Equals(ss) { - t.Fatalf("series id set was %v", ss) - } -} - -func TestTagValueSeriesIDCache_ConcurrentGetPutDelete(t *testing.T) { - t.Skip("https://github.com/influxdata/influxdb/issues/13963") - // Exercise concurrent operations against a series ID cache. - // This will catch any likely data races, when run with the race detector. - - if testing.Short() { - t.Skip("Skipping long test") - } - - t.Parallel() - - const letters = "abcde" - rnd := func(rng *rand.Rand) []byte { - return []byte{letters[rng.Intn(len(letters)-1)]} - } - - cache := TestCache{NewTagValueSeriesIDCache(100)} - done := make(chan struct{}) - var wg sync.WaitGroup - - var seriesIDCounter int32 // Atomic counter to ensure unique series IDs. - for i := 0; i < 5; i++ { - wg.Add(1) - go func() { - defer wg.Done() - - // Local rng to avoid lock contention. - rng := rand.New(rand.NewSource(rand.Int63())) - for { - select { - case <-done: - return - default: - } - nextID := int(atomic.AddInt32(&seriesIDCounter, 1)) - cache.Put(rnd(rng), rnd(rng), rnd(rng), newSeriesIDSet(nextID)) - } - }() - } - - var gets, deletes int32 - for i := 0; i < 5; i++ { - wg.Add(1) - go func() { - defer wg.Done() - - // Local rng to avoid lock contention. 
- rng := rand.New(rand.NewSource(rand.Int63())) - for { - select { - case <-done: - return - default: - } - name, key, value := rnd(rng), rnd(rng), rnd(rng) - if set := cache.Get(name, key, value); set != nil { - ids := set.Slice() - for _, id := range ids { - cache.Delete(name, key, value, tsdb.NewSeriesID(id)) - atomic.AddInt32(&deletes, 1) - } - } - atomic.AddInt32(&gets, 1) - } - }() - } - - time.Sleep(10 * time.Second) - close(done) - wg.Wait() - t.Logf("Concurrently executed against series ID cache: gets=%d puts=%d deletes=%d", gets, seriesIDCounter, deletes) -} - -type TestCache struct { - *TagValueSeriesIDCache -} - -func (c TestCache) Has(t *testing.T, name, key, value string, ss *tsdb.SeriesIDSet) { - if got, exp := c.Get([]byte(name), []byte(key), []byte(value)), ss; !got.Equals(exp) { - t.Helper() - t.Fatalf("got set %v, expected %v", got, exp) - } -} - -func (c TestCache) HasNot(t *testing.T, name, key, value string) { - if got := c.Get([]byte(name), []byte(key), []byte(value)); got != nil { - t.Helper() - t.Fatalf("got non-nil set %v for {%q, %q, %q}", got, name, key, value) - } -} - -func (c TestCache) GetByString(name, key, value string) *tsdb.SeriesIDSet { - return c.Get([]byte(name), []byte(key), []byte(value)) -} - -func (c TestCache) PutByString(name, key, value string, ss *tsdb.SeriesIDSet) { - c.Put([]byte(name), []byte(key), []byte(value), ss) -} diff --git a/tsdb/tsi1/config.go b/tsdb/tsi1/config.go deleted file mode 100644 index 7591781027..0000000000 --- a/tsdb/tsi1/config.go +++ /dev/null @@ -1,44 +0,0 @@ -package tsi1 - -import ( - "time" - - "github.com/influxdata/influxdb/v2/toml" -) - -// DefaultMaxIndexLogFileSize is the default threshold, in bytes, when an index -// write-ahead log file will compact into an index file. -const DefaultMaxIndexLogFileSize = 1 * 1024 * 1024 // 1MB - -// DefaultSeriesIDSetCacheSize is the default number of series ID sets to cache. -const DefaultSeriesIDSetCacheSize = 1000 - -// Config holds configurable Index options. -type Config struct { - // MaxIndexLogFileSize is the threshold, in bytes, when an index write-ahead log file will - // compact into an index file. Lower sizes will cause log files to be compacted more quickly - // and result in lower heap usage at the expense of write throughput. Higher sizes will - // be compacted less frequently, store more series in-memory, and provide higher write throughput. - MaxIndexLogFileSize toml.Size `toml:"max-index-log-file-size"` - - // SeriesIDSetCacheSize determines the size taken up by the cache of series ID - // sets in the index. Since a series id set is a compressed bitmap of all series ids - // matching a tag key/value pair, setting this size does not necessarily limit the - // size on heap the cache takes up. Care should be taken. - // - // The cache uses an LRU strategy for eviction. Setting the value to 0 will - // disable the cache. - SeriesIDSetCacheSize uint64 - - // StatsTTL sets the time-to-live for the stats cache. If zero, then caching - // is disabled. If set then stats are cached for the given amount of time. - StatsTTL time.Duration `toml:"stats-ttl"` -} - -// NewConfig returns a new Config. 
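For illustration, a hedged sketch (not part of this change) of tuning the options documented above, starting from the defaults constructed just below; the import paths reflect the pre-move package locations shown elsewhere in this diff.

package main

import (
	"time"

	"github.com/influxdata/influxdb/v2/toml"
	"github.com/influxdata/influxdb/v2/tsdb/tsi1"
)

func main() {
	cfg := tsi1.NewConfig()                              // defaults: 1MB log files, 1000 cached sets
	cfg.MaxIndexLogFileSize = toml.Size(4 * 1024 * 1024) // compact the index WAL less often
	cfg.SeriesIDSetCacheSize = 0                         // 0 disables the series ID set cache
	cfg.StatsTTL = 30 * time.Second                      // cache cardinality stats for 30 seconds
	_ = cfg
}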
-func NewConfig() Config { - return Config{ - MaxIndexLogFileSize: toml.Size(DefaultMaxIndexLogFileSize), - SeriesIDSetCacheSize: DefaultSeriesIDSetCacheSize, - } -} diff --git a/tsdb/tsi1/doc.go b/tsdb/tsi1/doc.go deleted file mode 100644 index 01fff96baa..0000000000 --- a/tsdb/tsi1/doc.go +++ /dev/null @@ -1,228 +0,0 @@ -/* - -Package tsi1 provides a memory-mapped index implementation that supports -high cardinality series. - -Overview - -The top-level object in tsi1 is the Index. It is the primary access point from -the rest of the system. The Index is composed of LogFile and IndexFile objects. - -Log files are small write-ahead log files that record new series immediately -in the order that they are received. The data within the file is indexed -in-memory so it can be quickly accessed. When the system is restarted, this log -file is replayed and the in-memory representation is rebuilt. - -Index files also contain series information, however, they are highly indexed -so that reads can be performed quickly. Index files are built through a process -called compaction where a log file or multiple index files are merged together. - - -Operations - -The index can perform many tasks related to series, measurement, & tag data. -All data is inserted by adding a series to the index. When adding a series, -the measurement, tag keys, and tag values are all extracted and indexed -separately. - -Once a series has been added, it can be removed in several ways. First, the -individual series can be removed. Second, it can be removed as part of a bulk -operation by deleting the entire measurement. - -The query engine needs to be able to look up series in a variety of ways such -as by measurement name, by tag value, or by using regular expressions. The -index provides an API to iterate over subsets of series and perform set -operations such as unions and intersections. - - -Log File Layout - -The write-ahead file that series initially are inserted into simply appends -all new operations sequentially. It is simply composed of a series of log -entries. An entry contains a flag to specify the operation type, the measurement -name, the tag set, and a checksum. - - ┏━━━━━━━━━LogEntry━━━━━━━━━┓ - ┃ ┌──────────────────────┐ ┃ - ┃ │ Flag │ ┃ - ┃ ├──────────────────────┤ ┃ - ┃ │ Measurement │ ┃ - ┃ ├──────────────────────┤ ┃ - ┃ │ Key/Value │ ┃ - ┃ ├──────────────────────┤ ┃ - ┃ │ Key/Value │ ┃ - ┃ ├──────────────────────┤ ┃ - ┃ │ Key/Value │ ┃ - ┃ ├──────────────────────┤ ┃ - ┃ │ Checksum │ ┃ - ┃ └──────────────────────┘ ┃ - ┗━━━━━━━━━━━━━━━━━━━━━━━━━━┛ - -When the log file is replayed, if the checksum is incorrect or the entry is -incomplete (because of a partially failed write) then the log is truncated. - - -Index File Layout - -The index file is composed of 3 main block types: one series block, one or more -tag blocks, and one measurement block. At the end of the index file is a -trailer that records metadata such as the offsets to these blocks. - - -Series Block Layout - -The series block stores raw series keys in sorted order. It also provides hash -indexes so that series can be looked up quickly. Hash indexes are inserted -periodically so that memory size is limited at write time. Once all the series -and hash indexes have been written then a list of index entries are written -so that hash indexes can be looked up via binary search. After the entries -is a trailer which contains metadata about the block. 
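The "Log File Layout" section above says a replayed log is truncated at the first corrupt or incomplete entry. For illustration, a self-contained sketch of that truncation rule under an assumed length-prefix plus CRC32 framing; this framing is hypothetical and is not the actual tsi1 entry encoding.

package main

import (
	"encoding/binary"
	"fmt"
	"hash/crc32"
)

// replayOffset returns the number of leading bytes of buf that hold valid entries.
func replayOffset(buf []byte) int {
	off := 0
	for {
		if len(buf[off:]) < 4 {
			return off // incomplete length prefix: truncate here
		}
		n := int(binary.BigEndian.Uint32(buf[off:]))
		if len(buf[off+4:]) < n+4 {
			return off // partially written entry: truncate here
		}
		payload := buf[off+4 : off+4+n]
		sum := binary.BigEndian.Uint32(buf[off+4+n:])
		if crc32.ChecksumIEEE(payload) != sum {
			return off // corrupt entry: truncate here
		}
		off += 4 + n + 4
	}
}

func main() {
	entry := func(p []byte) []byte {
		b := make([]byte, 4+len(p)+4)
		binary.BigEndian.PutUint32(b, uint32(len(p)))
		copy(b[4:], p)
		binary.BigEndian.PutUint32(b[4+len(p):], crc32.ChecksumIEEE(p))
		return b
	}
	log := append(entry([]byte("cpu,region=east")), entry([]byte("mem"))...)
	log = append(log, 0x01, 0x02) // simulate a torn write at the tail
	fmt.Println(replayOffset(log), "of", len(log), "bytes are replayable")
}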
- - ┏━━━━━━━SeriesBlock━━━━━━━━┓ - ┃ ┌──────────────────────┐ ┃ - ┃ │ Series Key │ ┃ - ┃ ├──────────────────────┤ ┃ - ┃ │ Series Key │ ┃ - ┃ ├──────────────────────┤ ┃ - ┃ │ Series Key │ ┃ - ┃ ├──────────────────────┤ ┃ - ┃ │ │ ┃ - ┃ │ Hash Index │ ┃ - ┃ │ │ ┃ - ┃ ├──────────────────────┤ ┃ - ┃ │ Series Key │ ┃ - ┃ ├──────────────────────┤ ┃ - ┃ │ Series Key │ ┃ - ┃ ├──────────────────────┤ ┃ - ┃ │ Series Key │ ┃ - ┃ ├──────────────────────┤ ┃ - ┃ │ │ ┃ - ┃ │ Hash Index │ ┃ - ┃ │ │ ┃ - ┃ ├──────────────────────┤ ┃ - ┃ │ Index Entries │ ┃ - ┃ ├──────────────────────┤ ┃ - ┃ │ Trailer │ ┃ - ┃ └──────────────────────┘ ┃ - ┗━━━━━━━━━━━━━━━━━━━━━━━━━━┛ - - -Tag Block Layout - -After the series block is one or more tag blocks. One of these blocks exists -for every measurement in the index file. The block is structured as a sorted -list of values for each key and then a sorted list of keys. Each of these lists -has their own hash index for fast direct lookups. - - ┏━━━━━━━━Tag Block━━━━━━━━━┓ - ┃ ┌──────────────────────┐ ┃ - ┃ │ Value │ ┃ - ┃ ├──────────────────────┤ ┃ - ┃ │ Value │ ┃ - ┃ ├──────────────────────┤ ┃ - ┃ │ Value │ ┃ - ┃ ├──────────────────────┤ ┃ - ┃ │ │ ┃ - ┃ │ Hash Index │ ┃ - ┃ │ │ ┃ - ┃ └──────────────────────┘ ┃ - ┃ ┌──────────────────────┐ ┃ - ┃ │ Value │ ┃ - ┃ ├──────────────────────┤ ┃ - ┃ │ Value │ ┃ - ┃ ├──────────────────────┤ ┃ - ┃ │ │ ┃ - ┃ │ Hash Index │ ┃ - ┃ │ │ ┃ - ┃ └──────────────────────┘ ┃ - ┃ ┌──────────────────────┐ ┃ - ┃ │ Key │ ┃ - ┃ ├──────────────────────┤ ┃ - ┃ │ Key │ ┃ - ┃ ├──────────────────────┤ ┃ - ┃ │ │ ┃ - ┃ │ Hash Index │ ┃ - ┃ │ │ ┃ - ┃ └──────────────────────┘ ┃ - ┃ ┌──────────────────────┐ ┃ - ┃ │ Trailer │ ┃ - ┃ └──────────────────────┘ ┃ - ┗━━━━━━━━━━━━━━━━━━━━━━━━━━┛ - -Each entry for values contains a sorted list of offsets for series keys that use -that value. Series iterators can be built around a single tag key value or -multiple iterators can be merged with set operators such as union or -intersection. - - -Measurement block - -The measurement block stores a sorted list of measurements, their associated -series offsets, and the offset to their tag block. This allows all series for -a measurement to be traversed quickly and it allows fast direct lookups of -measurements and their tags. - - ┏━━━━Measurement Block━━━━━┓ - ┃ ┌──────────────────────┐ ┃ - ┃ │ Measurement │ ┃ - ┃ ├──────────────────────┤ ┃ - ┃ │ Measurement │ ┃ - ┃ ├──────────────────────┤ ┃ - ┃ │ Measurement │ ┃ - ┃ ├──────────────────────┤ ┃ - ┃ │ │ ┃ - ┃ │ Hash Index │ ┃ - ┃ │ │ ┃ - ┃ ├──────────────────────┤ ┃ - ┃ │ Trailer │ ┃ - ┃ └──────────────────────┘ ┃ - ┗━━━━━━━━━━━━━━━━━━━━━━━━━━┛ - - -Manifest file - -The index is simply an ordered set of log and index files. These files can be -merged together or rewritten but their order must always be the same. This is -because series, measurements, & tags can be marked as deleted (aka tombstoned) -and this action needs to be tracked in time order. - -Whenever the set of active files is changed, a manifest file is written to -track the set. The manifest specifies the ordering of files and, on startup, -all files not in the manifest are removed from the index directory. - - -Compacting index files - -Compaction is the process of taking files and merging them together into a -single file. There are two stages of compaction within TSI. - -First, once log files exceed a size threshold then they are compacted into an -index file. 
This threshold is relatively small because log files must maintain -their index in the heap which TSI tries to avoid. Small log files are also very -quick to convert into an index file so this is done aggressively. - -Second, once a contiguous set of index files exceed a factor (e.g. 10x) then -they are all merged together into a single index file and the old files are -discarded. Because all blocks are written in sorted order, the new index file -can be streamed and minimize memory use. - - -Concurrency - -Index files are immutable so they do not require fine grained locks, however, -compactions require that we track which files are in use so they are not -discarded too soon. This is done by using reference counting with file sets. - -A file set is simply an ordered list of index files. When the current file set -is obtained from the index, a counter is incremented to track its usage. Once -the user is done with the file set, it is released and the counter is -decremented. A file cannot be removed from the file system until this counter -returns to zero. - -Besides the reference counting, there are no other locking mechanisms when -reading or writing index files. Log files, however, do require a lock whenever -they are accessed. This is another reason to minimize log file size. - - -*/ -package tsi1 diff --git a/tsdb/tsi1/dump_tsi1.go b/tsdb/tsi1/dump_tsi1.go deleted file mode 100644 index 3272ca6185..0000000000 --- a/tsdb/tsi1/dump_tsi1.go +++ /dev/null @@ -1,378 +0,0 @@ -package tsi1 - -import ( - "context" - "fmt" - "io" - "os" - "path/filepath" - "regexp" - "text/tabwriter" - - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" - "go.uber.org/zap" -) - -// Command represents the program execution for "influxd inspect dump-tsi". -type DumpTSI struct { - // Standard input/output, overridden for testing. - Stderr io.Writer - Stdout io.Writer - - Logger *zap.Logger - - // Optional: defaults to DataPath/_series - SeriesFilePath string - - // root dir of the engine - DataPath string - - ShowSeries bool - ShowMeasurements bool - ShowTagKeys bool - ShowTagValues bool - ShowTagValueSeries bool - - MeasurementFilter *regexp.Regexp - TagKeyFilter *regexp.Regexp - TagValueFilter *regexp.Regexp -} - -// NewCommand returns a new instance of Command. -func NewDumpTSI(logger *zap.Logger) DumpTSI { - dump := DumpTSI{ - Logger: logger, - Stderr: os.Stderr, - Stdout: os.Stdout, - } - return dump -} - -// Run executes the command. -func (cmd *DumpTSI) Run() error { - sfile := seriesfile.NewSeriesFile(cmd.SeriesFilePath) - sfile.Logger = cmd.Logger - if err := sfile.Open(context.Background()); err != nil { - return err - } - defer sfile.Close() - - // Build a file set from the paths on the command line. - idx, fs, err := cmd.readFileSet(sfile) - if err != nil { - return err - } - - if cmd.ShowSeries { - if err := cmd.printSeries(sfile); err != nil { - return err - } - } - - // If this is an ad-hoc fileset then process it and close afterward. - if fs != nil { - defer fs.Release() - if cmd.ShowSeries || cmd.ShowMeasurements { - return cmd.printMeasurements(sfile, fs) - } - return cmd.printFileSummaries(fs) - } - - // Otherwise iterate over each partition in the index. 
-	defer idx.Close()
-	for i := 0; i < int(idx.PartitionN); i++ {
-		if err := func() error {
-			fs, err := idx.PartitionAt(i).FileSet()
-			if err != nil {
-				return err
-			}
-			defer fs.Release()
-
-			if cmd.ShowSeries || cmd.ShowMeasurements {
-				return cmd.printMeasurements(sfile, fs)
-			}
-			return cmd.printFileSummaries(fs)
-		}(); err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
-func (cmd *DumpTSI) readFileSet(sfile *seriesfile.SeriesFile) (*Index, *FileSet, error) {
-	index := NewIndex(sfile, NewConfig(), WithPath(cmd.DataPath), DisableCompactions())
-
-	if err := index.Open(context.Background()); err != nil {
-		return nil, nil, err
-	}
-	return index, nil, nil
-}
-
-func (cmd *DumpTSI) printSeries(sfile *seriesfile.SeriesFile) error {
-	if !cmd.ShowSeries {
-		return nil
-	}
-
-	// Print header.
-	tw := tabwriter.NewWriter(cmd.Stdout, 8, 8, 1, '\t', 0)
-	fmt.Fprintln(tw, "Series\t")
-
-	// Iterate over each series.
-	seriesIDs := sfile.SeriesIDs()
-	for _, seriesID := range seriesIDs {
-		if seriesID.ID == 0 {
-			break
-		}
-		name, tags := seriesfile.ParseSeriesKey(sfile.SeriesKey(seriesID))
-
-		if !cmd.matchSeries(name, tags) {
-			continue
-		}
-
-		deleted := sfile.IsDeleted(seriesID)
-
-		fmt.Fprintf(tw, "%s%s\t%v\n", name, tags.HashKey(), deletedString(deleted))
-	}
-
-	// Flush & write footer spacing.
-	if err := tw.Flush(); err != nil {
-		return err
-	}
-	fmt.Fprint(cmd.Stdout, "\n\n")
-
-	return nil
-}
-
-func (cmd *DumpTSI) printMeasurements(sfile *seriesfile.SeriesFile, fs *FileSet) error {
-	if !cmd.ShowMeasurements {
-		return nil
-	}
-
-	tw := tabwriter.NewWriter(cmd.Stdout, 8, 8, 1, '\t', 0)
-	fmt.Fprintln(tw, "Measurement\t")
-
-	// Iterate over each series.
-	if itr := fs.MeasurementIterator(); itr != nil {
-		for e := itr.Next(); e != nil; e = itr.Next() {
-			if cmd.MeasurementFilter != nil && !cmd.MeasurementFilter.Match(e.Name()) {
-				continue
-			}
-
-			fmt.Fprintf(tw, "%s\t%v\n", e.Name(), deletedString(e.Deleted()))
-			if err := tw.Flush(); err != nil {
-				return err
-			}
-
-			if err := cmd.printTagKeys(sfile, fs, e.Name()); err != nil {
-				return err
-			}
-		}
-	}
-
-	fmt.Fprint(cmd.Stdout, "\n\n")
-
-	return nil
-}
-
-func (cmd *DumpTSI) printTagKeys(sfile *seriesfile.SeriesFile, fs *FileSet, name []byte) error {
-	if !cmd.ShowTagKeys {
-		return nil
-	}
-
-	// Iterate over each key.
-	tw := tabwriter.NewWriter(cmd.Stdout, 8, 8, 1, '\t', 0)
-	itr := fs.TagKeyIterator(name)
-	for e := itr.Next(); e != nil; e = itr.Next() {
-		if cmd.TagKeyFilter != nil && !cmd.TagKeyFilter.Match(e.Key()) {
-			continue
-		}
-
-		fmt.Fprintf(tw, " %s\t%v\n", e.Key(), deletedString(e.Deleted()))
-		if err := tw.Flush(); err != nil {
-			return err
-		}
-
-		if err := cmd.printTagValues(sfile, fs, name, e.Key()); err != nil {
-			return err
-		}
-	}
-	fmt.Fprint(cmd.Stdout, "\n")
-
-	return nil
-}
-
-func (cmd *DumpTSI) printTagValues(sfile *seriesfile.SeriesFile, fs *FileSet, name, key []byte) error {
-	if !cmd.ShowTagValues {
-		return nil
-	}
-
-	// Iterate over each value.
-	tw := tabwriter.NewWriter(cmd.Stdout, 8, 8, 1, '\t', 0)
-	itr := fs.TagValueIterator(name, key)
-	for e := itr.Next(); e != nil; e = itr.Next() {
-		if cmd.TagValueFilter != nil && !cmd.TagValueFilter.Match(e.Value()) {
-			continue
-		}
-
-		fmt.Fprintf(tw, " %s\t%v\n", e.Value(), deletedString(e.Deleted()))
-		if err := tw.Flush(); err != nil {
-			return err
-		}
-
-		if err := cmd.printTagValueSeries(sfile, fs, name, key, e.Value()); err != nil {
-			return err
-		}
-	}
-	fmt.Fprint(cmd.Stdout, "\n")
-
-	return nil
-}
-
-func (cmd *DumpTSI) printTagValueSeries(sfile *seriesfile.SeriesFile, fs *FileSet, name, key, value []byte) error {
-	if !cmd.ShowTagValueSeries {
-		return nil
-	}
-
-	// Iterate over each series.
-	tw := tabwriter.NewWriter(cmd.Stdout, 8, 8, 1, '\t', 0)
-	itr, err := fs.TagValueSeriesIDIterator(name, key, value)
-	if err != nil {
-		return err
-	}
-	for {
-		e, err := itr.Next()
-		if err != nil {
-			return err
-		} else if e.SeriesID.ID == 0 {
-			break
-		}
-
-		name, tags := seriesfile.ParseSeriesKey(sfile.SeriesKey(e.SeriesID))
-
-		if !cmd.matchSeries(name, tags) {
-			continue
-		}
-
-		fmt.Fprintf(tw, " %s%s\n", name, tags.HashKey())
-		if err := tw.Flush(); err != nil {
-			return err
-		}
-	}
-	fmt.Fprint(cmd.Stdout, "\n")
-
-	return nil
-}
-
-func (cmd *DumpTSI) printFileSummaries(fs *FileSet) error {
-	for _, f := range fs.Files() {
-		switch f := f.(type) {
-		case *LogFile:
-			if err := cmd.printLogFileSummary(f); err != nil {
-				return err
-			}
-		case *IndexFile:
-			if err := cmd.printIndexFileSummary(f); err != nil {
-				return err
-			}
-		default:
-			panic("unreachable")
-		}
-		fmt.Fprintln(cmd.Stdout, "")
-	}
-	return nil
-}
-
-func (cmd *DumpTSI) printLogFileSummary(f *LogFile) error {
-	fmt.Fprintf(cmd.Stdout, "[LOG FILE] %s\n", filepath.Base(f.Path()))
-	tw := tabwriter.NewWriter(cmd.Stdout, 8, 8, 1, '\t', 0)
-	fmt.Fprintf(tw, "Series:\t%d\n", f.SeriesN())
-	fmt.Fprintf(tw, "Measurements:\t%d\n", f.MeasurementN())
-	fmt.Fprintf(tw, "Tag Keys:\t%d\n", f.TagKeyN())
-	fmt.Fprintf(tw, "Tag Values:\t%d\n", f.TagValueN())
-	return tw.Flush()
-}
-
-func (cmd *DumpTSI) printIndexFileSummary(f *IndexFile) error {
-	fmt.Fprintf(cmd.Stdout, "[INDEX FILE] %s\n", filepath.Base(f.Path()))
-
-	// Calculate summary stats.
-	var measurementN, measurementSeriesN, measurementSeriesSize uint64
-	var keyN uint64
-	var valueN, valueSeriesN, valueSeriesSize uint64
-
-	if mitr := f.MeasurementIterator(); mitr != nil {
-		for me, _ := mitr.Next().(*MeasurementBlockElem); me != nil; me, _ = mitr.Next().(*MeasurementBlockElem) {
-			kitr := f.TagKeyIterator(me.Name())
-			for ke, _ := kitr.Next().(*TagBlockKeyElem); ke != nil; ke, _ = kitr.Next().(*TagBlockKeyElem) {
-				vitr := f.TagValueIterator(me.Name(), ke.Key())
-				for ve, _ := vitr.Next().(*TagBlockValueElem); ve != nil; ve, _ = vitr.Next().(*TagBlockValueElem) {
-					valueN++
-					valueSeriesN += uint64(ve.SeriesN())
-					valueSeriesSize += uint64(len(ve.SeriesData()))
-				}
-				keyN++
-			}
-			measurementN++
-			measurementSeriesN += uint64(me.SeriesN())
-			measurementSeriesSize += uint64(len(me.SeriesData()))
-		}
-	}
-
-	// Write stats.
- tw := tabwriter.NewWriter(cmd.Stdout, 8, 8, 1, '\t', 0) - fmt.Fprintf(tw, "Measurements:\t%d\n", measurementN) - fmt.Fprintf(tw, " Series data size:\t%d (%s)\n", measurementSeriesSize, formatSize(measurementSeriesSize)) - fmt.Fprintf(tw, " Bytes per series:\t%.01fb\n", float64(measurementSeriesSize)/float64(measurementSeriesN)) - fmt.Fprintf(tw, "Tag Keys:\t%d\n", keyN) - fmt.Fprintf(tw, "Tag Values:\t%d\n", valueN) - fmt.Fprintf(tw, " Series:\t%d\n", valueSeriesN) - fmt.Fprintf(tw, " Series data size:\t%d (%s)\n", valueSeriesSize, formatSize(valueSeriesSize)) - fmt.Fprintf(tw, " Bytes per series:\t%.01fb\n", float64(valueSeriesSize)/float64(valueSeriesN)) - return tw.Flush() -} - -// matchSeries returns true if the command filters matches the series. -func (cmd *DumpTSI) matchSeries(name []byte, tags models.Tags) bool { - // Filter by measurement. - if cmd.MeasurementFilter != nil && !cmd.MeasurementFilter.Match(name) { - return false - } - - // Filter by tag key/value. - if cmd.TagKeyFilter != nil || cmd.TagValueFilter != nil { - var matched bool - for _, tag := range tags { - if (cmd.TagKeyFilter == nil || cmd.TagKeyFilter.Match(tag.Key)) && (cmd.TagValueFilter == nil || cmd.TagValueFilter.Match(tag.Value)) { - matched = true - break - } - } - if !matched { - return false - } - } - - return true -} - -// deletedString returns "(deleted)" if v is true. -func deletedString(v bool) string { - if v { - return "(deleted)" - } - return "" -} - -func formatSize(v uint64) string { - denom := uint64(1) - var uom string - for _, uom = range []string{"b", "kb", "mb", "gb", "tb"} { - if denom*1024 > v { - break - } - denom *= 1024 - } - return fmt.Sprintf("%0.01f%s", float64(v)/float64(denom), uom) -} diff --git a/tsdb/tsi1/file_set.go b/tsdb/tsi1/file_set.go deleted file mode 100644 index ef99a667c4..0000000000 --- a/tsdb/tsi1/file_set.go +++ /dev/null @@ -1,568 +0,0 @@ -package tsi1 - -import ( - "bytes" - "errors" - "fmt" - "regexp" - "unsafe" - - "github.com/influxdata/influxdb/v2/pkg/lifecycle" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" - "github.com/influxdata/influxql" -) - -// FileSet represents a collection of files. -type FileSet struct { - sfile *seriesfile.SeriesFile - sfileref *lifecycle.Reference - files []File - filesref lifecycle.References - manifestSize int64 // Size of the manifest file in bytes. -} - -// NewFileSet returns a new instance of FileSet. -func NewFileSet(sfile *seriesfile.SeriesFile, files []File) (*FileSet, error) { - // First try to acquire a reference to the series file. - sfileref, err := sfile.Acquire() - if err != nil { - return nil, err - } - - // Next, acquire references to all of the passed in files. - filesref := make(lifecycle.References, 0, len(files)) - for _, f := range files { - ref, err := f.Acquire() - if err != nil { - filesref.Release() - sfileref.Release() - return nil, err - } - filesref = append(filesref, ref) - } - - return &FileSet{ - sfile: sfile, - sfileref: sfileref, - files: files, - filesref: filesref, - }, nil -} - -// bytes estimates the memory footprint of this FileSet, in bytes. -func (fs *FileSet) bytes() int { - var b int - // Do not count SeriesFile because it belongs to the code that constructed this FileSet. - for _, file := range fs.files { - b += file.bytes() - } - b += int(unsafe.Sizeof(*fs)) - return b -} - -func (fs *FileSet) SeriesFile() *seriesfile.SeriesFile { return fs.sfile } - -// Release releases all resources on the file set. 
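NewFileSet above acquires a reference to the series file and to every file before the set is handed out, and Release gives those references back. For illustration, a minimal standalone sketch of that reference-counting idea; it is only an analogy, not the lifecycle package's actual API, and the names are hypothetical.

package main

import (
	"errors"
	"fmt"
	"sync"
)

type resource struct {
	mu     sync.Mutex
	refs   int
	closed bool
}

// acquire hands out a reference; the caller must call the returned release func.
func (r *resource) acquire() (release func(), err error) {
	r.mu.Lock()
	defer r.mu.Unlock()
	if r.closed {
		return nil, errors.New("resource closed")
	}
	r.refs++
	return func() { r.mu.Lock(); r.refs--; r.mu.Unlock() }, nil
}

// close succeeds only once every acquired reference has been released.
func (r *resource) close() error {
	r.mu.Lock()
	defer r.mu.Unlock()
	if r.refs > 0 {
		return fmt.Errorf("still in use by %d reference(s)", r.refs)
	}
	r.closed = true
	return nil
}

func main() {
	var r resource
	release, _ := r.acquire()
	fmt.Println(r.close()) // still in use by 1 reference(s)
	release()
	fmt.Println(r.close()) // <nil>
}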
-func (fs *FileSet) Release() { - fs.filesref.Release() - fs.sfileref.Release() -} - -// Duplicate returns a copy of the FileSet, acquiring another resource to the -// files and series file for the file set. -func (fs *FileSet) Duplicate() (*FileSet, error) { - return NewFileSet(fs.sfile, fs.files) -} - -// PrependLogFile returns a new file set with f added at the beginning. -// Filters do not need to be rebuilt because log files have no bloom filter. -func (fs *FileSet) PrependLogFile(f *LogFile) (*FileSet, error) { - return NewFileSet(fs.sfile, append([]File{f}, fs.files...)) -} - -// Size returns the on-disk size of the FileSet. -func (fs *FileSet) Size() int64 { - var total int64 - for _, f := range fs.files { - total += f.Size() - } - return total + int64(fs.manifestSize) -} - -// MustReplace swaps a list of files for a single file and returns a new file set. -// The caller should always guarantee that the files exist and are contiguous. -func (fs *FileSet) MustReplace(oldFiles []File, newFile File) (*FileSet, error) { - assert(len(oldFiles) > 0, "cannot replace empty files") - - // Find index of first old file. - var i int - for ; i < len(fs.files); i++ { - if fs.files[i] == oldFiles[0] { - break - } else if i == len(fs.files)-1 { - return nil, errors.New("first replacement file not found") - } - } - - // Ensure all old files are contiguous. - for j := range oldFiles { - if fs.files[i+j] != oldFiles[j] { - return nil, fmt.Errorf("cannot replace non-contiguous files: subset=%+v, fileset=%+v", Files(oldFiles).IDs(), Files(fs.files).IDs()) - } - } - - // Copy to new fileset. - other := make([]File, len(fs.files)-len(oldFiles)+1) - copy(other[:i], fs.files[:i]) - other[i] = newFile - copy(other[i+1:], fs.files[i+len(oldFiles):]) - - // Build new fileset and rebuild changed filters. - return NewFileSet(fs.sfile, other) -} - -// MaxID returns the highest file identifier. -func (fs *FileSet) MaxID() int { - var max int - for _, f := range fs.files { - if i := f.ID(); i > max { - max = i - } - } - return max -} - -// Files returns all files in the set. -func (fs *FileSet) Files() []File { - return fs.files -} - -// LogFiles returns all log files from the file set. -func (fs *FileSet) LogFiles() []*LogFile { - var a []*LogFile - for _, f := range fs.files { - if f, ok := f.(*LogFile); ok { - a = append(a, f) - } - } - return a -} - -// IndexFiles returns all index files from the file set. -func (fs *FileSet) IndexFiles() []*IndexFile { - var a []*IndexFile - for _, f := range fs.files { - if f, ok := f.(*IndexFile); ok { - a = append(a, f) - } - } - return a -} - -// LastContiguousIndexFilesByLevel returns the last contiguous files by level. -// These can be used by the compaction scheduler. -func (fs *FileSet) LastContiguousIndexFilesByLevel(level int) []*IndexFile { - if level == 0 { - return nil - } - - var a []*IndexFile - for i := len(fs.files) - 1; i >= 0; i-- { - f := fs.files[i] - - // Ignore files above level, stop on files below level. - if level < f.Level() { - continue - } else if level > f.Level() { - break - } - - a = append([]*IndexFile{f.(*IndexFile)}, a...) - } - return a -} - -// Measurement returns a measurement by name. -func (fs *FileSet) Measurement(name []byte) MeasurementElem { - for _, f := range fs.files { - if e := f.Measurement(name); e == nil { - continue - } else if e.Deleted() { - return nil - } else { - return e - } - } - return nil -} - -// MeasurementIterator returns an iterator over all measurements in the index. 
-func (fs *FileSet) MeasurementIterator() MeasurementIterator { - a := make([]MeasurementIterator, 0, len(fs.files)) - for _, f := range fs.files { - itr := f.MeasurementIterator() - if itr != nil { - a = append(a, itr) - } - } - return MergeMeasurementIterators(a...) -} - -// TagKeyIterator returns an iterator over all tag keys for a measurement. -func (fs *FileSet) TagKeyIterator(name []byte) TagKeyIterator { - a := make([]TagKeyIterator, 0, len(fs.files)) - for _, f := range fs.files { - itr := f.TagKeyIterator(name) - if itr != nil { - a = append(a, itr) - } - } - return MergeTagKeyIterators(a...) -} - -// MeasurementSeriesIDIterator returns a series iterator for a measurement. -func (fs *FileSet) MeasurementSeriesIDIterator(name []byte) tsdb.SeriesIDIterator { - a := make([]tsdb.SeriesIDIterator, 0, len(fs.files)) - for _, f := range fs.files { - itr := f.MeasurementSeriesIDIterator(name) - if itr != nil { - a = append(a, itr) - } - } - return tsdb.MergeSeriesIDIterators(a...) -} - -// MeasurementTagKeysByExpr extracts the tag keys wanted by the expression. -func (fs *FileSet) MeasurementTagKeysByExpr(name []byte, expr influxql.Expr) (map[string]struct{}, error) { - // Return all keys if no condition was passed in. - if expr == nil { - m := make(map[string]struct{}) - if itr := fs.TagKeyIterator(name); itr != nil { - for e := itr.Next(); e != nil; e = itr.Next() { - m[string(e.Key())] = struct{}{} - } - } - return m, nil - } - - switch e := expr.(type) { - case *influxql.BinaryExpr: - switch e.Op { - case influxql.EQ, influxql.NEQ, influxql.EQREGEX, influxql.NEQREGEX: - tag, ok := e.LHS.(*influxql.VarRef) - if !ok { - return nil, fmt.Errorf("left side of '%s' must be a tag key", e.Op.String()) - } else if tag.Val != "_tagKey" { - return nil, nil - } - - if influxql.IsRegexOp(e.Op) { - re, ok := e.RHS.(*influxql.RegexLiteral) - if !ok { - return nil, fmt.Errorf("right side of '%s' must be a regular expression", e.Op.String()) - } - return fs.tagKeysByFilter(name, e.Op, nil, re.Val), nil - } - - s, ok := e.RHS.(*influxql.StringLiteral) - if !ok { - return nil, fmt.Errorf("right side of '%s' must be a tag value string", e.Op.String()) - } - return fs.tagKeysByFilter(name, e.Op, []byte(s.Val), nil), nil - - case influxql.AND, influxql.OR: - lhs, err := fs.MeasurementTagKeysByExpr(name, e.LHS) - if err != nil { - return nil, err - } - - rhs, err := fs.MeasurementTagKeysByExpr(name, e.RHS) - if err != nil { - return nil, err - } - - if lhs != nil && rhs != nil { - if e.Op == influxql.OR { - return unionStringSets(lhs, rhs), nil - } - return intersectStringSets(lhs, rhs), nil - } else if lhs != nil { - return lhs, nil - } else if rhs != nil { - return rhs, nil - } - return nil, nil - default: - return nil, fmt.Errorf("invalid operator") - } - - case *influxql.ParenExpr: - return fs.MeasurementTagKeysByExpr(name, e.Expr) - } - - return nil, fmt.Errorf("%#v", expr) -} - -// tagKeysByFilter will filter the tag keys for the measurement. 
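The AND and OR branches above defer to unionStringSets and intersectStringSets, which are defined elsewhere in this file. For illustration only, a minimal standalone version of those set operations might look like the following sketch; it is not the package's actual implementation.

package main

import "fmt"

func union(a, b map[string]struct{}) map[string]struct{} {
	out := make(map[string]struct{}, len(a)+len(b))
	for k := range a {
		out[k] = struct{}{}
	}
	for k := range b {
		out[k] = struct{}{}
	}
	return out
}

func intersect(a, b map[string]struct{}) map[string]struct{} {
	out := make(map[string]struct{})
	for k := range a {
		if _, ok := b[k]; ok {
			out[k] = struct{}{}
		}
	}
	return out
}

func main() {
	lhs := map[string]struct{}{"region": {}, "host": {}}
	rhs := map[string]struct{}{"host": {}, "rack": {}}
	fmt.Println(len(union(lhs, rhs)), len(intersect(lhs, rhs))) // 3 1
}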
-func (fs *FileSet) tagKeysByFilter(name []byte, op influxql.Token, val []byte, regex *regexp.Regexp) map[string]struct{} { - ss := make(map[string]struct{}) - itr := fs.TagKeyIterator(name) - if itr != nil { - for e := itr.Next(); e != nil; e = itr.Next() { - var matched bool - switch op { - case influxql.EQ: - matched = bytes.Equal(e.Key(), val) - case influxql.NEQ: - matched = !bytes.Equal(e.Key(), val) - case influxql.EQREGEX: - matched = regex.Match(e.Key()) - case influxql.NEQREGEX: - matched = !regex.Match(e.Key()) - } - - if !matched { - continue - } - ss[string(e.Key())] = struct{}{} - } - } - return ss -} - -// TagKeySeriesIDIterator returns a series iterator for all values across a single key. -func (fs *FileSet) TagKeySeriesIDIterator(name, key []byte) (tsdb.SeriesIDIterator, error) { - a := make([]tsdb.SeriesIDIterator, 0, len(fs.files)) - for _, f := range fs.files { - itr, err := f.TagKeySeriesIDIterator(name, key) - if err != nil { - return nil, err - } else if itr != nil { - a = append(a, itr) - } - } - return tsdb.MergeSeriesIDIterators(a...), nil -} - -// HasTagKey returns true if the tag key exists. -func (fs *FileSet) HasTagKey(name, key []byte) bool { - for _, f := range fs.files { - if e := f.TagKey(name, key); e != nil { - return !e.Deleted() - } - } - return false -} - -// HasTagValue returns true if the tag value exists. -func (fs *FileSet) HasTagValue(name, key, value []byte) bool { - for _, f := range fs.files { - if e := f.TagValue(name, key, value); e != nil { - return !e.Deleted() - } - } - return false -} - -// TagValueIterator returns a value iterator for a tag key. -func (fs *FileSet) TagValueIterator(name, key []byte) TagValueIterator { - a := make([]TagValueIterator, 0, len(fs.files)) - for _, f := range fs.files { - itr := f.TagValueIterator(name, key) - if itr != nil { - a = append(a, itr) - } - } - return MergeTagValueIterators(a...) -} - -// TagValueSeriesIDIterator returns a series iterator for a single tag value. -func (fs *FileSet) TagValueSeriesIDIterator(name, key, value []byte) (tsdb.SeriesIDIterator, error) { - ss := tsdb.NewSeriesIDSet() - - var ftss *tsdb.SeriesIDSet - for i := len(fs.files) - 1; i >= 0; i-- { - f := fs.files[i] - - // Remove tombstones set in previous file. - if ftss != nil && ftss.Cardinality() > 0 { - ss.RemoveSet(ftss) - } - - // Fetch tag value series set for this file and merge into overall set. - fss, err := f.TagValueSeriesIDSet(name, key, value) - if err != nil { - return nil, err - } else if fss != nil { - ss.Merge(fss) - } - - // Fetch tombstone set to be processed on next file. - if ftss, err = f.TombstoneSeriesIDSet(); err != nil { - return nil, err - } - } - return tsdb.NewSeriesIDSetIterator(ss), nil -} - -// File represents a log or index file. -type File interface { - Close() error - Path() string - - ID() int - Level() int - - Measurement(name []byte) MeasurementElem - MeasurementIterator() MeasurementIterator - MeasurementHasSeries(ss *tsdb.SeriesIDSet, name []byte) bool - - TagKey(name, key []byte) TagKeyElem - TagKeyIterator(name []byte) TagKeyIterator - - TagValue(name, key, value []byte) TagValueElem - TagValueIterator(name, key []byte) TagValueIterator - - // Series iteration. - MeasurementSeriesIDIterator(name []byte) tsdb.SeriesIDIterator - TagKeySeriesIDIterator(name, key []byte) (tsdb.SeriesIDIterator, error) - TagValueSeriesIDSet(name, key, value []byte) (*tsdb.SeriesIDSet, error) - - // Bitmap series existence. 
- SeriesIDSet() (*tsdb.SeriesIDSet, error) - TombstoneSeriesIDSet() (*tsdb.SeriesIDSet, error) - - // Reference counting. - Acquire() (*lifecycle.Reference, error) - - // Size of file on disk - Size() int64 - - // Estimated memory footprint - bytes() int -} - -type Files []File - -func (a Files) IDs() []int { - ids := make([]int, len(a)) - for i := range a { - ids[i] = a[i].ID() - } - return ids -} - -// fileSetSeriesIDIterator attaches a fileset to an iterator that is released on close. -type fileSetSeriesIDIterator struct { - fs *FileSet - itr tsdb.SeriesIDIterator -} - -func newFileSetSeriesIDIterator(fs *FileSet, itr tsdb.SeriesIDIterator) tsdb.SeriesIDIterator { - if itr == nil { - fs.Release() - return nil - } - if itr, ok := itr.(tsdb.SeriesIDSetIterator); ok { - return &fileSetSeriesIDSetIterator{fs: fs, itr: itr} - } - return &fileSetSeriesIDIterator{fs: fs, itr: itr} -} - -func (itr *fileSetSeriesIDIterator) Next() (tsdb.SeriesIDElem, error) { - return itr.itr.Next() -} - -func (itr *fileSetSeriesIDIterator) Close() error { - itr.fs.Release() - return itr.itr.Close() -} - -// fileSetSeriesIDSetIterator attaches a fileset to an iterator that is released on close. -type fileSetSeriesIDSetIterator struct { - fs *FileSet - itr tsdb.SeriesIDSetIterator -} - -func (itr *fileSetSeriesIDSetIterator) Next() (tsdb.SeriesIDElem, error) { - return itr.itr.Next() -} - -func (itr *fileSetSeriesIDSetIterator) Close() error { - itr.fs.Release() - return itr.itr.Close() -} - -func (itr *fileSetSeriesIDSetIterator) SeriesIDSet() *tsdb.SeriesIDSet { - return itr.itr.SeriesIDSet() -} - -// fileSetMeasurementIterator attaches a fileset to an iterator that is released on close. -type fileSetMeasurementIterator struct { - fs *FileSet - itr tsdb.MeasurementIterator -} - -func newFileSetMeasurementIterator(fs *FileSet, itr tsdb.MeasurementIterator) tsdb.MeasurementIterator { - if itr == nil { - fs.Release() - return nil - } - return &fileSetMeasurementIterator{fs: fs, itr: itr} -} - -func (itr *fileSetMeasurementIterator) Next() ([]byte, error) { - return itr.itr.Next() -} - -func (itr *fileSetMeasurementIterator) Close() error { - itr.fs.Release() - return itr.itr.Close() -} - -// fileSetTagKeyIterator attaches a fileset to an iterator that is released on close. -type fileSetTagKeyIterator struct { - fs *FileSet - itr tsdb.TagKeyIterator -} - -func newFileSetTagKeyIterator(fs *FileSet, itr tsdb.TagKeyIterator) tsdb.TagKeyIterator { - if itr == nil { - fs.Release() - return nil - } - return &fileSetTagKeyIterator{fs: fs, itr: itr} -} - -func (itr *fileSetTagKeyIterator) Next() ([]byte, error) { - return itr.itr.Next() -} - -func (itr *fileSetTagKeyIterator) Close() error { - itr.fs.Release() - return itr.itr.Close() -} - -// fileSetTagValueIterator attaches a fileset to an iterator that is released on close. 
-type fileSetTagValueIterator struct { - fs *FileSet - itr tsdb.TagValueIterator -} - -func newFileSetTagValueIterator(fs *FileSet, itr tsdb.TagValueIterator) tsdb.TagValueIterator { - if itr == nil { - fs.Release() - return nil - } - return &fileSetTagValueIterator{fs: fs, itr: itr} -} - -func (itr *fileSetTagValueIterator) Next() ([]byte, error) { - return itr.itr.Next() -} - -func (itr *fileSetTagValueIterator) Close() error { - itr.fs.Release() - return itr.itr.Close() -} diff --git a/tsdb/tsi1/file_set_test.go b/tsdb/tsi1/file_set_test.go deleted file mode 100644 index 5250129791..0000000000 --- a/tsdb/tsi1/file_set_test.go +++ /dev/null @@ -1,318 +0,0 @@ -package tsi1_test - -import ( - "fmt" - "reflect" - "sort" - "testing" - - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" - "github.com/influxdata/influxdb/v2/tsdb/tsi1" -) - -// Ensure fileset can return an iterator over all series in the index. -func TestFileSet_SeriesIDIterator(t *testing.T) { - idx := MustOpenIndex(1, tsi1.NewConfig()) - defer idx.Close() - - // Create initial set of series. - if err := idx.CreateSeriesSliceIfNotExists([]Series{ - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"}), Type: models.Integer}, - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"}), Type: models.Integer}, - {Name: []byte("mem"), Tags: models.NewTags(map[string]string{"region": "east"}), Type: models.Integer}, - }); err != nil { - t.Fatal(err) - } - - // Verify initial set of series. - idx.Run(t, func(t *testing.T) { - fs, err := idx.PartitionAt(0).FileSet() - if err != nil { - t.Fatal(err) - } - defer fs.Release() - - seriesIDs := fs.SeriesFile().SeriesIDs() - if result := seriesIDsToStrings(fs.SeriesFile(), seriesIDs); !reflect.DeepEqual(result, []string{ - "cpu,[{region east}]", - "cpu,[{region west}]", - "mem,[{region east}]", - }) { - t.Fatalf("unexpected keys: %s", result) - } - }) - - // Add more series. - if err := idx.CreateSeriesSliceIfNotExists([]Series{ - {Name: []byte("disk"), Type: models.Integer}, - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "north"}), Type: models.Integer}, - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"}), Type: models.Integer}, - }); err != nil { - t.Fatal(err) - } - - // Verify additional series. - idx.Run(t, func(t *testing.T) { - fs, err := idx.PartitionAt(0).FileSet() - if err != nil { - t.Fatal(err) - } - defer fs.Release() - - seriesIDs := fs.SeriesFile().SeriesIDs() - if result := seriesIDsToStrings(fs.SeriesFile(), seriesIDs); !reflect.DeepEqual(result, []string{ - "cpu,[{region east}]", - "cpu,[{region north}]", - "cpu,[{region west}]", - "disk,[]", - "mem,[{region east}]", - }) { - t.Fatalf("unexpected keys: %s", result) - } - }) -} - -// Ensure fileset can return an iterator over all series for one measurement. -func TestFileSet_MeasurementSeriesIDIterator(t *testing.T) { - idx := MustOpenIndex(1, tsi1.NewConfig()) - defer idx.Close() - - // Create initial set of series. 
- if err := idx.CreateSeriesSliceIfNotExists([]Series{ - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"}), Type: models.Integer}, - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"}), Type: models.Integer}, - {Name: []byte("mem"), Tags: models.NewTags(map[string]string{"region": "east"}), Type: models.Integer}, - }); err != nil { - t.Fatal(err) - } - - // Verify initial set of series. - idx.Run(t, func(t *testing.T) { - fs, err := idx.PartitionAt(0).FileSet() - if err != nil { - t.Fatal(err) - } - defer fs.Release() - - itr := fs.MeasurementSeriesIDIterator([]byte("cpu")) - if itr == nil { - t.Fatal("expected iterator") - } - - if result := mustReadAllSeriesIDIteratorString(fs.SeriesFile(), itr); !reflect.DeepEqual(result, []string{ - "cpu,[{region east}]", - "cpu,[{region west}]", - }) { - t.Fatalf("unexpected keys: %s", result) - } - }) - - // Add more series. - if err := idx.CreateSeriesSliceIfNotExists([]Series{ - {Name: []byte("disk")}, - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "north"})}, - }); err != nil { - t.Fatal(err) - } - - // Verify additional series. - idx.Run(t, func(t *testing.T) { - fs, err := idx.PartitionAt(0).FileSet() - if err != nil { - t.Fatal(err) - } - defer fs.Release() - - itr := fs.MeasurementSeriesIDIterator([]byte("cpu")) - if itr == nil { - t.Fatalf("expected iterator") - } - - if result := mustReadAllSeriesIDIteratorString(fs.SeriesFile(), itr); !reflect.DeepEqual(result, []string{ - "cpu,[{region east}]", - "cpu,[{region north}]", - "cpu,[{region west}]", - }) { - t.Fatalf("unexpected keys: %s", result) - } - }) -} - -// Ensure fileset can return an iterator over all measurements for the index. -func TestFileSet_MeasurementIterator(t *testing.T) { - idx := MustOpenIndex(1, tsi1.NewConfig()) - defer idx.Close() - - // Create initial set of series. - if err := idx.CreateSeriesSliceIfNotExists([]Series{ - {Name: []byte("cpu"), Type: models.Integer}, - {Name: []byte("mem"), Type: models.Integer}, - }); err != nil { - t.Fatal(err) - } - - // Verify initial set of series. - idx.Run(t, func(t *testing.T) { - fs, err := idx.PartitionAt(0).FileSet() - if err != nil { - t.Fatal(err) - } - defer fs.Release() - - itr := fs.MeasurementIterator() - if itr == nil { - t.Fatal("expected iterator") - } - - expectedNames := []string{"cpu", "mem", ""} // Empty string implies end - for _, name := range expectedNames { - e := itr.Next() - if name == "" && e != nil { - t.Errorf("got measurement %s, expected nil measurement", e.Name()) - } else if e == nil && name != "" { - t.Errorf("got nil measurement, expected %s", name) - } else if e != nil && string(e.Name()) != name { - t.Errorf("got measurement %s, expected %s", e.Name(), name) - } - } - }) - - // Add more series. - if err := idx.CreateSeriesSliceIfNotExists([]Series{ - {Name: []byte("disk"), Tags: models.NewTags(map[string]string{"foo": "bar"}), Type: models.Integer}, - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "north", "x": "y"}), Type: models.Integer}, - }); err != nil { - t.Fatal(err) - } - - // Verify additional series. 
- idx.Run(t, func(t *testing.T) { - fs, err := idx.PartitionAt(0).FileSet() - if err != nil { - t.Fatal(err) - } - defer fs.Release() - - itr := fs.MeasurementIterator() - if itr == nil { - t.Fatal("expected iterator") - } - - expectedNames := []string{"cpu", "disk", "mem", ""} // Empty string implies end - for _, name := range expectedNames { - e := itr.Next() - if name == "" && e != nil { - t.Errorf("got measurement %s, expected nil measurement", e.Name()) - } else if e == nil && name != "" { - t.Errorf("got nil measurement, expected %s", name) - } else if e != nil && string(e.Name()) != name { - t.Errorf("got measurement %s, expected %s", e.Name(), name) - } - } - }) -} - -// Ensure fileset can return an iterator over all keys for one measurement. -func TestFileSet_TagKeyIterator(t *testing.T) { - idx := MustOpenIndex(1, tsi1.NewConfig()) - defer idx.Close() - - // Create initial set of series. - if err := idx.CreateSeriesSliceIfNotExists([]Series{ - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"}), Type: models.Integer}, - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west", "type": "gpu"}), Type: models.Integer}, - {Name: []byte("mem"), Tags: models.NewTags(map[string]string{"region": "east", "misc": "other"}), Type: models.Integer}, - }); err != nil { - t.Fatal(err) - } - - // Verify initial set of series. - idx.Run(t, func(t *testing.T) { - fs, err := idx.PartitionAt(0).FileSet() - if err != nil { - t.Fatal(err) - } - defer fs.Release() - - itr := fs.TagKeyIterator([]byte("cpu")) - if itr == nil { - t.Fatalf("expected iterator") - } - - if e := itr.Next(); string(e.Key()) != `region` { - t.Fatalf("unexpected key: %s", e.Key()) - } else if e := itr.Next(); string(e.Key()) != `type` { - t.Fatalf("unexpected key: %s", e.Key()) - } else if e := itr.Next(); e != nil { - t.Fatalf("expected nil key: %s", e.Key()) - } - }) - - // Add more series. - if err := idx.CreateSeriesSliceIfNotExists([]Series{ - {Name: []byte("disk"), Tags: models.NewTags(map[string]string{"foo": "bar"})}, - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "north", "x": "y"})}, - }); err != nil { - t.Fatal(err) - } - - // Verify additional series. - idx.Run(t, func(t *testing.T) { - fs, err := idx.PartitionAt(0).FileSet() - if err != nil { - t.Fatal(err) - } - defer fs.Release() - - itr := fs.TagKeyIterator([]byte("cpu")) - if itr == nil { - t.Fatal("expected iterator") - } - - if e := itr.Next(); string(e.Key()) != `region` { - t.Fatalf("unexpected key: %s", e.Key()) - } else if e := itr.Next(); string(e.Key()) != `type` { - t.Fatalf("unexpected key: %s", e.Key()) - } else if e := itr.Next(); string(e.Key()) != `x` { - t.Fatalf("unexpected key: %s", e.Key()) - } else if e := itr.Next(); e != nil { - t.Fatalf("expected nil key: %s", e.Key()) - } - }) -} - -func mustReadAllSeriesIDIteratorString(sfile *seriesfile.SeriesFile, itr tsdb.SeriesIDIterator) []string { - if itr == nil { - return nil - } - - // Read all ids. - var ids []tsdb.SeriesID - for { - e, err := itr.Next() - if err != nil { - panic(err) - } else if e.SeriesID.IsZero() { - break - } - ids = append(ids, e.SeriesID) - } - - return seriesIDsToStrings(sfile, ids) -} - -func seriesIDsToStrings(sfile *seriesfile.SeriesFile, ids []tsdb.SeriesID) []string { - // Convert to keys and sort. - keys := sfile.SeriesKeys(ids) - sort.Slice(keys, func(i, j int) bool { return seriesfile.CompareSeriesKeys(keys[i], keys[j]) == -1 }) - - // Convert to strings. 
-	a := make([]string, len(keys))
-	for i := range a {
-		name, tags := seriesfile.ParseSeriesKey(keys[i])
-		a[i] = fmt.Sprintf("%s,%s", name, tags.String())
-	}
-	return a
-}
diff --git a/tsdb/tsi1/gen_test.go b/tsdb/tsi1/gen_test.go
deleted file mode 100644
index 9f658bcfad..0000000000
--- a/tsdb/tsi1/gen_test.go
+++ /dev/null
@@ -1,14 +0,0 @@
-//go:generate sh -c "curl -L https://github.com/influxdata/testdata/raw/2020.07.17.0/tsi1testdata.tar.gz | tar xz"
-package tsi1_test
-
-import (
-	"fmt"
-	"os"
-)
-
-func init() {
-	if _, err := os.Stat("./testdata"); err != nil {
-		fmt.Println("Run go generate to download the testdata directory.")
-		os.Exit(1)
-	}
-}
diff --git a/tsdb/tsi1/index.go b/tsdb/tsi1/index.go
deleted file mode 100644
index a4f766aacd..0000000000
--- a/tsdb/tsi1/index.go
+++ /dev/null
@@ -1,1705 +0,0 @@
-package tsi1
-
-import (
-	"bytes"
-	"context"
-	"errors"
-	"fmt"
-	"io/ioutil"
-	"os"
-	"path/filepath"
-	"regexp"
-	"runtime"
-	"sort"
-	"strconv"
-	"sync"
-	"sync/atomic"
-	"time"
-	"unsafe"
-
-	"github.com/cespare/xxhash"
-	"github.com/influxdata/influxdb/v2/kit/tracing"
-	"github.com/influxdata/influxdb/v2/models"
-	"github.com/influxdata/influxdb/v2/pkg/lifecycle"
-	"github.com/influxdata/influxdb/v2/pkg/mincore"
-	"github.com/influxdata/influxdb/v2/pkg/slices"
-	"github.com/influxdata/influxdb/v2/query"
-	"github.com/influxdata/influxdb/v2/tsdb"
-	"github.com/influxdata/influxdb/v2/tsdb/seriesfile"
-	"github.com/influxdata/influxql"
-	"github.com/prometheus/client_golang/prometheus"
-	"go.uber.org/zap"
-	"golang.org/x/sync/errgroup"
-	"golang.org/x/time/rate"
-)
-
-// ErrCompactionInterrupted is returned if compactions are disabled or
-// an index is closed while a compaction is occurring.
-var ErrCompactionInterrupted = errors.New("tsi1: compaction interrupted")
-
-func init() {
-	if os.Getenv("INFLUXDB_EXP_TSI_PARTITIONS") != "" {
-		i, err := strconv.Atoi(os.Getenv("INFLUXDB_EXP_TSI_PARTITIONS"))
-		if err != nil {
-			panic(err)
-		}
-		DefaultPartitionN = uint64(i)
-	}
-}
-
-// DefaultPartitionN determines how many shards the index will be partitioned into.
-//
-// NOTE: Currently, this must not be changed once a database is created. Further,
-// it must also be a power of 2.
-//
-var DefaultPartitionN uint64 = 8
-
-// An IndexOption is a functional option for changing the configuration of
-// an Index.
-type IndexOption func(i *Index)
-
-// WithPath sets the root path of the Index
-var WithPath = func(path string) IndexOption {
-	return func(i *Index) {
-		i.path = path
-	}
-}
-
-// DisableCompactions disables compactions on the Index.
-var DisableCompactions = func() IndexOption {
-	return func(i *Index) {
-		i.disableCompactions = true
-	}
-}
-
-// DisableFsync disables flushing and syncing of underlying files. Primarily this
-// impacts the LogFiles. This option can be set when working with the index in
-// an offline manner, for cases where a hard failure can be overcome by re-running the tooling.
-var DisableFsync = func() IndexOption {
-	return func(i *Index) {
-		i.disableFsync = true
-	}
-}
-
-// WithLogFileBufferSize sets the size of the buffer used within LogFiles.
-// Typically appending an entry to a LogFile involves writing 11 or 12 bytes, so
-// depending on how many new series are being created within a batch, it may
-// be appropriate to set this.
-var WithLogFileBufferSize = func(sz int) IndexOption {
-	return func(i *Index) {
-		if sz > 1<<17 { // 128K
-			sz = 1 << 17
-		} else if sz < 1<<12 {
-			sz = 1 << 12 // 4K (runtime default)
-		}
-		i.logfileBufferSize = sz
-	}
-}
-
-// DisableMetrics ensures that activity is not collected via the prometheus metrics.
-// DisableMetrics must be called before Open.
-var DisableMetrics = func() IndexOption {
-	return func(i *Index) {
-		i.metricsEnabled = false
-	}
-}
-
-// Index represents a collection of layered index files and WAL.
-type Index struct {
-	mu         sync.RWMutex
-	partitions []*Partition
-	res        lifecycle.Resource
-
-	defaultLabels prometheus.Labels
-
-	tagValueCache    *TagValueSeriesIDCache
-	partitionMetrics *partitionMetrics // Maintain a single set of partition metrics to be shared by partitions.
-	metricsEnabled   bool
-
-	// The following may be set when initializing an Index.
-	path               string        // Root directory of the index partitions.
-	disableCompactions bool          // Initially disables compactions on the index.
-	maxLogFileSize     int64         // Maximum size of a LogFile before it's compacted.
-	logfileBufferSize  int           // The size of the buffer used by the LogFile.
-	disableFsync       bool          // Disables flushing buffers and fsyncing files. Used when working with indexes offline.
-	pageFaultLimiter   *rate.Limiter // Limits page faults by the index.
-	logger             *zap.Logger   // Index's logger.
-	config             Config        // The index configuration
-
-	// The following must be set when initializing an Index.
-	sfile *seriesfile.SeriesFile // series lookup file
-
-	// Index's version.
-	version int
-
-	// Cardinality stats caching time-to-live.
-	StatsTTL time.Duration
-
-	// Number of partitions used by the index.
-	PartitionN uint64
-}
-
-func (i *Index) UniqueReferenceID() uintptr {
-	return uintptr(unsafe.Pointer(i))
-}
-
-// NewIndex returns a new instance of Index.
-func NewIndex(sfile *seriesfile.SeriesFile, c Config, options ...IndexOption) *Index {
-	idx := &Index{
-		tagValueCache:    NewTagValueSeriesIDCache(c.SeriesIDSetCacheSize),
-		partitionMetrics: newPartitionMetrics(nil),
-		metricsEnabled:   true,
-		maxLogFileSize:   int64(c.MaxIndexLogFileSize),
-		logger:           zap.NewNop(),
-		version:          Version,
-		config:           c,
-		sfile:            sfile,
-		StatsTTL:         c.StatsTTL,
-		PartitionN:       DefaultPartitionN,
-	}
-
-	for _, option := range options {
-		option(idx)
-	}
-
-	return idx
-}
-
-// WithPageFaultLimiter sets a limiter to restrict the number of page faults.
-func (i *Index) WithPageFaultLimiter(limiter *rate.Limiter) {
-	i.pageFaultLimiter = limiter
-}
-
-// SetDefaultMetricLabels sets the default labels on the trackers.
-func (i *Index) SetDefaultMetricLabels(labels prometheus.Labels) {
-	i.defaultLabels = make(prometheus.Labels, len(labels))
-	for k, v := range labels {
-		i.defaultLabels[k] = v
-	}
-}
-
-// Bytes estimates the memory footprint of this Index, in bytes.
-func (i *Index) Bytes() int {
-	var b int
-	i.mu.RLock()
-	b += 24 // mu RWMutex is 24 bytes
-	b += int(unsafe.Sizeof(i.partitions))
-	for _, p := range i.partitions {
-		b += int(unsafe.Sizeof(p)) + p.bytes()
-	}
-	b += int(unsafe.Sizeof(i.res))
-	b += int(unsafe.Sizeof(i.path)) + len(i.path)
-	b += int(unsafe.Sizeof(i.disableCompactions))
-	b += int(unsafe.Sizeof(i.maxLogFileSize))
-	b += int(unsafe.Sizeof(i.logger))
-	b += int(unsafe.Sizeof(i.sfile))
-	// Do not count SeriesFile because it belongs to the code that constructed this Index.
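For illustration, a hedged sketch of how the functional options above compose when constructing an index, the same pattern DumpTSI.readFileSet uses earlier in this diff. The directory paths are hypothetical and the import paths reflect the pre-move package locations.

package main

import (
	"context"

	"github.com/influxdata/influxdb/v2/tsdb/seriesfile"
	"github.com/influxdata/influxdb/v2/tsdb/tsi1"
)

func main() {
	sfile := seriesfile.NewSeriesFile("/var/lib/influxdb/_series") // hypothetical path
	if err := sfile.Open(context.Background()); err != nil {
		panic(err)
	}
	defer sfile.Close()

	idx := tsi1.NewIndex(sfile, tsi1.NewConfig(),
		tsi1.WithPath("/var/lib/influxdb/index"), // hypothetical path
		tsi1.DisableCompactions(),                // e.g. for offline tooling
		tsi1.DisableFsync(),                      // tolerate re-running after a crash
	)
	if err := idx.Open(context.Background()); err != nil {
		panic(err)
	}
	defer idx.Close()
}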
- b += int(unsafe.Sizeof(i.version)) - b += int(unsafe.Sizeof(i.PartitionN)) - i.mu.RUnlock() - return b -} - -// WithLogger sets the logger on the index after it's been created. -// -// It's not safe to call WithLogger after the index has been opened, or before -// it has been closed. -func (i *Index) WithLogger(l *zap.Logger) { - i.logger = l.With(zap.String("index", "tsi")) -} - -// SeriesFile returns the series file attached to the index. -func (i *Index) SeriesFile() *seriesfile.SeriesFile { return i.sfile } - -// SeriesIDSet returns the set of series ids associated with series in this -// index. Any series IDs for series no longer present in the index are filtered out. -func (i *Index) SeriesIDSet() *tsdb.SeriesIDSet { - seriesIDSet := tsdb.NewSeriesIDSet() - others := make([]*tsdb.SeriesIDSet, 0, i.PartitionN) - for _, p := range i.partitions { - others = append(others, p.seriesIDSet) - } - seriesIDSet.Merge(others...) - return seriesIDSet -} - -// Open opens the index. -func (i *Index) Open(ctx context.Context) error { - i.mu.Lock() - defer i.mu.Unlock() - - if i.res.Opened() { - return errors.New("index already open") - } - - span, _ := tracing.StartSpanFromContext(ctx) - defer span.Finish() - - // Ensure root exists. - if err := os.MkdirAll(i.path, 0777); err != nil { - return err - } - - mmu.Lock() - if cms == nil && i.metricsEnabled { - cms = newCacheMetrics(i.defaultLabels) - } - if pms == nil && i.metricsEnabled { - pms = newPartitionMetrics(i.defaultLabels) - } - mmu.Unlock() - - // Set the correct shared metrics on the cache - i.tagValueCache.tracker = newCacheTracker(cms, i.defaultLabels) - i.tagValueCache.tracker.enabled = i.metricsEnabled - - // Initialize index partitions. - i.partitions = make([]*Partition, i.PartitionN) - for j := 0; j < len(i.partitions); j++ { - p := NewPartition(i.sfile, filepath.Join(i.path, fmt.Sprint(j))) - p.MaxLogFileSize = i.maxLogFileSize - p.StatsTTL = i.StatsTTL - p.nosync = i.disableFsync - p.logbufferSize = i.logfileBufferSize - p.pageFaultLimiter = i.pageFaultLimiter - p.logger = i.logger.With(zap.String("tsi1_partition", fmt.Sprint(j+1))) - - // Each of the trackers needs to be given slightly different default - // labels to ensure the correct partition ids are set as labels. - labels := make(prometheus.Labels, len(i.defaultLabels)) - for k, v := range i.defaultLabels { - labels[k] = v - } - labels["index_partition"] = fmt.Sprint(j) - p.tracker = newPartitionTracker(pms, labels) - p.tracker.enabled = i.metricsEnabled - i.partitions[j] = p - } - - // Open all the Partitions in parallel. - partitionN := len(i.partitions) - n := i.availableThreads() - - // Store results. - errC := make(chan error, partitionN) - - // Run fn on each partition using a fixed number of goroutines. - var pidx uint32 // Index of maximum Partition being worked on. - for k := 0; k < n; k++ { - go func(k int) { - for { - idx := int(atomic.AddUint32(&pidx, 1) - 1) // Get next partition to work on. - if idx >= partitionN { - return // No more work. - } - err := i.partitions[idx].Open() - errC <- err - } - }(k) - } - - // Check for error. Be sure to read from every partition so that we can - // clean up appropriately in the case of errors. - var err error - for i := 0; i < partitionN; i++ { - if perr := <-errC; err == nil { - err = perr - } - } - if err != nil { - for _, p := range i.partitions { - p.Close() - } - return err - } - - // Mark opened. 
- i.res.Open() - i.logger.Info("Index opened", zap.Int("partitions", partitionN)) - - return nil -} - -// Acquire returns a reference to the index that causes it to be unable to be -// closed until the reference is released. -func (i *Index) Acquire() (*lifecycle.Reference, error) { - return i.res.Acquire() -} - -// Compact requests a compaction of partitions. -func (i *Index) Compact() { - i.mu.Lock() - defer i.mu.Unlock() - for _, p := range i.partitions { - p.Compact() - } -} - -// EnableCompactions allows compactions to proceed again. -func (i *Index) EnableCompactions() { - for _, p := range i.partitions { - p.EnableCompactions() - } -} - -// DisableCompactions stops any ongoing compactions and waits for them to finish. -func (i *Index) DisableCompactions() { - for _, p := range i.partitions { - p.DisableCompactions() - } -} - -// Wait blocks until all outstanding compactions have completed. -func (i *Index) Wait() { - for _, p := range i.partitions { - p.Wait() - } -} - -// Close closes the index. -func (i *Index) Close() error { - // Lock index and close partitions. - i.mu.Lock() - defer i.mu.Unlock() - - // Wait for any references to the index before closing - // the partitions. - i.res.Close() - - for _, p := range i.partitions { - if err := p.Close(); err != nil { - return err - } - } - - return nil -} - -// Path returns the path the index was opened with. -func (i *Index) Path() string { return i.path } - -// PartitionAt returns the partition by index. -func (i *Index) PartitionAt(index int) *Partition { - return i.partitions[index] -} - -// partitionIdx returns the index of the partition that key belongs in. -func (i *Index) partitionIdx(key []byte) int { - return int(xxhash.Sum64(key) & (i.PartitionN - 1)) -} - -// availableThreads returns the minimum of GOMAXPROCS and the number of -// partitions in the Index. -func (i *Index) availableThreads() int { - n := runtime.GOMAXPROCS(0) - if len(i.partitions) < n { - return len(i.partitions) - } - return n -} - -// ForEachMeasurementName iterates over all measurement names in the index, -// applying fn. It returns the first error encountered, if any. -// -// ForEachMeasurementName does not call fn on each partition concurrently so the -// call may provide a non-goroutine safe fn. -func (i *Index) ForEachMeasurementName(fn func(name []byte) error) error { - itr, err := i.MeasurementIterator() - if err != nil { - return err - } else if itr == nil { - return nil - } - defer itr.Close() - - // Iterate over all measurements. - for { - e, err := itr.Next() - if err != nil { - return err - } else if e == nil { - break - } - - if err := fn(e); err != nil { - return err - } - } - return nil -} - -// MeasurementExists returns true if a measurement exists. -func (i *Index) MeasurementExists(name []byte) (bool, error) { - n := i.availableThreads() - - // Store errors - var found uint32 // Use this to signal we found the measurement. - errC := make(chan error, i.PartitionN) - - // Check each partition for the measurement concurrently. - var pidx uint32 // Index of maximum Partition being worked on. - for k := 0; k < n; k++ { - go func() { - for { - idx := int(atomic.AddUint32(&pidx, 1) - 1) // Get next partition to check - if idx >= len(i.partitions) { - return // No more work. - } - - // Check if the measurement has been found. If it has don't - // need to check this partition and can just move on. 
- if atomic.LoadUint32(&found) == 1 { - errC <- nil - continue - } - - b, err := i.partitions[idx].MeasurementExists(name) - if b { - atomic.StoreUint32(&found, 1) - } - errC <- err - } - }() - } - - // Check for error - for i := 0; i < cap(errC); i++ { - if err := <-errC; err != nil { - return false, err - } - } - - // Check if we found the measurement. - return atomic.LoadUint32(&found) == 1, nil -} - -// MeasurementHasSeries returns true if a measurement has non-tombstoned series. -func (i *Index) MeasurementHasSeries(name []byte) (bool, error) { - for _, p := range i.partitions { - if v, err := p.MeasurementHasSeries(name); err != nil { - return false, err - } else if v { - return true, nil - } - } - return false, nil -} - -// fetchByteValues is a helper for gathering values from each partition in the index, -// based on some criteria. -// -// fn is a function that works on partition idx and calls into some method on -// the partition that returns some ordered values. -func (i *Index) fetchByteValues(fn func(idx int) ([][]byte, error)) ([][]byte, error) { - n := i.availableThreads() - - // Store results. - names := make([][][]byte, i.PartitionN) - errC := make(chan error, i.PartitionN) - - var pidx uint32 // Index of maximum Partition being worked on. - for k := 0; k < n; k++ { - go func() { - for { - idx := int(atomic.AddUint32(&pidx, 1) - 1) // Get next partition to work on. - if idx >= len(i.partitions) { - return // No more work. - } - - pnames, err := fn(idx) - - // This is safe since there are no readers on names until all - // the writers are done. - names[idx] = pnames - errC <- err - } - }() - } - - // Check for error - for i := 0; i < cap(errC); i++ { - if err := <-errC; err != nil { - return nil, err - } - } - - // It's now safe to read from names. - return slices.MergeSortedBytes(names[:]...), nil -} - -// MeasurementIterator returns an iterator over all measurements. -func (i *Index) MeasurementIterator() (tsdb.MeasurementIterator, error) { - itrs := make([]tsdb.MeasurementIterator, 0, len(i.partitions)) - for _, p := range i.partitions { - itr, err := p.MeasurementIterator() - if err != nil { - for _, itr := range itrs { - itr.Close() - } - return nil, err - } else if itr != nil { - itrs = append(itrs, itr) - } - } - return tsdb.MergeMeasurementIterators(itrs...), nil -} - -func (i *Index) MeasurementSeriesByExprIterator(name []byte, expr influxql.Expr) (tsdb.SeriesIDIterator, error) { - return i.measurementSeriesByExprIterator(name, expr) -} - -// measurementSeriesByExprIterator returns a series iterator for a measurement -// that is filtered by expr. See MeasurementSeriesByExprIterator for more details. -// -// measurementSeriesByExprIterator guarantees to never take any locks on the -// series file. -func (i *Index) measurementSeriesByExprIterator(name []byte, expr influxql.Expr) (tsdb.SeriesIDIterator, error) { - // Return all series for the measurement if there are no tag expressions. - if expr == nil { - itr, err := i.measurementSeriesIDIterator(name) - if err != nil { - return nil, err - } - return FilterUndeletedSeriesIDIterator(i.sfile, itr) - } - - itr, err := i.seriesByExprIterator(name, expr) - if err != nil { - return nil, err - } - - return FilterUndeletedSeriesIDIterator(i.sfile, itr) -} - -// MeasurementSeriesIDIterator returns an iterator over all non-tombstoned series -// for the provided measurement. 
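// The per-partition methods above (MeasurementExists, fetchByteValues, DropMeasurement,
// HasTagKey, and friends) all share one fan-out shape: a shared atomic counter hands out
// partition indexes to a fixed pool of goroutines, and every worker reports into an error
// channel buffered to PartitionN so the drain loop can read exactly one result per partition.
// A minimal, standalone sketch of that pattern follows; scanPartitions and check are
// illustrative names only, not part of the tsi1 API.
package main

import (
	"fmt"
	"runtime"
	"sync/atomic"
)

// scanPartitions runs check against every partition index using at most
// GOMAXPROCS workers, mirroring the fan-out used by the Index methods above.
func scanPartitions(partitionN int, check func(idx int) error) error {
	workers := runtime.GOMAXPROCS(0)
	if partitionN < workers {
		workers = partitionN
	}

	errC := make(chan error, partitionN) // one slot per partition, so workers never block
	var next uint32                      // shared cursor handing out partition indexes

	for w := 0; w < workers; w++ {
		go func() {
			for {
				idx := int(atomic.AddUint32(&next, 1) - 1)
				if idx >= partitionN {
					return // no more work
				}
				errC <- check(idx)
			}
		}()
	}

	// Drain one result per partition so no worker is left blocked; keep the first error.
	var firstErr error
	for i := 0; i < partitionN; i++ {
		if err := <-errC; err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}

func main() {
	err := scanPartitions(8, func(idx int) error {
		fmt.Println("checked partition", idx)
		return nil
	})
	fmt.Println("err:", err)
}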
-func (i *Index) MeasurementSeriesIDIterator(name []byte) (tsdb.SeriesIDIterator, error) { - itr, err := i.measurementSeriesIDIterator(name) - if err != nil { - return nil, err - } - return FilterUndeletedSeriesIDIterator(i.sfile, itr) -} - -// measurementSeriesIDIterator returns an iterator over all series in a measurement. -func (i *Index) measurementSeriesIDIterator(name []byte) (tsdb.SeriesIDIterator, error) { - itrs := make([]tsdb.SeriesIDIterator, 0, len(i.partitions)) - for _, p := range i.partitions { - itr, err := p.MeasurementSeriesIDIterator(name) - if err != nil { - tsdb.SeriesIDIterators(itrs).Close() - return nil, err - } else if itr != nil { - itrs = append(itrs, itr) - } - } - return tsdb.MergeSeriesIDIterators(itrs...), nil -} - -// MeasurementNamesByRegex returns measurement names for the provided regex. -func (i *Index) MeasurementNamesByRegex(re *regexp.Regexp) ([][]byte, error) { - return i.fetchByteValues(func(idx int) ([][]byte, error) { - return i.partitions[idx].MeasurementNamesByRegex(re) - }) -} - -// DropMeasurement deletes a measurement from the index. It returns the first -// error encountered, if any. -func (i *Index) DropMeasurement(name []byte) error { - n := i.availableThreads() - - // Store results. - errC := make(chan error, i.PartitionN) - - var pidx uint32 // Index of maximum Partition being worked on. - for k := 0; k < n; k++ { - go func() { - for { - idx := int(atomic.AddUint32(&pidx, 1) - 1) // Get next partition to work on. - if idx >= len(i.partitions) { - return // No more work. - } - errC <- i.partitions[idx].DropMeasurement(name) - } - }() - } - - // Remove any cached bitmaps for the measurement. - i.tagValueCache.DeleteMeasurement(name) - - // Check for error - for i := 0; i < cap(errC); i++ { - if err := <-errC; err != nil { - return err - } - } - return nil -} - -// CreateSeriesListIfNotExists creates a list of series if they doesn't exist in bulk. -func (i *Index) CreateSeriesListIfNotExists(collection *tsdb.SeriesCollection) error { - // Create the series list on the series file first. This validates all of the types for - // the collection. - err := i.sfile.CreateSeriesListIfNotExists(collection) - if err != nil { - return err - } - - // We need to move different series into collections for each partition - // to process. - pCollections := make([]tsdb.SeriesCollection, i.PartitionN) - - // Determine partition for series using each series key. - for iter := collection.Iterator(); iter.Next(); { - pCollection := &pCollections[i.partitionIdx(iter.Key())] - pCollection.Names = append(pCollection.Names, iter.Name()) - pCollection.Tags = append(pCollection.Tags, iter.Tags()) - pCollection.SeriesIDs = append(pCollection.SeriesIDs, iter.SeriesID()) - } - - // Process each subset of series on each partition. - n := i.availableThreads() - - // Store errors. - errC := make(chan error, i.PartitionN) - - var pidx uint32 // Index of maximum Partition being worked on. - for k := 0; k < n; k++ { - go func() { - i.mu.RLock() - partitionN := len(i.partitions) - i.mu.RUnlock() - - for { - idx := int(atomic.AddUint32(&pidx, 1) - 1) // Get next partition to work on. - if idx >= partitionN { - return // No more work. - } - - i.mu.RLock() - partition := i.partitions[idx] - i.mu.RUnlock() - - ids, err := partition.createSeriesListIfNotExists(&pCollections[idx]) - if len(ids) == 0 { - errC <- err - continue - } - - // Some cached bitset results may need to be updated. 
- i.tagValueCache.RLock() - for j, id := range ids { - if id.IsZero() { - continue - } - - name := pCollections[idx].Names[j] - tags := pCollections[idx].Tags[j] - if i.tagValueCache.measurementContainsSets(name) { - for _, pair := range tags { - // TODO(edd): It's not clear to me yet whether it will be better to take a lock - // on every series id set, or whether to gather them all up under the cache rlock - // and then take the cache lock and update them all at once (without invoking a lock - // on each series id set). - // - // Taking the cache lock will block all queries, but is one lock. Taking each series set - // lock might be many lock/unlocks but will only block a query that needs that particular set. - // - // Need to think on it, but I think taking a lock on each series id set is the way to go. - // - // One other option here is to take a lock on the series id set when we first encounter it - // and then keep it locked until we're done with all the ids. - // - // Note: this will only add `id` to the set if it exists. - i.tagValueCache.addToSet(name, pair.Key, pair.Value, id) // Takes a lock on the series id set - } - } - } - i.tagValueCache.RUnlock() - - errC <- err - } - }() - } - - // Check for error - for i := 0; i < cap(errC); i++ { - if err := <-errC; err != nil { - return err - } - } - - return nil -} - -// InitializeSeries is a no-op. This only applies to the in-memory index. -func (i *Index) InitializeSeries(*tsdb.SeriesCollection) error { - return nil -} - -// DropSeries drops the provided set of series from the index. If cascade is true -// and this is the last series to the measurement, the measurment will also be dropped. -func (i *Index) DropSeries(items []DropSeriesItem, cascade bool) error { - // Split into batches for each partition. - m := make(map[int][]tsdb.SeriesID) - for _, item := range items { - partitionID := i.partitionIdx(item.Key) - m[partitionID] = append(m[partitionID], item.SeriesID) - } - - // Remove from all partitions in parallel. - var g errgroup.Group - for partitionID, ids := range m { - partitionID, ids := partitionID, ids - g.Go(func() error { return i.partitions[partitionID].DropSeries(ids) }) - } - if err := g.Wait(); err != nil { - return err - } - - if !cascade { - return nil - } - - // Clear tag value cache & determine unique set of measurement names. - nameSet := make(map[string]struct{}) - for _, item := range items { - // Extract measurement name & tags. - name, tags := models.ParseKeyBytes(item.Key) - nameSet[string(name)] = struct{}{} - - // If there are cached sets for any of the tag pairs, they will need to be - // updated with the series id. - i.tagValueCache.RLock() - if i.tagValueCache.measurementContainsSets(name) { - for _, pair := range tags { - i.tagValueCache.delete(name, pair.Key, pair.Value, item.SeriesID) // Takes a lock on the series id set - } - } - i.tagValueCache.RUnlock() - } - - for name := range nameSet { - namebytes := []byte(name) - - // Check if that was the last series for the measurement in the entire index. - if ok, err := i.MeasurementHasSeries(namebytes); err != nil { - return err - } else if ok { - continue - } - - // If no more series exist in the measurement then delete the measurement. - if err := i.DropMeasurement(namebytes); err != nil { - return err - } - } - return nil -} - -// DropSeriesGlobal is a no-op on the tsi1 index. 
-func (i *Index) DropSeriesGlobal(key []byte) error { return nil } - -// DropMeasurementIfSeriesNotExist drops a measurement only if there are no more -// series for the measurment. -func (i *Index) DropMeasurementIfSeriesNotExist(name []byte) error { - // Check if that was the last series for the measurement in the entire index. - if ok, err := i.MeasurementHasSeries(name); err != nil { - return err - } else if ok { - return nil - } - - // If no more series exist in the measurement then delete the measurement. - return i.DropMeasurement(name) -} - -// SeriesN returns the series cardinality in the index. It is the sum of all -// partition cardinalities. -func (i *Index) SeriesN() int64 { - var total int64 - for _, p := range i.partitions { - total += int64(p.seriesIDSet.Cardinality()) - } - return total -} - -// HasTagKey returns true if tag key exists. It returns the first error -// encountered if any. -func (i *Index) HasTagKey(name, key []byte) (bool, error) { - n := i.availableThreads() - - // Store errors - var found uint32 // Use this to signal we found the tag key. - errC := make(chan error, i.PartitionN) - - // Check each partition for the tag key concurrently. - var pidx uint32 // Index of maximum Partition being worked on. - for k := 0; k < n; k++ { - go func() { - for { - idx := int(atomic.AddUint32(&pidx, 1) - 1) // Get next partition to check - if idx >= len(i.partitions) { - return // No more work. - } - - // Check if the tag key has already been found. If it has, we - // don't need to check this partition and can just move on. - if atomic.LoadUint32(&found) == 1 { - errC <- nil - continue - } - - b, err := i.partitions[idx].HasTagKey(name, key) - if b { - atomic.StoreUint32(&found, 1) - } - errC <- err - } - }() - } - - // Check for error - for i := 0; i < cap(errC); i++ { - if err := <-errC; err != nil { - return false, err - } - } - - // Check if we found the tag key. - return atomic.LoadUint32(&found) == 1, nil -} - -// HasTagValue returns true if tag value exists. -func (i *Index) HasTagValue(name, key, value []byte) (bool, error) { - n := i.availableThreads() - - // Store errors - var found uint32 // Use this to signal we found the tag key. - errC := make(chan error, i.PartitionN) - - // Check each partition for the tag key concurrently. - var pidx uint32 // Index of maximum Partition being worked on. - for k := 0; k < n; k++ { - go func() { - for { - idx := int(atomic.AddUint32(&pidx, 1) - 1) // Get next partition to check - if idx >= len(i.partitions) { - return // No more work. - } - - // Check if the tag key has already been found. If it has, we - // don't need to check this partition and can just move on. - if atomic.LoadUint32(&found) == 1 { - errC <- nil - continue - } - - b, err := i.partitions[idx].HasTagValue(name, key, value) - if b { - atomic.StoreUint32(&found, 1) - } - errC <- err - } - }() - } - - // Check for error - for i := 0; i < cap(errC); i++ { - if err := <-errC; err != nil { - return false, err - } - } - - // Check if we found the tag key. - return atomic.LoadUint32(&found) == 1, nil -} - -// TagKeyIterator returns an iterator for all keys across a single measurement. 
-func (i *Index) TagKeyIterator(name []byte) (tsdb.TagKeyIterator, error) { - a := make([]tsdb.TagKeyIterator, 0, len(i.partitions)) - for _, p := range i.partitions { - itr, err := p.TagKeyIterator(name) - if err != nil { - for _, itr := range a { - itr.Close() - } - return nil, err - } else if itr != nil { - a = append(a, itr) - } - } - return tsdb.MergeTagKeyIterators(a...), nil -} - -// TagValueIterator returns an iterator for all values across a single key. -func (i *Index) TagValueIterator(name, key []byte) (tsdb.TagValueIterator, error) { - a := make([]tsdb.TagValueIterator, 0, len(i.partitions)) - for _, p := range i.partitions { - itr, err := p.TagValueIterator(name, key) - if err != nil { - for _, itr := range a { - itr.Close() - } - return nil, err - } else if itr != nil { - a = append(a, itr) - } - } - return tsdb.MergeTagValueIterators(a...), nil -} - -// TagKeySeriesIDIterator returns a series iterator for all values across a single key. -func (i *Index) TagKeySeriesIDIterator(name, key []byte) (tsdb.SeriesIDIterator, error) { - itr, err := i.tagKeySeriesIDIterator(name, key) - if err != nil { - return nil, err - } - return FilterUndeletedSeriesIDIterator(i.sfile, itr) -} - -// tagKeySeriesIDIterator returns a series iterator for all values across a single key. -func (i *Index) tagKeySeriesIDIterator(name, key []byte) (tsdb.SeriesIDIterator, error) { - a := make([]tsdb.SeriesIDIterator, 0, len(i.partitions)) - for _, p := range i.partitions { - itr, err := p.TagKeySeriesIDIterator(name, key) - if err != nil { - for _, itr := range a { - itr.Close() - } - return nil, err - } else if itr != nil { - a = append(a, itr) - } - } - - return tsdb.MergeSeriesIDIterators(a...), nil -} - -// TagValueSeriesIDIterator returns a series iterator for a single tag value. -func (i *Index) TagValueSeriesIDIterator(name, key, value []byte) (tsdb.SeriesIDIterator, error) { - itr, err := i.tagValueSeriesIDIterator(name, key, value) - if err != nil { - return nil, err - } - return FilterUndeletedSeriesIDIterator(i.sfile, itr) -} - -// tagValueSeriesIDIterator returns a series iterator for a single tag value. -func (i *Index) tagValueSeriesIDIterator(name, key, value []byte) (tsdb.SeriesIDIterator, error) { - // Check series ID set cache... - if i.config.SeriesIDSetCacheSize > 0 { // Cache enabled. - if ss := i.tagValueCache.Get(name, key, value); ss != nil { - // Return a clone because the set is mutable. - return tsdb.NewSeriesIDSetIterator(ss.Clone()), nil - } - } - - a := make([]tsdb.SeriesIDIterator, 0, len(i.partitions)) - for _, p := range i.partitions { - itr, err := p.TagValueSeriesIDIterator(name, key, value) - if err != nil { - return nil, err - } else if itr != nil { - a = append(a, itr) - } - } - - itr := tsdb.MergeSeriesIDIterators(a...) - if i.config.SeriesIDSetCacheSize == 0 { // Cache disabled. - return itr, nil - } - - // Check if the iterator contains only series id sets. Cache them... - if ssitr, ok := itr.(tsdb.SeriesIDSetIterator); ok { - ss := ssitr.SeriesIDSet() - i.tagValueCache.Put(name, key, value, ss) - } - return itr, nil -} - -func (i *Index) TagSets(name []byte, opt query.IteratorOptions) ([]*query.TagSet, error) { - itr, err := i.MeasurementSeriesByExprIterator(name, opt.Condition) - if err != nil { - return nil, err - } else if itr == nil { - return nil, nil - } - defer itr.Close() - // measurementSeriesByExprIterator filters deleted series IDs; no need to - // do so here. 
- - var dims []string - if len(opt.Dimensions) > 0 { - dims = make([]string, len(opt.Dimensions)) - copy(dims, opt.Dimensions) - sort.Strings(dims) - } - - // For every series, get the tag values for the requested tag keys i.e. - // dimensions. This is the TagSet for that series. Series with the same - // TagSet are then grouped together, because for the purpose of GROUP BY - // they are part of the same composite series. - tagSets := make(map[string]*query.TagSet, 64) - var seriesN, maxSeriesN int - - if opt.MaxSeriesN > 0 { - maxSeriesN = opt.MaxSeriesN - } else { - maxSeriesN = int(^uint(0) >> 1) - } - - // The tag sets require a string for each series key in the set, The series - // file formatted keys need to be parsed into models format. Since they will - // end up as strings we can re-use an intermediate buffer for this process. - var keyBuf []byte - var tagsBuf models.Tags // Buffer for tags. Tags are not needed outside of each loop iteration. - for { - se, err := itr.Next() - if err != nil { - return nil, err - } else if se.SeriesID.IsZero() { - break - } - - // Skip if the series has been tombstoned. - key := i.sfile.SeriesKey(se.SeriesID) - if len(key) == 0 { - continue - } - - if seriesN&0x3fff == 0x3fff { - // check every 16384 series if the query has been canceled - select { - case <-opt.InterruptCh: - return nil, query.ErrQueryInterrupted - default: - } - } - - if seriesN > maxSeriesN { - return nil, fmt.Errorf("max-select-series limit exceeded: (%d/%d)", seriesN, opt.MaxSeriesN) - } - - // NOTE - must not escape this loop iteration. - _, tagsBuf = seriesfile.ParseSeriesKeyInto(key, tagsBuf) - var tagsAsKey []byte - if len(dims) > 0 { - tagsAsKey = tsdb.MakeTagsKey(dims, tagsBuf) - } - - tagSet, ok := tagSets[string(tagsAsKey)] - if !ok { - // This TagSet is new, create a new entry for it. - tagSet = &query.TagSet{ - Tags: nil, - Key: tagsAsKey, - } - } - - // Associate the series and filter with the Tagset. - keyBuf = models.AppendMakeKey(keyBuf, name, tagsBuf) - tagSet.AddFilter(string(keyBuf), se.Expr) - keyBuf = keyBuf[:0] - - // Ensure it's back in the map. - tagSets[string(tagsAsKey)] = tagSet - seriesN++ - } - - // Sort the series in each tag set. - for _, t := range tagSets { - sort.Sort(t) - } - - // The TagSets have been created, as a map of TagSets. Just send - // the values back as a slice, sorting for consistency. - sortedTagsSets := make([]*query.TagSet, 0, len(tagSets)) - for _, v := range tagSets { - sortedTagsSets = append(sortedTagsSets, v) - } - sort.Sort(byTagKey(sortedTagsSets)) - - return sortedTagsSets, nil -} - -type byTagKey []*query.TagSet - -func (t byTagKey) Len() int { return len(t) } -func (t byTagKey) Less(i, j int) bool { return bytes.Compare(t[i].Key, t[j].Key) < 0 } -func (t byTagKey) Swap(i, j int) { t[i], t[j] = t[j], t[i] } - -// MeasurementTagKeysByExpr extracts the tag keys wanted by the expression. -func (i *Index) MeasurementTagKeysByExpr(name []byte, expr influxql.Expr) (map[string]struct{}, error) { - n := i.availableThreads() - - // Store results. - keys := make([]map[string]struct{}, i.PartitionN) - errC := make(chan error, i.PartitionN) - - var pidx uint32 // Index of maximum Partition being worked on. - for k := 0; k < n; k++ { - go func() { - for { - idx := int(atomic.AddUint32(&pidx, 1) - 1) // Get next partition to work on. - if idx >= len(i.partitions) { - return // No more work. - } - - // This is safe since there are no readers on keys until all - // the writers are done. 
- tagKeys, err := i.partitions[idx].MeasurementTagKeysByExpr(name, expr) - keys[idx] = tagKeys - errC <- err - } - }() - } - - // Check for error - for i := 0; i < cap(errC); i++ { - if err := <-errC; err != nil { - return nil, err - } - } - - // Merge into single map. - result := keys[0] - for k := 1; k < len(i.partitions); k++ { - for k := range keys[k] { - result[k] = struct{}{} - } - } - return result, nil -} - -// DiskSizeBytes returns the size of the index on disk. -func (i *Index) DiskSizeBytes() int64 { - fs, err := i.FileSet() - if err != nil { - i.logger.Warn("Index is closing down") - return 0 - } - defer fs.Release() - - var manifestSize int64 - // Get MANIFEST sizes from each partition. - for _, p := range i.partitions { - manifestSize += p.manifestSize - } - return fs.Size() + manifestSize -} - -// TagKeyCardinality always returns zero. -// It is not possible to determine cardinality of tags across index files, and -// thus it cannot be done across partitions. -func (i *Index) TagKeyCardinality(name, key []byte) int { - return 0 -} - -// FileSet returns the set of all files across all partitions. It must be released. -func (i *Index) FileSet() (*FileSet, error) { - i.mu.RLock() - defer i.mu.RUnlock() - - // Keep track of all of the file sets returned from the partitions temporarily. - // Keeping them alive keeps all of their underlying files alive. We release - // whatever we have when we return. - fss := make([]*FileSet, 0, len(i.partitions)) - defer func() { - for _, fs := range fss { - fs.Release() - } - }() - - // Collect the set of files from each partition. - var files []File - for _, p := range i.partitions { - fs, err := p.FileSet() - if err != nil { - return nil, err - } - fss = append(fss, fs) - files = append(files, fs.files...) - } - - // Construct a new file set from the set of files. This acquires references to - // each of the files, so we can release all of the file sets returned from the - // partitions, which happens automatically during the defer. - return NewFileSet(i.sfile, files) -} - -// SetFieldName is a no-op on this index. -func (i *Index) SetFieldName(measurement []byte, name string) {} - -// Rebuild rebuilds an index. It's a no-op for this index. -func (i *Index) Rebuild() {} - -// MeasurementCardinalityStats returns cardinality stats for all measurements. -func (i *Index) MeasurementCardinalityStats() (MeasurementCardinalityStats, error) { - i.mu.RLock() - defer i.mu.RUnlock() - - stats := NewMeasurementCardinalityStats() - for _, p := range i.partitions { - pstats, err := p.MeasurementCardinalityStats() - if err != nil { - return nil, err - } - stats.Add(pstats) - } - return stats, nil -} - -func (i *Index) seriesByExprIterator(name []byte, expr influxql.Expr) (tsdb.SeriesIDIterator, error) { - switch expr := expr.(type) { - case *influxql.BinaryExpr: - switch expr.Op { - case influxql.AND, influxql.OR: - // Get the series IDs and filter expressions for the LHS. - litr, err := i.seriesByExprIterator(name, expr.LHS) - if err != nil { - return nil, err - } - - // Get the series IDs and filter expressions for the RHS. - ritr, err := i.seriesByExprIterator(name, expr.RHS) - if err != nil { - if litr != nil { - litr.Close() - } - return nil, err - } - - // Intersect iterators if expression is "AND". - if expr.Op == influxql.AND { - return tsdb.IntersectSeriesIDIterators(litr, ritr), nil - } - - // Union iterators if expression is "OR". 
- return tsdb.UnionSeriesIDIterators(litr, ritr), nil - - default: - return i.seriesByBinaryExprIterator(name, expr) - } - - case *influxql.ParenExpr: - return i.seriesByExprIterator(name, expr.Expr) - - case *influxql.BooleanLiteral: - if expr.Val { - return i.measurementSeriesIDIterator(name) - } - return nil, nil - - default: - return nil, nil - } -} - -// seriesByBinaryExprIterator returns a series iterator and a filtering expression. -func (i *Index) seriesByBinaryExprIterator(name []byte, n *influxql.BinaryExpr) (tsdb.SeriesIDIterator, error) { - // If this binary expression has another binary expression, then this - // is some expression math and we should just pass it to the underlying query. - if _, ok := n.LHS.(*influxql.BinaryExpr); ok { - itr, err := i.measurementSeriesIDIterator(name) - if err != nil { - return nil, err - } - return tsdb.NewSeriesIDExprIterator(itr, n), nil - } else if _, ok := n.RHS.(*influxql.BinaryExpr); ok { - itr, err := i.measurementSeriesIDIterator(name) - if err != nil { - return nil, err - } - return tsdb.NewSeriesIDExprIterator(itr, n), nil - } - - // Retrieve the variable reference from the correct side of the expression. - key, ok := n.LHS.(*influxql.VarRef) - value := n.RHS - if !ok { - key, ok = n.RHS.(*influxql.VarRef) - if !ok { - // This is an expression we do not know how to evaluate. Let the - // query engine take care of this. - itr, err := i.measurementSeriesIDIterator(name) - if err != nil { - return nil, err - } - return tsdb.NewSeriesIDExprIterator(itr, n), nil - } - value = n.LHS - } - - // For fields, return all series from this measurement. - if key.Val != "_name" && (key.Type == influxql.AnyField || (key.Type != influxql.Tag && key.Type != influxql.Unknown)) { - itr, err := i.measurementSeriesIDIterator(name) - if err != nil { - return nil, err - } - return tsdb.NewSeriesIDExprIterator(itr, n), nil - } else if value, ok := value.(*influxql.VarRef); ok { - // Check if the RHS is a variable and if it is a field. - if value.Val != "_name" && (key.Type == influxql.AnyField || (value.Type != influxql.Tag && value.Type != influxql.Unknown)) { - itr, err := i.measurementSeriesIDIterator(name) - if err != nil { - return nil, err - } - return tsdb.NewSeriesIDExprIterator(itr, n), nil - } - } - - // Create iterator based on value type. - switch value := value.(type) { - case *influxql.StringLiteral: - return i.seriesByBinaryExprStringIterator(name, []byte(key.Val), []byte(value.Val), n.Op) - case *influxql.RegexLiteral: - return i.seriesByBinaryExprRegexIterator(name, []byte(key.Val), value.Val, n.Op) - case *influxql.VarRef: - return i.seriesByBinaryExprVarRefIterator(name, []byte(key.Val), value, n.Op) - default: - // We do not know how to evaluate this expression so pass it - // on to the query engine. - itr, err := i.measurementSeriesIDIterator(name) - if err != nil { - return nil, err - } - return tsdb.NewSeriesIDExprIterator(itr, n), nil - } -} - -func (i *Index) seriesByBinaryExprStringIterator(name, key, value []byte, op influxql.Token) (tsdb.SeriesIDIterator, error) { - // Special handling for "_name" to match measurement name. - if bytes.Equal(key, []byte("_name")) { - if (op == influxql.EQ && bytes.Equal(value, name)) || (op == influxql.NEQ && !bytes.Equal(value, name)) { - return i.measurementSeriesIDIterator(name) - } - return nil, nil - } - - if op == influxql.EQ { - // Match a specific value. 
- if len(value) != 0 { - return i.tagValueSeriesIDIterator(name, key, value) - } - - mitr, err := i.measurementSeriesIDIterator(name) - if err != nil { - return nil, err - } - - kitr, err := i.tagKeySeriesIDIterator(name, key) - if err != nil { - if mitr != nil { - mitr.Close() - } - return nil, err - } - - // Return all measurement series that have no values from this tag key. - return tsdb.DifferenceSeriesIDIterators(mitr, kitr), nil - } - - // Return all measurement series without this tag value. - if len(value) != 0 { - mitr, err := i.measurementSeriesIDIterator(name) - if err != nil { - return nil, err - } - - vitr, err := i.tagValueSeriesIDIterator(name, key, value) - if err != nil { - if mitr != nil { - mitr.Close() - } - return nil, err - } - - return tsdb.DifferenceSeriesIDIterators(mitr, vitr), nil - } - - // Return all series across all values of this tag key. - return i.tagKeySeriesIDIterator(name, key) -} - -func (i *Index) seriesByBinaryExprRegexIterator(name, key []byte, value *regexp.Regexp, op influxql.Token) (tsdb.SeriesIDIterator, error) { - // Special handling for "_name" to match measurement name. - if bytes.Equal(key, []byte("_name")) { - match := value.Match(name) - if (op == influxql.EQREGEX && match) || (op == influxql.NEQREGEX && !match) { - mitr, err := i.measurementSeriesIDIterator(name) - if err != nil { - return nil, err - } - return tsdb.NewSeriesIDExprIterator(mitr, &influxql.BooleanLiteral{Val: true}), nil - } - return nil, nil - } - return i.matchTagValueSeriesIDIterator(name, key, value, op == influxql.EQREGEX) -} - -func (i *Index) seriesByBinaryExprVarRefIterator(name, key []byte, value *influxql.VarRef, op influxql.Token) (tsdb.SeriesIDIterator, error) { - itr0, err := i.tagKeySeriesIDIterator(name, key) - if err != nil { - return nil, err - } - - itr1, err := i.tagKeySeriesIDIterator(name, []byte(value.Val)) - if err != nil { - if itr0 != nil { - itr0.Close() - } - return nil, err - } - - if op == influxql.EQ { - return tsdb.IntersectSeriesIDIterators(itr0, itr1), nil - } - return tsdb.DifferenceSeriesIDIterators(itr0, itr1), nil -} - -// MatchTagValueSeriesIDIterator returns a series iterator for tags which match value. -// If matches is false, returns iterators which do not match value. -func (i *Index) MatchTagValueSeriesIDIterator(name, key []byte, value *regexp.Regexp, matches bool) (tsdb.SeriesIDIterator, error) { - itr, err := i.matchTagValueSeriesIDIterator(name, key, value, matches) - if err != nil { - return nil, err - } - return FilterUndeletedSeriesIDIterator(i.sfile, itr) -} - -// matchTagValueSeriesIDIterator returns a series iterator for tags which match -// value. See MatchTagValueSeriesIDIterator for more details. -// -// It guarantees to never take any locks on the underlying series file. 
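// matchTagValueSeriesIDIterator (below) picks one of four helpers from two booleans:
// whether the operator matches (=~) or does not match (!~), and whether the regex also
// matches the empty string. The empty-string case matters because series that lack the
// tag key behave as if they carried the empty value. A small, hypothetical illustration
// of that dispatch, standalone and not part of the tsi1 package:
package main

import (
	"fmt"
	"regexp"
)

// matchCase names which helper path a (matches, regex) pair selects, mirroring the
// branching at the top of matchTagValueSeriesIDIterator.
func matchCase(matches bool, re *regexp.Regexp) string {
	matchEmpty := re.MatchString("")
	switch {
	case matches && matchEmpty:
		return "equal-empty: matching values plus series without the tag key"
	case matches:
		return "equal-not-empty: only series whose tag value matches"
	case matchEmpty:
		return "not-equal-empty: only series whose tag value does not match"
	default:
		return "not-equal-not-empty: non-matching values plus series without the tag key"
	}
}

func main() {
	// host =~ /web.*/ cannot match "", so only tagged, matching series qualify.
	fmt.Println(matchCase(true, regexp.MustCompile(`web.*`)))
	// host =~ /web.*|/ matches "", so series without a host tag qualify too.
	fmt.Println(matchCase(true, regexp.MustCompile(`web.*|`)))
	// host !~ /web.*/ : "" is not matched, so series without a host tag are kept as well.
	fmt.Println(matchCase(false, regexp.MustCompile(`web.*`)))
}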
-func (i *Index) matchTagValueSeriesIDIterator(name, key []byte, value *regexp.Regexp, matches bool) (tsdb.SeriesIDIterator, error) { - matchEmpty := value.MatchString("") - if matches { - if matchEmpty { - return i.matchTagValueEqualEmptySeriesIDIterator(name, key, value) - } - return i.matchTagValueEqualNotEmptySeriesIDIterator(name, key, value) - } - - if matchEmpty { - return i.matchTagValueNotEqualEmptySeriesIDIterator(name, key, value) - } - return i.matchTagValueNotEqualNotEmptySeriesIDIterator(name, key, value) -} - -func (i *Index) matchTagValueEqualEmptySeriesIDIterator(name, key []byte, value *regexp.Regexp) (tsdb.SeriesIDIterator, error) { - vitr, err := i.TagValueIterator(name, key) - if err != nil { - return nil, err - } else if vitr == nil { - return i.measurementSeriesIDIterator(name) - } - defer vitr.Close() - - var itrs []tsdb.SeriesIDIterator - if err := func() error { - for { - e, err := vitr.Next() - if err != nil { - return err - } else if e == nil { - break - } - - if !value.Match(e) { - itr, err := i.tagValueSeriesIDIterator(name, key, e) - if err != nil { - return err - } else if itr != nil { - itrs = append(itrs, itr) - } - } - } - return nil - }(); err != nil { - tsdb.SeriesIDIterators(itrs).Close() - return nil, err - } - - mitr, err := i.measurementSeriesIDIterator(name) - if err != nil { - tsdb.SeriesIDIterators(itrs).Close() - return nil, err - } - - return tsdb.DifferenceSeriesIDIterators(mitr, tsdb.MergeSeriesIDIterators(itrs...)), nil -} - -func (i *Index) matchTagValueEqualNotEmptySeriesIDIterator(name, key []byte, value *regexp.Regexp) (tsdb.SeriesIDIterator, error) { - vitr, err := i.TagValueIterator(name, key) - if err != nil { - return nil, err - } else if vitr == nil { - return nil, nil - } - defer vitr.Close() - - var itrs []tsdb.SeriesIDIterator - for { - e, err := vitr.Next() - if err != nil { - tsdb.SeriesIDIterators(itrs).Close() - return nil, err - } else if e == nil { - break - } - - if value.Match(e) { - itr, err := i.tagValueSeriesIDIterator(name, key, e) - if err != nil { - tsdb.SeriesIDIterators(itrs).Close() - return nil, err - } else if itr != nil { - itrs = append(itrs, itr) - } - } - } - return tsdb.MergeSeriesIDIterators(itrs...), nil -} - -func (i *Index) matchTagValueNotEqualEmptySeriesIDIterator(name, key []byte, value *regexp.Regexp) (tsdb.SeriesIDIterator, error) { - vitr, err := i.TagValueIterator(name, key) - if err != nil { - return nil, err - } else if vitr == nil { - return nil, nil - } - defer vitr.Close() - - var itrs []tsdb.SeriesIDIterator - for { - e, err := vitr.Next() - if err != nil { - tsdb.SeriesIDIterators(itrs).Close() - return nil, err - } else if e == nil { - break - } - - if !value.Match(e) { - itr, err := i.tagValueSeriesIDIterator(name, key, e) - if err != nil { - tsdb.SeriesIDIterators(itrs).Close() - return nil, err - } else if itr != nil { - itrs = append(itrs, itr) - } - } - } - return tsdb.MergeSeriesIDIterators(itrs...), nil -} - -func (i *Index) matchTagValueNotEqualNotEmptySeriesIDIterator(name, key []byte, value *regexp.Regexp) (tsdb.SeriesIDIterator, error) { - vitr, err := i.TagValueIterator(name, key) - if err != nil { - return nil, err - } else if vitr == nil { - return i.measurementSeriesIDIterator(name) - } - defer vitr.Close() - - var itrs []tsdb.SeriesIDIterator - for { - e, err := vitr.Next() - if err != nil { - tsdb.SeriesIDIterators(itrs).Close() - return nil, err - } else if e == nil { - break - } - if value.Match(e) { - itr, err := i.tagValueSeriesIDIterator(name, key, e) - if err != 
nil { - tsdb.SeriesIDIterators(itrs).Close() - return nil, err - } else if itr != nil { - itrs = append(itrs, itr) - } - } - } - - mitr, err := i.measurementSeriesIDIterator(name) - if err != nil { - tsdb.SeriesIDIterators(itrs).Close() - return nil, err - } - return tsdb.DifferenceSeriesIDIterators(mitr, tsdb.MergeSeriesIDIterators(itrs...)), nil -} - -// IsIndexDir returns true if directory contains at least one partition directory. -func IsIndexDir(path string) (bool, error) { - fis, err := ioutil.ReadDir(path) - if err != nil { - return false, err - } - for _, fi := range fis { - if !fi.IsDir() { - continue - } else if ok, err := IsPartitionDir(filepath.Join(path, fi.Name())); err != nil { - return false, err - } else if ok { - return true, nil - } - } - return false, nil -} - -// filterUndeletedSeriesIDIterator returns all series which are not deleted. -type filterUndeletedSeriesIDIterator struct { - sfile *seriesfile.SeriesFile - sfileref *lifecycle.Reference - itr tsdb.SeriesIDIterator -} - -// FilterUndeletedSeriesIDIterator returns an iterator which filters all deleted series. -func FilterUndeletedSeriesIDIterator(sfile *seriesfile.SeriesFile, itr tsdb.SeriesIDIterator) (tsdb.SeriesIDIterator, error) { - if itr == nil { - return nil, nil - } - sfileref, err := sfile.Acquire() - if err != nil { - return nil, err - } - return &filterUndeletedSeriesIDIterator{ - sfile: sfile, - sfileref: sfileref, - itr: itr, - }, nil -} - -func (itr *filterUndeletedSeriesIDIterator) Close() (err error) { - itr.sfileref.Release() - return itr.itr.Close() -} - -func (itr *filterUndeletedSeriesIDIterator) Next() (tsdb.SeriesIDElem, error) { - for { - e, err := itr.itr.Next() - if err != nil { - return tsdb.SeriesIDElem{}, err - } else if e.SeriesID.IsZero() { - return tsdb.SeriesIDElem{}, nil - } else if itr.sfile.IsDeleted(e.SeriesID) { - continue - } - return e, nil - } -} - -type DropSeriesItem struct { - SeriesID tsdb.SeriesID - Key []byte -} - -// wait rate limits page faults to the underlying data. Skipped if limiter is not set. -func wait(limiter *mincore.Limiter, b []byte) error { - if limiter == nil { - return nil - } - return limiter.WaitRange(context.Background(), b) -} diff --git a/tsdb/tsi1/index_file.go b/tsdb/tsi1/index_file.go deleted file mode 100644 index 89a984c42b..0000000000 --- a/tsdb/tsi1/index_file.go +++ /dev/null @@ -1,511 +0,0 @@ -package tsi1 - -import ( - "bytes" - "encoding/binary" - "errors" - "fmt" - "io" - "sync" - "unsafe" - - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/pkg/lifecycle" - "github.com/influxdata/influxdb/v2/pkg/mincore" - "github.com/influxdata/influxdb/v2/pkg/mmap" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" -) - -// IndexFileVersion is the current TSI1 index file version. -const IndexFileVersion = 1 - -// FileSignature represents a magic number at the header of the index file. -const FileSignature = "TSI1" - -// IndexFile field size constants. -const ( - // IndexFile trailer fields - IndexFileVersionSize = 2 - - // IndexFileTrailerSize is the size of the trailer. Currently 82 bytes. - IndexFileTrailerSize = IndexFileVersionSize + - 8 + 8 + // measurement block offset + size - 8 + 8 + // series id set offset + size - 8 + 8 + // tombstone series id set offset + size - // legacy sketch info. we used to have HLL sketches, but they were - // removed. we keep the offset and length bytes in the trailer so - // that we don't have to do a migration, but they are unused. 
- 8 + 8 + 8 + 8 + - 0 -) - -// IndexFile errors. -var ( - ErrInvalidIndexFile = errors.New("invalid index file") - ErrUnsupportedIndexFileVersion = errors.New("unsupported index file version") -) - -// IndexFile represents a collection of measurement, tag, and series data. -type IndexFile struct { - data []byte - - // Lifecycle tracking - res lifecycle.Resource - - // Components - sfile *seriesfile.SeriesFile - sfileref *lifecycle.Reference - - tblks map[string]*TagBlock // tag blocks by measurement name - mblk MeasurementBlock - - // Raw series set data. - seriesIDSetData []byte - tombstoneSeriesIDSetData []byte - - // Sortable identifier & filepath to the log file. - level int - id int - - mu sync.RWMutex - // Compaction tracking. - compacting bool - - // Path to data file. - path string - - pageFaultLimiter *mincore.Limiter -} - -// NewIndexFile returns a new instance of IndexFile. -func NewIndexFile(sfile *seriesfile.SeriesFile) *IndexFile { - return &IndexFile{ - sfile: sfile, - } -} - -// bytes estimates the memory footprint of this IndexFile, in bytes. -func (f *IndexFile) bytes() int { - var b int - // Do not count f.data contents because it is mmap'd - b += int(unsafe.Sizeof(f.data)) - b += int(unsafe.Sizeof(f.res)) - b += int(unsafe.Sizeof(f.sfile)) - b += int(unsafe.Sizeof(f.sfileref)) - // Do not count SeriesFile because it belongs to the code that constructed this IndexFile. - b += int(unsafe.Sizeof(f.tblks)) - for k, v := range f.tblks { - // Do not count TagBlock contents, they all reference f.data - b += int(unsafe.Sizeof(k)) + len(k) - b += int(unsafe.Sizeof(*v)) - } - b += int(unsafe.Sizeof(f.mblk)) + f.mblk.bytes() - b += int(unsafe.Sizeof(f.seriesIDSetData) + unsafe.Sizeof(f.tombstoneSeriesIDSetData)) - // Do not count contents of seriesIDSetData or tombstoneSeriesIDSetData: references f.data - b += int(unsafe.Sizeof(f.level) + unsafe.Sizeof(f.id)) - b += 24 // mu RWMutex is 24 bytes - b += int(unsafe.Sizeof(f.compacting)) - b += int(unsafe.Sizeof(f.path)) + len(f.path) - - return b -} - -// Open memory maps the data file at the file's path. -func (f *IndexFile) Open() (err error) { - defer func() { - if err := recover(); err != nil { - err = fmt.Errorf("[Index file: %s] %v", f.path, err) - panic(err) - } - }() - - // Try to acquire a reference to the series file. - f.sfileref, err = f.sfile.Acquire() - if err != nil { - return err - } - - // Extract identifier from path name. - f.id, f.level = ParseFilename(f.Path()) - - data, err := mmap.Map(f.Path(), 0) - if err != nil { - f.sfileref.Release() - return err - } - - if err := f.UnmarshalBinary(data); err != nil { - f.sfileref.Release() - f.Close() - return err - } - - // The resource is now open - f.res.Open() - - return nil -} - -// Close unmaps the data file. -func (f *IndexFile) Close() error { - // Close the resource and wait for any references. - f.res.Close() - - if f.sfileref != nil { - f.sfileref.Release() - f.sfileref = nil - } - - f.sfile = nil - f.tblks = nil - f.mblk = MeasurementBlock{} - return mmap.Unmap(f.data) -} - -// ID returns the file sequence identifier. -func (f *IndexFile) ID() int { return f.id } - -// Path returns the file path. -func (f *IndexFile) Path() string { return f.path } - -// SetPath sets the file's path. -func (f *IndexFile) SetPath(path string) { f.path = path } - -// Level returns the compaction level for the file. -func (f *IndexFile) Level() int { return f.level } - -// Acquire adds a reference count to the file. 
-func (f *IndexFile) Acquire() (*lifecycle.Reference, error) { - return f.res.Acquire() -} - -// Size returns the size of the index file, in bytes. -func (f *IndexFile) Size() int64 { return int64(len(f.data)) } - -// Compacting returns true if the file is being compacted. -func (f *IndexFile) Compacting() bool { - f.mu.RLock() - v := f.compacting - f.mu.RUnlock() - return v -} - -// UnmarshalBinary opens an index from data. -// The byte slice is retained so it must be kept open. -func (f *IndexFile) UnmarshalBinary(data []byte) error { - // Ensure magic number exists at the beginning. - if len(data) < len(FileSignature) { - return io.ErrShortBuffer - } else if !bytes.Equal(data[:len(FileSignature)], []byte(FileSignature)) { - return ErrInvalidIndexFile - } - - // Read index file trailer. - t, err := ReadIndexFileTrailer(data) - if err != nil { - return err - } - - // Slice series set data. - f.seriesIDSetData = data[t.SeriesIDSet.Offset : t.SeriesIDSet.Offset+t.SeriesIDSet.Size] - f.tombstoneSeriesIDSetData = data[t.TombstoneSeriesIDSet.Offset : t.TombstoneSeriesIDSet.Offset+t.TombstoneSeriesIDSet.Size] - - // Unmarshal measurement block. - if err := f.mblk.UnmarshalBinary(data[t.MeasurementBlock.Offset:][:t.MeasurementBlock.Size]); err != nil { - return err - } - - // Unmarshal each tag block. - f.tblks = make(map[string]*TagBlock) - itr := f.mblk.Iterator(f.pageFaultLimiter) - - for m := itr.Next(); m != nil; m = itr.Next() { - e := m.(*MeasurementBlockElem) - - // Slice measurement block data. - buf := data[e.tagBlock.offset:] - buf = buf[:e.tagBlock.size] - - // Unmarshal measurement block. - var tblk TagBlock - if err := tblk.UnmarshalBinary(buf); err != nil { - return err - } - f.tblks[string(e.name)] = &tblk - } - - // Save reference to entire data block. - f.data = data - - return nil -} - -func (f *IndexFile) SeriesIDSet() (*tsdb.SeriesIDSet, error) { - ss := tsdb.NewSeriesIDSet() - if err := ss.UnmarshalBinary(f.seriesIDSetData); err != nil { - return nil, err - } - return ss, wait(f.pageFaultLimiter, f.seriesIDSetData) -} - -func (f *IndexFile) TombstoneSeriesIDSet() (*tsdb.SeriesIDSet, error) { - ss := tsdb.NewSeriesIDSet() - if err := ss.UnmarshalBinaryUnsafe(f.tombstoneSeriesIDSetData); err != nil { - return nil, err - } - return ss, wait(f.pageFaultLimiter, f.tombstoneSeriesIDSetData) -} - -// Measurement returns a measurement element. -func (f *IndexFile) Measurement(name []byte) MeasurementElem { - e, ok := f.mblk.Elem(name, f.pageFaultLimiter) - if !ok { - return nil - } - return &e -} - -// MeasurementN returns the number of measurements in the file. -func (f *IndexFile) MeasurementN() (n uint64) { - mitr := f.mblk.Iterator(f.pageFaultLimiter) - for me := mitr.Next(); me != nil; me = mitr.Next() { - n++ - } - return n -} - -// MeasurementHasSeries returns true if a measurement has any non-tombstoned series. -func (f *IndexFile) MeasurementHasSeries(ss *tsdb.SeriesIDSet, name []byte) (ok bool) { - e, ok := f.mblk.Elem(name, f.pageFaultLimiter) - if !ok { - return false - } - - var exists bool - e.ForEachSeriesID(func(id tsdb.SeriesID) error { - if ss.Contains(id) { - exists = true - return errors.New("done") - } - return nil - }) - return exists -} - -// TagValueIterator returns a value iterator for a tag key and a flag -// indicating if a tombstone exists on the measurement or key. -func (f *IndexFile) TagValueIterator(name, key []byte) TagValueIterator { - tblk := f.tblks[string(name)] - if tblk == nil { - return nil - } - - // Find key element. 
- ke := tblk.TagKeyElem(key, f.pageFaultLimiter) - if ke == nil { - return nil - } - - // Merge all value series iterators together. - return ke.TagValueIterator(f.pageFaultLimiter) -} - -// TagKeySeriesIDIterator returns a series iterator for a tag key and a flag -// indicating if a tombstone exists on the measurement or key. -func (f *IndexFile) TagKeySeriesIDIterator(name, key []byte) (tsdb.SeriesIDIterator, error) { - tblk := f.tblks[string(name)] - if tblk == nil { - return nil, nil - } - - // Find key element. - ke := tblk.TagKeyElem(key, f.pageFaultLimiter) - if ke == nil { - return nil, nil - } - - // Merge all value series iterators together. - vitr := ke.TagValueIterator(f.pageFaultLimiter) - - var itrs []tsdb.SeriesIDIterator - for ve := vitr.Next(); ve != nil; ve = vitr.Next() { - tblk, ok := ve.(*TagBlockValueElem) - if !ok { - return nil, fmt.Errorf("got type %T for iterator, expected %T", ve, TagBlockValueElem{}) - } - - ss, err := tblk.SeriesIDSet() - if err != nil { - return nil, err - } - itrs = append(itrs, tsdb.NewSeriesIDSetIterator(ss)) - } - - return tsdb.MergeSeriesIDIterators(itrs...), nil -} - -// TagValueSeriesIDSet returns a series id set for a tag value. -func (f *IndexFile) TagValueSeriesIDSet(name, key, value []byte) (*tsdb.SeriesIDSet, error) { - tblk := f.tblks[string(name)] - if tblk == nil { - return nil, nil - } - - // Find value element. - var valueElem TagBlockValueElem - if !tblk.DecodeTagValueElem(key, value, &valueElem, f.pageFaultLimiter) { - return nil, nil - } else if valueElem.SeriesN() == 0 { - return nil, nil - } - return valueElem.SeriesIDSet() -} - -// TagKey returns a tag key. -func (f *IndexFile) TagKey(name, key []byte) TagKeyElem { - tblk := f.tblks[string(name)] - if tblk == nil { - return nil - } - return tblk.TagKeyElem(key, f.pageFaultLimiter) -} - -// TagValue returns a tag value. -func (f *IndexFile) TagValue(name, key, value []byte) TagValueElem { - tblk := f.tblks[string(name)] - if tblk == nil { - return nil - } - return tblk.TagValueElem(key, value, f.pageFaultLimiter) -} - -// HasSeries returns flags indicating if the series exists and if it is tombstoned. -func (f *IndexFile) HasSeries(name []byte, tags models.Tags, buf []byte) (exists, tombstoned bool) { - return f.sfile.HasSeries(name, tags, buf), false // TODO(benbjohnson): series tombstone -} - -// TagValueElem returns an element for a measurement/tag/value. -func (f *IndexFile) TagValueElem(name, key, value []byte) TagValueElem { - tblk, ok := f.tblks[string(name)] - if !ok { - return nil - } - return tblk.TagValueElem(key, value, f.pageFaultLimiter) -} - -// MeasurementIterator returns an iterator over all measurements. -func (f *IndexFile) MeasurementIterator() MeasurementIterator { - return f.mblk.Iterator(f.pageFaultLimiter) -} - -// TagKeyIterator returns an iterator over all tag keys for a measurement. -func (f *IndexFile) TagKeyIterator(name []byte) TagKeyIterator { - blk := f.tblks[string(name)] - if blk == nil { - return nil - } - return blk.TagKeyIterator(f.pageFaultLimiter) -} - -// MeasurementSeriesIDIterator returns an iterator over a measurement's series. -func (f *IndexFile) MeasurementSeriesIDIterator(name []byte) tsdb.SeriesIDIterator { - return f.mblk.SeriesIDIterator(name, f.pageFaultLimiter) -} - -// ReadIndexFileTrailer returns the index file trailer from data. -func ReadIndexFileTrailer(data []byte) (IndexFileTrailer, error) { - var t IndexFileTrailer - - // Read version. 
- t.Version = int(binary.BigEndian.Uint16(data[len(data)-IndexFileVersionSize:])) - if t.Version != IndexFileVersion { - return t, ErrUnsupportedIndexFileVersion - } - - // Slice trailer data. - buf := data[len(data)-IndexFileTrailerSize:] - - // Read measurement block info. - t.MeasurementBlock.Offset, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] - t.MeasurementBlock.Size, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] - - // Read series id set info. - t.SeriesIDSet.Offset, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] - t.SeriesIDSet.Size, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] - - // Read series tombstone id set info. - t.TombstoneSeriesIDSet.Offset, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] - t.TombstoneSeriesIDSet.Size, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] - - // Skip over any legacy sketch data. - buf = buf[8*4:] - - if len(buf) != 2 { // Version field still in buffer. - return t, fmt.Errorf("unread %d bytes left unread in trailer", len(buf)-2) - } - return t, nil -} - -// IndexFileTrailer represents meta data written to the end of the index file. -type IndexFileTrailer struct { - Version int - - MeasurementBlock struct { - Offset int64 - Size int64 - } - - SeriesIDSet struct { - Offset int64 - Size int64 - } - - TombstoneSeriesIDSet struct { - Offset int64 - Size int64 - } -} - -// WriteTo writes the trailer to w. -func (t *IndexFileTrailer) WriteTo(w io.Writer) (n int64, err error) { - // Write measurement block info. - if err := writeUint64To(w, uint64(t.MeasurementBlock.Offset), &n); err != nil { - return n, err - } else if err := writeUint64To(w, uint64(t.MeasurementBlock.Size), &n); err != nil { - return n, err - } - - // Write series id set info. - if err := writeUint64To(w, uint64(t.SeriesIDSet.Offset), &n); err != nil { - return n, err - } else if err := writeUint64To(w, uint64(t.SeriesIDSet.Size), &n); err != nil { - return n, err - } - - // Write tombstone series id set info. - if err := writeUint64To(w, uint64(t.TombstoneSeriesIDSet.Offset), &n); err != nil { - return n, err - } else if err := writeUint64To(w, uint64(t.TombstoneSeriesIDSet.Size), &n); err != nil { - return n, err - } - - // Write legacy sketch info. - for i := 0; i < 4; i++ { - if err := writeUint64To(w, 0, &n); err != nil { - return n, err - } - } - - // Write index file encoding version. - if err := writeUint16To(w, IndexFileVersion, &n); err != nil { - return n, err - } - - return n, nil -} - -// FormatIndexFileName generates an index filename for the given index. -func FormatIndexFileName(id, level int) string { - return fmt.Sprintf("L%d-%08d%s", level, id, IndexFileExt) -} diff --git a/tsdb/tsi1/index_file_test.go b/tsdb/tsi1/index_file_test.go deleted file mode 100644 index 8276249831..0000000000 --- a/tsdb/tsi1/index_file_test.go +++ /dev/null @@ -1,271 +0,0 @@ -package tsi1_test - -import ( - "bytes" - "context" - "fmt" - "reflect" - "testing" - "time" - - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" - "github.com/influxdata/influxdb/v2/tsdb/tsi1" -) - -// Ensure a simple index file can be built and opened. 
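// ReadIndexFileTrailer and IndexFileTrailer.WriteTo above describe a fixed 82-byte footer:
// three (offset, size) uint64 pairs, four unused legacy sketch words, then a 2-byte version,
// all big-endian. A standalone round-trip sketch of just that byte layout; this is an
// illustration under those assumptions, not the tsi1 writer itself.
package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// trailer mirrors the footer field order: measurement block, series id set,
// tombstone series id set, legacy sketch placeholders, version.
type trailer struct {
	MeasurementBlockOffset, MeasurementBlockSize uint64
	SeriesIDSetOffset, SeriesIDSetSize           uint64
	TombstoneOffset, TombstoneSize               uint64
	Version                                      uint16
}

func (t trailer) encode() []byte {
	var buf bytes.Buffer
	for _, v := range []uint64{
		t.MeasurementBlockOffset, t.MeasurementBlockSize,
		t.SeriesIDSetOffset, t.SeriesIDSetSize,
		t.TombstoneOffset, t.TombstoneSize,
		0, 0, 0, 0, // legacy sketch offset/size slots, kept only for compatibility
	} {
		binary.Write(&buf, binary.BigEndian, v)
	}
	binary.Write(&buf, binary.BigEndian, t.Version)
	return buf.Bytes() // 10*8 + 2 = 82 bytes, matching the trailer size above
}

func main() {
	b := trailer{MeasurementBlockOffset: 4, MeasurementBlockSize: 128, Version: 1}.encode()
	fmt.Println(len(b)) // 82
	// Readers work backwards from the end of the file: the final two bytes hold the version.
	fmt.Println(binary.BigEndian.Uint16(b[len(b)-2:])) // 1
}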
-func TestCreateIndexFile(t *testing.T) {
- sfile := MustOpenSeriesFile()
- defer sfile.Close()
-
- f, err := CreateIndexFile(sfile.SeriesFile, []Series{
- {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"}), Type: models.Integer},
- {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"}), Type: models.Integer},
- {Name: []byte("mem"), Tags: models.NewTags(map[string]string{"region": "east"}), Type: models.Integer},
- })
- if err != nil {
- t.Fatal(err)
- }
- defer f.Close()
-
- if e := f.TagValueElem([]byte("cpu"), []byte("region"), []byte("west")); e == nil {
- t.Fatal("expected element")
- } else if n := e.(*tsi1.TagBlockValueElem).SeriesN(); n != 1 {
- t.Fatalf("unexpected series count: %d", n)
- }
-}
-
-func TestIndexFile_TagKeySeriesIDIterator(t *testing.T) {
- sfile := MustOpenSeriesFile()
- defer sfile.Close()
-
- f, err := CreateIndexFile(sfile.SeriesFile, []Series{
- {Name: []byte("mem"), Tags: models.NewTags(map[string]string{"region": "east"}), Type: models.Integer},
- {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"}), Type: models.Integer},
- {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"}), Type: models.Integer},
- })
- if err != nil {
- t.Fatal(err)
- }
- defer f.Close()
-
- itr, err := f.TagKeySeriesIDIterator([]byte("cpu"), []byte("region"))
- if err != nil {
- t.Fatal(err)
- }
- defer itr.Close()
-
- // NOTE(edd): the series keys end up being emitted in this order because the
- // series were written to different partitions in the _series file_. As such,
- // the key with region=west ends up with a lower series ID than the region=east
- // series, even though it was written later. When the series id sets for each
- // tag block in the index file are merged together and iterated, the roaring
- // bitmap library sorts the series ids, resulting in the series keys being
- // emitted in a different order from that in which they were written.
- exp := []string{"cpu,region=west", "cpu,region=east"}
- var got []string
- for {
- e, err := itr.Next()
- if err != nil {
- t.Fatal(err)
- }
-
- if e.SeriesID.ID == 0 {
- break
- }
- fmt.Println(e.SeriesID.ID)
-
- name, tags := seriesfile.ParseSeriesKey(sfile.SeriesKey(e.SeriesID))
- got = append(got, string(models.MustNewPoint(string(name), tags, models.Fields{"a": "a"}, time.Time{}).Key()))
- }
-
- if !reflect.DeepEqual(got, exp) {
- t.Fatalf("got keys %v, expected %v", got, exp)
- }
-}
-
-// Ensure an index file can be successfully generated.
-func TestGenerateIndexFile(t *testing.T) {
- sfile := MustOpenSeriesFile()
- defer sfile.Close()
-
- // Build generated index file.
- f, err := GenerateIndexFile(sfile.SeriesFile, 10, 3, 4)
- if err != nil {
- t.Fatal(err)
- }
- defer f.Close()
-
- // Verify that tag/value series can be fetched.
- if e := f.TagValueElem([]byte("measurement0"), []byte("key0"), []byte("value0")); e == nil {
- t.Fatal("expected element")
- } else if n := e.(*tsi1.TagBlockValueElem).SeriesN(); n == 0 {
- t.Fatal("expected series")
- }
-}
-
-// Ensure index file generated with uvarint encoding can be loaded.
-func TestGenerateIndexFile_Uvarint(t *testing.T) {
- // Load previously generated series file.
- sfile := seriesfile.NewSeriesFile("testdata/uvarint/_series")
- if err := sfile.Open(context.Background()); err != nil {
- t.Fatal(err)
- }
- defer sfile.Close()
-
- // Load legacy index file from disk.
- f := tsi1.NewIndexFile(sfile)
- f.SetPath("testdata/uvarint/index")
- if err := f.Open(); err != nil {
- t.Fatal(err)
- }
- defer f.Close()
-
- // Verify that tag/value series can be fetched.
- if e := f.TagValueElem([]byte("measurement0"), []byte("key0"), []byte("value0")); e == nil {
- t.Fatal("expected element")
- } else if n := e.(*tsi1.TagBlockValueElem).SeriesN(); n == 0 {
- t.Fatal("expected series")
- }
-}
-
-// Ensure MeasurementHasSeries returns false when all series are tombstoned.
-func TestIndexFile_MeasurementHasSeries_Tombstoned(t *testing.T) {
- sfile := MustOpenSeriesFile()
- defer sfile.Close()
-
- f, err := CreateIndexFile(sfile.SeriesFile, []Series{
- {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"}), Type: models.Integer},
- })
- if err != nil {
- t.Fatal(err)
- }
- defer f.Close()
-
- // Simulate all series are tombstoned
- ss := tsdb.NewSeriesIDSet()
-
- if f.MeasurementHasSeries(ss, []byte("cpu")) {
- t.Fatalf("MeasurementHasSeries got true, exp false")
- }
-}
-
-func BenchmarkIndexFile_TagValueSeries(b *testing.B) {
- b.Run("M=1,K=2,V=3", func(b *testing.B) {
- sfile := MustOpenSeriesFile()
- defer sfile.Close()
- benchmarkIndexFile_TagValueSeries(b, MustFindOrGenerateIndexFile(sfile.SeriesFile, 1, 2, 3))
- })
- b.Run("M=10,K=5,V=5", func(b *testing.B) {
- sfile := MustOpenSeriesFile()
- defer sfile.Close()
- benchmarkIndexFile_TagValueSeries(b, MustFindOrGenerateIndexFile(sfile.SeriesFile, 10, 5, 5))
- })
- b.Run("M=10,K=7,V=7", func(b *testing.B) {
- sfile := MustOpenSeriesFile()
- defer sfile.Close()
- benchmarkIndexFile_TagValueSeries(b, MustFindOrGenerateIndexFile(sfile.SeriesFile, 10, 7, 7))
- })
-}
-
-func benchmarkIndexFile_TagValueSeries(b *testing.B, idx *tsi1.IndexFile) {
- b.ResetTimer()
- b.ReportAllocs()
-
- for i := 0; i < b.N; i++ {
- if e := idx.TagValueElem([]byte("measurement0"), []byte("key0"), []byte("value0")); e == nil {
- b.Fatal("expected element")
- } else if e.(*tsi1.TagBlockValueElem).SeriesN() == 0 {
- b.Fatal("expected series")
- }
- }
-}
-
-// CreateIndexFile creates an index file with a given set of series.
-func CreateIndexFile(sfile *seriesfile.SeriesFile, series []Series) (*tsi1.IndexFile, error) {
- lf, err := CreateLogFile(sfile, series)
- if err != nil {
- return nil, err
- }
- defer lf.Close()
-
- // Write index file to buffer.
- var buf bytes.Buffer
- if _, err := lf.CompactTo(&buf, M, K, nil); err != nil {
- return nil, err
- }
-
- // Load index file from buffer.
- f := tsi1.NewIndexFile(sfile)
- if err := f.UnmarshalBinary(buf.Bytes()); err != nil {
- return nil, err
- }
- return f, nil
-}
-
-// GenerateIndexFile generates an index file from a set of series based on the count arguments.
-// Total series returned will equal measurementN * tagN * valueN.
-func GenerateIndexFile(sfile *seriesfile.SeriesFile, measurementN, tagN, valueN int) (*tsi1.IndexFile, error) {
- // Generate a new log file first.
- lf, err := GenerateLogFile(sfile, measurementN, tagN, valueN)
- if err != nil {
- return nil, err
- }
- defer lf.Close()
-
- // Compact log file to buffer.
- var buf bytes.Buffer
- if _, err := lf.CompactTo(&buf, M, K, nil); err != nil {
- return nil, err
- }
-
- // Load index file from buffer.
- f := tsi1.NewIndexFile(sfile) - if err := f.UnmarshalBinary(buf.Bytes()); err != nil { - return nil, err - } - return f, nil -} - -func MustGenerateIndexFile(sfile *seriesfile.SeriesFile, measurementN, tagN, valueN int) *tsi1.IndexFile { - f, err := GenerateIndexFile(sfile, measurementN, tagN, valueN) - if err != nil { - panic(err) - } - return f -} - -var indexFileCache struct { - MeasurementN int - TagN int - ValueN int - - IndexFile *tsi1.IndexFile -} - -// MustFindOrGenerateIndexFile returns a cached index file or generates one if it doesn't exist. -func MustFindOrGenerateIndexFile(sfile *seriesfile.SeriesFile, measurementN, tagN, valueN int) *tsi1.IndexFile { - // Use cache if fields match and the index file has been generated. - if indexFileCache.MeasurementN == measurementN && - indexFileCache.TagN == tagN && - indexFileCache.ValueN == valueN && - indexFileCache.IndexFile != nil { - return indexFileCache.IndexFile - } - - // Generate and cache. - indexFileCache.MeasurementN = measurementN - indexFileCache.TagN = tagN - indexFileCache.ValueN = valueN - indexFileCache.IndexFile = MustGenerateIndexFile(sfile, measurementN, tagN, valueN) - return indexFileCache.IndexFile -} - -func pow(x, y int) int { - r := 1 - for i := 0; i < y; i++ { - r *= x - } - return r -} diff --git a/tsdb/tsi1/index_files.go b/tsdb/tsi1/index_files.go deleted file mode 100644 index 62ecf5917c..0000000000 --- a/tsdb/tsi1/index_files.go +++ /dev/null @@ -1,425 +0,0 @@ -package tsi1 - -import ( - "bufio" - "io" - "os" - "sort" - "time" - - "github.com/influxdata/influxdb/v2/pkg/bytesutil" - "github.com/influxdata/influxdb/v2/pkg/lifecycle" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" -) - -// IndexFiles represents a layered set of index files. -type IndexFiles []*IndexFile - -// IDs returns the ids for all index files. -func (p IndexFiles) IDs() []int { - a := make([]int, len(p)) - for i, f := range p { - a[i] = f.ID() - } - return a -} - -// Acquire acquires a reference to each file in the index files. -func (p IndexFiles) Acquire() (lifecycle.References, error) { - refs := make(lifecycle.References, 0, len(p)) - for _, f := range p { - ref, err := f.Acquire() - if err != nil { - for _, ref := range refs { - ref.Release() - } - return nil, err - } - refs = append(refs, ref) - } - return refs, nil -} - -// Files returns p as a list of File objects. -func (p IndexFiles) Files() []File { - other := make([]File, len(p)) - for i, f := range p { - other[i] = f - } - return other -} - -func (p IndexFiles) buildSeriesIDSets() (seriesIDSet, tombstoneSeriesIDSet *tsdb.SeriesIDSet, err error) { - if len(p) == 0 { - return tsdb.NewSeriesIDSet(), tsdb.NewSeriesIDSet(), nil - } - - // Start with sets from last file. - if seriesIDSet, err = p[len(p)-1].SeriesIDSet(); err != nil { - return nil, nil, err - } else if tombstoneSeriesIDSet, err = p[len(p)-1].TombstoneSeriesIDSet(); err != nil { - return nil, nil, err - } - - // Build sets in reverse order. - // This assumes that bits in both sets are mutually exclusive. - for i := len(p) - 2; i >= 0; i-- { - ss, err := p[i].SeriesIDSet() - if err != nil { - return nil, nil, err - } - - ts, err := p[i].TombstoneSeriesIDSet() - if err != nil { - return nil, nil, err - } - - // Add tombstones and remove from old series existence set. - seriesIDSet.Diff(ts) - tombstoneSeriesIDSet.Merge(ts) - - // Add new series and remove from old series tombstone set. 
- tombstoneSeriesIDSet.Diff(ss) - seriesIDSet.Merge(ss) - } - - return seriesIDSet, tombstoneSeriesIDSet, nil -} - -// MeasurementNames returns a sorted list of all measurement names for all files. -func (p *IndexFiles) MeasurementNames() [][]byte { - itr := p.MeasurementIterator() - if itr == nil { - return nil - } - - var names [][]byte - for e := itr.Next(); e != nil; e = itr.Next() { - names = append(names, bytesutil.Clone(e.Name())) - } - sort.Sort(byteSlices(names)) - return names -} - -// MeasurementIterator returns an iterator that merges measurements across all files. -func (p IndexFiles) MeasurementIterator() MeasurementIterator { - a := make([]MeasurementIterator, 0, len(p)) - for i := range p { - itr := p[i].MeasurementIterator() - if itr == nil { - continue - } - a = append(a, itr) - } - return MergeMeasurementIterators(a...) -} - -// TagKeyIterator returns an iterator that merges tag keys across all files. -func (p *IndexFiles) TagKeyIterator(name []byte) (TagKeyIterator, error) { - a := make([]TagKeyIterator, 0, len(*p)) - for _, f := range *p { - itr := f.TagKeyIterator(name) - if itr == nil { - continue - } - a = append(a, itr) - } - return MergeTagKeyIterators(a...), nil -} - -// MeasurementSeriesIDIterator returns an iterator that merges series across all files. -func (p IndexFiles) MeasurementSeriesIDIterator(name []byte) tsdb.SeriesIDIterator { - a := make([]tsdb.SeriesIDIterator, 0, len(p)) - for _, f := range p { - itr := f.MeasurementSeriesIDIterator(name) - if itr == nil { - continue - } - a = append(a, itr) - } - return tsdb.MergeSeriesIDIterators(a...) -} - -// TagValueSeriesIDSet returns an iterator that merges series across all files. -func (p IndexFiles) TagValueSeriesIDSet(name, key, value []byte) (*tsdb.SeriesIDSet, error) { - ss := tsdb.NewSeriesIDSet() - for i := range p { - if fss, err := p[i].TagValueSeriesIDSet(name, key, value); err != nil { - return nil, err - } else if fss != nil { - ss.Merge(fss) - } - } - return ss, nil -} - -// CompactTo merges all index files and writes them to w. -func (p IndexFiles) CompactTo(w io.Writer, sfile *seriesfile.SeriesFile, m, k uint64, cancel <-chan struct{}) (n int64, err error) { - var t IndexFileTrailer - - // Check for cancellation. - select { - case <-cancel: - return n, ErrCompactionInterrupted - default: - } - - // Wrap writer in buffered I/O. - bw := bufio.NewWriter(w) - - // Setup context object to track shared data for this compaction. - var info indexCompactInfo - info.cancel = cancel - info.tagSets = make(map[string]indexTagSetPos) - - // Write magic number. - if err := writeTo(bw, []byte(FileSignature), &n); err != nil { - return n, err - } - - // Flush buffer before re-mapping. - if err := bw.Flush(); err != nil { - return n, err - } - - // Write tagset blocks in measurement order. - if err := p.writeTagsetsTo(bw, &info, &n); err != nil { - return n, err - } - - // Ensure block is word aligned. - // if offset := n % 8; offset != 0 { - // if err := writeTo(bw, make([]byte, 8-offset), &n); err != nil { - // return n, err - // } - // } - - // Write measurement block. - t.MeasurementBlock.Offset = n - if err := p.writeMeasurementBlockTo(bw, &info, &n); err != nil { - return n, err - } - t.MeasurementBlock.Size = n - t.MeasurementBlock.Offset - - // Build series sets. - seriesIDSet, tombstoneSeriesIDSet, err := p.buildSeriesIDSets() - if err != nil { - return n, err - } - - // Write series set. 
- t.SeriesIDSet.Offset = n - nn, err := seriesIDSet.WriteTo(bw) - if n += nn; err != nil { - return n, err - } - t.SeriesIDSet.Size = n - t.SeriesIDSet.Offset - - // Write tombstone series set. - t.TombstoneSeriesIDSet.Offset = n - nn, err = tombstoneSeriesIDSet.WriteTo(bw) - if n += nn; err != nil { - return n, err - } - t.TombstoneSeriesIDSet.Size = n - t.TombstoneSeriesIDSet.Offset - - // Write trailer. - nn, err = t.WriteTo(bw) - n += nn - if err != nil { - return n, err - } - - // Flush file. - if err := bw.Flush(); err != nil { - return n, err - } - - return n, nil -} - -func (p IndexFiles) writeTagsetsTo(w io.Writer, info *indexCompactInfo, n *int64) error { - mitr := p.MeasurementIterator() - if mitr == nil { - return nil - } - - for m := mitr.Next(); m != nil; m = mitr.Next() { - if err := p.writeTagsetTo(w, m.Name(), info, n); err != nil { - return err - } - } - return nil -} - -// writeTagsetTo writes a single tagset to w and saves the tagset offset. -func (p IndexFiles) writeTagsetTo(w io.Writer, name []byte, info *indexCompactInfo, n *int64) error { - var seriesIDs []tsdb.SeriesID - - // Check for cancellation. - select { - case <-info.cancel: - return ErrCompactionInterrupted - default: - } - - // Ensure block is word aligned. - // if offset := (*n) % 8; offset != 0 { - // if err := writeTo(w, make([]byte, 8-offset), n); err != nil { - // return err - // } - // } - - kitr, err := p.TagKeyIterator(name) - if err != nil { - return err - } - - enc := NewTagBlockEncoder(w) - for ke := kitr.Next(); ke != nil; ke = kitr.Next() { - // Encode key. - if err := enc.EncodeKey(ke.Key(), ke.Deleted()); err != nil { - return err - } - - // Iterate over tag values. - vitr := ke.TagValueIterator(nil) - for ve := vitr.Next(); ve != nil; ve = vitr.Next() { - seriesIDs = seriesIDs[:0] - - // Merge all series together. - if err := func() error { - ss, err := p.TagValueSeriesIDSet(name, ke.Key(), ve.Value()) - if err != nil { - return err - } - return enc.EncodeValue(ve.Value(), ve.Deleted(), ss) - }(); err != nil { - return nil - } - } - } - - // Save tagset offset to measurement. - pos := info.tagSets[string(name)] - pos.offset = *n - - // Flush data to writer. - err = enc.Close() - *n += enc.N() - if err != nil { - return err - } - - // Save tagset size to measurement. - pos.size = *n - pos.offset - - info.tagSets[string(name)] = pos - - return nil -} - -func (p IndexFiles) writeMeasurementBlockTo(w io.Writer, info *indexCompactInfo, n *int64) error { - mw := NewMeasurementBlockWriter() - - // Check for cancellation. - select { - case <-info.cancel: - return ErrCompactionInterrupted - default: - } - - // Add measurement data & compute sketches. - mitr := p.MeasurementIterator() - if mitr != nil { - var seriesN int - for m := mitr.Next(); m != nil; m = mitr.Next() { - name := m.Name() - - // Look-up series ids. - if err := func() error { - itr := p.MeasurementSeriesIDIterator(name) - defer itr.Close() - - var seriesIDs []tsdb.SeriesID - for { - e, err := itr.Next() - if err != nil { - return err - } else if e.SeriesID.IsZero() { - break - } - seriesIDs = append(seriesIDs, e.SeriesID) - - // Check for cancellation periodically. - if seriesN++; seriesN%1000 == 0 { - select { - case <-info.cancel: - return ErrCompactionInterrupted - default: - } - } - } - sort.Slice(seriesIDs, func(i, j int) bool { return seriesIDs[i].Less(seriesIDs[j]) }) - - // Add measurement to writer. 
- pos := info.tagSets[string(name)] - mw.Add(name, m.Deleted(), pos.offset, pos.size, seriesIDs) - - return nil - }(); err != nil { - return err - } - } - } - - // Flush data to writer. - nn, err := mw.WriteTo(w) - *n += nn - return err -} - -// Stat returns the max index file size and the total file size for all index files. -func (p IndexFiles) Stat() (*IndexFilesInfo, error) { - var info IndexFilesInfo - for _, f := range p { - fi, err := os.Stat(f.Path()) - if os.IsNotExist(err) { - continue - } else if err != nil { - return nil, err - } - - if fi.Size() > info.MaxSize { - info.MaxSize = fi.Size() - } - if fi.ModTime().After(info.ModTime) { - info.ModTime = fi.ModTime() - } - - info.Size += fi.Size() - } - return &info, nil -} - -type IndexFilesInfo struct { - MaxSize int64 // largest file size - Size int64 // total file size - ModTime time.Time // last modified -} - -// indexCompactInfo is a context object used for tracking position information -// during the compaction of index files. -type indexCompactInfo struct { - cancel <-chan struct{} - - // Tracks offset/size for each measurement's tagset. - tagSets map[string]indexTagSetPos -} - -// indexTagSetPos stores the offset/size of tagsets. -type indexTagSetPos struct { - offset int64 - size int64 -} diff --git a/tsdb/tsi1/index_files_test.go b/tsdb/tsi1/index_files_test.go deleted file mode 100644 index 06ac03aa04..0000000000 --- a/tsdb/tsi1/index_files_test.go +++ /dev/null @@ -1,56 +0,0 @@ -package tsi1_test - -import ( - "bytes" - "testing" - - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb/tsi1" -) - -// Ensure multiple index files can be compacted together. -func TestIndexFiles_WriteTo(t *testing.T) { - sfile := MustOpenSeriesFile() - defer sfile.Close() - - // Write first file. - f0, err := CreateIndexFile(sfile.SeriesFile, []Series{ - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"}), Type: models.Integer}, - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"}), Type: models.Integer}, - {Name: []byte("mem"), Tags: models.NewTags(map[string]string{"region": "east"}), Type: models.Integer}, - }) - if err != nil { - t.Fatal(err) - } - - // Write second file. - f1, err := CreateIndexFile(sfile.SeriesFile, []Series{ - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"}), Type: models.Integer}, - {Name: []byte("disk"), Tags: models.NewTags(map[string]string{"region": "east"}), Type: models.Integer}, - }) - if err != nil { - t.Fatal(err) - } - - // Compact the two together and write out to a buffer. - var buf bytes.Buffer - a := tsi1.IndexFiles{f0, f1} - if n, err := a.CompactTo(&buf, sfile.SeriesFile, M, K, nil); err != nil { - t.Fatal(err) - } else if n == 0 { - t.Fatal("expected data written") - } - - // Unmarshal buffer into a new index file. - f := tsi1.NewIndexFile(sfile.SeriesFile) - if err := f.UnmarshalBinary(buf.Bytes()); err != nil { - t.Fatal(err) - } - - // Verify data in compacted file. 
- if e := f.TagValueElem([]byte("cpu"), []byte("region"), []byte("west")); e == nil { - t.Fatal("expected element") - } else if n := e.(*tsi1.TagBlockValueElem).SeriesN(); n != 1 { - t.Fatalf("unexpected series count: %d", n) - } -} diff --git a/tsdb/tsi1/index_test.go b/tsdb/tsi1/index_test.go deleted file mode 100644 index 31ee984086..0000000000 --- a/tsdb/tsi1/index_test.go +++ /dev/null @@ -1,822 +0,0 @@ -package tsi1_test - -import ( - "compress/gzip" - "context" - "fmt" - "io/ioutil" - "math/rand" - "os" - "path/filepath" - "reflect" - "regexp" - "sync" - "testing" - "time" - - "github.com/google/go-cmp/cmp" - "github.com/influxdata/influxdb/v2/logger" - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" - "github.com/influxdata/influxdb/v2/tsdb/tsi1" - "go.uber.org/zap" - "go.uber.org/zap/zaptest" -) - -// Bloom filter settings used in tests. -const M, K = 4096, 6 - -// Ensure index can iterate over all measurement names. -func TestIndex_ForEachMeasurementName(t *testing.T) { - idx := MustOpenIndex(1, tsi1.NewConfig()) - defer idx.Close() - - // Add series to index. - if err := idx.CreateSeriesSliceIfNotExists([]Series{ - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"})}, - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"})}, - {Name: []byte("mem"), Tags: models.NewTags(map[string]string{"region": "east"})}, - }); err != nil { - t.Fatal(err) - } - - // Verify measurements are returned. - idx.Run(t, func(t *testing.T) { - var names []string - if err := idx.ForEachMeasurementName(func(name []byte) error { - names = append(names, string(name)) - return nil - }); err != nil { - t.Fatal(err) - } - - if !reflect.DeepEqual(names, []string{"cpu", "mem"}) { - t.Fatalf("unexpected names: %#v", names) - } - }) - - // Add more series. - if err := idx.CreateSeriesSliceIfNotExists([]Series{ - {Name: []byte("disk")}, - {Name: []byte("mem")}, - }); err != nil { - t.Fatal(err) - } - - // Verify new measurements. - idx.Run(t, func(t *testing.T) { - var names []string - if err := idx.ForEachMeasurementName(func(name []byte) error { - names = append(names, string(name)) - return nil - }); err != nil { - t.Fatal(err) - } - - if !reflect.DeepEqual(names, []string{"cpu", "disk", "mem"}) { - t.Fatalf("unexpected names: %#v", names) - } - }) -} - -// Ensure index can return whether a measurement exists. -func TestIndex_MeasurementExists(t *testing.T) { - idx := MustOpenIndex(1, tsi1.NewConfig()) - defer idx.Close() - - // Add series to index. - if err := idx.CreateSeriesSliceIfNotExists([]Series{ - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"})}, - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"})}, - }); err != nil { - t.Fatal(err) - } - - // Verify measurement exists. - idx.Run(t, func(t *testing.T) { - if v, err := idx.MeasurementExists([]byte("cpu")); err != nil { - t.Fatal(err) - } else if !v { - t.Fatal("expected measurement to exist") - } - }) - - name, tags := []byte("cpu"), models.NewTags(map[string]string{"region": "east"}) - sid := idx.Index.SeriesFile().SeriesID(name, tags, nil) - if sid.IsZero() { - t.Fatalf("got 0 series id for %s/%v", name, tags) - } - - // Delete one series. - if err := idx.DropSeries([]tsi1.DropSeriesItem{{SeriesID: sid, Key: models.MakeKey(name, tags)}}, true); err != nil { - t.Fatal(err) - } - - // Verify measurement still exists. 
- idx.Run(t, func(t *testing.T) { - if v, err := idx.MeasurementExists([]byte("cpu")); err != nil { - t.Fatal(err) - } else if !v { - t.Fatal("expected measurement to still exist") - } - }) - - // Delete second series. - tags.Set([]byte("region"), []byte("west")) - sid = idx.Index.SeriesFile().SeriesID(name, tags, nil) - if sid.IsZero() { - t.Fatalf("got 0 series id for %s/%v", name, tags) - } - if err := idx.DropSeries([]tsi1.DropSeriesItem{{SeriesID: sid, Key: models.MakeKey(name, tags)}}, true); err != nil { - t.Fatal(err) - } - - // Verify measurement is now deleted. - idx.Run(t, func(t *testing.T) { - if v, err := idx.MeasurementExists([]byte("cpu")); err != nil { - t.Fatal(err) - } else if v { - t.Fatal("expected measurement to be deleted") - } - }) -} - -// Ensure index can return a list of matching measurements. -func TestIndex_MeasurementNamesByRegex(t *testing.T) { - idx := MustOpenIndex(1, tsi1.NewConfig()) - defer idx.Close() - - // Add series to index. - if err := idx.CreateSeriesSliceIfNotExists([]Series{ - {Name: []byte("cpu")}, - {Name: []byte("disk")}, - {Name: []byte("mem")}, - }); err != nil { - t.Fatal(err) - } - - // Retrieve measurements by regex. - idx.Run(t, func(t *testing.T) { - names, err := idx.MeasurementNamesByRegex(regexp.MustCompile(`cpu|mem`)) - if err != nil { - t.Fatal(err) - } else if !reflect.DeepEqual(names, [][]byte{[]byte("cpu"), []byte("mem")}) { - t.Fatalf("unexpected names: %v", names) - } - }) -} - -// Ensure index can delete a measurement and all related keys, values, & series. -func TestIndex_DropMeasurement(t *testing.T) { - idx := MustOpenIndex(1, tsi1.NewConfig()) - defer idx.Close() - - // Add series to index. - if err := idx.CreateSeriesSliceIfNotExists([]Series{ - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"})}, - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"})}, - {Name: []byte("disk"), Tags: models.NewTags(map[string]string{"region": "north"})}, - {Name: []byte("mem"), Tags: models.NewTags(map[string]string{"region": "west", "country": "us"})}, - }); err != nil { - t.Fatal(err) - } - - // Drop measurement. - if err := idx.DropMeasurement([]byte("cpu")); err != nil { - t.Fatal(err) - } - - // Verify data is gone in each stage. - idx.Run(t, func(t *testing.T) { - // Verify measurement is gone. - if v, err := idx.MeasurementExists([]byte("cpu")); err != nil { - t.Fatal(err) - } else if v { - t.Fatal("expected no measurement") - } - - // Obtain file set to perform lower level checks. - fs, err := idx.PartitionAt(0).FileSet() - if err != nil { - t.Fatal(err) - } - defer fs.Release() - - // Verify tags & values are gone. - if e := fs.TagKeyIterator([]byte("cpu")).Next(); e != nil && !e.Deleted() { - t.Fatal("expected deleted tag key") - } - if itr := fs.TagValueIterator([]byte("cpu"), []byte("region")); itr != nil { - t.Fatal("expected nil tag value iterator") - } - - }) -} - -func TestIndex_Open(t *testing.T) { - // Opening a fresh index should set the MANIFEST version to current version. - idx := NewIndex(tsi1.DefaultPartitionN, tsi1.NewConfig()) - defer idx.Close() - - t.Run("open new index", func(t *testing.T) { - if err := idx.Open(); err != nil { - t.Fatal(err) - } - - // Check version set appropriately. 
- for i := 0; uint64(i) < tsi1.DefaultPartitionN; i++ {
- partition := idx.PartitionAt(i)
- fs, err := partition.FileSet()
- if err != nil {
- t.Fatal(err)
- }
- if got, exp := partition.Manifest(fs).Version, 1; got != exp {
- t.Fatalf("got index version %d, expected %d", got, exp)
- }
- fs.Release()
- }
- })
-
- // Reopening an open index should return an error.
- t.Run("reopen open index", func(t *testing.T) {
- err := idx.Open()
- if err == nil {
- idx.Close()
- t.Fatal("didn't get an error on reopen, but expected one")
- }
- idx.Close()
- })
-
- // Opening an incompatible index should return an error.
- incompatibleVersions := []int{-1, 0, 2}
- for _, v := range incompatibleVersions {
- t.Run(fmt.Sprintf("incompatible index version: %d", v), func(t *testing.T) {
- idx = NewIndex(tsi1.DefaultPartitionN, tsi1.NewConfig())
- // Manually create a MANIFEST file for an incompatible index version
- // under one of the partitions.
- partitionPath := filepath.Join(idx.Path(), "2")
- os.MkdirAll(partitionPath, 0777)
-
- mpath := filepath.Join(partitionPath, tsi1.ManifestFileName)
- m := tsi1.NewManifest(mpath)
- m.Levels = nil
- m.Version = v // Set example MANIFEST version.
- if _, err := m.Write(); err != nil {
- t.Fatal(err)
- }
-
- // Log the MANIFEST file.
- data, err := ioutil.ReadFile(mpath)
- if err != nil {
- panic(err)
- }
- t.Logf("Incompatible MANIFEST: %s", data)
-
- // Opening this index should return an error because the MANIFEST has an
- // incompatible version.
- err = idx.Open()
- if err != tsi1.ErrIncompatibleVersion {
- idx.Close()
- t.Fatalf("got error %v, expected %v", err, tsi1.ErrIncompatibleVersion)
- }
- })
- }
-}
-
-func TestIndex_Manifest(t *testing.T) {
- t.Run("current MANIFEST", func(t *testing.T) {
- idx := MustOpenIndex(tsi1.DefaultPartitionN, tsi1.NewConfig())
- defer idx.Close()
-
- // Check version set appropriately.
- for i := 0; uint64(i) < tsi1.DefaultPartitionN; i++ {
- partition := idx.PartitionAt(i)
- fs, err := partition.FileSet()
- if err != nil {
- t.Fatal(err)
- }
- if got, exp := partition.Manifest(fs).Version, tsi1.Version; got != exp {
- t.Fatalf("got MANIFEST version %d, expected %d", got, exp)
- }
- fs.Release()
- }
- })
-}
-
-func TestIndex_DiskSizeBytes(t *testing.T) {
- idx := MustOpenIndex(tsi1.DefaultPartitionN, tsi1.NewConfig())
- defer idx.Close()
-
- // Add series to index.
- if err := idx.CreateSeriesSliceIfNotExists([]Series{
- {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"})},
- {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"})},
- {Name: []byte("disk"), Tags: models.NewTags(map[string]string{"region": "north"})},
- {Name: []byte("mem"), Tags: models.NewTags(map[string]string{"region": "west", "country": "us"})},
- }); err != nil {
- t.Fatal(err)
- }
-
- // Verify on-disk size is the same in each stage.
- // Each series stores flag(1) + series(uvarint(2)) + len(name)(1) + len(key)(1) + len(value)(1) + checksum(4).
- expSize := int64(4 * 9)
-
- // Each MANIFEST file is 419 bytes and there are tsi1.DefaultPartitionN of them.
- expSize += int64(tsi1.DefaultPartitionN * 419)
-
- idx.Run(t, func(t *testing.T) {
- if got, exp := idx.DiskSizeBytes(), expSize; got != exp {
- t.Fatalf("got %d bytes, expected %d", got, exp)
- }
- })
-}
-
-// Ensure index can return measurement cardinality stats.
-func TestIndex_MeasurementCardinalityStats(t *testing.T) { - t.Parallel() - - t.Run("Empty", func(t *testing.T) { - idx := MustOpenIndex(1, tsi1.NewConfig()) - defer idx.Close() - if stats, err := idx.MeasurementCardinalityStats(); err != nil { - t.Fatal(err) - } else if diff := cmp.Diff(stats, tsi1.MeasurementCardinalityStats{}); diff != "" { - t.Fatal(diff) - } - }) - - t.Run("Simple", func(t *testing.T) { - idx := MustOpenIndex(1, tsi1.NewConfig()) - defer idx.Close() - - if err := idx.CreateSeriesSliceIfNotExists([]Series{ - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"})}, - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"})}, - {Name: []byte("mem"), Tags: models.NewTags(map[string]string{"region": "east"})}, - }); err != nil { - t.Fatal(err) - } - - if stats, err := idx.MeasurementCardinalityStats(); err != nil { - t.Fatal(err) - } else if diff := cmp.Diff(stats, tsi1.MeasurementCardinalityStats{"cpu": 2, "mem": 1}); diff != "" { - t.Fatal(diff) - } - }) - - t.Run("SimpleWithDelete", func(t *testing.T) { - idx := MustOpenIndex(1, tsi1.NewConfig()) - defer idx.Close() - - if err := idx.CreateSeriesSliceIfNotExists([]Series{ - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"})}, - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"})}, - {Name: []byte("mem"), Tags: models.NewTags(map[string]string{"region": "east"})}, - }); err != nil { - t.Fatal(err) - } - - seriesID := idx.SeriesFile.SeriesID([]byte("cpu"), models.NewTags(map[string]string{"region": "west"}), nil) - if err := idx.DropSeries([]tsi1.DropSeriesItem{{SeriesID: seriesID, Key: idx.SeriesFile.SeriesKey(seriesID)}}, true); err != nil { - t.Fatal(err) - } else if stats, err := idx.MeasurementCardinalityStats(); err != nil { - t.Fatal(err) - } else if diff := cmp.Diff(stats, tsi1.MeasurementCardinalityStats{"cpu": 1, "mem": 1}); diff != "" { - t.Fatal(diff) - } - - seriesID = idx.SeriesFile.SeriesID([]byte("mem"), models.NewTags(map[string]string{"region": "east"}), nil) - if err := idx.DropSeries([]tsi1.DropSeriesItem{{SeriesID: seriesID, Key: idx.SeriesFile.SeriesKey(seriesID)}}, true); err != nil { - t.Fatal(err) - } else if stats, err := idx.MeasurementCardinalityStats(); err != nil { - t.Fatal(err) - } else if diff := cmp.Diff(stats, tsi1.MeasurementCardinalityStats{"cpu": 1}); diff != "" { - t.Fatal(diff) - } - }) - - t.Run("Large", func(t *testing.T) { - t.Skip("https://github.com/influxdata/influxdb/issues/15220") - if testing.Short() { - t.Skip("short mode, skipping") - } - idx := MustOpenIndex(1, tsi1.NewConfig()) - defer idx.Close() - - for i := 0; i < 1000; i++ { - a := make([]Series, 1000) - for j := range a { - a[j] = Series{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": fmt.Sprintf("east%04d", (i*1000)+j)})} - } - if err := idx.CreateSeriesSliceIfNotExists(a); err != nil { - t.Fatal(err) - } - } - - if stats, err := idx.MeasurementCardinalityStats(); err != nil { - t.Fatal(err) - } else if diff := cmp.Diff(stats, tsi1.MeasurementCardinalityStats{"cpu": 1000000}); diff != "" { - t.Fatal(diff) - } - - // Reopen and verify count. 
- if err := idx.Reopen(); err != nil { - t.Fatal(err) - } else if stats, err := idx.MeasurementCardinalityStats(); err != nil { - t.Fatal(err) - } else if diff := cmp.Diff(stats, tsi1.MeasurementCardinalityStats{"cpu": 1000000}); diff != "" { - t.Fatal(diff) - } - }) - - t.Run("LargeWithDelete", func(t *testing.T) { - t.Skip("https://github.com/influxdata/influxdb/issues/15220") - if testing.Short() { - t.Skip("short mode, skipping") - } - config := tsi1.NewConfig() - config.MaxIndexLogFileSize = 4096 - idx := MustOpenIndex(1, config) - defer idx.Close() - - a := make([]Series, 1000) - for i := range a { - a[i] = Series{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": fmt.Sprintf("east%04d", i)})} - } - if err := idx.CreateSeriesSliceIfNotExists(a); err != nil { - t.Fatal(err) - } - - // Issue deletion. - if err := idx.DropMeasurement([]byte("cpu")); err != nil { - t.Fatal(err) - } else if stats, err := idx.MeasurementCardinalityStats(); err != nil { - t.Fatal(err) - } else if diff := cmp.Diff(stats, tsi1.MeasurementCardinalityStats{}); diff != "" { - t.Fatal(diff) - } - }) - - t.Run("Cache", func(t *testing.T) { - config := tsi1.NewConfig() - config.StatsTTL = 1 * time.Second - idx := MustOpenIndex(1, config) - defer idx.Close() - - // Insert two series & verify series. - if err := idx.CreateSeriesSliceIfNotExists([]Series{ - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"})}, - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"})}, - }); err != nil { - t.Fatal(err) - } else if stats, err := idx.MeasurementCardinalityStats(); err != nil { - t.Fatal(err) - } else if diff := cmp.Diff(stats, tsi1.MeasurementCardinalityStats{"cpu": 2}); diff != "" { - t.Fatal(diff) - } - - // Insert one more series and immediate check. No change should occur. - if err := idx.CreateSeriesSliceIfNotExists([]Series{ - {Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "north"})}, - }); err != nil { - t.Fatal(err) - } else if stats, err := idx.MeasurementCardinalityStats(); err != nil { - t.Fatal(err) - } else if diff := cmp.Diff(stats, tsi1.MeasurementCardinalityStats{"cpu": 2}); diff != "" { - t.Fatal(diff) - } - - // Wait for TTL. - time.Sleep(config.StatsTTL) - - // Verify again and stats should be updated. - if stats, err := idx.MeasurementCardinalityStats(); err != nil { - t.Fatal(err) - } else if diff := cmp.Diff(stats, tsi1.MeasurementCardinalityStats{"cpu": 3}); diff != "" { - t.Fatal(diff) - } - }) -} - -// Ensure index keeps the correct set of series even with concurrent compactions. -func TestIndex_CompactionConsistency(t *testing.T) { - t.Skip("TODO: flaky test: https://github.com/influxdata/influxdb/issues/13755") - t.Parallel() - - idx := NewIndex(tsi1.DefaultPartitionN, tsi1.NewConfig()) - idx.WithLogger(zaptest.NewLogger(t, zaptest.Level(zap.DebugLevel))) - if err := idx.Open(); err != nil { - t.Fatal(err) - } - defer idx.Close() - - // Set up some framework to track launched goroutines. - wg, done := new(sync.WaitGroup), make(chan struct{}) - spawn := func(fn func()) { - wg.Add(1) - go func() { - for { - select { - case <-done: - wg.Done() - return - default: - fn() - } - } - }() - } - - // Spawn a goroutine to constantly ask the index to compact. - spawn(func() { idx.Compact() }) - - // Issue a number of writes and deletes for a while. 
- expected, operations := make(map[string]struct{}), []string(nil) - spawn(func() { - var err error - if len(expected) > 0 && rand.Intn(5) == 0 { - for m := range expected { - err = idx.DropMeasurement([]byte(m)) - operations = append(operations, "delete: "+m) - delete(expected, m) - break - } - } else { - m := []byte(fmt.Sprintf("m%d", rand.Int())) - s := make([]Series, 100) - for i := range s { - s[i] = Series{Name: m, Tags: models.NewTags(map[string]string{fmt.Sprintf("t%d", i): "v"})} - } - err = idx.CreateSeriesSliceIfNotExists(s) - operations = append(operations, "add: "+string(m)) - expected[string(m)] = struct{}{} - } - if err != nil { - t.Error(err) - } - }) - - // Let them run for a while and then wait. - time.Sleep(10 * time.Second) - close(done) - wg.Wait() - - defer func() { - if !t.Failed() { - return - } - t.Log("expect", len(expected), "measurements after", len(operations), "operations") - for _, op := range operations { - t.Log(op) - } - }() - - for m := range expected { - if v, err := idx.MeasurementExists([]byte(m)); err != nil { - t.Fatal(err) - } else if !v { - t.Fatal("expected", m) - } - } - - miter, err := idx.MeasurementIterator() - if err != nil { - t.Fatal(err) - } - defer miter.Close() - - for { - m, err := miter.Next() - if err != nil { - t.Fatal(err) - } else if m == nil { - break - } else if _, ok := expected[string(m)]; !ok { - t.Fatal("unexpected", string(m)) - } - } -} - -func BenchmarkIndex_CreateSeriesListIfNotExist(b *testing.B) { - // Read line-protocol and coerce into tsdb format. - // 1M series generated with: - // $inch -b 10000 -c 1 -t 10,10,10,10,10,10 -f 1 -m 5 -p 1 - fd, err := os.Open("../testdata/line-protocol-1M.txt.gz") - if err != nil { - b.Fatal(err) - } - - gzr, err := gzip.NewReader(fd) - if err != nil { - fd.Close() - b.Fatal(err) - } - - data, err := ioutil.ReadAll(gzr) - if err != nil { - b.Fatal(err) - } - - if err := fd.Close(); err != nil { - b.Fatal(err) - } - - setup := func() (idx *tsi1.Index, points []models.Point, cleanup func(), err error) { - points, err = models.ParsePoints(data, []byte("org_bucket")) - if err != nil { - return nil, nil, func() {}, err - } - - dataRoot, err := ioutil.TempDir("", "BenchmarkIndex_CreateSeriesListIfNotExist") - if err != nil { - return nil, nil, func() {}, err - } - rmdir := func() { os.RemoveAll(dataRoot) } - - seriesPath, err := ioutil.TempDir(dataRoot, "_series") - if err != nil { - return nil, nil, rmdir, err - } - - sfile := seriesfile.NewSeriesFile(seriesPath) - if err := sfile.Open(context.Background()); err != nil { - return nil, nil, rmdir, err - } - - config := tsi1.NewConfig() - idx = tsi1.NewIndex(sfile, config, tsi1.WithPath(filepath.Join(dataRoot, "index"))) - - if testing.Verbose() { - idx.WithLogger(logger.New(os.Stdout)) - } - - if err := idx.Open(context.Background()); err != nil { - return nil, nil, rmdir, err - } - return idx, points, func() { idx.Close(); rmdir() }, nil - } - - b.ReportAllocs() - b.Run("create_series", func(b *testing.B) { - idx, points, cleanup, err := setup() - defer cleanup() - if err != nil { - b.Fatal(err) - } - - b.ResetTimer() - for i := 0; i < b.N; i++ { - for i := 0; i < len(points); i += 10000 { - b.StopTimer() - collection := tsdb.NewSeriesCollection(points[i : i+10000]) - b.StartTimer() - - if err := idx.CreateSeriesListIfNotExists(collection); err != nil { - b.Fatal(err) - } - } - } - }) - - b.Run("already_exist_series", func(b *testing.B) { - idx, points, cleanup, err := setup() - defer cleanup() - if err != nil { - b.Fatal(err) - } - - // 
Ensure all points already written. - for i := 0; i < len(points); i += 10000 { - collection := tsdb.NewSeriesCollection(points[i : i+10000]) - if err := idx.CreateSeriesListIfNotExists(collection); err != nil { - b.Fatal(err) - } - } - - b.ResetTimer() - for i := 0; i < b.N; i++ { - for i := 0; i < len(points); i += 10000 { - b.StopTimer() - collection := tsdb.NewSeriesCollection(points[i : i+10000]) - b.StartTimer() - if err := idx.CreateSeriesListIfNotExists(collection); err != nil { - b.Fatal(err) - } - } - } - }) -} - -// Index is a test wrapper for tsi1.Index. -type Index struct { - *tsi1.Index - Config tsi1.Config - SeriesFile *SeriesFile -} - -// NewIndex returns a new instance of Index at a temporary path. -func NewIndex(partitionN uint64, c tsi1.Config) *Index { - idx := &Index{ - Config: c, - SeriesFile: NewSeriesFile(), - } - idx.Index = tsi1.NewIndex(idx.SeriesFile.SeriesFile, idx.Config, tsi1.WithPath(MustTempDir())) - idx.Index.PartitionN = partitionN - return idx -} - -// MustOpenIndex returns a new, open index. Panic on error. -func MustOpenIndex(partitionN uint64, c tsi1.Config) *Index { - idx := NewIndex(partitionN, c) - if err := idx.Open(); err != nil { - panic(err) - } - return idx -} - -// Open opens the underlying tsi1.Index and tsdb.SeriesFile -func (idx Index) Open() error { - if err := idx.SeriesFile.Open(context.Background()); err != nil { - return err - } - return idx.Index.Open(context.Background()) -} - -// Close closes and removes the index directory. -func (idx *Index) Close() error { - defer os.RemoveAll(idx.Path()) - if err := idx.Index.Close(); err != nil { - return err - } - return idx.SeriesFile.Close() -} - -// Reopen closes and opens the index. -func (idx *Index) Reopen() error { - if err := idx.Index.Close(); err != nil { - return err - } - - // Reopen the series file correctly, by initialising a new underlying series - // file using the same disk data. - if err := idx.SeriesFile.Reopen(); err != nil { - return err - } - - partitionN := idx.Index.PartitionN // Remember how many partitions to use. - idx.Index = tsi1.NewIndex(idx.SeriesFile.SeriesFile, idx.Config, tsi1.WithPath(idx.Index.Path())) - idx.Index.PartitionN = partitionN - return idx.Open() -} - -// Run executes a subtest for each of several different states: -// -// - Immediately -// - After reopen -// - After compaction -// - After reopen again -// -// The index should always respond in the same fashion regardless of -// how data is stored. This helper allows the index to be easily tested -// in all major states. -func (idx *Index) Run(t *testing.T, fn func(t *testing.T)) { - // Invoke immediately. - t.Run("state=initial", fn) - - // Reopen and invoke again. - if err := idx.Reopen(); err != nil { - t.Fatalf("reopen error: %s", err) - } - t.Run("state=reopen", fn) - - // TODO: Request a compaction. - // if err := idx.Compact(); err != nil { - // t.Fatalf("compact error: %s", err) - // } - // t.Run("state=post-compaction", fn) - - // Reopen and invoke again. - if err := idx.Reopen(); err != nil { - t.Fatalf("post-compaction reopen error: %s", err) - } - t.Run("state=post-compaction-reopen", fn) -} - -// CreateSeriesSliceIfNotExists creates multiple series at a time. 
-func (idx *Index) CreateSeriesSliceIfNotExists(a []Series) error { - collection := &tsdb.SeriesCollection{ - Keys: make([][]byte, 0, len(a)), - Names: make([][]byte, 0, len(a)), - Tags: make([]models.Tags, 0, len(a)), - Types: make([]models.FieldType, 0, len(a)), - } - - for _, s := range a { - collection.Keys = append(collection.Keys, models.MakeKey(s.Name, s.Tags)) - collection.Names = append(collection.Names, s.Name) - collection.Tags = append(collection.Tags, s.Tags) - collection.Types = append(collection.Types, s.Type) - } - return idx.CreateSeriesListIfNotExists(collection) -} diff --git a/tsdb/tsi1/legacy_test.go b/tsdb/tsi1/legacy_test.go deleted file mode 100644 index e3530adffc..0000000000 --- a/tsdb/tsi1/legacy_test.go +++ /dev/null @@ -1,36 +0,0 @@ -package tsi1 - -import ( - "context" - "io/ioutil" - "os" - "testing" - - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" -) - -func TestLegacyOpen(t *testing.T) { - dir, err := ioutil.TempDir("", "tsi1-") - if err != nil { - t.Fatal(err) - } - defer os.RemoveAll(dir) - - sfile := seriesfile.NewSeriesFile(dir) - if err := sfile.Open(context.Background()); err != nil { - t.Fatal(err) - } - defer sfile.Close() - - index := NewIndex(sfile, NewConfig(), WithPath("testdata/index-file-index")) - if err := index.Open(context.Background()); err != nil { - t.Fatal(err) - } - defer index.Close() - - // check that we can read all the measurements - err = index.ForEachMeasurementName(func(name []byte) error { return nil }) - if err != nil { - t.Fatal(err) - } -} diff --git a/tsdb/tsi1/log_file.go b/tsdb/tsi1/log_file.go deleted file mode 100644 index 3a9332a50a..0000000000 --- a/tsdb/tsi1/log_file.go +++ /dev/null @@ -1,1533 +0,0 @@ -package tsi1 - -import ( - "bufio" - "bytes" - "encoding/binary" - "errors" - "fmt" - "hash/crc32" - "io" - "os" - "sort" - "sync" - "time" - "unsafe" - - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/pkg/bloom" - "github.com/influxdata/influxdb/v2/pkg/lifecycle" - "github.com/influxdata/influxdb/v2/pkg/mincore" - "github.com/influxdata/influxdb/v2/pkg/mmap" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" -) - -// Log errors. -var ( - ErrLogEntryChecksumMismatch = errors.New("log entry checksum mismatch") -) - -// Log entry flag constants. -const ( - LogEntrySeriesTombstoneFlag = 0x01 - LogEntryMeasurementTombstoneFlag = 0x02 - LogEntryTagKeyTombstoneFlag = 0x04 - LogEntryTagValueTombstoneFlag = 0x08 -) - -// defaultLogFileBufferSize describes the size of the buffer that the LogFile's buffered -// writer uses. If the LogFile does not have an explicit buffer size set then -// this is the size of the buffer; it is equal to the default buffer size used -// by a bufio.Writer. -const defaultLogFileBufferSize = 4096 - -// indexFileBufferSize is the buffer size used when compacting the LogFile down -// into a .tsi file. -const indexFileBufferSize = 1 << 17 // 128K - -// LogFile represents an on-disk write-ahead log file. -type LogFile struct { - mu sync.RWMutex - res lifecycle.Resource - - id int // file sequence identifier - data []byte // mmap - file *os.File // writer - w *bufio.Writer // buffered writer - bufferSize int // The size of the buffer used by the buffered writer - nosync bool // Disables buffer flushing and file syncing. Useful for offline tooling. 
- buf []byte // marshaling buffer - keyBuf []byte - - sfile *seriesfile.SeriesFile // series lookup - sfileref *lifecycle.Reference - size int64 // tracks current file size - modTime time.Time // tracks last time write occurred - - // In-memory series existence/tombstone sets. - seriesIDSet, tombstoneSeriesIDSet *tsdb.SeriesIDSet - - // In-memory index. - mms logMeasurements - - // In-memory stats - stats MeasurementCardinalityStats - - // Filepath to the log file. - path string -} - -// NewLogFile returns a new instance of LogFile. -func NewLogFile(sfile *seriesfile.SeriesFile, path string) *LogFile { - return &LogFile{ - sfile: sfile, - path: path, - mms: make(logMeasurements), - stats: make(MeasurementCardinalityStats), - - seriesIDSet: tsdb.NewSeriesIDSet(), - tombstoneSeriesIDSet: tsdb.NewSeriesIDSet(), - } -} - -// bytes estimates the memory footprint of this LogFile, in bytes. -func (f *LogFile) bytes() int { - var b int - b += 24 // mu RWMutex is 24 bytes - b += int(unsafe.Sizeof(f.res)) - b += int(unsafe.Sizeof(f.id)) - // Do not count f.data contents because it is mmap'd - b += int(unsafe.Sizeof(f.data)) - b += int(unsafe.Sizeof(f.file)) - b += int(unsafe.Sizeof(f.w)) - b += int(unsafe.Sizeof(f.bufferSize)) - b += int(unsafe.Sizeof(f.nosync)) - // TODO(jacobmarble): Uncomment when we are using go >= 1.10.0 - //b += f.w.Size() - b += int(unsafe.Sizeof(f.buf)) + len(f.buf) - b += int(unsafe.Sizeof(f.keyBuf)) + len(f.keyBuf) - b += int(unsafe.Sizeof(f.sfile)) - b += int(unsafe.Sizeof(f.sfileref)) - b += int(unsafe.Sizeof(f.size)) - b += int(unsafe.Sizeof(f.modTime)) - b += int(unsafe.Sizeof(f.seriesIDSet)) + f.seriesIDSet.Bytes() - b += int(unsafe.Sizeof(f.tombstoneSeriesIDSet)) + f.tombstoneSeriesIDSet.Bytes() - b += int(unsafe.Sizeof(f.mms)) + f.mms.bytes() - b += int(unsafe.Sizeof(f.stats)) - b += int(unsafe.Sizeof(f.path)) + len(f.path) - return b -} - -// Open reads the log from a file and validates all the checksums. -func (f *LogFile) Open() error { - if err := f.open(); err != nil { - f.Close() - return err - } - return nil -} - -func (f *LogFile) open() (err error) { - // Attempt to acquire a reference to the series file. - f.sfileref, err = f.sfile.Acquire() - if err != nil { - return err - } - - f.id, _ = ParseFilename(f.path) - - // Open file for appending. - file, err := os.OpenFile(f.Path(), os.O_WRONLY|os.O_CREATE, 0666) - if err != nil { - return err - } - f.file = file - - if f.bufferSize == 0 { - f.bufferSize = defaultLogFileBufferSize - } - f.w = bufio.NewWriterSize(f.file, f.bufferSize) - - // Finish opening if file is empty. - fi, err := file.Stat() - if err != nil { - return err - } else if fi.Size() == 0 { - f.res.Open() - return nil - } - f.size = fi.Size() - f.modTime = fi.ModTime() - - // Open a read-only memory map of the existing data. - data, err := mmap.Map(f.Path(), 0) - if err != nil { - return err - } - f.data = data - - // Read log entries from mmap. - var n int64 - for buf := f.data; len(buf) > 0; { - // Read next entry. Truncate partial writes. - var e LogEntry - if err := e.UnmarshalBinary(buf); err == io.ErrShortBuffer || err == ErrLogEntryChecksumMismatch { - break - } else if err != nil { - return err - } - - // Execute entry against in-memory index. - f.execEntry(&e) - - // Move buffer forward. - n += int64(e.Size) - buf = buf[e.Size:] - } - - // Move to the end of the file. - f.size = n - _, err = file.Seek(n, io.SeekStart) - if err != nil { - return err - } - - // The resource is now open. 
- f.res.Open() - - return nil -} - -// Close shuts down the file handle and mmap. -func (f *LogFile) Close() error { - // Wait until the file has no more references. - f.res.Close() - - if f.sfileref != nil { - f.sfileref.Release() - f.sfileref = nil - } - - if f.w != nil { - f.w.Flush() - f.w = nil - } - - if f.file != nil { - f.file.Close() - f.file = nil - } - - if f.data != nil { - mmap.Unmap(f.data) - } - - f.mms = make(logMeasurements) - return nil -} - -// FlushAndSync flushes buffered data to disk and then fsyncs the underlying file. -// If the LogFile has disabled flushing and syncing then FlushAndSync is a no-op. -func (f *LogFile) FlushAndSync() error { - if f.nosync { - return nil - } - - if f.w != nil { - if err := f.w.Flush(); err != nil { - return err - } - } - - if f.file == nil { - return nil - } - return f.file.Sync() -} - -// ID returns the file sequence identifier. -func (f *LogFile) ID() int { return f.id } - -// Path returns the file path. -func (f *LogFile) Path() string { return f.path } - -// SetPath sets the log file's path. -func (f *LogFile) SetPath(path string) { f.path = path } - -// Level returns the log level of the file. -func (f *LogFile) Level() int { return 0 } - -// Filter returns the bloom filter for the file. -func (f *LogFile) Filter() *bloom.Filter { return nil } - -// Acquire adds a reference count to the file. -func (f *LogFile) Acquire() (*lifecycle.Reference, error) { - return f.res.Acquire() -} - -// Stat returns size and last modification time of the file. -func (f *LogFile) Stat() (int64, time.Time) { - f.mu.RLock() - size, modTime := f.size, f.modTime - f.mu.RUnlock() - return size, modTime -} - -// SeriesIDSet returns the series existence set. -func (f *LogFile) SeriesIDSet() (*tsdb.SeriesIDSet, error) { - return f.seriesIDSet, nil -} - -// TombstoneSeriesIDSet returns the series tombstone set. -func (f *LogFile) TombstoneSeriesIDSet() (*tsdb.SeriesIDSet, error) { - return f.tombstoneSeriesIDSet, nil -} - -// Size returns the size of the file, in bytes. -func (f *LogFile) Size() int64 { - f.mu.RLock() - v := f.size - f.mu.RUnlock() - return v -} - -// Measurement returns a measurement element. -func (f *LogFile) Measurement(name []byte) MeasurementElem { - f.mu.RLock() - defer f.mu.RUnlock() - - mm, ok := f.mms[string(name)] - if !ok { - return nil - } - - return mm -} - -func (f *LogFile) MeasurementHasSeries(ss *tsdb.SeriesIDSet, name []byte) bool { - f.mu.RLock() - defer f.mu.RUnlock() - - mm, ok := f.mms[string(name)] - if !ok { - return false - } - - // TODO(edd): if mm is using a seriesSet then this could be changed to do a fast intersection. - for _, id := range mm.seriesIDs() { - if ss.Contains(id) { - return true - } - } - return false -} - -// MeasurementNames returns an ordered list of measurement names. -func (f *LogFile) MeasurementNames() []string { - f.mu.RLock() - defer f.mu.RUnlock() - return f.measurementNames() -} - -func (f *LogFile) measurementNames() []string { - a := make([]string, 0, len(f.mms)) - for name := range f.mms { - a = append(a, name) - } - sort.Strings(a) - return a -} - -// DeleteMeasurement adds a tombstone for a measurement to the log file. -func (f *LogFile) DeleteMeasurement(name []byte) error { - f.mu.Lock() - defer f.mu.Unlock() - - e := LogEntry{Flag: LogEntryMeasurementTombstoneFlag, Name: name} - if err := f.appendEntry(&e); err != nil { - return err - } - f.execEntry(&e) - - // Flush buffer and sync to disk. 
- return f.FlushAndSync() -} - -// TagKeySeriesIDIterator returns a series iterator for a tag key. -func (f *LogFile) TagKeySeriesIDIterator(name, key []byte) (tsdb.SeriesIDIterator, error) { - f.mu.RLock() - defer f.mu.RUnlock() - - mm, ok := f.mms[string(name)] - if !ok { - return nil, nil - } - - tk, ok := mm.tagSet[string(key)] - if !ok { - return nil, nil - } - - // Combine iterators across all tag keys. - itrs := make([]tsdb.SeriesIDIterator, 0, len(tk.tagValues)) - for _, tv := range tk.tagValues { - if tv.cardinality() == 0 { - continue - } - itrs = append(itrs, tsdb.NewSeriesIDSetIterator(tv.seriesIDSet())) - } - - return tsdb.MergeSeriesIDIterators(itrs...), nil -} - -// TagKeyIterator returns a value iterator for a measurement. -func (f *LogFile) TagKeyIterator(name []byte) TagKeyIterator { - f.mu.RLock() - defer f.mu.RUnlock() - - mm, ok := f.mms[string(name)] - if !ok { - return nil - } - - a := make([]logTagKey, 0, len(mm.tagSet)) - for _, k := range mm.tagSet { - a = append(a, k) - } - return newLogTagKeyIterator(a) -} - -// TagKey returns a tag key element. -func (f *LogFile) TagKey(name, key []byte) TagKeyElem { - f.mu.RLock() - defer f.mu.RUnlock() - - mm, ok := f.mms[string(name)] - if !ok { - return nil - } - - tk, ok := mm.tagSet[string(key)] - if !ok { - return nil - } - - return &tk -} - -// TagValue returns a tag value element. -func (f *LogFile) TagValue(name, key, value []byte) TagValueElem { - f.mu.RLock() - defer f.mu.RUnlock() - - mm, ok := f.mms[string(name)] - if !ok { - return nil - } - - tk, ok := mm.tagSet[string(key)] - if !ok { - return nil - } - - tv, ok := tk.tagValues[string(value)] - if !ok { - return nil - } - - return &tv -} - -// TagValueIterator returns a value iterator for a tag key. -func (f *LogFile) TagValueIterator(name, key []byte) TagValueIterator { - f.mu.RLock() - defer f.mu.RUnlock() - - mm, ok := f.mms[string(name)] - if !ok { - return nil - } - - tk, ok := mm.tagSet[string(key)] - if !ok { - return nil - } - - return tk.TagValueIterator(nil) -} - -// deleteTagKey adds a tombstone for a tag key to the log file without a lock. -func (f *LogFile) deleteTagKey(name, key []byte) error { - e := LogEntry{Flag: LogEntryTagKeyTombstoneFlag, Name: name, Key: key} - if err := f.appendEntry(&e); err != nil { - return err - } - f.execEntry(&e) - return nil -} - -// DeleteTagKey adds a tombstone for a tag key to the log file. -func (f *LogFile) DeleteTagKey(name, key []byte) error { - f.mu.Lock() - defer f.mu.Unlock() - if err := f.deleteTagKey(name, key); err != nil { - return err - } - return f.FlushAndSync() -} - -// DeleteTagKeyNoSync adds a tombstone for a tag key to the log file without a sync. -func (f *LogFile) DeleteTagKeyNoSync(name, key []byte) error { - f.mu.Lock() - defer f.mu.Unlock() - return f.deleteTagKey(name, key) -} - -// TagValueSeriesIDSet returns a series iterator for a tag value. -func (f *LogFile) TagValueSeriesIDSet(name, key, value []byte) (*tsdb.SeriesIDSet, error) { - f.mu.RLock() - defer f.mu.RUnlock() - - mm, ok := f.mms[string(name)] - if !ok { - return nil, nil - } - - tk, ok := mm.tagSet[string(key)] - if !ok { - return nil, nil - } - - tv, ok := tk.tagValues[string(value)] - if !ok { - return nil, nil - } else if tv.cardinality() == 0 { - return nil, nil - } - - return tv.seriesIDSet(), nil -} - -// MeasurementN returns the total number of measurements. -func (f *LogFile) MeasurementN() (n uint64) { - f.mu.RLock() - defer f.mu.RUnlock() - return uint64(len(f.mms)) -} - -// TagKeyN returns the total number of keys. 
-func (f *LogFile) TagKeyN() (n uint64) { - f.mu.RLock() - defer f.mu.RUnlock() - for _, mm := range f.mms { - n += uint64(len(mm.tagSet)) - } - return n -} - -// TagValueN returns the total number of values. -func (f *LogFile) TagValueN() (n uint64) { - f.mu.RLock() - defer f.mu.RUnlock() - for _, mm := range f.mms { - for _, k := range mm.tagSet { - n += uint64(len(k.tagValues)) - } - } - return n -} - -// deleteTagValue adds a tombstone for a tag value to the log file without a lock. -func (f *LogFile) deleteTagValue(name, key, value []byte) error { - e := LogEntry{Flag: LogEntryTagValueTombstoneFlag, Name: name, Key: key, Value: value} - if err := f.appendEntry(&e); err != nil { - return err - } - f.execEntry(&e) - return nil -} - -// DeleteTagValue adds a tombstone for a tag value to the log file. -func (f *LogFile) DeleteTagValue(name, key, value []byte) error { - f.mu.Lock() - defer f.mu.Unlock() - if err := f.deleteTagValue(name, key, value); err != nil { - return err - } - return f.FlushAndSync() -} - -// DeleteTagValueNoSync adds a tombstone for a tag value to the log file. -// Caller must call FlushAndSync(). -func (f *LogFile) DeleteTagValueNoSync(name, key, value []byte) error { - f.mu.Lock() - defer f.mu.Unlock() - return f.deleteTagValue(name, key, value) -} - -// AddSeriesList adds a list of series to the log file in bulk. -func (f *LogFile) AddSeriesList(seriesSet *tsdb.SeriesIDSet, collection *tsdb.SeriesCollection) ([]tsdb.SeriesID, error) { - var writeRequired bool - var entries []LogEntry - - var i int // Track the index of the point in the batch - seriesSet.RLock() - for iter := collection.Iterator(); iter.Next(); { - seriesID := iter.SeriesID() - - if seriesSet.ContainsNoLock(seriesID) { - i++ - continue - } - writeRequired = true - - // lazy allocation of entries to avoid common case of no new series - if entries == nil { - entries = make([]LogEntry, 0, collection.Length()) - } - - entries = append(entries, LogEntry{ - SeriesID: seriesID, - name: iter.Name(), - tags: iter.Tags(), - cached: true, - batchidx: i, - }) - i++ - } - seriesSet.RUnlock() - - // Exit if all series already exist. - if !writeRequired { - return nil, nil - } - - f.mu.Lock() - defer f.mu.Unlock() - - seriesSet.Lock() - defer seriesSet.Unlock() - var seriesIDs []tsdb.SeriesID - for i := range entries { // NB - this doesn't evaluate all series ids returned from series file. - entry := &entries[i] - if seriesSet.ContainsNoLock(entry.SeriesID) { - // We don't need to allocate anything for this series. - continue - } - if err := f.appendEntry(entry); err != nil { - return nil, err - } - f.execEntry(entry) - seriesSet.AddNoLock(entry.SeriesID) - - if seriesIDs == nil { - seriesIDs = make([]tsdb.SeriesID, collection.Length()) - } - seriesIDs[entry.batchidx] = entry.SeriesID - } - - // Flush buffer and sync to disk. - if err := f.FlushAndSync(); err != nil { - return nil, err - } - return seriesIDs, nil -} - -// DeleteSeriesIDs adds a tombstone for a list of series ids. -func (f *LogFile) DeleteSeriesIDs(ids []tsdb.SeriesID) error { - f.mu.Lock() - defer f.mu.Unlock() - - for _, id := range ids { - e := LogEntry{Flag: LogEntrySeriesTombstoneFlag, SeriesID: id} - if err := f.appendEntry(&e); err != nil { - return err - } - f.execEntry(&e) - } - - // Flush buffer and sync to disk. - return f.FlushAndSync() -} - -// DeleteSeriesIDList marks a tombstone for all the series IDs. DeleteSeriesIDList -// should be preferred to repeatedly calling DeleteSeriesID for many series ids. 
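The NoSync variants above exist so a caller can batch several tombstones behind a single flush. A minimal sketch of that pattern, assuming an already-open *tsi1.LogFile (under whatever import path tsi1 resolves to after this change); the helper name dropCPUTagKeys and the measurement/key values are illustrative assumptions, not code from this change:

func dropCPUTagKeys(f *tsi1.LogFile) error {
	// Queue several tombstones without syncing after each one.
	for _, key := range [][]byte{[]byte("host"), []byte("region")} {
		if err := f.DeleteTagKeyNoSync([]byte("cpu"), key); err != nil {
			return err
		}
	}
	// A single flush+sync covers the whole batch.
	return f.FlushAndSync()
}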
-func (f *LogFile) DeleteSeriesIDList(ids []tsdb.SeriesID) error { - f.mu.Lock() - defer f.mu.Unlock() - - for _, id := range ids { - e := LogEntry{Flag: LogEntrySeriesTombstoneFlag, SeriesID: id} - if err := f.appendEntry(&e); err != nil { - return err - } - f.execEntry(&e) - } - - // Flush buffer and sync to disk. - return f.FlushAndSync() -} - -// SeriesN returns the total number of series in the file. -func (f *LogFile) SeriesN() (n uint64) { - f.mu.RLock() - defer f.mu.RUnlock() - - for _, mm := range f.mms { - n += uint64(mm.cardinality()) - } - return n -} - -// appendEntry adds a log entry to the end of the file. -func (f *LogFile) appendEntry(e *LogEntry) error { - // Marshal entry to the local buffer. - f.buf = appendLogEntry(f.buf[:0], e) - - // Save the size of the record. - e.Size = len(f.buf) - - // Write record to file. - n, err := f.w.Write(f.buf) - if err != nil { - // Move position backwards over partial entry. - // Log should be reopened if seeking cannot be completed. - if n > 0 { - f.w.Reset(f.file) - if _, err := f.file.Seek(int64(-n), io.SeekCurrent); err != nil { - f.Close() - } - } - return err - } - - // Update in-memory file size & modification time. - f.size += int64(n) - f.modTime = time.Now() - - return nil -} - -// execEntry executes a log entry against the in-memory index. -// This is done after appending and on replay of the log. -func (f *LogFile) execEntry(e *LogEntry) { - switch e.Flag { - case LogEntryMeasurementTombstoneFlag: - f.execDeleteMeasurementEntry(e) - case LogEntryTagKeyTombstoneFlag: - f.execDeleteTagKeyEntry(e) - case LogEntryTagValueTombstoneFlag: - f.execDeleteTagValueEntry(e) - default: - f.execSeriesEntry(e) - } -} - -func (f *LogFile) execDeleteMeasurementEntry(e *LogEntry) { - mm := f.createMeasurementIfNotExists(e.Name) - mm.deleted = true - mm.tagSet = make(map[string]logTagKey) - mm.series = make(map[tsdb.SeriesID]struct{}) - mm.seriesSet = nil -} - -func (f *LogFile) execDeleteTagKeyEntry(e *LogEntry) { - mm := f.createMeasurementIfNotExists(e.Name) - ts := mm.createTagSetIfNotExists(e.Key) - - ts.deleted = true - - mm.tagSet[string(e.Key)] = ts -} - -func (f *LogFile) execDeleteTagValueEntry(e *LogEntry) { - mm := f.createMeasurementIfNotExists(e.Name) - ts := mm.createTagSetIfNotExists(e.Key) - tv := ts.createTagValueIfNotExists(e.Value) - - tv.deleted = true - - ts.tagValues[string(e.Value)] = tv - mm.tagSet[string(e.Key)] = ts -} - -func (f *LogFile) execSeriesEntry(e *LogEntry) { - var seriesKey []byte - if e.cached { - sz := seriesfile.SeriesKeySize(e.name, e.tags) - if len(f.keyBuf) < sz { - f.keyBuf = make([]byte, 0, sz) - } - seriesKey = seriesfile.AppendSeriesKey(f.keyBuf[:0], e.name, e.tags) - } else { - seriesKey = f.sfile.SeriesKey(e.SeriesID) - } - - // Series keys can be removed if the series has been deleted from - // the entire database and the server is restarted. This would cause - // the log to replay its insert but the key cannot be found. - // - // https://github.com/influxdata/influxdb/issues/9444 - if seriesKey == nil { - return - } - - // Check if deleted. - deleted := e.Flag == LogEntrySeriesTombstoneFlag - - // Read key size. - _, remainder := seriesfile.ReadSeriesKeyLen(seriesKey) - - // Read measurement name. - name, remainder := seriesfile.ReadSeriesKeyMeasurement(remainder) - mm := f.createMeasurementIfNotExists(name) - mm.deleted = false - if !deleted { - mm.addSeriesID(e.SeriesID) - } else { - mm.removeSeriesID(e.SeriesID) - } - - // Read tag count. 
- tagN, remainder := seriesfile.ReadSeriesKeyTagN(remainder) - - // Save tags. - var k, v []byte - for i := 0; i < tagN; i++ { - k, v, remainder = seriesfile.ReadSeriesKeyTag(remainder) - ts := mm.createTagSetIfNotExists(k) - tv := ts.createTagValueIfNotExists(v) - - // Add/remove a reference to the series on the tag value. - if !deleted { - tv.addSeriesID(e.SeriesID) - } else { - tv.removeSeriesID(e.SeriesID) - } - - ts.tagValues[string(v)] = tv - mm.tagSet[string(k)] = ts - } - - // Add/remove from appropriate series id sets & stats. - if !deleted { - f.seriesIDSet.Add(e.SeriesID) - f.tombstoneSeriesIDSet.Remove(e.SeriesID) - f.stats.Inc(name) - } else { - f.seriesIDSet.Remove(e.SeriesID) - f.tombstoneSeriesIDSet.Add(e.SeriesID) - f.stats.Dec(name) - } -} - -// SeriesIDIterator returns an iterator over all series in the log file. -func (f *LogFile) SeriesIDIterator() tsdb.SeriesIDIterator { - f.mu.RLock() - defer f.mu.RUnlock() - - ss := tsdb.NewSeriesIDSet() - allSeriesSets := make([]*tsdb.SeriesIDSet, 0, len(f.mms)) - - for _, mm := range f.mms { - if mm.seriesSet != nil { - allSeriesSets = append(allSeriesSets, mm.seriesSet) - continue - } - - // measurement is not using seriesSet to store series IDs. - mm.forEach(func(seriesID tsdb.SeriesID) { - ss.AddNoLock(seriesID) - }) - } - - // Fast merge all seriesSets. - if len(allSeriesSets) > 0 { - ss.Merge(allSeriesSets...) - } - - return tsdb.NewSeriesIDSetIterator(ss) -} - -// createMeasurementIfNotExists returns a measurement by name. -func (f *LogFile) createMeasurementIfNotExists(name []byte) *logMeasurement { - mm := f.mms[string(name)] - if mm == nil { - mm = &logMeasurement{ - name: name, - tagSet: make(map[string]logTagKey), - series: make(map[tsdb.SeriesID]struct{}), - } - f.mms[string(name)] = mm - } - return mm -} - -// MeasurementIterator returns an iterator over all the measurements in the file. -func (f *LogFile) MeasurementIterator() MeasurementIterator { - f.mu.RLock() - defer f.mu.RUnlock() - - var itr logMeasurementIterator - for _, mm := range f.mms { - itr.mms = append(itr.mms, *mm) - } - sort.Sort(logMeasurementSlice(itr.mms)) - return &itr -} - -// MeasurementSeriesIDIterator returns an iterator over all series for a measurement. -func (f *LogFile) MeasurementSeriesIDIterator(name []byte) tsdb.SeriesIDIterator { - f.mu.RLock() - defer f.mu.RUnlock() - - mm := f.mms[string(name)] - if mm == nil || mm.cardinality() == 0 { - return nil - } - return tsdb.NewSeriesIDSetIterator(mm.seriesIDSet()) -} - -// CompactTo compacts the log file and writes it to w. -func (f *LogFile) CompactTo(w io.Writer, m, k uint64, cancel <-chan struct{}) (n int64, err error) { - f.mu.RLock() - defer f.mu.RUnlock() - - // Check for cancellation. - select { - case <-cancel: - return n, ErrCompactionInterrupted - default: - } - - // Wrap in buffered writer with a buffer equivalent to the LogFile size. - bw := bufio.NewWriterSize(w, indexFileBufferSize) // 128K - - // Setup compaction offset tracking data. - var t IndexFileTrailer - info := newLogFileCompactInfo() - info.cancel = cancel - - // Write magic number. - if err := writeTo(bw, []byte(FileSignature), &n); err != nil { - return n, err - } - - // Retrieve measurement names in order. - names := f.measurementNames() - - // Flush buffer & mmap series block. - if err := bw.Flush(); err != nil { - return n, err - } - - // Write tagset blocks in measurement order. - if err := f.writeTagsetsTo(bw, names, info, &n); err != nil { - return n, err - } - - // Write measurement block.
- t.MeasurementBlock.Offset = n - if err := f.writeMeasurementBlockTo(bw, names, info, &n); err != nil { - return n, err - } - t.MeasurementBlock.Size = n - t.MeasurementBlock.Offset - - // Write series set. - t.SeriesIDSet.Offset = n - nn, err := f.seriesIDSet.WriteTo(bw) - if n += nn; err != nil { - return n, err - } - t.SeriesIDSet.Size = n - t.SeriesIDSet.Offset - - // Write tombstone series set. - t.TombstoneSeriesIDSet.Offset = n - nn, err = f.tombstoneSeriesIDSet.WriteTo(bw) - if n += nn; err != nil { - return n, err - } - t.TombstoneSeriesIDSet.Size = n - t.TombstoneSeriesIDSet.Offset - - // Write trailer. - nn, err = t.WriteTo(bw) - n += nn - if err != nil { - return n, err - } - - // Flush buffer. - if err := bw.Flush(); err != nil { - return n, err - } - - return n, nil -} - -func (f *LogFile) writeTagsetsTo(w io.Writer, names []string, info *logFileCompactInfo, n *int64) error { - for _, name := range names { - if err := f.writeTagsetTo(w, name, info, n); err != nil { - return err - } - } - return nil -} - -// writeTagsetTo writes a single tagset to w and saves the tagset offset. -func (f *LogFile) writeTagsetTo(w io.Writer, name string, info *logFileCompactInfo, n *int64) error { - mm := f.mms[name] - - // Check for cancellation. - select { - case <-info.cancel: - return ErrCompactionInterrupted - default: - } - - enc := NewTagBlockEncoder(w) - var valueN int - for _, k := range mm.keys() { - tag := mm.tagSet[k] - - // Encode tag. Skip values if tag is deleted. - if err := enc.EncodeKey(tag.name, tag.deleted); err != nil { - return err - } else if tag.deleted { - continue - } - - // Sort tag values. - values := make([]string, 0, len(tag.tagValues)) - for v := range tag.tagValues { - values = append(values, v) - } - sort.Strings(values) - - // Add each value. - for _, v := range values { - value := tag.tagValues[v] - if err := enc.EncodeValue(value.name, value.deleted, value.seriesIDSet()); err != nil { - return err - } - - // Check for cancellation periodically. - if valueN++; valueN%1000 == 0 { - select { - case <-info.cancel: - return ErrCompactionInterrupted - default: - } - } - } - } - - // Save tagset offset to measurement. - offset := *n - - // Flush tag block. - err := enc.Close() - *n += enc.N() - if err != nil { - return err - } - - // Save tagset offset to measurement. - size := *n - offset - - info.mms[name] = &logFileMeasurementCompactInfo{offset: offset, size: size} - - return nil -} - -func (f *LogFile) writeMeasurementBlockTo(w io.Writer, names []string, info *logFileCompactInfo, n *int64) error { - mw := NewMeasurementBlockWriter() - - // Check for cancellation. - select { - case <-info.cancel: - return ErrCompactionInterrupted - default: - } - - // Add measurement data. - for _, name := range names { - mm := f.mms[name] - mmInfo := info.mms[name] - assert(mmInfo != nil, "measurement info not found") - //lint:ignore SA5011 mmInfo is flagged as being possibly nil because of the assertion - mw.Add(mm.name, mm.deleted, mmInfo.offset, mmInfo.size, mm.seriesIDs()) - } - - // Flush data to writer. - nn, err := mw.WriteTo(w) - *n += nn - return err -} - -// logFileCompactInfo is a context object to track compaction position info. -type logFileCompactInfo struct { - cancel <-chan struct{} - mms map[string]*logFileMeasurementCompactInfo -} - -// newLogFileCompactInfo returns a new instance of logFileCompactInfo. 
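CompactTo above writes a complete index-file image (file signature, tag blocks, measurement block, series ID set, tombstone set, trailer) to any io.Writer. A minimal usage sketch, assuming an open *tsi1.LogFile f and bloom-filter parameters m and k (for example from bloom.Estimate, as the benchmarks later in this diff do); passing a nil cancel channel disables cancellation:

var buf bytes.Buffer
if _, err := f.CompactTo(&buf, m, k, nil); err != nil {
	return err
}
// buf now holds an immutable index file image ready to be written to disk.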
-func newLogFileCompactInfo() *logFileCompactInfo { - return &logFileCompactInfo{ - mms: make(map[string]*logFileMeasurementCompactInfo), - } -} - -type logFileMeasurementCompactInfo struct { - offset int64 - size int64 -} - -// MeasurementCardinalityStats returns cardinality stats for this log file. -func (f *LogFile) MeasurementCardinalityStats() MeasurementCardinalityStats { - f.mu.RLock() - defer f.mu.RUnlock() - return f.stats.Clone() -} - -// LogEntry represents a single log entry in the write-ahead log. -type LogEntry struct { - Flag byte // flag - SeriesID tsdb.SeriesID // series id - Name []byte // measurement name - Key []byte // tag key - Value []byte // tag value - Checksum uint32 // checksum of flag/name/tags. - Size int // total size of record, in bytes. - - cached bool // Hint to LogFile that series data is already parsed - name []byte // series name, this is a cached copy of the parsed measurement name - tags models.Tags // series tags, this is a cached copy of the parsed tags - batchidx int // position of entry in batch. -} - -// UnmarshalBinary unmarshals data into e. -func (e *LogEntry) UnmarshalBinary(data []byte) error { - var sz uint64 - var n int - var seriesID uint64 - var err error - - orig := data - start := len(data) - - // Parse flag data. - if len(data) < 1 { - return io.ErrShortBuffer - } - e.Flag, data = data[0], data[1:] - - // Parse series id. - if seriesID, n, err = uvarint(data); err != nil { - return err - } - e.SeriesID, data = tsdb.NewSeriesID(seriesID), data[n:] - - // Parse name length. - if sz, n, err = uvarint(data); err != nil { - return err - } - - // Read name data. - if len(data) < n+int(sz) { - return io.ErrShortBuffer - } - e.Name, data = data[n:n+int(sz)], data[n+int(sz):] - - // Parse key length. - if sz, n, err = uvarint(data); err != nil { - return err - } - - // Read key data. - if len(data) < n+int(sz) { - return io.ErrShortBuffer - } - e.Key, data = data[n:n+int(sz)], data[n+int(sz):] - - // Parse value length. - if sz, n, err = uvarint(data); err != nil { - return err - } - - // Read value data. - if len(data) < n+int(sz) { - return io.ErrShortBuffer - } - e.Value, data = data[n:n+int(sz)], data[n+int(sz):] - - // Compute checksum. - chk := crc32.ChecksumIEEE(orig[:start-len(data)]) - - // Parse checksum. - if len(data) < 4 { - return io.ErrShortBuffer - } - e.Checksum, data = binary.BigEndian.Uint32(data[:4]), data[4:] - - // Verify checksum. - if chk != e.Checksum { - return ErrLogEntryChecksumMismatch - } - - // Save length of elem. - e.Size = start - len(data) - - return nil -} - -// appendLogEntry appends to dst and returns the new buffer. -// This updates the checksum on the entry. -func appendLogEntry(dst []byte, e *LogEntry) []byte { - var buf [binary.MaxVarintLen64]byte - start := len(dst) - - // Append flag. - dst = append(dst, e.Flag) - - // Append series id. - n := binary.PutUvarint(buf[:], e.SeriesID.RawID()) - dst = append(dst, buf[:n]...) - - // Append name. - n = binary.PutUvarint(buf[:], uint64(len(e.Name))) - dst = append(dst, buf[:n]...) - dst = append(dst, e.Name...) - - // Append key. - n = binary.PutUvarint(buf[:], uint64(len(e.Key))) - dst = append(dst, buf[:n]...) - dst = append(dst, e.Key...) - - // Append value. - n = binary.PutUvarint(buf[:], uint64(len(e.Value))) - dst = append(dst, buf[:n]...) - dst = append(dst, e.Value...) - - // Calculate checksum. - e.Checksum = crc32.ChecksumIEEE(dst[start:]) - - // Append checksum. - binary.BigEndian.PutUint32(buf[:4], e.Checksum) - dst = append(dst, buf[:4]...)
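	// For reference, the record produced above (and parsed back by UnmarshalBinary) is laid out as:
	//
	//	flag (1 byte) | series ID (uvarint) |
	//	name len (uvarint) + name | key len (uvarint) + key | value len (uvarint) + value |
	//	checksum (4 bytes, big-endian CRC32 of everything before it)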
- - return dst -} - -// logMeasurements represents a map of measurement names to measurements. -type logMeasurements map[string]*logMeasurement - -// bytes estimates the memory footprint of this logMeasurements, in bytes. -func (mms *logMeasurements) bytes() int { - var b int - for k, v := range *mms { - b += len(k) - b += v.bytes() - } - b += int(unsafe.Sizeof(*mms)) - return b -} - -type logMeasurement struct { - name []byte - tagSet map[string]logTagKey - deleted bool - series map[tsdb.SeriesID]struct{} - seriesSet *tsdb.SeriesIDSet -} - -// bytes estimates the memory footprint of this logMeasurement, in bytes. -func (m *logMeasurement) bytes() int { - var b int - b += len(m.name) - for k, v := range m.tagSet { - b += len(k) - b += v.bytes() - } - b += (int(m.cardinality()) * 8) - b += int(unsafe.Sizeof(*m)) - return b -} - -func (m *logMeasurement) addSeriesID(x tsdb.SeriesID) { - if m.seriesSet != nil { - m.seriesSet.AddNoLock(x) - return - } - - m.series[x] = struct{}{} - - // If the map is getting too big it can be converted into a roaring seriesSet. - if len(m.series) > 25 { - m.seriesSet = tsdb.NewSeriesIDSet() - for id := range m.series { - m.seriesSet.AddNoLock(id) - } - m.series = nil - } -} - -func (m *logMeasurement) removeSeriesID(x tsdb.SeriesID) { - if m.seriesSet != nil { - m.seriesSet.RemoveNoLock(x) - return - } - delete(m.series, x) -} - -func (m *logMeasurement) cardinality() int64 { - if m.seriesSet != nil { - return int64(m.seriesSet.Cardinality()) - } - return int64(len(m.series)) -} - -// forEach applies fn to every series ID in the logMeasurement. -func (m *logMeasurement) forEach(fn func(tsdb.SeriesID)) { - if m.seriesSet != nil { - m.seriesSet.ForEachNoLock(fn) - return - } - - for seriesID := range m.series { - fn(seriesID) - } -} - -// seriesIDs returns a sorted set of seriesIDs. -func (m *logMeasurement) seriesIDs() []tsdb.SeriesID { - a := make([]tsdb.SeriesID, 0, m.cardinality()) - if m.seriesSet != nil { - m.seriesSet.ForEachNoLock(func(id tsdb.SeriesID) { a = append(a, id) }) - return a // IDs are already sorted. - } - - for seriesID := range m.series { - a = append(a, seriesID) - } - sort.Slice(a, func(i, j int) bool { return a[i].Less(a[j]) }) - return a -} - -// seriesIDSet returns a copy of the logMeasurement's seriesSet, or creates a new -// one -func (m *logMeasurement) seriesIDSet() *tsdb.SeriesIDSet { - if m.seriesSet != nil { - return m.seriesSet.CloneNoLock() - } - - ss := tsdb.NewSeriesIDSet() - for seriesID := range m.series { - ss.AddNoLock(seriesID) - } - return ss -} - -func (m *logMeasurement) Name() []byte { return m.name } -func (m *logMeasurement) Deleted() bool { return m.deleted } - -func (m *logMeasurement) createTagSetIfNotExists(key []byte) logTagKey { - ts, ok := m.tagSet[string(key)] - if !ok { - ts = logTagKey{name: key, tagValues: make(map[string]logTagValue)} - } - return ts -} - -// keys returns a sorted list of tag keys. -func (m *logMeasurement) keys() []string { - a := make([]string, 0, len(m.tagSet)) - for k := range m.tagSet { - a = append(a, k) - } - sort.Strings(a) - return a -} - -// logMeasurementSlice is a sortable list of log measurements. -type logMeasurementSlice []logMeasurement - -func (a logMeasurementSlice) Len() int { return len(a) } -func (a logMeasurementSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a logMeasurementSlice) Less(i, j int) bool { return bytes.Compare(a[i].name, a[j].name) == -1 } - -// logMeasurementIterator represents an iterator over a slice of measurements. 
-type logMeasurementIterator struct { - mms []logMeasurement -} - -// Next returns the next element in the iterator. -func (itr *logMeasurementIterator) Next() (e MeasurementElem) { - if len(itr.mms) == 0 { - return nil - } - e, itr.mms = &itr.mms[0], itr.mms[1:] - return e -} - -type logTagKey struct { - name []byte - deleted bool - tagValues map[string]logTagValue -} - -// bytes estimates the memory footprint of this logTagKey, in bytes. -func (tk *logTagKey) bytes() int { - var b int - b += len(tk.name) - for k, v := range tk.tagValues { - b += len(k) - b += v.bytes() - } - b += int(unsafe.Sizeof(*tk)) - return b -} - -func (tk *logTagKey) Key() []byte { return tk.name } -func (tk *logTagKey) Deleted() bool { return tk.deleted } - -func (tk *logTagKey) TagValueIterator(_ *mincore.Limiter) TagValueIterator { - a := make([]logTagValue, 0, len(tk.tagValues)) - for _, v := range tk.tagValues { - a = append(a, v) - } - return newLogTagValueIterator(a) -} - -func (tk *logTagKey) createTagValueIfNotExists(value []byte) logTagValue { - tv, ok := tk.tagValues[string(value)] - if !ok { - tv = logTagValue{name: value, series: make(map[tsdb.SeriesID]struct{})} - } - return tv -} - -// logTagKey is a sortable list of log tag keys. -type logTagKeySlice []logTagKey - -func (a logTagKeySlice) Len() int { return len(a) } -func (a logTagKeySlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a logTagKeySlice) Less(i, j int) bool { return bytes.Compare(a[i].name, a[j].name) == -1 } - -type logTagValue struct { - name []byte - deleted bool - series map[tsdb.SeriesID]struct{} - seriesSet *tsdb.SeriesIDSet -} - -// bytes estimates the memory footprint of this logTagValue, in bytes. -func (tv *logTagValue) bytes() int { - var b int - b += len(tv.name) - b += int(unsafe.Sizeof(*tv)) - b += (int(tv.cardinality()) * 8) - return b -} - -func (tv *logTagValue) addSeriesID(x tsdb.SeriesID) { - if tv.seriesSet != nil { - tv.seriesSet.AddNoLock(x) - return - } - - tv.series[x] = struct{}{} - - // If the map is getting too big it can be converted into a roaring seriesSet. - if len(tv.series) > 25 { - tv.seriesSet = tsdb.NewSeriesIDSet() - for id := range tv.series { - tv.seriesSet.AddNoLock(id) - } - tv.series = nil - } -} - -func (tv *logTagValue) removeSeriesID(x tsdb.SeriesID) { - if tv.seriesSet != nil { - tv.seriesSet.RemoveNoLock(x) - return - } - delete(tv.series, x) -} - -func (tv *logTagValue) cardinality() int64 { - if tv.seriesSet != nil { - return int64(tv.seriesSet.Cardinality()) - } - return int64(len(tv.series)) -} - -// seriesIDSet returns a copy of the logMeasurement's seriesSet, or creates a new -// one -func (tv *logTagValue) seriesIDSet() *tsdb.SeriesIDSet { - if tv.seriesSet != nil { - return tv.seriesSet.CloneNoLock() - } - - ss := tsdb.NewSeriesIDSet() - for seriesID := range tv.series { - ss.AddNoLock(seriesID) - } - return ss -} - -func (tv *logTagValue) Value() []byte { return tv.name } -func (tv *logTagValue) Deleted() bool { return tv.deleted } - -// logTagValue is a sortable list of log tag values. -type logTagValueSlice []logTagValue - -func (a logTagValueSlice) Len() int { return len(a) } -func (a logTagValueSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a logTagValueSlice) Less(i, j int) bool { return bytes.Compare(a[i].name, a[j].name) == -1 } - -// logTagKeyIterator represents an iterator over a slice of tag keys. -type logTagKeyIterator struct { - a []logTagKey -} - -// newLogTagKeyIterator returns a new instance of logTagKeyIterator. 
-func newLogTagKeyIterator(a []logTagKey) *logTagKeyIterator { - sort.Sort(logTagKeySlice(a)) - return &logTagKeyIterator{a: a} -} - -// Next returns the next element in the iterator. -func (itr *logTagKeyIterator) Next() (e TagKeyElem) { - if len(itr.a) == 0 { - return nil - } - e, itr.a = &itr.a[0], itr.a[1:] - return e -} - -// logTagValueIterator represents an iterator over a slice of tag values. -type logTagValueIterator struct { - a []logTagValue -} - -// newLogTagValueIterator returns a new instance of logTagValueIterator. -func newLogTagValueIterator(a []logTagValue) *logTagValueIterator { - sort.Sort(logTagValueSlice(a)) - return &logTagValueIterator{a: a} -} - -// Next returns the next element in the iterator. -func (itr *logTagValueIterator) Next() (e TagValueElem) { - if len(itr.a) == 0 { - return nil - } - e, itr.a = &itr.a[0], itr.a[1:] - return e -} - -// FormatLogFileName generates a log filename for the given index. -func FormatLogFileName(id int) string { - return fmt.Sprintf("L0-%08d%s", id, LogFileExt) -} diff --git a/tsdb/tsi1/log_file_test.go b/tsdb/tsi1/log_file_test.go deleted file mode 100644 index 20245fec33..0000000000 --- a/tsdb/tsi1/log_file_test.go +++ /dev/null @@ -1,610 +0,0 @@ -package tsi1_test - -import ( - "bytes" - "fmt" - "io/ioutil" - "math/rand" - "os" - "path/filepath" - "regexp" - "runtime/pprof" - "sort" - "testing" - "time" - - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/pkg/bloom" - "github.com/influxdata/influxdb/v2/pkg/fs" - "github.com/influxdata/influxdb/v2/pkg/slices" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" - "github.com/influxdata/influxdb/v2/tsdb/tsi1" -) - -// Ensure log file can append series. -func TestLogFile_AddSeriesList(t *testing.T) { - t.Parallel() - - sfile := MustOpenSeriesFile() - defer sfile.Close() - - f := MustOpenLogFile(sfile.SeriesFile) - defer f.Close() - seriesSet := tsdb.NewSeriesIDSet() - - // Add test data. - collection := &tsdb.SeriesCollection{ - Names: slices.StringsToBytes("cpu", "mem"), - Types: []models.FieldType{models.Integer, models.Integer}, - Tags: []models.Tags{ - {{Key: []byte("region"), Value: []byte("us-east")}}, - {{Key: []byte("host"), Value: []byte("serverA")}}, - }, - } - - if err := sfile.CreateSeriesListIfNotExists(collection); err != nil { - t.Fatal(err) - } - ids, err := f.AddSeriesList(seriesSet, collection) - if err != nil { - t.Fatal(err) - } - - // Returned series ids should match those in the seriesSet. - other := tsdb.NewSeriesIDSet(ids...) - if !other.Equals(seriesSet) { - t.Fatalf("got series ids %s, expected %s", other, seriesSet) - } - - // Add the same series again with a new one. - collection = &tsdb.SeriesCollection{ - Names: slices.StringsToBytes("cpu", "mem"), - Types: []models.FieldType{models.Integer, models.Integer}, - Tags: []models.Tags{ - {{Key: []byte("region"), Value: []byte("us-west")}}, - {{Key: []byte("host"), Value: []byte("serverA")}}, - }, - } - if err := sfile.CreateSeriesListIfNotExists(collection); err != nil { - t.Fatal(err) - } - ids, err = f.AddSeriesList(seriesSet, collection) - if err != nil { - t.Fatal(err) - } - - if got, exp := len(ids), 2; got != exp { - t.Fatalf("got %d series ids, expected %d", got, exp) - } else if ids[0].IsZero() { - t.Error("series id was 0, expected it not to be") - } else if !ids[1].IsZero() { - t.Errorf("got series id %d, expected 0", ids[1].RawID()) - } - - // Add only the same series IDs. 
- collection = &tsdb.SeriesCollection{ - Names: slices.StringsToBytes("cpu", "mem"), - Types: []models.FieldType{models.Integer, models.Integer}, - Tags: []models.Tags{ - {{Key: []byte("region"), Value: []byte("us-west")}}, - {{Key: []byte("host"), Value: []byte("serverA")}}, - }, - } - if err := sfile.CreateSeriesListIfNotExists(collection); err != nil { - t.Fatal(err) - } - ids, err = f.AddSeriesList(seriesSet, collection) - if err != nil { - t.Fatal(err) - } - - if len(ids) != 0 { - t.Fatalf("got %d ids, expected none", len(ids)) - } - - // Verify data. - itr := f.MeasurementIterator() - if e := itr.Next(); e == nil || string(e.Name()) != "cpu" { - t.Fatalf("unexpected measurement: %#v", e) - } else if e := itr.Next(); e == nil || string(e.Name()) != "mem" { - t.Fatalf("unexpected measurement: %#v", e) - } else if e := itr.Next(); e != nil { - t.Fatalf("expected eof, got: %#v", e) - } - - // Reopen file and re-verify. - if err := f.Reopen(); err != nil { - t.Fatal(err) - } - - // Verify data. - itr = f.MeasurementIterator() - if e := itr.Next(); e == nil || string(e.Name()) != "cpu" { - t.Fatalf("unexpected measurement: %#v", e) - } else if e := itr.Next(); e == nil || string(e.Name()) != "mem" { - t.Fatalf("unexpected measurement: %#v", e) - } else if e := itr.Next(); e != nil { - t.Fatalf("expected eof, got: %#v", e) - } -} - -func TestLogFile_SeriesStoredInOrder(t *testing.T) { - t.Skip("TODO(#14028): flaky test (https://github.com/influxdata/influxdb/issues/14028)") - t.Parallel() - - sfile := MustOpenSeriesFile() - defer sfile.Close() - - f := MustOpenLogFile(sfile.SeriesFile) - defer f.Close() - seriesSet := tsdb.NewSeriesIDSet() - - // Generate and add test data - tvm := make(map[string]struct{}) - rand.Seed(time.Now().Unix()) - for i := 0; i < 100; i++ { - tv := fmt.Sprintf("server-%d", rand.Intn(50)) // Encourage adding duplicate series. - tvm[tv] = struct{}{} - - collection := &tsdb.SeriesCollection{ - Names: [][]byte{[]byte("mem"), []byte("cpu")}, - Types: []models.FieldType{models.Integer, models.Integer}, - Tags: []models.Tags{ - {models.NewTag([]byte("host"), []byte(tv))}, - {models.NewTag([]byte("host"), []byte(tv))}, - }, - } - if err := sfile.CreateSeriesListIfNotExists(collection); err != nil { - t.Fatal(err) - } - if _, err := f.AddSeriesList(seriesSet, collection); err != nil { - t.Fatal(err) - } - } - - // Sort the tag values so we know what order to expect. - tvs := make([]string, 0, len(tvm)) - for tv := range tvm { - tvs = append(tvs, tv) - } - sort.Strings(tvs) - - // Double the series values since we're adding them twice (two measurements) - tvs = append(tvs, tvs...) - - // When we pull the series out via an iterator they should be in order. - itr := f.SeriesIDIterator() - if itr == nil { - t.Fatal("nil iterator") - } - - var prevSeriesID tsdb.SeriesID - for i := 0; i < len(tvs); i++ { - elem, err := itr.Next() - if err != nil { - t.Fatal(err) - } else if elem.SeriesID.IsZero() { - t.Fatal("got nil series") - } else if elem.SeriesID.Less(prevSeriesID) { - t.Fatalf("series out of order: %d !< %d ", elem.SeriesID, prevSeriesID) - } - prevSeriesID = elem.SeriesID - } -} - -// Ensure log file can delete an existing measurement. -func TestLogFile_DeleteMeasurement(t *testing.T) { - t.Parallel() - - sfile := MustOpenSeriesFile() - defer sfile.Close() - - f := MustOpenLogFile(sfile.SeriesFile) - defer f.Close() - seriesSet := tsdb.NewSeriesIDSet() - - // Add test data. 
- collection := &tsdb.SeriesCollection{ - Names: [][]byte{[]byte("mem"), []byte("cpu"), []byte("cpu")}, - Types: []models.FieldType{models.Integer, models.Integer, models.Integer}, - Tags: []models.Tags{ - {{Key: []byte("host"), Value: []byte("serverA")}}, - {{Key: []byte("region"), Value: []byte("us-east")}}, - {{Key: []byte("region"), Value: []byte("us-west")}}, - }, - } - - if err := sfile.CreateSeriesListIfNotExists(collection); err != nil { - t.Fatal(err) - } - if _, err := f.AddSeriesList(seriesSet, collection); err != nil { - t.Fatal(err) - } - - // Remove measurement. - if err := f.DeleteMeasurement([]byte("cpu")); err != nil { - t.Fatal(err) - } - - // Verify data. - itr := f.MeasurementIterator() - if e := itr.Next(); string(e.Name()) != "cpu" || !e.Deleted() { - t.Fatalf("unexpected measurement: %s/%v", e.Name(), e.Deleted()) - } else if e := itr.Next(); string(e.Name()) != "mem" || e.Deleted() { - t.Fatalf("unexpected measurement: %s/%v", e.Name(), e.Deleted()) - } else if e := itr.Next(); e != nil { - t.Fatalf("expected eof, got: %#v", e) - } -} - -// Ensure log file can recover correctly. -func TestLogFile_Open(t *testing.T) { - t.Parallel() - - t.Run("Truncate", func(t *testing.T) { - sfile := MustOpenSeriesFile() - defer sfile.Close() - seriesSet := tsdb.NewSeriesIDSet() - - f := MustOpenLogFile(sfile.SeriesFile) - defer f.Close() - - // Add test data & close. - collection := &tsdb.SeriesCollection{ - Names: [][]byte{[]byte("cpu"), []byte("mem")}, - Tags: []models.Tags{{{}}, {{}}}, - Types: []models.FieldType{models.Integer, models.Integer}, - } - - if err := sfile.CreateSeriesListIfNotExists(collection); err != nil { - t.Fatal(err) - } - if _, err := f.AddSeriesList(seriesSet, collection); err != nil { - t.Fatal(err) - } else if err := f.LogFile.Close(); err != nil { - t.Fatal(err) - } - - // Truncate data & reopen. - if fi, err := os.Stat(f.LogFile.Path()); err != nil { - t.Fatal(err) - } else if err := os.Truncate(f.LogFile.Path(), fi.Size()-1); err != nil { - t.Fatal(err) - } else if err := f.LogFile.Open(); err != nil { - t.Fatal(err) - } - - // Verify data. - itr := f.SeriesIDIterator() - if elem, err := itr.Next(); err != nil { - t.Fatal(err) - } else if name, tags := sfile.Series(elem.SeriesID); string(name) != `cpu` { - t.Fatalf("unexpected series: %s,%s", name, tags.String()) - } else if elem, err := itr.Next(); err != nil { - t.Fatal(err) - } else if !elem.SeriesID.IsZero() { - t.Fatalf("expected eof, got: %#v", elem) - } - - // Add more data & reopen. - collection = &tsdb.SeriesCollection{ - Names: [][]byte{[]byte("disk")}, - Tags: []models.Tags{{{}}}, - Types: []models.FieldType{models.Integer}, - } - - if err := sfile.CreateSeriesListIfNotExists(collection); err != nil { - t.Fatal(err) - } - if _, err := f.AddSeriesList(seriesSet, collection); err != nil { - t.Fatal(err) - } else if err := f.Reopen(); err != nil { - t.Fatal(err) - } - - // Verify new data. 
- itr = f.SeriesIDIterator() - if elem, err := itr.Next(); err != nil { - t.Fatal(err) - } else if name, tags := sfile.Series(elem.SeriesID); string(name) != `cpu` { - t.Fatalf("unexpected series: %s,%s", name, tags.String()) - } else if elem, err := itr.Next(); err != nil { - t.Fatal(err) - } else if name, tags := sfile.Series(elem.SeriesID); string(name) != `disk` { - t.Fatalf("unexpected series: %s,%s", name, tags.String()) - } else if elem, err := itr.Next(); err != nil { - t.Fatal(err) - } else if !elem.SeriesID.IsZero() { - t.Fatalf("expected eof, got: %#v", elem) - } - }) - - t.Run("ChecksumMismatch", func(t *testing.T) { - sfile := MustOpenSeriesFile() - defer sfile.Close() - seriesSet := tsdb.NewSeriesIDSet() - - f := MustOpenLogFile(sfile.SeriesFile) - defer f.Close() - - // Add test data & close. - collection := &tsdb.SeriesCollection{ - Names: [][]byte{[]byte("cpu"), []byte("mem")}, - Tags: []models.Tags{{{}}, {{}}}, - Types: []models.FieldType{models.Integer, models.Integer}, - } - if err := sfile.CreateSeriesListIfNotExists(collection); err != nil { - t.Fatal(err) - } - if _, err := f.AddSeriesList(seriesSet, collection); err != nil { - t.Fatal(err) - } else if err := f.LogFile.Close(); err != nil { - t.Fatal(err) - } - - // Corrupt last entry. - buf, err := ioutil.ReadFile(f.LogFile.Path()) - if err != nil { - t.Fatal(err) - } - buf[len(buf)-1] = 0 - - // Overwrite file with corrupt entry and reopen. - if err := ioutil.WriteFile(f.LogFile.Path(), buf, 0666); err != nil { - t.Fatal(err) - } else if err := f.LogFile.Open(); err != nil { - t.Fatal(err) - } - - // Verify data. - itr := f.SeriesIDIterator() - if elem, err := itr.Next(); err != nil { - t.Fatal(err) - } else if name, tags := sfile.Series(elem.SeriesID); string(name) != `cpu` { - t.Fatalf("unexpected series: %s,%s", name, tags.String()) - } else if elem, err := itr.Next(); err != nil { - t.Fatal(err) - } else if !elem.SeriesID.IsZero() { - t.Fatalf("expected eof, got: %#v", elem) - } - }) -} - -// LogFile is a test wrapper for tsi1.LogFile. -type LogFile struct { - *tsi1.LogFile -} - -// NewLogFile returns a new instance of LogFile with a temporary file path. -func NewLogFile(sfile *seriesfile.SeriesFile) *LogFile { - file, err := ioutil.TempFile("", "tsi1-log-file-") - if err != nil { - panic(err) - } - file.Close() - - return &LogFile{LogFile: tsi1.NewLogFile(sfile, file.Name())} -} - -// MustOpenLogFile returns a new, open instance of LogFile. Panic on error. -func MustOpenLogFile(sfile *seriesfile.SeriesFile) *LogFile { - f := NewLogFile(sfile) - if err := f.Open(); err != nil { - panic(err) - } - return f -} - -// Close closes the log file and removes it from disk. -func (f *LogFile) Close() error { - defer os.Remove(f.Path()) - return f.LogFile.Close() -} - -// Reopen closes and reopens the file. -func (f *LogFile) Reopen() error { - if err := f.LogFile.Close(); err != nil { - return err - } - if err := f.LogFile.Open(); err != nil { - return err - } - return nil -} - -// CreateLogFile creates a new temporary log file and adds a list of series. 
-func CreateLogFile(sfile *seriesfile.SeriesFile, series []Series) (*LogFile, error) { - f := MustOpenLogFile(sfile) - seriesSet := tsdb.NewSeriesIDSet() - for _, serie := range series { - collection := &tsdb.SeriesCollection{ - Names: [][]byte{[]byte(serie.Name)}, - Tags: []models.Tags{serie.Tags}, - Types: []models.FieldType{serie.Type}, - } - if err := sfile.CreateSeriesListIfNotExists(collection); err != nil { - return nil, err - } - if _, err := f.AddSeriesList(seriesSet, collection); err != nil { - return nil, err - } - } - return f, nil -} - -// GenerateLogFile generates a log file from a set of series based on the count arguments. -// Total series returned will equal measurementN * tagN * valueN. -func GenerateLogFile(sfile *seriesfile.SeriesFile, measurementN, tagN, valueN int) (*LogFile, error) { - tagValueN := pow(valueN, tagN) - - f := MustOpenLogFile(sfile) - seriesSet := tsdb.NewSeriesIDSet() - collection := new(tsdb.SeriesCollection) - - for i := 0; i < measurementN; i++ { - name := []byte(fmt.Sprintf("measurement%d", i)) - - // Generate tag sets. - for j := 0; j < tagValueN; j++ { - var tags models.Tags - for k := 0; k < tagN; k++ { - key := []byte(fmt.Sprintf("key%d", k)) - value := []byte(fmt.Sprintf("value%d", (j / pow(valueN, k) % valueN))) - tags = append(tags, models.NewTag(key, value)) - } - collection.Names = append(collection.Names, name) - collection.Tags = append(collection.Tags, tags) - collection.Types = append(collection.Types, models.Integer) - - if collection.Length() >= 10000 { - if err := sfile.CreateSeriesListIfNotExists(collection); err != nil { - return nil, err - } - if _, err := f.AddSeriesList(seriesSet, collection); err != nil { - return nil, err - } - collection.Truncate(0) - } - } - } - - if collection.Length() > 0 { - if err := sfile.CreateSeriesListIfNotExists(collection); err != nil { - return nil, err - } - if _, err := f.AddSeriesList(seriesSet, collection); err != nil { - return nil, err - } - } - - return f, nil -} - -func benchmarkLogFile_AddSeries(b *testing.B, measurementN, seriesKeyN, seriesValueN int) { - sfile := MustOpenSeriesFile() - defer sfile.Close() - - b.StopTimer() - f := MustOpenLogFile(sfile.SeriesFile) - seriesSet := tsdb.NewSeriesIDSet() - - type Datum struct { - Name []byte - Tags models.Tags - Type models.FieldType - } - - // Pre-generate everything. 
- var ( - data []Datum - series int - ) - - tagValueN := pow(seriesValueN, seriesKeyN) - - for i := 0; i < measurementN; i++ { - name := []byte(fmt.Sprintf("measurement%d", i)) - for j := 0; j < tagValueN; j++ { - var tags models.Tags - for k := 0; k < seriesKeyN; k++ { - key := []byte(fmt.Sprintf("key%d", k)) - value := []byte(fmt.Sprintf("value%d", (j / pow(seriesValueN, k) % seriesValueN))) - tags = append(tags, models.NewTag(key, value)) - } - data = append(data, Datum{Name: name, Tags: tags, Type: models.Integer}) - series += len(tags) - } - } - - b.StartTimer() - b.ResetTimer() - - for i := 0; i < b.N; i++ { - for _, d := range data { - collection := &tsdb.SeriesCollection{ - Names: [][]byte{[]byte(d.Name)}, - Tags: []models.Tags{d.Tags}, - Types: []models.FieldType{d.Type}, - } - if err := sfile.CreateSeriesListIfNotExists(collection); err != nil { - b.Fatal(err) - } - if _, err := f.AddSeriesList(seriesSet, collection); err != nil { - b.Fatal(err) - } - } - } -} - -func BenchmarkLogFile_AddSeries_100_1_1(b *testing.B) { benchmarkLogFile_AddSeries(b, 100, 1, 1) } // 100 series -func BenchmarkLogFile_AddSeries_1000_1_1(b *testing.B) { benchmarkLogFile_AddSeries(b, 1000, 1, 1) } // 1000 series -func BenchmarkLogFile_AddSeries_10000_1_1(b *testing.B) { benchmarkLogFile_AddSeries(b, 10000, 1, 1) } // 10000 series -func BenchmarkLogFile_AddSeries_100_2_10(b *testing.B) { benchmarkLogFile_AddSeries(b, 100, 2, 10) } // ~20K series -func BenchmarkLogFile_AddSeries_100000_1_1(b *testing.B) { benchmarkLogFile_AddSeries(b, 100000, 1, 1) } // ~100K series -func BenchmarkLogFile_AddSeries_100_3_7(b *testing.B) { benchmarkLogFile_AddSeries(b, 100, 3, 7) } // ~100K series -func BenchmarkLogFile_AddSeries_200_3_7(b *testing.B) { benchmarkLogFile_AddSeries(b, 200, 3, 7) } // ~200K series -func BenchmarkLogFile_AddSeries_200_4_7(b *testing.B) { benchmarkLogFile_AddSeries(b, 200, 4, 7) } // ~1.9M series - -func BenchmarkLogFile_WriteTo(b *testing.B) { - for _, seriesN := range []int{1000, 10000, 100000, 1000000} { - name := fmt.Sprintf("series=%d", seriesN) - b.Run(name, func(b *testing.B) { - sfile := MustOpenSeriesFile() - defer sfile.Close() - - f := MustOpenLogFile(sfile.SeriesFile) - defer f.Close() - seriesSet := tsdb.NewSeriesIDSet() - - // Estimate bloom filter size. - m, k := bloom.Estimate(uint64(seriesN), 0.02) - - // Initialize log file with series data. - for i := 0; i < seriesN; i++ { - collection := &tsdb.SeriesCollection{ - Names: [][]byte{[]byte("cpu")}, - Tags: []models.Tags{{ - {Key: []byte("host"), Value: []byte(fmt.Sprintf("server-%d", i))}, - {Key: []byte("location"), Value: []byte("us-west")}, - }}, - Types: []models.FieldType{models.Integer}, - } - if err := sfile.CreateSeriesListIfNotExists(collection); err != nil { - b.Fatal(err) - } - if _, err := f.AddSeriesList(seriesSet, collection); err != nil { - b.Fatal(err) - } - } - b.ResetTimer() - - // Create cpu profile for each subtest. - MustStartCPUProfile(name) - defer pprof.StopCPUProfile() - - // Compact log file. - for i := 0; i < b.N; i++ { - buf := bytes.NewBuffer(make([]byte, 0, 150*seriesN)) - if _, err := f.CompactTo(buf, m, k, nil); err != nil { - b.Fatal(err) - } - b.Logf("sz=%db", buf.Len()) - } - }) - } -} - -// MustStartCPUProfile starts a cpu profile in a temporary path based on name. -func MustStartCPUProfile(name string) { - name = regexp.MustCompile(`\W+`).ReplaceAllString(name, "-") - - // Open file and start pprof. 
- f, err := fs.CreateFile(filepath.Join("/tmp", fmt.Sprintf("cpu-%s.pprof", name))) - if err != nil { - panic(err) - } - if err := pprof.StartCPUProfile(f); err != nil { - panic(err) - } -} diff --git a/tsdb/tsi1/measurement_block.go b/tsdb/tsi1/measurement_block.go deleted file mode 100644 index 6747d9a6e9..0000000000 --- a/tsdb/tsi1/measurement_block.go +++ /dev/null @@ -1,613 +0,0 @@ -package tsi1 - -import ( - "bytes" - "encoding/binary" - "errors" - "io" - "sort" - "unsafe" - - "github.com/influxdata/influxdb/v2/pkg/mincore" - "github.com/influxdata/influxdb/v2/pkg/rhh" - "github.com/influxdata/influxdb/v2/tsdb" -) - -// MeasurementBlockVersion is the version of the measurement block. -const MeasurementBlockVersion = 1 - -// Measurement flag constants. -const ( - MeasurementTombstoneFlag = 0x01 - MeasurementSeriesIDSetFlag = 0x02 -) - -// Measurement field size constants. -const ( - // 1 byte offset for the block to ensure non-zero offsets. - MeasurementFillSize = 1 - - // Measurement trailer fields - MeasurementTrailerSize = 0 + - 2 + // version - 8 + 8 + // data offset/size - 8 + 8 + // hash index offset/size - // legacy sketch info. we used to have HLL sketches, but they were - // removed. we keep the offset and length bytes in the trailer so - // that we don't have to do a migration, but they are unused. - 8 + 8 + 8 + 8 - - // Measurement key block fields. - MeasurementNSize = 8 - MeasurementOffsetSize = 8 - - SeriesIDSize = 8 -) - -// Measurement errors. -var ( - ErrUnsupportedMeasurementBlockVersion = errors.New("unsupported measurement block version") - ErrMeasurementBlockSizeMismatch = errors.New("measurement block size mismatch") -) - -// MeasurementBlock represents a collection of all measurements in an index. -type MeasurementBlock struct { - data []byte - hashData []byte - - version int // block version -} - -// bytes estimates the memory footprint of this MeasurementBlock, in bytes. -func (blk *MeasurementBlock) bytes() int { - var b int - // Do not count contents of blk.data or blk.hashData because they reference into an external []byte - b += int(unsafe.Sizeof(*blk)) - return b -} - -// Version returns the encoding version parsed from the data. -// Only valid after UnmarshalBinary() has been successfully invoked. -func (blk *MeasurementBlock) Version() int { return blk.version } - -// Elem returns an element for a measurement. -func (blk *MeasurementBlock) Elem(name []byte, limiter *mincore.Limiter) (e MeasurementBlockElem, ok bool) { - _ = wait(limiter, blk.hashData[:MeasurementNSize]) - n := int64(binary.BigEndian.Uint64(blk.hashData[:MeasurementNSize])) - hash := rhh.HashKey(name) - pos := hash % n - - // Track current distance - var d int64 - for { - // Find offset of measurement. - _ = wait(limiter, blk.hashData[MeasurementNSize+(pos*MeasurementOffsetSize):MeasurementNSize+(pos*MeasurementOffsetSize)+8]) - offset := binary.BigEndian.Uint64(blk.hashData[MeasurementNSize+(pos*MeasurementOffsetSize):]) - if offset == 0 { - return MeasurementBlockElem{}, false - } - - // Evaluate name if offset is not empty. - if offset > 0 { - // Parse into element. - var e MeasurementBlockElem - _ = wait(limiter, blk.data[offset:offset+1]) - e.UnmarshalBinary(blk.data[offset:]) - - // Return if name match. - if bytes.Equal(e.name, name) { - return e, true - } - - // Check if we've exceeded the probe distance. - if d > rhh.Dist(rhh.HashKey(e.name), pos, n) { - return MeasurementBlockElem{}, false - } - } - - // Move position forward. 
- pos = (pos + 1) % n - d++ - - if d > n { - return MeasurementBlockElem{}, false - } - } -} - -// UnmarshalBinary unpacks data into the block. Block is not copied so data -// should be retained and unchanged after being passed into this function. -func (blk *MeasurementBlock) UnmarshalBinary(data []byte) error { - // Read trailer. - t, err := ReadMeasurementBlockTrailer(data) - if err != nil { - return err - } - - // Save data section. - blk.data = data[t.Data.Offset:] - blk.data = blk.data[:t.Data.Size] - - // Save hash index block. - blk.hashData = data[t.HashIndex.Offset:] - blk.hashData = blk.hashData[:t.HashIndex.Size] - - return nil -} - -// Iterator returns an iterator over all measurements. -func (blk *MeasurementBlock) Iterator(limiter *mincore.Limiter) MeasurementIterator { - return &blockMeasurementIterator{ - data: blk.data[MeasurementFillSize:], - limiter: limiter, - } -} - -// SeriesIDIterator returns an iterator for all series ids in a measurement. -func (blk *MeasurementBlock) SeriesIDIterator(name []byte, limiter *mincore.Limiter) tsdb.SeriesIDIterator { - // Find measurement element. - e, ok := blk.Elem(name, limiter) - if !ok { - return &rawSeriesIDIterator{} - } - if e.seriesIDSet != nil { - _ = wait(limiter, e.seriesIDSetData) - return tsdb.NewSeriesIDSetIterator(e.seriesIDSet) - } - return &rawSeriesIDIterator{n: e.series.n, data: e.series.data} -} - -// blockMeasurementIterator iterates over a list measurements in a block. -type blockMeasurementIterator struct { - elem MeasurementBlockElem - data []byte - - limiter *mincore.Limiter -} - -// Next returns the next measurement. Returns nil when iterator is complete. -func (itr *blockMeasurementIterator) Next() MeasurementElem { - // Return nil when we run out of data. - if len(itr.data) == 0 { - return nil - } - - // Unmarshal the element at the current position. - itr.elem.UnmarshalBinary(itr.data) - _ = wait(itr.limiter, itr.data[:itr.elem.size]) - - // Move the data forward past the record. - itr.data = itr.data[itr.elem.size:] - - return &itr.elem -} - -// rawSeriesIterator iterates over a list of raw series data. -type rawSeriesIDIterator struct { - prev uint64 - n uint64 - data []byte -} - -func (itr *rawSeriesIDIterator) Close() error { return nil } - -// Next returns the next decoded series. -func (itr *rawSeriesIDIterator) Next() (tsdb.SeriesIDElem, error) { - if len(itr.data) == 0 { - return tsdb.SeriesIDElem{}, nil - } - - delta, n, err := uvarint(itr.data) - if err != nil { - return tsdb.SeriesIDElem{}, err - } - itr.data = itr.data[n:] - - seriesID := itr.prev + uint64(delta) - itr.prev = seriesID - return tsdb.SeriesIDElem{SeriesID: tsdb.NewSeriesID(seriesID)}, nil -} - -func (itr *rawSeriesIDIterator) SeriesIDSet() *tsdb.SeriesIDSet { - ss := tsdb.NewSeriesIDSet() - for data, prev := itr.data, uint64(0); len(data) > 0; { - delta, n, err := uvarint(data) - if err != nil { - break - } - data = data[n:] - - seriesID := prev + uint64(delta) - prev = seriesID - ss.AddNoLock(tsdb.NewSeriesID(seriesID)) - } - return ss -} - -// MeasurementBlockTrailer represents meta data at the end of a MeasurementBlock. -type MeasurementBlockTrailer struct { - Version int // Encoding version - - // Offset & size of data section. - Data struct { - Offset int64 - Size int64 - } - - // Offset & size of hash map section. - HashIndex struct { - Offset int64 - Size int64 - } -} - -// ReadMeasurementBlockTrailer returns the block trailer from data. 
-func ReadMeasurementBlockTrailer(data []byte) (MeasurementBlockTrailer, error) { - var t MeasurementBlockTrailer - - // Read version (which is located in the last two bytes of the trailer). - t.Version = int(binary.BigEndian.Uint16(data[len(data)-2:])) - if t.Version != MeasurementBlockVersion { - return t, ErrUnsupportedIndexFileVersion - } - - // Slice trailer data. - buf := data[len(data)-MeasurementTrailerSize:] - - // Read data section info. - t.Data.Offset, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] - t.Data.Size, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] - - // Read measurement block info. - t.HashIndex.Offset, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] - t.HashIndex.Size, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] - - // We would advance past old sketch info, but that's unused now. - _ = buf - // buf = buf[4*8:] - - return t, nil -} - -// WriteTo writes the trailer to w. -func (t *MeasurementBlockTrailer) WriteTo(w io.Writer) (n int64, err error) { - // Write data section info. - if err := writeUint64To(w, uint64(t.Data.Offset), &n); err != nil { - return n, err - } else if err := writeUint64To(w, uint64(t.Data.Size), &n); err != nil { - return n, err - } - - // Write hash index section info. - if err := writeUint64To(w, uint64(t.HashIndex.Offset), &n); err != nil { - return n, err - } else if err := writeUint64To(w, uint64(t.HashIndex.Size), &n); err != nil { - return n, err - } - - // Write legacy sketch info. - for i := 0; i < 4; i++ { - if err := writeUint64To(w, 0, &n); err != nil { - return n, err - } - } - - // Write measurement block version. - if err := writeUint16To(w, MeasurementBlockVersion, &n); err != nil { - return n, err - } - - return n, nil -} - -// MeasurementBlockElem represents an internal measurement element. -type MeasurementBlockElem struct { - flag byte // flag - name []byte // measurement name - - tagBlock struct { - offset int64 - size int64 - } - - series struct { - n uint64 // series count - data []byte // serialized series data - } - - seriesIDSet *tsdb.SeriesIDSet - seriesIDSetData []byte - - // size in bytes, set after unmarshaling. - size int -} - -// Name returns the measurement name. -func (e *MeasurementBlockElem) Name() []byte { return e.name } - -// Deleted returns true if the tombstone flag is set. -func (e *MeasurementBlockElem) Deleted() bool { - return (e.flag & MeasurementTombstoneFlag) != 0 -} - -// TagBlockOffset returns the offset of the measurement's tag block. -func (e *MeasurementBlockElem) TagBlockOffset() int64 { return e.tagBlock.offset } - -// TagBlockSize returns the size of the measurement's tag block. -func (e *MeasurementBlockElem) TagBlockSize() int64 { return e.tagBlock.size } - -// SeriesData returns the raw series data. -func (e *MeasurementBlockElem) SeriesData() []byte { return e.series.data } - -// SeriesN returns the number of series associated with the measurement. -func (e *MeasurementBlockElem) SeriesN() uint64 { return e.series.n } - -// SeriesID returns series ID at an index. -func (e *MeasurementBlockElem) SeriesID(i int) uint64 { - return binary.BigEndian.Uint64(e.series.data[i*SeriesIDSize:]) -} - -func (e *MeasurementBlockElem) HasSeries() bool { return e.series.n > 0 } - -// SeriesIDs returns a list of decoded series ids. -// -// NOTE: This should be used for testing and diagnostics purposes only. -// It requires loading the entire list of series in-memory. 
-func (e *MeasurementBlockElem) SeriesIDs() []tsdb.SeriesID { - a := make([]tsdb.SeriesID, 0, e.series.n) - e.ForEachSeriesID(func(id tsdb.SeriesID) error { - a = append(a, id) - return nil - }) - return a -} - -func (e *MeasurementBlockElem) ForEachSeriesID(fn func(tsdb.SeriesID) error) error { - // Read from roaring, if available. - if e.seriesIDSet != nil { - itr := e.seriesIDSet.Iterator() - for itr.HasNext() { - if err := fn(tsdb.NewSeriesID(uint64(itr.Next()))); err != nil { - return err - } - } - } - - // Read from uvarint encoded data, if available. - var prev uint64 - for data := e.series.data; len(data) > 0; { - delta, n, err := uvarint(data) - if err != nil { - return err - } - data = data[n:] - - seriesID := prev + uint64(delta) - if err = fn(tsdb.NewSeriesID(seriesID)); err != nil { - return err - } - prev = seriesID - } - return nil -} - -// Size returns the size of the element. -func (e *MeasurementBlockElem) Size() int { return e.size } - -// UnmarshalBinary unmarshals data into e. -func (e *MeasurementBlockElem) UnmarshalBinary(data []byte) error { - start := len(data) - - // Parse flag data. - e.flag, data = data[0], data[1:] - - // Parse tag block offset. - e.tagBlock.offset, data = int64(binary.BigEndian.Uint64(data)), data[8:] - e.tagBlock.size, data = int64(binary.BigEndian.Uint64(data)), data[8:] - - // Parse name. - sz, n, err := uvarint(data) - if err != nil { - return err - } - e.name, data = data[n:n+int(sz)], data[n+int(sz):] - - // Parse series count. - v, n, err := uvarint(data) - if err != nil { - return err - } - e.series.n, data = uint64(v), data[n:] - - // Parse series data size. - sz, n, err = uvarint(data) - if err != nil { - return err - } - data = data[n:] - - // Parse series data (original uvarint encoded or roaring bitmap). - if e.flag&MeasurementSeriesIDSetFlag == 0 { - e.series.data, data = data[:sz], data[sz:] - } else { - // data = memalign(data) - e.seriesIDSet = tsdb.NewSeriesIDSet() - e.seriesIDSetData = data[:sz] - if err = e.seriesIDSet.UnmarshalBinaryUnsafe(data[:sz]); err != nil { - return err - } - data = data[sz:] - } - - // Save length of elem. - e.size = start - len(data) - - return nil -} - -// MeasurementBlockWriter writes a measurement block. -type MeasurementBlockWriter struct { - buf bytes.Buffer - mms map[string]measurement -} - -// NewMeasurementBlockWriter returns a new MeasurementBlockWriter. -func NewMeasurementBlockWriter() *MeasurementBlockWriter { - return &MeasurementBlockWriter{ - mms: make(map[string]measurement), - } -} - -// Add adds a measurement with series and tag set offset/size. -func (mw *MeasurementBlockWriter) Add(name []byte, deleted bool, offset, size int64, seriesIDs []tsdb.SeriesID) { - mm := mw.mms[string(name)] - mm.deleted = deleted - mm.tagBlock.offset = offset - mm.tagBlock.size = size - - if mm.seriesIDSet == nil { - mm.seriesIDSet = tsdb.NewSeriesIDSet() - } - for _, seriesID := range seriesIDs { - mm.seriesIDSet.AddNoLock(seriesID) - } - - mw.mms[string(name)] = mm -} - -// WriteTo encodes the measurements to w. -func (mw *MeasurementBlockWriter) WriteTo(w io.Writer) (n int64, err error) { - var t MeasurementBlockTrailer - - // Sort names. - names := make([]string, 0, len(mw.mms)) - for name := range mw.mms { - names = append(names, name) - } - sort.Strings(names) - - // Begin data section. - t.Data.Offset = n - - // Write padding byte so no offsets are zero. - if err := writeUint8To(w, 0, &n); err != nil { - return n, err - } - - // Encode key list. 
- for _, name := range names { - // Retrieve measurement and save offset. - mm := mw.mms[name] - mm.offset = n - mw.mms[name] = mm - - // Write measurement - if err := mw.writeMeasurementTo(w, []byte(name), &mm, &n); err != nil { - return n, err - } - } - t.Data.Size = n - t.Data.Offset - - // Build key hash map - m := rhh.NewHashMap(rhh.Options{ - Capacity: int64(len(names)), - LoadFactor: LoadFactor, - }) - for name := range mw.mms { - mm := mw.mms[name] - m.Put([]byte(name), &mm) - } - - t.HashIndex.Offset = n - - // Encode hash map length. - if err := writeUint64To(w, uint64(m.Cap()), &n); err != nil { - return n, err - } - - // Encode hash map offset entries. - for i := int64(0); i < m.Cap(); i++ { - _, v := m.Elem(i) - - var offset int64 - if mm, ok := v.(*measurement); ok { - offset = mm.offset - } - - if err := writeUint64To(w, uint64(offset), &n); err != nil { - return n, err - } - } - t.HashIndex.Size = n - t.HashIndex.Offset - - // Write trailer. - nn, err := t.WriteTo(w) - n += nn - return n, err -} - -// writeMeasurementTo encodes a single measurement entry into w. -func (mw *MeasurementBlockWriter) writeMeasurementTo(w io.Writer, name []byte, mm *measurement, n *int64) error { - // Write flag & tag block offset. - if err := writeUint8To(w, mm.flag(), n); err != nil { - return err - } - if err := writeUint64To(w, uint64(mm.tagBlock.offset), n); err != nil { - return err - } else if err := writeUint64To(w, uint64(mm.tagBlock.size), n); err != nil { - return err - } - - // Write measurement name. - if err := writeUvarintTo(w, uint64(len(name)), n); err != nil { - return err - } - if err := writeTo(w, name, n); err != nil { - return err - } - - // Write series data to buffer. - mw.buf.Reset() - if _, err := mm.seriesIDSet.WriteTo(&mw.buf); err != nil { - return err - } - - // Write series count. - if err := writeUvarintTo(w, mm.seriesIDSet.Cardinality(), n); err != nil { - return err - } - - // Write data size & buffer. - if err := writeUvarintTo(w, uint64(mw.buf.Len()), n); err != nil { - return err - } - - // Word align bitmap data. 
- // if offset := (*n) % 8; offset != 0 { - // if err := writeTo(w, make([]byte, 8-offset), n); err != nil { - // return err - // } - // } - - nn, err := mw.buf.WriteTo(w) - *n += nn - return err -} - -type measurement struct { - deleted bool - tagBlock struct { - offset int64 - size int64 - } - seriesIDSet *tsdb.SeriesIDSet - offset int64 -} - -func (mm measurement) flag() byte { - flag := byte(MeasurementSeriesIDSetFlag) - if mm.deleted { - flag |= MeasurementTombstoneFlag - } - return flag -} diff --git a/tsdb/tsi1/measurement_block_test.go b/tsdb/tsi1/measurement_block_test.go deleted file mode 100644 index b3b694a08a..0000000000 --- a/tsdb/tsi1/measurement_block_test.go +++ /dev/null @@ -1,176 +0,0 @@ -package tsi1_test - -import ( - "bytes" - "encoding/binary" - "fmt" - "reflect" - "testing" - - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/tsi1" -) - -func TestReadMeasurementBlockTrailer(t *testing.T) { - // Build a trailer - var ( - data = make([]byte, tsi1.MeasurementTrailerSize) - blockversion = uint16(1) - blockOffset, blockSize = uint64(1), uint64(2500) - hashIdxOffset, hashIdxSize = uint64(2501), uint64(1000) - ) - - binary.BigEndian.PutUint64(data[0:], blockOffset) - binary.BigEndian.PutUint64(data[8:], blockSize) - binary.BigEndian.PutUint64(data[16:], hashIdxOffset) - binary.BigEndian.PutUint64(data[24:], hashIdxSize) - binary.BigEndian.PutUint64(data[32:], 0) - binary.BigEndian.PutUint64(data[40:], 0) - binary.BigEndian.PutUint64(data[48:], 0) - binary.BigEndian.PutUint64(data[56:], 0) - binary.BigEndian.PutUint16(data[64:], blockversion) - - trailer, err := tsi1.ReadMeasurementBlockTrailer(data) - if err != nil { - t.Logf("trailer is: %#v\n", trailer) - t.Fatal(err) - } - - ok := true && - trailer.Version == int(blockversion) && - trailer.Data.Offset == int64(blockOffset) && - trailer.Data.Size == int64(blockSize) && - trailer.HashIndex.Offset == int64(hashIdxOffset) && - trailer.HashIndex.Size == int64(hashIdxSize) - - if !ok { - t.Fatalf("got %v\nwhich does not match expected", trailer) - } -} - -func TestMeasurementBlockTrailer_WriteTo(t *testing.T) { - var trailer = tsi1.MeasurementBlockTrailer{ - Version: 1, - Data: struct { - Offset int64 - Size int64 - }{Offset: 1, Size: 2}, - HashIndex: struct { - Offset int64 - Size int64 - }{Offset: 3, Size: 4}, - } - - var buf bytes.Buffer - n, err := trailer.WriteTo(&buf) - if got, exp := n, int64(tsi1.MeasurementTrailerSize); got != exp { - t.Fatalf("got %v, exp %v", got, exp) - } - - if got := err; got != nil { - t.Fatalf("got %v, exp %v", got, nil) - } - - // Verify trailer written correctly. - exp := "" + - "0000000000000001" + // data offset - "0000000000000002" + // data size - "0000000000000003" + // hash index offset - "0000000000000004" + // hash index size - "0000000000000000" + // legacy sketch offset - "0000000000000000" + // legacy sketch size - "0000000000000000" + // legacy tsketch offset - "0000000000000000" + // legacy tsketch size - "0001" // version - - if got, exp := fmt.Sprintf("%x", buf.String()), exp; got != exp { - t.Fatalf("got %v, exp %v", got, exp) - } -} - -// Ensure measurement blocks can be written and opened. -func TestMeasurementBlockWriter(t *testing.T) { - ms := Measurements{ - NewMeasurement([]byte("foo"), false, 100, 10, toSeriesIDs([]uint64{1, 3, 4})), - NewMeasurement([]byte("bar"), false, 200, 20, toSeriesIDs([]uint64{2})), - NewMeasurement([]byte("baz"), false, 300, 30, toSeriesIDs([]uint64{5, 6})), - } - - // Write the measurements to writer. 
- mw := tsi1.NewMeasurementBlockWriter()
- for _, m := range ms {
- mw.Add(m.Name, m.Deleted, m.Offset, m.Size, m.ids)
- }
-
- // Encode into buffer.
- var buf bytes.Buffer
- if n, err := mw.WriteTo(&buf); err != nil {
- t.Fatal(err)
- } else if n == 0 {
- t.Fatal("expected bytes written")
- }
-
- // Unmarshal into a block.
- var blk tsi1.MeasurementBlock
- if err := blk.UnmarshalBinary(buf.Bytes()); err != nil {
- t.Fatal(err)
- }
-
- // Verify data in block.
- if e, ok := blk.Elem([]byte("foo"), nil); !ok {
- t.Fatal("expected element")
- } else if e.TagBlockOffset() != 100 || e.TagBlockSize() != 10 {
- t.Fatalf("unexpected offset/size: %v/%v", e.TagBlockOffset(), e.TagBlockSize())
- } else if !reflect.DeepEqual(e.SeriesIDs(), toSeriesIDs([]uint64{1, 3, 4})) {
- t.Fatalf("unexpected series data: %#v", e.SeriesIDs())
- }
-
- if e, ok := blk.Elem([]byte("bar"), nil); !ok {
- t.Fatal("expected element")
- } else if e.TagBlockOffset() != 200 || e.TagBlockSize() != 20 {
- t.Fatalf("unexpected offset/size: %v/%v", e.TagBlockOffset(), e.TagBlockSize())
- } else if !reflect.DeepEqual(e.SeriesIDs(), toSeriesIDs([]uint64{2})) {
- t.Fatalf("unexpected series data: %#v", e.SeriesIDs())
- }
-
- if e, ok := blk.Elem([]byte("baz"), nil); !ok {
- t.Fatal("expected element")
- } else if e.TagBlockOffset() != 300 || e.TagBlockSize() != 30 {
- t.Fatalf("unexpected offset/size: %v/%v", e.TagBlockOffset(), e.TagBlockSize())
- } else if !reflect.DeepEqual(e.SeriesIDs(), toSeriesIDs([]uint64{5, 6})) {
- t.Fatalf("unexpected series data: %#v", e.SeriesIDs())
- }
-
- // Verify non-existent measurement doesn't exist.
- if _, ok := blk.Elem([]byte("BAD_MEASUREMENT"), nil); ok {
- t.Fatal("expected no element")
- }
-}
-
-type Measurements []Measurement
-
-type Measurement struct {
- Name []byte
- Deleted bool
- Offset int64
- Size int64
- ids []tsdb.SeriesID
-}
-
-func NewMeasurement(name []byte, deleted bool, offset, size int64, ids []tsdb.SeriesID) Measurement {
- return Measurement{
- Name: name,
- Deleted: deleted,
- Offset: offset,
- Size: size,
- ids: ids,
- }
-}
-
-func toSeriesIDs(ids []uint64) []tsdb.SeriesID {
- sids := make([]tsdb.SeriesID, 0, len(ids))
- for _, id := range ids {
- sids = append(sids, tsdb.NewSeriesID(id))
- }
- return sids
-}
diff --git a/tsdb/tsi1/metrics.go b/tsdb/tsi1/metrics.go
deleted file mode 100644
index 84ac5aab56..0000000000
--- a/tsdb/tsi1/metrics.go
+++ /dev/null
@@ -1,228 +0,0 @@
-package tsi1
-
-import (
- "sort"
- "sync"
-
- "github.com/prometheus/client_golang/prometheus"
-)
-
-// The following package variables act as singletons, to be shared by all
-// storage.Engine instantiations. This allows multiple TSI indexes to be
-// monitored within the same process.
-var (
- cms *cacheMetrics // TSI index cache metrics
- pms *partitionMetrics // TSI partition metrics
- mmu sync.RWMutex
-)
-
-// PrometheusCollectors returns all prometheus metrics for the tsi1 package.
-func PrometheusCollectors() []prometheus.Collector {
- mmu.RLock()
- defer mmu.RUnlock()
-
- var collectors []prometheus.Collector
- if cms != nil {
- collectors = append(collectors, cms.PrometheusCollectors()...)
- }
- if pms != nil {
- collectors = append(collectors, pms.PrometheusCollectors()...)
- }
- return collectors
-}
-
-// namespace is the leading part of all published metrics for the Storage service.
-const namespace = "storage"
-
-const cacheSubsystem = "tsi_cache" // sub-system associated with TSI index cache.
-const partitionSubsystem = "tsi_index" // sub-system associated with the TSI index.
-
-type cacheMetrics struct {
- Size *prometheus.GaugeVec // Size of the cache.
-
- // These metrics have an extra label status = {"hit", "miss"}
- Gets *prometheus.CounterVec // Number of times item retrieved.
- Puts *prometheus.CounterVec // Number of times item inserted.
- Deletes *prometheus.CounterVec // Number of times item deleted.
- Evictions *prometheus.CounterVec // Number of times item evicted.
-}
-
-// newCacheMetrics initialises the prometheus metrics for tracking the TSI index cache.
-func newCacheMetrics(labels prometheus.Labels) *cacheMetrics {
- var names []string
- for k := range labels {
- names = append(names, k)
- }
- sort.Strings(names)
-
- statusNames := append(append([]string(nil), names...), "status")
- sort.Strings(statusNames)
-
- return &cacheMetrics{
- Size: prometheus.NewGaugeVec(prometheus.GaugeOpts{
- Namespace: namespace,
- Subsystem: cacheSubsystem,
- Name: "size",
- Help: "Number of items residing in the cache.",
- }, names),
- Gets: prometheus.NewCounterVec(prometheus.CounterOpts{
- Namespace: namespace,
- Subsystem: cacheSubsystem,
- Name: "get_total",
- Help: "Total number of gets on cache.",
- }, statusNames),
- Puts: prometheus.NewCounterVec(prometheus.CounterOpts{
- Namespace: namespace,
- Subsystem: cacheSubsystem,
- Name: "put_total",
- Help: "Total number of insertions in cache.",
- }, statusNames),
- Deletes: prometheus.NewCounterVec(prometheus.CounterOpts{
- Namespace: namespace,
- Subsystem: cacheSubsystem,
- Name: "deletes_total",
- Help: "Total number of deletions in cache.",
- }, statusNames),
- Evictions: prometheus.NewCounterVec(prometheus.CounterOpts{
- Namespace: namespace,
- Subsystem: cacheSubsystem,
- Name: "evictions_total",
- Help: "Total number of cache evictions.",
- }, names),
- }
-}
-
-// PrometheusCollectors satisfies the prom.PrometheusCollector interface.
-func (m *cacheMetrics) PrometheusCollectors() []prometheus.Collector {
- return []prometheus.Collector{
- m.Size,
- m.Gets,
- m.Puts,
- m.Deletes,
- m.Evictions,
- }
-}
-
-type partitionMetrics struct {
- SeriesCreated *prometheus.CounterVec // Number of series created in the partition.
- SeriesCreatedDuration *prometheus.HistogramVec // Distribution of time to insert series.
- SeriesDropped *prometheus.CounterVec // Number of series removed from index.
- Series *prometheus.GaugeVec // Number of series.
- Measurements *prometheus.GaugeVec // Number of measurements.
- DiskSize *prometheus.GaugeVec // Size occupied on disk.
-
- // This metric has a "type" label = {index, log}
- FilesTotal *prometheus.GaugeVec // Number of files on disk.
-
- // This metric has a "level" label.
- CompactionsActive *prometheus.GaugeVec // Number of active compactions.
-
- // These metrics have a "level" label.
- // The following metrics include a "status" = {ok, error} label
- CompactionDuration *prometheus.HistogramVec // Duration of compactions.
- Compactions *prometheus.CounterVec // Total number of compactions.
-}
-
-// newPartitionMetrics initialises the prometheus metrics for tracking the TSI partitions.
-func newPartitionMetrics(labels prometheus.Labels) *partitionMetrics {
- names := []string{"index_partition"} // All metrics have a partition label.
- for k := range labels {
- names = append(names, k)
- }
- sort.Strings(names)
-
- // type = {"index", "log"}
- fileNames := append(append([]string(nil), names...), "type")
- sort.Strings(fileNames)
-
- // level = [0, 7]
- compactionNames := append(append([]string(nil), names...), "level")
- sort.Strings(compactionNames)
-
- // status = {"ok", "error"}
- attemptedCompactionNames := append(append([]string(nil), compactionNames...), "status")
- sort.Strings(attemptedCompactionNames)
-
- return &partitionMetrics{
- SeriesCreated: prometheus.NewCounterVec(prometheus.CounterOpts{
- Namespace: namespace,
- Subsystem: partitionSubsystem,
- Name: "series_created",
- Help: "Number of series created in the partition.",
- }, names),
- SeriesCreatedDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
- Namespace: namespace,
- Subsystem: partitionSubsystem,
- Name: "series_created_duration_ns",
- Help: "Time taken in nanoseconds to create a single series.",
- // 30 buckets spaced exponentially between 100ns and ~19 us.
- Buckets: prometheus.ExponentialBuckets(100.0, 1.2, 30),
- }, names),
- SeriesDropped: prometheus.NewCounterVec(prometheus.CounterOpts{
- Namespace: namespace,
- Subsystem: partitionSubsystem,
- Name: "series_dropped",
- Help: "Number of series dropped from the partition.",
- }, names),
- Series: prometheus.NewGaugeVec(prometheus.GaugeOpts{
- Namespace: namespace,
- Subsystem: partitionSubsystem,
- Name: "series_total",
- Help: "Number of series in the partition.",
- }, names),
- Measurements: prometheus.NewGaugeVec(prometheus.GaugeOpts{
- Namespace: namespace,
- Subsystem: partitionSubsystem,
- Name: "measurements_total",
- Help: "Number of measurements in the partition.",
- }, names),
- FilesTotal: prometheus.NewGaugeVec(prometheus.GaugeOpts{
- Namespace: namespace,
- Subsystem: partitionSubsystem,
- Name: "files_total",
- Help: "Number of files in the partition.",
- }, fileNames),
- DiskSize: prometheus.NewGaugeVec(prometheus.GaugeOpts{
- Namespace: namespace,
- Subsystem: partitionSubsystem,
- Name: "disk_bytes",
- Help: "Number of bytes TSI partition is using on disk.",
- }, names),
- CompactionsActive: prometheus.NewGaugeVec(prometheus.GaugeOpts{
- Namespace: namespace,
- Subsystem: partitionSubsystem,
- Name: "compactions_active",
- Help: "Number of active partition compactions.",
- }, compactionNames),
- CompactionDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
- Namespace: namespace,
- Subsystem: partitionSubsystem,
- Name: "compactions_duration_seconds",
- Help: "Time taken for a successful compaction of partition.",
- // 30 buckets spaced exponentially between 1s and ~10 minutes.
- Buckets: prometheus.ExponentialBuckets(1.0, 1.25, 30),
- }, compactionNames),
- Compactions: prometheus.NewCounterVec(prometheus.CounterOpts{
- Namespace: namespace,
- Subsystem: partitionSubsystem,
- Name: "compactions_total",
- Help: "Number of compactions.",
- }, attemptedCompactionNames),
- }
-}
-
-// PrometheusCollectors satisfies the prom.PrometheusCollector interface.
-func (m *partitionMetrics) PrometheusCollectors() []prometheus.Collector { - return []prometheus.Collector{ - m.SeriesCreated, - m.SeriesCreatedDuration, - m.SeriesDropped, - m.Series, - m.Measurements, - m.FilesTotal, - m.DiskSize, - m.CompactionsActive, - m.CompactionDuration, - m.Compactions, - } -} diff --git a/tsdb/tsi1/metrics_test.go b/tsdb/tsi1/metrics_test.go deleted file mode 100644 index 3a96c81c68..0000000000 --- a/tsdb/tsi1/metrics_test.go +++ /dev/null @@ -1,232 +0,0 @@ -package tsi1 - -import ( - "testing" - - "github.com/influxdata/influxdb/v2/kit/prom/promtest" - "github.com/prometheus/client_golang/prometheus" - dto "github.com/prometheus/client_model/go" -) - -func TestMetrics_Cache(t *testing.T) { - // metrics to be shared by multiple file stores. - metrics := newCacheMetrics(prometheus.Labels{"engine_id": "", "node_id": ""}) - - t1 := newCacheTracker(metrics, prometheus.Labels{"engine_id": "0", "node_id": "0"}) - t2 := newCacheTracker(metrics, prometheus.Labels{"engine_id": "1", "node_id": "0"}) - - reg := prometheus.NewRegistry() - reg.MustRegister(metrics.PrometheusCollectors()...) - - base := namespace + "_" + cacheSubsystem + "_" - - // All the metric names - gauges := []string{base + "size"} - - counters := []string{ - base + "get_total", - base + "put_total", - base + "deletes_total", - base + "evictions_total", - } - - // Generate some measurements. - for i, tracker := range []*cacheTracker{t1, t2} { - tracker.SetSize(uint64(i + len(gauges[0]))) - - labels := tracker.Labels() - labels["status"] = "hit" - tracker.metrics.Gets.With(labels).Add(float64(i + len(counters[0]))) - tracker.metrics.Puts.With(labels).Add(float64(i + len(counters[1]))) - tracker.metrics.Deletes.With(labels).Add(float64(i + len(counters[2]))) - - tracker.metrics.Evictions.With(tracker.Labels()).Add(float64(i + len(counters[3]))) - } - - // Test that all the correct metrics are present. - mfs, err := reg.Gather() - if err != nil { - t.Fatal(err) - } - - // The label variants for the two caches. - labelVariants := []prometheus.Labels{ - prometheus.Labels{"engine_id": "0", "node_id": "0"}, - prometheus.Labels{"engine_id": "1", "node_id": "0"}, - } - - for i, labels := range labelVariants { - for _, name := range gauges { - exp := float64(i + len(name)) - metric := promtest.MustFindMetric(t, mfs, name, labels) - if got := metric.GetGauge().GetValue(); got != exp { - t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) - } - } - - var metric *dto.Metric - for _, name := range counters { - exp := float64(i + len(name)) - - if name != counters[3] { - // Make a copy since we need to add a label - l := make(prometheus.Labels, len(labels)) - for k, v := range labels { - l[k] = v - } - l["status"] = "hit" - - metric = promtest.MustFindMetric(t, mfs, name, l) - } else { - metric = promtest.MustFindMetric(t, mfs, name, labels) - } - - if got := metric.GetCounter().GetValue(); got != exp { - t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) - } - } - } -} - -func TestMetrics_Partition(t *testing.T) { - // metrics to be shared by multiple file stores. - metrics := newPartitionMetrics(prometheus.Labels{"engine_id": "", "node_id": ""}) - - t1 := newPartitionTracker(metrics, prometheus.Labels{"engine_id": "0", "index_partition": "0", "node_id": "0"}) - t2 := newPartitionTracker(metrics, prometheus.Labels{"engine_id": "1", "index_partition": "0", "node_id": "0"}) - - reg := prometheus.NewRegistry() - reg.MustRegister(metrics.PrometheusCollectors()...) 
- - base := namespace + "_" + partitionSubsystem + "_" - - // All the metric names - gauges := []string{ - base + "series_total", - base + "measurements_total", - base + "files_total", - base + "disk_bytes", - base + "compactions_active", - } - - counters := []string{ - base + "series_created", - base + "series_dropped", - base + "compactions_total", - } - - histograms := []string{ - base + "series_created_duration_ns", - base + "compactions_duration_seconds", - } - - // Generate some measurements. - for i, tracker := range []*partitionTracker{t1, t2} { - tracker.SetSeries(uint64(i + len(gauges[0]))) - tracker.SetMeasurements(uint64(i + len(gauges[1]))) - labels := tracker.Labels() - labels["type"] = "index" - tracker.metrics.FilesTotal.With(labels).Add(float64(i + len(gauges[2]))) - tracker.SetDiskSize(uint64(i + len(gauges[3]))) - labels = tracker.Labels() - labels["level"] = "2" - tracker.metrics.CompactionsActive.With(labels).Add(float64(i + len(gauges[4]))) - - tracker.metrics.SeriesCreated.With(tracker.Labels()).Add(float64(i + len(counters[0]))) - tracker.AddSeriesDropped(uint64(i + len(counters[1]))) - labels = tracker.Labels() - labels["level"] = "2" - labels["status"] = "ok" - tracker.metrics.Compactions.With(labels).Add(float64(i + len(counters[2]))) - - tracker.metrics.SeriesCreatedDuration.With(tracker.Labels()).Observe(float64(i + len(histograms[0]))) - labels = tracker.Labels() - labels["level"] = "2" - tracker.metrics.CompactionDuration.With(labels).Observe(float64(i + len(histograms[1]))) - } - - // Test that all the correct metrics are present. - mfs, err := reg.Gather() - if err != nil { - t.Fatal(err) - } - - // The label variants for the two caches. - labelVariants := []prometheus.Labels{ - prometheus.Labels{"engine_id": "0", "index_partition": "0", "node_id": "0"}, - prometheus.Labels{"engine_id": "1", "index_partition": "0", "node_id": "0"}, - } - - for j, labels := range labelVariants { - var metric *dto.Metric - - for i, name := range gauges { - exp := float64(j + len(name)) - - if i == 2 { - l := make(prometheus.Labels, len(labels)) - for k, v := range labels { - l[k] = v - } - l["type"] = "index" - metric = promtest.MustFindMetric(t, mfs, name, l) - } else if i == 4 { - l := make(prometheus.Labels, len(labels)) - for k, v := range labels { - l[k] = v - } - l["level"] = "2" - metric = promtest.MustFindMetric(t, mfs, name, l) - } else { - metric = promtest.MustFindMetric(t, mfs, name, labels) - } - - if got := metric.GetGauge().GetValue(); got != exp { - t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) - } - } - - for i, name := range counters { - exp := float64(j + len(name)) - - if i == 2 { - // Make a copy since we need to add a label - l := make(prometheus.Labels, len(labels)) - for k, v := range labels { - l[k] = v - } - l["status"] = "ok" - l["level"] = "2" - - metric = promtest.MustFindMetric(t, mfs, name, l) - } else { - metric = promtest.MustFindMetric(t, mfs, name, labels) - } - - if got := metric.GetCounter().GetValue(); got != exp { - t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) - } - } - - for i, name := range histograms { - exp := float64(j + len(name)) - - if i == 1 { - // Make a copy since we need to add a label - l := make(prometheus.Labels, len(labels)) - for k, v := range labels { - l[k] = v - } - l["level"] = "2" - - metric = promtest.MustFindMetric(t, mfs, name, l) - } else { - metric = promtest.MustFindMetric(t, mfs, name, labels) - } - - if got := metric.GetHistogram().GetSampleSum(); got != exp { - t.Errorf("[%s %d] 
got %v, expected %v", name, i, got, exp) - } - } - } -} diff --git a/tsdb/tsi1/partition.go b/tsdb/tsi1/partition.go deleted file mode 100644 index 74dd4cb7fc..0000000000 --- a/tsdb/tsi1/partition.go +++ /dev/null @@ -1,1727 +0,0 @@ -package tsi1 - -import ( - "context" - "encoding/json" - "errors" - "fmt" - "io/ioutil" - "os" - "path/filepath" - "regexp" - "strconv" - "strings" - "sync" - "time" - "unsafe" - - "github.com/influxdata/influxdb/v2/kit/tracing" - "github.com/influxdata/influxdb/v2/logger" - "github.com/influxdata/influxdb/v2/pkg/bytesutil" - "github.com/influxdata/influxdb/v2/pkg/fs" - "github.com/influxdata/influxdb/v2/pkg/lifecycle" - "github.com/influxdata/influxdb/v2/pkg/mincore" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" - "github.com/influxdata/influxql" - "github.com/prometheus/client_golang/prometheus" - "go.uber.org/zap" - "golang.org/x/time/rate" -) - -// Version is the current version of the TSI index. -const Version = 1 - -// File extensions. -const ( - LogFileExt = ".tsl" - IndexFileExt = ".tsi" - - CompactingExt = ".compacting" -) - -const ( - // ManifestFileName is the name of the index manifest file. - ManifestFileName = "MANIFEST" -) - -// Partition represents a collection of layered index files and WAL. -type Partition struct { - // The rule to ensure no deadlocks, no resource leaks, and no use after close - // is that if the partition launches a goroutine, it must acquire a reference - // to itself first and releases it only after it has done all of its use of mu. - mu sync.RWMutex - resmu sync.Mutex // protects res Open and Close - res lifecycle.Resource - - sfile *seriesfile.SeriesFile // series lookup file - sfileref *lifecycle.Reference // reference to series lookup file - - activeLogFile *LogFile // current log file - fileSet *FileSet // current file set - seq int // file id sequence - - // Running statistics - tracker *partitionTracker - - // Fast series lookup of series IDs in the series file that have been present - // in this partition. This set tracks both insertions and deletions of a series. - seriesIDSet *tsdb.SeriesIDSet - - // Stats caching - StatsTTL time.Duration - statsCache MeasurementCardinalityStats - lastStatsTime time.Time - - // Compaction management - levels []CompactionLevel // compaction levels - levelCompacting []bool // level compaction status - compactionsDisabled int // counter of disables - currentCompactionN int // counter of in-progress compactions - - // Directory of the Partition's index files. - path string - id string // id portion of path. - - // Log file compaction thresholds. - MaxLogFileSize int64 - nosync bool // when true, flushing and syncing of LogFile will be disabled. - logbufferSize int // the LogFile's buffer is set to this value. - - pageFaultLimiter *rate.Limiter - - logger *zap.Logger - - // Current size of MANIFEST. Used to determine partition size. - manifestSize int64 - - // Index's version. - version int -} - -// NewPartition returns a new instance of Partition. 
-func NewPartition(sfile *seriesfile.SeriesFile, path string) *Partition { - partition := &Partition{ - path: path, - sfile: sfile, - seriesIDSet: tsdb.NewSeriesIDSet(), - - MaxLogFileSize: DefaultMaxIndexLogFileSize, - - logger: zap.NewNop(), - version: Version, - } - - defaultLabels := prometheus.Labels{"index_partition": ""} - partition.tracker = newPartitionTracker(newPartitionMetrics(nil), defaultLabels) - return partition -} - -// bytes estimates the memory footprint of this Partition, in bytes. -func (p *Partition) bytes() int { - var b int - b += int(unsafe.Sizeof(p.mu)) - b += int(unsafe.Sizeof(p.resmu)) - b += int(unsafe.Sizeof(p.res)) - // Do not count SeriesFile contents because it belongs to the code that constructed this Partition. - b += int(unsafe.Sizeof(p.sfile)) - b += int(unsafe.Sizeof(p.sfileref)) - b += int(unsafe.Sizeof(p.activeLogFile)) + p.activeLogFile.bytes() - b += int(unsafe.Sizeof(p.fileSet)) + p.fileSet.bytes() - b += int(unsafe.Sizeof(p.seq)) - b += int(unsafe.Sizeof(p.tracker)) - b += int(unsafe.Sizeof(p.seriesIDSet)) + p.seriesIDSet.Bytes() - b += int(unsafe.Sizeof(p.levels)) - for _, level := range p.levels { - b += int(unsafe.Sizeof(level)) - } - b += int(unsafe.Sizeof(p.levelCompacting)) - for _, levelCompacting := range p.levelCompacting { - b += int(unsafe.Sizeof(levelCompacting)) - } - b += int(unsafe.Sizeof(p.compactionsDisabled)) - b += int(unsafe.Sizeof(p.path)) + len(p.path) - b += int(unsafe.Sizeof(p.id)) + len(p.id) - b += int(unsafe.Sizeof(p.MaxLogFileSize)) - b += int(unsafe.Sizeof(p.nosync)) - b += int(unsafe.Sizeof(p.logbufferSize)) - b += int(unsafe.Sizeof(p.logger)) - b += int(unsafe.Sizeof(p.manifestSize)) - b += int(unsafe.Sizeof(p.version)) - return b -} - -// ErrIncompatibleVersion is returned when attempting to read from an -// incompatible tsi1 manifest file. -var ErrIncompatibleVersion = errors.New("incompatible tsi1 index MANIFEST") - -// Open opens the partition. -func (p *Partition) Open() (err error) { - p.resmu.Lock() - defer p.resmu.Unlock() - - if p.res.Opened() { - return errors.New("index partition already open") - } - - // Try to acquire a reference to the series file - p.sfileref, err = p.sfile.Acquire() - if err != nil { - return err - } - - defer func() { - if err != nil { - p.close() - } - }() - - // Validate path is correct. - p.id = filepath.Base(p.path) - if _, err := strconv.Atoi(p.id); err != nil { - return err - } - - // Create directory if it doesn't exist. - if err := os.MkdirAll(p.path, 0777); err != nil { - return err - } - - // Read manifest file. - m, manifestSize, err := ReadManifestFile(p.manifestPath()) - if os.IsNotExist(err) { - m = NewManifest(p.manifestPath()) - } else if err != nil { - return err - } - // Set manifest size on the partition - p.manifestSize = manifestSize - - // Check to see if the MANIFEST file is compatible with the current Index. - if err := m.Validate(); err != nil { - return err - } - - // Copy compaction levels to the index. - p.levels = make([]CompactionLevel, len(m.Levels)) - copy(p.levels, m.Levels) - - // Set up flags to track whether a level is compacting. - p.levelCompacting = make([]bool, len(p.levels)) - - // Open each file in the manifest. - files, err := func() (files []File, err error) { - // Ensure any opened files are closed in the case of an error. - defer func() { - if err != nil { - for _, file := range files { - file.Close() - } - } - }() - - // Open all of the files in the manifest. 
- for _, filename := range m.Files { - switch filepath.Ext(filename) { - case LogFileExt: - f, err := p.openLogFile(filepath.Join(p.path, filename)) - if err != nil { - return nil, err - } - files = append(files, f) - - // Make first log file active, if within threshold. - sz, _ := f.Stat() - if p.activeLogFile == nil && sz < p.MaxLogFileSize { - p.activeLogFile = f - } - - case IndexFileExt: - f, err := p.openIndexFile(filepath.Join(p.path, filename)) - if err != nil { - return nil, err - } - files = append(files, f) - } - } - - return files, nil - }() - if err != nil { - return err - } - - // Place the files in a file set. - p.fileSet, err = NewFileSet(p.sfile, files) - if err != nil { - for _, file := range files { - file.Close() - } - return err - } - - // Set initial sequence number. - p.seq = p.fileSet.MaxID() - - // Delete any files not in the manifest. - if err := p.deleteNonManifestFiles(m); err != nil { - return err - } - - // Ensure a log file exists. - if p.activeLogFile == nil { - if err := p.prependActiveLogFile(); err != nil { - return err - } - } - - // Build series existence set. - if err := p.buildSeriesSet(); err != nil { - return err - } - p.tracker.SetSeries(p.seriesIDSet.Cardinality()) - p.tracker.SetFiles(uint64(len(p.fileSet.IndexFiles())), "index") - p.tracker.SetFiles(uint64(len(p.fileSet.LogFiles())), "log") - p.tracker.SetDiskSize(uint64(p.fileSet.Size())) - - // Mark opened. - p.res.Open() - - // Send a compaction request on start up. - p.compact() - - return nil -} - -// openLogFile opens a log file and appends it to the index. -func (p *Partition) openLogFile(path string) (*LogFile, error) { - f := NewLogFile(p.sfile, path) - f.nosync = p.nosync - f.bufferSize = p.logbufferSize - if err := f.Open(); err != nil { - return nil, err - } - return f, nil -} - -// openIndexFile opens a log file and appends it to the index. -func (p *Partition) openIndexFile(path string) (*IndexFile, error) { - f := NewIndexFile(p.sfile) - f.SetPath(path) - if err := f.Open(); err != nil { - return nil, err - } - f.pageFaultLimiter = mincore.NewLimiter(p.pageFaultLimiter, f.data) - return f, nil -} - -// deleteNonManifestFiles removes all files not in the manifest. -func (p *Partition) deleteNonManifestFiles(m *Manifest) error { - dir, err := os.Open(p.path) - if err != nil { - return err - } - defer dir.Close() - - fis, err := dir.Readdir(-1) - if err != nil { - return err - } - - // Loop over all files and remove any not in the manifest. - for _, fi := range fis { - filename := filepath.Base(fi.Name()) - if filename == ManifestFileName || m.HasFile(filename) { - continue - } - - if err := os.RemoveAll(filename); err != nil { - return err - } - } - - return dir.Close() -} - -func (p *Partition) buildSeriesSet() error { - p.seriesIDSet = tsdb.NewSeriesIDSet() - - // Read series sets from files in reverse. - for i := len(p.fileSet.files) - 1; i >= 0; i-- { - f := p.fileSet.files[i] - - // Delete anything that's been tombstoned. - ts, err := f.TombstoneSeriesIDSet() - if err != nil { - return err - } - p.seriesIDSet.Diff(ts) - - // Add series created within the file. - ss, err := f.SeriesIDSet() - if err != nil { - return err - } - p.seriesIDSet.Merge(ss) - } - - return nil -} - -// Close closes the partition. -func (p *Partition) Close() error { - p.resmu.Lock() - defer p.resmu.Unlock() - - // Close the resource. - p.res.Close() - p.Wait() - - // There are now no internal outstanding callers holding a reference - // so we can acquire this mutex to protect against external callers. 
- p.mu.Lock() - defer p.mu.Unlock() - - return p.close() -} - -// close does the work of closing and cleaning up the partition after it -// has acquired locks and ensured no one is using it. -func (p *Partition) close() error { - // Release series file. - if p.sfileref != nil { - p.sfileref.Release() - p.sfileref = nil - } - - // Release the file set and close all of the files. - var err error - if p.fileSet != nil { - p.fileSet.Release() - for _, file := range p.fileSet.files { - if e := file.Close(); e != nil && err == nil { - err = e - } - } - p.fileSet = nil - } - - return err -} - -// Path returns the path to the partition. -func (p *Partition) Path() string { return p.path } - -// SeriesFile returns the attached series file. -func (p *Partition) SeriesFile() *seriesfile.SeriesFile { return p.sfile } - -// NextSequence returns the next file identifier. -func (p *Partition) NextSequence() int { - p.mu.Lock() - defer p.mu.Unlock() - return p.nextSequence() -} - -func (p *Partition) nextSequence() int { - p.seq++ - return p.seq -} - -// manifestPath returns the path to the index's manifest file. -func (p *Partition) manifestPath() string { - return filepath.Join(p.path, ManifestFileName) -} - -// Manifest returns a Manifest for the partition given a file set. -func (p *Partition) Manifest(fs *FileSet) *Manifest { - p.mu.RLock() - defer p.mu.RUnlock() - - return p.manifest(fs) -} - -// manifest returns a Manifest for the partition given a file set. It -// requires that at least a read lock is held. -func (p *Partition) manifest(fs *FileSet) *Manifest { - m := &Manifest{ - Levels: p.levels, - Files: make([]string, len(fs.files)), - Version: p.version, - path: p.manifestPath(), - } - - for j, f := range fs.files { - m.Files[j] = filepath.Base(f.Path()) - } - - return m -} - -// WithLogger sets the logger for the index. -func (p *Partition) WithLogger(logger *zap.Logger) { - p.logger = logger.With(zap.String("index", "tsi")) -} - -// FileSet returns a copy of the current file set. You must call Release on it when -// you are finished. -func (p *Partition) FileSet() (*FileSet, error) { - p.mu.RLock() - fs, err := p.fileSet.Duplicate() - p.mu.RUnlock() - return fs, err -} - -// replaceFileSet is a helper to replace the file set of the partition. It releases -// the resources on the old file set before replacing it with the new one. -func (p *Partition) replaceFileSet(fs *FileSet) { - p.fileSet.Release() - p.fileSet = fs -} - -// FileN returns the active files in the file set. -func (p *Partition) FileN() int { return len(p.fileSet.files) } - -// prependActiveLogFile adds a new log file so that the current log file can be compacted. -func (p *Partition) prependActiveLogFile() error { - // Open file and insert it into the first position. - f, err := p.openLogFile(filepath.Join(p.path, FormatLogFileName(p.nextSequence()))) - if err != nil { - return err - } - - // Prepend and generate new fileset. - fileSet, err := p.fileSet.PrependLogFile(f) - if err != nil { - f.Close() - return err - } - - // Write new manifest. - manifestSize, err := p.manifest(fileSet).Write() - if err != nil { - // TODO: Close index if write fails. - fileSet.Release() - f.Close() - return err - } - - // Now that we can no longer error, update the partition state. - p.activeLogFile = f - p.replaceFileSet(fileSet) - p.manifestSize = manifestSize - - // Set the file metrics again. 
- p.tracker.SetFiles(uint64(len(p.fileSet.IndexFiles())), "index") - p.tracker.SetFiles(uint64(len(p.fileSet.LogFiles())), "log") - p.tracker.SetDiskSize(uint64(p.fileSet.Size())) - return nil -} - -// ForEachMeasurementName iterates over all measurement names in the index. -func (p *Partition) ForEachMeasurementName(fn func(name []byte) error) error { - fs, err := p.FileSet() - if err != nil { - return err - } - defer fs.Release() - - itr := fs.MeasurementIterator() - if itr == nil { - return nil - } - - for e := itr.Next(); e != nil; e = itr.Next() { - if err := fn(e.Name()); err != nil { - return err - } - } - - return nil -} - -// MeasurementHasSeries returns true if a measurement has at least one non-tombstoned series. -func (p *Partition) MeasurementHasSeries(name []byte) (bool, error) { - fs, err := p.FileSet() - if err != nil { - return false, err - } - defer fs.Release() - - for _, f := range fs.files { - if f.MeasurementHasSeries(p.seriesIDSet, name) { - return true, nil - } - } - - return false, nil -} - -// MeasurementIterator returns an iterator over all measurement names. -func (p *Partition) MeasurementIterator() (tsdb.MeasurementIterator, error) { - fs, err := p.FileSet() - if err != nil { - return nil, err - } - return newFileSetMeasurementIterator(fs, - NewTSDBMeasurementIteratorAdapter(fs.MeasurementIterator())), nil -} - -// MeasurementExists returns true if a measurement exists. -func (p *Partition) MeasurementExists(name []byte) (bool, error) { - fs, err := p.FileSet() - if err != nil { - return false, err - } - defer fs.Release() - - m := fs.Measurement(name) - return m != nil && !m.Deleted(), nil -} - -func (p *Partition) MeasurementNamesByRegex(re *regexp.Regexp) ([][]byte, error) { - fs, err := p.FileSet() - if err != nil { - return nil, err - } - defer fs.Release() - - itr := fs.MeasurementIterator() - if itr == nil { - return nil, nil - } - - var a [][]byte - for e := itr.Next(); e != nil; e = itr.Next() { - if re.Match(e.Name()) { - // Clone bytes since they will be used after the fileset is released. - a = append(a, bytesutil.Clone(e.Name())) - } - } - return a, nil -} - -func (p *Partition) MeasurementSeriesIDIterator(name []byte) (tsdb.SeriesIDIterator, error) { - fs, err := p.FileSet() - if err != nil { - return nil, err - } - return newFileSetSeriesIDIterator(fs, fs.MeasurementSeriesIDIterator(name)), nil -} - -// DropMeasurement deletes a measurement from the index. DropMeasurement does -// not remove any series from the index directly. -func (p *Partition) DropMeasurement(name []byte) error { - fs, err := p.FileSet() - if err != nil { - return err - } - defer fs.Release() - - // Delete all keys and values. - if kitr := fs.TagKeyIterator(name); kitr != nil { - for k := kitr.Next(); k != nil; k = kitr.Next() { - // Delete key if not already deleted. - if !k.Deleted() { - if err := func() error { - p.mu.RLock() - defer p.mu.RUnlock() - return p.activeLogFile.DeleteTagKeyNoSync(name, k.Key()) - }(); err != nil { - return err - } - } - - // Delete each value in key. - if vitr := k.TagValueIterator(nil); vitr != nil { - for v := vitr.Next(); v != nil; v = vitr.Next() { - if !v.Deleted() { - if err := func() error { - p.mu.RLock() - defer p.mu.RUnlock() - return p.activeLogFile.DeleteTagValueNoSync(name, k.Key(), v.Value()) - }(); err != nil { - return err - } - } - } - } - } - } - - // Delete all series. - // TODO(edd): it's not clear to me why we have to delete all series IDs from - // the index when we could just mark the measurement as deleted. 
- if itr := fs.MeasurementSeriesIDIterator(name); itr != nil { - defer itr.Close() - - // 1024 is assuming that typically a bucket (measurement) will have at least - // 1024 series in it. - all := make([]tsdb.SeriesID, 0, 1024) - for { - elem, err := itr.Next() - if err != nil { - return err - } else if elem.SeriesID.IsZero() { - break - } - all = append(all, elem.SeriesID) - - // Update series set. - p.seriesIDSet.Remove(elem.SeriesID) - } - - if err := p.activeLogFile.DeleteSeriesIDList(all); err != nil { - return err - } - - p.tracker.AddSeriesDropped(uint64(len(all))) - p.tracker.SubSeries(uint64(len(all))) - - if err = itr.Close(); err != nil { - return err - } - } - - // Mark measurement as deleted. - if err := func() error { - p.mu.RLock() - defer p.mu.RUnlock() - return p.activeLogFile.DeleteMeasurement(name) - }(); err != nil { - return err - } - - // Ensure log is flushed & synced. - if err := func() error { - p.mu.RLock() - defer p.mu.RUnlock() - return p.activeLogFile.FlushAndSync() - }(); err != nil { - return err - } - - // Check if the log file needs to be swapped. - if err := p.CheckLogFile(); err != nil { - return err - } - - return nil -} - -// createSeriesListIfNotExists creates a list of series if they doesn't exist in -// bulk. -func (p *Partition) createSeriesListIfNotExists(collection *tsdb.SeriesCollection) ([]tsdb.SeriesID, error) { - // Is there anything to do? The partition may have been sent an empty batch. - if collection.Length() == 0 { - return nil, nil - } else if len(collection.Names) != len(collection.Tags) { - return nil, fmt.Errorf("uneven batch, partition %s sent %d names and %d tags", p.id, len(collection.Names), len(collection.Tags)) - } - - // Ensure fileset cannot change during insert. - now := time.Now() - p.mu.RLock() - - // Try to acquire a resource on the active log file - res, err := p.activeLogFile.Acquire() - if err != nil { - p.mu.RUnlock() - return nil, err - } - - // Insert series into log file. - ids, err := p.activeLogFile.AddSeriesList(p.seriesIDSet, collection) - - // Release our resources. - res.Release() - p.mu.RUnlock() - - // Check the error from insert. - if err != nil { - return nil, err - } - - if err := p.CheckLogFile(); err != nil { - return nil, err - } - - // NOTE(edd): if this becomes expensive then we can move the count into the - // log file. - var totalNew uint64 - for _, id := range ids { - if !id.IsZero() { - totalNew++ - } - } - if totalNew > 0 { - p.tracker.AddSeriesCreated(totalNew, time.Since(now)) - p.tracker.AddSeries(totalNew) - p.mu.RLock() - p.tracker.SetDiskSize(uint64(p.fileSet.Size())) - p.mu.RUnlock() - } - return ids, nil -} - -// DropSeries removes the provided set of series id from the index. -func (p *Partition) DropSeries(ids []tsdb.SeriesID) error { - // Count total affected series. - var n uint64 - for _, id := range ids { - if p.seriesIDSet.Contains(id) { - n++ - } - } - - // Delete series from index. - if err := p.activeLogFile.DeleteSeriesIDs(ids); err != nil { - return err - } - - // Update series set. - for _, id := range ids { - p.seriesIDSet.Remove(id) - } - p.tracker.AddSeriesDropped(n) - p.tracker.SubSeries(n) - - // Swap log file, if necessary. - return p.CheckLogFile() -} - -// HasTagKey returns true if tag key exists. -func (p *Partition) HasTagKey(name, key []byte) (bool, error) { - fs, err := p.FileSet() - if err != nil { - return false, err - } - defer fs.Release() - - return fs.HasTagKey(name, key), nil -} - -// HasTagValue returns true if tag value exists. 
-func (p *Partition) HasTagValue(name, key, value []byte) (bool, error) { - fs, err := p.FileSet() - if err != nil { - return false, err - } - defer fs.Release() - - return fs.HasTagValue(name, key, value), nil -} - -// TagKeyIterator returns an iterator for all keys across a single measurement. -func (p *Partition) TagKeyIterator(name []byte) (tsdb.TagKeyIterator, error) { - fs, err := p.FileSet() - if err != nil { - return nil, err - } - return newFileSetTagKeyIterator(fs, - NewTSDBTagKeyIteratorAdapter(fs.TagKeyIterator(name))), nil -} - -// TagValueIterator returns an iterator for all values across a single key. -func (p *Partition) TagValueIterator(name, key []byte) (tsdb.TagValueIterator, error) { - fs, err := p.FileSet() - if err != nil { - return nil, err - } - return newFileSetTagValueIterator(fs, - NewTSDBTagValueIteratorAdapter(fs.TagValueIterator(name, key))), nil -} - -// TagKeySeriesIDIterator returns a series iterator for all values across a single key. -func (p *Partition) TagKeySeriesIDIterator(name, key []byte) (tsdb.SeriesIDIterator, error) { - fs, err := p.FileSet() - if err != nil { - return nil, err - } - - itr, err := fs.TagKeySeriesIDIterator(name, key) - if err != nil { - fs.Release() - return nil, err - } - return newFileSetSeriesIDIterator(fs, itr), nil -} - -// TagValueSeriesIDIterator returns a series iterator for a single key value. -func (p *Partition) TagValueSeriesIDIterator(name, key, value []byte) (tsdb.SeriesIDIterator, error) { - fs, err := p.FileSet() - if err != nil { - return nil, err - } - itr, err := fs.TagValueSeriesIDIterator(name, key, value) - if err != nil { - fs.Release() - return nil, err - } - return newFileSetSeriesIDIterator(fs, itr), nil -} - -// MeasurementTagKeysByExpr extracts the tag keys wanted by the expression. -func (p *Partition) MeasurementTagKeysByExpr(name []byte, expr influxql.Expr) (map[string]struct{}, error) { - fs, err := p.FileSet() - if err != nil { - return nil, err - } - defer fs.Release() - - return fs.MeasurementTagKeysByExpr(name, expr) -} - -// ForEachMeasurementTagKey iterates over all tag keys in a measurement. -func (p *Partition) ForEachMeasurementTagKey(name []byte, fn func(key []byte) error) error { - fs, err := p.FileSet() - if err != nil { - return err - } - defer fs.Release() - - itr := fs.TagKeyIterator(name) - if itr == nil { - return nil - } - - for e := itr.Next(); e != nil; e = itr.Next() { - if err := fn(e.Key()); err != nil { - return err - } - } - - return nil -} - -// TagKeyCardinality always returns zero. -// It is not possible to determine cardinality of tags across index files. -func (p *Partition) TagKeyCardinality(name, key []byte) int { - return 0 -} - -func (p *Partition) SetFieldName(measurement []byte, name string) {} -func (p *Partition) RemoveShard(shardID uint64) {} -func (p *Partition) AssignShard(k string, shardID uint64) {} - -// Compact requests a compaction of log files. -func (p *Partition) Compact() { - p.mu.Lock() - defer p.mu.Unlock() - - p.compact() -} - -// DisableCompactions stops any compactions from starting until a call to EnableCompactions. -func (p *Partition) DisableCompactions() { - p.mu.Lock() - defer p.mu.Unlock() - - p.compactionsDisabled++ -} - -// EnableCompactions allows compactions to proceed again after a call to DisableCompactions. -func (p *Partition) EnableCompactions() { - p.mu.Lock() - defer p.mu.Unlock() - - p.compactionsDisabled-- -} - -// CurrentCompactionN returns the number of compactions currently running. 
-func (p *Partition) CurrentCompactionN() int {
- p.mu.RLock()
- defer p.mu.RUnlock()
- return p.currentCompactionN
-}
-
-// Wait will block until all compactions are finished.
-// Must only be called while compactions are disabled.
-func (p *Partition) Wait() {
- if p.CurrentCompactionN() == 0 { // Return immediately if no compactions are running.
- return
- }
-
- ticker := time.NewTicker(10 * time.Millisecond)
- defer ticker.Stop()
- for range ticker.C {
- if p.CurrentCompactionN() == 0 {
- return
- }
- }
-}
-
-// compact compacts contiguous groups of files that are not currently compacting.
-func (p *Partition) compact() {
- if p.compactionsDisabled > 0 {
- p.logger.Error("Cannot start a compaction while disabled")
- return
- }
-
- fs, err := p.fileSet.Duplicate()
- if err != nil {
- p.logger.Error("Attempt to compact while partition is closing", zap.Error(err))
- return
- }
- defer fs.Release()
-
- // Iterate over each level we are going to compact.
- // We skip the first level (0) because it is log files and they are compacted separately.
- // We skip the last level because the files have no higher level to compact into.
- minLevel, maxLevel := 1, len(p.levels)-2
- for level := minLevel; level <= maxLevel; level++ {
- // Skip level if it is currently compacting.
- if p.levelCompacting[level] {
- continue
- }
-
- // Collect contiguous files from the end of the level.
- files := fs.LastContiguousIndexFilesByLevel(level)
- if len(files) < 2 {
- continue
- } else if len(files) > MaxIndexMergeCount {
- files = files[len(files)-MaxIndexMergeCount:]
- }
-
- // We intend to do a compaction. Acquire a resource to do so.
- ref, err := p.res.Acquire()
- if err != nil {
- p.logger.Error("Attempt to compact while partition is closing", zap.Error(err))
- return
- }
-
- // Acquire references to the files to keep them alive through compaction.
- frefs, err := IndexFiles(files).Acquire()
- if err != nil {
- p.logger.Error("Attempt to compact a file that is closed", zap.Error(err))
- continue
- }
-
- // Mark the level as compacting.
- p.levelCompacting[level] = true
-
- // Start compacting in a separate goroutine.
- p.currentCompactionN++
- go func(level int) {
- // Compact to a new level.
- p.compactToLevel(files, frefs, level+1, ref.Closing())
-
- // Ensure references are released.
- frefs.Release()
- ref.Release()
-
- // Ensure compaction lock for the level is released.
- p.mu.Lock()
- p.levelCompacting[level] = false
- p.currentCompactionN--
- p.mu.Unlock()
-
- // Check for new compactions.
- p.Compact()
- }(level)
- }
-}
-
-// compactToLevel compacts a set of files into a new file. Replaces the old files with
-// the compacted file on successful completion. This runs in a separate goroutine.
-func (p *Partition) compactToLevel(files []*IndexFile, frefs lifecycle.References,
- level int, interrupt <-chan struct{}) {
-
- assert(len(files) >= 2, "at least two index files are required for compaction")
- assert(level > 0, "cannot compact level zero")
-
- var err error
- var start time.Time
-
- p.tracker.IncActiveCompaction(level)
- // Set the relevant metrics at the end of any compaction.
- defer func() { - p.mu.RLock() - defer p.mu.RUnlock() - p.tracker.SetFiles(uint64(len(p.fileSet.IndexFiles())), "index") - p.tracker.SetFiles(uint64(len(p.fileSet.LogFiles())), "log") - p.tracker.SetDiskSize(uint64(p.fileSet.Size())) - p.tracker.DecActiveCompaction(level) - - success := err == nil - p.tracker.CompactionAttempted(level, success, time.Since(start)) - }() - - span, ctx := tracing.StartSpanFromContext(context.Background()) - defer span.Finish() - - // Build a logger for this compaction. - log, logEnd := logger.NewOperation(ctx, p.logger, "TSI level compaction", "tsi1_compact_to_level", zap.Int("tsi1_level", level)) - defer logEnd() - - // Check for cancellation. - select { - case <-interrupt: - log.Error("Cannot begin compaction", zap.Error(ErrCompactionInterrupted)) - return - default: - } - - // Track time to compact. - start = time.Now() - - // Create new index file. - path := filepath.Join(p.path, FormatIndexFileName(p.NextSequence(), level)) - var f *os.File - if f, err = fs.CreateFile(path); err != nil { - log.Error("Cannot create compaction files", zap.Error(err)) - return - } - defer f.Close() - - log.Info("Performing full compaction", - zap.String("src", joinIntSlice(IndexFiles(files).IDs(), ",")), - zap.String("dst", path), - ) - - // Compact all index files to new index file. - lvl := p.levels[level] - var n int64 - if n, err = IndexFiles(files).CompactTo(f, p.sfile, lvl.M, lvl.K, interrupt); err != nil { - log.Error("Cannot compact index files", zap.Error(err)) - return - } - - // Close file. - if err = f.Close(); err != nil { - log.Error("Error closing index file", zap.Error(err)) - return - } - - // Reopen as an index file. - file := NewIndexFile(p.sfile) - file.SetPath(path) - if err = file.Open(); err != nil { - log.Error("Cannot open new index file", zap.Error(err)) - return - } - file.pageFaultLimiter = mincore.NewLimiter(p.pageFaultLimiter, file.data) - - // Obtain lock to swap in index file and write manifest. - if err = func() error { - p.mu.Lock() - defer p.mu.Unlock() - - // Replace previous files with new index file. - fileSet, err := p.fileSet.MustReplace(IndexFiles(files).Files(), file) - if err != nil { - return err - } - - // Write new manifest. - manifestSize, err := p.manifest(fileSet).Write() - if err != nil { - // TODO: Close index if write fails. - fileSet.Release() - return err - } - - // Now that we can no longer error, update the local state. - p.replaceFileSet(fileSet) - p.manifestSize = manifestSize - - return nil - }(); err != nil { - log.Error("Cannot write manifest", zap.Error(err)) - return - } - - elapsed := time.Since(start) - log.Info("Full compaction complete", - zap.String("path", path), - logger.DurationLiteral("elapsed", elapsed), - zap.Int64("bytes", n), - zap.Int("kb_per_sec", int(float64(n)/elapsed.Seconds())/1024), - ) - - // Release old files. - frefs.Release() - - // Close and delete all old index files. - for _, f := range files { - log.Info("Removing index file", zap.String("path", f.Path())) - - if err = f.Close(); err != nil { - log.Error("Cannot close index file", zap.Error(err)) - return - } else if err = os.Remove(f.Path()); err != nil { - log.Error("Cannot remove index file", zap.Error(err)) - return - } - } -} - -func (p *Partition) CheckLogFile() error { - // Check log file size under read lock. - p.mu.RLock() - size := p.activeLogFile.Size() - p.mu.RUnlock() - - if size < p.MaxLogFileSize { - return nil - } - - // If file size exceeded then recheck under write lock and swap files. 
- p.mu.Lock() - defer p.mu.Unlock() - return p.checkLogFile() -} - -func (p *Partition) checkLogFile() error { - if p.compactionsDisabled > 0 { - return nil - } - - // Acquire a reference to hold the partition open. - ref, err := p.res.Acquire() - if err != nil { - return err - } - - if p.activeLogFile.Size() < p.MaxLogFileSize { - ref.Release() - return nil - } - - span, ctx := tracing.StartSpanFromContext(context.Background()) - defer span.Finish() - - // Swap current log file. - logFile := p.activeLogFile - - // Open new log file and insert it into the first position. - if err := p.prependActiveLogFile(); err != nil { - ref.Release() - return err - } - - // Begin compacting in a background goroutine. - p.currentCompactionN++ - go func() { - p.compactLogFile(ctx, logFile, ref.Closing()) - ref.Release() // release our reference - - p.mu.Lock() - p.currentCompactionN-- // compaction is now complete - p.mu.Unlock() - - p.Compact() // check for new compactions - }() - - return nil -} - -// compactLogFile compacts f into a tsi file. The new file will share the -// same identifier but will have a ".tsi" extension. Once the log file is -// compacted then the manifest is updated and the log file is discarded. -func (p *Partition) compactLogFile(ctx context.Context, logFile *LogFile, interrupt <-chan struct{}) { - defer func() { - p.mu.RLock() - defer p.mu.RUnlock() - p.tracker.SetFiles(uint64(len(p.fileSet.IndexFiles())), "index") - p.tracker.SetFiles(uint64(len(p.fileSet.LogFiles())), "log") - p.tracker.SetDiskSize(uint64(p.fileSet.Size())) - }() - - start := time.Now() - - // Retrieve identifier from current path. - id := logFile.ID() - assert(id != 0, "cannot parse log file id: %s", logFile.Path()) - - // Build a logger for this compaction. - log, logEnd := logger.NewOperation(ctx, p.logger, "TSI log compaction", "tsi1_compact_log_file", zap.Int("tsi1_log_file_id", id)) - defer logEnd() - - // Create new index file. - path := filepath.Join(p.path, FormatIndexFileName(id, 1)) - f, err := fs.CreateFile(path) - if err != nil { - log.Error("Cannot create index file", zap.Error(err)) - return - } - defer f.Close() - - // Compact log file to new index file. - lvl := p.levels[1] - n, err := logFile.CompactTo(f, lvl.M, lvl.K, interrupt) - if err != nil { - log.Error("Cannot compact log file", zap.Error(err), zap.String("path", logFile.Path())) - return - } - - // Close file. - if err := f.Close(); err != nil { - log.Error("Cannot close log file", zap.Error(err)) - return - } - - // Reopen as an index file. - file := NewIndexFile(p.sfile) - file.SetPath(path) - if err := file.Open(); err != nil { - log.Error("Cannot open compacted index file", zap.Error(err), zap.String("path", file.Path())) - return - } - file.pageFaultLimiter = mincore.NewLimiter(p.pageFaultLimiter, file.data) - - // Obtain lock to swap in index file and write manifest. - if err := func() error { - p.mu.Lock() - defer p.mu.Unlock() - - // Replace previous log file with index file. - fileSet, err := p.fileSet.MustReplace([]File{logFile}, file) - if err != nil { - return err - } - - // Write new manifest. - manifestSize, err := p.manifest(fileSet).Write() - if err != nil { - // TODO: Close index if write fails. - fileSet.Release() - return err - } - - // Now that we can no longer error, update the local state. 
- p.replaceFileSet(fileSet) - p.manifestSize = manifestSize - - return nil - }(); err != nil { - log.Error("Cannot update manifest or stats", zap.Error(err)) - return - } - - elapsed := time.Since(start) - log.Info("Log file compacted", - logger.DurationLiteral("elapsed", elapsed), - zap.Int64("bytes", n), - zap.Int("kb_per_sec", int(float64(n)/elapsed.Seconds())/1024), - ) - - // Closing the log file will automatically wait until the ref count is zero. - if err := logFile.Close(); err != nil { - log.Error("Cannot close log file", zap.Error(err)) - return - } else if err := os.Remove(logFile.Path()); err != nil { - log.Error("Cannot remove log file", zap.Error(err)) - return - } -} - -// MeasurementCardinalityStats returns cardinality stats for all measurements. -func (p *Partition) MeasurementCardinalityStats() (MeasurementCardinalityStats, error) { - p.mu.RLock() - defer p.mu.RUnlock() - - // Return cached version, if enabled and the TTL is less than the last cache time. - if p.StatsTTL > 0 && !p.lastStatsTime.IsZero() && time.Since(p.lastStatsTime) < p.StatsTTL { - return p.statsCache.Clone(), nil - } - - // If cache is unavailable then generate fresh stats. - stats, err := p.measurementCardinalityStats() - if err != nil { - return nil, err - } - - // Cache the stats if enabled. - if p.StatsTTL > 0 { - p.statsCache = stats - p.lastStatsTime = time.Now() - } - - return stats, nil -} - -func (p *Partition) measurementCardinalityStats() (MeasurementCardinalityStats, error) { - fs, err := p.fileSet.Duplicate() - if err != nil { - return nil, err - } - defer fs.Release() - - stats := make(MeasurementCardinalityStats) - mitr := fs.MeasurementIterator() - if mitr == nil { - return stats, nil - } - - for { - // Iterate over each measurement and set cardinality. - mm := mitr.Next() - if mm == nil { - return stats, nil - } - - // Obtain all series for measurement. - sitr := fs.MeasurementSeriesIDIterator(mm.Name()) - if sitr == nil { - continue - } - - // All iterators should be series id set iterators except legacy 1.x data. - // Skip if it does not conform as aggregation would be too slow. - ssitr, ok := sitr.(tsdb.SeriesIDSetIterator) - if !ok { - continue - } - - // Intersect with partition set to ensure deleted series are removed. - set := p.seriesIDSet.And(ssitr.SeriesIDSet()) - cardinality := int(set.Cardinality()) - if cardinality == 0 { - continue - } - - // Set cardinality for the given measurement. - stats[string(mm.Name())] = cardinality - } -} - -type partitionTracker struct { - metrics *partitionMetrics - labels prometheus.Labels - enabled bool // Allows tracker to be disabled. -} - -func newPartitionTracker(metrics *partitionMetrics, defaultLabels prometheus.Labels) *partitionTracker { - return &partitionTracker{ - metrics: metrics, - labels: defaultLabels, - enabled: true, - } -} - -// Labels returns a copy of labels for use with index partition metrics. -func (t *partitionTracker) Labels() prometheus.Labels { - l := make(map[string]string, len(t.labels)) - for k, v := range t.labels { - l[k] = v - } - return l -} - -// AddSeriesCreated increases the number of series created in the partition by n -// and sets a sample of the time taken to create a series. 
-func (t *partitionTracker) AddSeriesCreated(n uint64, d time.Duration) { - if !t.enabled { - return - } - - labels := t.Labels() - t.metrics.SeriesCreated.With(labels).Add(float64(n)) - - if n == 0 { - return // Nothing to record - } - - perseries := d.Seconds() / float64(n) - t.metrics.SeriesCreatedDuration.With(labels).Observe(perseries) -} - -// AddSeriesDropped increases the number of series dropped in the partition by n. -func (t *partitionTracker) AddSeriesDropped(n uint64) { - if !t.enabled { - return - } - - labels := t.Labels() - t.metrics.SeriesDropped.With(labels).Add(float64(n)) -} - -// SetSeries sets the number of series in the partition. -func (t *partitionTracker) SetSeries(n uint64) { - if !t.enabled { - return - } - - labels := t.Labels() - t.metrics.Series.With(labels).Set(float64(n)) -} - -// AddSeries increases the number of series in the partition by n. -func (t *partitionTracker) AddSeries(n uint64) { - if !t.enabled { - return - } - - labels := t.Labels() - t.metrics.Series.With(labels).Add(float64(n)) -} - -// SubSeries decreases the number of series in the partition by n. -func (t *partitionTracker) SubSeries(n uint64) { - if !t.enabled { - return - } - - labels := t.Labels() - t.metrics.Series.With(labels).Sub(float64(n)) -} - -// SetMeasurements sets the number of measurements in the partition. -func (t *partitionTracker) SetMeasurements(n uint64) { - if !t.enabled { - return - } - - labels := t.Labels() - t.metrics.Measurements.With(labels).Set(float64(n)) -} - -// AddMeasurements increases the number of measurements in the partition by n. -func (t *partitionTracker) AddMeasurements(n uint64) { - if !t.enabled { - return - } - - labels := t.Labels() - t.metrics.Measurements.With(labels).Add(float64(n)) -} - -// SubMeasurements decreases the number of measurements in the partition by n. -func (t *partitionTracker) SubMeasurements(n uint64) { - if !t.enabled { - return - } - - labels := t.Labels() - t.metrics.Measurements.With(labels).Sub(float64(n)) -} - -// SetFiles sets the number of files in the partition. -func (t *partitionTracker) SetFiles(n uint64, typ string) { - if !t.enabled { - return - } - - labels := t.Labels() - labels["type"] = typ - t.metrics.FilesTotal.With(labels).Set(float64(n)) -} - -// SetDiskSize sets the size of files in the partition. -func (t *partitionTracker) SetDiskSize(n uint64) { - if !t.enabled { - return - } - - labels := t.Labels() - t.metrics.DiskSize.With(labels).Set(float64(n)) -} - -// IncActiveCompaction increments the number of active compactions for the provided level. -func (t *partitionTracker) IncActiveCompaction(level int) { - if !t.enabled { - return - } - - labels := t.Labels() - labels["level"] = fmt.Sprint(level) - - t.metrics.CompactionsActive.With(labels).Inc() -} - -// DecActiveCompaction decrements the number of active compactions for the provided level. -func (t *partitionTracker) DecActiveCompaction(level int) { - if !t.enabled { - return - } - - labels := t.Labels() - labels["level"] = fmt.Sprint(level) - - t.metrics.CompactionsActive.With(labels).Dec() -} - -// CompactionAttempted updates the number of compactions attempted for the provided level. 
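Reviewer note (not part of this patch): every tracker method above copies the base label set via `Labels()` before adding call-specific labels such as `type` or `level`, so the shared label map is never mutated. A tiny sketch of that copy-then-extend pattern with the Prometheus client (hypothetical metric and label values, not the partition's):

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

var compactions = prometheus.NewCounterVec(
	prometheus.CounterOpts{Name: "tsi_compactions_total", Help: "demo counter"},
	[]string{"index_partition", "level"},
)

// baseLabels plays the role of the tracker's default labels; callers copy it
// before adding per-call labels, exactly as Labels() does above.
var baseLabels = prometheus.Labels{"index_partition": "0"}

func incCompaction(level int) {
	labels := prometheus.Labels{}
	for k, v := range baseLabels {
		labels[k] = v // copy, never mutate the shared map
	}
	labels["level"] = fmt.Sprint(level)
	compactions.With(labels).Inc()
}

func main() {
	prometheus.MustRegister(compactions)
	incCompaction(1)
}
```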
-func (t *partitionTracker) CompactionAttempted(level int, success bool, d time.Duration) { - if !t.enabled { - return - } - - labels := t.Labels() - labels["level"] = fmt.Sprint(level) - if success { - t.metrics.CompactionDuration.With(labels).Observe(d.Seconds()) - - labels["status"] = "ok" - t.metrics.Compactions.With(labels).Inc() - return - } - - labels["status"] = "error" - t.metrics.Compactions.With(labels).Inc() -} - -// unionStringSets returns the union of two sets -func unionStringSets(a, b map[string]struct{}) map[string]struct{} { - other := make(map[string]struct{}) - for k := range a { - other[k] = struct{}{} - } - for k := range b { - other[k] = struct{}{} - } - return other -} - -// intersectStringSets returns the intersection of two sets. -func intersectStringSets(a, b map[string]struct{}) map[string]struct{} { - if len(a) < len(b) { - a, b = b, a - } - - other := make(map[string]struct{}) - for k := range a { - if _, ok := b[k]; ok { - other[k] = struct{}{} - } - } - return other -} - -var fileIDRegex = regexp.MustCompile(`^L(\d+)-(\d+)\..+$`) - -// ParseFilename extracts the numeric id from a log or index file path. -// Returns 0 if it cannot be parsed. -func ParseFilename(name string) (level, id int) { - a := fileIDRegex.FindStringSubmatch(filepath.Base(name)) - if a == nil { - return 0, 0 - } - - level, _ = strconv.Atoi(a[1]) - id, _ = strconv.Atoi(a[2]) - return id, level -} - -// Manifest represents the list of log & index files that make up the index. -// The files are listed in time order, not necessarily ID order. -type Manifest struct { - Levels []CompactionLevel `json:"levels,omitempty"` - Files []string `json:"files,omitempty"` - - // Version should be updated whenever the TSI format has changed. - Version int `json:"version,omitempty"` - - path string // location on disk of the manifest. -} - -// NewManifest returns a new instance of Manifest with default compaction levels. -func NewManifest(path string) *Manifest { - m := &Manifest{ - Levels: make([]CompactionLevel, len(DefaultCompactionLevels)), - Version: Version, - path: path, - } - copy(m.Levels, DefaultCompactionLevels[:]) - return m -} - -// HasFile returns true if name is listed in the log files or index files. -func (m *Manifest) HasFile(name string) bool { - for _, filename := range m.Files { - if filename == name { - return true - } - } - return false -} - -// Validate checks if the Manifest's version is compatible with this version -// of the tsi1 index. -func (m *Manifest) Validate() error { - // If we don't have an explicit version in the manifest file then we know - // it's not compatible with the latest tsi1 Index. - if m.Version != Version { - return ErrIncompatibleVersion - } - return nil -} - -// Write writes the manifest file to the provided path, returning the number of -// bytes written and an error, if any. -func (m *Manifest) Write() (int64, error) { - buf, err := json.MarshalIndent(m, "", " ") - if err != nil { - return 0, err - } - buf = append(buf, '\n') - - if err := ioutil.WriteFile(m.path, buf, 0666); err != nil { - return 0, err - } - return int64(len(buf)), nil -} - -// ReadManifestFile reads a manifest from a file path and returns the Manifest, -// the size of the manifest on disk, and any error if appropriate. -func ReadManifestFile(path string) (*Manifest, int64, error) { - buf, err := ioutil.ReadFile(path) - if err != nil { - return nil, 0, err - } - - // Decode manifest. 
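Reviewer note (not part of this patch): the removed `ParseFilename` parses names of the form `L<level>-<id>.<ext>` via `fileIDRegex`, but note that while its named results are `(level, id)` the final statement is `return id, level`, so callers receive the two values swapped relative to the names; worth confirming the new location's callers expect that order. A standalone sketch of the same parse, returning the values in the order the names suggest:

```go
package main

import (
	"fmt"
	"path/filepath"
	"regexp"
	"strconv"
)

// Same pattern as the removed fileIDRegex: L<level>-<id>.<extension>
var fileIDRegex = regexp.MustCompile(`^L(\d+)-(\d+)\..+$`)

// parseFilename returns (level, id) in the order the names suggest,
// or (0, 0) when the name does not match.
func parseFilename(name string) (level, id int) {
	a := fileIDRegex.FindStringSubmatch(filepath.Base(name))
	if a == nil {
		return 0, 0
	}
	level, _ = strconv.Atoi(a[1])
	id, _ = strconv.Atoi(a[2])
	return level, id
}

func main() {
	level, id := parseFilename("/data/index/0/L1-00000002.tsi")
	fmt.Println(level, id) // 1 2
}
```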
- var m Manifest - if err := json.Unmarshal(buf, &m); err != nil { - return nil, 0, err - } - - // Set the path of the manifest. - m.path = path - return &m, int64(len(buf)), nil -} - -func joinIntSlice(a []int, sep string) string { - other := make([]string, len(a)) - for i := range a { - other[i] = strconv.Itoa(a[i]) - } - return strings.Join(other, sep) -} - -// CompactionLevel represents a grouping of index files based on bloom filter -// settings. By having the same bloom filter settings, the filters -// can be merged and evaluated at a higher level. -type CompactionLevel struct { - // Bloom filter bit size & hash count - M uint64 `json:"m,omitempty"` - K uint64 `json:"k,omitempty"` -} - -// DefaultCompactionLevels is the default settings used by the index. -var DefaultCompactionLevels = []CompactionLevel{ - {M: 0, K: 0}, // L0: Log files, no filter. - {M: 1 << 25, K: 6}, // L1: Initial compaction - {M: 1 << 25, K: 6}, // L2 - {M: 1 << 26, K: 6}, // L3 - {M: 1 << 27, K: 6}, // L4 - {M: 1 << 28, K: 6}, // L5 - {M: 1 << 29, K: 6}, // L6 - {M: 1 << 30, K: 6}, // L7 -} - -// MaxIndexMergeCount is the maximum number of files that can be merged together at once. -const MaxIndexMergeCount = 2 - -// MaxIndexFileSize is the maximum expected size of an index file. -const MaxIndexFileSize = 4 * (1 << 30) - -// IsPartitionDir returns true if directory contains a MANIFEST file. -func IsPartitionDir(path string) (bool, error) { - if _, err := os.Stat(filepath.Join(path, ManifestFileName)); os.IsNotExist(err) { - return false, nil - } else if err != nil { - return false, err - } - return true, nil -} diff --git a/tsdb/tsi1/partition_test.go b/tsdb/tsi1/partition_test.go deleted file mode 100644 index e6f55ac8a5..0000000000 --- a/tsdb/tsi1/partition_test.go +++ /dev/null @@ -1,144 +0,0 @@ -package tsi1_test - -import ( - "fmt" - "io/ioutil" - "os" - "path/filepath" - "testing" - - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" - "github.com/influxdata/influxdb/v2/tsdb/tsi1" -) - -func TestPartition_Open(t *testing.T) { - sfile := MustOpenSeriesFile() - defer sfile.Close() - - // Opening a fresh index should set the MANIFEST version to current version. - p := NewPartition(sfile.SeriesFile) - t.Run("open new index", func(t *testing.T) { - if err := p.Open(); err != nil { - t.Fatal(err) - } - - fs, err := p.FileSet() - if err != nil { - p.Close() - t.Fatal(err) - } - defer fs.Release() - - // Check version set appropriately. - if got, exp := p.Manifest(fs).Version, 1; got != exp { - p.Close() - t.Fatalf("got index version %d, expected %d", got, exp) - } - }) - if t.Failed() { - return - } - - // Reopening an open index should return an error. - t.Run("reopen open index", func(t *testing.T) { - err := p.Open() - if err == nil { - p.Close() - t.Fatal("didn't get an error on reopen, but expected one") - } - p.Close() - }) - if t.Failed() { - return - } - - // Opening an incompatible index should return an error. - incompatibleVersions := []int{-1, 0, 2} - for _, v := range incompatibleVersions { - t.Run(fmt.Sprintf("incompatible index version: %d", v), func(t *testing.T) { - p = NewPartition(sfile.SeriesFile) - // Manually create a MANIFEST file for an incompatible index version. - mpath := filepath.Join(p.Path(), tsi1.ManifestFileName) - m := tsi1.NewManifest(mpath) - m.Levels = nil - m.Version = v // Set example MANIFEST version. - if _, err := m.Write(); err != nil { - t.Fatal(err) - } - - // Log the MANIFEST file. 
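Reviewer note (not part of this patch): the `M`/`K` values in `DefaultCompactionLevels` are bloom filter bit counts and hash counts, with the bit count growing level over level so that filters merged at higher levels keep a roughly constant false-positive rate as series accumulate. Using the standard estimate p ≈ (1 − e^(−kn/m))^k (a textbook approximation, not something in this code), with illustrative series counts:

```go
package main

import (
	"fmt"
	"math"
)

// fpRate is the standard bloom-filter false-positive estimate for
// m bits, k hash functions and n inserted keys: (1 - e^(-kn/m))^k.
func fpRate(m, k, n float64) float64 {
	return math.Pow(1-math.Exp(-k*n/m), k)
}

func main() {
	const k = 6
	// Scaling n with m keeps the estimated rate flat; both print ~2e-05.
	fmt.Printf("L1, m=2^25 bits, n=1e6:  %.2g\n", fpRate(1<<25, k, 1e6))
	fmt.Printf("L7, m=2^30 bits, n=32e6: %.2g\n", fpRate(1<<30, k, 32e6))
}
```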
- data, err := ioutil.ReadFile(mpath) - if err != nil { - panic(err) - } - t.Logf("Incompatible MANIFEST: %s", data) - - // Opening this index should return an error because the MANIFEST has an - // incompatible version. - err = p.Open() - if err != tsi1.ErrIncompatibleVersion { - p.Close() - t.Fatalf("got error %v, expected %v", err, tsi1.ErrIncompatibleVersion) - } - }) - if t.Failed() { - return - } - } -} - -func TestPartition_Manifest(t *testing.T) { - t.Run("current MANIFEST", func(t *testing.T) { - sfile := MustOpenSeriesFile() - defer sfile.Close() - - p := MustOpenPartition(sfile.SeriesFile) - defer p.Close() - - fs, err := p.FileSet() - if err != nil { - t.Fatal(err) - } - defer fs.Release() - - if got, exp := p.Manifest(fs).Version, tsi1.Version; got != exp { - t.Fatalf("got MANIFEST version %d, expected %d", got, exp) - } - }) -} - -// Partition is a test wrapper for tsi1.Partition. -type Partition struct { - *tsi1.Partition -} - -// NewPartition returns a new instance of Partition at a temporary path. -func NewPartition(sfile *seriesfile.SeriesFile) *Partition { - return &Partition{Partition: tsi1.NewPartition(sfile, MustTempPartitionDir())} -} - -// MustOpenPartition returns a new, open index. Panic on error. -func MustOpenPartition(sfile *seriesfile.SeriesFile) *Partition { - p := NewPartition(sfile) - if err := p.Open(); err != nil { - panic(err) - } - return p -} - -// Close closes and removes the index directory. -func (p *Partition) Close() error { - defer os.RemoveAll(p.Path()) - return p.Partition.Close() -} - -// Reopen closes and opens the index. -func (p *Partition) Reopen() error { - if err := p.Partition.Close(); err != nil { - return err - } - - sfile, path := p.SeriesFile(), p.Path() - p.Partition = tsi1.NewPartition(sfile, path) - return p.Open() -} diff --git a/tsdb/tsi1/report.go b/tsdb/tsi1/report.go deleted file mode 100644 index 257a3c4bc5..0000000000 --- a/tsdb/tsi1/report.go +++ /dev/null @@ -1,368 +0,0 @@ -package tsi1 - -import ( - "bytes" - "context" - "fmt" - "io" - "math" - "sort" - "text/tabwriter" - "time" - - "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" -) - -const ( - // Number of series IDs to stored in slice before we convert to a roaring - // bitmap. Roaring bitmaps have a non-trivial initial cost to construct. - useBitmapN = 25 -) - -// ReportCommand represents the program execution for "influxd inspect report-tsi". -type ReportCommand struct { - // Standard input/output, overridden for testing. - Stderr io.Writer - Stdout io.Writer - - // Filters - DataPath string - OrgID, BucketID *influxdb.ID - - byOrgBucket map[influxdb.ID]map[influxdb.ID]*cardinality - byBucketMeasurement map[influxdb.ID]map[string]*cardinality - orgToBucket map[influxdb.ID][]influxdb.ID - - SeriesDirPath string // optional. Defaults to dbPath/_series - sfile *seriesfile.SeriesFile - indexFile *Index - - TopN int - ByMeasurement bool - byTagKey bool - - start time.Time -} - -// NewReportCommand returns a new instance of ReportCommand with default setting applied. 
-func NewReportCommand() *ReportCommand { - return &ReportCommand{ - byOrgBucket: make(map[influxdb.ID]map[influxdb.ID]*cardinality), - byBucketMeasurement: make(map[influxdb.ID]map[string]*cardinality), - orgToBucket: make(map[influxdb.ID][]influxdb.ID), - TopN: 0, - byTagKey: false, - } -} - -// ReportTSISummary is returned by a report-tsi Run() command and is used to access cardinality information -type Summary struct { - TotalCardinality int64 - OrgCardinality map[influxdb.ID]int64 - BucketByOrgCardinality map[influxdb.ID]map[influxdb.ID]int64 - BucketMeasurementCardinality map[influxdb.ID]map[string]int64 -} - -func newSummary() *Summary { - return &Summary{ - OrgCardinality: make(map[influxdb.ID]int64), - BucketByOrgCardinality: make(map[influxdb.ID]map[influxdb.ID]int64), - BucketMeasurementCardinality: make(map[influxdb.ID]map[string]int64), - } -} - -// Run runs the report-tsi tool which can be used to find the cardinality -// any org or bucket. Run returns a *ReportTSISummary, which contains maps for finding -// the cardinality of a bucket or org based on its influxdb.ID -func (report *ReportCommand) Run(print bool) (*Summary, error) { - report.start = time.Now() - - sfile := seriesfile.NewSeriesFile(report.SeriesDirPath) - - if err := sfile.Open(context.Background()); err != nil { - return nil, err - } - defer sfile.Close() - report.sfile = sfile - - report.indexFile = NewIndex(sfile, NewConfig(), WithPath(report.DataPath)) - if err := report.indexFile.Open(context.Background()); err != nil { - return nil, err - } - defer report.indexFile.Close() - - summary, err := report.calculateOrgBucketCardinality() - if err != nil { - return nil, err - } - - if print { - report.printCardinalitySummary(summary) - } - - return summary, nil -} - -type cardinality struct { - name []byte - short []uint32 - set *tsdb.SeriesIDSet -} - -func (c *cardinality) add(x uint64) { - if c.set != nil { - c.set.AddNoLock(tsdb.NewSeriesID(x)) - return - } - - c.short = append(c.short, uint32(x)) // Series IDs never get beyond 2^32 - - // Cheaper to store in bitmap. 
- if len(c.short) > useBitmapN { - c.set = tsdb.NewSeriesIDSet() - for i := 0; i < len(c.short); i++ { - c.set.AddNoLock(tsdb.NewSeriesID(uint64(c.short[i]))) - } - c.short = nil - return - } -} - -func (c *cardinality) cardinality() int64 { - if c == nil || (c.short == nil && c.set == nil) { - return 0 - } - - if c.short != nil { - return int64(len(c.short)) - } - return int64(c.set.Cardinality()) -} - -func (report *ReportCommand) calculateCardinalities() error { - itr, err := report.indexFile.MeasurementIterator() - if err != nil { - return err - } else if itr == nil { - return nil - } - defer itr.Close() - - for { - name, err := itr.Next() - if err != nil { - return err - } else if name == nil { - return nil - } - - if err = report.calculateMeasurementCardinalities(name); err != nil { - return err - } - } -} - -func (report *ReportCommand) calculateMeasurementCardinalities(name []byte) error { - // decode org and bucket from measurement name - var a [16]byte - copy(a[:], name[:16]) - org, bucket := tsdb.DecodeName(a) - if report.OrgID != nil && *report.OrgID != org || - report.BucketID != nil && *report.BucketID != bucket { - return nil - } - - idx := report.indexFile - sitr, err := idx.MeasurementSeriesIDIterator(name) - if err != nil { - return err - } else if sitr == nil { - return nil - } - - defer sitr.Close() - - var bucketCard *cardinality - - // initialize map of bucket to measurements - if _, ok := report.byBucketMeasurement[bucket]; !ok { - report.byBucketMeasurement[bucket] = make(map[string]*cardinality) - } - - if _, ok := report.byOrgBucket[org]; !ok { - report.byOrgBucket[org] = make(map[influxdb.ID]*cardinality) - } - - // initialize total cardinality tracking struct for this bucket - if c, ok := report.byOrgBucket[org][bucket]; !ok { - bucketCard = &cardinality{name: []byte(bucket.String())} - report.byOrgBucket[org][bucket] = bucketCard - } else { - bucketCard = c - } - - for { - e, err := sitr.Next() - if err != nil { - return err - } else if e.SeriesID.ID == 0 { - break - } - - id := e.SeriesID.ID - if id > math.MaxUint32 { - return fmt.Errorf("series ID is too large: %d (max %d). 
Corrupted series file?", e.SeriesID, uint32(math.MaxUint32)) - } - - // add cardinality to bucket - bucketCard.add(id) - - // retrieve tags associated with series id so we can get - // associated measurement - _, tags := report.sfile.Series(e.SeriesID) - if len(tags) == 0 { - return fmt.Errorf("series ID has empty key: %d", e.SeriesID) - } - - // measurement name should be first tag - if !bytes.Equal(tags[0].Key, models.MeasurementTagKeyBytes) { - return fmt.Errorf("corrupted data: first tag should be measurement name, got: %v", string(tags[0].Value)) - } - mName := string(tags[0].Value) - - // update measurement-level cardinality if tracking by measurement - if report.ByMeasurement { - var mCard *cardinality - if cardForM, ok := report.byBucketMeasurement[bucket][mName]; !ok { - mCard = &cardinality{name: []byte(mName)} - report.byBucketMeasurement[bucket][mName] = mCard - } else { - mCard = cardForM - } - mCard.add(id) - } - } - - return nil -} - -func (report *ReportCommand) calculateOrgBucketCardinality() (*Summary, error) { - if err := report.calculateCardinalities(); err != nil { - return nil, err - } - - var totalCard int64 - // Generate a new summary - summary := newSummary() - for orgID, bucketMap := range report.byOrgBucket { - summary.BucketByOrgCardinality[orgID] = make(map[influxdb.ID]int64) - orgTotal := int64(0) - for bucketID, bucketCard := range bucketMap { - count := bucketCard.cardinality() - summary.BucketByOrgCardinality[orgID][bucketID] = count - summary.BucketMeasurementCardinality[bucketID] = make(map[string]int64) - orgTotal += count - totalCard += count - } - summary.OrgCardinality[orgID] = orgTotal - } - - summary.TotalCardinality = totalCard - - for bucketID, bucketMeasurement := range report.byBucketMeasurement { - for mName, mCard := range bucketMeasurement { - summary.BucketMeasurementCardinality[bucketID][mName] = mCard.cardinality() - } - } - - return summary, nil -} - -func (report *ReportCommand) printCardinalitySummary(summary *Summary) { - tw := tabwriter.NewWriter(report.Stdout, 4, 4, 1, '\t', 0) - fmt.Fprint(tw, "\n") - - fmt.Fprintf(tw, "Total: %d\n", summary.TotalCardinality) - // sort total org and bucket and limit to top n values - sortedOrgs := sortKeys(summary.OrgCardinality, report.TopN) - - for i, orgResult := range sortedOrgs { - orgID, _ := influxdb.IDFromString(orgResult.id) - sortedBuckets := sortKeys(summary.BucketByOrgCardinality[*orgID], report.TopN) - // if we specify a bucket, we do not print the org cardinality - fmt.Fprintln(tw, "===============") - if report.BucketID == nil { - fmt.Fprintf(tw, "Org %s total: %d\n", orgResult.id, orgResult.card) - } - - for _, bucketResult := range sortedBuckets { - fmt.Fprintf(tw, "\tBucket %s total: %d\n", bucketResult.id, bucketResult.card) - - if report.ByMeasurement { - bucketID, _ := influxdb.IDFromString(bucketResult.id) - sortedMeasurements := sortMeasurements(summary.BucketMeasurementCardinality[*bucketID], report.TopN) - - for _, measResult := range sortedMeasurements { - fmt.Fprintf(tw, "\t\t_m=%s\t%d\n", measResult.id, measResult.card) - } - } - } - if i == len(sortedOrgs)-1 { - fmt.Fprintln(tw, "===============") - } - } - fmt.Fprint(tw, "\n\n") - - elapsed := time.Since(report.start) - fmt.Fprintf(tw, "Finished in %v\n", elapsed) - - tw.Flush() -} - -// sortKeys is a quick helper to return the sorted set of a map's keys -// sortKeys will only return report.topN keys if the flag is set -type result struct { - id string - card int64 -} - -type resultList []result - -func (a resultList) 
Len() int { return len(a) } -func (a resultList) Less(i, j int) bool { return a[i].card < a[j].card } -func (a resultList) Swap(i, j int) { a[i], a[j] = a[j], a[i] } - -func sortKeys(vals map[influxdb.ID]int64, topN int) resultList { - sorted := make(resultList, 0) - for k, v := range vals { - sorted = append(sorted, result{k.String(), v}) - } - sort.Sort(sort.Reverse(sorted)) - - if topN == 0 { - return sorted - } - if topN > len(sorted) { - topN = len(sorted) - } - return sorted[:topN] -} - -func sortMeasurements(vals map[string]int64, topN int) resultList { - sorted := make(resultList, 0) - for k, v := range vals { - sorted = append(sorted, result{k, v}) - } - sort.Sort(sort.Reverse(sorted)) - - if topN == 0 { - return sorted - } - if topN > len(sorted) { - topN = len(sorted) - } - return sorted[:topN] -} diff --git a/tsdb/tsi1/sql_index_exporter.go b/tsdb/tsi1/sql_index_exporter.go deleted file mode 100644 index 66f8829ea8..0000000000 --- a/tsdb/tsi1/sql_index_exporter.go +++ /dev/null @@ -1,227 +0,0 @@ -package tsi1 - -import ( - "bytes" - "fmt" - "io" - "strings" - "unicode/utf8" - - "go.uber.org/zap" -) - -// SQLIndexExporter writes out all TSI data for an index to a SQL export. -type SQLIndexExporter struct { - w io.Writer - - initialized bool - - // Logs non-fatal warnings. - Logger *zap.Logger - - // Write schema, if true. - ShowSchema bool -} - -// NewSQLIndexExporter returns a new instance of SQLIndexExporter. -func NewSQLIndexExporter(w io.Writer) *SQLIndexExporter { - return &SQLIndexExporter{ - w: w, - - Logger: zap.NewNop(), - ShowSchema: true, - } -} - -// Close ends the export and writes final output. -func (e *SQLIndexExporter) Close() error { - return nil -} - -// ExportIndex writes all blocks of the TSM file. -func (e *SQLIndexExporter) ExportIndex(idx *Index) error { - if err := e.initialize(); err != nil { - return err - } - - fmt.Fprintln(e.w, `BEGIN TRANSACTION;`) - - // Iterate over each measurement across all partitions. 
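Reviewer note (not part of this patch): the `cardinality` helper in the removed report.go above keeps series IDs in a plain `[]uint32` until it crosses `useBitmapN` (25) and only then pays the construction cost of a roaring bitmap. A standalone sketch of the same idea using the roaring library directly (an assumed dependency here; the real code goes through `tsdb.SeriesIDSet`):

```go
package main

import (
	"fmt"

	"github.com/RoaringBitmap/roaring"
)

// useBitmapN mirrors the threshold in the removed report.go.
const useBitmapN = 25

// counter stores a small ID set in a slice and upgrades to a roaring
// bitmap once the slice grows past useBitmapN.
type counter struct {
	short []uint32
	set   *roaring.Bitmap
}

func (c *counter) add(x uint32) {
	if c.set != nil {
		c.set.Add(x)
		return
	}
	c.short = append(c.short, x)
	if len(c.short) > useBitmapN {
		c.set = roaring.New()
		for _, v := range c.short {
			c.set.Add(v)
		}
		c.short = nil
	}
}

func (c *counter) cardinality() uint64 {
	if c.set != nil {
		return c.set.GetCardinality()
	}
	return uint64(len(c.short))
}

func main() {
	var c counter
	for i := uint32(0); i < 100; i++ {
		c.add(i)
	}
	fmt.Println(c.cardinality()) // 100
}
```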
- itr, err := idx.MeasurementIterator() - if err != nil { - return err - } else if itr == nil { - return nil - } - defer itr.Close() - - for { - name, err := itr.Next() - if err != nil { - return err - } else if name == nil { - break - } - - if err := e.exportMeasurement(idx, name); err != nil { - return err - } - } - - fmt.Fprintln(e.w, "COMMIT;") - return nil -} - -func (e *SQLIndexExporter) exportMeasurement(idx *Index, name []byte) error { - if err := e.exportMeasurementSeries(idx, name); err != nil { - return err - } - - itr, err := idx.TagKeyIterator(name) - if err != nil { - return err - } else if itr == nil { - return nil - } - defer itr.Close() - - for { - key, err := itr.Next() - if err != nil { - return err - } else if key == nil { - break - } - - if err := e.exportTagKey(idx, name, key); err != nil { - return err - } - } - return nil -} - -func (e *SQLIndexExporter) exportMeasurementSeries(idx *Index, name []byte) error { - itr, err := idx.MeasurementSeriesIDIterator(name) - if err != nil { - return err - } else if itr == nil { - return nil - } - defer itr.Close() - - for { - elem, err := itr.Next() - if err != nil { - return err - } else if elem.SeriesID.ID == 0 { - break - } - - if _, err := fmt.Fprintf(e.w, "INSERT INTO measurement_series (name, series_id) VALUES ('%x', %d);\n", name, elem.SeriesID.ID); err != nil { - return err - } - } - return nil -} - -func (e *SQLIndexExporter) exportTagKey(idx *Index, name, key []byte) error { - itr, err := idx.TagValueIterator(name, key) - if err != nil { - return err - } else if itr == nil { - return nil - } - defer itr.Close() - - for { - value, err := itr.Next() - if err != nil { - return err - } else if value == nil { - break - } - - if err := e.exportTagValue(idx, name, key, value); err != nil { - return err - } - } - return nil -} - -func (e *SQLIndexExporter) exportTagValue(idx *Index, name, key, value []byte) error { - itr, err := idx.TagValueSeriesIDIterator(name, key, value) - if err != nil { - return err - } else if itr == nil { - return nil - } - defer itr.Close() - - for { - elem, err := itr.Next() - if err != nil { - return err - } else if elem.SeriesID.ID == 0 { - break - } - - // Replace special case keys for measurement & field. 
- if bytes.Equal(key, []byte{0}) { - key = []byte("_measurement") - } else if bytes.Equal(key, []byte{0xff}) { - key = []byte("_field") - } - - if _, err := fmt.Fprintf(e.w, - "INSERT INTO tag_value_series (name, key, value, series_id) VALUES ('%x', %s, %s, %d);\n", - name, - quoteSQL(string(key)), - quoteSQL(string(value)), - elem.SeriesID.ID, - ); err != nil { - return err - } - } - return nil -} - -func (e *SQLIndexExporter) initialize() error { - if e.initialized { - return nil - } - e.initialized = true - - if !e.ShowSchema { - return nil - } - fmt.Fprintln(e.w, ` -CREATE TABLE IF NOT EXISTS measurement_series ( - name TEXT NOT NULL, - series_id INTEGER NOT NULL -); - -CREATE TABLE IF NOT EXISTS tag_value_series ( - name TEXT NOT NULL, - key TEXT NOT NULL, - value TEXT NOT NULL, - series_id INTEGER NOT NULL -); -`[1:]) - - return nil -} - -func quoteSQL(s string) string { - return `'` + sqlReplacer.Replace(toValidUTF8(s)) + `'` -} - -var sqlReplacer = strings.NewReplacer(`'`, `''`, "\x00", "") - -func toValidUTF8(s string) string { - return strings.Map(func(r rune) rune { - if r == utf8.RuneError { - return -1 - } - return r - }, s) -} diff --git a/tsdb/tsi1/sql_index_exporter_test.go b/tsdb/tsi1/sql_index_exporter_test.go deleted file mode 100644 index c7b99c775a..0000000000 --- a/tsdb/tsi1/sql_index_exporter_test.go +++ /dev/null @@ -1,52 +0,0 @@ -package tsi1_test - -import ( - "bytes" - "os" - "testing" - - "github.com/influxdata/influxdb/v2/logger" - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/tsi1" -) - -func TestSQLIndexExporter_ExportIndex(t *testing.T) { - idx := MustOpenIndex(1, tsi1.NewConfig()) - defer idx.Close() - - // Add series to index. - if err := idx.CreateSeriesSliceIfNotExists([]Series{ - {Name: tsdb.EncodeNameSlice(1, 2), Tags: models.NewTags(map[string]string{"region": "east", "status": "ok"})}, - {Name: tsdb.EncodeNameSlice(1, 2), Tags: models.NewTags(map[string]string{"region": "west"})}, - {Name: tsdb.EncodeNameSlice(3, 4), Tags: models.NewTags(map[string]string{"region": "east"})}, - }); err != nil { - t.Fatal(err) - } - - // Expected output. - want := ` -BEGIN TRANSACTION; -INSERT INTO measurement_series (name, series_id) VALUES ('00000000000000010000000000000002', 1); -INSERT INTO measurement_series (name, series_id) VALUES ('00000000000000010000000000000002', 5); -INSERT INTO tag_value_series (name, key, value, series_id) VALUES ('00000000000000010000000000000002', 'region', 'east', 1); -INSERT INTO tag_value_series (name, key, value, series_id) VALUES ('00000000000000010000000000000002', 'region', 'west', 5); -INSERT INTO tag_value_series (name, key, value, series_id) VALUES ('00000000000000010000000000000002', 'status', 'ok', 1); -INSERT INTO measurement_series (name, series_id) VALUES ('00000000000000030000000000000004', 2); -INSERT INTO tag_value_series (name, key, value, series_id) VALUES ('00000000000000030000000000000004', 'region', 'east', 2); -COMMIT; -`[1:] - - // Export file to SQL. 
- var buf bytes.Buffer - e := tsi1.NewSQLIndexExporter(&buf) - e.ShowSchema = false - e.Logger = logger.New(os.Stderr) - if err := e.ExportIndex(idx.Index); err != nil { - t.Fatal(err) - } else if err := e.Close(); err != nil { - t.Fatal(err) - } else if got := buf.String(); got != want { - t.Fatalf("unexpected output:\ngot=%s\n--\nwant=%s", got, want) - } -} diff --git a/tsdb/tsi1/stats.go b/tsdb/tsi1/stats.go deleted file mode 100644 index c8499eec91..0000000000 --- a/tsdb/tsi1/stats.go +++ /dev/null @@ -1,233 +0,0 @@ -package tsi1 - -import ( - "bytes" - "encoding/binary" - "fmt" - "hash/crc32" - "io" - "sort" - - "github.com/influxdata/influxdb/v2/pkg/binaryutil" -) - -const ( - // MeasurementCardinalityStatsMagicNumber is written as the first 4 bytes - // of a data file to identify the file as a tsi1 cardinality file. - MeasurementCardinalityStatsMagicNumber string = "TSIS" - - // MeasurementCardinalityVersion indicates the version of the TSIC file format. - MeasurementCardinalityStatsVersion byte = 1 -) - -// MeasurementCardinalityStats represents a set of measurement sizes. -type MeasurementCardinalityStats map[string]int - -// NewMeasurementCardinality returns a new instance of MeasurementCardinality. -func NewMeasurementCardinalityStats() MeasurementCardinalityStats { - return make(MeasurementCardinalityStats) -} - -// MeasurementNames returns a list of sorted measurement names. -func (s MeasurementCardinalityStats) MeasurementNames() []string { - a := make([]string, 0, len(s)) - for name := range s { - a = append(a, name) - } - sort.Strings(a) - return a -} - -// Inc increments a measurement count by 1. -func (s MeasurementCardinalityStats) Inc(name []byte) { - s[string(name)]++ -} - -// Dec decrements a measurement count by 1. Deleted if zero. -func (s MeasurementCardinalityStats) Dec(name []byte) { - v := s[string(name)] - if v <= 1 { - delete(s, string(name)) - } else { - s[string(name)] = v - 1 - } -} - -// Add adds the values of all measurements in other to s. -func (s MeasurementCardinalityStats) Add(other MeasurementCardinalityStats) { - for name, v := range other { - s[name] += v - } -} - -// Sub subtracts the values of all measurements in other from s. -func (s MeasurementCardinalityStats) Sub(other MeasurementCardinalityStats) { - for name, v := range other { - s[name] -= v - } -} - -// Clone returns a copy of s. -func (s MeasurementCardinalityStats) Clone() MeasurementCardinalityStats { - other := make(MeasurementCardinalityStats, len(s)) - for k, v := range s { - other[k] = v - } - return other -} - -// ReadFrom reads stats from r in a binary format. Reader must also be an io.ByteReader. -func (s MeasurementCardinalityStats) ReadFrom(r io.Reader) (n int64, err error) { - br, ok := r.(io.ByteReader) - if !ok { - return 0, fmt.Errorf("tsm1.MeasurementCardinalityStats.ReadFrom: ByteReader required") - } - - // Read & verify magic. - magic := make([]byte, 4) - nn, err := io.ReadFull(r, magic) - if n += int64(nn); err != nil { - return n, fmt.Errorf("tsm1.MeasurementCardinalityStats.ReadFrom: cannot read stats magic: %s", err) - } else if string(magic) != MeasurementCardinalityStatsMagicNumber { - return n, fmt.Errorf("tsm1.MeasurementCardinalityStats.ReadFrom: invalid tsm1 stats file") - } - - // Read & verify version. 
- version := make([]byte, 1) - nn, err = io.ReadFull(r, version) - if n += int64(nn); err != nil { - return n, fmt.Errorf("tsm1.MeasurementCardinalityStats.ReadFrom: cannot read stats version: %s", err) - } else if version[0] != MeasurementCardinalityStatsVersion { - return n, fmt.Errorf("tsm1.MeasurementCardinalityStats.ReadFrom: incompatible tsm1 stats version: %d", version[0]) - } - - // Read checksum. - checksum := make([]byte, 4) - nn, err = io.ReadFull(r, checksum) - if n += int64(nn); err != nil { - return n, fmt.Errorf("tsm1.MeasurementCardinalityStats.ReadFrom: cannot read checksum: %s", err) - } - - // Read measurement count. - measurementN, err := binary.ReadVarint(br) - if err != nil { - return n, fmt.Errorf("tsm1.MeasurementCardinalityStats.ReadFrom: cannot read stats measurement count: %s", err) - } - n += int64(binaryutil.VarintSize(measurementN)) - - // Read measurements. - for i := int64(0); i < measurementN; i++ { - nn64, err := s.readMeasurementFrom(r) - if n += nn64; err != nil { - return n, err - } - } - - // Expect end-of-file. - buf := make([]byte, 1) - if _, err := r.Read(buf); err != io.EOF { - return n, fmt.Errorf("tsm1.MeasurementCardinalityStats.ReadFrom: file too large, expected EOF") - } - - return n, nil -} - -// readMeasurementFrom reads a measurement stat from r in a binary format. -func (s MeasurementCardinalityStats) readMeasurementFrom(r io.Reader) (n int64, err error) { - br, ok := r.(io.ByteReader) - if !ok { - return 0, fmt.Errorf("tsm1.MeasurementCardinalityStats.readMeasurementFrom: ByteReader required") - } - - // Read measurement name length. - nameLen, err := binary.ReadVarint(br) - if err != nil { - return n, fmt.Errorf("tsm1.MeasurementCardinalityStats.readMeasurementFrom: cannot read stats measurement name length: %s", err) - } - n += int64(binaryutil.VarintSize(nameLen)) - - // Read measurement name. Use large capacity so it can usually be stack allocated. - // Go allocates unescaped variables smaller than 64KB on the stack. - name := make([]byte, nameLen) - nn, err := io.ReadFull(r, name) - if n += int64(nn); err != nil { - return n, fmt.Errorf("tsm1.MeasurementCardinalityStats.readMeasurementFrom: cannot read stats measurement name: %s", err) - } - - // Read size. - sz, err := binary.ReadVarint(br) - if err != nil { - return n, fmt.Errorf("tsm1.MeasurementCardinalityStats.readMeasurementFrom: cannot read stats measurement size: %s", err) - } - n += int64(binaryutil.VarintSize(sz)) - - // Insert into map. - s[string(name)] = int(sz) - - return n, nil -} - -// WriteTo writes stats to w in a binary format. -func (s MeasurementCardinalityStats) WriteTo(w io.Writer) (n int64, err error) { - // Write magic & version. - nn, err := io.WriteString(w, MeasurementCardinalityStatsMagicNumber) - if n += int64(nn); err != nil { - return n, err - } - nn, err = w.Write([]byte{MeasurementCardinalityStatsVersion}) - if n += int64(nn); err != nil { - return n, err - } - - // Write measurement count. - var buf bytes.Buffer - b := make([]byte, binary.MaxVarintLen64) - if _, err = buf.Write(b[:binary.PutVarint(b, int64(len(s)))]); err != nil { - return n, err - } - - // Write all measurements in sorted order. - for _, name := range s.MeasurementNames() { - if _, err := s.writeMeasurementTo(&buf, name, s[name]); err != nil { - return n, err - } - } - data := buf.Bytes() - - // Compute & write checksum. - if err := binary.Write(w, binary.BigEndian, crc32.ChecksumIEEE(data)); err != nil { - return n, err - } - n += 4 - - // Write buffer. 
- nn, err = w.Write(data) - if n += int64(nn); err != nil { - return n, err - } - - return n, err -} - -func (s MeasurementCardinalityStats) writeMeasurementTo(w io.Writer, name string, sz int) (n int64, err error) { - // Write measurement name length. - buf := make([]byte, binary.MaxVarintLen64) - nn, err := w.Write(buf[:binary.PutVarint(buf, int64(len(name)))]) - if n += int64(nn); err != nil { - return n, err - } - - // Write measurement name. - nn, err = io.WriteString(w, name) - if n += int64(nn); err != nil { - return n, err - } - - // Write size. - nn, err = w.Write(buf[:binary.PutVarint(buf, int64(sz))]) - if n += int64(nn); err != nil { - return n, err - } - - return n, err -} diff --git a/tsdb/tsi1/stats_test.go b/tsdb/tsi1/stats_test.go deleted file mode 100644 index bd632f3d92..0000000000 --- a/tsdb/tsi1/stats_test.go +++ /dev/null @@ -1,42 +0,0 @@ -package tsi1_test - -import ( - "bytes" - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/influxdata/influxdb/v2/tsdb/tsi1" -) - -func TestMeasurementCardinalityStats_WriteTo(t *testing.T) { - t.Run("Empty", func(t *testing.T) { - stats, other := tsi1.NewMeasurementCardinalityStats(), tsi1.NewMeasurementCardinalityStats() - var buf bytes.Buffer - if wn, err := stats.WriteTo(&buf); err != nil { - t.Fatal(err) - } else if rn, err := other.ReadFrom(&buf); err != nil { - t.Fatal(err) - } else if wn != rn { - t.Fatalf("byte count mismatch: w=%d r=%d", wn, rn) - } else if diff := cmp.Diff(stats, other); diff != "" { - t.Fatal(diff) - } - }) - - t.Run("WithData", func(t *testing.T) { - stats, other := tsi1.NewMeasurementCardinalityStats(), tsi1.NewMeasurementCardinalityStats() - stats["cpu"] = 100 - stats["mem"] = 2000 - - var buf bytes.Buffer - if wn, err := stats.WriteTo(&buf); err != nil { - t.Fatal(err) - } else if rn, err := other.ReadFrom(&buf); err != nil { - t.Fatal(err) - } else if wn != rn { - t.Fatalf("byte count mismatch: w=%d r=%d", wn, rn) - } else if diff := cmp.Diff(stats, other); diff != "" { - t.Fatal(diff) - } - }) -} diff --git a/tsdb/tsi1/tag_block.go b/tsdb/tsi1/tag_block.go deleted file mode 100644 index 7135a89d58..0000000000 --- a/tsdb/tsi1/tag_block.go +++ /dev/null @@ -1,836 +0,0 @@ -package tsi1 - -import ( - "bytes" - "encoding/binary" - "errors" - "fmt" - "io" - - "github.com/influxdata/influxdb/v2/pkg/mincore" - "github.com/influxdata/influxdb/v2/pkg/rhh" - "github.com/influxdata/influxdb/v2/tsdb" -) - -// TagBlockVersion is the version of the tag block. -const TagBlockVersion = 1 - -// Tag key flag constants. -const ( - TagKeyTombstoneFlag = 0x01 -) - -// Tag value flag constants. -const ( - TagValueTombstoneFlag = 0x01 - TagValueSeriesIDSetFlag = 0x02 -) - -// TagBlock variable size constants. -const ( - // TagBlock key block fields. - TagKeyNSize = 8 - TagKeyOffsetSize = 8 - - // TagBlock value block fields. - TagValueNSize = 8 - TagValueOffsetSize = 8 -) - -// TagBlock errors. -var ( - ErrUnsupportedTagBlockVersion = errors.New("unsupported tag block version") - ErrTagBlockSizeMismatch = errors.New("tag block size mismatch") -) - -// TagBlock represents tag key/value block for a single measurement. -type TagBlock struct { - data []byte - - valueData []byte - keyData []byte - hashData []byte - - version int // tag block version -} - -// Version returns the encoding version parsed from the data. -// Only valid after UnmarshalBinary() has been successfully invoked. -func (blk *TagBlock) Version() int { return blk.version } - -// UnmarshalBinary unpacks data into the tag block. 
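Reviewer note (not part of this patch): `MeasurementCardinalityStats.WriteTo` above computes a CRC32 over the buffered payload (varint count plus measurement entries) and writes it just before that payload, but the removed `ReadFrom` reads those 4 bytes and does not appear to compare them against the data. A hypothetical helper showing how a reader could verify before decoding, assuming the same checksum-then-payload layout:

```go
package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"hash/crc32"
	"io"
)

// verifyStatsPayload reads the 4-byte big-endian CRC32 that precedes the
// stats payload, slurps the remaining bytes, and checks the checksum before
// handing the payload back for varint decoding.
func verifyStatsPayload(r io.Reader) ([]byte, error) {
	var want uint32
	if err := binary.Read(r, binary.BigEndian, &want); err != nil {
		return nil, fmt.Errorf("cannot read checksum: %w", err)
	}
	payload, err := io.ReadAll(r)
	if err != nil {
		return nil, fmt.Errorf("cannot read payload: %w", err)
	}
	if got := crc32.ChecksumIEEE(payload); got != want {
		return nil, fmt.Errorf("stats checksum mismatch: got %08x, want %08x", got, want)
	}
	return payload, nil
}

func main() {
	// Build a fake "checksum + payload" stream the way WriteTo does.
	payload := []byte{0x02, 'c', 'p', 'u'} // arbitrary bytes for the demo
	var buf bytes.Buffer
	binary.Write(&buf, binary.BigEndian, crc32.ChecksumIEEE(payload))
	buf.Write(payload)

	if _, err := verifyStatsPayload(&buf); err != nil {
		panic(err)
	}
	fmt.Println("checksum ok")
}
```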
Tag block is not copied so data -// should be retained and unchanged after being passed into this function. -func (blk *TagBlock) UnmarshalBinary(data []byte) error { - // Read trailer. - t, err := ReadTagBlockTrailer(data) - if err != nil { - return err - } - - // Verify data size is correct. - if int64(len(data)) != t.Size { - return ErrTagBlockSizeMismatch - } - - // Save data section. - blk.valueData = data[t.ValueData.Offset:] - blk.valueData = blk.valueData[:t.ValueData.Size] - - // Save key data section. - blk.keyData = data[t.KeyData.Offset:] - blk.keyData = blk.keyData[:t.KeyData.Size] - - // Save hash index block. - blk.hashData = data[t.HashIndex.Offset:] - blk.hashData = blk.hashData[:t.HashIndex.Size] - - // Save entire block. - blk.data = data - - return nil -} - -// TagKeyElem returns an element for a tag key. -// Returns an element with a nil key if not found. -func (blk *TagBlock) TagKeyElem(key []byte, limiter *mincore.Limiter) TagKeyElem { - var elem TagBlockKeyElem - if !blk.DecodeTagKeyElem(key, &elem, limiter) { - return nil - } - return &elem -} - -func (blk *TagBlock) DecodeTagKeyElem(key []byte, elem *TagBlockKeyElem, limiter *mincore.Limiter) bool { - keyN := int64(binary.BigEndian.Uint64(blk.hashData[:TagKeyNSize])) - hash := rhh.HashKey(key) - pos := hash % keyN - - // Track current distance - var d int64 - for { - // Find offset of tag key. - _ = wait(limiter, blk.hashData[TagKeyNSize+(pos*TagKeyOffsetSize):TagKeyNSize+(pos*TagKeyOffsetSize)+8]) - offset := binary.BigEndian.Uint64(blk.hashData[TagKeyNSize+(pos*TagKeyOffsetSize):]) - if offset == 0 { - return false - } - - // Parse into element. - elem.unmarshal(blk.data[offset:], blk.data) - _ = wait(limiter, blk.data[offset:offset+uint64(elem.size)]) - - // Return if keys match. - if bytes.Equal(elem.key, key) { - return true - } - - // Check if we've exceeded the probe distance. - if d > rhh.Dist(rhh.HashKey(elem.key), pos, keyN) { - return false - } - - // Move position forward. - pos = (pos + 1) % keyN - d++ - - if d > keyN { - return false - } - } -} - -// TagValueElem returns an element for a tag value. -func (blk *TagBlock) TagValueElem(key, value []byte, limiter *mincore.Limiter) TagValueElem { - var valueElem TagBlockValueElem - if !blk.DecodeTagValueElem(key, value, &valueElem, limiter) { - return nil - } - return &valueElem -} - -// DecodeTagValueElem returns an element for a tag value. -func (blk *TagBlock) DecodeTagValueElem(key, value []byte, valueElem *TagBlockValueElem, limiter *mincore.Limiter) bool { - // Find key element, exit if not found. - var keyElem TagBlockKeyElem - if !blk.DecodeTagKeyElem(key, &keyElem, limiter) { - return false - } - - // Slice hash index data. - hashData := keyElem.hashIndex.buf - - _ = wait(limiter, hashData[:TagValueNSize]) - valueN := int64(binary.BigEndian.Uint64(hashData[:TagValueNSize])) - hash := rhh.HashKey(value) - pos := hash % valueN - - // Track current distance - var d int64 - for { - // Find offset of tag value. - _ = wait(limiter, hashData[TagValueNSize+(pos*TagValueOffsetSize):TagValueNSize+(pos*TagValueOffsetSize)+8]) - offset := binary.BigEndian.Uint64(hashData[TagValueNSize+(pos*TagValueOffsetSize):]) - if offset == 0 { - return false - } - - // Parse into element. - valueElem.unmarshal(blk.data[offset:]) - _ = wait(limiter, blk.data[offset:offset+uint64(valueElem.size)]) - - // Return if values match. - if bytes.Equal(valueElem.value, value) { - return true - } - - // Check if we've exceeded the probe distance. 
- max := rhh.Dist(rhh.HashKey(valueElem.value), pos, valueN) - if d > max { - return false - } - - // Move position forward. - pos = (pos + 1) % valueN - d++ - - if d > valueN { - return false - } - } -} - -// TagKeyIterator returns an iterator over all the keys in the block. -func (blk *TagBlock) TagKeyIterator(limiter *mincore.Limiter) TagKeyIterator { - return &tagBlockKeyIterator{ - blk: blk, - keyData: blk.keyData, - limiter: limiter, - } -} - -// tagBlockKeyIterator represents an iterator over all keys in a TagBlock. -type tagBlockKeyIterator struct { - blk *TagBlock - keyData []byte - e TagBlockKeyElem - limiter *mincore.Limiter -} - -// Next returns the next element in the iterator. -func (itr *tagBlockKeyIterator) Next() TagKeyElem { - // Exit when there is no data left. - if len(itr.keyData) == 0 { - return nil - } - - // Unmarshal next element & move data forward. - itr.e.unmarshal(itr.keyData, itr.blk.data) - _ = wait(itr.limiter, itr.keyData[:itr.e.size]) - itr.keyData = itr.keyData[itr.e.size:] - - assert(len(itr.e.Key()) > 0, "invalid zero-length tag key") - return &itr.e -} - -// tagBlockValueIterator represents an iterator over all values for a tag key. -type tagBlockValueIterator struct { - data []byte - e TagBlockValueElem - limiter *mincore.Limiter -} - -// Next returns the next element in the iterator. -func (itr *tagBlockValueIterator) Next() TagValueElem { - // Exit when there is no data left. - if len(itr.data) == 0 { - return nil - } - - // Unmarshal next element & move data forward. - itr.e.unmarshal(itr.data) - _ = wait(itr.limiter, itr.data[:itr.e.size]) - itr.data = itr.data[itr.e.size:] - - assert(len(itr.e.Value()) > 0, "invalid zero-length tag value") - return &itr.e -} - -// TagBlockKeyElem represents a tag key element in a TagBlock. -type TagBlockKeyElem struct { - flag byte - key []byte - - // Value data - data struct { - offset uint64 - size uint64 - buf []byte - } - - // Value hash index data - hashIndex struct { - offset uint64 - size uint64 - buf []byte - } - - size int -} - -// Deleted returns true if the key has been tombstoned. -func (e *TagBlockKeyElem) Deleted() bool { return (e.flag & TagKeyTombstoneFlag) != 0 } - -// Key returns the key name of the element. -func (e *TagBlockKeyElem) Key() []byte { return e.key } - -// TagValueIterator returns an iterator over the key's values. -func (e *TagBlockKeyElem) TagValueIterator(limiter *mincore.Limiter) TagValueIterator { - return &tagBlockValueIterator{data: e.data.buf, limiter: limiter} -} - -// unmarshal unmarshals buf into e. -// The data argument represents the entire block data. -func (e *TagBlockKeyElem) unmarshal(buf, data []byte) { - start := len(buf) - - // Parse flag data. - e.flag, buf = buf[0], buf[1:] - - // Parse data offset/size. - e.data.offset, buf = binary.BigEndian.Uint64(buf), buf[8:] - e.data.size, buf = binary.BigEndian.Uint64(buf), buf[8:] - - // Slice data. - e.data.buf = data[e.data.offset:] - e.data.buf = e.data.buf[:e.data.size] - - // Parse hash index offset/size. - e.hashIndex.offset, buf = binary.BigEndian.Uint64(buf), buf[8:] - e.hashIndex.size, buf = binary.BigEndian.Uint64(buf), buf[8:] - - // Slice hash index data. - e.hashIndex.buf = data[e.hashIndex.offset:] - e.hashIndex.buf = e.hashIndex.buf[:e.hashIndex.size] - - // Parse key. - n, sz := binary.Uvarint(buf) - e.key, buf = buf[sz:sz+int(n)], buf[int(n)+sz:] - - // Save length of elem. - e.size = start - len(buf) -} - -// TagBlockValueElem represents a tag value element. 
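Reviewer note (not part of this patch): `DecodeTagKeyElem` and `DecodeTagValueElem` above walk an on-disk robin-hood hash index: start at `hash % n`, probe linearly, and stop as soon as the current probe distance exceeds the resident entry's own distance from its home slot, because robin-hood insertion keeps entries ordered by probe distance. A minimal in-memory sketch of that early-exit lookup (generic types, not the rhh package's API):

```go
package main

import (
	"bytes"
	"fmt"
	"hash/fnv"
)

type entry struct{ key, value []byte }

func hashKey(key []byte) int64 {
	h := fnv.New64a()
	h.Write(key)
	return int64(h.Sum64() & 0x7fffffffffffffff) // keep it non-negative
}

// dist is the probe distance of a key hashed to `hash` when stored at `pos`
// in a table of size n.
func dist(hash, pos, n int64) int64 { return (pos + n - hash%n) % n }

// lookup mirrors the probing loop in the removed DecodeTagKeyElem: advance
// linearly, but bail out once our distance exceeds the resident entry's,
// since robin-hood insertion would otherwise have displaced that entry.
func lookup(slots []entry, key []byte) ([]byte, bool) {
	n := int64(len(slots))
	pos := hashKey(key) % n
	for d := int64(0); d <= n; d++ {
		e := slots[pos]
		if e.key == nil {
			return nil, false // empty slot: key absent
		}
		if bytes.Equal(e.key, key) {
			return e.value, true
		}
		if d > dist(hashKey(e.key), pos, n) {
			return nil, false // key would have been placed before this entry
		}
		pos = (pos + 1) % n
	}
	return nil, false
}

func main() {
	slots := make([]entry, 8)
	k, v := []byte("region"), []byte("us-east")
	slots[hashKey(k)%8] = entry{k, v} // place at its home slot for the demo
	got, ok := lookup(slots, k)
	fmt.Println(string(got), ok) // us-east true
}
```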
-type TagBlockValueElem struct { - flag byte - value []byte - - // Legacy uvarint-encoded series data. - // Mutually exclusive with seriesIDSetData field. - series struct { - n uint64 // Series count - data []byte // Raw series data - } - - // Roaring bitmap encoded series data. - // Mutually exclusive with series.data field. - seriesIDSetData []byte - - size int -} - -// Deleted returns true if the element has been tombstoned. -func (e *TagBlockValueElem) Deleted() bool { return (e.flag & TagValueTombstoneFlag) != 0 } - -// Value returns the value for the element. -func (e *TagBlockValueElem) Value() []byte { return e.value } - -// SeriesN returns the series count. -func (e *TagBlockValueElem) SeriesN() uint64 { return e.series.n } - -// SeriesData returns the raw series data. -func (e *TagBlockValueElem) SeriesData() []byte { return e.series.data } - -// SeriesID returns series ID at an index. -func (e *TagBlockValueElem) SeriesID(i int) uint64 { - return binary.BigEndian.Uint64(e.series.data[i*SeriesIDSize:]) -} - -// SeriesIDs returns a list decoded series ids. -func (e *TagBlockValueElem) SeriesIDs() ([]uint64, error) { - if e.seriesIDSetData != nil { - ss, err := e.SeriesIDSet() - if err != nil { - return nil, err - } - return ss.Slice(), nil - } - - a := make([]uint64, 0, e.series.n) - var prev uint64 - for data := e.series.data; len(data) > 0; { - delta, n, err := uvarint(data) - if err != nil { - return nil, err - } - data = data[n:] - - seriesID := prev + uint64(delta) - a = append(a, seriesID) - prev = seriesID - } - return a, nil -} - -// SeriesIDSet returns a set of series ids. -func (e *TagBlockValueElem) SeriesIDSet() (*tsdb.SeriesIDSet, error) { - ss := tsdb.NewSeriesIDSet() - - // Read bitmap data directly from mmap, if available. - if e.seriesIDSetData != nil { - if err := ss.UnmarshalBinaryUnsafe(e.seriesIDSetData); err != nil { - return nil, err - } - return ss, nil - } - - // Otherwise decode series ids from uvarint encoding. - var prev uint64 - for data := e.series.data; len(data) > 0; { - delta, n, err := uvarint(data) - if err != nil { - return nil, err - } - data = data[n:] - - seriesID := prev + uint64(delta) - ss.AddNoLock(tsdb.NewSeriesID(seriesID)) - prev = seriesID - } - return ss, nil -} - -// Size returns the size of the element. -func (e *TagBlockValueElem) Size() int { return e.size } - -// unmarshal unmarshals buf into e. -func (e *TagBlockValueElem) unmarshal(buf []byte) { - start := len(buf) - - // Parse flag data. - e.flag, buf = buf[0], buf[1:] - - // Parse value. - sz, n := binary.Uvarint(buf) - e.value, buf = buf[n:n+int(sz)], buf[n+int(sz):] - - // Parse series count. - v, n := binary.Uvarint(buf) - e.series.n = uint64(v) - buf = buf[n:] - - // Parse data block size. - sz, n = binary.Uvarint(buf) - buf = buf[n:] - - // Parse series data (original uvarint encoded or roaring bitmap). - if e.flag&TagValueSeriesIDSetFlag == 0 { - e.series.data, buf = buf[:sz], buf[sz:] - } else { - // buf = memalign(buf) - e.seriesIDSetData, buf = buf, buf[sz:] - } - - // Save length of elem. - e.size = start - len(buf) -} - -// TagBlockTrailerSize is the total size of the on-disk trailer. -const TagBlockTrailerSize = 0 + - 8 + 8 + // value data offset/size - 8 + 8 + // key data offset/size - 8 + 8 + // hash index offset/size - 8 + // size - 2 // version - -// TagBlockTrailer represents meta data at the end of a TagBlock. -type TagBlockTrailer struct { - Version int // Encoding version - Size int64 // Total size w/ trailer - - // Offset & size of value data section. 
- ValueData struct { - Offset int64 - Size int64 - } - - // Offset & size of key data section. - KeyData struct { - Offset int64 - Size int64 - } - - // Offset & size of hash map section. - HashIndex struct { - Offset int64 - Size int64 - } -} - -// WriteTo writes the trailer to w. -func (t *TagBlockTrailer) WriteTo(w io.Writer) (n int64, err error) { - // Write data info. - if err := writeUint64To(w, uint64(t.ValueData.Offset), &n); err != nil { - return n, err - } else if err := writeUint64To(w, uint64(t.ValueData.Size), &n); err != nil { - return n, err - } - - // Write key data info. - if err := writeUint64To(w, uint64(t.KeyData.Offset), &n); err != nil { - return n, err - } else if err := writeUint64To(w, uint64(t.KeyData.Size), &n); err != nil { - return n, err - } - - // Write hash index info. - if err := writeUint64To(w, uint64(t.HashIndex.Offset), &n); err != nil { - return n, err - } else if err := writeUint64To(w, uint64(t.HashIndex.Size), &n); err != nil { - return n, err - } - - // Write total size & encoding version. - if err := writeUint64To(w, uint64(t.Size), &n); err != nil { - return n, err - } else if err := writeUint16To(w, IndexFileVersion, &n); err != nil { - return n, err - } - - return n, nil -} - -// ReadTagBlockTrailer returns the tag block trailer from data. -func ReadTagBlockTrailer(data []byte) (TagBlockTrailer, error) { - var t TagBlockTrailer - - // Read version. - t.Version = int(binary.BigEndian.Uint16(data[len(data)-2:])) - if t.Version != TagBlockVersion { - return t, ErrUnsupportedTagBlockVersion - } - - // Slice trailer data. - buf := data[len(data)-TagBlockTrailerSize:] - - // Read data section info. - t.ValueData.Offset, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] - t.ValueData.Size, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] - - // Read key section info. - t.KeyData.Offset, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] - t.KeyData.Size, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] - - // Read hash section info. - t.HashIndex.Offset, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] - t.HashIndex.Size, buf = int64(binary.BigEndian.Uint64(buf[0:8])), buf[8:] - - // Read total size. - t.Size = int64(binary.BigEndian.Uint64(buf[0:8])) - - return t, nil -} - -// TagBlockEncoder encodes a tags to a TagBlock section. -type TagBlockEncoder struct { - w io.Writer - buf bytes.Buffer - - // Track value offsets. - offsets *rhh.HashMap - - // Track bytes written, sections. - n int64 - trailer TagBlockTrailer - - // Track tag keys. - keys []tagKeyEncodeEntry - prevValue []byte -} - -// NewTagBlockEncoder returns a new TagBlockEncoder. -func NewTagBlockEncoder(w io.Writer) *TagBlockEncoder { - return &TagBlockEncoder{ - w: w, - offsets: rhh.NewHashMap(rhh.Options{LoadFactor: LoadFactor}), - trailer: TagBlockTrailer{ - Version: TagBlockVersion, - }, - } -} - -// N returns the number of bytes written. -func (enc *TagBlockEncoder) N() int64 { return enc.n } - -// EncodeKey writes a tag key to the underlying writer. -func (enc *TagBlockEncoder) EncodeKey(key []byte, deleted bool) error { - // An initial empty byte must be written. - if err := enc.ensureHeaderWritten(); err != nil { - return err - } - - // Verify key is lexicographically after previous key. 
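Reviewer note (not part of this patch): the trailer is a fixed 58-byte footer, and one asymmetry is worth flagging while the file moves: `TagBlockTrailer.WriteTo` stamps the version slot with `IndexFileVersion`, while `ReadTagBlockTrailer` validates against `TagBlockVersion`; presumably both are 1 today so this round-trips, but it is easy to trip over if either constant ever changes. A tiny sketch of the fixed layout and the tail-of-block version read:

```go
package main

import (
	"encoding/binary"
	"fmt"
)

// Same fixed layout as the removed TagBlockTrailerSize: three (offset, size)
// pairs, an 8-byte total size, and a 2-byte version at the very end.
const tagBlockTrailerSize = 3*(8+8) + 8 + 2 // 58 bytes

// trailerVersion reads the encoding version from the last two bytes of a
// serialized tag block, as ReadTagBlockTrailer does.
func trailerVersion(block []byte) int {
	return int(binary.BigEndian.Uint16(block[len(block)-2:]))
}

func main() {
	block := make([]byte, tagBlockTrailerSize)
	binary.BigEndian.PutUint16(block[len(block)-2:], 1) // TagBlockVersion == 1
	fmt.Println(tagBlockTrailerSize, trailerVersion(block)) // 58 1
}
```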
- if len(enc.keys) > 0 { - prev := enc.keys[len(enc.keys)-1].key - if cmp := bytes.Compare(prev, key); cmp == 1 { - return fmt.Errorf("tag key out of order: prev=%s, new=%s", prev, key) - } else if cmp == 0 { - return fmt.Errorf("tag key already encoded: %s", key) - } - } - - // Flush values section for key. - if err := enc.flushValueHashIndex(); err != nil { - return err - } - - // Append key on to the end of the key list. - entry := tagKeyEncodeEntry{ - key: key, - deleted: deleted, - } - entry.data.offset = enc.n - - enc.keys = append(enc.keys, entry) - - // Clear previous value. - enc.prevValue = nil - - return nil -} - -// EncodeValue writes a tag value to the underlying writer. -// The tag key must be lexicographical sorted after the previous encoded tag key. -func (enc *TagBlockEncoder) EncodeValue(value []byte, deleted bool, ss *tsdb.SeriesIDSet) error { - if len(enc.keys) == 0 { - return fmt.Errorf("tag key must be encoded before encoding values") - } else if len(value) == 0 { - return fmt.Errorf("zero length tag value not allowed") - } - - // Validate that keys are in-order. - if cmp := bytes.Compare(enc.prevValue, value); cmp == 1 { - return fmt.Errorf("tag value out of order: prev=%s, new=%s", enc.prevValue, value) - } else if cmp == 0 { - return fmt.Errorf("tag value already encoded: %s", value) - } - - // Save offset to hash map. - enc.offsets.Put(value, enc.n) - - // Write flag. - if err := writeUint8To(enc.w, encodeTagValueFlag(deleted), &enc.n); err != nil { - return err - } - - // Write value. - if err := writeUvarintTo(enc.w, uint64(len(value)), &enc.n); err != nil { - return err - } else if err := writeTo(enc.w, value, &enc.n); err != nil { - return err - } - - // Build series data in buffer. - enc.buf.Reset() - if _, err := ss.WriteTo(&enc.buf); err != nil { - return err - } - - // Write series count. - if err := writeUvarintTo(enc.w, uint64(ss.Cardinality()), &enc.n); err != nil { - return err - } - - // Write data size & buffer. - if err := writeUvarintTo(enc.w, uint64(enc.buf.Len()), &enc.n); err != nil { - return err - } - - // Word align bitmap data. - // if offset := (enc.n) % 8; offset != 0 { - // if err := writeTo(enc.w, make([]byte, 8-offset), &enc.n); err != nil { - // return err - // } - // } - - nn, err := enc.buf.WriteTo(enc.w) - if enc.n += nn; err != nil { - return err - } - - // Save previous value. - enc.prevValue = value - - return nil -} - -// Close flushes the trailer of the encoder to the writer. -func (enc *TagBlockEncoder) Close() error { - // Flush last value set. - if err := enc.ensureHeaderWritten(); err != nil { - return err - } else if err := enc.flushValueHashIndex(); err != nil { - return err - } - - // Save ending position of entire data block. - enc.trailer.ValueData.Size = enc.n - enc.trailer.ValueData.Offset - - // Write key block to point to value blocks. - if err := enc.encodeTagKeyBlock(); err != nil { - return err - } - - // Compute total size w/ trailer. - enc.trailer.Size = enc.n + TagBlockTrailerSize - - // Write trailer. - nn, err := enc.trailer.WriteTo(enc.w) - enc.n += nn - return err -} - -// ensureHeaderWritten writes a single byte to offset the rest of the block. -func (enc *TagBlockEncoder) ensureHeaderWritten() error { - if enc.n > 0 { - return nil - } else if _, err := enc.w.Write([]byte{0}); err != nil { - return err - } - - enc.n++ - enc.trailer.ValueData.Offset = enc.n - - return nil -} - -// flushValueHashIndex builds writes the hash map at the end of a value set. 
-func (enc *TagBlockEncoder) flushValueHashIndex() error { - // Ignore if no keys have been written. - if len(enc.keys) == 0 { - return nil - } - key := &enc.keys[len(enc.keys)-1] - - // Save size of data section. - key.data.size = enc.n - key.data.offset - - // Encode hash map length. - key.hashIndex.offset = enc.n - if err := writeUint64To(enc.w, uint64(enc.offsets.Cap()), &enc.n); err != nil { - return err - } - - // Encode hash map offset entries. - for i := int64(0); i < enc.offsets.Cap(); i++ { - _, v := enc.offsets.Elem(i) - offset, _ := v.(int64) - if err := writeUint64To(enc.w, uint64(offset), &enc.n); err != nil { - return err - } - } - key.hashIndex.size = enc.n - key.hashIndex.offset - - // Clear offsets. - enc.offsets = rhh.NewHashMap(rhh.Options{LoadFactor: LoadFactor}) - - return nil -} - -// encodeTagKeyBlock encodes the keys section to the writer. -func (enc *TagBlockEncoder) encodeTagKeyBlock() error { - offsets := rhh.NewHashMap(rhh.Options{Capacity: int64(len(enc.keys)), LoadFactor: LoadFactor}) - - // Encode key list in sorted order. - enc.trailer.KeyData.Offset = enc.n - for i := range enc.keys { - entry := &enc.keys[i] - - // Save current offset so we can use it in the hash index. - offsets.Put(entry.key, enc.n) - - if err := writeUint8To(enc.w, encodeTagKeyFlag(entry.deleted), &enc.n); err != nil { - return err - } - - // Write value data offset & size. - if err := writeUint64To(enc.w, uint64(entry.data.offset), &enc.n); err != nil { - return err - } else if err := writeUint64To(enc.w, uint64(entry.data.size), &enc.n); err != nil { - return err - } - - // Write value hash index offset & size. - if err := writeUint64To(enc.w, uint64(entry.hashIndex.offset), &enc.n); err != nil { - return err - } else if err := writeUint64To(enc.w, uint64(entry.hashIndex.size), &enc.n); err != nil { - return err - } - - // Write key length and data. - if err := writeUvarintTo(enc.w, uint64(len(entry.key)), &enc.n); err != nil { - return err - } else if err := writeTo(enc.w, entry.key, &enc.n); err != nil { - return err - } - } - enc.trailer.KeyData.Size = enc.n - enc.trailer.KeyData.Offset - - // Encode hash map length. - enc.trailer.HashIndex.Offset = enc.n - if err := writeUint64To(enc.w, uint64(offsets.Cap()), &enc.n); err != nil { - return err - } - - // Encode hash map offset entries. 
- for i := int64(0); i < offsets.Cap(); i++ { - _, v := offsets.Elem(i) - offset, _ := v.(int64) - if err := writeUint64To(enc.w, uint64(offset), &enc.n); err != nil { - return err - } - } - enc.trailer.HashIndex.Size = enc.n - enc.trailer.HashIndex.Offset - - return nil -} - -type tagKeyEncodeEntry struct { - key []byte - deleted bool - - data struct { - offset int64 - size int64 - } - hashIndex struct { - offset int64 - size int64 - } -} - -func encodeTagKeyFlag(deleted bool) byte { - var flag byte - if deleted { - flag |= TagKeyTombstoneFlag - } - return flag -} - -func encodeTagValueFlag(deleted bool) byte { - flag := byte(TagValueSeriesIDSetFlag) - if deleted { - flag |= TagValueTombstoneFlag - } - return flag -} diff --git a/tsdb/tsi1/tag_block_test.go b/tsdb/tsi1/tag_block_test.go deleted file mode 100644 index bf5aa0b1db..0000000000 --- a/tsdb/tsi1/tag_block_test.go +++ /dev/null @@ -1,158 +0,0 @@ -package tsi1_test - -import ( - "bytes" - "fmt" - "reflect" - "testing" - - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/tsi1" -) - -func newSeriesIDSet(ids ...int) *tsdb.SeriesIDSet { - out := make([]tsdb.SeriesID, 0, len(ids)) - for _, v := range ids { - out = append(out, tsdb.NewSeriesID(uint64(v))) - } - return tsdb.NewSeriesIDSet(out...) -} - -// Ensure tag blocks can be written and opened. -func TestTagBlockWriter(t *testing.T) { - // Write 3 series to writer. - var buf bytes.Buffer - enc := tsi1.NewTagBlockEncoder(&buf) - - if err := enc.EncodeKey([]byte("host"), false); err != nil { - t.Fatal(err) - } else if err := enc.EncodeValue([]byte("server0"), false, newSeriesIDSet(1)); err != nil { - t.Fatal(err) - } else if err := enc.EncodeValue([]byte("server1"), false, newSeriesIDSet(2)); err != nil { - t.Fatal(err) - } else if err := enc.EncodeValue([]byte("server2"), false, newSeriesIDSet(3)); err != nil { - t.Fatal(err) - } - - if err := enc.EncodeKey([]byte("region"), false); err != nil { - t.Fatal(err) - } else if err := enc.EncodeValue([]byte("us-east"), false, newSeriesIDSet(1, 2)); err != nil { - t.Fatal(err) - } else if err := enc.EncodeValue([]byte("us-west"), false, newSeriesIDSet(3)); err != nil { - t.Fatal(err) - } - - // Flush encoder. - if err := enc.Close(); err != nil { - t.Fatal(err) - } else if int(enc.N()) != buf.Len() { - t.Fatalf("bytes written mismatch: %d, expected %d", enc.N(), buf.Len()) - } - - // Unmarshal into a block. - var blk tsi1.TagBlock - if err := blk.UnmarshalBinary(buf.Bytes()); err != nil { - t.Fatal(err) - } - - // Verify data. 
- if e := blk.TagValueElem([]byte("region"), []byte("us-east"), nil); e == nil { - t.Fatal("expected element") - } else if a, err := e.(*tsi1.TagBlockValueElem).SeriesIDs(); err != nil { - t.Fatalf("unexpected error: %v", err) - } else if !reflect.DeepEqual(a, []uint64{1, 2}) { - t.Fatalf("unexpected series ids: %#v", a) - } - - if e := blk.TagValueElem([]byte("region"), []byte("us-west"), nil); e == nil { - t.Fatal("expected element") - } else if a, err := e.(*tsi1.TagBlockValueElem).SeriesIDs(); err != nil { - t.Fatalf("unexpected error: %v", err) - } else if !reflect.DeepEqual(a, []uint64{3}) { - t.Fatalf("unexpected series ids: %#v", a) - } - if e := blk.TagValueElem([]byte("host"), []byte("server0"), nil); e == nil { - t.Fatal("expected element") - } else if a, err := e.(*tsi1.TagBlockValueElem).SeriesIDs(); err != nil { - t.Fatalf("unexpected error: %v", err) - } else if !reflect.DeepEqual(a, []uint64{1}) { - t.Fatalf("unexpected series ids: %#v", a) - } - if e := blk.TagValueElem([]byte("host"), []byte("server1"), nil); e == nil { - t.Fatal("expected element") - } else if a, err := e.(*tsi1.TagBlockValueElem).SeriesIDs(); err != nil { - t.Fatalf("unexpected error: %v", err) - } else if !reflect.DeepEqual(a, []uint64{2}) { - t.Fatalf("unexpected series ids: %#v", a) - } - if e := blk.TagValueElem([]byte("host"), []byte("server2"), nil); e == nil { - t.Fatal("expected element") - } else if a, err := e.(*tsi1.TagBlockValueElem).SeriesIDs(); err != nil { - t.Fatalf("unexpected error: %v", err) - } else if !reflect.DeepEqual(a, []uint64{3}) { - t.Fatalf("unexpected series ids: %#v", a) - } -} - -var benchmarkTagBlock10x1000 *tsi1.TagBlock -var benchmarkTagBlock100x1000 *tsi1.TagBlock -var benchmarkTagBlock1000x1000 *tsi1.TagBlock -var benchmarkTagBlock1x1000000 *tsi1.TagBlock - -func BenchmarkTagBlock_SeriesN_10_1000(b *testing.B) { - benchmarkTagBlock_SeriesN(b, 10, 1000, &benchmarkTagBlock10x1000) -} -func BenchmarkTagBlock_SeriesN_100_1000(b *testing.B) { - benchmarkTagBlock_SeriesN(b, 100, 1000, &benchmarkTagBlock100x1000) -} -func BenchmarkTagBlock_SeriesN_1000_1000(b *testing.B) { - benchmarkTagBlock_SeriesN(b, 1000, 1000, &benchmarkTagBlock1000x1000) -} -func BenchmarkTagBlock_SeriesN_1_1000000(b *testing.B) { - benchmarkTagBlock_SeriesN(b, 1, 1000000, &benchmarkTagBlock1x1000000) -} - -func benchmarkTagBlock_SeriesN(b *testing.B, tagN, valueN int, blk **tsi1.TagBlock) { - if (*blk) == nil { - var buf bytes.Buffer - enc := tsi1.NewTagBlockEncoder(&buf) - - // Write block. - for i := 0; i < tagN; i++ { - if err := enc.EncodeKey([]byte(fmt.Sprintf("%08d", i)), false); err != nil { - b.Fatal(err) - } - - for j := 0; j < valueN; j++ { - if err := enc.EncodeValue([]byte(fmt.Sprintf("%08d", j)), false, newSeriesIDSet(1)); err != nil { - b.Fatal(err) - } - } - } - - // Flush encoder. - if err := enc.Close(); err != nil { - b.Fatal(err) - } - b.Log("size", buf.Len()) - - // Unmarshal into a block. - *blk = &tsi1.TagBlock{} - if err := (*blk).UnmarshalBinary(buf.Bytes()); err != nil { - b.Fatal(err) - } - } - - // Benchmark lookups. 
- b.ReportAllocs() - b.ResetTimer() - - key, value := []byte("0"), []byte("0") - for i := 0; i < b.N; i++ { - if e := (*blk).TagValueElem(key, value, nil); e == nil { - b.Fatal("expected element") - } else if n := e.(*tsi1.TagBlockValueElem).SeriesN(); n != 1 { - b.Fatalf("unexpected series count: %d", n) - } - } -} diff --git a/tsdb/tsi1/tsi1.go b/tsdb/tsi1/tsi1.go deleted file mode 100644 index b59f726bf2..0000000000 --- a/tsdb/tsi1/tsi1.go +++ /dev/null @@ -1,541 +0,0 @@ -package tsi1 - -import ( - "bytes" - "encoding/binary" - "fmt" - "io" - - "github.com/influxdata/influxdb/v2/pkg/mincore" - "github.com/influxdata/influxdb/v2/tsdb" -) - -// LoadFactor is the fill percent for RHH indexes. -const LoadFactor = 80 - -// MeasurementElem represents a generic measurement element. -type MeasurementElem interface { - Name() []byte - Deleted() bool - // HasSeries() bool -} - -// MeasurementElems represents a list of MeasurementElem. -type MeasurementElems []MeasurementElem - -func (a MeasurementElems) Len() int { return len(a) } -func (a MeasurementElems) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a MeasurementElems) Less(i, j int) bool { return bytes.Compare(a[i].Name(), a[j].Name()) == -1 } - -// MeasurementIterator represents a iterator over a list of measurements. -type MeasurementIterator interface { - Next() MeasurementElem -} - -// MergeMeasurementIterators returns an iterator that merges a set of iterators. -// Iterators that are first in the list take precedence and a deletion by those -// early iterators will invalidate elements by later iterators. -func MergeMeasurementIterators(itrs ...MeasurementIterator) MeasurementIterator { - if len(itrs) == 0 { - return nil - } - - return &measurementMergeIterator{ - e: make(measurementMergeElem, 0, len(itrs)), - buf: make([]MeasurementElem, len(itrs)), - itrs: itrs, - } -} - -type measurementMergeIterator struct { - e measurementMergeElem - buf []MeasurementElem - itrs []MeasurementIterator -} - -// Next returns the element with the next lowest name across the iterators. -// -// If multiple iterators contain the same name then the first is returned -// and the remaining ones are skipped. -func (itr *measurementMergeIterator) Next() MeasurementElem { - // Find next lowest name amongst the buffers. - var name []byte - for i, buf := range itr.buf { - // Fill buffer if empty. - if buf == nil { - if buf = itr.itrs[i].Next(); buf != nil { - itr.buf[i] = buf - } else { - continue - } - } - - // Find next lowest name. - if name == nil || bytes.Compare(itr.buf[i].Name(), name) == -1 { - name = itr.buf[i].Name() - } - } - - // Return nil if no elements remaining. - if name == nil { - return nil - } - - // Merge all elements together and clear buffers. - itr.e = itr.e[:0] - for i, buf := range itr.buf { - if buf == nil || !bytes.Equal(buf.Name(), name) { - continue - } - itr.e = append(itr.e, buf) - itr.buf[i] = nil - } - return itr.e -} - -// measurementMergeElem represents a merged measurement element. -type measurementMergeElem []MeasurementElem - -// Name returns the name of the first element. -func (p measurementMergeElem) Name() []byte { - if len(p) == 0 { - return nil - } - return p[0].Name() -} - -// Deleted returns the deleted flag of the first element. -func (p measurementMergeElem) Deleted() bool { - if len(p) == 0 { - return false - } - return p[0].Deleted() -} - -// tsdbMeasurementIteratorAdapter wraps MeasurementIterator to match the TSDB interface. -// This is needed because TSDB doesn't have a concept of "deleted" measurements. 
-type tsdbMeasurementIteratorAdapter struct { - itr MeasurementIterator -} - -// NewTSDBMeasurementIteratorAdapter return an iterator which implements tsdb.MeasurementIterator. -func NewTSDBMeasurementIteratorAdapter(itr MeasurementIterator) tsdb.MeasurementIterator { - if itr == nil { - return nil - } - return &tsdbMeasurementIteratorAdapter{itr: itr} -} - -func (itr *tsdbMeasurementIteratorAdapter) Close() error { return nil } - -func (itr *tsdbMeasurementIteratorAdapter) Next() ([]byte, error) { - for { - e := itr.itr.Next() - if e == nil { - return nil, nil - } else if e.Deleted() { - continue - } - return e.Name(), nil - } -} - -// TagKeyElem represents a generic tag key element. -type TagKeyElem interface { - Key() []byte - Deleted() bool - TagValueIterator(*mincore.Limiter) TagValueIterator -} - -// TagKeyIterator represents a iterator over a list of tag keys. -type TagKeyIterator interface { - Next() TagKeyElem -} - -// tsdbTagKeyIteratorAdapter wraps TagKeyIterator to match the TSDB interface. -// This is needed because TSDB doesn't have a concept of "deleted" tag keys. -type tsdbTagKeyIteratorAdapter struct { - itr TagKeyIterator -} - -// NewTSDBTagKeyIteratorAdapter return an iterator which implements tsdb.TagKeyIterator. -func NewTSDBTagKeyIteratorAdapter(itr TagKeyIterator) tsdb.TagKeyIterator { - if itr == nil { - return nil - } - return &tsdbTagKeyIteratorAdapter{itr: itr} -} - -func (itr *tsdbTagKeyIteratorAdapter) Close() error { return nil } - -func (itr *tsdbTagKeyIteratorAdapter) Next() ([]byte, error) { - for { - e := itr.itr.Next() - if e == nil { - return nil, nil - } else if e.Deleted() { - continue - } - return e.Key(), nil - } -} - -// MergeTagKeyIterators returns an iterator that merges a set of iterators. -// Iterators that are first in the list take precedence and a deletion by those -// early iterators will invalidate elements by later iterators. -func MergeTagKeyIterators(itrs ...TagKeyIterator) TagKeyIterator { - if len(itrs) == 0 { - return nil - } - - return &tagKeyMergeIterator{ - e: make(tagKeyMergeElem, 0, len(itrs)), - buf: make([]TagKeyElem, len(itrs)), - itrs: itrs, - } -} - -type tagKeyMergeIterator struct { - e tagKeyMergeElem - buf []TagKeyElem - itrs []TagKeyIterator -} - -// Next returns the element with the next lowest key across the iterators. -// -// If multiple iterators contain the same key then the first is returned -// and the remaining ones are skipped. -func (itr *tagKeyMergeIterator) Next() TagKeyElem { - // Find next lowest key amongst the buffers. - var key []byte - for i, buf := range itr.buf { - // Fill buffer. - if buf == nil { - if buf = itr.itrs[i].Next(); buf != nil { - itr.buf[i] = buf - } else { - continue - } - } - - // Find next lowest key. - if key == nil || bytes.Compare(buf.Key(), key) == -1 { - key = buf.Key() - } - } - - // Return nil if no elements remaining. - if key == nil { - return nil - } - - // Merge elements together & clear buffer. - itr.e = itr.e[:0] - for i, buf := range itr.buf { - if buf == nil || !bytes.Equal(buf.Key(), key) { - continue - } - itr.e = append(itr.e, buf) - itr.buf[i] = nil - } - - return itr.e -} - -// tagKeyMergeElem represents a merged tag key element. -type tagKeyMergeElem []TagKeyElem - -// Key returns the key of the first element. -func (p tagKeyMergeElem) Key() []byte { - if len(p) == 0 { - return nil - } - return p[0].Key() -} - -// Deleted returns the deleted flag of the first element. 
-func (p tagKeyMergeElem) Deleted() bool { - if len(p) == 0 { - return false - } - return p[0].Deleted() -} - -// TagValueIterator returns a merge iterator for all elements until a tombstone occurs. -func (p tagKeyMergeElem) TagValueIterator(limiter *mincore.Limiter) TagValueIterator { - if len(p) == 0 { - return nil - } - - a := make([]TagValueIterator, 0, len(p)) - for _, e := range p { - itr := e.TagValueIterator(limiter) - - a = append(a, itr) - if e.Deleted() { - break - } - } - return MergeTagValueIterators(a...) -} - -// TagValueElem represents a generic tag value element. -type TagValueElem interface { - Value() []byte - Deleted() bool -} - -// TagValueIterator represents a iterator over a list of tag values. -type TagValueIterator interface { - Next() TagValueElem -} - -// tsdbTagValueIteratorAdapter wraps TagValueIterator to match the TSDB interface. -// This is needed because TSDB doesn't have a concept of "deleted" tag values. -type tsdbTagValueIteratorAdapter struct { - itr TagValueIterator -} - -// NewTSDBTagValueIteratorAdapter return an iterator which implements tsdb.TagValueIterator. -func NewTSDBTagValueIteratorAdapter(itr TagValueIterator) tsdb.TagValueIterator { - if itr == nil { - return nil - } - return &tsdbTagValueIteratorAdapter{itr: itr} -} - -func (itr *tsdbTagValueIteratorAdapter) Close() error { return nil } - -func (itr *tsdbTagValueIteratorAdapter) Next() ([]byte, error) { - for { - e := itr.itr.Next() - if e == nil { - return nil, nil - } else if e.Deleted() { - continue - } - return e.Value(), nil - } -} - -// MergeTagValueIterators returns an iterator that merges a set of iterators. -// Iterators that are first in the list take precedence and a deletion by those -// early iterators will invalidate elements by later iterators. -func MergeTagValueIterators(itrs ...TagValueIterator) TagValueIterator { - if len(itrs) == 0 { - return nil - } - - return &tagValueMergeIterator{ - e: make(tagValueMergeElem, 0, len(itrs)), - buf: make([]TagValueElem, len(itrs)), - itrs: itrs, - } -} - -type tagValueMergeIterator struct { - e tagValueMergeElem - buf []TagValueElem - itrs []TagValueIterator -} - -// Next returns the element with the next lowest value across the iterators. -// -// If multiple iterators contain the same value then the first is returned -// and the remaining ones are skipped. -func (itr *tagValueMergeIterator) Next() TagValueElem { - // Find next lowest value amongst the buffers. - var value []byte - for i, buf := range itr.buf { - // Fill buffer. - if buf == nil { - if buf = itr.itrs[i].Next(); buf != nil { - itr.buf[i] = buf - } else { - continue - } - } - - // Find next lowest value. - if value == nil || bytes.Compare(buf.Value(), value) == -1 { - value = buf.Value() - } - } - - // Return nil if no elements remaining. - if value == nil { - return nil - } - - // Merge elements and clear buffers. - itr.e = itr.e[:0] - for i, buf := range itr.buf { - if buf == nil || !bytes.Equal(buf.Value(), value) { - continue - } - itr.e = append(itr.e, buf) - itr.buf[i] = nil - } - return itr.e -} - -// tagValueMergeElem represents a merged tag value element. -type tagValueMergeElem []TagValueElem - -// Name returns the value of the first element. -func (p tagValueMergeElem) Value() []byte { - if len(p) == 0 { - return nil - } - return p[0].Value() -} - -// Deleted returns the deleted flag of the first element. 
-func (p tagValueMergeElem) Deleted() bool { - if len(p) == 0 { - return false - } - return p[0].Deleted() -} - -/* -type SeriesPointMergeIterator interface { - Next() (*query.FloatPoint, error) - Close() error - Stats() query.IteratorStats -} - -func MergeSeriesPointIterators(itrs ...*seriesPointIterator) SeriesPointMergeIterator { - if n := len(itrs); n == 0 { - return nil - } else if n == 1 { - return itrs[0] - } - - return &seriesPointMergeIterator{ - buf: make([]*query.FloatPoint, len(itrs)), - itrs: itrs, - } -} - -type seriesPointMergeIterator struct { - buf []*query.FloatPoint - itrs []*seriesPointIterator -} - -func (itr *seriesPointMergeIterator) Close() error { - for i := range itr.itrs { - itr.itrs[i].Close() - } - return nil -} -func (itr *seriesPointMergeIterator) Stats() query.IteratorStats { - return query.IteratorStats{} -} - -func (itr *seriesPointMergeIterator) Next() (_ *query.FloatPoint, err error) { - // Find next lowest point amongst the buffers. - var key []byte - for i, buf := range itr.buf { - // Fill buffer. - if buf == nil { - if buf, err = itr.itrs[i].Next(); err != nil { - return nil, err - } else if buf != nil { - itr.buf[i] = buf - } else { - continue - } - } - - // Find next lowest key. - if key == nil || bytes.Compare(buf.Key(), key) == -1 { - key = buf.Key() - } - } - - // Return nil if no elements remaining. - if key == nil { - return nil, nil - } - - // Merge elements together & clear buffer. - itr.e = itr.e[:0] - for i, buf := range itr.buf { - if buf == nil || !bytes.Equal(buf.Key(), key) { - continue - } - itr.e = append(itr.e, buf) - itr.buf[i] = nil - } - - return itr.e, nil -} -*/ - -// writeTo writes write v into w. Updates n. -func writeTo(w io.Writer, v []byte, n *int64) error { - nn, err := w.Write(v) - *n += int64(nn) - return err -} - -// writeUint8To writes write v into w. Updates n. -func writeUint8To(w io.Writer, v uint8, n *int64) error { - nn, err := w.Write([]byte{v}) - *n += int64(nn) - return err -} - -// writeUint16To writes write v into w using big endian encoding. Updates n. -func writeUint16To(w io.Writer, v uint16, n *int64) error { - var buf [2]byte - binary.BigEndian.PutUint16(buf[:], v) - nn, err := w.Write(buf[:]) - *n += int64(nn) - return err -} - -// writeUint64To writes write v into w using big endian encoding. Updates n. -func writeUint64To(w io.Writer, v uint64, n *int64) error { - var buf [8]byte - binary.BigEndian.PutUint64(buf[:], v) - nn, err := w.Write(buf[:]) - *n += int64(nn) - return err -} - -// writeUvarintTo writes write v into w using variable length encoding. Updates n. -func writeUvarintTo(w io.Writer, v uint64, n *int64) error { - var buf [binary.MaxVarintLen64]byte - i := binary.PutUvarint(buf[:], v) - nn, err := w.Write(buf[:i]) - *n += int64(nn) - return err -} - -type byteSlices [][]byte - -func (a byteSlices) Len() int { return len(a) } -func (a byteSlices) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a byteSlices) Less(i, j int) bool { return bytes.Compare(a[i], a[j]) == -1 } - -// assert will panic with a given formatted message if the given condition is false. -func assert(condition bool, msg string, v ...interface{}) { - if !condition { - panic(fmt.Sprintf("assert failed: "+msg, v...)) - } -} - -// uvarint is a wrapper around binary.Uvarint. -// Returns a non-nil error when binary.Uvarint returns n <= 0 or n > len(data). 
-func uvarint(data []byte) (value uint64, n int, err error) { - if len(data) < 1 { - err = io.ErrShortBuffer - } else if value, n = binary.Uvarint(data); n == 0 || n > len(data) { - err = io.ErrShortBuffer - } else if n < 0 { - err = fmt.Errorf("parsing binary-encoded uint64 value failed; binary.Uvarint() returned %d", n) - } - return -} diff --git a/tsdb/tsi1/tsi1_test.go b/tsdb/tsi1/tsi1_test.go deleted file mode 100644 index 1dadd1c791..0000000000 --- a/tsdb/tsi1/tsi1_test.go +++ /dev/null @@ -1,325 +0,0 @@ -package tsi1_test - -import ( - "bytes" - "context" - "io/ioutil" - "os" - "path/filepath" - "reflect" - "testing" - - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/pkg/mincore" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" - "github.com/influxdata/influxdb/v2/tsdb/tsi1" -) - -// Ensure iterator can operate over an in-memory list of elements. -func TestMeasurementIterator(t *testing.T) { - elems := []MeasurementElem{ - MeasurementElem{name: []byte("cpu"), deleted: true}, - MeasurementElem{name: []byte("mem")}, - } - - itr := MeasurementIterator{Elems: elems} - if e := itr.Next(); !reflect.DeepEqual(&elems[0], e) { - t.Fatalf("unexpected elem(0): %#v", e) - } else if e := itr.Next(); !reflect.DeepEqual(&elems[1], e) { - t.Fatalf("unexpected elem(1): %#v", e) - } else if e := itr.Next(); e != nil { - t.Fatalf("expected nil elem: %#v", e) - } -} - -// Ensure iterator can merge multiple iterators together. -func TestMergeMeasurementIterators(t *testing.T) { - itr := tsi1.MergeMeasurementIterators( - &MeasurementIterator{Elems: []MeasurementElem{ - {name: []byte("aaa")}, - {name: []byte("bbb"), deleted: true}, - {name: []byte("ccc")}, - }}, - &MeasurementIterator{}, - &MeasurementIterator{Elems: []MeasurementElem{ - {name: []byte("bbb")}, - {name: []byte("ccc"), deleted: true}, - {name: []byte("ddd")}, - }}, - ) - - if e := itr.Next(); !bytes.Equal(e.Name(), []byte("aaa")) || e.Deleted() { - t.Fatalf("unexpected elem(0): %s/%v", e.Name(), e.Deleted()) - } else if e := itr.Next(); !bytes.Equal(e.Name(), []byte("bbb")) || !e.Deleted() { - t.Fatalf("unexpected elem(1): %s/%v", e.Name(), e.Deleted()) - } else if e := itr.Next(); !bytes.Equal(e.Name(), []byte("ccc")) || e.Deleted() { - t.Fatalf("unexpected elem(2): %s/%v", e.Name(), e.Deleted()) - } else if e := itr.Next(); !bytes.Equal(e.Name(), []byte("ddd")) || e.Deleted() { - t.Fatalf("unexpected elem(3): %s/%v", e.Name(), e.Deleted()) - } else if e := itr.Next(); e != nil { - t.Fatalf("expected nil elem: %#v", e) - } -} - -// Ensure iterator can operate over an in-memory list of tag key elements. -func TestTagKeyIterator(t *testing.T) { - elems := []TagKeyElem{ - {key: []byte("aaa"), deleted: true}, - {key: []byte("bbb")}, - } - - itr := TagKeyIterator{Elems: elems} - if e := itr.Next(); !reflect.DeepEqual(&elems[0], e) { - t.Fatalf("unexpected elem(0): %#v", e) - } else if e := itr.Next(); !reflect.DeepEqual(&elems[1], e) { - t.Fatalf("unexpected elem(1): %#v", e) - } else if e := itr.Next(); e != nil { - t.Fatalf("expected nil elem: %#v", e) - } -} - -// Ensure iterator can merge multiple iterators together. 
-func TestMergeTagKeyIterators(t *testing.T) { - itr := tsi1.MergeTagKeyIterators( - &TagKeyIterator{Elems: []TagKeyElem{ - {key: []byte("aaa")}, - {key: []byte("bbb"), deleted: true}, - {key: []byte("ccc")}, - }}, - &TagKeyIterator{}, - &TagKeyIterator{Elems: []TagKeyElem{ - {key: []byte("bbb")}, - {key: []byte("ccc"), deleted: true}, - {key: []byte("ddd")}, - }}, - ) - - if e := itr.Next(); !bytes.Equal(e.Key(), []byte("aaa")) || e.Deleted() { - t.Fatalf("unexpected elem(0): %s/%v", e.Key(), e.Deleted()) - } else if e := itr.Next(); !bytes.Equal(e.Key(), []byte("bbb")) || !e.Deleted() { - t.Fatalf("unexpected elem(1): %s/%v", e.Key(), e.Deleted()) - } else if e := itr.Next(); !bytes.Equal(e.Key(), []byte("ccc")) || e.Deleted() { - t.Fatalf("unexpected elem(2): %s/%v", e.Key(), e.Deleted()) - } else if e := itr.Next(); !bytes.Equal(e.Key(), []byte("ddd")) || e.Deleted() { - t.Fatalf("unexpected elem(3): %s/%v", e.Key(), e.Deleted()) - } else if e := itr.Next(); e != nil { - t.Fatalf("expected nil elem: %#v", e) - } -} - -// Ensure iterator can operate over an in-memory list of tag value elements. -func TestTagValueIterator(t *testing.T) { - elems := []TagValueElem{ - {value: []byte("aaa"), deleted: true}, - {value: []byte("bbb")}, - } - - itr := &TagValueIterator{Elems: elems} - if e := itr.Next(); !reflect.DeepEqual(&elems[0], e) { - t.Fatalf("unexpected elem(0): %#v", e) - } else if e := itr.Next(); !reflect.DeepEqual(&elems[1], e) { - t.Fatalf("unexpected elem(1): %#v", e) - } else if e := itr.Next(); e != nil { - t.Fatalf("expected nil elem: %#v", e) - } -} - -// Ensure iterator can merge multiple iterators together. -func TestMergeTagValueIterators(t *testing.T) { - itr := tsi1.MergeTagValueIterators( - &TagValueIterator{Elems: []TagValueElem{ - {value: []byte("aaa")}, - {value: []byte("bbb"), deleted: true}, - {value: []byte("ccc")}, - }}, - &TagValueIterator{}, - &TagValueIterator{Elems: []TagValueElem{ - {value: []byte("bbb")}, - {value: []byte("ccc"), deleted: true}, - {value: []byte("ddd")}, - }}, - ) - - if e := itr.Next(); !bytes.Equal(e.Value(), []byte("aaa")) || e.Deleted() { - t.Fatalf("unexpected elem(0): %s/%v", e.Value(), e.Deleted()) - } else if e := itr.Next(); !bytes.Equal(e.Value(), []byte("bbb")) || !e.Deleted() { - t.Fatalf("unexpected elem(1): %s/%v", e.Value(), e.Deleted()) - } else if e := itr.Next(); !bytes.Equal(e.Value(), []byte("ccc")) || e.Deleted() { - t.Fatalf("unexpected elem(2): %s/%v", e.Value(), e.Deleted()) - } else if e := itr.Next(); !bytes.Equal(e.Value(), []byte("ddd")) || e.Deleted() { - t.Fatalf("unexpected elem(3): %s/%v", e.Value(), e.Deleted()) - } else if e := itr.Next(); e != nil { - t.Fatalf("expected nil elem: %#v", e) - } -} - -// Ensure iterator can operate over an in-memory list of series. -func TestSeriesIDIterator(t *testing.T) { - elems := []tsdb.SeriesIDElem{ - {SeriesID: tsdb.NewSeriesID(1)}, - {SeriesID: tsdb.NewSeriesID(2)}, - } - - itr := SeriesIDIterator{Elems: elems} - if e := itr.Next(); !reflect.DeepEqual(elems[0], e) { - t.Fatalf("unexpected elem(0): %#v", e) - } else if e := itr.Next(); !reflect.DeepEqual(elems[1], e) { - t.Fatalf("unexpected elem(1): %#v", e) - } else if e := itr.Next(); !e.SeriesID.IsZero() { - t.Fatalf("expected nil elem: %#v", e) - } -} - -// MeasurementElem represents a test implementation of tsi1.MeasurementElem. 
-type MeasurementElem struct { - name []byte - deleted bool - hasSeries bool -} - -func (e *MeasurementElem) Name() []byte { return e.name } -func (e *MeasurementElem) Deleted() bool { return e.deleted } -func (e *MeasurementElem) HasSeries() bool { return e.hasSeries } - -func (e *MeasurementElem) TagKeyIterator() tsi1.TagKeyIterator { return nil } - -// MeasurementIterator represents an iterator over a slice of measurements. -type MeasurementIterator struct { - Elems []MeasurementElem -} - -// Next returns the next element in the iterator. -func (itr *MeasurementIterator) Next() (e tsi1.MeasurementElem) { - if len(itr.Elems) == 0 { - return nil - } - e, itr.Elems = &itr.Elems[0], itr.Elems[1:] - return e -} - -// TagKeyElem represents a test implementation of tsi1.TagKeyElem. -type TagKeyElem struct { - key []byte - deleted bool -} - -func (e *TagKeyElem) Key() []byte { return e.key } -func (e *TagKeyElem) Deleted() bool { return e.deleted } -func (e *TagKeyElem) TagValueIterator(_ *mincore.Limiter) tsi1.TagValueIterator { return nil } - -// TagKeyIterator represents an iterator over a slice of tag keys. -type TagKeyIterator struct { - Elems []TagKeyElem -} - -// Next returns the next element in the iterator. -func (itr *TagKeyIterator) Next() (e tsi1.TagKeyElem) { - if len(itr.Elems) == 0 { - return nil - } - e, itr.Elems = &itr.Elems[0], itr.Elems[1:] - return e -} - -// TagValueElem represents a test implementation of tsi1.TagValueElem. -type TagValueElem struct { - value []byte - deleted bool -} - -func (e *TagValueElem) Value() []byte { return e.value } -func (e *TagValueElem) Deleted() bool { return e.deleted } - -// TagValueIterator represents an iterator over a slice of tag values. -type TagValueIterator struct { - Elems []TagValueElem -} - -// Next returns the next element in the iterator. -func (itr *TagValueIterator) Next() (e tsi1.TagValueElem) { - if len(itr.Elems) == 0 { - return nil - } - e, itr.Elems = &itr.Elems[0], itr.Elems[1:] - return e -} - -// SeriesIDIterator represents an iterator over a slice of series id elems. -type SeriesIDIterator struct { - Elems []tsdb.SeriesIDElem -} - -// Next returns the next element in the iterator. -func (itr *SeriesIDIterator) Next() (elem tsdb.SeriesIDElem) { - if len(itr.Elems) == 0 { - return tsdb.SeriesIDElem{} - } - elem, itr.Elems = itr.Elems[0], itr.Elems[1:] - return elem -} - -// MustTempDir returns a temporary directory. Panic on error. -func MustTempDir() string { - path, err := ioutil.TempDir("", "tsi-") - if err != nil { - panic(err) - } - return path -} - -// MustTempDir returns a temporary directory for a partition. Panic on error. -func MustTempPartitionDir() string { - path := MustTempDir() - path = filepath.Join(path, "0") - if err := os.Mkdir(path, 0777); err != nil { - panic(err) - } - return path -} - -// Series represents name/tagset pairs that are used in testing. -type Series struct { - Name []byte - Tags models.Tags - Type models.FieldType - Deleted bool -} - -// SeriesFile is a test wrapper for tsdb.SeriesFile. -type SeriesFile struct { - *seriesfile.SeriesFile -} - -// NewSeriesFile returns a new instance of SeriesFile with a temporary file path. -func NewSeriesFile() *SeriesFile { - dir, err := ioutil.TempDir("", "tsdb-series-file-") - if err != nil { - panic(err) - } - return &SeriesFile{SeriesFile: seriesfile.NewSeriesFile(dir)} -} - -// MustOpenSeriesFile returns a new, open instance of SeriesFile. Panic on error. 
-func MustOpenSeriesFile() *SeriesFile { - f := NewSeriesFile() - if err := f.Open(context.Background()); err != nil { - panic(err) - } - return f -} - -// Close closes the log file and removes it from disk. -func (f *SeriesFile) Close() error { - defer os.RemoveAll(f.Path()) - return f.SeriesFile.Close() -} - -// Reopen initialises a new series file using the existing one. -func (f *SeriesFile) Reopen() error { - if err := f.SeriesFile.Close(); err != nil { - return err - } - f.SeriesFile = seriesfile.NewSeriesFile(f.SeriesFile.Path()) - return nil -} diff --git a/tsdb/tsm1/DESIGN.md b/tsdb/tsm1/DESIGN.md deleted file mode 100644 index 0b5935c9ed..0000000000 --- a/tsdb/tsm1/DESIGN.md +++ /dev/null @@ -1,451 +0,0 @@ -# File Structure - -A TSM file is composed for four sections: header, blocks, index and the footer. - -``` -┌────────┬────────────────────────────────────┬─────────────┬──────────────┐ -│ Header │ Blocks │ Index │ Footer │ -│5 bytes │ N bytes │ N bytes │ 4 bytes │ -└────────┴────────────────────────────────────┴─────────────┴──────────────┘ -``` -Header is composed of a magic number to identify the file type and a version number. - -``` -┌───────────────────┐ -│ Header │ -├─────────┬─────────┤ -│ Magic │ Version │ -│ 4 bytes │ 1 byte │ -└─────────┴─────────┘ -``` - -Blocks are sequences of block CRC32 and data. The block data is opaque to the file. The CRC32 is used for recovery to ensure blocks have not been corrupted due to bugs outside of our control. The length of the blocks is stored in the index. - -``` -┌───────────────────────────────────────────────────────────┐ -│ Blocks │ -├───────────────────┬───────────────────┬───────────────────┤ -│ Block 1 │ Block 2 │ Block N │ -├─────────┬─────────┼─────────┬─────────┼─────────┬─────────┤ -│ CRC │ Data │ CRC │ Data │ CRC │ Data │ -│ 4 bytes │ N bytes │ 4 bytes │ N bytes │ 4 bytes │ N bytes │ -└─────────┴─────────┴─────────┴─────────┴─────────┴─────────┘ -``` - -Following the blocks is the index for the blocks in the file. The index is composed of a sequence of index entries ordered lexicographically by key and then by time. Each index entry starts with a key length and key followed by a count of the number of blocks in the file. Each block entry is composed of the min and max time for the block, the offset into the file where the block is located and the size of the block. - -The index structure can provide efficient access to all blocks as well as the ability to determine the cost associated with accessing a given key. Given a key and timestamp, we know exactly which file contains the block for that timestamp as well as where that block resides and how much data to read to retrieve the block. If we know we need to read all or multiple blocks in a file, we can use the size to determine how much to read in a given IO. - -_TBD: The block length stored in the block data could probably be dropped since we store it in the index._ - -``` -┌────────────────────────────────────────────────────────────────────────────┐ -│ Index │ -├─────────┬─────────┬──────┬───────┬─────────┬─────────┬────────┬────────┬───┤ -│ Key Len │ Key │ Type │ Count │Min Time │Max Time │ Offset │ Size │...│ -│ 2 bytes │ N bytes │1 byte│2 bytes│ 8 bytes │ 8 bytes │8 bytes │4 bytes │ │ -└─────────┴─────────┴──────┴───────┴─────────┴─────────┴────────┴────────┴───┘ -``` - -The last section is the footer that stores the offset of the start of the index. 
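
For illustration only, here is a minimal sketch of decoding one index entry according to the field widths in the table above. Big-endian integer encoding, the interpretation of the timestamps as Unix nanoseconds, and all names are assumptions made for this example; this is not the actual TSM reader code.

```go
package main

import (
	"encoding/binary"
	"fmt"
)

// blockEntry mirrors the per-block fields of an index entry described above.
type blockEntry struct {
	minTime, maxTime int64  // assumed Unix nanoseconds
	offset           int64  // file offset of the block
	size             uint32 // block length in bytes
}

// decodeIndexEntry reads one index entry from b and returns the key, the
// block type byte, the block entries, and the remaining bytes.
func decodeIndexEntry(b []byte) (key []byte, typ byte, blocks []blockEntry, rest []byte) {
	keyLen := int(binary.BigEndian.Uint16(b[0:2]))
	key = b[2 : 2+keyLen]
	b = b[2+keyLen:]

	typ = b[0]
	count := int(binary.BigEndian.Uint16(b[1:3]))
	b = b[3:]

	for i := 0; i < count; i++ {
		blocks = append(blocks, blockEntry{
			minTime: int64(binary.BigEndian.Uint64(b[0:8])),
			maxTime: int64(binary.BigEndian.Uint64(b[8:16])),
			offset:  int64(binary.BigEndian.Uint64(b[16:24])),
			size:    binary.BigEndian.Uint32(b[24:28]),
		})
		b = b[28:]
	}
	return key, typ, blocks, b
}

func main() {
	// Hand-built entry: key "cpu", type 0, one block at offset 5 with size 100.
	entry := []byte{0, 3, 'c', 'p', 'u', 0, 0, 1}
	block := make([]byte, 28)
	binary.BigEndian.PutUint64(block[0:8], 1)     // min time
	binary.BigEndian.PutUint64(block[8:16], 2)    // max time
	binary.BigEndian.PutUint64(block[16:24], 5)   // offset
	binary.BigEndian.PutUint32(block[24:28], 100) // size
	entry = append(entry, block...)

	key, typ, blocks, _ := decodeIndexEntry(entry)
	fmt.Println(string(key), typ, blocks) // cpu 0 [{1 2 5 100}]
}
```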
- -``` -┌─────────┐ -│ Footer │ -├─────────┤ -│Index Ofs│ -│ 8 bytes │ -└─────────┘ -``` - -# File System Layout - -The file system is organized a directory per shard where each shard is an integer number. Associated with each shard directory, there is a set of other directories and files: - -* a wal directory - contains a set numerically increasing files WAL segment files named #####.wal. The wal directory is separate from the directory containing the TSM files so that different types can be used if necessary. -* .tsm files - a set of numerically increasing TSM files containing compressed series data. -* .tombstone files - files named after the corresponding TSM file as #####.tombstone. These contain measurement and series keys that have been deleted. These files are removed during compactions. - -# Data Flow - -Writes are appended to the current WAL segment and are also added to the Cache. Each WAL segment is size bounded and rolls-over to a new file after it fills up. The cache is also size bounded; snapshots are taken and WAL compactions are initiated when the cache becomes too full. If the inbound write rate exceeds the WAL compaction rate for a sustained period, the cache may become too full in which case new writes will fail until the compaction process catches up. The WAL and Cache are separate entities and do not interact with each other. The Engine coordinates the writes to both. - -When WAL segments fill up and have been closed, the Compactor reads the WAL entries and combines them with one or more existing TSM files. This process runs continuously until all WAL files are compacted and there is a minimum number of TSM files. As each TSM file is completed, it is loaded and referenced by the FileStore. - -Queries are executed by constructing Cursors for keys. The Cursors iterate over slices of Values. When the current Values are exhausted, a Cursor requests the next set of Values from the Engine. The Engine returns a slice of Values by querying the FileStore and Cache. The Values in the Cache are overlaid on top of the values returned from the FileStore. The FileStore reads and decodes blocks of Values according to the index for the file. - -Updates (writing a newer value for a point that already exists) occur as normal writes. Since cached values overwrite existing values, newer writes take precedence. - -Deletes occur by writing a delete entry for the measurement or series to the WAL and then updating the Cache and FileStore. The Cache evicts all relevant entries. The FileStore writes a tombstone file for each TSM file that contains relevant data. These tombstone files are used at startup time to ignore blocks as well as during compactions to remove deleted entries. - -# Compactions - -Compactions are a serial and continuously running process that iteratively optimizes the storage for queries. Specifically, it does the following: - -* Converts closed WAL files into TSM files and removes the closed WAL files -* Combines smaller TSM files into larger ones to improve compression ratios -* Rewrites existing files that contain series data that has been deleted -* Rewrites existing files that contain writes with more recent data to ensure a point exists in only one TSM file. - -The compaction algorithm is continuously running and always selects files to compact based on a priority. - -1. If there are closed WAL files, the 5 oldest WAL segments are added to the set of compaction files. -2. 
If any TSM files contain points with older timestamps that also exist in the WAL files, those TSM files are added to the compaction set. -3. If any TSM files have a tombstone marker, those TSM files are added to the compaction set. - -The compaction algorithm generates a set of SeriesIterators that return a sequence of `key`, `Values` where each `key` returned is lexicographically greater than the previous one. The iterators are ordered such that WAL iterators will override any values returned by the TSM file iterators. WAL iterators read and cache the WAL segment so that deletes later in the log can be processed correctly. TSM file iterators use the tombstone files to ensure that deleted series are not returned during iteration. As each key is processed, the Values slice is grown, sorted, and then written to a new block in the new TSM file. The blocks can be split based on number of points or size of the block. If the total size of the current TSM file would exceed the maximum file size, a new file is created. - -Deletions can occur while a new file is being written. Since the new TSM file is not complete a tombstone would not be written for it. This could result in deleted values getting written into a new file. To prevent this, if a compaction is running and a delete occurs, the current compaction is aborted and new compaction is started. - -When all WAL files in the current compaction have been processed and the new TSM files have been successfully written, the new TSM files are renamed to their final names, the WAL segments are truncated and the associated snapshots are released from the cache. - -The compaction process then runs again until there are no more WAL files and the minimum number of TSM files exist that are also under the maximum file size. - -# WAL - -Currently, there is a WAL per shard. This means all the writes in a WAL segment are for the given shard. It also means that writes across a lot of shards append to many files which might result in more disk IO due to seeking to the end of multiple files. - -Two options are being considered: - -## WAL per Shard - -This is the current behavior of the WAL. This option is conceptually easier to reason about. For example, compactions that read in multiple WAL segments are assured that all the WAL entries pertain to the current shard. If it completes a compaction, it is safe to remove the WAL segment. It is also easier to deal with shard deletions as all the WAL segments can be dropped along with the other shard files. - -The drawback of this option is the potential for turning sequential write IO into random IO in the presence of multiple shards and writes to many different shards. - -## Single WAL - -Using a single WAL adds some complexity to compactions and deletions. Compactions will need to either sort all the WAL entries in a segment by shard first and then run compactions on each shard or the compactor needs to be able to compact multiple shards concurrently while ensuring points in existing TSM files in different shards remain separate. - -Deletions would not be able to reclaim WAL segments immediately as in the case where there is a WAL per shard. Similarly, a compaction of a WAL segment that contains writes for a deleted shard would need to be dropped. - -Currently, we are moving towards a Single WAL implementation. - -# Cache - -The purpose of the cache is so that data in the WAL is queryable. Every time a point is written to a WAL segment, it is also written to an in-memory cache. 
The cache is split into two parts: a "hot" part, representing the most recent writes and a "cold" part containing snapshots for which an active WAL compaction -process is underway. - -Queries are satisfied with values read from the cache and finalized TSM files. Points in the cache always take precedence over points in TSM files with the same timestamp. Queries are never read directly from WAL segment files which are designed to optimize write rather than read performance. - -The cache tracks its size on a "point-calculated" basis. "point-calculated" means that the RAM storage footprint for a point is the determined by calling its `Size()` method. While this does not correspond directly to the actual RAM footprint in the cache, the two values are sufficiently well correlated for the purpose of controlling RAM usage. - -If the cache becomes too full, or the cache has been idle for too long, a snapshot of the cache is taken and a compaction process is initiated for the related WAL segments. When the compaction of these segments is complete, the related snapshots are released from the cache. - -In cases where IO performance of the compaction process falls behind the incoming write rate, it is possible that writes might arrive at the cache while the cache is both too full and the compaction of the previous snapshot is still in progress. In this case, the cache will reject the write, causing the write to fail. -Well behaved clients should interpret write failures as back pressure and should either discard the write or back off and retry the write after a delay. - -# TSM File Index - -Each TSM file contains a full index of the blocks contained within the file. The existing index structure is designed to allow for a binary search across the index to find the starting block for a key. We would then seek to that start key and sequentially scan each block to find the location of a timestamp. - -Some issues with the existing structure is that seeking to a given timestamp for a key has a unknown cost. This can cause variability in read performance that would very difficult to fix. Another issue is that startup times for loading a TSM file would grow in proportion to number and size of TSM files on disk since we would need to scan the entire file to find all keys contained in the file. This could be addressed by using a separate index like file or changing the index structure. - -We've chosen to update the block index structure to ensure a TSM file is fully self-contained, supports consistent IO characteristics for sequential and random accesses as well as provides an efficient load time regardless of file size. The implications of these changes are that the index is slightly larger and we need to be able to search the index despite each entry being variably sized. - -The following are some alternative design options to handle the cases where the index is too large to fit in memory. We are currently planning to use an indirect MMAP indexing approach for loaded TSM files. - -### Indirect MMAP Indexing - -One option is to MMAP the index into memory and record the pointers to the start of each index entry in a slice. When searching for a given key, the pointers are used to perform a binary search on the underlying mmap data. When the matching key is found, the block entries can be loaded and search or a subsequent binary search on the blocks can be performed. - -A variation of this can also be done without MMAPs by seeking and reading in the file. 
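
As a rough sketch of the indirect lookup just described, one way the binary search over entry offsets might look in Go is shown below; the worked example that follows walks through the same idea against a concrete offsets slice. The 4-byte (`int32`) offsets and the 2-byte key-length prefix come from the text, but big-endian encoding and the helper names are assumptions for illustration, not the engine's actual API.

```go
package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"sort"
)

// keyAt returns the key of the index entry beginning at byte position off.
// Each entry starts with a 2-byte key length followed by the key itself.
func keyAt(index []byte, off int32) []byte {
	n := int32(binary.BigEndian.Uint16(index[off : off+2]))
	return index[off+2 : off+2+n]
}

// searchOffsets binary-searches the offsets slice for the first entry whose
// key is >= target, comparing keys read directly from the index bytes.
func searchOffsets(index []byte, offsets []int32, target []byte) int {
	return sort.Search(len(offsets), func(i int) bool {
		return bytes.Compare(keyAt(index, offsets[i]), target) >= 0
	})
}

func main() {
	// Tiny index with two entries; block entries omitted for brevity.
	index := []byte{0, 3, 'c', 'p', 'u', 0, 3, 'm', 'e', 'm'}
	offsets := []int32{0, 5} // byte position of each entry's key-length field

	i := searchOffsets(index, offsets, []byte("mem"))
	fmt.Println(i, string(keyAt(index, offsets[i]))) // 1 mem
}
```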
The underlying file cache will still be utilized in this approach as well. - -As an example, if we have an index structure in memory such as: - - ``` -┌────────────────────────────────────────────────────────────────────┐ -│ Index │ -├─┬──────────────────────┬──┬───────────────────────┬───┬────────────┘ -│0│ │62│ │145│ -├─┴───────┬─────────┬────┼──┴──────┬─────────┬──────┼───┴─────┬──────┐ -│Key 1 Len│ Key │... │Key 2 Len│ Key 2 │ ... │ Key 3 │ ... │ -│ 2 bytes │ N bytes │ │ 2 bytes │ N bytes │ │ 2 bytes │ │ -└─────────┴─────────┴────┴─────────┴─────────┴──────┴─────────┴──────┘ -``` - -We would build an `offsets` slices where each element pointers to the byte location for the first key in then index slice. - -``` -┌────────────────────────────────────────────────────────────────────┐ -│ Offsets │ -├────┬────┬────┬─────────────────────────────────────────────────────┘ -│ 0 │ 62 │145 │ -└────┴────┴────┘ - ``` - - -Using this offset slice we can find `Key 2` by doing a binary search over the offsets slice. Instead of comparing the value in the offsets (e.g. `62`), we use that as an index into the underlying index to retrieve the key at position `62` and perform our comparisons with that. - -When we have identified the correct position in the index for a given key, we could perform another binary search or a linear scan. This should be fast as well since each index entry is 28 bytes and all contiguous in memory. - -The size of the offsets slice would be proportional to the number of unique series. If we we limit file sizes to 4GB, we would use 4 bytes for each pointer. - -### LRU/Lazy Load - -A second option could be to have the index work as a memory bounded, lazy-load style cache. When a cache miss occurs, the index structure is scanned to find the key and the entries are load and added to the cache which causes the least-recently used entries to be evicted. - -### Key Compression - -Another option is compress keys using a key specific dictionary encoding. For example, - -``` -cpu,host=server1 value=1 -cpu,host=server2 value=2 -memory,host=server1 value=3 -``` - -Could be compressed by expanding the key into its respective parts: measurement, tag keys, tag values and tag fields . For each part a unique number is assigned. e.g. - -Measurements -``` -cpu = 1 -memory = 2 -``` - -Tag Keys -``` -host = 1 -``` - -Tag Values -``` -server1 = 1 -server2 = 2 -``` - -Fields -``` -value = 1 -``` - -Using this encoding dictionary, the string keys could be converted to a sequence of integers: - -``` -cpu,host=server1 value=1 --> 1,1,1,1 -cpu,host=server2 value=2 --> 1,1,2,1 -memory,host=server1 value=3 --> 3,1,2,1 -``` - -These sequences of small integers list can then be compressed further using a bit packed format such as Simple9 or Simple8b. The resulting byte slices would be a multiple of 4 or 8 bytes (using Simple9/Simple8b respectively) which could used as the (string). - -### Separate Index - -Another option might be to have a separate index file (BoltDB) that serves as the storage for the `FileIndex` and is transient. This index would be recreated at startup and updated at compaction time. - -# Components - -These are some of the high-level components and their responsibilities. These are ideas preliminary. - -## WAL - -* Append-only log composed of fixed size segment files. -* Writes are appended to the current segment -* Roll-over to new segment after filling the current segment -* Closed segments are never modified and used for startup and recovery as well as compactions. 
-* There is a single WAL for the store as opposed to a WAL per shard. - -## Compactor - -* Continuously running, iterative file storage optimizer -* Takes closed WAL files, existing TSM files and combines into one or more new TSM files - -## Cache - -* Hold recently written series data -* Has max size and a flushing limit -* When the flushing limit is crossed, a snapshot is taken and a compaction process for the related WAL segments is commenced. -* If a write comes in, the cache is too full, and the previous snapshot is still being compacted, the write will fail. - -# Engine - -* Maintains references to Cache, FileStore, WAL, etc.. -* Creates a cursor -* Receives writes, coordinates queries -* Hides underlying files and types from clients - -## Cursor - -* Iterates forward or reverse for given key -* Requests values from Engine for key and timestamp -* Has no knowledge of TSM files or WAL - delegates to Engine to request next set of Values - -## FileStore - -* Manages TSM files -* Maintains the file indexes and references to active files -* A TSM file that is opened entails reading in and adding the index section to the `FileIndex`. The block data is then MMAPed up to the index offset to avoid having the index in memory twice. - -## FileIndex -* Provides location information to a file and block for a given key and timestamp. - -## Interfaces - -``` -SeriesIterator returns the key and []Value such that a key is only returned -once and subsequent calls to Next() do not return the same key twice. -type SeriesIterator interface { - func Next() (key, []Value, error) -} -``` - -## Types - -_NOTE: the actual func names are to illustrate the type of functionality the type is responsible._ - -``` -TSMWriter writes a sets of key and Values to a TSM file. -type TSMWriter struct {} -func (t *TSMWriter) Write(key string, values []Value) error {} -func (t *TSMWriter) Close() error -``` - - -``` -// WALIterator returns the key and []Values for a set of WAL segment files. -type WALIterator struct{ - Files *os.File -} -func (r *WALReader) Next() (key, []Value, error) -``` - - -``` -TSMIterator returns the key and values from a TSM file. -type TSMIterator struct {} -func (r *TSMIterator) Next() (key, []Value, error) -``` - -``` -type Compactor struct {} -func (c *Compactor) Compact(iters ...SeriesIterators) error -``` - -``` -type Engine struct { - wal *WAL - cache *Cache - fileStore *FileStore - compactor *Compactor -} - -func (e *Engine) ValuesBefore(key string, timestamp time.Time) ([]Value, error) -func (e *Engine) ValuesAfter(key string, timestamp time.Time) ([]Value, error) -``` - -``` -type Cursor struct{ - engine *Engine -} -... -``` - -``` -// FileStore maintains references -type FileStore struct {} -func (f *FileStore) ValuesBefore(key string, timestamp time.Time) ([]Value, error) -func (f *FileStore) ValuesAfter(key string, timestamp time.Time) ([]Value, error) - -``` - -``` -type FileIndex struct {} - -// Returns a file and offset for a block located in the return file that contains the requested key and timestamp. 
-func (f *FileIndex) Location(key, timestamp) (*os.File, uint64, error) -``` - -``` -type Cache struct {} -func (c *Cache) Write(key string, values []Value, checkpoint uint64) error -func (c *Cache) SetCheckpoint(checkpoint uint64) error -func (c *Cache) Cursor(key string) tsdb.Cursor -``` - -``` -type WAL struct {} -func (w *WAL) Write(key string, values []Value) -func (w *WAL) ClosedSegments() ([]*os.File, error) -``` - - -# Concerns - -## Performance - -There are three categories of performance this design is concerned with: - -* Write Throughput/Latency -* Query Throughput/Latency -* Startup time -* Compaction Throughput/Latency -* Memory Usage - -### Writes - -Write throughput is bounded by the time to process the write on the CPU (parsing, sorting, etc..), adding and evicting to the Cache and appending the write to the WAL. The first two items are CPU bound and can be tuned and optimized if they become a bottleneck. The WAL write can be tuned such that in the worst case every write requires at least 2 IOPS (write + fsync) or batched so that multiple writes are queued and fsync'd in sizes matching one or more disk blocks. Performing more work with each IO will improve throughput - -Write latency is minimal for the WAL write since there are no seeks. The latency is bounded by the time to complete any write and fsync calls. - -### Queries - -Query throughput is directly related to how many blocks can be read in a period of time. The index structure contains enough information to determine if one or multiple blocks can be read in a single IO. - -Query latency is determine by how long it takes to find and read the relevant blocks. The in-memory index structure contains the offsets and sizes of all blocks for a key. This allows every block to be read in 2 IOPS (seek + read) regardless of position, structure or size of file. - -### Startup - -Startup time is proportional to the number of WAL files, TSM files and tombstone files. WAL files can be read and process in large batches using the WALIterators. TSM files require reading the index block into memory (5 IOPS/file). Tombstone files are expected to be small and infrequent and would require approximately 2 IOPS/file. - -### Compactions - -Compactions are IO intensive in that they may need to read multiple, large TSM files to rewrite them. The throughput of a compactions (MB/s) as well as the latency for each compaction is important to keep consistent even as data sizes grow. - -To address these concerns, compactions prioritize old WAL files over optimizing storage/compression to avoid data being hidden during overload situations. This also accounts for the fact that shards will eventually become cold for writes so that existing data will be able to be optimized. To maintain consistent performance, the number of each type of file processed as well as the size of each file processed is bounded. - -### Memory Footprint - -The memory footprint should not grow unbounded due to additional files or series keys of large sizes or numbers. Some options for addressing this concern is covered in the [Design Options] section. - -## Concurrency - -The main concern with concurrency is that reads and writes should not block each other. Writes add entries to the Cache and append entries to the WAL. During queries, the contention points will be the Cache and existing TSM files. Since the Cache and TSM file data is only accessed through the engine by the cursors, several strategies can be used to improve concurrency. - -1. 
cached series data is returned to cursors as a copy. Since cache snapshots are released following compaction, cursor iteration and writes to the same series could block each other. Iterating over copies of the values can relieve some of this contention. -2. TSM data values returned by the engine are new references to Values and not access to the actual TSM files. This means that the `Engine`, through the `FileStore` can limit contention. -3. Compactions are the only place where new TSM files are added and removed. Since this is a serial, continuously running process, file contention is minimized. - -## Robustness - -The two robustness concerns considered by this design are writes filling the cache and crash recovery. - -### Cache Exhaustion - -The cache is used to hold the contents of uncompacted WAL segments in memory until such time that the compaction process has had a chance to convert the write-optimised WAL segments into read-optimised TSM files. - -The question arises about what to do in the case that the inbound write rate temporarily exceeds the compaction rate. There are four alternatives: - -* block the write until the compaction process catches up -* cache the write and hope that the compaction process catches up before memory exhaustion occurs -* evict older cache entries to make room for new writes -* fail the write and propagate the error back to the database client as a form of back pressure - -The current design chooses the last option - failing the writes. While this option reduces the apparent robustness of the database API from the perspective of the clients, it does provide a means by which the database can communicate, via back pressure, the need for clients to temporarily backoff. Well behaved clients should respond to write errors either by discarding the write or by retrying the write after a delay in the hope that the compaction process will eventually catch up. The problem with the first two options is that they may exhaust server resources. The problem with the third option is that queries (which don't touch WAL segments) might silently return incomplete results during compaction periods; with the selected option the possibility of incomplete queries is at least flagged by the presence of write errors during periods of degraded compaction performance. - -### Crash Recovery - -Crash recovery is facilitated with the following two properties: the append-only nature of WAL segments and the write-once nature of TSM files. If the server crashes incomplete compactions are discarded and the cache is rebuilt from the discovered WAL segments. Compactions will then resume in the normal way. Similarly, TSM files are immutable once they have been created and registered with the file store. A compaction may replace an existing TSM file, but the replaced file is not removed from the file system until replacement file has been created and synced to disk. - -#Errata - -This section is reserved for errata. In cases where the document is incorrect or inconsistent, such errata will be noted here with the contents of this section taking precedence over text elsewhere in the document in the case of discrepancies. Future full revisions of this document will fold the errata text back into the body of the document. - -#Revisions - -##14 February, 2016 - -* refined description of cache behaviour and robustness to reflect current design based on snapshots. Most references to checkpoints and evictions have been removed. 
See discussion here - https://goo.gl/L7AzVu - -##11 November, 2015 - -* initial design published \ No newline at end of file diff --git a/tsdb/tsm1/array_cursor.gen.go b/tsdb/tsm1/array_cursor.gen.go deleted file mode 100644 index 61ad5f8e11..0000000000 --- a/tsdb/tsm1/array_cursor.gen.go +++ /dev/null @@ -1,1579 +0,0 @@ -// Generated by tmpl -// https://github.com/benbjohnson/tmpl -// -// DO NOT EDIT! -// Source: array_cursor.gen.go.tmpl - -package tsm1 - -import ( - "sort" - - "github.com/influxdata/influxdb/v2/tsdb/cursors" -) - -// Array Cursors - -type floatArrayAscendingCursor struct { - cache struct { - values Values - pos int - } - - tsm struct { - buf *cursors.FloatArray - values *cursors.FloatArray - pos int - keyCursor *KeyCursor - } - - end int64 - res *cursors.FloatArray - stats cursors.CursorStats -} - -func newFloatArrayAscendingCursor() *floatArrayAscendingCursor { - c := &floatArrayAscendingCursor{ - res: cursors.NewFloatArrayLen(MaxPointsPerBlock), - } - c.tsm.buf = cursors.NewFloatArrayLen(MaxPointsPerBlock) - return c -} - -func (c *floatArrayAscendingCursor) reset(seek, end int64, cacheValues Values, tsmKeyCursor *KeyCursor) { - c.end = end - c.cache.values = cacheValues - c.cache.pos = sort.Search(len(c.cache.values), func(i int) bool { - return c.cache.values[i].UnixNano() >= seek - }) - - c.tsm.keyCursor = tsmKeyCursor - c.tsm.values = c.readArrayBlock() - c.tsm.pos = sort.Search(c.tsm.values.Len(), func(i int) bool { - return c.tsm.values.Timestamps[i] >= seek - }) -} - -func (c *floatArrayAscendingCursor) Err() error { return nil } - -// close closes the cursor and any dependent cursors. -func (c *floatArrayAscendingCursor) Close() { - if c.tsm.keyCursor != nil { - c.tsm.keyCursor.Close() - c.tsm.keyCursor = nil - } - c.cache.values = nil - c.tsm.values = nil -} - -func (c *floatArrayAscendingCursor) Stats() cursors.CursorStats { return c.stats } - -// Next returns the next key/value for the cursor. -func (c *floatArrayAscendingCursor) Next() *cursors.FloatArray { - pos := 0 - cvals := c.cache.values - tvals := c.tsm.values - - c.res.Timestamps = c.res.Timestamps[:cap(c.res.Timestamps)] - c.res.Values = c.res.Values[:cap(c.res.Values)] - - for pos < len(c.res.Timestamps) && c.tsm.pos < len(tvals.Timestamps) && c.cache.pos < len(cvals) { - ckey := cvals[c.cache.pos].UnixNano() - tkey := tvals.Timestamps[c.tsm.pos] - if ckey == tkey { - c.res.Timestamps[pos] = ckey - c.res.Values[pos] = cvals[c.cache.pos].(FloatValue).RawValue() - c.cache.pos++ - c.tsm.pos++ - } else if ckey < tkey { - c.res.Timestamps[pos] = ckey - c.res.Values[pos] = cvals[c.cache.pos].(FloatValue).RawValue() - c.cache.pos++ - } else { - c.res.Timestamps[pos] = tkey - c.res.Values[pos] = tvals.Values[c.tsm.pos] - c.tsm.pos++ - } - - pos++ - - if c.tsm.pos >= len(tvals.Timestamps) { - tvals = c.nextTSM() - } - } - - if pos < len(c.res.Timestamps) { - if c.tsm.pos < len(tvals.Timestamps) { - if pos == 0 && len(c.res.Timestamps) >= len(tvals.Timestamps) { - // optimization: all points can be served from TSM data because - // we need the entire block and the block completely fits within - // the buffer. 
- copy(c.res.Timestamps, tvals.Timestamps) - pos += copy(c.res.Values, tvals.Values) - c.nextTSM() - } else { - // copy as much as we can - n := copy(c.res.Timestamps[pos:], tvals.Timestamps[c.tsm.pos:]) - copy(c.res.Values[pos:], tvals.Values[c.tsm.pos:]) - pos += n - c.tsm.pos += n - if c.tsm.pos >= len(tvals.Timestamps) { - c.nextTSM() - } - } - } - - if c.cache.pos < len(cvals) { - // TSM was exhausted - for pos < len(c.res.Timestamps) && c.cache.pos < len(cvals) { - c.res.Timestamps[pos] = cvals[c.cache.pos].UnixNano() - c.res.Values[pos] = cvals[c.cache.pos].(FloatValue).RawValue() - pos++ - c.cache.pos++ - } - } - } - - if pos > 0 && c.res.Timestamps[pos-1] >= c.end { - pos -= 2 - for pos >= 0 && c.res.Timestamps[pos] >= c.end { - pos-- - } - pos++ - } - - c.res.Timestamps = c.res.Timestamps[:pos] - c.res.Values = c.res.Values[:pos] - - c.stats.ScannedValues += len(c.res.Values) - - c.stats.ScannedBytes += len(c.res.Values) * 8 - - return c.res -} - -func (c *floatArrayAscendingCursor) nextTSM() *cursors.FloatArray { - c.tsm.keyCursor.Next() - c.tsm.values = c.readArrayBlock() - c.tsm.pos = 0 - return c.tsm.values -} - -func (c *floatArrayAscendingCursor) readArrayBlock() *cursors.FloatArray { - values, _ := c.tsm.keyCursor.ReadFloatArrayBlock(c.tsm.buf) - return values -} - -type floatArrayDescendingCursor struct { - cache struct { - values Values - pos int - } - - tsm struct { - buf *cursors.FloatArray - values *cursors.FloatArray - pos int - keyCursor *KeyCursor - } - - end int64 - res *cursors.FloatArray - stats cursors.CursorStats -} - -func newFloatArrayDescendingCursor() *floatArrayDescendingCursor { - c := &floatArrayDescendingCursor{ - res: cursors.NewFloatArrayLen(MaxPointsPerBlock), - } - c.tsm.buf = cursors.NewFloatArrayLen(MaxPointsPerBlock) - return c -} - -func (c *floatArrayDescendingCursor) reset(seek, end int64, cacheValues Values, tsmKeyCursor *KeyCursor) { - c.end = end - c.cache.values = cacheValues - if len(c.cache.values) > 0 { - c.cache.pos = sort.Search(len(c.cache.values), func(i int) bool { - return c.cache.values[i].UnixNano() >= seek - }) - if c.cache.pos == len(c.cache.values) { - c.cache.pos-- - } else if c.cache.values[c.cache.pos].UnixNano() != seek { - c.cache.pos-- - } - } else { - c.cache.pos = -1 - } - - c.tsm.keyCursor = tsmKeyCursor - c.tsm.values = c.readArrayBlock() - c.tsm.pos = sort.Search(c.tsm.values.Len(), func(i int) bool { - return c.tsm.values.Timestamps[i] >= seek - }) - if c.tsm.values.Len() > 0 { - if c.tsm.pos == c.tsm.values.Len() { - c.tsm.pos-- - } else if c.tsm.values.Timestamps[c.tsm.pos] != seek { - c.tsm.pos-- - } - } else { - c.tsm.pos = -1 - } -} - -func (c *floatArrayDescendingCursor) Err() error { return nil } - -func (c *floatArrayDescendingCursor) Close() { - if c.tsm.keyCursor != nil { - c.tsm.keyCursor.Close() - c.tsm.keyCursor = nil - } - c.cache.values = nil - c.tsm.values = nil -} - -func (c *floatArrayDescendingCursor) Stats() cursors.CursorStats { return c.stats } - -func (c *floatArrayDescendingCursor) Next() *cursors.FloatArray { - pos := 0 - cvals := c.cache.values - tvals := c.tsm.values - - c.res.Timestamps = c.res.Timestamps[:cap(c.res.Timestamps)] - c.res.Values = c.res.Values[:cap(c.res.Values)] - - for pos < len(c.res.Timestamps) && c.tsm.pos >= 0 && c.cache.pos >= 0 { - ckey := cvals[c.cache.pos].UnixNano() - tkey := tvals.Timestamps[c.tsm.pos] - if ckey == tkey { - c.res.Timestamps[pos] = ckey - c.res.Values[pos] = cvals[c.cache.pos].(FloatValue).RawValue() - c.cache.pos-- - c.tsm.pos-- - } else if 
ckey > tkey { - c.res.Timestamps[pos] = ckey - c.res.Values[pos] = cvals[c.cache.pos].(FloatValue).RawValue() - c.cache.pos-- - } else { - c.res.Timestamps[pos] = tkey - c.res.Values[pos] = tvals.Values[c.tsm.pos] - c.tsm.pos-- - } - - pos++ - - if c.tsm.pos < 0 { - tvals = c.nextTSM() - } - } - - if pos < len(c.res.Timestamps) { - // cache was exhausted - if c.tsm.pos >= 0 { - for pos < len(c.res.Timestamps) && c.tsm.pos >= 0 { - c.res.Timestamps[pos] = tvals.Timestamps[c.tsm.pos] - c.res.Values[pos] = tvals.Values[c.tsm.pos] - pos++ - c.tsm.pos-- - if c.tsm.pos < 0 { - tvals = c.nextTSM() - } - } - } - - if c.cache.pos >= 0 { - // TSM was exhausted - for pos < len(c.res.Timestamps) && c.cache.pos >= 0 { - c.res.Timestamps[pos] = cvals[c.cache.pos].UnixNano() - c.res.Values[pos] = cvals[c.cache.pos].(FloatValue).RawValue() - pos++ - c.cache.pos-- - } - } - } - - // If the earliest timestamp is strictly earlier than - // the end time, remove it from the result and repeat. - if pos > 0 && c.res.Timestamps[pos-1] < c.end { - pos -= 2 - for pos >= 0 && c.res.Timestamps[pos] < c.end { - pos-- - } - pos++ - } - - c.res.Timestamps = c.res.Timestamps[:pos] - c.res.Values = c.res.Values[:pos] - - return c.res -} - -func (c *floatArrayDescendingCursor) nextTSM() *cursors.FloatArray { - c.tsm.keyCursor.Next() - c.tsm.values = c.readArrayBlock() - c.tsm.pos = len(c.tsm.values.Timestamps) - 1 - return c.tsm.values -} - -func (c *floatArrayDescendingCursor) readArrayBlock() *cursors.FloatArray { - values, _ := c.tsm.keyCursor.ReadFloatArrayBlock(c.tsm.buf) - - c.stats.ScannedValues += len(values.Values) - - c.stats.ScannedBytes += len(values.Values) * 8 - - return values -} - -type integerArrayAscendingCursor struct { - cache struct { - values Values - pos int - } - - tsm struct { - buf *cursors.IntegerArray - values *cursors.IntegerArray - pos int - keyCursor *KeyCursor - } - - end int64 - res *cursors.IntegerArray - stats cursors.CursorStats -} - -func newIntegerArrayAscendingCursor() *integerArrayAscendingCursor { - c := &integerArrayAscendingCursor{ - res: cursors.NewIntegerArrayLen(MaxPointsPerBlock), - } - c.tsm.buf = cursors.NewIntegerArrayLen(MaxPointsPerBlock) - return c -} - -func (c *integerArrayAscendingCursor) reset(seek, end int64, cacheValues Values, tsmKeyCursor *KeyCursor) { - c.end = end - c.cache.values = cacheValues - c.cache.pos = sort.Search(len(c.cache.values), func(i int) bool { - return c.cache.values[i].UnixNano() >= seek - }) - - c.tsm.keyCursor = tsmKeyCursor - c.tsm.values = c.readArrayBlock() - c.tsm.pos = sort.Search(c.tsm.values.Len(), func(i int) bool { - return c.tsm.values.Timestamps[i] >= seek - }) -} - -func (c *integerArrayAscendingCursor) Err() error { return nil } - -// close closes the cursor and any dependent cursors. -func (c *integerArrayAscendingCursor) Close() { - if c.tsm.keyCursor != nil { - c.tsm.keyCursor.Close() - c.tsm.keyCursor = nil - } - c.cache.values = nil - c.tsm.values = nil -} - -func (c *integerArrayAscendingCursor) Stats() cursors.CursorStats { return c.stats } - -// Next returns the next key/value for the cursor. 
-func (c *integerArrayAscendingCursor) Next() *cursors.IntegerArray { - pos := 0 - cvals := c.cache.values - tvals := c.tsm.values - - c.res.Timestamps = c.res.Timestamps[:cap(c.res.Timestamps)] - c.res.Values = c.res.Values[:cap(c.res.Values)] - - for pos < len(c.res.Timestamps) && c.tsm.pos < len(tvals.Timestamps) && c.cache.pos < len(cvals) { - ckey := cvals[c.cache.pos].UnixNano() - tkey := tvals.Timestamps[c.tsm.pos] - if ckey == tkey { - c.res.Timestamps[pos] = ckey - c.res.Values[pos] = cvals[c.cache.pos].(IntegerValue).RawValue() - c.cache.pos++ - c.tsm.pos++ - } else if ckey < tkey { - c.res.Timestamps[pos] = ckey - c.res.Values[pos] = cvals[c.cache.pos].(IntegerValue).RawValue() - c.cache.pos++ - } else { - c.res.Timestamps[pos] = tkey - c.res.Values[pos] = tvals.Values[c.tsm.pos] - c.tsm.pos++ - } - - pos++ - - if c.tsm.pos >= len(tvals.Timestamps) { - tvals = c.nextTSM() - } - } - - if pos < len(c.res.Timestamps) { - if c.tsm.pos < len(tvals.Timestamps) { - if pos == 0 && len(c.res.Timestamps) >= len(tvals.Timestamps) { - // optimization: all points can be served from TSM data because - // we need the entire block and the block completely fits within - // the buffer. - copy(c.res.Timestamps, tvals.Timestamps) - pos += copy(c.res.Values, tvals.Values) - c.nextTSM() - } else { - // copy as much as we can - n := copy(c.res.Timestamps[pos:], tvals.Timestamps[c.tsm.pos:]) - copy(c.res.Values[pos:], tvals.Values[c.tsm.pos:]) - pos += n - c.tsm.pos += n - if c.tsm.pos >= len(tvals.Timestamps) { - c.nextTSM() - } - } - } - - if c.cache.pos < len(cvals) { - // TSM was exhausted - for pos < len(c.res.Timestamps) && c.cache.pos < len(cvals) { - c.res.Timestamps[pos] = cvals[c.cache.pos].UnixNano() - c.res.Values[pos] = cvals[c.cache.pos].(IntegerValue).RawValue() - pos++ - c.cache.pos++ - } - } - } - - if pos > 0 && c.res.Timestamps[pos-1] >= c.end { - pos -= 2 - for pos >= 0 && c.res.Timestamps[pos] >= c.end { - pos-- - } - pos++ - } - - c.res.Timestamps = c.res.Timestamps[:pos] - c.res.Values = c.res.Values[:pos] - - c.stats.ScannedValues += len(c.res.Values) - - c.stats.ScannedBytes += len(c.res.Values) * 8 - - return c.res -} - -func (c *integerArrayAscendingCursor) nextTSM() *cursors.IntegerArray { - c.tsm.keyCursor.Next() - c.tsm.values = c.readArrayBlock() - c.tsm.pos = 0 - return c.tsm.values -} - -func (c *integerArrayAscendingCursor) readArrayBlock() *cursors.IntegerArray { - values, _ := c.tsm.keyCursor.ReadIntegerArrayBlock(c.tsm.buf) - return values -} - -type integerArrayDescendingCursor struct { - cache struct { - values Values - pos int - } - - tsm struct { - buf *cursors.IntegerArray - values *cursors.IntegerArray - pos int - keyCursor *KeyCursor - } - - end int64 - res *cursors.IntegerArray - stats cursors.CursorStats -} - -func newIntegerArrayDescendingCursor() *integerArrayDescendingCursor { - c := &integerArrayDescendingCursor{ - res: cursors.NewIntegerArrayLen(MaxPointsPerBlock), - } - c.tsm.buf = cursors.NewIntegerArrayLen(MaxPointsPerBlock) - return c -} - -func (c *integerArrayDescendingCursor) reset(seek, end int64, cacheValues Values, tsmKeyCursor *KeyCursor) { - c.end = end - c.cache.values = cacheValues - if len(c.cache.values) > 0 { - c.cache.pos = sort.Search(len(c.cache.values), func(i int) bool { - return c.cache.values[i].UnixNano() >= seek - }) - if c.cache.pos == len(c.cache.values) { - c.cache.pos-- - } else if c.cache.values[c.cache.pos].UnixNano() != seek { - c.cache.pos-- - } - } else { - c.cache.pos = -1 - } - - c.tsm.keyCursor = tsmKeyCursor - 
c.tsm.values = c.readArrayBlock() - c.tsm.pos = sort.Search(c.tsm.values.Len(), func(i int) bool { - return c.tsm.values.Timestamps[i] >= seek - }) - if c.tsm.values.Len() > 0 { - if c.tsm.pos == c.tsm.values.Len() { - c.tsm.pos-- - } else if c.tsm.values.Timestamps[c.tsm.pos] != seek { - c.tsm.pos-- - } - } else { - c.tsm.pos = -1 - } -} - -func (c *integerArrayDescendingCursor) Err() error { return nil } - -func (c *integerArrayDescendingCursor) Close() { - if c.tsm.keyCursor != nil { - c.tsm.keyCursor.Close() - c.tsm.keyCursor = nil - } - c.cache.values = nil - c.tsm.values = nil -} - -func (c *integerArrayDescendingCursor) Stats() cursors.CursorStats { return c.stats } - -func (c *integerArrayDescendingCursor) Next() *cursors.IntegerArray { - pos := 0 - cvals := c.cache.values - tvals := c.tsm.values - - c.res.Timestamps = c.res.Timestamps[:cap(c.res.Timestamps)] - c.res.Values = c.res.Values[:cap(c.res.Values)] - - for pos < len(c.res.Timestamps) && c.tsm.pos >= 0 && c.cache.pos >= 0 { - ckey := cvals[c.cache.pos].UnixNano() - tkey := tvals.Timestamps[c.tsm.pos] - if ckey == tkey { - c.res.Timestamps[pos] = ckey - c.res.Values[pos] = cvals[c.cache.pos].(IntegerValue).RawValue() - c.cache.pos-- - c.tsm.pos-- - } else if ckey > tkey { - c.res.Timestamps[pos] = ckey - c.res.Values[pos] = cvals[c.cache.pos].(IntegerValue).RawValue() - c.cache.pos-- - } else { - c.res.Timestamps[pos] = tkey - c.res.Values[pos] = tvals.Values[c.tsm.pos] - c.tsm.pos-- - } - - pos++ - - if c.tsm.pos < 0 { - tvals = c.nextTSM() - } - } - - if pos < len(c.res.Timestamps) { - // cache was exhausted - if c.tsm.pos >= 0 { - for pos < len(c.res.Timestamps) && c.tsm.pos >= 0 { - c.res.Timestamps[pos] = tvals.Timestamps[c.tsm.pos] - c.res.Values[pos] = tvals.Values[c.tsm.pos] - pos++ - c.tsm.pos-- - if c.tsm.pos < 0 { - tvals = c.nextTSM() - } - } - } - - if c.cache.pos >= 0 { - // TSM was exhausted - for pos < len(c.res.Timestamps) && c.cache.pos >= 0 { - c.res.Timestamps[pos] = cvals[c.cache.pos].UnixNano() - c.res.Values[pos] = cvals[c.cache.pos].(IntegerValue).RawValue() - pos++ - c.cache.pos-- - } - } - } - - // If the earliest timestamp is strictly earlier than - // the end time, remove it from the result and repeat. 
- if pos > 0 && c.res.Timestamps[pos-1] < c.end { - pos -= 2 - for pos >= 0 && c.res.Timestamps[pos] < c.end { - pos-- - } - pos++ - } - - c.res.Timestamps = c.res.Timestamps[:pos] - c.res.Values = c.res.Values[:pos] - - return c.res -} - -func (c *integerArrayDescendingCursor) nextTSM() *cursors.IntegerArray { - c.tsm.keyCursor.Next() - c.tsm.values = c.readArrayBlock() - c.tsm.pos = len(c.tsm.values.Timestamps) - 1 - return c.tsm.values -} - -func (c *integerArrayDescendingCursor) readArrayBlock() *cursors.IntegerArray { - values, _ := c.tsm.keyCursor.ReadIntegerArrayBlock(c.tsm.buf) - - c.stats.ScannedValues += len(values.Values) - - c.stats.ScannedBytes += len(values.Values) * 8 - - return values -} - -type unsignedArrayAscendingCursor struct { - cache struct { - values Values - pos int - } - - tsm struct { - buf *cursors.UnsignedArray - values *cursors.UnsignedArray - pos int - keyCursor *KeyCursor - } - - end int64 - res *cursors.UnsignedArray - stats cursors.CursorStats -} - -func newUnsignedArrayAscendingCursor() *unsignedArrayAscendingCursor { - c := &unsignedArrayAscendingCursor{ - res: cursors.NewUnsignedArrayLen(MaxPointsPerBlock), - } - c.tsm.buf = cursors.NewUnsignedArrayLen(MaxPointsPerBlock) - return c -} - -func (c *unsignedArrayAscendingCursor) reset(seek, end int64, cacheValues Values, tsmKeyCursor *KeyCursor) { - c.end = end - c.cache.values = cacheValues - c.cache.pos = sort.Search(len(c.cache.values), func(i int) bool { - return c.cache.values[i].UnixNano() >= seek - }) - - c.tsm.keyCursor = tsmKeyCursor - c.tsm.values = c.readArrayBlock() - c.tsm.pos = sort.Search(c.tsm.values.Len(), func(i int) bool { - return c.tsm.values.Timestamps[i] >= seek - }) -} - -func (c *unsignedArrayAscendingCursor) Err() error { return nil } - -// close closes the cursor and any dependent cursors. -func (c *unsignedArrayAscendingCursor) Close() { - if c.tsm.keyCursor != nil { - c.tsm.keyCursor.Close() - c.tsm.keyCursor = nil - } - c.cache.values = nil - c.tsm.values = nil -} - -func (c *unsignedArrayAscendingCursor) Stats() cursors.CursorStats { return c.stats } - -// Next returns the next key/value for the cursor. -func (c *unsignedArrayAscendingCursor) Next() *cursors.UnsignedArray { - pos := 0 - cvals := c.cache.values - tvals := c.tsm.values - - c.res.Timestamps = c.res.Timestamps[:cap(c.res.Timestamps)] - c.res.Values = c.res.Values[:cap(c.res.Values)] - - for pos < len(c.res.Timestamps) && c.tsm.pos < len(tvals.Timestamps) && c.cache.pos < len(cvals) { - ckey := cvals[c.cache.pos].UnixNano() - tkey := tvals.Timestamps[c.tsm.pos] - if ckey == tkey { - c.res.Timestamps[pos] = ckey - c.res.Values[pos] = cvals[c.cache.pos].(UnsignedValue).RawValue() - c.cache.pos++ - c.tsm.pos++ - } else if ckey < tkey { - c.res.Timestamps[pos] = ckey - c.res.Values[pos] = cvals[c.cache.pos].(UnsignedValue).RawValue() - c.cache.pos++ - } else { - c.res.Timestamps[pos] = tkey - c.res.Values[pos] = tvals.Values[c.tsm.pos] - c.tsm.pos++ - } - - pos++ - - if c.tsm.pos >= len(tvals.Timestamps) { - tvals = c.nextTSM() - } - } - - if pos < len(c.res.Timestamps) { - if c.tsm.pos < len(tvals.Timestamps) { - if pos == 0 && len(c.res.Timestamps) >= len(tvals.Timestamps) { - // optimization: all points can be served from TSM data because - // we need the entire block and the block completely fits within - // the buffer. 
- copy(c.res.Timestamps, tvals.Timestamps) - pos += copy(c.res.Values, tvals.Values) - c.nextTSM() - } else { - // copy as much as we can - n := copy(c.res.Timestamps[pos:], tvals.Timestamps[c.tsm.pos:]) - copy(c.res.Values[pos:], tvals.Values[c.tsm.pos:]) - pos += n - c.tsm.pos += n - if c.tsm.pos >= len(tvals.Timestamps) { - c.nextTSM() - } - } - } - - if c.cache.pos < len(cvals) { - // TSM was exhausted - for pos < len(c.res.Timestamps) && c.cache.pos < len(cvals) { - c.res.Timestamps[pos] = cvals[c.cache.pos].UnixNano() - c.res.Values[pos] = cvals[c.cache.pos].(UnsignedValue).RawValue() - pos++ - c.cache.pos++ - } - } - } - - if pos > 0 && c.res.Timestamps[pos-1] >= c.end { - pos -= 2 - for pos >= 0 && c.res.Timestamps[pos] >= c.end { - pos-- - } - pos++ - } - - c.res.Timestamps = c.res.Timestamps[:pos] - c.res.Values = c.res.Values[:pos] - - c.stats.ScannedValues += len(c.res.Values) - - c.stats.ScannedBytes += len(c.res.Values) * 8 - - return c.res -} - -func (c *unsignedArrayAscendingCursor) nextTSM() *cursors.UnsignedArray { - c.tsm.keyCursor.Next() - c.tsm.values = c.readArrayBlock() - c.tsm.pos = 0 - return c.tsm.values -} - -func (c *unsignedArrayAscendingCursor) readArrayBlock() *cursors.UnsignedArray { - values, _ := c.tsm.keyCursor.ReadUnsignedArrayBlock(c.tsm.buf) - return values -} - -type unsignedArrayDescendingCursor struct { - cache struct { - values Values - pos int - } - - tsm struct { - buf *cursors.UnsignedArray - values *cursors.UnsignedArray - pos int - keyCursor *KeyCursor - } - - end int64 - res *cursors.UnsignedArray - stats cursors.CursorStats -} - -func newUnsignedArrayDescendingCursor() *unsignedArrayDescendingCursor { - c := &unsignedArrayDescendingCursor{ - res: cursors.NewUnsignedArrayLen(MaxPointsPerBlock), - } - c.tsm.buf = cursors.NewUnsignedArrayLen(MaxPointsPerBlock) - return c -} - -func (c *unsignedArrayDescendingCursor) reset(seek, end int64, cacheValues Values, tsmKeyCursor *KeyCursor) { - c.end = end - c.cache.values = cacheValues - if len(c.cache.values) > 0 { - c.cache.pos = sort.Search(len(c.cache.values), func(i int) bool { - return c.cache.values[i].UnixNano() >= seek - }) - if c.cache.pos == len(c.cache.values) { - c.cache.pos-- - } else if c.cache.values[c.cache.pos].UnixNano() != seek { - c.cache.pos-- - } - } else { - c.cache.pos = -1 - } - - c.tsm.keyCursor = tsmKeyCursor - c.tsm.values = c.readArrayBlock() - c.tsm.pos = sort.Search(c.tsm.values.Len(), func(i int) bool { - return c.tsm.values.Timestamps[i] >= seek - }) - if c.tsm.values.Len() > 0 { - if c.tsm.pos == c.tsm.values.Len() { - c.tsm.pos-- - } else if c.tsm.values.Timestamps[c.tsm.pos] != seek { - c.tsm.pos-- - } - } else { - c.tsm.pos = -1 - } -} - -func (c *unsignedArrayDescendingCursor) Err() error { return nil } - -func (c *unsignedArrayDescendingCursor) Close() { - if c.tsm.keyCursor != nil { - c.tsm.keyCursor.Close() - c.tsm.keyCursor = nil - } - c.cache.values = nil - c.tsm.values = nil -} - -func (c *unsignedArrayDescendingCursor) Stats() cursors.CursorStats { return c.stats } - -func (c *unsignedArrayDescendingCursor) Next() *cursors.UnsignedArray { - pos := 0 - cvals := c.cache.values - tvals := c.tsm.values - - c.res.Timestamps = c.res.Timestamps[:cap(c.res.Timestamps)] - c.res.Values = c.res.Values[:cap(c.res.Values)] - - for pos < len(c.res.Timestamps) && c.tsm.pos >= 0 && c.cache.pos >= 0 { - ckey := cvals[c.cache.pos].UnixNano() - tkey := tvals.Timestamps[c.tsm.pos] - if ckey == tkey { - c.res.Timestamps[pos] = ckey - c.res.Values[pos] = 
cvals[c.cache.pos].(UnsignedValue).RawValue() - c.cache.pos-- - c.tsm.pos-- - } else if ckey > tkey { - c.res.Timestamps[pos] = ckey - c.res.Values[pos] = cvals[c.cache.pos].(UnsignedValue).RawValue() - c.cache.pos-- - } else { - c.res.Timestamps[pos] = tkey - c.res.Values[pos] = tvals.Values[c.tsm.pos] - c.tsm.pos-- - } - - pos++ - - if c.tsm.pos < 0 { - tvals = c.nextTSM() - } - } - - if pos < len(c.res.Timestamps) { - // cache was exhausted - if c.tsm.pos >= 0 { - for pos < len(c.res.Timestamps) && c.tsm.pos >= 0 { - c.res.Timestamps[pos] = tvals.Timestamps[c.tsm.pos] - c.res.Values[pos] = tvals.Values[c.tsm.pos] - pos++ - c.tsm.pos-- - if c.tsm.pos < 0 { - tvals = c.nextTSM() - } - } - } - - if c.cache.pos >= 0 { - // TSM was exhausted - for pos < len(c.res.Timestamps) && c.cache.pos >= 0 { - c.res.Timestamps[pos] = cvals[c.cache.pos].UnixNano() - c.res.Values[pos] = cvals[c.cache.pos].(UnsignedValue).RawValue() - pos++ - c.cache.pos-- - } - } - } - - // If the earliest timestamp is strictly earlier than - // the end time, remove it from the result and repeat. - if pos > 0 && c.res.Timestamps[pos-1] < c.end { - pos -= 2 - for pos >= 0 && c.res.Timestamps[pos] < c.end { - pos-- - } - pos++ - } - - c.res.Timestamps = c.res.Timestamps[:pos] - c.res.Values = c.res.Values[:pos] - - return c.res -} - -func (c *unsignedArrayDescendingCursor) nextTSM() *cursors.UnsignedArray { - c.tsm.keyCursor.Next() - c.tsm.values = c.readArrayBlock() - c.tsm.pos = len(c.tsm.values.Timestamps) - 1 - return c.tsm.values -} - -func (c *unsignedArrayDescendingCursor) readArrayBlock() *cursors.UnsignedArray { - values, _ := c.tsm.keyCursor.ReadUnsignedArrayBlock(c.tsm.buf) - - c.stats.ScannedValues += len(values.Values) - - c.stats.ScannedBytes += len(values.Values) * 8 - - return values -} - -type stringArrayAscendingCursor struct { - cache struct { - values Values - pos int - } - - tsm struct { - buf *cursors.StringArray - values *cursors.StringArray - pos int - keyCursor *KeyCursor - } - - end int64 - res *cursors.StringArray - stats cursors.CursorStats -} - -func newStringArrayAscendingCursor() *stringArrayAscendingCursor { - c := &stringArrayAscendingCursor{ - res: cursors.NewStringArrayLen(MaxPointsPerBlock), - } - c.tsm.buf = cursors.NewStringArrayLen(MaxPointsPerBlock) - return c -} - -func (c *stringArrayAscendingCursor) reset(seek, end int64, cacheValues Values, tsmKeyCursor *KeyCursor) { - c.end = end - c.cache.values = cacheValues - c.cache.pos = sort.Search(len(c.cache.values), func(i int) bool { - return c.cache.values[i].UnixNano() >= seek - }) - - c.tsm.keyCursor = tsmKeyCursor - c.tsm.values = c.readArrayBlock() - c.tsm.pos = sort.Search(c.tsm.values.Len(), func(i int) bool { - return c.tsm.values.Timestamps[i] >= seek - }) -} - -func (c *stringArrayAscendingCursor) Err() error { return nil } - -// close closes the cursor and any dependent cursors. -func (c *stringArrayAscendingCursor) Close() { - if c.tsm.keyCursor != nil { - c.tsm.keyCursor.Close() - c.tsm.keyCursor = nil - } - c.cache.values = nil - c.tsm.values = nil -} - -func (c *stringArrayAscendingCursor) Stats() cursors.CursorStats { return c.stats } - -// Next returns the next key/value for the cursor. 
-func (c *stringArrayAscendingCursor) Next() *cursors.StringArray { - pos := 0 - cvals := c.cache.values - tvals := c.tsm.values - - c.res.Timestamps = c.res.Timestamps[:cap(c.res.Timestamps)] - c.res.Values = c.res.Values[:cap(c.res.Values)] - - for pos < len(c.res.Timestamps) && c.tsm.pos < len(tvals.Timestamps) && c.cache.pos < len(cvals) { - ckey := cvals[c.cache.pos].UnixNano() - tkey := tvals.Timestamps[c.tsm.pos] - if ckey == tkey { - c.res.Timestamps[pos] = ckey - c.res.Values[pos] = cvals[c.cache.pos].(StringValue).RawValue() - c.cache.pos++ - c.tsm.pos++ - } else if ckey < tkey { - c.res.Timestamps[pos] = ckey - c.res.Values[pos] = cvals[c.cache.pos].(StringValue).RawValue() - c.cache.pos++ - } else { - c.res.Timestamps[pos] = tkey - c.res.Values[pos] = tvals.Values[c.tsm.pos] - c.tsm.pos++ - } - - pos++ - - if c.tsm.pos >= len(tvals.Timestamps) { - tvals = c.nextTSM() - } - } - - if pos < len(c.res.Timestamps) { - if c.tsm.pos < len(tvals.Timestamps) { - if pos == 0 && len(c.res.Timestamps) >= len(tvals.Timestamps) { - // optimization: all points can be served from TSM data because - // we need the entire block and the block completely fits within - // the buffer. - copy(c.res.Timestamps, tvals.Timestamps) - pos += copy(c.res.Values, tvals.Values) - c.nextTSM() - } else { - // copy as much as we can - n := copy(c.res.Timestamps[pos:], tvals.Timestamps[c.tsm.pos:]) - copy(c.res.Values[pos:], tvals.Values[c.tsm.pos:]) - pos += n - c.tsm.pos += n - if c.tsm.pos >= len(tvals.Timestamps) { - c.nextTSM() - } - } - } - - if c.cache.pos < len(cvals) { - // TSM was exhausted - for pos < len(c.res.Timestamps) && c.cache.pos < len(cvals) { - c.res.Timestamps[pos] = cvals[c.cache.pos].UnixNano() - c.res.Values[pos] = cvals[c.cache.pos].(StringValue).RawValue() - pos++ - c.cache.pos++ - } - } - } - - if pos > 0 && c.res.Timestamps[pos-1] >= c.end { - pos -= 2 - for pos >= 0 && c.res.Timestamps[pos] >= c.end { - pos-- - } - pos++ - } - - c.res.Timestamps = c.res.Timestamps[:pos] - c.res.Values = c.res.Values[:pos] - - c.stats.ScannedValues += len(c.res.Values) - - for _, v := range c.res.Values { - c.stats.ScannedBytes += len(v) - } - - return c.res -} - -func (c *stringArrayAscendingCursor) nextTSM() *cursors.StringArray { - c.tsm.keyCursor.Next() - c.tsm.values = c.readArrayBlock() - c.tsm.pos = 0 - return c.tsm.values -} - -func (c *stringArrayAscendingCursor) readArrayBlock() *cursors.StringArray { - values, _ := c.tsm.keyCursor.ReadStringArrayBlock(c.tsm.buf) - return values -} - -type stringArrayDescendingCursor struct { - cache struct { - values Values - pos int - } - - tsm struct { - buf *cursors.StringArray - values *cursors.StringArray - pos int - keyCursor *KeyCursor - } - - end int64 - res *cursors.StringArray - stats cursors.CursorStats -} - -func newStringArrayDescendingCursor() *stringArrayDescendingCursor { - c := &stringArrayDescendingCursor{ - res: cursors.NewStringArrayLen(MaxPointsPerBlock), - } - c.tsm.buf = cursors.NewStringArrayLen(MaxPointsPerBlock) - return c -} - -func (c *stringArrayDescendingCursor) reset(seek, end int64, cacheValues Values, tsmKeyCursor *KeyCursor) { - c.end = end - c.cache.values = cacheValues - if len(c.cache.values) > 0 { - c.cache.pos = sort.Search(len(c.cache.values), func(i int) bool { - return c.cache.values[i].UnixNano() >= seek - }) - if c.cache.pos == len(c.cache.values) { - c.cache.pos-- - } else if c.cache.values[c.cache.pos].UnixNano() != seek { - c.cache.pos-- - } - } else { - c.cache.pos = -1 - } - - c.tsm.keyCursor = tsmKeyCursor - 
c.tsm.values = c.readArrayBlock() - c.tsm.pos = sort.Search(c.tsm.values.Len(), func(i int) bool { - return c.tsm.values.Timestamps[i] >= seek - }) - if c.tsm.values.Len() > 0 { - if c.tsm.pos == c.tsm.values.Len() { - c.tsm.pos-- - } else if c.tsm.values.Timestamps[c.tsm.pos] != seek { - c.tsm.pos-- - } - } else { - c.tsm.pos = -1 - } -} - -func (c *stringArrayDescendingCursor) Err() error { return nil } - -func (c *stringArrayDescendingCursor) Close() { - if c.tsm.keyCursor != nil { - c.tsm.keyCursor.Close() - c.tsm.keyCursor = nil - } - c.cache.values = nil - c.tsm.values = nil -} - -func (c *stringArrayDescendingCursor) Stats() cursors.CursorStats { return c.stats } - -func (c *stringArrayDescendingCursor) Next() *cursors.StringArray { - pos := 0 - cvals := c.cache.values - tvals := c.tsm.values - - c.res.Timestamps = c.res.Timestamps[:cap(c.res.Timestamps)] - c.res.Values = c.res.Values[:cap(c.res.Values)] - - for pos < len(c.res.Timestamps) && c.tsm.pos >= 0 && c.cache.pos >= 0 { - ckey := cvals[c.cache.pos].UnixNano() - tkey := tvals.Timestamps[c.tsm.pos] - if ckey == tkey { - c.res.Timestamps[pos] = ckey - c.res.Values[pos] = cvals[c.cache.pos].(StringValue).RawValue() - c.cache.pos-- - c.tsm.pos-- - } else if ckey > tkey { - c.res.Timestamps[pos] = ckey - c.res.Values[pos] = cvals[c.cache.pos].(StringValue).RawValue() - c.cache.pos-- - } else { - c.res.Timestamps[pos] = tkey - c.res.Values[pos] = tvals.Values[c.tsm.pos] - c.tsm.pos-- - } - - pos++ - - if c.tsm.pos < 0 { - tvals = c.nextTSM() - } - } - - if pos < len(c.res.Timestamps) { - // cache was exhausted - if c.tsm.pos >= 0 { - for pos < len(c.res.Timestamps) && c.tsm.pos >= 0 { - c.res.Timestamps[pos] = tvals.Timestamps[c.tsm.pos] - c.res.Values[pos] = tvals.Values[c.tsm.pos] - pos++ - c.tsm.pos-- - if c.tsm.pos < 0 { - tvals = c.nextTSM() - } - } - } - - if c.cache.pos >= 0 { - // TSM was exhausted - for pos < len(c.res.Timestamps) && c.cache.pos >= 0 { - c.res.Timestamps[pos] = cvals[c.cache.pos].UnixNano() - c.res.Values[pos] = cvals[c.cache.pos].(StringValue).RawValue() - pos++ - c.cache.pos-- - } - } - } - - // If the earliest timestamp is strictly earlier than - // the end time, remove it from the result and repeat. 
- if pos > 0 && c.res.Timestamps[pos-1] < c.end { - pos -= 2 - for pos >= 0 && c.res.Timestamps[pos] < c.end { - pos-- - } - pos++ - } - - c.res.Timestamps = c.res.Timestamps[:pos] - c.res.Values = c.res.Values[:pos] - - return c.res -} - -func (c *stringArrayDescendingCursor) nextTSM() *cursors.StringArray { - c.tsm.keyCursor.Next() - c.tsm.values = c.readArrayBlock() - c.tsm.pos = len(c.tsm.values.Timestamps) - 1 - return c.tsm.values -} - -func (c *stringArrayDescendingCursor) readArrayBlock() *cursors.StringArray { - values, _ := c.tsm.keyCursor.ReadStringArrayBlock(c.tsm.buf) - - c.stats.ScannedValues += len(values.Values) - - for _, v := range values.Values { - c.stats.ScannedBytes += len(v) - } - - return values -} - -type booleanArrayAscendingCursor struct { - cache struct { - values Values - pos int - } - - tsm struct { - buf *cursors.BooleanArray - values *cursors.BooleanArray - pos int - keyCursor *KeyCursor - } - - end int64 - res *cursors.BooleanArray - stats cursors.CursorStats -} - -func newBooleanArrayAscendingCursor() *booleanArrayAscendingCursor { - c := &booleanArrayAscendingCursor{ - res: cursors.NewBooleanArrayLen(MaxPointsPerBlock), - } - c.tsm.buf = cursors.NewBooleanArrayLen(MaxPointsPerBlock) - return c -} - -func (c *booleanArrayAscendingCursor) reset(seek, end int64, cacheValues Values, tsmKeyCursor *KeyCursor) { - c.end = end - c.cache.values = cacheValues - c.cache.pos = sort.Search(len(c.cache.values), func(i int) bool { - return c.cache.values[i].UnixNano() >= seek - }) - - c.tsm.keyCursor = tsmKeyCursor - c.tsm.values = c.readArrayBlock() - c.tsm.pos = sort.Search(c.tsm.values.Len(), func(i int) bool { - return c.tsm.values.Timestamps[i] >= seek - }) -} - -func (c *booleanArrayAscendingCursor) Err() error { return nil } - -// close closes the cursor and any dependent cursors. -func (c *booleanArrayAscendingCursor) Close() { - if c.tsm.keyCursor != nil { - c.tsm.keyCursor.Close() - c.tsm.keyCursor = nil - } - c.cache.values = nil - c.tsm.values = nil -} - -func (c *booleanArrayAscendingCursor) Stats() cursors.CursorStats { return c.stats } - -// Next returns the next key/value for the cursor. -func (c *booleanArrayAscendingCursor) Next() *cursors.BooleanArray { - pos := 0 - cvals := c.cache.values - tvals := c.tsm.values - - c.res.Timestamps = c.res.Timestamps[:cap(c.res.Timestamps)] - c.res.Values = c.res.Values[:cap(c.res.Values)] - - for pos < len(c.res.Timestamps) && c.tsm.pos < len(tvals.Timestamps) && c.cache.pos < len(cvals) { - ckey := cvals[c.cache.pos].UnixNano() - tkey := tvals.Timestamps[c.tsm.pos] - if ckey == tkey { - c.res.Timestamps[pos] = ckey - c.res.Values[pos] = cvals[c.cache.pos].(BooleanValue).RawValue() - c.cache.pos++ - c.tsm.pos++ - } else if ckey < tkey { - c.res.Timestamps[pos] = ckey - c.res.Values[pos] = cvals[c.cache.pos].(BooleanValue).RawValue() - c.cache.pos++ - } else { - c.res.Timestamps[pos] = tkey - c.res.Values[pos] = tvals.Values[c.tsm.pos] - c.tsm.pos++ - } - - pos++ - - if c.tsm.pos >= len(tvals.Timestamps) { - tvals = c.nextTSM() - } - } - - if pos < len(c.res.Timestamps) { - if c.tsm.pos < len(tvals.Timestamps) { - if pos == 0 && len(c.res.Timestamps) >= len(tvals.Timestamps) { - // optimization: all points can be served from TSM data because - // we need the entire block and the block completely fits within - // the buffer. 
- copy(c.res.Timestamps, tvals.Timestamps) - pos += copy(c.res.Values, tvals.Values) - c.nextTSM() - } else { - // copy as much as we can - n := copy(c.res.Timestamps[pos:], tvals.Timestamps[c.tsm.pos:]) - copy(c.res.Values[pos:], tvals.Values[c.tsm.pos:]) - pos += n - c.tsm.pos += n - if c.tsm.pos >= len(tvals.Timestamps) { - c.nextTSM() - } - } - } - - if c.cache.pos < len(cvals) { - // TSM was exhausted - for pos < len(c.res.Timestamps) && c.cache.pos < len(cvals) { - c.res.Timestamps[pos] = cvals[c.cache.pos].UnixNano() - c.res.Values[pos] = cvals[c.cache.pos].(BooleanValue).RawValue() - pos++ - c.cache.pos++ - } - } - } - - if pos > 0 && c.res.Timestamps[pos-1] >= c.end { - pos -= 2 - for pos >= 0 && c.res.Timestamps[pos] >= c.end { - pos-- - } - pos++ - } - - c.res.Timestamps = c.res.Timestamps[:pos] - c.res.Values = c.res.Values[:pos] - - c.stats.ScannedValues += len(c.res.Values) - - c.stats.ScannedBytes += len(c.res.Values) * 1 - - return c.res -} - -func (c *booleanArrayAscendingCursor) nextTSM() *cursors.BooleanArray { - c.tsm.keyCursor.Next() - c.tsm.values = c.readArrayBlock() - c.tsm.pos = 0 - return c.tsm.values -} - -func (c *booleanArrayAscendingCursor) readArrayBlock() *cursors.BooleanArray { - values, _ := c.tsm.keyCursor.ReadBooleanArrayBlock(c.tsm.buf) - return values -} - -type booleanArrayDescendingCursor struct { - cache struct { - values Values - pos int - } - - tsm struct { - buf *cursors.BooleanArray - values *cursors.BooleanArray - pos int - keyCursor *KeyCursor - } - - end int64 - res *cursors.BooleanArray - stats cursors.CursorStats -} - -func newBooleanArrayDescendingCursor() *booleanArrayDescendingCursor { - c := &booleanArrayDescendingCursor{ - res: cursors.NewBooleanArrayLen(MaxPointsPerBlock), - } - c.tsm.buf = cursors.NewBooleanArrayLen(MaxPointsPerBlock) - return c -} - -func (c *booleanArrayDescendingCursor) reset(seek, end int64, cacheValues Values, tsmKeyCursor *KeyCursor) { - c.end = end - c.cache.values = cacheValues - if len(c.cache.values) > 0 { - c.cache.pos = sort.Search(len(c.cache.values), func(i int) bool { - return c.cache.values[i].UnixNano() >= seek - }) - if c.cache.pos == len(c.cache.values) { - c.cache.pos-- - } else if c.cache.values[c.cache.pos].UnixNano() != seek { - c.cache.pos-- - } - } else { - c.cache.pos = -1 - } - - c.tsm.keyCursor = tsmKeyCursor - c.tsm.values = c.readArrayBlock() - c.tsm.pos = sort.Search(c.tsm.values.Len(), func(i int) bool { - return c.tsm.values.Timestamps[i] >= seek - }) - if c.tsm.values.Len() > 0 { - if c.tsm.pos == c.tsm.values.Len() { - c.tsm.pos-- - } else if c.tsm.values.Timestamps[c.tsm.pos] != seek { - c.tsm.pos-- - } - } else { - c.tsm.pos = -1 - } -} - -func (c *booleanArrayDescendingCursor) Err() error { return nil } - -func (c *booleanArrayDescendingCursor) Close() { - if c.tsm.keyCursor != nil { - c.tsm.keyCursor.Close() - c.tsm.keyCursor = nil - } - c.cache.values = nil - c.tsm.values = nil -} - -func (c *booleanArrayDescendingCursor) Stats() cursors.CursorStats { return c.stats } - -func (c *booleanArrayDescendingCursor) Next() *cursors.BooleanArray { - pos := 0 - cvals := c.cache.values - tvals := c.tsm.values - - c.res.Timestamps = c.res.Timestamps[:cap(c.res.Timestamps)] - c.res.Values = c.res.Values[:cap(c.res.Values)] - - for pos < len(c.res.Timestamps) && c.tsm.pos >= 0 && c.cache.pos >= 0 { - ckey := cvals[c.cache.pos].UnixNano() - tkey := tvals.Timestamps[c.tsm.pos] - if ckey == tkey { - c.res.Timestamps[pos] = ckey - c.res.Values[pos] = cvals[c.cache.pos].(BooleanValue).RawValue() 
- c.cache.pos-- - c.tsm.pos-- - } else if ckey > tkey { - c.res.Timestamps[pos] = ckey - c.res.Values[pos] = cvals[c.cache.pos].(BooleanValue).RawValue() - c.cache.pos-- - } else { - c.res.Timestamps[pos] = tkey - c.res.Values[pos] = tvals.Values[c.tsm.pos] - c.tsm.pos-- - } - - pos++ - - if c.tsm.pos < 0 { - tvals = c.nextTSM() - } - } - - if pos < len(c.res.Timestamps) { - // cache was exhausted - if c.tsm.pos >= 0 { - for pos < len(c.res.Timestamps) && c.tsm.pos >= 0 { - c.res.Timestamps[pos] = tvals.Timestamps[c.tsm.pos] - c.res.Values[pos] = tvals.Values[c.tsm.pos] - pos++ - c.tsm.pos-- - if c.tsm.pos < 0 { - tvals = c.nextTSM() - } - } - } - - if c.cache.pos >= 0 { - // TSM was exhausted - for pos < len(c.res.Timestamps) && c.cache.pos >= 0 { - c.res.Timestamps[pos] = cvals[c.cache.pos].UnixNano() - c.res.Values[pos] = cvals[c.cache.pos].(BooleanValue).RawValue() - pos++ - c.cache.pos-- - } - } - } - - // If the earliest timestamp is strictly earlier than - // the end time, remove it from the result and repeat. - if pos > 0 && c.res.Timestamps[pos-1] < c.end { - pos -= 2 - for pos >= 0 && c.res.Timestamps[pos] < c.end { - pos-- - } - pos++ - } - - c.res.Timestamps = c.res.Timestamps[:pos] - c.res.Values = c.res.Values[:pos] - - return c.res -} - -func (c *booleanArrayDescendingCursor) nextTSM() *cursors.BooleanArray { - c.tsm.keyCursor.Next() - c.tsm.values = c.readArrayBlock() - c.tsm.pos = len(c.tsm.values.Timestamps) - 1 - return c.tsm.values -} - -func (c *booleanArrayDescendingCursor) readArrayBlock() *cursors.BooleanArray { - values, _ := c.tsm.keyCursor.ReadBooleanArrayBlock(c.tsm.buf) - - c.stats.ScannedValues += len(values.Values) - - c.stats.ScannedBytes += len(values.Values) * 1 - - return values -} diff --git a/tsdb/tsm1/array_cursor.gen.go.tmpl b/tsdb/tsm1/array_cursor.gen.go.tmpl deleted file mode 100644 index e10f29b245..0000000000 --- a/tsdb/tsm1/array_cursor.gen.go.tmpl +++ /dev/null @@ -1,341 +0,0 @@ -package tsm1 - -import ( - "sort" - - "github.com/influxdata/influxdb/v2/tsdb/cursors" -) - -// Array Cursors - -{{range .}} -{{$arrayType := print "*cursors." .Name "Array"}} -{{$type := print .name "ArrayAscendingCursor"}} -{{$Type := print .Name "ArrayAscendingCursor"}} - -type {{$type}} struct { - cache struct { - values Values - pos int - } - - tsm struct { - buf {{$arrayType}} - values {{$arrayType}} - pos int - keyCursor *KeyCursor - } - - end int64 - res {{$arrayType}} - stats cursors.CursorStats -} - -func new{{$Type}}() *{{$type}} { - c := &{{$type}}{ - res: cursors.New{{.Name}}ArrayLen(MaxPointsPerBlock), - } - c.tsm.buf = cursors.New{{.Name}}ArrayLen(MaxPointsPerBlock) - return c -} - -func (c *{{$type}}) reset(seek, end int64, cacheValues Values, tsmKeyCursor *KeyCursor) { - c.end = end - c.cache.values = cacheValues - c.cache.pos = sort.Search(len(c.cache.values), func(i int) bool { - return c.cache.values[i].UnixNano() >= seek - }) - - c.tsm.keyCursor = tsmKeyCursor - c.tsm.values = c.readArrayBlock() - c.tsm.pos = sort.Search(c.tsm.values.Len(), func(i int) bool { - return c.tsm.values.Timestamps[i] >= seek - }) -} - -func (c *{{$type}}) Err() error { return nil } - -// close closes the cursor and any dependent cursors. -func (c *{{$type}}) Close() { - if c.tsm.keyCursor != nil { - c.tsm.keyCursor.Close() - c.tsm.keyCursor = nil - } - c.cache.values = nil - c.tsm.values = nil -} - -func (c *{{$type}}) Stats() cursors.CursorStats { return c.stats } - -// Next returns the next key/value for the cursor. 
-func (c *{{$type}}) Next() {{$arrayType}} { - pos := 0 - cvals := c.cache.values - tvals := c.tsm.values - - c.res.Timestamps = c.res.Timestamps[:cap(c.res.Timestamps)] - c.res.Values = c.res.Values[:cap(c.res.Values)] - - for pos < len(c.res.Timestamps) && c.tsm.pos < len(tvals.Timestamps) && c.cache.pos < len(cvals) { - ckey := cvals[c.cache.pos].UnixNano() - tkey := tvals.Timestamps[c.tsm.pos] - if ckey == tkey { - c.res.Timestamps[pos] = ckey - c.res.Values[pos] = cvals[c.cache.pos].({{.Name}}Value).RawValue() - c.cache.pos++ - c.tsm.pos++ - } else if ckey < tkey { - c.res.Timestamps[pos] = ckey - c.res.Values[pos] = cvals[c.cache.pos].({{.Name}}Value).RawValue() - c.cache.pos++ - } else { - c.res.Timestamps[pos] = tkey - c.res.Values[pos] = tvals.Values[c.tsm.pos] - c.tsm.pos++ - } - - pos++ - - if c.tsm.pos >= len(tvals.Timestamps) { - tvals = c.nextTSM() - } - } - - if pos < len(c.res.Timestamps) { - if c.tsm.pos < len(tvals.Timestamps) { - if pos == 0 && len(c.res.Timestamps) >= len(tvals.Timestamps){ - // optimization: all points can be served from TSM data because - // we need the entire block and the block completely fits within - // the buffer. - copy(c.res.Timestamps, tvals.Timestamps) - pos += copy(c.res.Values, tvals.Values) - c.nextTSM() - } else { - // copy as much as we can - n := copy(c.res.Timestamps[pos:], tvals.Timestamps[c.tsm.pos:]) - copy(c.res.Values[pos:], tvals.Values[c.tsm.pos:]) - pos += n - c.tsm.pos += n - if c.tsm.pos >= len(tvals.Timestamps) { - c.nextTSM() - } - } - } - - if c.cache.pos < len(cvals) { - // TSM was exhausted - for pos < len(c.res.Timestamps) && c.cache.pos < len(cvals) { - c.res.Timestamps[pos] = cvals[c.cache.pos].UnixNano() - c.res.Values[pos] = cvals[c.cache.pos].({{.Name}}Value).RawValue() - pos++ - c.cache.pos++ - } - } - } - - if pos > 0 && c.res.Timestamps[pos-1] >= c.end { - pos -= 2 - for pos >= 0 && c.res.Timestamps[pos] >= c.end { - pos-- - } - pos++ - } - - c.res.Timestamps = c.res.Timestamps[:pos] - c.res.Values = c.res.Values[:pos] - - c.stats.ScannedValues += len(c.res.Values) - {{if eq .Name "String" }} - for _, v := range c.res.Values { - c.stats.ScannedBytes += len(v) - } - {{else}} - c.stats.ScannedBytes += len(c.res.Values) * {{.Size}} - {{end}} - - return c.res -} - -func (c *{{$type}}) nextTSM() {{$arrayType}} { - c.tsm.keyCursor.Next() - c.tsm.values = c.readArrayBlock() - c.tsm.pos = 0 - return c.tsm.values -} - -func (c *{{$type}}) readArrayBlock() {{$arrayType}} { - values, _ := c.tsm.keyCursor.Read{{.Name}}ArrayBlock(c.tsm.buf) - return values -} - -{{$type := print .name "ArrayDescendingCursor"}} -{{$Type := print .Name "ArrayDescendingCursor"}} - -type {{$type}} struct { - cache struct { - values Values - pos int - } - - tsm struct { - buf {{$arrayType}} - values {{$arrayType}} - pos int - keyCursor *KeyCursor - } - - end int64 - res {{$arrayType}} - stats cursors.CursorStats -} - -func new{{$Type}}() *{{$type}} { - c := &{{$type}}{ - res: cursors.New{{.Name}}ArrayLen(MaxPointsPerBlock), - } - c.tsm.buf = cursors.New{{.Name}}ArrayLen(MaxPointsPerBlock) - return c -} - -func (c *{{$type}}) reset(seek, end int64, cacheValues Values, tsmKeyCursor *KeyCursor) { - c.end = end - c.cache.values = cacheValues - if len(c.cache.values) > 0 { - c.cache.pos = sort.Search(len(c.cache.values), func(i int) bool { - return c.cache.values[i].UnixNano() >= seek - }) - if c.cache.pos == len(c.cache.values) { - c.cache.pos-- - } else if c.cache.values[c.cache.pos].UnixNano() != seek { - c.cache.pos-- - } - } else { - c.cache.pos 
= -1 - } - - c.tsm.keyCursor = tsmKeyCursor - c.tsm.values = c.readArrayBlock() - c.tsm.pos = sort.Search(c.tsm.values.Len(), func(i int) bool { - return c.tsm.values.Timestamps[i] >= seek - }) - if c.tsm.values.Len() > 0 { - if c.tsm.pos == c.tsm.values.Len() { - c.tsm.pos-- - } else if c.tsm.values.Timestamps[c.tsm.pos] != seek { - c.tsm.pos-- - } - } else { - c.tsm.pos = -1 - } -} - -func (c *{{$type}}) Err() error { return nil } - -func (c *{{$type}}) Close() { - if c.tsm.keyCursor != nil { - c.tsm.keyCursor.Close() - c.tsm.keyCursor = nil - } - c.cache.values = nil - c.tsm.values = nil -} - -func (c *{{$type}}) Stats() cursors.CursorStats { return c.stats } - -func (c *{{$type}}) Next() {{$arrayType}} { - pos := 0 - cvals := c.cache.values - tvals := c.tsm.values - - c.res.Timestamps = c.res.Timestamps[:cap(c.res.Timestamps)] - c.res.Values = c.res.Values[:cap(c.res.Values)] - - for pos < len(c.res.Timestamps) && c.tsm.pos >= 0 && c.cache.pos >= 0 { - ckey := cvals[c.cache.pos].UnixNano() - tkey := tvals.Timestamps[c.tsm.pos] - if ckey == tkey { - c.res.Timestamps[pos] = ckey - c.res.Values[pos] = cvals[c.cache.pos].({{.Name}}Value).RawValue() - c.cache.pos-- - c.tsm.pos-- - } else if ckey > tkey { - c.res.Timestamps[pos] = ckey - c.res.Values[pos] = cvals[c.cache.pos].({{.Name}}Value).RawValue() - c.cache.pos-- - } else { - c.res.Timestamps[pos] = tkey - c.res.Values[pos] = tvals.Values[c.tsm.pos] - c.tsm.pos-- - } - - pos++ - - if c.tsm.pos < 0 { - tvals = c.nextTSM() - } - } - - if pos < len(c.res.Timestamps) { - // cache was exhausted - if c.tsm.pos >= 0 { - for pos < len(c.res.Timestamps) && c.tsm.pos >= 0 { - c.res.Timestamps[pos] = tvals.Timestamps[c.tsm.pos] - c.res.Values[pos] = tvals.Values[c.tsm.pos] - pos++ - c.tsm.pos-- - if c.tsm.pos < 0 { - tvals = c.nextTSM() - } - } - } - - if c.cache.pos >= 0 { - // TSM was exhausted - for pos < len(c.res.Timestamps) && c.cache.pos >= 0 { - c.res.Timestamps[pos] = cvals[c.cache.pos].UnixNano() - c.res.Values[pos] = cvals[c.cache.pos].({{.Name}}Value).RawValue() - pos++ - c.cache.pos-- - } - } - } - - // If the earliest timestamp is strictly earlier than - // the end time, remove it from the result and repeat. 
- if pos > 0 && c.res.Timestamps[pos-1] < c.end { - pos -= 2 - for pos >= 0 && c.res.Timestamps[pos] < c.end { - pos-- - } - pos++ - } - - c.res.Timestamps = c.res.Timestamps[:pos] - c.res.Values = c.res.Values[:pos] - - return c.res -} - -func (c *{{$type}}) nextTSM() {{$arrayType}} { - c.tsm.keyCursor.Next() - c.tsm.values = c.readArrayBlock() - c.tsm.pos = len(c.tsm.values.Timestamps) - 1 - return c.tsm.values -} - -func (c *{{$type}}) readArrayBlock() {{$arrayType}} { - values, _ := c.tsm.keyCursor.Read{{.Name}}ArrayBlock(c.tsm.buf) - - c.stats.ScannedValues += len(values.Values) - {{if eq .Name "String" }} - for _, v := range values.Values { - c.stats.ScannedBytes += len(v) - } - {{else}} - c.stats.ScannedBytes += len(values.Values) * {{.Size}} - {{end}} - - return values -} - -{{end}} diff --git a/tsdb/tsm1/array_cursor.gen.go.tmpldata b/tsdb/tsm1/array_cursor.gen.go.tmpldata deleted file mode 100644 index 648898fbdb..0000000000 --- a/tsdb/tsm1/array_cursor.gen.go.tmpldata +++ /dev/null @@ -1,42 +0,0 @@ -[ - { - "Name":"Float", - "name":"float", - "Type":"float64", - "ValueType":"FloatValue", - "Nil":"0", - "Size":"8" - }, - { - "Name":"Integer", - "name":"integer", - "Type":"int64", - "ValueType":"IntegerValue", - "Nil":"0", - "Size":"8" - }, - { - "Name":"Unsigned", - "name":"unsigned", - "Type":"uint64", - "ValueType":"UnsignedValue", - "Nil":"0", - "Size":"8" - }, - { - "Name":"String", - "name":"string", - "Type":"string", - "ValueType":"StringValue", - "Nil":"\"\"", - "Size":"0" - }, - { - "Name":"Boolean", - "name":"boolean", - "Type":"bool", - "ValueType":"BooleanValue", - "Nil":"false", - "Size":"1" - } -] diff --git a/tsdb/tsm1/array_cursor_iterator.gen.go b/tsdb/tsm1/array_cursor_iterator.gen.go deleted file mode 100644 index 9b72f781c5..0000000000 --- a/tsdb/tsm1/array_cursor_iterator.gen.go +++ /dev/null @@ -1,130 +0,0 @@ -// Generated by tmpl -// https://github.com/benbjohnson/tmpl -// -// DO NOT EDIT! -// Source: array_cursor_iterator.gen.go.tmpl - -package tsm1 - -import ( - "context" - - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/query" - "github.com/influxdata/influxdb/v2/tsdb/cursors" -) - -// buildFloatArrayCursor creates an array cursor for a float field. -func (q *arrayCursorIterator) buildFloatArrayCursor(ctx context.Context, name []byte, tags models.Tags, field string, opt query.IteratorOptions) cursors.FloatArrayCursor { - key := q.seriesFieldKeyBytes(name, tags, field) - cacheValues := q.e.Cache.Values(key) - keyCursor := q.e.KeyCursor(ctx, key, opt.SeekTime(), opt.Ascending) - - q.e.readTracker.AddSeeks(uint64(keyCursor.seekN())) - - if opt.Ascending { - if q.asc.Float == nil { - q.asc.Float = newFloatArrayAscendingCursor() - } - q.asc.Float.reset(opt.SeekTime(), opt.StopTime(), cacheValues, keyCursor) - return q.asc.Float - } else { - if q.desc.Float == nil { - q.desc.Float = newFloatArrayDescendingCursor() - } - q.desc.Float.reset(opt.SeekTime(), opt.StopTime(), cacheValues, keyCursor) - return q.desc.Float - } -} - -// buildIntegerArrayCursor creates an array cursor for a integer field. 
-func (q *arrayCursorIterator) buildIntegerArrayCursor(ctx context.Context, name []byte, tags models.Tags, field string, opt query.IteratorOptions) cursors.IntegerArrayCursor { - key := q.seriesFieldKeyBytes(name, tags, field) - cacheValues := q.e.Cache.Values(key) - keyCursor := q.e.KeyCursor(ctx, key, opt.SeekTime(), opt.Ascending) - - q.e.readTracker.AddSeeks(uint64(keyCursor.seekN())) - - if opt.Ascending { - if q.asc.Integer == nil { - q.asc.Integer = newIntegerArrayAscendingCursor() - } - q.asc.Integer.reset(opt.SeekTime(), opt.StopTime(), cacheValues, keyCursor) - return q.asc.Integer - } else { - if q.desc.Integer == nil { - q.desc.Integer = newIntegerArrayDescendingCursor() - } - q.desc.Integer.reset(opt.SeekTime(), opt.StopTime(), cacheValues, keyCursor) - return q.desc.Integer - } -} - -// buildUnsignedArrayCursor creates an array cursor for a unsigned field. -func (q *arrayCursorIterator) buildUnsignedArrayCursor(ctx context.Context, name []byte, tags models.Tags, field string, opt query.IteratorOptions) cursors.UnsignedArrayCursor { - key := q.seriesFieldKeyBytes(name, tags, field) - cacheValues := q.e.Cache.Values(key) - keyCursor := q.e.KeyCursor(ctx, key, opt.SeekTime(), opt.Ascending) - - q.e.readTracker.AddSeeks(uint64(keyCursor.seekN())) - - if opt.Ascending { - if q.asc.Unsigned == nil { - q.asc.Unsigned = newUnsignedArrayAscendingCursor() - } - q.asc.Unsigned.reset(opt.SeekTime(), opt.StopTime(), cacheValues, keyCursor) - return q.asc.Unsigned - } else { - if q.desc.Unsigned == nil { - q.desc.Unsigned = newUnsignedArrayDescendingCursor() - } - q.desc.Unsigned.reset(opt.SeekTime(), opt.StopTime(), cacheValues, keyCursor) - return q.desc.Unsigned - } -} - -// buildStringArrayCursor creates an array cursor for a string field. -func (q *arrayCursorIterator) buildStringArrayCursor(ctx context.Context, name []byte, tags models.Tags, field string, opt query.IteratorOptions) cursors.StringArrayCursor { - key := q.seriesFieldKeyBytes(name, tags, field) - cacheValues := q.e.Cache.Values(key) - keyCursor := q.e.KeyCursor(ctx, key, opt.SeekTime(), opt.Ascending) - - q.e.readTracker.AddSeeks(uint64(keyCursor.seekN())) - - if opt.Ascending { - if q.asc.String == nil { - q.asc.String = newStringArrayAscendingCursor() - } - q.asc.String.reset(opt.SeekTime(), opt.StopTime(), cacheValues, keyCursor) - return q.asc.String - } else { - if q.desc.String == nil { - q.desc.String = newStringArrayDescendingCursor() - } - q.desc.String.reset(opt.SeekTime(), opt.StopTime(), cacheValues, keyCursor) - return q.desc.String - } -} - -// buildBooleanArrayCursor creates an array cursor for a boolean field. 
-func (q *arrayCursorIterator) buildBooleanArrayCursor(ctx context.Context, name []byte, tags models.Tags, field string, opt query.IteratorOptions) cursors.BooleanArrayCursor { - key := q.seriesFieldKeyBytes(name, tags, field) - cacheValues := q.e.Cache.Values(key) - keyCursor := q.e.KeyCursor(ctx, key, opt.SeekTime(), opt.Ascending) - - q.e.readTracker.AddSeeks(uint64(keyCursor.seekN())) - - if opt.Ascending { - if q.asc.Boolean == nil { - q.asc.Boolean = newBooleanArrayAscendingCursor() - } - q.asc.Boolean.reset(opt.SeekTime(), opt.StopTime(), cacheValues, keyCursor) - return q.asc.Boolean - } else { - if q.desc.Boolean == nil { - q.desc.Boolean = newBooleanArrayDescendingCursor() - } - q.desc.Boolean.reset(opt.SeekTime(), opt.StopTime(), cacheValues, keyCursor) - return q.desc.Boolean - } -} diff --git a/tsdb/tsm1/array_cursor_iterator.gen.go.tmpl b/tsdb/tsm1/array_cursor_iterator.gen.go.tmpl deleted file mode 100644 index 4140033ca7..0000000000 --- a/tsdb/tsm1/array_cursor_iterator.gen.go.tmpl +++ /dev/null @@ -1,36 +0,0 @@ -package tsm1 - -import ( - "context" - - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/query" - "github.com/influxdata/influxdb/v2/tsdb/cursors" -) - -{{range .}} - -// build{{.Name}}ArrayCursor creates an array cursor for a {{.name}} field. -func (q *arrayCursorIterator) build{{.Name}}ArrayCursor(ctx context.Context, name []byte, tags models.Tags, field string, opt query.IteratorOptions) cursors.{{.Name}}ArrayCursor { - key := q.seriesFieldKeyBytes(name, tags, field) - cacheValues := q.e.Cache.Values(key) - keyCursor := q.e.KeyCursor(ctx, key, opt.SeekTime(), opt.Ascending) - - q.e.readTracker.AddSeeks(uint64(keyCursor.seekN())) - - if opt.Ascending { - if q.asc.{{.Name}} == nil { - q.asc.{{.Name}} = new{{.Name}}ArrayAscendingCursor() - } - q.asc.{{.Name}}.reset(opt.SeekTime(), opt.StopTime(), cacheValues, keyCursor) - return q.asc.{{.Name}} - } else { - if q.desc.{{.Name}} == nil { - q.desc.{{.Name}} = new{{.Name}}ArrayDescendingCursor() - } - q.desc.{{.Name}}.reset(opt.SeekTime(), opt.StopTime(), cacheValues, keyCursor) - return q.desc.{{.Name}} - } -} - -{{end}} diff --git a/tsdb/tsm1/array_cursor_iterator.go b/tsdb/tsm1/array_cursor_iterator.go deleted file mode 100644 index 4468f84658..0000000000 --- a/tsdb/tsm1/array_cursor_iterator.go +++ /dev/null @@ -1,111 +0,0 @@ -package tsm1 - -import ( - "context" - "fmt" - - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/pkg/metrics" - "github.com/influxdata/influxdb/v2/query" - "github.com/influxdata/influxdb/v2/tsdb/cursors" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" -) - -type arrayCursorIterator struct { - e *Engine - key []byte - - asc struct { - Float *floatArrayAscendingCursor - Integer *integerArrayAscendingCursor - Unsigned *unsignedArrayAscendingCursor - Boolean *booleanArrayAscendingCursor - String *stringArrayAscendingCursor - } - - desc struct { - Float *floatArrayDescendingCursor - Integer *integerArrayDescendingCursor - Unsigned *unsignedArrayDescendingCursor - Boolean *booleanArrayDescendingCursor - String *stringArrayDescendingCursor - } -} - -func (q *arrayCursorIterator) Next(ctx context.Context, r *cursors.CursorRequest) (cursors.Cursor, error) { - q.key = seriesfile.AppendSeriesKey(q.key[:0], r.Name, r.Tags) - id := q.e.sfile.SeriesIDTypedBySeriesKey(q.key) - if id.IsZero() { - return nil, nil - } - - q.e.readTracker.AddCursors(1) - - if grp := metrics.GroupFromContext(ctx); grp != nil { - 
grp.GetCounter(numberOfRefCursorsCounter).Add(1) - } - - var opt query.IteratorOptions - opt.Ascending = r.Ascending - opt.StartTime = r.StartTime - opt.EndTime = r.EndTime - - // Return appropriate cursor based on type. - switch typ := id.Type(); typ { - case models.Float: - return q.buildFloatArrayCursor(ctx, r.Name, r.Tags, r.Field, opt), nil - case models.Integer: - return q.buildIntegerArrayCursor(ctx, r.Name, r.Tags, r.Field, opt), nil - case models.Unsigned: - return q.buildUnsignedArrayCursor(ctx, r.Name, r.Tags, r.Field, opt), nil - case models.String: - return q.buildStringArrayCursor(ctx, r.Name, r.Tags, r.Field, opt), nil - case models.Boolean: - return q.buildBooleanArrayCursor(ctx, r.Name, r.Tags, r.Field, opt), nil - default: - panic(fmt.Sprintf("unreachable: %v", typ)) - } -} - -func (q *arrayCursorIterator) seriesFieldKeyBytes(name []byte, tags models.Tags, field string) []byte { - q.key = models.AppendMakeKey(q.key[:0], name, tags) - q.key = append(q.key, KeyFieldSeparatorBytes...) - q.key = append(q.key, field...) - return q.key -} - -// Stats returns the cumulative stats for all cursors. -func (q *arrayCursorIterator) Stats() cursors.CursorStats { - var stats cursors.CursorStats - if cur := q.asc.Float; cur != nil { - stats.Add(cur.Stats()) - } - if cur := q.asc.Integer; cur != nil { - stats.Add(cur.Stats()) - } - if cur := q.asc.Unsigned; cur != nil { - stats.Add(cur.Stats()) - } - if cur := q.asc.Boolean; cur != nil { - stats.Add(cur.Stats()) - } - if cur := q.asc.String; cur != nil { - stats.Add(cur.Stats()) - } - if cur := q.desc.Float; cur != nil { - stats.Add(cur.Stats()) - } - if cur := q.desc.Integer; cur != nil { - stats.Add(cur.Stats()) - } - if cur := q.desc.Unsigned; cur != nil { - stats.Add(cur.Stats()) - } - if cur := q.desc.Boolean; cur != nil { - stats.Add(cur.Stats()) - } - if cur := q.desc.String; cur != nil { - stats.Add(cur.Stats()) - } - return stats -} diff --git a/tsdb/tsm1/array_cursor_test.go b/tsdb/tsm1/array_cursor_test.go deleted file mode 100644 index b9868e030d..0000000000 --- a/tsdb/tsm1/array_cursor_test.go +++ /dev/null @@ -1,413 +0,0 @@ -package tsm1 - -import ( - "context" - "fmt" - "io/ioutil" - "os" - "path/filepath" - "sort" - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/influxdata/influxdb/v2/pkg/fs" - "github.com/influxdata/influxdb/v2/tsdb/cursors" - "github.com/stretchr/testify/assert" -) - -type keyValues struct { - key string - values []Value -} - -func MustTempDir() string { - dir, err := ioutil.TempDir("", "tsm1-test") - if err != nil { - panic(fmt.Sprintf("failed to create temp dir: %v", err)) - } - return dir -} - -func MustTempFile(dir string) *os.File { - f, err := ioutil.TempFile(dir, "tsm1test") - if err != nil { - panic(fmt.Sprintf("failed to create temp file: %v", err)) - } - return f -} - -func newFiles(dir string, values ...keyValues) ([]string, error) { - var files []string - - id := 1 - for _, v := range values { - f := MustTempFile(dir) - w, err := NewTSMWriter(f) - if err != nil { - return nil, err - } - - if err := w.Write([]byte(v.key), v.values); err != nil { - return nil, err - } - - if err := w.WriteIndex(); err != nil { - return nil, err - } - - if err := w.Close(); err != nil { - return nil, err - } - - newName := filepath.Join(filepath.Dir(f.Name()), DefaultFormatFileName(id, 1)+".tsm") - if err := fs.RenameFile(f.Name(), newName); err != nil { - return nil, err - } - id++ - - files = append(files, newName) - } - return files, nil -} - -func TestDescendingCursor_SinglePointStartTime(t 
*testing.T) { - t.Run("cache", func(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := NewFileStore(dir) - - const START, END = 10, 1 - kc := fs.KeyCursor(context.Background(), []byte("m,_field=v#!~#v"), START, false) - defer kc.Close() - cur := newIntegerArrayDescendingCursor() - // Include a cached value with timestamp equal to END - cur.reset(START, END, Values{NewIntegerValue(1, 1)}, kc) - - var got []int64 - ar := cur.Next() - for ar.Len() > 0 { - got = append(got, ar.Timestamps...) - ar = cur.Next() - } - - if exp := []int64{1}; !cmp.Equal(got, exp) { - t.Errorf("unexpected values; -got/+exp\n%s", cmp.Diff(got, exp)) - } - }) - t.Run("tsm", func(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := NewFileStore(dir) - - const START, END = 10, 1 - - data := []keyValues{ - // Write a single data point with timestamp equal to END - {"m,_field=v#!~#v", []Value{NewIntegerValue(1, 1)}}, - } - - files, err := newFiles(dir, data...) - if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - _ = fs.Replace(nil, files) - - kc := fs.KeyCursor(context.Background(), []byte("m,_field=v#!~#v"), START, false) - defer kc.Close() - cur := newIntegerArrayDescendingCursor() - cur.reset(START, END, nil, kc) - - var got []int64 - ar := cur.Next() - for ar.Len() > 0 { - got = append(got, ar.Timestamps...) - ar = cur.Next() - } - - if exp := []int64{1}; !cmp.Equal(got, exp) { - t.Errorf("unexpected values; -got/+exp\n%s", cmp.Diff(got, exp)) - } - }) -} - -func TestFileStore_DuplicatePoints(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := NewFileStore(dir) - - makeVals := func(ts ...int64) []Value { - vals := make([]Value, len(ts)) - for i, t := range ts { - vals[i] = NewFloatValue(t, 1.01) - } - return vals - } - - // Setup 3 files - data := []keyValues{ - {"m,_field=v#!~#v", makeVals(21)}, - {"m,_field=v#!~#v", makeVals(44)}, - {"m,_field=v#!~#v", makeVals(40, 46)}, - {"m,_field=v#!~#v", makeVals(46, 51)}, - } - - files, err := newFiles(dir, data...) - if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - _ = fs.Replace(nil, files) - - t.Run("ascending", func(t *testing.T) { - const START, END = 21, 100 - kc := fs.KeyCursor(context.Background(), []byte("m,_field=v#!~#v"), START, true) - defer kc.Close() - cur := newFloatArrayAscendingCursor() - cur.reset(START, END, nil, kc) - - var got []int64 - ar := cur.Next() - for ar.Len() > 0 { - got = append(got, ar.Timestamps...) - ar = cur.Next() - } - - if exp := []int64{21, 40, 44, 46, 51}; !cmp.Equal(got, exp) { - t.Errorf("unexpected values; -got/+exp\n%s", cmp.Diff(got, exp)) - } - }) - - t.Run("descending", func(t *testing.T) { - const START, END = 51, 0 - kc := fs.KeyCursor(context.Background(), []byte("m,_field=v#!~#v"), START, false) - defer kc.Close() - cur := newFloatArrayDescendingCursor() - cur.reset(START, END, nil, kc) - - var got []int64 - ar := cur.Next() - for ar.Len() > 0 { - got = append(got, ar.Timestamps...) - ar = cur.Next() - } - - if exp := []int64{51, 46, 44, 40, 21}; !cmp.Equal(got, exp) { - t.Errorf("unexpected values; -got/+exp\n%s", cmp.Diff(got, exp)) - } - }) -} - -// Int64Slice attaches the methods of Interface to []int64, sorting in increasing order. 
-type Int64Slice []int64 - -func (p Int64Slice) Len() int { return len(p) } -func (p Int64Slice) Less(i, j int) bool { return p[i] < p[j] } -func (p Int64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } - -// Verifies the array cursors correctly handle merged blocks from KeyCursor which may exceed the -// array cursor's local values buffer, which is initialized to MaxPointsPerBlock elements (1000) -// -// This test creates two TSM files which have a single block each. The second file -// has interleaving timestamps with the first file. -// -// The first file has a block of 800 timestamps starting at 1000 an increasing by 10ns -// The second file has a block of 400 timestamps starting at 1005, also increasing by 10ns -// -// When calling `nextTSM`, a single block of 1200 timestamps will be returned and the -// array cursor must chuck the values in the Next call. -func TestFileStore_MergeBlocksLargerThat1000_SecondEntirelyContained(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := NewFileStore(dir) - - // makeVals creates count points starting at ts and incrementing by step - makeVals := func(ts, count, step int64) []Value { - vals := make([]Value, count) - for i := range vals { - vals[i] = NewFloatValue(ts, 1.01) - ts += step - } - return vals - } - - makeTs := func(ts, count, step int64) []int64 { - vals := make([]int64, count) - for i := range vals { - vals[i] = ts - ts += step - } - return vals - } - - // Setup 2 files with the second containing a single block that is completely within the first - data := []keyValues{ - {"m,_field=v#!~#v", makeVals(1000, 800, 10)}, - {"m,_field=v#!~#v", makeVals(1005, 400, 10)}, - } - - files, err := newFiles(dir, data...) - if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - _ = fs.Replace(nil, files) - - t.Run("ascending", func(t *testing.T) { - const START, END = 1000, 10000 - kc := fs.KeyCursor(context.Background(), []byte("m,_field=v#!~#v"), START, true) - defer kc.Close() - cur := newFloatArrayAscendingCursor() - cur.reset(START, END, nil, kc) - - exp := makeTs(1000, 800, 10) - exp = append(exp, makeTs(1005, 400, 10)...) - sort.Sort(Int64Slice(exp)) - - // check first block - ar := cur.Next() - assert.Len(t, ar.Timestamps, 1000) - assert.Equal(t, exp[:1000], ar.Timestamps) - - // check second block - exp = exp[1000:] - ar = cur.Next() - assert.Len(t, ar.Timestamps, 200) - assert.Equal(t, exp, ar.Timestamps) - }) - - t.Run("descending", func(t *testing.T) { - const START, END = 10000, 0 - kc := fs.KeyCursor(context.Background(), []byte("m,_field=v#!~#v"), START, false) - defer kc.Close() - cur := newFloatArrayDescendingCursor() - cur.reset(START, END, nil, kc) - - exp := makeTs(1000, 800, 10) - exp = append(exp, makeTs(1005, 400, 10)...) - sort.Sort(sort.Reverse(Int64Slice(exp))) - - // check first block - ar := cur.Next() - assert.Len(t, ar.Timestamps, 1000) - assert.Equal(t, exp[:1000], ar.Timestamps) - - // check second block - exp = exp[1000:] - ar = cur.Next() - assert.Len(t, ar.Timestamps, 200) - assert.Equal(t, exp, ar.Timestamps) - }) -} - -// FloatArray attaches the methods of sort.Interface to *tsdb.FloatArray, sorting in increasing order. 
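-// Note (not from the original file): the embedded *cursors.FloatArray supplies Len,
-// while Less and Swap below reorder Timestamps and Values together so the two
-// parallel slices stay aligned when the array is sorted (or reverse-sorted) in the
-// tests that follow.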
-type FloatArray struct { - *cursors.FloatArray -} - -func (a *FloatArray) Less(i, j int) bool { return a.Timestamps[i] < a.Timestamps[j] } -func (a *FloatArray) Swap(i, j int) { - a.Timestamps[i], a.Timestamps[j] = a.Timestamps[j], a.Timestamps[i] - a.Values[i], a.Values[j] = a.Values[j], a.Values[i] -} - -// Verifies the array cursors correctly handle merged blocks from KeyCursor which may exceed the -// array cursor's local values buffer, which is initialized to MaxPointsPerBlock elements (1000) -// -// This test creates two TSM files with a significant number of interleaved points in addition -// to a significant number of points in the second file which replace values in the first. -// To verify intersecting data from the second file replaces the first, the values differ, -// so the enumerated results can be compared with the expected output. -func TestFileStore_MergeBlocksLargerThat1000_MultipleBlocksInEachFile(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := NewFileStore(dir) - - // makeVals creates count points starting at ts and incrementing by step - makeVals := func(ts, count, step int64, v float64) []Value { - vals := make([]Value, count) - for i := range vals { - vals[i] = NewFloatValue(ts, v) - ts += step - } - return vals - } - - makeArray := func(ts, count, step int64, v float64) *cursors.FloatArray { - ar := cursors.NewFloatArrayLen(int(count)) - for i := range ar.Timestamps { - ar.Timestamps[i] = ts - ar.Values[i] = v - ts += step - } - return ar - } - - // Setup 2 files with partially overlapping blocks and the second file replaces some elements of the first - data := []keyValues{ - {"m,_field=v#!~#v", makeVals(1000, 3500, 10, 1.01)}, - {"m,_field=v#!~#v", makeVals(4005, 3500, 5, 2.01)}, - } - - files, err := newFiles(dir, data...) - if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - _ = fs.Replace(nil, files) - - t.Run("ascending", func(t *testing.T) { - const START, END = 1000, 1e9 - kc := fs.KeyCursor(context.Background(), []byte("m,_field=v#!~#v"), START, true) - defer kc.Close() - cur := newFloatArrayAscendingCursor() - cur.reset(START, END, nil, kc) - - exp := makeArray(1000, 3500, 10, 1.01) - a2 := makeArray(4005, 3500, 5, 2.01) - exp.Merge(a2) - - got := cursors.NewFloatArrayLen(exp.Len()) - got.Timestamps = got.Timestamps[:0] - got.Values = got.Values[:0] - - ar := cur.Next() - for ar.Len() > 0 { - got.Timestamps = append(got.Timestamps, ar.Timestamps...) - got.Values = append(got.Values, ar.Values...) - ar = cur.Next() - } - - assert.Len(t, got.Timestamps, exp.Len()) - assert.Equal(t, got.Timestamps, exp.Timestamps) - assert.Equal(t, got.Values, exp.Values) - }) - - t.Run("descending", func(t *testing.T) { - const START, END = 1e9, 0 - kc := fs.KeyCursor(context.Background(), []byte("m,_field=v#!~#v"), START, false) - defer kc.Close() - cur := newFloatArrayDescendingCursor() - cur.reset(START, END, nil, kc) - - exp := makeArray(1000, 3500, 10, 1.01) - a2 := makeArray(4005, 3500, 5, 2.01) - exp.Merge(a2) - sort.Sort(sort.Reverse(&FloatArray{exp})) - - got := cursors.NewFloatArrayLen(exp.Len()) - got.Timestamps = got.Timestamps[:0] - got.Values = got.Values[:0] - - ar := cur.Next() - for ar.Len() > 0 { - got.Timestamps = append(got.Timestamps, ar.Timestamps...) - got.Values = append(got.Values, ar.Values...) 
- ar = cur.Next() - } - - assert.Len(t, got.Timestamps, exp.Len()) - assert.Equal(t, got.Timestamps, exp.Timestamps) - assert.Equal(t, got.Values, exp.Values) - }) -} diff --git a/tsdb/tsm1/array_encoding.go b/tsdb/tsm1/array_encoding.go deleted file mode 100644 index e3e9e6f365..0000000000 --- a/tsdb/tsm1/array_encoding.go +++ /dev/null @@ -1,124 +0,0 @@ -package tsm1 - -import ( - "fmt" - - "github.com/influxdata/influxdb/v2/tsdb/cursors" -) - -// DecodeBooleanArrayBlock decodes the boolean block from the byte slice -// and writes the values to a. -func DecodeBooleanArrayBlock(block []byte, a *cursors.BooleanArray) error { - blockType := block[0] - if blockType != BlockBoolean { - return fmt.Errorf("invalid block type: exp %d, got %d", BlockBoolean, blockType) - } - - tb, vb, err := unpackBlock(block[1:]) - if err != nil { - return err - } - - a.Timestamps, err = TimeArrayDecodeAll(tb, a.Timestamps) - if err != nil { - return err - } - a.Values, err = BooleanArrayDecodeAll(vb, a.Values) - return err -} - -// DecodeFloatArrayBlock decodes the float block from the byte slice -// and writes the values to a. -func DecodeFloatArrayBlock(block []byte, a *cursors.FloatArray) error { - blockType := block[0] - if blockType != BlockFloat64 { - return fmt.Errorf("invalid block type: exp %d, got %d", BlockFloat64, blockType) - } - - tb, vb, err := unpackBlock(block[1:]) - if err != nil { - return err - } - - a.Timestamps, err = TimeArrayDecodeAll(tb, a.Timestamps) - if err != nil { - return err - } - a.Values, err = FloatArrayDecodeAll(vb, a.Values) - return err -} - -// DecodeIntegerArrayBlock decodes the integer block from the byte slice -// and writes the values to a. -func DecodeIntegerArrayBlock(block []byte, a *cursors.IntegerArray) error { - blockType := block[0] - if blockType != BlockInteger { - return fmt.Errorf("invalid block type: exp %d, got %d", BlockInteger, blockType) - } - - tb, vb, err := unpackBlock(block[1:]) - if err != nil { - return err - } - - a.Timestamps, err = TimeArrayDecodeAll(tb, a.Timestamps) - if err != nil { - return err - } - a.Values, err = IntegerArrayDecodeAll(vb, a.Values) - return err -} - -// DecodeUnsignedArrayBlock decodes the unsigned integer block from the byte slice -// and writes the values to a. -func DecodeUnsignedArrayBlock(block []byte, a *cursors.UnsignedArray) error { - blockType := block[0] - if blockType != BlockUnsigned { - return fmt.Errorf("invalid block type: exp %d, got %d", BlockUnsigned, blockType) - } - - tb, vb, err := unpackBlock(block[1:]) - if err != nil { - return err - } - - a.Timestamps, err = TimeArrayDecodeAll(tb, a.Timestamps) - if err != nil { - return err - } - a.Values, err = UnsignedArrayDecodeAll(vb, a.Values) - return err -} - -// DecodeStringArrayBlock decodes the string block from the byte slice -// and writes the values to a. -func DecodeStringArrayBlock(block []byte, a *cursors.StringArray) error { - blockType := block[0] - if blockType != BlockString { - return fmt.Errorf("invalid block type: exp %d, got %d", BlockString, blockType) - } - - tb, vb, err := unpackBlock(block[1:]) - if err != nil { - return err - } - - a.Timestamps, err = TimeArrayDecodeAll(tb, a.Timestamps) - if err != nil { - return err - } - a.Values, err = StringArrayDecodeAll(vb, a.Values) - return err -} - -// DecodeTimestampArrayBlock decodes the timestamps from the specified -// block, ignoring the block type and the values. 
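-// Illustrative usage (not from the original file): each Decode*ArrayBlock helper in
-// this file follows the same pattern — pass the raw block plus a reusable array and
-// read the decoded columns back out. Assuming `block` holds a float block:
-//
-//	arr := cursors.NewFloatArrayLen(0)
-//	if err := DecodeFloatArrayBlock(block, arr); err != nil {
-//		return err
-//	}
-//	_ = arr.Timestamps // decoded timestamps
-//	_ = arr.Values     // decoded float values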
-func DecodeTimestampArrayBlock(block []byte, a *cursors.TimestampArray) error { - tb, _, err := unpackBlock(block[1:]) - if err != nil { - return err - } - - a.Timestamps, err = TimeArrayDecodeAll(tb, a.Timestamps) - return err -} diff --git a/tsdb/tsm1/array_encoding_test.go b/tsdb/tsm1/array_encoding_test.go deleted file mode 100644 index c68b0f10eb..0000000000 --- a/tsdb/tsm1/array_encoding_test.go +++ /dev/null @@ -1,202 +0,0 @@ -package tsm1_test - -import ( - "fmt" - "math/rand" - "testing" - "time" - - "github.com/google/go-cmp/cmp" - "github.com/influxdata/influxdb/v2/tsdb/cursors" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" -) - -func TestDecodeFloatArrayBlock(t *testing.T) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - values := make(tsm1.FloatValues, len(times)) - for i, t := range times { - values[i] = tsm1.NewFloatValue(t, float64(i)).(tsm1.FloatValue) - } - exp := tsm1.NewFloatArrayFromValues(values) - - b, err := values.Encode(nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - got := cursors.NewFloatArrayLen(exp.Len()) - tsm1.DecodeFloatArrayBlock(b, got) - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected values -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func BenchmarkDecodeBooleanArrayBlock(b *testing.B) { - cases := []int{ - 5, - 55, - 555, - 1000, - } - for _, n := range cases { - b.Run(fmt.Sprintf("%d", n), func(b *testing.B) { - valueCount := n - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, true) - } - - bytes, err := tsm1.Values(values).Encode(nil) - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - b.ResetTimer() - b.ReportAllocs() - b.SetBytes(int64(tsm1.Values(values).Size())) - - b.RunParallel(func(pb *testing.PB) { - decodedValues := cursors.NewBooleanArrayLen(len(values)) - - for pb.Next() { - err = tsm1.DecodeBooleanArrayBlock(bytes, decodedValues) - if err != nil { - b.Fatalf("unexpected error decoding block: %v", err) - } - } - }) - }) - } -} - -func BenchmarkDecodeFloatArrayBlock(b *testing.B) { - cases := []int{ - 5, - 55, - 555, - 1000, - } - for _, n := range cases { - b.Run(fmt.Sprintf("%d", n), func(b *testing.B) { - valueCount := n - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, float64(i)) - } - - bytes, err := tsm1.Values(values).Encode(nil) - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - b.ResetTimer() - b.ReportAllocs() - b.SetBytes(int64(tsm1.Values(values).Size())) - - b.RunParallel(func(pb *testing.PB) { - decodedValues := cursors.NewFloatArrayLen(len(values)) - - for pb.Next() { - err = tsm1.DecodeFloatArrayBlock(bytes, decodedValues) - if err != nil { - b.Fatalf("unexpected error decoding block: %v", err) - } - } - }) - }) - } -} - -func BenchmarkDecodeIntegerArrayBlock(b *testing.B) { - rle := func(i int) int64 { return int64(i) } - s8b := func(i int) int64 { return int64(i + int(rand.Int31n(10))) } - - cases := []struct { - enc string - gen func(i int) int64 - n int - }{ - {enc: "rle", gen: rle, n: 5}, - {enc: "rle", gen: rle, n: 55}, - {enc: "rle", gen: rle, n: 555}, - {enc: "rle", gen: rle, n: 1000}, - {enc: "s8b", gen: s8b, n: 5}, - {enc: "s8b", gen: s8b, n: 55}, - {enc: "s8b", gen: s8b, n: 555}, - {enc: "s8b", gen: s8b, n: 1000}, - } - for _, bm := range cases { - b.Run(fmt.Sprintf("%s_%d", bm.enc, bm.n), func(b *testing.B) { - rand.Seed(int64(bm.n * 
1e3)) - - valueCount := bm.n - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, bm.gen(i)) - } - - bytes, err := tsm1.Values(values).Encode(nil) - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - b.ResetTimer() - b.ReportAllocs() - b.SetBytes(int64(tsm1.Values(values).Size())) - - b.RunParallel(func(pb *testing.PB) { - decodedValues := cursors.NewIntegerArrayLen(len(values)) - - for pb.Next() { - err = tsm1.DecodeIntegerArrayBlock(bytes, decodedValues) - if err != nil { - b.Fatalf("unexpected error decoding block: %v", err) - } - } - }) - }) - } -} - -func BenchmarkDecodeStringArrayBlock(b *testing.B) { - cases := []int{ - 5, - 55, - 555, - 1000, - } - for _, n := range cases { - b.Run(fmt.Sprintf("%d", n), func(b *testing.B) { - valueCount := n - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, fmt.Sprintf("value %d", i)) - } - - bytes, err := tsm1.Values(values).Encode(nil) - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - b.ResetTimer() - b.ReportAllocs() - b.SetBytes(int64(tsm1.Values(values).Size())) - - b.RunParallel(func(pb *testing.PB) { - decodedValues := cursors.NewStringArrayLen(len(values)) - - for pb.Next() { - err = tsm1.DecodeStringArrayBlock(bytes, decodedValues) - if err != nil { - b.Fatalf("unexpected error decoding block: %v", err) - } - } - }) - }) - } -} diff --git a/tsdb/tsm1/batch_boolean.go b/tsdb/tsm1/batch_boolean.go deleted file mode 100644 index bf5cebf3ee..0000000000 --- a/tsdb/tsm1/batch_boolean.go +++ /dev/null @@ -1,77 +0,0 @@ -package tsm1 - -import ( - "encoding/binary" - "fmt" -) - -// BooleanArrayEncodeAll encodes src into b, returning b and any error encountered. -// The returned slice may be of a different length and capactity to b. -func BooleanArrayEncodeAll(src []bool, b []byte) ([]byte, error) { - sz := 1 + 8 + ((len(src) + 7) / 8) // Header + Num bools + bool data. - if len(b) < sz && cap(b) > sz { - b = b[:sz] - } else if len(b) < sz { - b = append(b, make([]byte, sz)...) - } - - // Store the encoding type in the 4 high bits of the first byte - b[0] = byte(booleanCompressedBitPacked) << 4 - n := uint64(8) // Current bit in current byte. - - // Encode the number of booleans written. - i := binary.PutUvarint(b[n>>3:], uint64(len(src))) - n += uint64(i * 8) - - for _, v := range src { - if v { - b[n>>3] |= 128 >> (n & 7) // Set current bit on current byte. - } else { - b[n>>3] &^= 128 >> (n & 7) // Clear current bit on current byte. - } - n++ - } - - length := n >> 3 - if n&7 > 0 { - length++ // Add an extra byte to capture overflowing bits. - } - return b[:length], nil -} - -func BooleanArrayDecodeAll(b []byte, dst []bool) ([]bool, error) { - if len(b) == 0 { - return nil, nil - } - - // First byte stores the encoding type, only have 1 bit-packet format - // currently ignore for now. 
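-// Worked example (not from the original file, and assuming booleanCompressedBitPacked == 1
-// as defined elsewhere in this package): for src = []bool{true, false, true},
-// BooleanArrayEncodeAll above produces three bytes:
-//
-//	b[0] = 0x10 // encoding type in the 4 high bits
-//	b[1] = 0x03 // uvarint count of booleans
-//	b[2] = 0xA0 // 0b1010_0000: values packed MSB-first, padded with zero bits
-//
-// The decoder below mirrors this: skip the type byte, read the uvarint count, then
-// walk the remaining bytes bit by bit until that many values have been produced.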
- b = b[1:] - val, n := binary.Uvarint(b) - if n <= 0 { - return nil, fmt.Errorf("booleanBatchDecoder: invalid count") - } - - count := int(val) - - b = b[n:] - if min := len(b) * 8; min < count { - // Shouldn't happen - TSM file was truncated/corrupted - count = min - } - - if cap(dst) < count { - dst = make([]bool, count) - } else { - dst = dst[:count] - } - - j := 0 - for _, v := range b { - for i := byte(128); i > 0 && j < len(dst); i >>= 1 { - dst[j] = v&i != 0 - j++ - } - } - return dst, nil -} diff --git a/tsdb/tsm1/batch_boolean_test.go b/tsdb/tsm1/batch_boolean_test.go deleted file mode 100644 index 8ea0d1df43..0000000000 --- a/tsdb/tsm1/batch_boolean_test.go +++ /dev/null @@ -1,305 +0,0 @@ -package tsm1_test - -import ( - "bytes" - "fmt" - "math/rand" - "reflect" - "testing" - "testing/quick" - - "github.com/google/go-cmp/cmp" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" -) - -func TestBooleanArrayEncodeAll_NoValues(t *testing.T) { - b, err := tsm1.BooleanArrayEncodeAll(nil, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var dec tsm1.BooleanDecoder - dec.SetBytes(b) - if dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } -} - -func TestBooleanArrayEncodeAll_Single(t *testing.T) { - src := []bool{true} - - b, err := tsm1.BooleanArrayEncodeAll(src, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var dec tsm1.BooleanDecoder - dec.SetBytes(b) - if !dec.Next() { - t.Fatalf("unexpected next value: got false, exp true") - } - - if src[0] != dec.Read() { - t.Fatalf("unexpected value: got %v, exp %v", dec.Read(), src[0]) - } -} - -func TestBooleanArrayEncodeAll_Compare(t *testing.T) { - // generate random values - input := make([]bool, 1000) - for i := 0; i < len(input); i++ { - input[i] = rand.Int63n(2) == 1 - } - - s := tsm1.NewBooleanEncoder(1000) - for _, v := range input { - s.Write(v) - } - s.Flush() - - buf1, err := s.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - buf2 := append([]byte("this is some jibberish"), make([]byte, 100, 200)...) - buf2, err = tsm1.BooleanArrayEncodeAll(input, buf2) - if err != nil { - t.Fatalf("unexpected error: %v\nbuf: %db %x", err, len(buf2), buf2) - } - - result, err := tsm1.BooleanArrayDecodeAll(buf2, nil) - if err != nil { - dumpBufs(buf1, buf2) - t.Fatalf("unexpected error: %v\nbuf: %db %x", err, len(buf2), buf2) - } - - if got, exp := result, input; !reflect.DeepEqual(got, exp) { - dumpBufs(buf1, buf2) - t.Fatalf("got result %v, expected %v", got, exp) - } - - // Check that the encoders are byte for byte the same... 
- if !bytes.Equal(buf1, buf2) { - dumpBufs(buf1, buf2) - t.Fatalf("Raw bytes differ for encoders") - } -} - -func TestBooleanArrayEncodeAll_Multi_Compressed(t *testing.T) { - src := make([]bool, 10) - for i := range src { - src[i] = i%2 == 0 - } - - b, err := tsm1.BooleanArrayEncodeAll(src, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if exp := 4; len(b) != exp { - t.Fatalf("unexpected length: got %v, exp %v", len(b), exp) - } - - var dec tsm1.BooleanDecoder - dec.SetBytes(b) - - for i, v := range src { - if !dec.Next() { - t.Fatalf("unexpected next value: got false, exp true") - } - if v != dec.Read() { - t.Fatalf("unexpected value at pos %d: got %v, exp %v", i, dec.Read(), v) - } - } - - if dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } -} - -func TestBooleanArrayEncodeAll_Quick(t *testing.T) { - if err := quick.Check(func(values []bool) bool { - src := values - if values == nil { - src = []bool{} - } - - // Retrieve compressed bytes. - buf, err := tsm1.BooleanArrayEncodeAll(src, nil) - if err != nil { - t.Fatal(err) - } - - // Read values out of decoder. - got := make([]bool, 0, len(values)) - var dec tsm1.BooleanDecoder - dec.SetBytes(buf) - for dec.Next() { - got = append(got, dec.Read()) - } - - // Verify that input and output values match. - if !reflect.DeepEqual(src, got) { - t.Fatalf("mismatch:\n\nexp=%#v\n\ngot=%#v\n\n", src, got) - } - - return true - }, nil); err != nil { - t.Fatal(err) - } -} - -func Test_BooleanArrayDecodeAll_Single(t *testing.T) { - enc := tsm1.NewBooleanEncoder(1) - exp := true - enc.Write(exp) - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - got, _ := tsm1.BooleanArrayDecodeAll(b, nil) - if len(got) != 1 { - t.Fatalf("expected 1 value") - } - if got := got[0]; got != exp { - t.Fatalf("unexpected value -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func Test_BooleanArrayDecodeAll_Multi_Compressed(t *testing.T) { - cases := []struct { - n int - p float64 // probability of a true value - }{ - {10, 0.33}, - {100, 0.55}, - {1000, 0.68}, - } - - for _, tc := range cases { - t.Run(fmt.Sprintf("%d_%0.2f", tc.n, tc.p), func(t *testing.T) { - rand.Seed(int64(tc.n * tc.n)) - - enc := tsm1.NewBooleanEncoder(tc.n) - values := make([]bool, tc.n) - for i := range values { - values[i] = rand.Float64() < tc.p - enc.Write(values[i]) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - got, err := tsm1.BooleanArrayDecodeAll(b, nil) - if err != nil { - t.Fatalf("unexpected error %q", err.Error()) - } - - if !cmp.Equal(got, values) { - t.Fatalf("unexpected values, -got/+exp\n%s", cmp.Diff(got, values)) - } - }) - } -} - -func Test_BooleanBatchDecoder_Corrupt(t *testing.T) { - cases := []struct { - name string - d string - }{ - {"empty", ""}, - {"invalid count", "\x10\x90"}, - {"count greater than remaining bits, multiple bytes expected", "\x10\x7f"}, - {"count greater than remaining bits, one byte expected", "\x10\x01"}, - } - - for _, c := range cases { - t.Run(c.name, func(t *testing.T) { - dst, _ := tsm1.BooleanArrayDecodeAll([]byte(c.d), nil) - if len(dst) != 0 { - t.Fatalf("unexpected result -got/+want\n%s", cmp.Diff(dst, nil)) - } - }) - } -} - -func BenchmarkEncodeBooleans(b *testing.B) { - var err error - cases := []int{10, 100, 1000} - - for _, n := range cases { - enc := tsm1.NewBooleanEncoder(n) - b.Run(fmt.Sprintf("%d_ran", n), func(b *testing.B) { - input := make([]bool, n) - for i := 0; i < n; i++ { - input[i] = rand.Int63n(2) == 
1 - } - - b.Run("itr", func(b *testing.B) { - b.ReportAllocs() - enc.Reset() - b.ResetTimer() - for n := 0; n < b.N; n++ { - enc.Reset() - for _, x := range input { - enc.Write(x) - } - enc.Flush() - if bufResult, err = enc.Bytes(); err != nil { - b.Fatal(err) - } - } - }) - - b.Run("batch", func(b *testing.B) { - b.ReportAllocs() - b.ResetTimer() - for n := 0; n < b.N; n++ { - if bufResult, err = tsm1.BooleanArrayEncodeAll(input, bufResult); err != nil { - b.Fatal(err) - } - } - }) - - }) - } -} - -func BenchmarkBooleanArrayDecodeAll(b *testing.B) { - benchmarks := []struct { - n int - }{ - {1}, - {55}, - {555}, - {1000}, - } - for _, bm := range benchmarks { - b.Run(fmt.Sprintf("%d", bm.n), func(b *testing.B) { - size := bm.n - e := tsm1.NewBooleanEncoder(size) - for i := 0; i < size; i++ { - e.Write(i&1 == 1) - } - bytes, err := e.Bytes() - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - b.SetBytes(int64(len(bytes))) - b.ResetTimer() - - dst := make([]bool, size) - for i := 0; i < b.N; i++ { - res, _ := tsm1.BooleanArrayDecodeAll(bytes, dst) - if len(res) != size { - b.Fatalf("expected to read %d booleans, but read %d", size, len(res)) - } - } - }) - } -} diff --git a/tsdb/tsm1/batch_float.go b/tsdb/tsm1/batch_float.go deleted file mode 100644 index eb1787473e..0000000000 --- a/tsdb/tsm1/batch_float.go +++ /dev/null @@ -1,514 +0,0 @@ -package tsm1 - -import ( - "encoding/binary" - "fmt" - "io" - "math" - "math/bits" - "unsafe" -) - -// FloatArrayEncodeAll encodes src into b, returning b and any error encountered. -// The returned slice may be of a different length and capactity to b. -// -// Currently only the float compression scheme used in Facebook's Gorilla is -// supported, so this method implements a batch oriented version of that. -func FloatArrayEncodeAll(src []float64, b []byte) ([]byte, error) { - if cap(b) < 9 { - b = make([]byte, 0, 9) // Enough room for the header and one value. - } - - b = b[:1] - b[0] = floatCompressedGorilla << 4 - - var first float64 - var finished bool - if len(src) > 0 && math.IsNaN(src[0]) { - return nil, fmt.Errorf("unsupported value: NaN") - } else if len(src) == 0 { - first = math.NaN() // Write sentinal value to terminate batch. - finished = true - } else { - first = src[0] - src = src[1:] - } - - b = b[:9] - n := uint64(8 + 64) // Number of bits written. - prev := math.Float64bits(first) - - // Write first value. - binary.BigEndian.PutUint64(b[1:], prev) - - prevLeading, prevTrailing := ^uint64(0), uint64(0) - var leading, trailing uint64 - var mask uint64 - var sum float64 - - // Encode remaining values. - for i := 0; !finished; i++ { - var x float64 - if i < len(src) { - x = src[i] - sum += x - } else { - // Encode sentinal value to terminate batch - x = math.NaN() - finished = true - } - - { - cur := math.Float64bits(x) - vDelta := cur ^ prev - if vDelta == 0 { - n++ // Write a zero bit. Nothing else to do. - prev = cur - continue - } - - // First the current bit of the current byte is set to indicate we're - // writing a delta value to the stream. - for n>>3 >= uint64(len(b)) { // Keep growing b until we can fit all bits in. - b = append(b, byte(0)) - } - - // n&7 - current bit in current byte. - // n>>3 - the current byte. - b[n>>3] |= 128 >> (n & 7) // Sets the current bit of the current byte. - n++ - - // Write the delta to b. - - // Determine the leading and trailing zeros. 
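-// Worked example (not from the original file): encoding 24.0 immediately after 12.0,
-// the sequence used in the Gorilla paper and in this package's tests, gives
-//
-//	math.Float64bits(12.0) = 0x4028000000000000
-//	math.Float64bits(24.0) = 0x4038000000000000
-//	vDelta                 = 0x0010000000000000
-//
-// so leading = 11, trailing = 52, and only 64-11-52 = 1 meaningful bit has to be
-// written after the control bits. The clamp below only changes leading when it is 32
-// or more.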
- leading = uint64(bits.LeadingZeros64(vDelta)) - trailing = uint64(bits.TrailingZeros64(vDelta)) - - // Clamp number of leading zeros to avoid overflow when encoding - leading &= 0x1F - if leading >= 32 { - leading = 31 - } - - // At least 2 further bits will be required. - if (n+2)>>3 >= uint64(len(b)) { - b = append(b, byte(0)) - } - - if prevLeading != ^uint64(0) && leading >= prevLeading && trailing >= prevTrailing { - n++ // Write a zero bit. - - // Write the l least significant bits of vDelta to b, most significant - // bit first. - l := uint64(64 - prevLeading - prevTrailing) - for (n+l)>>3 >= uint64(len(b)) { // Keep growing b until we can fit all bits in. - b = append(b, byte(0)) - } - - // Full value to write. - v := (vDelta >> prevTrailing) << (64 - l) // l least signifciant bits of v. - - var m = n & 7 // Current bit in current byte. - var written uint64 - if m > 0 { // In this case the current byte is not full. - written = 8 - m - if l < written { - written = l - } - mask = v >> 56 // Move 8 MSB to 8 LSB - b[n>>3] |= byte(mask >> m) - n += written - - if l-written == 0 { - prev = cur - continue - } - } - - vv := v << written // Move written bits out of the way. - - // TODO(edd): Optimise this. It's unlikely we actually have 8 bytes to write. - if (n>>3)+8 >= uint64(len(b)) { - b = append(b, 0, 0, 0, 0, 0, 0, 0, 0) - } - binary.BigEndian.PutUint64(b[n>>3:], vv) - n += (l - written) - } else { - prevLeading, prevTrailing = leading, trailing - - // Set a single bit to indicate a value will follow. - b[n>>3] |= 128 >> (n & 7) // Set current bit on current byte - n++ - - // Write 5 bits of leading. - if (n+5)>>3 >= uint64(len(b)) { - b = append(b, byte(0)) - } - - // Enough room to write the 5 bits in the current byte? - var m = n & 7 - l := uint64(5) - v := leading << 59 // 5 LSB of leading. - mask = v >> 56 // Move 5 MSB to 8 LSB - - if m <= 3 { // 5 bits fit into current byte. - b[n>>3] |= byte(mask >> m) - n += l - } else { // In this case there are fewer than 5 bits available in current byte. - // First step is to fill current byte - written := 8 - m - b[n>>3] |= byte(mask >> m) // Some of mask will get lost. - n += written - - // Second step is to write the lost part of mask into the next byte. - mask = v << written // Move written bits in previous byte out of way. - mask >>= 56 - - m = n & 7 // Recompute current bit. - b[n>>3] |= byte(mask >> m) - n += (l - written) - } - - // Note that if leading == trailing == 0, then sigbits == 64. But that - // value doesn't actually fit into the 6 bits we have. - // Luckily, we never need to encode 0 significant bits, since that would - // put us in the other case (vdelta == 0). So instead we write out a 0 and - // adjust it back to 64 on unpacking. - sigbits := 64 - leading - trailing - - if (n+6)>>3 >= uint64(len(b)) { - b = append(b, byte(0)) - } - - m = n & 7 - l = uint64(6) - v = sigbits << 58 // Move 6 LSB of sigbits to MSB - mask = v >> 56 // Move 6 MSB to 8 LSB - if m <= 2 { - // The 6 bits fit into the current byte. - b[n>>3] |= byte(mask >> m) - n += l - } else { // In this case there are fewer than 6 bits available in current byte. - // First step is to fill the current byte. - written := 8 - m - b[n>>3] |= byte(mask >> m) // Write to the current bit. - n += written - - // Second step is to write the lost part of mask into the next byte. - // Write l remaining bits into current byte. - mask = v << written // Remove bits written in previous byte out of way. - mask >>= 56 - - m = n & 7 // Recompute current bit. 
- b[n>>3] |= byte(mask >> m) - n += l - written - } - - // Write final value. - m = n & 7 - l = sigbits - v = (vDelta >> trailing) << (64 - l) // Move l LSB into MSB - for (n+l)>>3 >= uint64(len(b)) { // Keep growing b until we can fit all bits in. - b = append(b, byte(0)) - } - - var written uint64 - if m > 0 { // In this case the current byte is not full. - written = 8 - m - if l < written { - written = l - } - mask = v >> 56 // Move 8 MSB to 8 LSB - b[n>>3] |= byte(mask >> m) - n += written - - if l-written == 0 { - prev = cur - continue - } - } - - // Shift remaining bits and write out in one go. - vv := v << written // Remove bits written in previous byte. - // TODO(edd): Optimise this. - if (n>>3)+8 >= uint64(len(b)) { - b = append(b, 0, 0, 0, 0, 0, 0, 0, 0) - } - - binary.BigEndian.PutUint64(b[n>>3:], vv) - n += (l - written) - } - prev = cur - } - } - - if math.IsNaN(sum) { - return nil, fmt.Errorf("unsupported value: NaN") - } - - length := n >> 3 - if n&7 > 0 { - length++ // Add an extra byte to capture overflowing bits. - } - return b[:length], nil -} - -// bitMask contains a lookup table where the index is the number of bits -// and the value is a mask. The table is always read by ANDing the index -// with 0x3f, such that if the index is 64, position 0 will be read, which -// is a 0xffffffffffffffff, thus returning all bits. -// -// 00 = 0xffffffffffffffff -// 01 = 0x0000000000000001 -// 02 = 0x0000000000000003 -// 03 = 0x0000000000000007 -// ... -// 62 = 0x3fffffffffffffff -// 63 = 0x7fffffffffffffff -var bitMask [64]uint64 - -func init() { - v := uint64(1) - for i := 1; i <= 64; i++ { - bitMask[i&0x3f] = v - v = v<<1 | 1 - } -} - -func FloatArrayDecodeAll(b []byte, buf []float64) ([]float64, error) { - if len(b) < 9 { - return []float64{}, nil - } - - var ( - val uint64 // current value - trailingN uint8 // trailing zero count - meaningfulN uint8 = 64 // meaningful bit count - ) - - // first byte is the compression type; always Gorilla - b = b[1:] - - val = binary.BigEndian.Uint64(b) - if val == uvnan { - if buf == nil { - var tmp [1]float64 - buf = tmp[:0] - } - // special case: there were no values to decode - return buf[:0], nil - } - - buf = buf[:0] - // convert the []float64 to []uint64 to avoid calling math.Float64Frombits, - // which results in unnecessary moves between Xn registers before moving - // the value into the float64 slice. This change increased performance from - // 320 MB/s to 340 MB/s on an Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz - dst := *(*[]uint64)(unsafe.Pointer(&buf)) - dst = append(dst, val) - - b = b[8:] - - // The bit reader code uses brCachedVal to store up to the next 8 bytes - // of MSB data read from b. brValidBits stores the number of remaining unread - // bits starting from the MSB. Before N bits are read from brCachedVal, - // they are left-rotated N bits, such that they end up in the left-most position. - // Using bits.RotateLeft64 results in a single instruction on many CPU architectures. - // This approach permits simple tests, such as for the two control bits: - // - // brCachedVal&1 > 0 - // - // The alternative was to leave brCachedValue alone and perform shifts and - // masks to read specific bits. 
The original approach looked like the - // following: - // - // brCachedVal&(1<<(brValidBits&0x3f)) > 0 - // - var ( - brCachedVal = uint64(0) // a buffer of up to the next 8 bytes read from b in MSB order - brValidBits = uint8(0) // the number of unread bits remaining in brCachedVal - ) - - // Refill brCachedVal, reading up to 8 bytes from b - if len(b) >= 8 { - // fast path reads 8 bytes directly - brCachedVal = binary.BigEndian.Uint64(b) - brValidBits = 64 - b = b[8:] - } else if len(b) > 0 { - brCachedVal = 0 - brValidBits = uint8(len(b) * 8) - for i := range b { - brCachedVal = (brCachedVal << 8) | uint64(b[i]) - } - brCachedVal = bits.RotateLeft64(brCachedVal, -int(brValidBits)) - b = b[:0] - } else { - goto ERROR - } - - // The expected exit condition is for a uvnan to be decoded. - // Any other error (EOF) indicates a truncated stream. - for { - if brValidBits > 0 { - // brValidBits > 0 is impossible to predict, so we place the - // most likely case inside the if and immediately jump, keeping - // the instruction pipeline consistently full. - // This is a similar approach to using the GCC __builtin_expect - // intrinsic, which modifies the order of branches such that the - // likely case follows the conditional jump. - // - // Written as if brValidBits == 0 and placing the Refill brCachedVal - // code inside reduces benchmarks from 318 MB/s to 260 MB/s on an - // Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz - goto READ0 - } - - // Refill brCachedVal, reading up to 8 bytes from b - if len(b) >= 8 { - brCachedVal = binary.BigEndian.Uint64(b) - brValidBits = 64 - b = b[8:] - } else if len(b) > 0 { - brCachedVal = 0 - brValidBits = uint8(len(b) * 8) - for i := range b { - brCachedVal = (brCachedVal << 8) | uint64(b[i]) - } - brCachedVal = bits.RotateLeft64(brCachedVal, -int(brValidBits)) - b = b[:0] - } else { - goto ERROR - } - - READ0: - // read control bit 0 - brValidBits -= 1 - brCachedVal = bits.RotateLeft64(brCachedVal, 1) - if brCachedVal&1 > 0 { - if brValidBits > 0 { - goto READ1 - } - - // Refill brCachedVal, reading up to 8 bytes from b - if len(b) >= 8 { - brCachedVal = binary.BigEndian.Uint64(b) - brValidBits = 64 - b = b[8:] - } else if len(b) > 0 { - brCachedVal = 0 - brValidBits = uint8(len(b) * 8) - for i := range b { - brCachedVal = (brCachedVal << 8) | uint64(b[i]) - } - brCachedVal = bits.RotateLeft64(brCachedVal, -int(brValidBits)) - b = b[:0] - } else { - goto ERROR - } - - READ1: - // read control bit 1 - brValidBits -= 1 - brCachedVal = bits.RotateLeft64(brCachedVal, 1) - if brCachedVal&1 > 0 { - // read 5 bits for leading zero count and 6 bits for the meaningful data count - const leadingTrailingBitCount = 11 - var lmBits uint64 // leading + meaningful data counts - if brValidBits >= leadingTrailingBitCount { - // decode 5 bits leading + 6 bits meaningful for a total of 11 bits - brValidBits -= leadingTrailingBitCount - brCachedVal = bits.RotateLeft64(brCachedVal, leadingTrailingBitCount) - lmBits = brCachedVal - } else { - bits01 := uint8(11) - if brValidBits > 0 { - bits01 -= brValidBits - lmBits = bits.RotateLeft64(brCachedVal, 11) - } - - // Refill brCachedVal, reading up to 8 bytes from b - if len(b) >= 8 { - brCachedVal = binary.BigEndian.Uint64(b) - brValidBits = 64 - b = b[8:] - } else if len(b) > 0 { - brCachedVal = 0 - brValidBits = uint8(len(b) * 8) - for i := range b { - brCachedVal = (brCachedVal << 8) | uint64(b[i]) - } - brCachedVal = bits.RotateLeft64(brCachedVal, -int(brValidBits)) - b = b[:0] - } else { - goto ERROR - } - brCachedVal = 
bits.RotateLeft64(brCachedVal, int(bits01)) - brValidBits -= bits01 - lmBits &^= bitMask[bits01&0x3f] - lmBits |= brCachedVal & bitMask[bits01&0x3f] - } - - lmBits &= 0x7ff - leadingN := uint8((lmBits >> 6) & 0x1f) // 5 bits leading - meaningfulN = uint8(lmBits & 0x3f) // 6 bits meaningful - if meaningfulN > 0 { - trailingN = 64 - leadingN - meaningfulN - } else { - // meaningfulN == 0 is a special case, such that all bits - // are meaningful - trailingN = 0 - meaningfulN = 64 - } - } - - var sBits uint64 // significant bits - if brValidBits >= meaningfulN { - brValidBits -= meaningfulN - brCachedVal = bits.RotateLeft64(brCachedVal, int(meaningfulN)) - sBits = brCachedVal - } else { - mBits := meaningfulN - if brValidBits > 0 { - mBits -= brValidBits - sBits = bits.RotateLeft64(brCachedVal, int(meaningfulN)) - } - - // Refill brCachedVal, reading up to 8 bytes from b - if len(b) >= 8 { - brCachedVal = binary.BigEndian.Uint64(b) - brValidBits = 64 - b = b[8:] - } else if len(b) > 0 { - brCachedVal = 0 - brValidBits = uint8(len(b) * 8) - for i := range b { - brCachedVal = (brCachedVal << 8) | uint64(b[i]) - } - brCachedVal = bits.RotateLeft64(brCachedVal, -int(brValidBits)) - b = b[:0] - } else { - goto ERROR - } - brCachedVal = bits.RotateLeft64(brCachedVal, int(mBits)) - brValidBits -= mBits - sBits &^= bitMask[mBits&0x3f] - sBits |= brCachedVal & bitMask[mBits&0x3f] - } - sBits &= bitMask[meaningfulN&0x3f] - - val ^= sBits << (trailingN & 0x3f) - if val == uvnan { - // IsNaN, eof - break - } - } - - dst = append(dst, val) - } - - return *(*[]float64)(unsafe.Pointer(&dst)), nil - -ERROR: - return (*(*[]float64)(unsafe.Pointer(&dst)))[:0], io.EOF -} diff --git a/tsdb/tsm1/batch_float_test.go b/tsdb/tsm1/batch_float_test.go deleted file mode 100644 index 44be432cef..0000000000 --- a/tsdb/tsm1/batch_float_test.go +++ /dev/null @@ -1,438 +0,0 @@ -package tsm1_test - -import ( - "bytes" - "fmt" - "math" - "math/rand" - "reflect" - "testing" - "testing/quick" - - "github.com/google/go-cmp/cmp" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" -) - -var fullBlockFloat64Ones []float64 - -func init() { - for i := 0; i < 1000; i++ { - fullBlockFloat64Ones = append(fullBlockFloat64Ones, 1.0) - } -} -func TestFloatArrayEncodeAll(t *testing.T) { - examples := [][]float64{ - {12, 12, 24, 13, 24, 24, 24, 24}, // From example paper. - {-3.8970913068231994e+307, -9.036931257783943e+307, 1.7173073833490201e+308, - -9.312369166661538e+307, -2.2435523083555231e+307, 1.4779121287289644e+307, - 1.771273431601434e+308, 8.140360378221364e+307, 4.783405048208089e+307, - -2.8044680049605344e+307, 4.412915337205696e+307, -1.2779380602005046e+308, - 1.6235802318921885e+308, -1.3402901846299688e+307, 1.6961015582104055e+308, - -1.067980796435633e+308, -3.02868987458268e+307, 1.7641793640790284e+308, - 1.6587191845856813e+307, -1.786073304985983e+308, 1.0694549382051123e+308, - 3.5635180996210295e+307}, // Failed during early development - {6.00065e+06, 6.000656e+06, 6.000657e+06, 6.000659e+06, 6.000661e+06}, // Similar values. 
- twoHoursData, - fullBlockFloat64Ones, - {}, - } - - for _, example := range examples { - src := example - var buf []byte - buf, err := tsm1.FloatArrayEncodeAll(src, buf) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - result, err := tsm1.FloatArrayDecodeAll(buf, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got, exp := result, src; !reflect.DeepEqual(got, exp) { - t.Fatalf("got result %v, expected %v", got, exp) - } - } -} - -func TestFloatArrayEncode_Compare(t *testing.T) { - // generate random values - input := make([]float64, 1000) - for i := 0; i < len(input); i++ { - input[i] = (rand.Float64() * math.MaxFloat64) - math.MaxFloat32 - } - - s := tsm1.NewFloatEncoder() - for _, v := range input { - s.Write(v) - } - s.Flush() - - buf1, err := s.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var buf2 []byte - buf2, err = tsm1.FloatArrayEncodeAll(input, buf2) - if err != nil { - t.Fatalf("unexpected error: %v\nbuf: %db %x", err, len(buf2), buf2) - } - - result, err := tsm1.FloatArrayDecodeAll(buf2, nil) - if err != nil { - dumpBufs(buf1, buf2) - t.Fatalf("unexpected error: %v\nbuf: %db %x", err, len(buf2), buf2) - } - - if got, exp := result, input; !reflect.DeepEqual(got, exp) { - t.Fatalf("got result %v, expected %v", got, exp) - } - - // Check that the encoders are byte for byte the same... - if !bytes.Equal(buf1, buf2) { - dumpBufs(buf1, buf2) - t.Fatalf("Raw bytes differ for encoders") - } -} - -func dumpBufs(a, b []byte) { - longest := len(a) - if len(b) > longest { - longest = len(b) - } - - for i := 0; i < longest; i++ { - var as, bs string - if i < len(a) { - as = fmt.Sprintf("%08b", a[i]) - } - if i < len(b) { - bs = fmt.Sprintf("%08b", b[i]) - } - - same := as == bs - fmt.Printf("%d (%d) %s - %s :: %v\n", i, i*8, as, bs, same) - } - fmt.Println() -} - -func TestFloatArrayEncodeAll_NaN(t *testing.T) { - examples := [][]float64{ - {1.0, math.NaN(), 2.0}, - {1.22, math.NaN()}, - {math.NaN(), math.NaN()}, - {math.NaN()}, - } - - for _, example := range examples { - var buf []byte - _, err := tsm1.FloatArrayEncodeAll(example, buf) - if err == nil { - t.Fatalf("expected error. 
got nil") - } - } -} - -func Test_FloatArrayEncodeAll_Quick(t *testing.T) { - quick.Check(func(values []float64) bool { - src := values - if src == nil { - src = []float64{} - } - - for i, v := range src { - if math.IsNaN(v) { - src[i] = 1.0 // Remove invalid values - } - } - - s := tsm1.NewFloatEncoder() - for _, p := range src { - s.Write(p) - } - s.Flush() - - buf1, err := s.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var buf2 []byte - buf2, err = tsm1.FloatArrayEncodeAll(src, buf2) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - result, err := tsm1.FloatArrayDecodeAll(buf2, nil) - if err != nil { - dumpBufs(buf1, buf2) - fmt.Println(src) - t.Fatalf("unexpected error: %v", err) - } - - if got, exp := result, src[:]; !reflect.DeepEqual(got, exp) { - t.Fatalf("got result %v, expected %v", got, exp) - } - return true - }, nil) -} - -func TestDecodeFloatArrayAll_Empty(t *testing.T) { - s := tsm1.NewFloatEncoder() - s.Flush() - - b, err := s.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var got []float64 - if _, err := tsm1.FloatArrayDecodeAll(b, got); err != nil { - t.Fatal(err) - } - -} - -func TestFloatArrayDecodeAll_Simple(t *testing.T) { - // Example from the paper - s := tsm1.NewFloatEncoder() - - exp := []float64{ - 12, - 12, - 24, - - // extra tests - - // floating point masking/shifting bug - 13, - 24, - - // delta-of-delta sizes - 24, - 24, - 24, - } - - for _, f := range exp { - s.Write(f) - } - s.Flush() - - b, err := s.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - buf := make([]float64, 8) - got, err := tsm1.FloatArrayDecodeAll(b, buf) - if err != nil { - t.Fatalf("unexpected decode error %q", err) - } - - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected values -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func TestFloatArrayDecodeAll_Empty(t *testing.T) { - s := tsm1.NewFloatEncoder() - s.Flush() - - b, err := s.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - buf := make([]float64, 8) - got, err := tsm1.FloatArrayDecodeAll(b, buf) - if err != nil { - t.Fatalf("unexpected decode error %q", err) - } - - if exp := []float64{}; !cmp.Equal(got, exp) { - t.Fatalf("unexpected values -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -var bufResult []byte - -func BenchmarkEncodeFloats(b *testing.B) { - var err error - cases := []int{10, 100, 1000} - enc := tsm1.NewFloatEncoder() - - for _, n := range cases { - b.Run(fmt.Sprintf("%d_seq", n), func(b *testing.B) { - input := make([]float64, n) - for i := 0; i < n; i++ { - input[i] = float64(i) - } - - b.Run("itr", func(b *testing.B) { - b.ReportAllocs() - enc.Reset() - b.ResetTimer() - for n := 0; n < b.N; n++ { - enc.Reset() - for _, x := range input { - enc.Write(x) - } - enc.Flush() - if bufResult, err = enc.Bytes(); err != nil { - b.Fatal(err) - } else { - b.SetBytes(int64(len(bufResult))) - } - } - }) - - b.Run("batch", func(b *testing.B) { - b.ReportAllocs() - b.ResetTimer() - for n := 0; n < b.N; n++ { - if bufResult, err = tsm1.FloatArrayEncodeAll(input, bufResult); err != nil { - b.Fatal(err) - } else { - b.SetBytes(int64(len(bufResult))) - } - } - }) - - }) - - b.Run(fmt.Sprintf("%d_ran", n), func(b *testing.B) { - input := make([]float64, n) - for i := 0; i < n; i++ { - input[i] = rand.Float64() * 100.0 - } - - b.Run("itr", func(b *testing.B) { - b.ReportAllocs() - b.ResetTimer() - for n := 0; n < b.N; n++ { - enc.Reset() - for _, x := range input { - enc.Write(x) - } - enc.Flush() - if bufResult, err = 
enc.Bytes(); err != nil { - b.Fatal(err) - } else { - b.SetBytes(int64(len(bufResult))) - } - } - }) - - b.Run("batch", func(b *testing.B) { - b.ReportAllocs() - b.ResetTimer() - for n := 0; n < b.N; n++ { - if bufResult, err = tsm1.FloatArrayEncodeAll(input, bufResult); err != nil { - b.Fatal(err) - } else { - b.SetBytes(int64(len(bufResult))) - } - } - }) - }) - } -} - -func BenchmarkDecodeFloats(b *testing.B) { - cases := []int{1, 55, 550, 1000} - for _, n := range cases { - b.Run(fmt.Sprintf("%d_seq", n), func(b *testing.B) { - s := tsm1.NewFloatEncoder() - for i := 0; i < n; i++ { - s.Write(float64(i)) - } - s.Flush() - data, err := s.Bytes() - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - b.SetBytes(int64(len(data))) - b.ResetTimer() - - dst := make([]float64, n) - for i := 0; i < b.N; i++ { - - got, err := tsm1.FloatArrayDecodeAll(data, dst) - if err != nil { - b.Fatalf("unexpected error\n%s", err.Error()) - } - if len(got) != n { - b.Fatalf("unexpected length -got/+exp\n%s", cmp.Diff(len(got), n)) - } - } - }) - - b.Run(fmt.Sprintf("%d_ran", n), func(b *testing.B) { - s := tsm1.NewFloatEncoder() - for i := 0; i < n; i++ { - s.Write(rand.Float64() * 100.0) - } - s.Flush() - data, err := s.Bytes() - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - b.SetBytes(int64(len(data))) - b.ResetTimer() - - dst := make([]float64, n) - for i := 0; i < b.N; i++ { - - got, err := tsm1.FloatArrayDecodeAll(data, dst) - if err != nil { - b.Fatalf("unexpected error\n%s", err.Error()) - } - if len(got) != n { - b.Fatalf("unexpected length -got/+exp\n%s", cmp.Diff(len(got), n)) - } - } - }) - } -} - -func BenchmarkFloatArrayDecodeAll(b *testing.B) { - benchmarks := []int{ - 1, - 55, - 550, - 1000, - } - for _, size := range benchmarks { - s := tsm1.NewFloatEncoder() - for c := 0; c < size; c++ { - s.Write(twoHoursData[c%len(twoHoursData)]) - } - s.Flush() - bytes, err := s.Bytes() - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - b.Run(fmt.Sprintf("%d", size), func(b *testing.B) { - b.SetBytes(int64(len(bytes))) - b.ResetTimer() - - dst := make([]float64, size) - for i := 0; i < b.N; i++ { - - got, err := tsm1.FloatArrayDecodeAll(bytes, dst) - if err != nil { - b.Fatalf("unexpected error\n%s", err.Error()) - } - if len(got) != size { - b.Fatalf("unexpected length -got/+exp\n%s", cmp.Diff(len(got), size)) - } - } - }) - } -} diff --git a/tsdb/tsm1/batch_integer.go b/tsdb/tsm1/batch_integer.go deleted file mode 100644 index bc5afad7db..0000000000 --- a/tsdb/tsm1/batch_integer.go +++ /dev/null @@ -1,290 +0,0 @@ -package tsm1 - -import ( - "encoding/binary" - "fmt" - "unsafe" - - "github.com/influxdata/influxdb/v2/pkg/encoding/simple8b" -) - -// IntegerArrayEncodeAll encodes src into b, returning b and any error encountered. -// The returned slice may be of a different length and capactity to b. -// -// IntegerArrayEncodeAll implements batch oriented versions of the three integer -// encoding types we support: uncompressed, simple8b and RLE. -// -// Important: IntegerArrayEncodeAll modifies the contents of src by using it as -// scratch space for delta encoded values. It is NOT SAFE to use src after -// passing it into IntegerArrayEncodeAll. -func IntegerArrayEncodeAll(src []int64, b []byte) ([]byte, error) { - if len(src) == 0 { - return nil, nil // Nothing to do - } - - var max = uint64(0) - - // To prevent an allocation of the entire block we're encoding reuse the - // src slice to store the encoded deltas. 
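-// Worked example (not from the original file): for src = []int64{1, 2, 4} the loop
-// below rewrites src in place as zig-zag encoded deltas,
-//
-//	deltas = [zz(1), zz(2-1), zz(4-2)] = [2, 2, 4]
-//
-// where zz(x) = (x << 1) ^ (x >> 63). The run is not constant and every value fits
-// simple8b, so the block is written with the intCompressedSimple encoding: one type
-// byte, the zig-zag of the first value as 8 raw bytes, then the simple8b-packed
-// remaining deltas.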
- deltas := reintepretInt64ToUint64Slice(src) - for i := len(deltas) - 1; i > 0; i-- { - deltas[i] = deltas[i] - deltas[i-1] - deltas[i] = ZigZagEncode(int64(deltas[i])) - if deltas[i] > max { - max = deltas[i] - } - } - - deltas[0] = ZigZagEncode(int64(deltas[0])) - - if len(deltas) > 2 { - var rle = true - for i := 2; i < len(deltas); i++ { - if deltas[1] != deltas[i] { - rle = false - break - } - } - - if rle { - // Large varints can take up to 10 bytes. We're storing 3 + 1 - // type byte. - if len(b) < 31 && cap(b) >= 31 { - b = b[:31] - } else if len(b) < 31 { - b = append(b, make([]byte, 31-len(b))...) - } - - // 4 high bits used for the encoding type - b[0] = byte(intCompressedRLE) << 4 - - i := 1 - // The first value - binary.BigEndian.PutUint64(b[i:], deltas[0]) - i += 8 - // The first delta - i += binary.PutUvarint(b[i:], deltas[1]) - // The number of times the delta is repeated - i += binary.PutUvarint(b[i:], uint64(len(deltas)-1)) - - return b[:i], nil - } - } - - if max > simple8b.MaxValue { // There is an encoded value that's too big to simple8b encode. - // Encode uncompressed. - sz := 1 + len(deltas)*8 - if len(b) < sz && cap(b) >= sz { - b = b[:sz] - } else if len(b) < sz { - b = append(b, make([]byte, sz-len(b))...) - } - - // 4 high bits of first byte store the encoding type for the block - b[0] = byte(intUncompressed) << 4 - for i, v := range deltas { - binary.BigEndian.PutUint64(b[1+i*8:1+i*8+8], uint64(v)) - } - return b[:sz], nil - } - - // Encode with simple8b - fist value is written unencoded using 8 bytes. - encoded, err := simple8b.EncodeAll(deltas[1:]) - if err != nil { - return nil, err - } - - sz := 1 + (len(encoded)+1)*8 - if len(b) < sz && cap(b) >= sz { - b = b[:sz] - } else if len(b) < sz { - b = append(b, make([]byte, sz-len(b))...) - } - - // 4 high bits of first byte store the encoding type for the block - b[0] = byte(intCompressedSimple) << 4 - - // Write the first value since it's not part of the encoded values - binary.BigEndian.PutUint64(b[1:9], deltas[0]) - - // Write the encoded values - for i, v := range encoded { - binary.BigEndian.PutUint64(b[9+i*8:9+i*8+8], v) - } - return b, nil -} - -// UnsignedArrayEncodeAll encodes src into b, returning b and any error encountered. -// The returned slice may be of a different length and capactity to b. -// -// UnsignedArrayEncodeAll implements batch oriented versions of the three integer -// encoding types we support: uncompressed, simple8b and RLE. -// -// Important: IntegerArrayEncodeAll modifies the contents of src by using it as -// scratch space for delta encoded values. It is NOT SAFE to use src after -// passing it into IntegerArrayEncodeAll. 
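-// Note (not from the original file): reinterpreting []uint64 as []int64 here round-trips
-// because the delta and zig-zag steps operate on the raw 64-bit patterns;
-// UnsignedArrayDecodeAll below reinterprets the decoded []int64 back, so the original
-// bit patterns are recovered exactly.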
-func UnsignedArrayEncodeAll(src []uint64, b []byte) ([]byte, error) { - srcint := reintepretUint64ToInt64Slice(src) - return IntegerArrayEncodeAll(srcint, b) -} - -var ( - integerBatchDecoderFunc = [...]func(b []byte, dst []int64) ([]int64, error){ - integerBatchDecodeAllUncompressed, - integerBatchDecodeAllSimple, - integerBatchDecodeAllRLE, - integerBatchDecodeAllInvalid, - } -) - -func IntegerArrayDecodeAll(b []byte, dst []int64) ([]int64, error) { - if len(b) == 0 { - return []int64{}, nil - } - - encoding := b[0] >> 4 - if encoding > intCompressedRLE { - encoding = 3 // integerBatchDecodeAllInvalid - } - - return integerBatchDecoderFunc[encoding&3](b, dst) -} - -func UnsignedArrayDecodeAll(b []byte, dst []uint64) ([]uint64, error) { - if len(b) == 0 { - return []uint64{}, nil - } - - encoding := b[0] >> 4 - if encoding > intCompressedRLE { - encoding = 3 // integerBatchDecodeAllInvalid - } - - res, err := integerBatchDecoderFunc[encoding&3](b, reintepretUint64ToInt64Slice(dst)) - return reintepretInt64ToUint64Slice(res), err -} - -func integerBatchDecodeAllUncompressed(b []byte, dst []int64) ([]int64, error) { - b = b[1:] - if len(b)&0x7 != 0 { - return []int64{}, fmt.Errorf("integerArrayDecodeAll: expected multiple of 8 bytes") - } - - count := len(b) / 8 - if cap(dst) < count { - dst = make([]int64, count) - } else { - dst = dst[:count] - } - - prev := int64(0) - for i := range dst { - prev += ZigZagDecode(binary.BigEndian.Uint64(b[i*8:])) - dst[i] = prev - } - - return dst, nil -} - -func integerBatchDecodeAllSimple(b []byte, dst []int64) ([]int64, error) { - b = b[1:] - if len(b) < 8 { - return []int64{}, fmt.Errorf("integerArrayDecodeAll: not enough data to decode packed value") - } - - count, err := simple8b.CountBytes(b[8:]) - if err != nil { - return []int64{}, err - } - - count += 1 - if cap(dst) < count { - dst = make([]int64, count) - } else { - dst = dst[:count] - } - - // first value - dst[0] = ZigZagDecode(binary.BigEndian.Uint64(b)) - - // decode compressed values - buf := reintepretInt64ToUint64Slice(dst) - n, err := simple8b.DecodeBytesBigEndian(buf[1:], b[8:]) - if err != nil { - return []int64{}, err - } - if n != count-1 { - return []int64{}, fmt.Errorf("integerArrayDecodeAll: unexpected number of values decoded; got=%d, exp=%d", n, count-1) - } - - // calculate prefix sum - prev := dst[0] - for i := 1; i < len(dst); i++ { - prev += ZigZagDecode(uint64(dst[i])) - dst[i] = prev - } - - return dst, nil -} - -func integerBatchDecodeAllRLE(b []byte, dst []int64) ([]int64, error) { - b = b[1:] - if len(b) < 8 { - return []int64{}, fmt.Errorf("integerArrayDecodeAll: not enough data to decode RLE starting value") - } - - var k, n int - - // Next 8 bytes is the starting value - first := ZigZagDecode(binary.BigEndian.Uint64(b[k : k+8])) - k += 8 - - // Next 1-10 bytes is the delta value - value, n := binary.Uvarint(b[k:]) - if n <= 0 { - return []int64{}, fmt.Errorf("integerArrayDecodeAll: invalid RLE delta value") - } - k += n - - delta := ZigZagDecode(value) - - // Last 1-10 bytes is how many times the value repeats - count, n := binary.Uvarint(b[k:]) - if n <= 0 { - return []int64{}, fmt.Errorf("integerArrayDecodeAll: invalid RLE repeat value") - } - - count += 1 - - if cap(dst) < int(count) { - dst = make([]int64, count) - } else { - dst = dst[:count] - } - - if delta == 0 { - for i := range dst { - dst[i] = first - } - } else { - acc := first - for i := range dst { - dst[i] = acc - acc += delta - } - } - - return dst, nil -} - -func integerBatchDecodeAllInvalid(b 
[]byte, _ []int64) ([]int64, error) { - return []int64{}, fmt.Errorf("unknown encoding %v", b[0]>>4) -} - -func reintepretInt64ToUint64Slice(src []int64) []uint64 { - return *(*[]uint64)(unsafe.Pointer(&src)) -} - -func reintepretUint64ToInt64Slice(src []uint64) []int64 { - return *(*[]int64)(unsafe.Pointer(&src)) -} diff --git a/tsdb/tsm1/batch_integer_test.go b/tsdb/tsm1/batch_integer_test.go deleted file mode 100644 index e33feb7949..0000000000 --- a/tsdb/tsm1/batch_integer_test.go +++ /dev/null @@ -1,1177 +0,0 @@ -package tsm1 - -import ( - "bytes" - "fmt" - "math" - "math/rand" - "reflect" - "sort" - "testing" - "testing/quick" - - "github.com/google/go-cmp/cmp" -) - -func dumpBufs(a, b []byte) { - longest := len(a) - if len(b) > longest { - longest = len(b) - } - - for i := 0; i < longest; i++ { - var as, bs string - if i < len(a) { - as = fmt.Sprintf("%08[1]b (%[1]d)", a[i]) - } - if i < len(b) { - bs = fmt.Sprintf("%08[1]b (%[1]d)", b[i]) - } - - same := as == bs - fmt.Printf("%d (%d) %s - %s :: %v\n", i, i*8, as, bs, same) - } - fmt.Println() -} - -func TestIntegerArrayEncodeAll_NoValues(t *testing.T) { - b, err := IntegerArrayEncodeAll(nil, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if len(b) > 0 { - t.Fatalf("unexpected length: exp 0, got %v", len(b)) - } - - var dec IntegerDecoder - dec.SetBytes(b) - if dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } -} - -func TestIntegerArrayEncodeAll_Compare(t *testing.T) { - // generate random values (should use simple8b) - input := make([]int64, 1000) - for i := 0; i < len(input); i++ { - input[i] = rand.Int63n(100000) - 50000 - } - sort.Slice(input, func(i int, j int) bool { return input[i] < input[j] }) - testIntegerArrayEncodeAll_Compare(t, input, intCompressedSimple) - - // Generate same values (should use RLE) - for i := 0; i < len(input); i++ { - input[i] = 1232342341234 - } - testIntegerArrayEncodeAll_Compare(t, input, intCompressedRLE) - - // Generate large random values that are not sorted. The deltas will be large - // and the values should be stored uncompressed. - for i := 0; i < len(input); i++ { - input[i] = int64(rand.Uint64()) - } - testIntegerArrayEncodeAll_Compare(t, input, intUncompressed) -} - -func testIntegerArrayEncodeAll_Compare(t *testing.T, input []int64, encoding byte) { - exp := make([]int64, len(input)) - copy(exp, input) - - s := NewIntegerEncoder(1000) - for _, v := range input { - s.Write(v) - } - - buf1, err := s.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got, exp := buf1[0]>>4, encoding; got != exp { - t.Fatalf("got encoding %v, expected %v", got, encoding) - } - - var buf2 []byte - buf2, err = IntegerArrayEncodeAll(input, buf2) - if err != nil { - t.Fatalf("unexpected error: %v\nbuf: %db %x", err, len(buf2), buf2) - } - - if got, exp := buf2[0]>>4, encoding; got != exp { - t.Fatalf("got encoding %v, expected %v", got, encoding) - } - - result, err := IntegerArrayDecodeAll(buf2, nil) - if err != nil { - dumpBufs(buf1, buf2) - t.Fatalf("unexpected error: %v\nbuf: %db %x", err, len(buf2), buf2) - } - - if got := result; !reflect.DeepEqual(got, exp) { - t.Fatalf("-got/+exp\n%s", cmp.Diff(got, exp)) - } - - // Check that the encoders are byte for byte the same... 
- if !bytes.Equal(buf1, buf2) { - dumpBufs(buf1, buf2) - t.Fatalf("Raw bytes differ for encoders") - } -} - -func TestUnsignedArrayEncodeAll_Compare(t *testing.T) { - // generate random values (should use simple8b) - input := make([]uint64, 1000) - for i := 0; i < len(input); i++ { - input[i] = uint64(rand.Int63n(100000)) - } - sort.Slice(input, func(i int, j int) bool { return input[i] < input[j] }) - testUnsignedArrayEncodeAll_Compare(t, input, intCompressedSimple) - - // Generate same values (should use RLE) - for i := 0; i < len(input); i++ { - input[i] = 1232342341234 - } - testUnsignedArrayEncodeAll_Compare(t, input, intCompressedRLE) - - // Generate large random values that are not sorted. The deltas will be large - // and the values should be stored uncompressed. - for i := 0; i < len(input); i++ { - input[i] = rand.Uint64() - } - testUnsignedArrayEncodeAll_Compare(t, input, intUncompressed) -} - -func testUnsignedArrayEncodeAll_Compare(t *testing.T, input []uint64, encoding byte) { - exp := make([]uint64, len(input)) - copy(exp, input) - - s := NewIntegerEncoder(1000) - for _, v := range input { - s.Write(int64(v)) - } - - buf1, err := s.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got, exp := buf1[0]>>4, encoding; got != exp { - t.Fatalf("got encoding %v, expected %v", got, encoding) - } - - var buf2 []byte - buf2, err = UnsignedArrayEncodeAll(input, buf2) - if err != nil { - t.Fatalf("unexpected error: %v\nbuf: %db %x", err, len(buf2), buf2) - } - - if got, exp := buf2[0]>>4, encoding; got != exp { - t.Fatalf("got encoding %v, expected %v", got, encoding) - } - - result, err := UnsignedArrayDecodeAll(buf2, nil) - if err != nil { - dumpBufs(buf1, buf2) - t.Fatalf("unexpected error: %v\nbuf: %db %x", err, len(buf2), buf2) - } - - if got := result; !reflect.DeepEqual(got, exp) { - t.Fatalf("got result %v, expected %v", got, exp) - } - - // Check that the encoders are byte for byte the same... 
- if !bytes.Equal(buf1, buf2) { - dumpBufs(buf1, buf2) - t.Fatalf("Raw bytes differ for encoders") - } -} - -func TestIntegerArrayEncodeAll_One(t *testing.T) { - v1 := int64(1) - - src := []int64{1} - b, err := IntegerArrayEncodeAll(src, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; intCompressedSimple != got { - t.Fatalf("encoding type mismatch: exp uncompressed, got %v", got) - } - - var dec IntegerDecoder - dec.SetBytes(b) - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if v1 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v1) - } -} - -func TestIntegerArrayEncodeAll_Two(t *testing.T) { - var v1, v2 int64 = 1, 2 - - src := []int64{v1, v2} - b, err := IntegerArrayEncodeAll(src, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; intCompressedSimple != got { - t.Fatalf("encoding type mismatch: exp uncompressed, got %v", got) - } - - var dec IntegerDecoder - dec.SetBytes(b) - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if v1 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v1) - } - - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if v2 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v2) - } -} - -func TestIntegerArrayEncodeAll_Negative(t *testing.T) { - var v1, v2, v3 int64 = -2, 0, 1 - - src := []int64{v1, v2, v3} - b, err := IntegerArrayEncodeAll(src, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; intCompressedSimple != got { - t.Fatalf("encoding type mismatch: exp uncompressed, got %v", got) - } - - var dec IntegerDecoder - dec.SetBytes(b) - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if v1 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v1) - } - - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if v2 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v2) - } - - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if v3 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v3) - } -} - -func TestIntegerArrayEncodeAll_Large_Range(t *testing.T) { - exp := []int64{math.MaxInt64, 0, math.MaxInt64} - - b, err := IntegerArrayEncodeAll(append([]int64{}, exp...), nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; intUncompressed != got { - t.Fatalf("encoding type mismatch: exp uncompressed, got %v", got) - } - - var dec IntegerDecoder - dec.SetBytes(b) - - var got []int64 - for dec.Next() { - got = append(got, dec.Read()) - } - - if !cmp.Equal(got, exp) { - t.Fatalf("unxpected result, -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func TestIntegerArrayEncodeAll_Uncompressed(t *testing.T) { - var v1, v2, v3 int64 = 0, 1, 1 << 60 - - src := []int64{v1, v2, v3} - b, err := IntegerArrayEncodeAll(src, nil) - if err != nil { - t.Fatalf("expected error: %v", err) - } - - // 1 byte header + 3 * 8 byte values - if exp := 25; len(b) != exp { - t.Fatalf("length mismatch: got %v, exp %v", len(b), exp) - } - - if got := b[0] >> 4; intUncompressed != got { - t.Fatalf("encoding type mismatch: exp uncompressed, got %v", got) - } - - var dec IntegerDecoder - dec.SetBytes(b) - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if v1 != 
dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v1) - } - - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if v2 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v2) - } - - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if v3 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v3) - } -} - -func TestIntegerArrayEncodeAll_NegativeUncompressed(t *testing.T) { - src := []int64{ - -2352281900722994752, 1438442655375607923, -4110452567888190110, - -1221292455668011702, -1941700286034261841, -2836753127140407751, - 1432686216250034552, 3663244026151507025, -3068113732684750258, - -1949953187327444488, 3713374280993588804, 3226153669854871355, - -2093273755080502606, 1006087192578600616, -2272122301622271655, - 2533238229511593671, -4450454445568858273, 2647789901083530435, - 2761419461769776844, -1324397441074946198, -680758138988210958, - 94468846694902125, -2394093124890745254, -2682139311758778198, - } - exp := make([]int64, len(src)) - copy(exp, src) - - b, err := IntegerArrayEncodeAll(src, nil) - if err != nil { - t.Fatalf("expected error: %v", err) - } - - if got := b[0] >> 4; intUncompressed != got { - t.Fatalf("encoding type mismatch: exp uncompressed, got %v", got) - } - - var dec IntegerDecoder - dec.SetBytes(b) - - i := 0 - for dec.Next() { - if i > len(src) { - t.Fatalf("read too many values: got %v, exp %v", i, len(exp)) - } - - if exp[i] != dec.Read() { - t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), exp[i]) - } - i += 1 - } - - if i != len(exp) { - t.Fatalf("failed to read enough values: got %v, exp %v", i, len(exp)) - } -} - -func TestIntegerArrayEncodeAll_AllNegative(t *testing.T) { - src := []int64{ - -10, -5, -1, - } - exp := make([]int64, len(src)) - copy(exp, src) - - b, err := IntegerArrayEncodeAll(src, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; intCompressedSimple != got { - t.Fatalf("encoding type mismatch: exp uncompressed, got %v", got) - } - - var dec IntegerDecoder - dec.SetBytes(b) - i := 0 - for dec.Next() { - if i > len(exp) { - t.Fatalf("read too many values: got %v, exp %v", i, len(exp)) - } - - if exp[i] != dec.Read() { - t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), exp[i]) - } - i += 1 - } - - if i != len(exp) { - t.Fatalf("failed to read enough values: got %v, exp %v", i, len(exp)) - } -} - -func TestIntegerArrayEncodeAll_CounterPacked(t *testing.T) { - src := []int64{ - 1e15, 1e15 + 1, 1e15 + 2, 1e15 + 3, 1e15 + 4, 1e15 + 6, - } - exp := make([]int64, len(src)) - copy(exp, src) - - b, err := IntegerArrayEncodeAll(src, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if b[0]>>4 != intCompressedSimple { - t.Fatalf("unexpected encoding format: expected simple, got %v", b[0]>>4) - } - - // Should use 1 header byte + 2, 8 byte words if delta-encoding is used based on - // values sizes. Without delta-encoding, we'd get 49 bytes. 
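The arithmetic behind those two figures for the six counter values above: stored raw, each value takes 8 bytes plus the 1-byte header, i.e. 6*8 + 1 = 49 bytes. After delta encoding, the first value is written unencoded in one 8-byte word and the remaining five deltas (1, 1, 1, 1, 2) are small enough for simple8b to pack into a single additional 8-byte word, so the block is 1 + 8 + 8 = 17 bytes, which is what the length check below asserts.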
- if exp := 17; len(b) != exp { - t.Fatalf("encoded length mismatch: got %v, exp %v", len(b), exp) - } - - var dec IntegerDecoder - dec.SetBytes(b) - i := 0 - for dec.Next() { - if i > len(exp) { - t.Fatalf("read too many values: got %v, exp %v", i, len(exp)) - } - - if exp[i] != dec.Read() { - t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), exp[i]) - } - i += 1 - } - - if i != len(exp) { - t.Fatalf("failed to read enough values: got %v, exp %v", i, len(exp)) - } -} - -func TestIntegerArrayEncodeAll_CounterRLE(t *testing.T) { - src := []int64{ - 1e15, 1e15 + 1, 1e15 + 2, 1e15 + 3, 1e15 + 4, 1e15 + 5, - } - exp := make([]int64, len(src)) - copy(exp, src) - - b, err := IntegerArrayEncodeAll(src, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if b[0]>>4 != intCompressedRLE { - t.Fatalf("unexpected encoding format: expected RLE, got %v", b[0]>>4) - } - - // Should use 1 header byte, 8 byte first value, 1 var-byte for delta and 1 var-byte for - // count of deltas in this particular RLE. - if exp := 11; len(b) != exp { - t.Fatalf("encoded length mismatch: got %v, exp %v", len(b), exp) - } - - var dec IntegerDecoder - dec.SetBytes(b) - i := 0 - for dec.Next() { - if i > len(exp) { - t.Fatalf("read too many values: got %v, exp %v", i, len(exp)) - } - - if exp[i] != dec.Read() { - t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), exp[i]) - } - i += 1 - } - - if i != len(exp) { - t.Fatalf("failed to read enough values: got %v, exp %v", i, len(exp)) - } -} - -func TestIntegerArrayEncodeAll_Descending(t *testing.T) { - src := []int64{ - 7094, 4472, 1850, - } - exp := make([]int64, len(src)) - copy(exp, src) - - b, err := IntegerArrayEncodeAll(src, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if b[0]>>4 != intCompressedRLE { - t.Fatalf("unexpected encoding format: expected simple, got %v", b[0]>>4) - } - - // Should use 1 header byte, 8 byte first value, 1 var-byte for delta and 1 var-byte for - // count of deltas in this particular RLE. - if exp := 12; len(b) != exp { - t.Fatalf("encoded length mismatch: got %v, exp %v", len(b), exp) - } - - var dec IntegerDecoder - dec.SetBytes(b) - i := 0 - for dec.Next() { - if i > len(exp) { - t.Fatalf("read too many values: got %v, exp %v", i, len(exp)) - } - - if exp[i] != dec.Read() { - t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), exp[i]) - } - i += 1 - } - - if i != len(exp) { - t.Fatalf("failed to read enough values: got %v, exp %v", i, len(exp)) - } -} - -func TestIntegerArrayEncodeAll_Flat(t *testing.T) { - src := []int64{ - 1, 1, 1, 1, - } - exp := make([]int64, len(src)) - copy(exp, src) - - b, err := IntegerArrayEncodeAll(src, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if b[0]>>4 != intCompressedRLE { - t.Fatalf("unexpected encoding format: expected simple, got %v", b[0]>>4) - } - - // Should use 1 header byte, 8 byte first value, 1 var-byte for delta and 1 var-byte for - // count of deltas in this particular RLE. 
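These tests all recover the block's encoding selector from the high nibble of the first byte (b[0] >> 4). A small debugging helper along the same lines; it is only a sketch and assumes it is compiled into the same package, where the intUncompressed, intCompressedSimple and intCompressedRLE constants are defined:

// intEncodingName reports which block encoding a buffer uses, by reading the
// 4-bit selector stored in the high nibble of the first byte.
func intEncodingName(b []byte) string {
	if len(b) == 0 {
		return "empty"
	}
	switch int(b[0] >> 4) {
	case intUncompressed:
		return "uncompressed"
	case intCompressedSimple:
		return "simple8b"
	case intCompressedRLE:
		return "rle"
	default:
		return "unknown"
	}
}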
- if exp := 11; len(b) != exp { - t.Fatalf("encoded length mismatch: got %v, exp %v", len(b), exp) - } - - var dec IntegerDecoder - dec.SetBytes(b) - i := 0 - for dec.Next() { - if i > len(exp) { - t.Fatalf("read too many values: got %v, exp %v", i, len(exp)) - } - - if exp[i] != dec.Read() { - t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), exp[i]) - } - i += 1 - } - - if i != len(exp) { - t.Fatalf("failed to read enough values: got %v, exp %v", i, len(exp)) - } -} - -func TestIntegerArrayEncodeAll_MinMax(t *testing.T) { - src := []int64{ - math.MinInt64, math.MaxInt64, - } - exp := make([]int64, len(src)) - copy(exp, src) - - b, err := IntegerArrayEncodeAll(src, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if b[0]>>4 != intCompressedSimple { - t.Fatalf("unexpected encoding format: expected simple, got %v", b[0]>>4) - } - - if exp := 17; len(b) != exp { - t.Fatalf("encoded length mismatch: got %v, exp %v", len(b), exp) - } - - var dec IntegerDecoder - dec.SetBytes(b) - i := 0 - for dec.Next() { - if i > len(exp) { - t.Fatalf("read too many values: got %v, exp %v", i, len(exp)) - } - - if exp[i] != dec.Read() { - t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), exp[i]) - } - i += 1 - } - - if i != len(exp) { - t.Fatalf("failed to read enough values: got %v, exp %v", i, len(exp)) - } -} - -func TestIntegerArrayEncodeAll_Quick(t *testing.T) { - quick.Check(func(values []int64) bool { - src := values - if values == nil { - src = []int64{} // is this really expected? - } - - // Copy over values to compare result—src is modified... - exp := make([]int64, 0, len(src)) - exp = append(exp, src...) - - // Retrieve encoded bytes from encoder. - b, err := IntegerArrayEncodeAll(src, nil) - if err != nil { - t.Fatal(err) - } - - // Read values out of decoder. - got := make([]int64, 0, len(src)) - var dec IntegerDecoder - dec.SetBytes(b) - for dec.Next() { - if err := dec.Error(); err != nil { - t.Fatal(err) - } - got = append(got, dec.Read()) - } - - // Verify that input and output values match. 
- if !reflect.DeepEqual(exp, got) { - t.Fatalf("mismatch:\n\nexp=%#v\n\ngot=%#v\n\n", src, got) - } - - return true - }, nil) -} - -func TestIntegerArrayDecodeAll_NegativeUncompressed(t *testing.T) { - exp := []int64{ - -2352281900722994752, 1438442655375607923, -4110452567888190110, - -1221292455668011702, -1941700286034261841, -2836753127140407751, - 1432686216250034552, 3663244026151507025, -3068113732684750258, - -1949953187327444488, 3713374280993588804, 3226153669854871355, - -2093273755080502606, 1006087192578600616, -2272122301622271655, - 2533238229511593671, -4450454445568858273, 2647789901083530435, - 2761419461769776844, -1324397441074946198, -680758138988210958, - 94468846694902125, -2394093124890745254, -2682139311758778198, - } - enc := NewIntegerEncoder(256) - for _, v := range exp { - enc.Write(v) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("expected error: %v", err) - } - - if got := b[0] >> 4; intUncompressed != got { - t.Fatalf("encoding type mismatch: exp uncompressed, got %v", got) - } - - got, err := IntegerArrayDecodeAll(b, nil) - if err != nil { - t.Fatalf("unexpected decode error %q", err) - } - - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected values: -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func TestIntegerArrayDecodeAll_AllNegative(t *testing.T) { - enc := NewIntegerEncoder(3) - exp := []int64{ - -10, -5, -1, - } - - for _, v := range exp { - enc.Write(v) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; intCompressedSimple != got { - t.Fatalf("encoding type mismatch: exp uncompressed, got %v", got) - } - - got, err := IntegerArrayDecodeAll(b, nil) - if err != nil { - t.Fatalf("unexpected decode error %q", err) - } - - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected values: -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func TestIntegerArrayDecodeAll_CounterPacked(t *testing.T) { - enc := NewIntegerEncoder(16) - exp := []int64{ - 1e15, 1e15 + 1, 1e15 + 2, 1e15 + 3, 1e15 + 4, 1e15 + 6, - } - - for _, v := range exp { - enc.Write(v) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if b[0]>>4 != intCompressedSimple { - t.Fatalf("unexpected encoding format: expected simple, got %v", b[0]>>4) - } - - // Should use 1 header byte + 2, 8 byte words if delta-encoding is used based on - // values sizes. Without delta-encoding, we'd get 49 bytes. - if exp := 17; len(b) != exp { - t.Fatalf("encoded length mismatch: got %v, exp %v", len(b), exp) - } - - got, err := IntegerArrayDecodeAll(b, nil) - if err != nil { - t.Fatalf("unexpected decode error %q", err) - } - - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected values: -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func TestIntegerArrayDecodeAll_CounterRLE(t *testing.T) { - enc := NewIntegerEncoder(16) - exp := []int64{ - 1e15, 1e15 + 1, 1e15 + 2, 1e15 + 3, 1e15 + 4, 1e15 + 5, - } - - for _, v := range exp { - enc.Write(v) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if b[0]>>4 != intCompressedRLE { - t.Fatalf("unexpected encoding format: expected RLE, got %v", b[0]>>4) - } - - // Should use 1 header byte, 8 byte first value, 1 var-byte for delta and 1 var-byte for - // count of deltas in this particular RLE. 
- if exp := 11; len(b) != exp { - t.Fatalf("encoded length mismatch: got %v, exp %v", len(b), exp) - } - - got, err := IntegerArrayDecodeAll(b, nil) - if err != nil { - t.Fatalf("unexpected decode error %q", err) - } - - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected values: -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func TestIntegerArrayDecodeAll_Descending(t *testing.T) { - enc := NewIntegerEncoder(16) - exp := []int64{ - 7094, 4472, 1850, - } - - for _, v := range exp { - enc.Write(v) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if b[0]>>4 != intCompressedRLE { - t.Fatalf("unexpected encoding format: expected simple, got %v", b[0]>>4) - } - - // Should use 1 header byte, 8 byte first value, 1 var-byte for delta and 1 var-byte for - // count of deltas in this particular RLE. - if exp := 12; len(b) != exp { - t.Fatalf("encoded length mismatch: got %v, exp %v", len(b), exp) - } - - got, err := IntegerArrayDecodeAll(b, nil) - if err != nil { - t.Fatalf("unexpected decode error %q", err) - } - - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected values: -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func TestIntegerArrayDecodeAll_Flat(t *testing.T) { - enc := NewIntegerEncoder(16) - exp := []int64{ - 1, 1, 1, 1, - } - - for _, v := range exp { - enc.Write(v) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if b[0]>>4 != intCompressedRLE { - t.Fatalf("unexpected encoding format: expected simple, got %v", b[0]>>4) - } - - // Should use 1 header byte, 8 byte first value, 1 var-byte for delta and 1 var-byte for - // count of deltas in this particular RLE. - if exp := 11; len(b) != exp { - t.Fatalf("encoded length mismatch: got %v, exp %v", len(b), exp) - } - - got, err := IntegerArrayDecodeAll(b, nil) - if err != nil { - t.Fatalf("unexpected decode error %q", err) - } - - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected values: -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func TestIntegerArrayDecodeAll_MinMax(t *testing.T) { - enc := NewIntegerEncoder(2) - exp := []int64{ - math.MinInt64, math.MaxInt64, - } - - for _, v := range exp { - enc.Write(v) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if b[0]>>4 != intUncompressed { - t.Fatalf("unexpected encoding format: expected simple, got %v", b[0]>>4) - } - - if exp := 17; len(b) != exp { - t.Fatalf("encoded length mismatch: got %v, exp %v", len(b), exp) - } - - got, err := IntegerArrayDecodeAll(b, nil) - if err != nil { - t.Fatalf("unexpected decode error %q", err) - } - - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected values: -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func TestIntegerArrayDecodeAll_Quick(t *testing.T) { - quick.Check(func(values []int64) bool { - exp := values - if values == nil { - exp = []int64{} // is this really expected? - } - - // Write values to encoder. - enc := NewIntegerEncoder(1024) - for _, v := range values { - enc.Write(v) - } - - // Retrieve encoded bytes from encoder. - buf, err := enc.Bytes() - if err != nil { - t.Fatal(err) - } - - // Read values out of decoder. 
- got, err := IntegerArrayDecodeAll(buf, nil) - if err != nil { - t.Fatalf("unexpected decode error %q", err) - } - - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected values: -got/+exp\n%s", cmp.Diff(got, exp)) - } - - return true - }, nil) -} - -var bufResult []byte - -func BenchmarkEncodeIntegers(b *testing.B) { - var err error - cases := []int{10, 100, 1000} - - for _, n := range cases { - enc := NewIntegerEncoder(n) - - b.Run(fmt.Sprintf("%d_seq", n), func(b *testing.B) { - src := make([]int64, n) - for i := 0; i < n; i++ { - src[i] = int64(i) - } - - input := make([]int64, len(src)) - copy(input, src) - - b.Run("itr", func(b *testing.B) { - b.ReportAllocs() - b.ResetTimer() - for n := 0; n < b.N; n++ { - enc.Reset() - for _, x := range src { - enc.Write(x) - } - enc.Flush() - if bufResult, err = enc.Bytes(); err != nil { - b.Fatal(err) - } - - // Since the batch encoder needs to do a copy to reset the - // input, we will add a copy here too. - copy(input, src) - } - }) - - b.Run("batch", func(b *testing.B) { - b.ReportAllocs() - b.ResetTimer() - for n := 0; n < b.N; n++ { - if bufResult, err = IntegerArrayEncodeAll(input, bufResult); err != nil { - b.Fatal(err) - } - copy(input, src) // Reset input that gets modified in IntegerArrayEncodeAll - } - }) - - }) - - b.Run(fmt.Sprintf("%d_ran", n), func(b *testing.B) { - src := make([]int64, n) - for i := 0; i < n; i++ { - src[i] = rand.Int63n(100) - } - - input := make([]int64, len(src)) - copy(input, src) - - b.Run("itr", func(b *testing.B) { - b.ReportAllocs() - b.ResetTimer() - for n := 0; n < b.N; n++ { - enc.Reset() - for _, x := range src { - enc.Write(x) - } - enc.Flush() - if bufResult, err = enc.Bytes(); err != nil { - b.Fatal(err) - } - - // Since the batch encoder needs to do a copy to reset the - // input, we will add a copy here too. - copy(input, src) - } - }) - - b.Run("batch", func(b *testing.B) { - b.ReportAllocs() - b.ResetTimer() - for n := 0; n < b.N; n++ { - if bufResult, err = IntegerArrayEncodeAll(input, bufResult); err != nil { - b.Fatal(err) - } - copy(input, src) // Reset input that gets modified in IntegerArrayEncodeAll - } - }) - }) - - b.Run(fmt.Sprintf("%d_dup", n), func(b *testing.B) { - src := make([]int64, n) - for i := 0; i < n; i++ { - src[i] = 1233242 - } - - input := make([]int64, len(src)) - copy(input, src) - - b.Run("itr", func(b *testing.B) { - b.ReportAllocs() - b.ResetTimer() - for n := 0; n < b.N; n++ { - enc.Reset() - for _, x := range src { - enc.Write(x) - } - enc.Flush() - if bufResult, err = enc.Bytes(); err != nil { - b.Fatal(err) - } - - // Since the batch encoder needs to do a copy to reset the - // input, we will add a copy here too. 
- copy(input, src) - } - }) - - b.Run("batch", func(b *testing.B) { - b.ReportAllocs() - b.ResetTimer() - for n := 0; n < b.N; n++ { - if bufResult, err = IntegerArrayEncodeAll(input, bufResult); err != nil { - b.Fatal(err) - } - copy(input, src) // Reset input that gets modified in IntegerArrayEncodeAll - } - }) - }) - } -} - -func BenchmarkIntegerArrayDecodeAllUncompressed(b *testing.B) { - benchmarks := []int{ - 5, - 55, - 555, - 1000, - } - - values := []int64{ - -2352281900722994752, 1438442655375607923, -4110452567888190110, - -1221292455668011702, -1941700286034261841, -2836753127140407751, - 1432686216250034552, 3663244026151507025, -3068113732684750258, - -1949953187327444488, 3713374280993588804, 3226153669854871355, - -2093273755080502606, 1006087192578600616, -2272122301622271655, - 2533238229511593671, -4450454445568858273, 2647789901083530435, - 2761419461769776844, -1324397441074946198, -680758138988210958, - 94468846694902125, -2394093124890745254, -2682139311758778198, - } - - for _, size := range benchmarks { - rand.Seed(int64(size * 1e3)) - - enc := NewIntegerEncoder(size) - for i := 0; i < size; i++ { - enc.Write(values[rand.Int()%len(values)]) - } - bytes, _ := enc.Bytes() - - b.Run(fmt.Sprintf("%d", size), func(b *testing.B) { - b.SetBytes(int64(len(bytes))) - b.ReportAllocs() - - dst := make([]int64, size) - for i := 0; i < b.N; i++ { - dst, _ = IntegerArrayDecodeAll(bytes, dst) - } - }) - } -} - -func BenchmarkIntegerArrayDecodeAllPackedSimple(b *testing.B) { - benchmarks := []int{ - 5, - 55, - 555, - 1000, - } - for _, size := range benchmarks { - rand.Seed(int64(size * 1e3)) - - enc := NewIntegerEncoder(size) - for i := 0; i < size; i++ { - // Small amount of randomness prevents RLE from being used - enc.Write(int64(i) + int64(rand.Intn(10))) - } - bytes, _ := enc.Bytes() - - b.Run(fmt.Sprintf("%d", size), func(b *testing.B) { - b.SetBytes(int64(len(bytes))) - b.ReportAllocs() - - dst := make([]int64, size) - for i := 0; i < b.N; i++ { - IntegerArrayDecodeAll(bytes, dst) - } - }) - } -} - -func BenchmarkIntegerArrayDecodeAllRLE(b *testing.B) { - benchmarks := []struct { - n int - delta int64 - }{ - {5, 1}, - {55, 1}, - {555, 1}, - {1000, 1}, - {1000, 0}, - } - for _, bm := range benchmarks { - rand.Seed(int64(bm.n * 1e3)) - - enc := NewIntegerEncoder(bm.n) - acc := int64(0) - for i := 0; i < bm.n; i++ { - enc.Write(acc) - acc += bm.delta - } - bytes, _ := enc.Bytes() - - b.Run(fmt.Sprintf("%d_delta_%d", bm.n, bm.delta), func(b *testing.B) { - b.SetBytes(int64(len(bytes))) - b.ReportAllocs() - - dst := make([]int64, bm.n) - for i := 0; i < b.N; i++ { - IntegerArrayDecodeAll(bytes, dst) - } - }) - } -} diff --git a/tsdb/tsm1/batch_string.go b/tsdb/tsm1/batch_string.go deleted file mode 100644 index 7088579994..0000000000 --- a/tsdb/tsm1/batch_string.go +++ /dev/null @@ -1,136 +0,0 @@ -package tsm1 - -import ( - "encoding/binary" - "errors" - "fmt" - "unsafe" - - "github.com/golang/snappy" -) - -var ( - errStringBatchDecodeInvalidStringLength = fmt.Errorf("stringArrayDecodeAll: invalid encoded string length") - errStringBatchDecodeLengthOverflow = fmt.Errorf("stringArrayDecodeAll: length overflow") - errStringBatchDecodeShortBuffer = fmt.Errorf("stringArrayDecodeAll: short buffer") - - // ErrStringArrayEncodeTooLarge reports that the encoded length of a slice of strings is too large. - ErrStringArrayEncodeTooLarge = errors.New("StringArrayEncodeAll: source length too large") -) - -// StringArrayEncodeAll encodes src into b, returning b and any error encountered. 
-// The returned slice may be of a different length and capactity to b. -// -// Currently only the string compression scheme used snappy. -func StringArrayEncodeAll(src []string, b []byte) ([]byte, error) { - srcSz := 2 + len(src)*binary.MaxVarintLen32 // strings should't be longer than 64kb - for i := range src { - srcSz += len(src[i]) - } - - // determine the maximum possible length needed for the buffer, which - // includes the compressed size - var compressedSz = 0 - if len(src) > 0 { - mle := snappy.MaxEncodedLen(srcSz) - if mle == -1 { - return b[:0], ErrStringArrayEncodeTooLarge - } - compressedSz = mle + 1 /* header */ - } - totSz := srcSz + compressedSz - - if cap(b) < totSz { - b = make([]byte, totSz) - } else { - b = b[:totSz] - } - - // Shortcut to snappy encoding nothing. - if len(src) == 0 { - b[0] = stringCompressedSnappy << 4 - return b[:2], nil - } - - // write the data to be compressed *after* the space needed for snappy - // compression. The compressed data is at the start of the allocated buffer, - // ensuring the entire capacity is returned and available for subsequent use. - dta := b[compressedSz:] - n := 0 - for i := range src { - n += binary.PutUvarint(dta[n:], uint64(len(src[i]))) - n += copy(dta[n:], src[i]) - } - dta = dta[:n] - - dst := b[:compressedSz] - dst[0] = stringCompressedSnappy << 4 - res := snappy.Encode(dst[1:], dta) - return dst[:len(res)+1], nil -} - -func StringArrayDecodeAll(b []byte, dst []string) ([]string, error) { - // First byte stores the encoding type, only have snappy format - // currently so ignore for now. - if len(b) > 0 { - var err error - // it is important that to note that `snappy.Decode` always returns - // a newly allocated slice as the final strings reference this slice - // directly. - b, err = snappy.Decode(nil, b[1:]) - if err != nil { - return []string{}, fmt.Errorf("failed to decode string block: %v", err.Error()) - } - } else { - return []string{}, nil - } - - var ( - i, l int - ) - - sz := cap(dst) - if sz == 0 { - sz = 64 - dst = make([]string, sz) - } else { - dst = dst[:sz] - } - - j := 0 - - for i < len(b) { - length, n := binary.Uvarint(b[i:]) - if n <= 0 { - return []string{}, errStringBatchDecodeInvalidStringLength - } - - // The length of this string plus the length of the variable byte encoded length - l = int(length) + n - - lower := i + n - upper := lower + int(length) - if upper < lower { - return []string{}, errStringBatchDecodeLengthOverflow - } - if upper > len(b) { - return []string{}, errStringBatchDecodeShortBuffer - } - - // NOTE: this optimization is critical for performance and to reduce - // allocations. 
This is just as "safe" as string.Builder, which - // returns a string mapped to the original byte slice - s := b[lower:upper] - val := *(*string)(unsafe.Pointer(&s)) - if j < len(dst) { - dst[j] = val - } else { - dst = append(dst, val) // force a resize - dst = dst[:cap(dst)] - } - i += l - j++ - } - - return dst[:j], nil -} diff --git a/tsdb/tsm1/batch_string_test.go b/tsdb/tsm1/batch_string_test.go deleted file mode 100644 index e7736f14ef..0000000000 --- a/tsdb/tsm1/batch_string_test.go +++ /dev/null @@ -1,405 +0,0 @@ -package tsm1 - -import ( - "bytes" - "fmt" - "math/rand" - "reflect" - "strings" - "testing" - "testing/quick" - - "github.com/google/go-cmp/cmp" - "github.com/influxdata/influxdb/v2/internal/testutil" - "github.com/influxdata/influxdb/v2/uuid" -) - -func equalError(a, b error) bool { - return a == nil && b == nil || a != nil && b != nil && a.Error() == b.Error() -} - -func TestStringArrayEncodeAll_NoValues(t *testing.T) { - b, err := StringArrayEncodeAll(nil, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var dec StringDecoder - if err := dec.SetBytes(b); err != nil { - t.Fatalf("unexpected error creating string decoder: %v", err) - } - if dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } -} - -func TestStringArrayEncodeAll_ExceedsMaxEncodedLen(t *testing.T) { - str := strings.Repeat(" ", 1<<23) // 8MB string - var s []string - for i := 0; i < (1<<32)/(1<<23); i++ { - s = append(s, str) - } - - _, got := StringArrayEncodeAll(s, nil) - if !cmp.Equal(got, ErrStringArrayEncodeTooLarge, cmp.Comparer(equalError)) { - t.Fatalf("expected error, got: %v", got) - } -} - -func TestStringArrayEncodeAll_Single(t *testing.T) { - src := []string{"v1"} - b, err := StringArrayEncodeAll(src, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var dec StringDecoder - if dec.SetBytes(b); err != nil { - t.Fatalf("unexpected error creating string decoder: %v", err) - } - if !dec.Next() { - t.Fatalf("unexpected next value: got false, exp true") - } - - if src[0] != dec.Read() { - t.Fatalf("unexpected value: got %v, exp %v", dec.Read(), src[0]) - } -} - -func TestStringArrayEncode_Compare(t *testing.T) { - // generate random values - input := make([]string, 1000) - for i := 0; i < len(input); i++ { - input[i] = uuid.TimeUUID().String() - } - - // Example from the paper - s := NewStringEncoder(1000) - for _, v := range input { - s.Write(v) - } - s.Flush() - - buf1, err := s.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - buf2 := append([]byte("this is some jibberish"), make([]byte, 100, 200)...) - buf2, err = StringArrayEncodeAll(input, buf2) - if err != nil { - t.Fatalf("unexpected error: %v\nbuf: %db %x", err, len(buf2), buf2) - } - - result, err := StringArrayDecodeAll(buf2, nil) - if err != nil { - dumpBufs(buf1, buf2) - t.Fatalf("unexpected error: %v\nbuf: %db %x", err, len(buf2), buf2) - } - - if got, exp := result, input; !reflect.DeepEqual(got, exp) { - t.Fatalf("got result %v, expected %v", got, exp) - } - - // Check that the encoders are byte for byte the same... 
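Away from these byte-for-byte comparison tests, everyday use of the snappy-backed string codec is a plain encode/decode round trip. A hedged sketch, again assuming the post-move v1/tsdb/engine/tsm1 import path:

package main

import (
	"fmt"
	"log"

	"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
)

func main() {
	src := []string{"host=server01", "host=server02", "host=server03"}

	// The block is a sequence of varint-length-prefixed strings compressed
	// with snappy; unlike the integer codec, src is not used as scratch space.
	buf, err := tsm1.StringArrayEncodeAll(src, nil)
	if err != nil {
		log.Fatal(err)
	}

	out, err := tsm1.StringArrayDecodeAll(buf, nil)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(out) // [host=server01 host=server02 host=server03]
}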
- if !bytes.Equal(buf1, buf2) { - dumpBufs(buf1, buf2) - t.Fatalf("Raw bytes differ for encoders") - } -} - -func TestStringArrayEncodeAll_Multi_Compressed(t *testing.T) { - src := make([]string, 10) - for i := range src { - src[i] = fmt.Sprintf("value %d", i) - } - - b, err := StringArrayEncodeAll(src, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if b[0]>>4 != stringCompressedSnappy { - t.Fatalf("unexpected encoding: got %v, exp %v", b[0], stringCompressedSnappy) - } - - if exp := 51; len(b) != exp { - t.Fatalf("unexpected length: got %v, exp %v", len(b), exp) - } - - var dec StringDecoder - if err := dec.SetBytes(b); err != nil { - t.Fatalf("unexpected erorr creating string decoder: %v", err) - } - - for i, v := range src { - if !dec.Next() { - t.Fatalf("unexpected next value: got false, exp true") - } - if v != dec.Read() { - t.Fatalf("unexpected value at pos %d: got %v, exp %v", i, dec.Read(), v) - } - } - - if dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } -} - -func TestStringArrayEncodeAll_Quick(t *testing.T) { - var base []byte - quick.Check(func(values []string) bool { - src := values - if values == nil { - src = []string{} - } - - // Retrieve encoded bytes from encoder. - buf, err := StringArrayEncodeAll(src, base) - if err != nil { - t.Fatal(err) - } - - // Read values out of decoder. - got := make([]string, 0, len(src)) - var dec StringDecoder - if err := dec.SetBytes(buf); err != nil { - t.Fatal(err) - } - for dec.Next() { - if err := dec.Error(); err != nil { - t.Fatal(err) - } - got = append(got, dec.Read()) - } - - // Verify that input and output values match. - if !reflect.DeepEqual(src, got) { - t.Fatalf("mismatch:\n\nexp=%#v\n\ngot=%#v\n\n", src, got) - } - - return true - }, nil) -} - -func TestStringArrayDecodeAll_NoValues(t *testing.T) { - enc := NewStringEncoder(1024) - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - got, err := StringArrayDecodeAll(b, nil) - if err != nil { - t.Fatalf("unexpected error creating string decoder: %v", err) - } - - exp := []string{} - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected value: -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func TestStringArrayDecodeAll_Single(t *testing.T) { - enc := NewStringEncoder(1024) - v1 := "v1" - enc.Write(v1) - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - got, err := StringArrayDecodeAll(b, nil) - if err != nil { - t.Fatalf("unexpected error creating string decoder: %v", err) - } - - exp := []string{"v1"} - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected value: -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func TestStringArrayDecodeAll_Multi_Compressed(t *testing.T) { - enc := NewStringEncoder(1024) - - exp := make([]string, 10) - for i := range exp { - exp[i] = fmt.Sprintf("value %d", i) - enc.Write(exp[i]) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if b[0]>>4 != stringCompressedSnappy { - t.Fatalf("unexpected encoding: got %v, exp %v", b[0], stringCompressedSnappy) - } - - if exp := 51; len(b) != exp { - t.Fatalf("unexpected length: got %v, exp %v", len(b), exp) - } - - got, err := StringArrayDecodeAll(b, nil) - if err != nil { - t.Fatalf("unexpected error creating string decoder: %v", err) - } - - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected value: -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func TestStringArrayDecodeAll_Quick(t *testing.T) { - quick.Check(func(values []string) bool { - exp := values - 
if values == nil { - exp = []string{} - } - // Write values to encoder. - enc := NewStringEncoder(1024) - for _, v := range values { - enc.Write(v) - } - - // Retrieve encoded bytes from encoder. - buf, err := enc.Bytes() - if err != nil { - t.Fatal(err) - } - - // Read values out of decoder. - got, err := StringArrayDecodeAll(buf, nil) - if err != nil { - t.Fatalf("unexpected error creating string decoder: %v", err) - } - - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected value: -got/+exp\n%s", cmp.Diff(got, exp)) - } - - return true - }, nil) -} - -func TestStringArrayDecodeAll_Empty(t *testing.T) { - got, err := StringArrayDecodeAll([]byte{}, nil) - if err != nil { - t.Fatalf("unexpected error creating string decoder: %v", err) - } - - exp := []string{} - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected value: -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func TestStringArrayDecodeAll_CorruptBytes(t *testing.T) { - cases := []string{ - "\x10\x03\b\x03Hi", // Higher length than actual data - "\x10\x1dp\x9c\x90\x90\x90\x90\x90\x90\x90\x90\x90length overflow----", - "0t\x00\x01\x000\x00\x01\x000\x00\x01\x000\x00\x01\x000\x00\x01" + - "\x000\x00\x01\x000\x00\x01\x000\x00\x00\x00\xff:\x01\x00\x01\x00\x01" + - "\x00\x01\x00\x01\x00\x01\x00\x010\x010\x000\x010\x010\x010\x01" + - "0\x010\x010\x010\x010\x010\x010\x010\x010\x010\x010", // Upper slice bounds overflows negative - } - - for _, c := range cases { - t.Run(fmt.Sprintf("%q", c), func(t *testing.T) { - got, err := StringArrayDecodeAll([]byte(c), nil) - if err == nil { - t.Fatal("exp an err, got nil") - } - - exp := []string{} - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected value: -got/+exp\n%s", cmp.Diff(got, exp)) - } - }) - } -} - -func BenchmarkEncodeStrings(b *testing.B) { - var err error - cases := []int{10, 100, 1000} - - for _, n := range cases { - enc := NewStringEncoder(n) - b.Run(fmt.Sprintf("%d", n), func(b *testing.B) { - input := make([]string, n) - for i := 0; i < n; i++ { - input[i] = uuid.TimeUUID().String() - } - - b.Run("itr", func(b *testing.B) { - b.ReportAllocs() - enc.Reset() - b.ResetTimer() - for n := 0; n < b.N; n++ { - enc.Reset() - for _, x := range input { - enc.Write(x) - } - enc.Flush() - if bufResult, err = enc.Bytes(); err != nil { - b.Fatal(err) - } - } - }) - - b.Run("batch", func(b *testing.B) { - b.ReportAllocs() - b.ResetTimer() - for n := 0; n < b.N; n++ { - if bufResult, err = StringArrayEncodeAll(input, bufResult); err != nil { - b.Fatal(err) - } - } - }) - - }) - } -} - -func BenchmarkStringArrayDecodeAll(b *testing.B) { - benchmarks := []struct { - n int - w int - }{ - {1, 10}, - {55, 10}, - {550, 10}, - {1000, 10}, - } - for _, bm := range benchmarks { - rand.Seed(int64(bm.n * 1e3)) - - s := NewStringEncoder(bm.n) - for c := 0; c < bm.n; c++ { - s.Write(testutil.MakeSentence(bm.w)) - } - s.Flush() - bytes, err := s.Bytes() - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - b.Run(fmt.Sprintf("%d", bm.n), func(b *testing.B) { - b.SetBytes(int64(len(bytes))) - b.ReportAllocs() - - dst := make([]string, bm.n) - for i := 0; i < b.N; i++ { - got, err := StringArrayDecodeAll(bytes, dst) - if err != nil { - b.Fatalf("unexpected length -got/+exp\n%s", cmp.Diff(len(dst), bm.n)) - } - if len(got) != bm.n { - b.Fatalf("unexpected length -got/+exp\n%s", cmp.Diff(len(dst), bm.n)) - } - } - }) - } -} diff --git a/tsdb/tsm1/batch_timestamp.go b/tsdb/tsm1/batch_timestamp.go deleted file mode 100644 index b0be9d9b8e..0000000000 --- a/tsdb/tsm1/batch_timestamp.go +++ /dev/null @@ -1,296 +0,0 @@ 
-package tsm1 - -import ( - "encoding/binary" - "fmt" - "math" - "unsafe" - - "github.com/influxdata/influxdb/v2/pkg/encoding/simple8b" -) - -// TimeArrayEncodeAll encodes src into b, returning b and any error encountered. -// The returned slice may be of a different length and capacity to b. -// -// TimeArrayEncodeAll implements batch oriented versions of the three integer -// encoding types we support: uncompressed, simple8b and RLE. -// -// Timestamp values to be encoded should be sorted before encoding. When encoded, -// the values are first delta-encoded. The first value is the starting timestamp, -// subsequent values are the difference from the prior value. -// -// Important: TimeArrayEncodeAll modifies the contents of src by using it as -// scratch space for delta encoded values. It is NOT SAFE to use src after -// passing it into TimeArrayEncodeAll. -func TimeArrayEncodeAll(src []int64, b []byte) ([]byte, error) { - if len(src) == 0 { - return nil, nil // Nothing to do - } - - var max, div = uint64(0), uint64(1e12) - - // To prevent an allocation of the entire block we're encoding reuse the - // src slice to store the encoded deltas. - deltas := reintepretInt64ToUint64Slice(src) - - if len(deltas) > 1 { - for i := len(deltas) - 1; i > 0; i-- { - deltas[i] = deltas[i] - deltas[i-1] - if deltas[i] > max { - max = deltas[i] - } - } - - var rle = true - for i := 2; i < len(deltas); i++ { - if deltas[1] != deltas[i] { - rle = false - break - } - } - - // Deltas are the same - encode with RLE - if rle { - // Large varints can take up to 10 bytes. We're storing 3 + 1 - // type byte. - if len(b) < 31 && cap(b) >= 31 { - b = b[:31] - } else if len(b) < 31 { - b = append(b, make([]byte, 31-len(b))...) - } - - // 4 high bits used for the encoding type - b[0] = byte(timeCompressedRLE) << 4 - - i := 1 - // The first value - binary.BigEndian.PutUint64(b[i:], deltas[0]) - i += 8 - - // The first delta, checking the divisor - // given all deltas are the same, we can do a single check for the divisor - v := deltas[1] - for div > 1 && v%div != 0 { - div /= 10 - } - - if div > 1 { - // 4 low bits are the log10 divisor - b[0] |= byte(math.Log10(float64(div))) - i += binary.PutUvarint(b[i:], deltas[1]/div) - } else { - i += binary.PutUvarint(b[i:], deltas[1]) - } - - // The number of times the delta is repeated - i += binary.PutUvarint(b[i:], uint64(len(deltas))) - - return b[:i], nil - } - } - - // We can't compress this time-range, the deltas exceed 1 << 60 - if max > simple8b.MaxValue { - // Encode uncompressed. - sz := 1 + len(deltas)*8 - if len(b) < sz && cap(b) >= sz { - b = b[:sz] - } else if len(b) < sz { - b = append(b, make([]byte, sz-len(b))...) - } - - // 4 high bits of first byte store the encoding type for the block - b[0] = byte(timeUncompressed) << 4 - for i, v := range deltas { - binary.BigEndian.PutUint64(b[1+i*8:1+i*8+8], v) - } - return b[:sz], nil - } - - // find divisor only if we're compressing with simple8b - for i := 1; i < len(deltas) && div > 1; i++ { - // If our value is divisible by 10, break. Otherwise, try the next smallest divisor. - v := deltas[i] - for div > 1 && v%div != 0 { - div /= 10 - } - } - - // Only apply the divisor if it's greater than 1 since division is expensive. - if div > 1 { - for i := 1; i < len(deltas); i++ { - deltas[i] /= div - } - } - - // Encode with simple8b - fist value is written unencoded using 8 bytes. 
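The simple8b.EncodeAll call that follows packs the (scaled-down) deltas into 64-bit words, each carrying a 4-bit selector plus as many values as fit at that bit width. A tiny sketch of how compactly a run of small deltas packs, using the same pkg/encoding/simple8b package:

package main

import (
	"fmt"
	"log"

	"github.com/influxdata/influxdb/v2/pkg/encoding/simple8b"
)

func main() {
	// 100 one-second deltas, already scaled down by the 10^9 divisor.
	deltas := make([]uint64, 100)
	for i := range deltas {
		deltas[i] = 1
	}

	words, err := simple8b.EncodeAll(deltas)
	if err != nil {
		log.Fatal(err)
	}
	// All 100 values fit in a handful of 64-bit words because each value is tiny.
	fmt.Printf("%d deltas packed into %d words\n", len(deltas), len(words))
}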
- encoded, err := simple8b.EncodeAll(deltas[1:]) - if err != nil { - return nil, err - } - - sz := 1 + (len(encoded)+1)*8 - if len(b) < sz && cap(b) >= sz { - b = b[:sz] - } else if len(b) < sz { - b = append(b, make([]byte, sz-len(b))...) - } - - // 4 high bits of first byte store the encoding type for the block - b[0] = byte(timeCompressedPackedSimple) << 4 - // 4 low bits are the log10 divisor - b[0] |= byte(math.Log10(float64(div))) - - // Write the first value since it's not part of the encoded values - binary.BigEndian.PutUint64(b[1:9], deltas[0]) - - // Write the encoded values - for i, v := range encoded { - binary.BigEndian.PutUint64(b[9+i*8:9+i*8+8], v) - } - return b[:sz], nil -} - -var ( - timeBatchDecoderFunc = [...]func(b []byte, dst []int64) ([]int64, error){ - timeBatchDecodeAllUncompressed, - timeBatchDecodeAllSimple, - timeBatchDecodeAllRLE, - timeBatchDecodeAllInvalid, - } -) - -func TimeArrayDecodeAll(b []byte, dst []int64) ([]int64, error) { - if len(b) == 0 { - return []int64{}, nil - } - - encoding := b[0] >> 4 - if encoding > timeCompressedRLE { - encoding = 3 // timeBatchDecodeAllInvalid - } - - return timeBatchDecoderFunc[encoding&3](b, dst) -} - -func timeBatchDecodeAllUncompressed(b []byte, dst []int64) ([]int64, error) { - b = b[1:] - if len(b)&0x7 != 0 { - return []int64{}, fmt.Errorf("timeArrayDecodeAll: expected multiple of 8 bytes") - } - - count := len(b) / 8 - if cap(dst) < count { - dst = make([]int64, count) - } else { - dst = dst[:count] - } - - prev := uint64(0) - for i := range dst { - prev += binary.BigEndian.Uint64(b[i*8:]) - dst[i] = int64(prev) - } - - return dst, nil -} - -func timeBatchDecodeAllSimple(b []byte, dst []int64) ([]int64, error) { - if len(b) < 9 { - return []int64{}, fmt.Errorf("timeArrayDecodeAll: not enough data to decode packed timestamps") - } - - div := uint64(math.Pow10(int(b[0] & 0xF))) // multiplier - - count, err := simple8b.CountBytes(b[9:]) - if err != nil { - return []int64{}, err - } - - count += 1 - - if cap(dst) < count { - dst = make([]int64, count) - } else { - dst = dst[:count] - } - - buf := *(*[]uint64)(unsafe.Pointer(&dst)) - - // first value - buf[0] = binary.BigEndian.Uint64(b[1:9]) - n, err := simple8b.DecodeBytesBigEndian(buf[1:], b[9:]) - if err != nil { - return []int64{}, err - } - if n != count-1 { - return []int64{}, fmt.Errorf("timeArrayDecodeAll: unexpected number of values decoded; got=%d, exp=%d", n, count-1) - } - - // Compute the prefix sum and scale the deltas back up - last := buf[0] - if div > 1 { - for i := 1; i < len(buf); i++ { - dgap := buf[i] * div - buf[i] = last + dgap - last = buf[i] - } - } else { - for i := 1; i < len(buf); i++ { - buf[i] += last - last = buf[i] - } - } - - return dst, nil -} - -func timeBatchDecodeAllRLE(b []byte, dst []int64) ([]int64, error) { - if len(b) < 9 { - return []int64{}, fmt.Errorf("timeArrayDecodeAll: not enough data to decode RLE starting value") - } - - var k, n int - - // Lower 4 bits hold the 10 based exponent so we can scale the values back up - mod := int64(math.Pow10(int(b[k] & 0xF))) - k++ - - // Next 8 bytes is the starting timestamp - first := binary.BigEndian.Uint64(b[k:]) - k += 8 - - // Next 1-10 bytes is our (scaled down by factor of 10) run length delta - delta, n := binary.Uvarint(b[k:]) - if n <= 0 { - return []int64{}, fmt.Errorf("timeArrayDecodeAll: invalid run length in decodeRLE") - } - k += n - - // Scale the delta back up - delta *= uint64(mod) - - // Last 1-10 bytes is how many times the value repeats - count, n := 
binary.Uvarint(b[k:]) - if n <= 0 { - return []int64{}, fmt.Errorf("timeDecoder: invalid repeat value in decodeRLE") - } - - if cap(dst) < int(count) { - dst = make([]int64, count) - } else { - dst = dst[:count] - } - - acc := first - for i := range dst { - dst[i] = int64(acc) - acc += delta - } - - return dst, nil -} - -func timeBatchDecodeAllInvalid(b []byte, _ []int64) ([]int64, error) { - return []int64{}, fmt.Errorf("unknown encoding %v", b[0]>>4) -} diff --git a/tsdb/tsm1/batch_timestamp_test.go b/tsdb/tsm1/batch_timestamp_test.go deleted file mode 100644 index 9ea9a6d3bf..0000000000 --- a/tsdb/tsm1/batch_timestamp_test.go +++ /dev/null @@ -1,1171 +0,0 @@ -package tsm1 - -import ( - "bytes" - "fmt" - "math/rand" - "reflect" - "sort" - "testing" - "testing/quick" - "time" - - "github.com/google/go-cmp/cmp" -) - -func TestTimeArrayEncodeAll(t *testing.T) { - now := time.Unix(0, 0) - src := []int64{now.UnixNano()} - - for i := 1; i < 4; i++ { - src = append(src, now.Add(time.Duration(i)*time.Second).UnixNano()) - } - - exp := make([]int64, len(src)) - copy(exp, src) - - b, err := TimeArrayEncodeAll(src, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; got != timeCompressedRLE { - t.Fatalf("Wrong encoding used: expected rle, got %v", got) - } - - var dec TimeDecoder - dec.Init(b) - for i, v := range exp { - if !dec.Next() { - t.Fatalf("Next == false, expected true") - } - - if v != dec.Read() { - t.Fatalf("Item %d mismatch, got %v, exp %v", i, dec.Read(), v) - } - } -} - -// This test compares the ArrayEncoder to the original iterator encoder, byte for -// byte. -func TestTimeArrayEncodeAll_Compare(t *testing.T) { - // generate random values (should use simple8b) - input := make([]int64, 1000) - for i := 0; i < len(input); i++ { - input[i] = rand.Int63n(100000) - 50000 - } - sort.Slice(input, func(i int, j int) bool { return input[i] < input[j] }) - testTimeArrayEncodeAll_Compare(t, input, timeCompressedPackedSimple) - - // Generate same values (should use RLE) - for i := 0; i < len(input); i++ { - input[i] = 1232342341234 - } - testTimeArrayEncodeAll_Compare(t, input, timeCompressedRLE) - - // Generate large random values that are not sorted. The deltas will be large - // and the values should be stored uncompressed. - for i := 0; i < len(input); i++ { - input[i] = int64(rand.Uint64()) - } - testTimeArrayEncodeAll_Compare(t, input, timeUncompressed) -} - -func testTimeArrayEncodeAll_Compare(t *testing.T, input []int64, encoding byte) { - exp := make([]int64, len(input)) - copy(exp, input) - - s := NewTimeEncoder(1000) - for _, v := range input { - s.Write(v) - } - - buf1, err := s.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got, exp := buf1[0]>>4, encoding; got != exp { - t.Fatalf("got encoding %v, expected %v", got, encoding) - } - - var buf2 []byte - buf2, err = TimeArrayEncodeAll(input, buf2) - if err != nil { - t.Fatalf("unexpected error: %v\nbuf: %db %x", err, len(buf2), buf2) - } - - if got, exp := buf2[0]>>4, encoding; got != exp { - t.Fatalf("got encoding %v, expected %v", got, encoding) - } - - result, err := TimeArrayDecodeAll(buf2, nil) - if err != nil { - dumpBufs(buf1, buf2) - t.Fatalf("unexpected error: %v\nbuf: %db %x", err, len(buf2), buf2) - } - - if got := result; !reflect.DeepEqual(got, exp) { - t.Fatalf("-got/+exp\n%s", cmp.Diff(got, exp)) - } - - // Check that the encoders are byte for byte the same... 
- if !bytes.Equal(buf1, buf2) { - dumpBufs(buf1, buf2) - t.Fatalf("Raw bytes differ for encoders") - } -} - -func TestTimeArrayEncodeAll_NoValues(t *testing.T) { - b, err := TimeArrayEncodeAll(nil, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var dec TimeDecoder - dec.Init(b) - if dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } -} - -func TestTimeArrayEncodeAll_One(t *testing.T) { - src := []int64{0} - exp := make([]int64, len(src)) - copy(exp, src) - - b, err := TimeArrayEncodeAll(src, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; got != timeCompressedPackedSimple { - t.Fatalf("Wrong encoding used: expected uncompressed, got %v", got) - } - - var dec TimeDecoder - dec.Init(b) - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if exp[0] != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), exp[0]) - } -} - -func TestTimeArrayEncodeAll_Two(t *testing.T) { - src := []int64{0, 1} - exp := make([]int64, len(src)) - copy(exp, src) - - b, err := TimeArrayEncodeAll(src, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; got != timeCompressedRLE { - t.Fatalf("Wrong encoding used: expected rle, got %v", got) - } - - var dec TimeDecoder - dec.Init(b) - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if exp[0] != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), exp[0]) - } - - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if exp[1] != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), exp[1]) - } -} - -func TestTimeArrayEncodeAll_Three(t *testing.T) { - src := []int64{0, 1, 3} - exp := make([]int64, len(src)) - copy(exp, src) - - b, err := TimeArrayEncodeAll(src, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; got != timeCompressedPackedSimple { - t.Fatalf("Wrong encoding used: expected rle, got %v", got) - } - - var dec TimeDecoder - dec.Init(b) - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if exp[0] != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), exp[0]) - } - - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if exp[1] != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), exp[1]) - } - - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if exp[2] != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), exp[2]) - } -} - -func TestTimeArrayEncodeAll_Large_Range(t *testing.T) { - src := []int64{1442369134000000000, 1442369135000000000} - exp := make([]int64, len(src)) - copy(exp, src) - - b, err := TimeArrayEncodeAll(src, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; got != timeCompressedRLE { - t.Fatalf("Wrong encoding used: expected rle, got %v", got) - } - - var dec TimeDecoder - dec.Init(b) - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if exp[0] != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), exp[2]) - } - - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if exp[1] != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), exp[1]) - } -} - -func TestTimeArrayEncodeAll_Uncompressed(t *testing.T) { - src := 
[]int64{time.Unix(0, 0).UnixNano(), time.Unix(1, 0).UnixNano()} - - // about 36.5yrs in NS resolution is max range for compressed format - // This should cause the encoding to fallback to raw points - src = append(src, time.Unix(2, (2<<59)).UnixNano()) - exp := make([]int64, len(src)) - copy(exp, src) - - b, err := TimeArrayEncodeAll(src, nil) - if err != nil { - t.Fatalf("expected error: %v", err) - } - - if exp := 25; len(b) != exp { - t.Fatalf("length mismatch: got %v, exp %v", len(b), exp) - } - - if got := b[0] >> 4; got != timeUncompressed { - t.Fatalf("Wrong encoding used: expected uncompressed, got %v", got) - } - - var dec TimeDecoder - dec.Init(b) - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if exp[0] != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), exp[0]) - } - - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if exp[1] != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), exp[1]) - } - - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if exp[2] != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), exp[2]) - } -} - -func TestTimeArrayEncodeAll_RLE(t *testing.T) { - var src []int64 - for i := 0; i < 500; i++ { - src = append(src, int64(i)) - } - exp := make([]int64, len(src)) - copy(exp, src) - - b, err := TimeArrayEncodeAll(src, nil) - if exp := 12; len(b) != exp { - t.Fatalf("length mismatch: got %v, exp %v", len(b), exp) - } - - if got := b[0] >> 4; got != timeCompressedRLE { - t.Fatalf("Wrong encoding used: expected uncompressed, got %v", got) - } - - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var dec TimeDecoder - dec.Init(b) - for i, v := range exp { - if !dec.Next() { - t.Fatalf("Next == false, expected true") - } - - if v != dec.Read() { - t.Fatalf("Item %d mismatch, got %v, exp %v", i, dec.Read(), v) - } - } - - if dec.Next() { - t.Fatalf("unexpected extra values") - } -} - -func TestTimeArrayEncodeAll_Reverse(t *testing.T) { - src := []int64{3, 2, 0} - exp := make([]int64, len(src)) - copy(exp, src) - - b, err := TimeArrayEncodeAll(src, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; got != timeUncompressed { - t.Fatalf("Wrong encoding used: expected uncompressed, got %v", got) - } - - var dec TimeDecoder - dec.Init(b) - i := 0 - for dec.Next() { - if exp[i] != dec.Read() { - t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), exp[i]) - } - i++ - } -} - -func TestTimeArrayEncodeAll_220SecondDelta(t *testing.T) { - var src []int64 - now := time.Now() - - for i := 0; i < 220; i++ { - src = append(src, now.Add(time.Duration(i*60)*time.Second).UnixNano()) - } - exp := make([]int64, len(src)) - copy(exp, src) - - b, err := TimeArrayEncodeAll(src, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - // Using RLE, should get 12 bytes - if exp := 12; len(b) != exp { - t.Fatalf("unexpected length: got %v, exp %v", len(b), exp) - } - - if got := b[0] >> 4; got != timeCompressedRLE { - t.Fatalf("Wrong encoding used: expected uncompressed, got %v", got) - } - - var dec TimeDecoder - dec.Init(b) - i := 0 - for dec.Next() { - if exp[i] != dec.Read() { - t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), exp[i]) - } - i++ - } - - if i != len(exp) { - t.Fatalf("Read too few values: exp %d, got %d", len(exp), i) - } - - if dec.Next() { - t.Fatalf("expecte Next() = false, got true") - } -} - 
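Every assertion in the timestamp tests above branches on b[0] >> 4, the four-bit tag written into the high nibble of the first byte of an encoded block to record which encoding was selected (raw for very wide deltas, simple8b packing for small irregular deltas, RLE for a constant delta). The following is only a minimal, self-contained sketch of that tag check; the constant values are an assumption read off the corrupt-input cases further down in this diff, not taken from the deleted source itself.

package main

import "fmt"

// Assumed tag values: 0 = raw/uncompressed, 1 = simple8b-packed deltas, 2 = RLE.
const (
	timeUncompressed           = 0
	timeCompressedPackedSimple = 1
	timeCompressedRLE          = 2
)

// timeEncoding returns the encoding tag stored in the high nibble of the
// first byte of an encoded timestamp block. The block must be non-empty.
func timeEncoding(block []byte) byte {
	return block[0] >> 4
}

func main() {
	fmt.Println(timeEncoding([]byte{0x20, 0x00}) == timeCompressedRLE)          // true
	fmt.Println(timeEncoding([]byte{0x10, 0x14}) == timeCompressedPackedSimple) // true
	fmt.Println(timeEncoding([]byte{0x00, 0x01}) == timeUncompressed)           // true
}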
-func TestTimeArrayEncodeAll_Quick(t *testing.T) { - quick.Check(func(values []int64) bool { - // Write values to encoder. - - exp := make([]int64, len(values)) - for i, v := range values { - exp[i] = int64(v) - } - - // Retrieve encoded bytes from encoder. - b, err := TimeArrayEncodeAll(values, nil) - if err != nil { - t.Fatal(err) - } - - // Read values out of decoder. - got := make([]int64, 0, len(values)) - var dec TimeDecoder - dec.Init(b) - for dec.Next() { - if err := dec.Error(); err != nil { - t.Fatal(err) - } - got = append(got, dec.Read()) - } - - // Verify that input and output values match. - if !reflect.DeepEqual(exp, got) { - t.Fatalf("mismatch:\n\nexp=%+v\n\ngot=%+v\n\n", exp, got) - } - - return true - }, nil) -} - -func TestTimeArrayEncodeAll_RLESeconds(t *testing.T) { - src := []int64{ - 1444448158000000000, - 1444448168000000000, - 1444448178000000000, - 1444448188000000000, - 1444448198000000000, - 1444448208000000000, - } - exp := make([]int64, len(src)) - copy(exp, src) - - b, err := TimeArrayEncodeAll(src, nil) - if got := b[0] >> 4; got != timeCompressedRLE { - t.Fatalf("Wrong encoding used: expected rle, got %v", got) - } - - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var dec TimeDecoder - dec.Init(b) - for i, v := range exp { - if !dec.Next() { - t.Fatalf("Next == false, expected true") - } - - if v != dec.Read() { - t.Fatalf("Item %d mismatch, got %v, exp %v", i, dec.Read(), v) - } - } - - if dec.Next() { - t.Fatalf("unexpected extra values") - } -} - -func TestTimeArrayEncodeAll_Count_Uncompressed(t *testing.T) { - src := []int64{time.Unix(0, 0).UnixNano(), - time.Unix(1, 0).UnixNano(), - } - - // about 36.5yrs in NS resolution is max range for compressed format - // This should cause the encoding to fallback to raw points - src = append(src, time.Unix(2, (2<<59)).UnixNano()) - exp := make([]int64, len(src)) - copy(exp, src) - - b, err := TimeArrayEncodeAll(src, nil) - if got := b[0] >> 4; got != timeUncompressed { - t.Fatalf("Wrong encoding used: expected rle, got %v", got) - } - - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got, exp := CountTimestamps(b), 3; got != exp { - t.Fatalf("count mismatch: got %v, exp %v", got, exp) - } -} - -func TestTimeArrayEncodeAll_Count_RLE(t *testing.T) { - src := []int64{ - 1444448158000000000, - 1444448168000000000, - 1444448178000000000, - 1444448188000000000, - 1444448198000000000, - 1444448208000000000, - } - exp := make([]int64, len(src)) - copy(exp, src) - - b, err := TimeArrayEncodeAll(src, nil) - if got := b[0] >> 4; got != timeCompressedRLE { - t.Fatalf("Wrong encoding used: expected rle, got %v", got) - } - - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got, exp := CountTimestamps(b), len(exp); got != exp { - t.Fatalf("count mismatch: got %v, exp %v", got, exp) - } -} - -func TestTimeArrayEncodeAll_Count_Simple8(t *testing.T) { - src := []int64{0, 1, 3} - - b, err := TimeArrayEncodeAll(src, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; got != timeCompressedPackedSimple { - t.Fatalf("Wrong encoding used: expected rle, got %v", got) - } - - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got, exp := CountTimestamps(b), 3; got != exp { - t.Fatalf("count mismatch: got %v, exp %v", got, exp) - } -} - -func TestTimeArrayDecodeAll_NoValues(t *testing.T) { - enc := NewTimeEncoder(0) - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - got, err := 
TimeArrayDecodeAll(b, nil) - if err != nil { - t.Fatalf("unexpected decode error %q", err) - } - - exp := []int64{} - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected values: -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func TestTimeArrayDecodeAll_One(t *testing.T) { - enc := NewTimeEncoder(1) - exp := []int64{0} - for _, v := range exp { - enc.Write(v) - } - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; got != timeCompressedPackedSimple { - t.Fatalf("Wrong encoding used: expected uncompressed, got %v", got) - } - - got, err := TimeArrayDecodeAll(b, nil) - if err != nil { - t.Fatalf("unexpected decode error %q", err) - } - - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected values: -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func TestTimeArrayDecodeAll_Two(t *testing.T) { - enc := NewTimeEncoder(2) - exp := []int64{0, 1} - for _, v := range exp { - enc.Write(v) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; got != timeCompressedRLE { - t.Fatalf("Wrong encoding used: expected rle, got %v", got) - } - - got, err := TimeArrayDecodeAll(b, nil) - if err != nil { - t.Fatalf("unexpected decode error %q", err) - } - - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected values: -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func TestTimeArrayDecodeAll_Three(t *testing.T) { - enc := NewTimeEncoder(3) - exp := []int64{0, 1, 3} - for _, v := range exp { - enc.Write(v) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; got != timeCompressedPackedSimple { - t.Fatalf("Wrong encoding used: expected rle, got %v", got) - } - - got, err := TimeArrayDecodeAll(b, nil) - if err != nil { - t.Fatalf("unexpected decode error %q", err) - } - - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected values: -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func TestTimeArrayDecodeAll_Large_Range(t *testing.T) { - enc := NewTimeEncoder(2) - exp := []int64{1442369134000000000, 1442369135000000000} - for _, v := range exp { - enc.Write(v) - } - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; got != timeCompressedRLE { - t.Fatalf("Wrong encoding used: expected rle, got %v", got) - } - - got, err := TimeArrayDecodeAll(b, nil) - if err != nil { - t.Fatalf("unexpected decode error %q", err) - } - - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected values: -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func TestTimeArrayDecodeAll_Uncompressed(t *testing.T) { - enc := NewTimeEncoder(3) - exp := []int64{ - time.Unix(0, 0).UnixNano(), - time.Unix(1, 0).UnixNano(), - // about 36.5yrs in NS resolution is max range for compressed format - // This should cause the encoding to fallback to raw points - time.Unix(2, 2<<59).UnixNano(), - } - for _, v := range exp { - enc.Write(v) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("expected error: %v", err) - } - - if exp := 25; len(b) != exp { - t.Fatalf("length mismatch: got %v, exp %v", len(b), exp) - } - - if got := b[0] >> 4; got != timeUncompressed { - t.Fatalf("Wrong encoding used: expected uncompressed, got %v", got) - } - - got, err := TimeArrayDecodeAll(b, nil) - if err != nil { - t.Fatalf("unexpected decode error %q", err) - } - - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected values: -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func TestTimeArrayDecodeAll_RLE(t *testing.T) { - enc := NewTimeEncoder(512) - var exp []int64 - 
for i := 0; i < 500; i++ { - exp = append(exp, int64(i)) - } - - for _, v := range exp { - enc.Write(v) - } - - b, err := enc.Bytes() - if exp := 12; len(b) != exp { - t.Fatalf("length mismatch: got %v, exp %v", len(b), exp) - } - - if got := b[0] >> 4; got != timeCompressedRLE { - t.Fatalf("Wrong encoding used: expected uncompressed, got %v", got) - } - - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - got, err := TimeArrayDecodeAll(b, nil) - if err != nil { - t.Fatalf("unexpected decode error %q", err) - } - - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected values: -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func TestTimeArrayDecodeAll_Reverse(t *testing.T) { - enc := NewTimeEncoder(3) - exp := []int64{ - int64(3), - int64(2), - int64(0), - } - - for _, v := range exp { - enc.Write(v) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; got != timeUncompressed { - t.Fatalf("Wrong encoding used: expected uncompressed, got %v", got) - } - - got, err := TimeArrayDecodeAll(b, nil) - if err != nil { - t.Fatalf("unexpected decode error %q", err) - } - - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected values: -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func TestTimeArrayDecodeAll_Negative(t *testing.T) { - enc := NewTimeEncoder(3) - exp := []int64{ - -2352281900722994752, 1438442655375607923, -4110452567888190110, - -1221292455668011702, -1941700286034261841, -2836753127140407751, - 1432686216250034552, 3663244026151507025, -3068113732684750258, - -1949953187327444488, 3713374280993588804, 3226153669854871355, - -2093273755080502606, 1006087192578600616, -2272122301622271655, - 2533238229511593671, -4450454445568858273, 2647789901083530435, - 2761419461769776844, -1324397441074946198, -680758138988210958, - 94468846694902125, -2394093124890745254, -2682139311758778198, - } - - for _, v := range exp { - enc.Write(v) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; got != timeUncompressed { - t.Fatalf("Wrong encoding used: expected uncompressed, got %v", got) - } - - got, err := TimeArrayDecodeAll(b, nil) - if err != nil { - t.Fatalf("unexpected decode error %q", err) - } - - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected values: -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func TestTimeArrayDecodeAll_220SecondDelta(t *testing.T) { - enc := NewTimeEncoder(256) - var exp []int64 - now := time.Now() - for i := 0; i < 220; i++ { - exp = append(exp, now.Add(time.Duration(i*60)*time.Second).UnixNano()) - } - - for _, v := range exp { - enc.Write(v) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - // Using RLE, should get 12 bytes - if exp := 12; len(b) != exp { - t.Fatalf("unexpected length: got %v, exp %v", len(b), exp) - } - - if got := b[0] >> 4; got != timeCompressedRLE { - t.Fatalf("Wrong encoding used: expected uncompressed, got %v", got) - } - - got, err := TimeArrayDecodeAll(b, nil) - if err != nil { - t.Fatalf("unexpected decode error %q", err) - } - - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected values: -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func TestTimeArrayDecodeAll_Quick(t *testing.T) { - quick.Check(func(values []int64) bool { - // Write values to encoder. - enc := NewTimeEncoder(1024) - exp := make([]int64, len(values)) - for i, v := range values { - exp[i] = int64(v) - enc.Write(exp[i]) - } - - // Retrieve encoded bytes from encoder. 
- buf, err := enc.Bytes() - if err != nil { - t.Fatal(err) - } - - got, err := TimeArrayDecodeAll(buf, nil) - if err != nil { - t.Fatalf("unexpected decode error %q", err) - } - - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected values: -got/+exp\n%s", cmp.Diff(got, exp)) - } - - return true - }, nil) -} - -func TestTimeArrayDecodeAll_RLESeconds(t *testing.T) { - enc := NewTimeEncoder(6) - exp := make([]int64, 6) - - exp[0] = int64(1444448158000000000) - exp[1] = int64(1444448168000000000) - exp[2] = int64(1444448178000000000) - exp[3] = int64(1444448188000000000) - exp[4] = int64(1444448198000000000) - exp[5] = int64(1444448208000000000) - - for _, v := range exp { - enc.Write(v) - } - - b, err := enc.Bytes() - if got := b[0] >> 4; got != timeCompressedRLE { - t.Fatalf("Wrong encoding used: expected rle, got %v", got) - } - - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - got, err := TimeArrayDecodeAll(b, nil) - if err != nil { - t.Fatalf("unexpected decode error %q", err) - } - - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected values: -got/+exp\n%s", cmp.Diff(got, exp)) - } -} - -func TestTimeArrayDecodeAll_Corrupt(t *testing.T) { - cases := []string{ - "\x10\x14", // Packed: not enough data - "\x20\x00", // RLE: not enough data for starting timestamp - "\x2012345678\x90", // RLE: initial timestamp but invalid uvarint encoding - "\x2012345678\x7f", // RLE: timestamp, RLE but invalid repeat - "\x00123", // Raw: data length not multiple of 8 - } - - for _, c := range cases { - t.Run(fmt.Sprintf("%q", c), func(t *testing.T) { - got, err := TimeArrayDecodeAll([]byte(c), nil) - if err == nil { - t.Fatal("exp an err, got nil") - } - - exp := []int64{} - if !cmp.Equal(got, exp) { - t.Fatalf("unexpected value: -got/+exp\n%s", cmp.Diff(got, exp)) - } - }) - } -} - -func BenchmarkEncodeTimestamps(b *testing.B) { - var err error - cases := []int{10, 100, 1000} - - for _, n := range cases { - enc := NewTimeEncoder(n) - - b.Run(fmt.Sprintf("%d_seq", n), func(b *testing.B) { - src := make([]int64, n) - for i := 0; i < n; i++ { - src[i] = int64(i) - } - sort.Slice(src, func(i int, j int) bool { return src[i] < src[j] }) - - input := make([]int64, len(src)) - copy(input, src) - - b.Run("itr", func(b *testing.B) { - b.ReportAllocs() - b.ResetTimer() - for n := 0; n < b.N; n++ { - enc.Reset() - for _, x := range src { - enc.Write(x) - } - if bufResult, err = enc.Bytes(); err != nil { - b.Fatal(err) - } - - // Since the batch encoder needs to do a copy to reset the - // input, we will add a copy here too. - copy(input, src) - } - }) - - b.Run("batch", func(b *testing.B) { - b.ReportAllocs() - b.ResetTimer() - for n := 0; n < b.N; n++ { - if bufResult, err = TimeArrayEncodeAll(input, bufResult); err != nil { - b.Fatal(err) - } - copy(input, src) // Reset input that gets modified in IntegerArrayEncodeAll - } - }) - - }) - - b.Run(fmt.Sprintf("%d_ran", n), func(b *testing.B) { - src := make([]int64, n) - for i := 0; i < n; i++ { - src[i] = int64(rand.Uint64()) - } - sort.Slice(src, func(i int, j int) bool { return src[i] < src[j] }) - - input := make([]int64, len(src)) - copy(input, src) - - b.Run("itr", func(b *testing.B) { - b.ReportAllocs() - b.ResetTimer() - for n := 0; n < b.N; n++ { - enc.Reset() - for _, x := range src { - enc.Write(x) - } - if bufResult, err = enc.Bytes(); err != nil { - b.Fatal(err) - } - - // Since the batch encoder needs to do a copy to reset the - // input, we will add a copy here too. 
- copy(input, src) - } - }) - - b.Run("batch", func(b *testing.B) { - b.ReportAllocs() - b.ResetTimer() - for n := 0; n < b.N; n++ { - if bufResult, err = TimeArrayEncodeAll(input, bufResult); err != nil { - b.Fatal(err) - } - copy(input, src) // Reset input that gets modified in IntegerArrayEncodeAll - } - }) - }) - - b.Run(fmt.Sprintf("%d_dup", n), func(b *testing.B) { - src := make([]int64, n) - for i := 0; i < n; i++ { - src[i] = 1233242 - } - - input := make([]int64, len(src)) - copy(input, src) - - b.Run("itr", func(b *testing.B) { - b.ReportAllocs() - b.ResetTimer() - for n := 0; n < b.N; n++ { - enc.Reset() - for _, x := range src { - enc.Write(x) - } - if bufResult, err = enc.Bytes(); err != nil { - b.Fatal(err) - } - - // Since the batch encoder needs to do a copy to reset the - // input, we will add a copy here too. - copy(input, src) - } - }) - - b.Run("batch", func(b *testing.B) { - b.ReportAllocs() - b.ResetTimer() - for n := 0; n < b.N; n++ { - if bufResult, err = TimeArrayEncodeAll(input, bufResult); err != nil { - b.Fatal(err) - } - copy(input, src) // Reset input that gets modified in IntegerArrayEncodeAll - } - }) - }) - } -} - -func BenchmarkTimeArrayDecodeAllUncompressed(b *testing.B) { - benchmarks := []int{ - 5, - 55, - 555, - 1000, - } - - values := []int64{ - -2352281900722994752, 1438442655375607923, -4110452567888190110, - -1221292455668011702, -1941700286034261841, -2836753127140407751, - 1432686216250034552, 3663244026151507025, -3068113732684750258, - -1949953187327444488, 3713374280993588804, 3226153669854871355, - -2093273755080502606, 1006087192578600616, -2272122301622271655, - 2533238229511593671, -4450454445568858273, 2647789901083530435, - 2761419461769776844, -1324397441074946198, -680758138988210958, - 94468846694902125, -2394093124890745254, -2682139311758778198, - } - - for _, size := range benchmarks { - rand.Seed(int64(size * 1e3)) - - enc := NewTimeEncoder(size) - for i := 0; i < size; i++ { - enc.Write(values[rand.Int()%len(values)]) - } - bytes, _ := enc.Bytes() - - b.Run(fmt.Sprintf("%d", size), func(b *testing.B) { - b.SetBytes(int64(len(bytes))) - b.ReportAllocs() - - dst := make([]int64, size) - for i := 0; i < b.N; i++ { - dst, _ = TimeArrayDecodeAll(bytes, dst) - } - }) - } -} - -func BenchmarkTimeArrayDecodeAllPackedSimple(b *testing.B) { - benchmarks := []int{ - 5, - 55, - 555, - 1000, - } - for _, size := range benchmarks { - rand.Seed(int64(size * 1e3)) - - enc := NewTimeEncoder(size) - for i := 0; i < size; i++ { - // Small amount of randomness prevents RLE from being used - enc.Write(int64(i*1000) + int64(rand.Intn(10))) - } - bytes, _ := enc.Bytes() - - b.Run(fmt.Sprintf("%d", size), func(b *testing.B) { - b.SetBytes(int64(len(bytes))) - b.ReportAllocs() - - dst := make([]int64, size) - for i := 0; i < b.N; i++ { - dst, _ = TimeArrayDecodeAll(bytes, dst) - } - }) - } -} - -func BenchmarkTimeArrayDecodeAllRLE(b *testing.B) { - benchmarks := []struct { - n int - delta int64 - }{ - {5, 10}, - {55, 10}, - {555, 10}, - {1000, 10}, - } - for _, bm := range benchmarks { - enc := NewTimeEncoder(bm.n) - acc := int64(0) - for i := 0; i < bm.n; i++ { - enc.Write(acc) - acc += bm.delta - } - bytes, _ := enc.Bytes() - - b.Run(fmt.Sprintf("%d_delta_%d", bm.n, bm.delta), func(b *testing.B) { - b.SetBytes(int64(len(bytes))) - b.ReportAllocs() - - dst := make([]int64, bm.n) - for i := 0; i < b.N; i++ { - dst, _ = TimeArrayDecodeAll(bytes, dst) - } - }) - } -} diff --git a/tsdb/tsm1/bit_reader.go b/tsdb/tsm1/bit_reader.go deleted file mode 100644 
index d91c5b8ca9..0000000000 --- a/tsdb/tsm1/bit_reader.go +++ /dev/null @@ -1,133 +0,0 @@ -package tsm1 - -import "io" - -// BitReader reads bits from an io.Reader. -type BitReader struct { - data []byte - - buf struct { - v uint64 // bit buffer - n uint // available bits - } -} - -// NewBitReader returns a new instance of BitReader that reads from data. -func NewBitReader(data []byte) *BitReader { - b := new(BitReader) - b.Reset(data) - return b -} - -// Reset sets the underlying reader on b and reinitializes. -func (r *BitReader) Reset(data []byte) { - r.data = data - r.buf.v, r.buf.n = 0, 0 - r.readBuf() -} - -// CanReadBitFast returns true if calling ReadBitFast() is allowed. -// Fast bit reads are allowed when at least 2 values are in the buffer. -// This is because it is not required to refilled the buffer and the caller -// can inline the calls. -func (r *BitReader) CanReadBitFast() bool { return r.buf.n > 1 } - -// ReadBitFast is an optimized bit read. -// IMPORTANT: Only allowed if CanReadFastBit() is true! -func (r *BitReader) ReadBitFast() bool { - v := (r.buf.v&(1<<63) != 0) - r.buf.v <<= 1 - r.buf.n -= 1 - return v -} - -// ReadBit returns the next bit from the underlying data. -func (r *BitReader) ReadBit() (bool, error) { - v, err := r.ReadBits(1) - return v != 0, err -} - -// ReadBits reads nbits from the underlying data into a uint64. -// nbits must be from 1 to 64, inclusive. -func (r *BitReader) ReadBits(nbits uint) (uint64, error) { - // Return EOF if there is no more data. - if r.buf.n == 0 { - return 0, io.EOF - } - - // Return bits from buffer if less than available bits. - if nbits <= r.buf.n { - // Return all bits, if requested. - if nbits == 64 { - v := r.buf.v - r.buf.v, r.buf.n = 0, 0 - r.readBuf() - return v, nil - } - - // Otherwise mask returned bits. - v := (r.buf.v >> (64 - nbits)) - r.buf.v <<= nbits - r.buf.n -= nbits - - if r.buf.n == 0 { - r.readBuf() - } - return v, nil - } - - // Otherwise read all available bits in current buffer. - v, n := r.buf.v, r.buf.n - - // Read new buffer. - r.buf.v, r.buf.n = 0, 0 - r.readBuf() - - // Append new buffer to previous buffer and shift to remove unnecessary bits. - v |= (r.buf.v >> n) - v >>= 64 - nbits - - // Remove used bits from new buffer. - bufN := nbits - n - if bufN > r.buf.n { - bufN = r.buf.n - } - r.buf.v <<= bufN - r.buf.n -= bufN - - if r.buf.n == 0 { - r.readBuf() - } - - return v, nil -} - -func (r *BitReader) readBuf() { - // Determine number of bytes to read to fill buffer. - byteN := 8 - (r.buf.n / 8) - - // Limit to the length of our data. - if n := uint(len(r.data)); byteN > n { - byteN = n - } - - // Optimized 8-byte read. - if byteN == 8 { - r.buf.v = uint64(r.data[7]) | uint64(r.data[6])<<8 | - uint64(r.data[5])<<16 | uint64(r.data[4])<<24 | - uint64(r.data[3])<<32 | uint64(r.data[2])<<40 | - uint64(r.data[1])<<48 | uint64(r.data[0])<<56 - r.buf.n = 64 - r.data = r.data[8:] - return - } - - // Otherwise append bytes to buffer. - for i := uint(0); i < byteN; i++ { - r.buf.n += 8 - r.buf.v |= uint64(r.data[i]) << (64 - r.buf.n) - } - - // Move data forward. 
- r.data = r.data[byteN:] -} diff --git a/tsdb/tsm1/bit_reader_test.go b/tsdb/tsm1/bit_reader_test.go deleted file mode 100644 index 0f658f4f2c..0000000000 --- a/tsdb/tsm1/bit_reader_test.go +++ /dev/null @@ -1,180 +0,0 @@ -package tsm1_test - -import ( - "bytes" - "io" - "math" - "math/rand" - "reflect" - "testing" - "testing/quick" - - bitstream "github.com/dgryski/go-bitstream" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" -) - -func TestBitStreamEOF(t *testing.T) { - br := tsm1.NewBitReader([]byte("0")) - - b, err := br.ReadBits(8) - if err != nil { - t.Fatal(err) - } - if b != '0' { - t.Error("ReadBits(8) didn't return first byte") - } - - if _, err := br.ReadBits(8); err != io.EOF { - t.Error("ReadBits(8) on empty string didn't return EOF") - } - - // 0 = 0b00110000 - br = tsm1.NewBitReader([]byte("0")) - - buf := bytes.NewBuffer(nil) - bw := bitstream.NewWriter(buf) - - for i := 0; i < 4; i++ { - bit, err := br.ReadBit() - if err == io.EOF { - break - } - if err != nil { - t.Error("GetBit returned error err=", err.Error()) - return - } - bw.WriteBit(bitstream.Bit(bit)) - } - - bw.Flush(bitstream.One) - - err = bw.WriteByte(0xAA) - if err != nil { - t.Error("unable to WriteByte") - } - - c := buf.Bytes() - - if len(c) != 2 || c[1] != 0xAA || c[0] != 0x3f { - t.Error("bad return from 4 read bytes") - } - - _, err = tsm1.NewBitReader([]byte("")).ReadBit() - if err != io.EOF { - t.Error("ReadBit on empty string didn't return EOF") - } -} - -func TestBitStream(t *testing.T) { - buf := bytes.NewBuffer(nil) - br := tsm1.NewBitReader([]byte("hello")) - bw := bitstream.NewWriter(buf) - - for { - bit, err := br.ReadBit() - if err == io.EOF { - break - } - if err != nil { - t.Error("GetBit returned error err=", err.Error()) - return - } - bw.WriteBit(bitstream.Bit(bit)) - } - - s := buf.String() - - if s != "hello" { - t.Error("expected 'hello', got=", []byte(s)) - } -} - -func TestByteStream(t *testing.T) { - buf := bytes.NewBuffer(nil) - br := tsm1.NewBitReader([]byte("hello")) - bw := bitstream.NewWriter(buf) - - for i := 0; i < 3; i++ { - bit, err := br.ReadBit() - if err == io.EOF { - break - } - if err != nil { - t.Error("GetBit returned error err=", err.Error()) - return - } - bw.WriteBit(bitstream.Bit(bit)) - } - - for i := 0; i < 3; i++ { - byt, err := br.ReadBits(8) - if err == io.EOF { - break - } - if err != nil { - t.Error("ReadBits(8) returned error err=", err.Error()) - return - } - bw.WriteByte(byte(byt)) - } - - u, err := br.ReadBits(13) - - if err != nil { - t.Error("ReadBits returned error err=", err.Error()) - return - } - - bw.WriteBits(u, 13) - - bw.WriteBits(('!'<<12)|('.'<<4)|0x02, 20) - // 0x2f == '/' - bw.Flush(bitstream.One) - - s := buf.String() - - if s != "hello!./" { - t.Errorf("expected 'hello!./', got=%x", []byte(s)) - } -} - -// Ensure bit reader can read random bits written to a stream. -func TestBitReader_Quick(t *testing.T) { - if err := quick.Check(func(values []uint64, nbits []uint) bool { - // Limit nbits to 64. - for i := 0; i < len(values) && i < len(nbits); i++ { - nbits[i] = (nbits[i] % 64) + 1 - values[i] = values[i] & (math.MaxUint64 >> (64 - nbits[i])) - } - - // Write bits to a buffer. - var buf bytes.Buffer - w := bitstream.NewWriter(&buf) - for i := 0; i < len(values) && i < len(nbits); i++ { - w.WriteBits(values[i], int(nbits[i])) - } - w.Flush(bitstream.Zero) - - // Read bits from the buffer. 
- r := tsm1.NewBitReader(buf.Bytes()) - for i := 0; i < len(values) && i < len(nbits); i++ { - v, err := r.ReadBits(nbits[i]) - if err != nil { - t.Errorf("unexpected error(%d): %s", i, err) - return false - } else if v != values[i] { - t.Errorf("value mismatch(%d): got=%d, exp=%d (nbits=%d)", i, v, values[i], nbits[i]) - return false - } - } - - return true - }, &quick.Config{ - Values: func(a []reflect.Value, rand *rand.Rand) { - a[0], _ = quick.Value(reflect.TypeOf([]uint64{}), rand) - a[1], _ = quick.Value(reflect.TypeOf([]uint{}), rand) - }, - }); err != nil { - t.Fatal(err) - } -} diff --git a/tsdb/tsm1/block_exporter.go b/tsdb/tsm1/block_exporter.go deleted file mode 100644 index 9b93f214d3..0000000000 --- a/tsdb/tsm1/block_exporter.go +++ /dev/null @@ -1,173 +0,0 @@ -package tsm1 - -import ( - "errors" - "fmt" - "io" - "os" - "strings" - "unicode/utf8" - - "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/tsdb" -) - -// BlockExporter writes all blocks in a file to a given format. -type BlockExporter interface { - io.Closer - ExportFile(filename string) error -} - -// Ensure type implements interface. -var _ BlockExporter = (*SQLBlockExporter)(nil) - -// SQLBlockExporter writes out all blocks for TSM files to SQL. -type SQLBlockExporter struct { - w io.Writer - initialized bool // true when initial block written - - // Write schema, if true. - ShowSchema bool -} - -// NewSQLBlockExporter returns a new instance of SQLBlockExporter. -func NewSQLBlockExporter(w io.Writer) *SQLBlockExporter { - return &SQLBlockExporter{ - w: w, - - ShowSchema: true, - } -} - -// Close ends the export and writes final output. -func (e *SQLBlockExporter) Close() error { - return nil -} - -// ExportFile writes all blocks of the TSM file. -func (e *SQLBlockExporter) ExportFile(filename string) error { - if !e.initialized { - if err := e.initialize(); err != nil { - return err - } - } - - f, err := os.OpenFile(filename, os.O_RDONLY, 0600) - if err != nil { - return err - } - defer f.Close() - - r, err := NewTSMReader(f) - if err != nil { - return err - } - defer r.Close() - - itr := r.BlockIterator() - if itr == nil { - return errors.New("invalid TSM file, no block iterator") - } - - fmt.Fprintln(e.w, `BEGIN TRANSACTION;`) - for itr.Next() { - key, minTime, maxTime, typ, checksum, buf, err := itr.Read() - if err != nil { - return err - } - - // Extract organization & bucket ID. 
- var record blockExportRecord - record.Filename = filename - if len(key) < 16 { - record.Key = string(key) - } else { - record.OrgID, record.BucketID = tsdb.DecodeNameSlice(key[:16]) - record.Key = string(key[16:]) - } - record.Type = typ - record.MinTime = minTime - record.MaxTime = maxTime - record.Checksum = checksum - record.Count = BlockCount(buf) - - if err := e.write(&record); err != nil { - return err - } - } - fmt.Fprintln(e.w, "COMMIT;") - - if err := r.Close(); err != nil { - return fmt.Errorf("tsm1.SQLBlockExporter: cannot close reader: %s", err) - } - - return nil -} - -func (e *SQLBlockExporter) initialize() error { - if e.ShowSchema { - fmt.Fprintln(e.w, ` -CREATE TABLE IF NOT EXISTS blocks ( - filename TEXT NOT NULL, - org_id INTEGER NOT NULL, - bucket_id INTEGER NOT NULL, - key TEXT NOT NULL, - "type" TEXT NOT NULL, - min_time INTEGER NOT NULL, - max_time INTEGER NOT NULL, - checksum INTEGER NOT NULL, - count INTEGER NOT NULL -); - -CREATE INDEX idx_blocks_filename ON blocks (filename); -CREATE INDEX idx_blocks_org_id_bucket_id_key ON blocks (org_id, bucket_id, key); -`[1:]) - } - - e.initialized = true - - return nil -} - -func (e *SQLBlockExporter) write(record *blockExportRecord) error { - _, err := fmt.Fprintf(e.w, - "INSERT INTO blocks (filename, org_id, bucket_id, key, type, min_time, max_time, checksum, count) VALUES (%s, %d, %d, %s, %s, %d, %d, %d, %d);\n", - quoteSQL(record.Filename), - record.OrgID, - record.BucketID, - quoteSQL(record.Key), - quoteSQL(BlockTypeName(record.Type)), - record.MinTime, - record.MaxTime, - record.Checksum, - record.Count, - ) - return err -} - -type blockExportRecord struct { - Filename string - OrgID influxdb.ID - BucketID influxdb.ID - Key string - Type byte - MinTime int64 - MaxTime int64 - Checksum uint32 - Count int -} - -func quoteSQL(s string) string { - return `'` + sqlReplacer.Replace(toValidUTF8(s)) + `'` -} - -var sqlReplacer = strings.NewReplacer(`'`, `''`, "\x00", "") - -func toValidUTF8(s string) string { - return strings.Map(func(r rune) rune { - if r == utf8.RuneError { - return -1 - } - return r - }, s) -} diff --git a/tsdb/tsm1/block_exporter_test.go b/tsdb/tsm1/block_exporter_test.go deleted file mode 100644 index 8a5c118c51..0000000000 --- a/tsdb/tsm1/block_exporter_test.go +++ /dev/null @@ -1,47 +0,0 @@ -package tsm1 - -import ( - "bytes" - "fmt" - "os" - "testing" -) - -func TestSQLBlockExporter_Export(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - - // Write data. - if w, err := NewTSMWriter(f); err != nil { - t.Fatal(err) - } else if err := w.Write([]byte("cpu"), []Value{NewValue(0, int64(1))}); err != nil { - t.Fatal(err) - } else if err := w.Write([]byte("mem"), []Value{NewValue(0, int64(2))}); err != nil { - t.Fatal(err) - } else if err := w.WriteIndex(); err != nil { - t.Fatal(err) - } else if err := w.Close(); err != nil { - t.Fatal(err) - } - - // Expected output. - want := fmt.Sprintf(` -BEGIN TRANSACTION; -INSERT INTO blocks (filename, org_id, bucket_id, key, type, min_time, max_time, checksum, count) VALUES ('%s', 0, 0, 'cpu', 'integer', 0, 0, 3294968665, 1); -INSERT INTO blocks (filename, org_id, bucket_id, key, type, min_time, max_time, checksum, count) VALUES ('%s', 0, 0, 'mem', 'integer', 0, 0, 755408492, 1); -COMMIT; -`[1:], f.Name(), f.Name()) - - // Export file to SQL. 
- var buf bytes.Buffer - e := NewSQLBlockExporter(&buf) - e.ShowSchema = false - if err := e.ExportFile(f.Name()); err != nil { - t.Fatal(err) - } else if err := e.Close(); err != nil { - t.Fatal(err) - } else if got := buf.String(); got != want { - t.Fatalf("unexpected output:\ngot=%s\n--\nwant=%s", got, want) - } -} diff --git a/tsdb/tsm1/bool.go b/tsdb/tsm1/bool.go deleted file mode 100644 index 5c7ece9c46..0000000000 --- a/tsdb/tsm1/bool.go +++ /dev/null @@ -1,169 +0,0 @@ -package tsm1 - -// boolean encoding uses 1 bit per value. Each compressed byte slice contains a 1 byte header -// indicating the compression type, followed by a variable byte encoded length indicating -// how many booleans are packed in the slice. The remaining bytes contains 1 byte for every -// 8 boolean values encoded. - -import ( - "encoding/binary" - "fmt" -) - -// Note: an uncompressed boolean format is not yet implemented. -// booleanCompressedBitPacked is a bit packed format using 1 bit per boolean -const booleanCompressedBitPacked = 1 - -// BooleanEncoder encodes a series of booleans to an in-memory buffer. -type BooleanEncoder struct { - // The encoded bytes - bytes []byte - - // The current byte being encoded - b byte - - // The number of bools packed into b - i int - - // The total number of bools written - n int -} - -// NewBooleanEncoder returns a new instance of BooleanEncoder. -func NewBooleanEncoder(sz int) BooleanEncoder { - return BooleanEncoder{ - bytes: make([]byte, 0, (sz+7)/8), - } -} - -// Reset sets the encoder to its initial state. -func (e *BooleanEncoder) Reset() { - e.bytes = e.bytes[:0] - e.b = 0 - e.i = 0 - e.n = 0 -} - -// Write encodes b to the underlying buffer. -func (e *BooleanEncoder) Write(b bool) { - // If we have filled the current byte, flush it - if e.i >= 8 { - e.flush() - } - - // Use 1 bit for each boolean value, shift the current byte - // by 1 and set the least significant bit accordingly - e.b = e.b << 1 - if b { - e.b |= 1 - } - - // Increment the current boolean count - e.i++ - // Increment the total boolean count - e.n++ -} - -func (e *BooleanEncoder) flush() { - // Pad remaining byte w/ 0s - for e.i < 8 { - e.b = e.b << 1 - e.i++ - } - - // If we have bits set, append them to the byte slice - if e.i > 0 { - e.bytes = append(e.bytes, e.b) - e.b = 0 - e.i = 0 - } -} - -// Flush is no-op -func (e *BooleanEncoder) Flush() {} - -// Bytes returns a new byte slice containing the encoded booleans from previous calls to Write. -func (e *BooleanEncoder) Bytes() ([]byte, error) { - // Ensure the current byte is flushed - e.flush() - b := make([]byte, 10+1) - - // Store the encoding type in the 4 high bits of the first byte - b[0] = byte(booleanCompressedBitPacked) << 4 - - i := 1 - // Encode the number of booleans written - i += binary.PutUvarint(b[i:], uint64(e.n)) - - // Append the packed booleans - return append(b[:i], e.bytes...), nil -} - -// BooleanDecoder decodes a series of booleans from an in-memory buffer. -type BooleanDecoder struct { - b []byte - i int - n int - err error -} - -// SetBytes initializes the decoder with a new set of bytes to read from. -// This must be called before calling any other methods. -func (e *BooleanDecoder) SetBytes(b []byte) { - if len(b) == 0 { - return - } - - // First byte stores the encoding type, only have 1 bit-packet format - // currently ignore for now. 
- b = b[1:] - count, n := binary.Uvarint(b) - if n <= 0 { - e.err = fmt.Errorf("booleanDecoder: invalid count") - return - } - - e.b = b[n:] - e.i = -1 - e.n = int(count) - - if min := len(e.b) * 8; min < e.n { - // Shouldn't happen - TSM file was truncated/corrupted - e.n = min - } -} - -// Next returns whether there are any bits remaining in the decoder. -// It returns false if there was an error decoding. -// The error is available on the Error method. -func (e *BooleanDecoder) Next() bool { - if e.err != nil { - return false - } - - e.i++ - return e.i < e.n -} - -// Read returns the next bit from the decoder. -func (e *BooleanDecoder) Read() bool { - // Index into the byte slice - idx := e.i >> 3 // integer division by 8 - - // Bit position - pos := 7 - (e.i & 0x7) - - // The mask to select the bit - mask := byte(1 << uint(pos)) - - // The packed byte - v := e.b[idx] - - // Returns true if the bit is set - return v&mask == mask -} - -// Error returns the error encountered during decoding, if one occurred. -func (e *BooleanDecoder) Error() error { - return e.err -} diff --git a/tsdb/tsm1/bool_test.go b/tsdb/tsm1/bool_test.go deleted file mode 100644 index a361e42856..0000000000 --- a/tsdb/tsm1/bool_test.go +++ /dev/null @@ -1,171 +0,0 @@ -package tsm1_test - -import ( - "fmt" - "reflect" - "testing" - "testing/quick" - - "github.com/influxdata/influxdb/v2/tsdb/tsm1" -) - -func Test_BooleanEncoder_NoValues(t *testing.T) { - enc := tsm1.NewBooleanEncoder(0) - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var dec tsm1.BooleanDecoder - dec.SetBytes(b) - if dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } -} - -func Test_BooleanEncoder_Single(t *testing.T) { - enc := tsm1.NewBooleanEncoder(1) - v1 := true - enc.Write(v1) - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var dec tsm1.BooleanDecoder - dec.SetBytes(b) - if !dec.Next() { - t.Fatalf("unexpected next value: got false, exp true") - } - - if v1 != dec.Read() { - t.Fatalf("unexpected value: got %v, exp %v", dec.Read(), v1) - } -} - -func Test_BooleanEncoder_Multi_Compressed(t *testing.T) { - enc := tsm1.NewBooleanEncoder(10) - - values := make([]bool, 10) - for i := range values { - values[i] = i%2 == 0 - enc.Write(values[i]) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if exp := 4; len(b) != exp { - t.Fatalf("unexpected length: got %v, exp %v", len(b), exp) - } - - var dec tsm1.BooleanDecoder - dec.SetBytes(b) - - for i, v := range values { - if !dec.Next() { - t.Fatalf("unexpected next value: got false, exp true") - } - if v != dec.Read() { - t.Fatalf("unexpected value at pos %d: got %v, exp %v", i, dec.Read(), v) - } - } - - if dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } -} - -func Test_BooleanEncoder_Quick(t *testing.T) { - if err := quick.Check(func(values []bool) bool { - expected := values - if values == nil { - expected = []bool{} - } - // Write values to encoder. - enc := tsm1.NewBooleanEncoder(1024) - for _, v := range values { - enc.Write(v) - } - - // Retrieve compressed bytes. - buf, err := enc.Bytes() - if err != nil { - t.Fatal(err) - } - - // Read values out of decoder. - got := make([]bool, 0, len(values)) - var dec tsm1.BooleanDecoder - dec.SetBytes(buf) - for dec.Next() { - got = append(got, dec.Read()) - } - - // Verify that input and output values match. 
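For reference alongside the boolean tests, the header comment of the removed bool.go above describes the block layout as a one-byte type header (type in the high nibble), a varint count of values, and then one bit per boolean, most significant bit first, padded to whole bytes. The decoder below is only an illustrative sketch of that layout under those assumptions; it is not the deleted implementation.

package main

import (
	"encoding/binary"
	"errors"
	"fmt"
)

// decodeBools unpacks the bit-packed boolean layout described above:
// header byte, uvarint count, then one bit per value (MSB first).
func decodeBools(b []byte) ([]bool, error) {
	if len(b) == 0 {
		return nil, errors.New("empty block")
	}
	if b[0]>>4 != 1 { // 1 = bit-packed, assumed to be the only format
		return nil, fmt.Errorf("unknown boolean encoding %d", b[0]>>4)
	}
	count, n := binary.Uvarint(b[1:])
	if n <= 0 {
		return nil, errors.New("invalid count")
	}
	data := b[1+n:]
	if limit := uint64(len(data)) * 8; count > limit {
		count = limit // clamp a truncated block instead of reading past the data
	}
	out := make([]bool, 0, count)
	for i := uint64(0); i < count; i++ {
		mask := byte(1) << (7 - (i & 7))
		out = append(out, data[i>>3]&mask != 0)
	}
	return out, nil
}

func main() {
	// 0x10 = bit-packed header, 0x03 = three values, 0xa0 = bits 101 padded with zeros.
	vals, err := decodeBools([]byte{0x10, 0x03, 0xa0})
	fmt.Println(vals, err) // [true false true] <nil>
}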
- if !reflect.DeepEqual(expected, got) { - t.Fatalf("mismatch:\n\nexp=%#v\n\ngot=%#v\n\n", expected, got) - } - - return true - }, nil); err != nil { - t.Fatal(err) - } -} - -func Test_BooleanDecoder_Corrupt(t *testing.T) { - cases := []string{ - "", // Empty - "\x10\x90", // Packed: invalid count - "\x10\x7f", // Packed: count greater than remaining bits, multiple bytes expected - "\x10\x01", // Packed: count greater than remaining bits, one byte expected - } - - for _, c := range cases { - var dec tsm1.BooleanDecoder - dec.SetBytes([]byte(c)) - if dec.Next() { - t.Fatalf("exp next == false, got true for case %q", c) - } - } -} - -func BenchmarkBooleanDecoder_DecodeAll(b *testing.B) { - benchmarks := []int{ - 1, - 55, - 555, - 1000, - } - for _, size := range benchmarks { - e := tsm1.NewBooleanEncoder(size) - for i := 0; i < size; i++ { - e.Write(i&1 == 1) - } - bytes, err := e.Bytes() - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - b.Run(fmt.Sprintf("%d", size), func(b *testing.B) { - b.SetBytes(int64(len(bytes))) - - dst := make([]bool, size) - for i := 0; i < b.N; i++ { - var d tsm1.BooleanDecoder - d.SetBytes(bytes) - - var n int - for d.Next() { - dst[n] = d.Read() - n++ - } - if n != size { - b.Fatalf("expected to read %d booleans, but read %d", size, n) - } - } - }) - } -} diff --git a/tsdb/tsm1/cache.go b/tsdb/tsm1/cache.go deleted file mode 100644 index 9826cd2254..0000000000 --- a/tsdb/tsm1/cache.go +++ /dev/null @@ -1,782 +0,0 @@ -package tsm1 - -import ( - "context" - "fmt" - "math" - "strings" - "sync" - "sync/atomic" - "time" - - "github.com/influxdata/influxdb/v2/kit/tracing" - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/storage/wal" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxql" - "github.com/prometheus/client_golang/prometheus" - "go.uber.org/zap" -) - -var ( - // ErrSnapshotInProgress is returned if a snapshot is attempted while one is already running. - ErrSnapshotInProgress = fmt.Errorf("snapshot in progress") -) - -// CacheMemorySizeLimitExceededError is the type of error returned from the cache when -// a write would place it over its size limit. -type CacheMemorySizeLimitExceededError struct { - Size uint64 - Limit uint64 -} - -func (c CacheMemorySizeLimitExceededError) Error() string { - return fmt.Sprintf("cache-max-memory-size exceeded: (%d/%d)", c.Size, c.Limit) -} - -// ErrCacheMemorySizeLimitExceeded returns an error indicating an operation -// could not be completed due to exceeding the cache-max-memory-size setting. -func ErrCacheMemorySizeLimitExceeded(n, limit uint64) error { - return CacheMemorySizeLimitExceededError{Size: n, Limit: limit} -} - -// Cache maintains an in-memory store of Values for a set of keys. -type Cache struct { - mu sync.RWMutex - store *ring - maxSize uint64 - - // snapshots are the cache objects that are currently being written to tsm files - // they're kept in memory while flushing so they can be queried along with the cache. - // they are read only and should never be modified - snapshot *Cache - snapshotting bool - - tracker *cacheTracker - lastSnapshot time.Time - lastWriteTime time.Time -} - -// NewCache returns an instance of a cache which will use a maximum of maxSize bytes of memory. -// Only used for engine caches, never for snapshots. 
-func NewCache(maxSize uint64) *Cache { - return &Cache{ - maxSize: maxSize, - store: newRing(), - lastSnapshot: time.Now(), - tracker: newCacheTracker(newCacheMetrics(nil), nil), - } -} - -// Write writes the set of values for the key to the cache. This function is goroutine-safe. -// It returns an error if the cache will exceed its max size by adding the new values. -func (c *Cache) Write(key []byte, values []Value) error { - addedSize := uint64(Values(values).Size()) - - // Enough room in the cache? - limit := c.maxSize - n := c.Size() + addedSize - - if limit > 0 && n > limit { - c.tracker.IncWritesErr() - c.tracker.AddWrittenBytesDrop(uint64(addedSize)) - return ErrCacheMemorySizeLimitExceeded(n, limit) - } - - newKey, err := c.store.write(key, values) - if err != nil { - c.tracker.IncWritesErr() - c.tracker.AddWrittenBytesErr(uint64(addedSize)) - return err - } - - if newKey { - addedSize += uint64(len(key)) - } - // Update the cache size and the memory size stat. - c.tracker.IncCacheSize(addedSize) - c.tracker.AddMemBytes(addedSize) - c.tracker.AddWrittenBytesOK(uint64(addedSize)) - c.tracker.IncWritesOK() - - return nil -} - -// WriteMulti writes the map of keys and associated values to the cache. This -// function is goroutine-safe. It returns an error if the cache will exceeded -// its max size by adding the new values. The write attempts to write as many -// values as possible. If one key fails, the others can still succeed and an -// error will be returned. -func (c *Cache) WriteMulti(values map[string][]Value) error { - var addedSize uint64 - for _, v := range values { - addedSize += uint64(Values(v).Size()) - } - - // Enough room in the cache? - limit := c.maxSize // maxSize is safe for reading without a lock. - n := c.Size() + addedSize - if limit > 0 && n > limit { - c.tracker.IncWritesErr() - c.tracker.AddWrittenBytesDrop(uint64(addedSize)) - return ErrCacheMemorySizeLimitExceeded(n, limit) - } - - var werr error - c.mu.RLock() - store := c.store - c.mu.RUnlock() - - var bytesWrittenErr uint64 - - // We'll optimistically set size here, and then decrement it for write errors. - for k, v := range values { - newKey, err := store.write([]byte(k), v) - if err != nil { - // The write failed, hold onto the error and adjust the size delta. - werr = err - addedSize -= uint64(Values(v).Size()) - bytesWrittenErr += uint64(Values(v).Size()) - } - - if newKey { - addedSize += uint64(len(k)) - } - } - - // Some points in the batch were dropped. An error is returned so - // error stat is incremented as well. - if werr != nil { - c.tracker.IncWritesErr() - c.tracker.IncWritesDrop() - c.tracker.AddWrittenBytesErr(bytesWrittenErr) - } - - // Update the memory size stat - c.tracker.IncCacheSize(addedSize) - c.tracker.AddMemBytes(addedSize) - c.tracker.IncWritesOK() - c.tracker.AddWrittenBytesOK(addedSize) - - c.mu.Lock() - c.lastWriteTime = time.Now() - c.mu.Unlock() - - return werr -} - -// Snapshot takes a snapshot of the current cache, adds it to the slice of caches that -// are being flushed, and resets the current cache with new values. 
-func (c *Cache) Snapshot() (*Cache, error) { - c.mu.Lock() - defer c.mu.Unlock() - - if c.snapshotting { - return nil, ErrSnapshotInProgress - } - - c.snapshotting = true - c.tracker.IncSnapshotsActive() // increment the number of times we tried to do this - - // If no snapshot exists, create a new one, otherwise update the existing snapshot - if c.snapshot == nil { - c.snapshot = &Cache{ - store: newRing(), - tracker: newCacheTracker(c.tracker.metrics, c.tracker.labels), - } - } - - // Did a prior snapshot exist that failed? If so, return the existing - // snapshot to retry. - if c.snapshot.Size() > 0 { - return c.snapshot, nil - } - - c.snapshot.store, c.store = c.store, c.snapshot.store - snapshotSize := c.Size() - - c.snapshot.tracker.SetSnapshotSize(snapshotSize) // Save the size of the snapshot on the snapshot cache - c.tracker.SetSnapshotSize(snapshotSize) // Save the size of the snapshot on the live cache - - // Reset the cache's store. - c.store.reset() - c.tracker.SetCacheSize(0) - c.lastSnapshot = time.Now() - - c.tracker.AddSnapshottedBytes(snapshotSize) // increment the number of bytes added to the snapshot - c.tracker.SetDiskBytes(0) - c.tracker.SetSnapshotsActive(0) - - return c.snapshot, nil -} - -// Deduplicate sorts the snapshot before returning it. The compactor and any queries -// coming in while it writes will need the values sorted. -func (c *Cache) Deduplicate() { - c.mu.RLock() - store := c.store - c.mu.RUnlock() - - // Apply a function that simply calls deduplicate on each entry in the ring. - // apply cannot return an error in this invocation. - _ = store.apply(func(_ []byte, e *entry) error { e.deduplicate(); return nil }) -} - -// ClearSnapshot removes the snapshot cache from the list of flushing caches and -// adjusts the size. -func (c *Cache) ClearSnapshot(success bool) { - c.mu.RLock() - snapStore := c.snapshot.store - c.mu.RUnlock() - - // reset the snapshot store outside of the write lock - if success { - snapStore.reset() - } - - c.mu.Lock() - defer c.mu.Unlock() - - c.snapshotting = false - - if success { - snapshotSize := c.tracker.SnapshotSize() - c.tracker.SetSnapshotsActive(0) - c.tracker.SubMemBytes(snapshotSize) // decrement the number of bytes in cache - - // Reset the snapshot to a fresh Cache. - c.snapshot = &Cache{ - store: c.snapshot.store, - tracker: newCacheTracker(c.tracker.metrics, c.tracker.labels), - } - - c.tracker.SetSnapshotSize(0) - c.tracker.SetDiskBytes(0) - c.tracker.SetSnapshotsActive(0) - } -} - -// Size returns the number of point-calcuated bytes the cache currently uses. -func (c *Cache) Size() uint64 { - return c.tracker.CacheSize() + c.tracker.SnapshotSize() -} - -// MaxSize returns the maximum number of bytes the cache may consume. -func (c *Cache) MaxSize() uint64 { - return c.maxSize -} - -func (c *Cache) Count() int { - c.mu.RLock() - n := c.store.count() - c.mu.RUnlock() - return n -} - -// Keys returns a sorted slice of all keys under management by the cache. -func (c *Cache) Keys() [][]byte { - c.mu.RLock() - store := c.store - c.mu.RUnlock() - return store.keys(true) -} - -func (c *Cache) Split(n int) []*Cache { - if n == 1 { - return []*Cache{c} - } - - caches := make([]*Cache, n) - storers := c.store.split(n) - for i := 0; i < n; i++ { - caches[i] = &Cache{ - store: storers[i], - } - } - return caches -} - -// Type returns the series type for a key. 
-func (c *Cache) Type(key []byte) (models.FieldType, error) { - c.mu.RLock() - e := c.store.entry(key) - if e == nil && c.snapshot != nil { - e = c.snapshot.store.entry(key) - } - c.mu.RUnlock() - - if e != nil { - typ, err := e.InfluxQLType() - if err != nil { - return models.Empty, errUnknownFieldType - } - - switch typ { - case influxql.Float: - return models.Float, nil - case influxql.Integer: - return models.Integer, nil - case influxql.Unsigned: - return models.Unsigned, nil - case influxql.Boolean: - return models.Boolean, nil - case influxql.String: - return models.String, nil - } - } - - return models.Empty, errUnknownFieldType -} - -// BlockType returns the TSM block type for the specified -// key or BlockUndefined if the type cannot be determined -// either because the key does not exist or there are no -// values for the key. -func (c *Cache) BlockType(key []byte) byte { - c.mu.RLock() - e := c.store.entry(key) - if e == nil && c.snapshot != nil { - e = c.snapshot.store.entry(key) - } - c.mu.RUnlock() - - if e != nil { - return e.BlockType() - } - - return BlockUndefined -} - -// AppendTimestamps appends ts with the timestamps for the specified key. -// It is the responsibility of the caller to sort and or deduplicate the slice. -func (c *Cache) AppendTimestamps(key []byte, ts []int64) []int64 { - var snapshotEntries *entry - - c.mu.RLock() - e := c.store.entry(key) - if c.snapshot != nil { - snapshotEntries = c.snapshot.store.entry(key) - } - c.mu.RUnlock() - - if e != nil { - ts = e.AppendTimestamps(ts) - } - if snapshotEntries != nil { - ts = snapshotEntries.AppendTimestamps(ts) - } - - return ts -} - -// Values returns a copy of all values, deduped and sorted, for the given key. -func (c *Cache) Values(key []byte) Values { - var snapshotEntries *entry - - c.mu.RLock() - e := c.store.entry(key) - if c.snapshot != nil { - snapshotEntries = c.snapshot.store.entry(key) - } - c.mu.RUnlock() - - if e == nil { - if snapshotEntries == nil { - // No values in hot cache or snapshots. - return nil - } - } else { - e.deduplicate() - } - - // Build the sequence of entries that will be returned, in the correct order. - // Calculate the required size of the destination buffer. - var entries []*entry - sz := 0 - - if snapshotEntries != nil { - snapshotEntries.deduplicate() // guarantee we are deduplicated - entries = append(entries, snapshotEntries) - sz += snapshotEntries.count() - } - - if e != nil { - entries = append(entries, e) - sz += e.count() - } - - // Any entries? If not, return. - if sz == 0 { - return nil - } - - // Create the buffer, and copy all hot values and snapshots. Individual - // entries are sorted at this point, so now the code has to check if the - // resultant buffer will be sorted from start to finish. - values := make(Values, 0, sz) - for _, e := range entries { - e.mu.RLock() - values = append(values, e.values...) - e.mu.RUnlock() - } - values = values.Deduplicate() - - return values -} - -// DeleteBucketRange removes values for all keys containing points -// with timestamps between min and max contained in the bucket identified -// by name from the cache. -func (c *Cache) DeleteBucketRange(ctx context.Context, name string, min, max int64, pred Predicate) { - span, _ := tracing.StartSpanFromContext(ctx) - defer span.Finish() - - // TODO(edd/jeff): find a way to optimize lock usage - c.mu.Lock() - defer c.mu.Unlock() - - var toDelete []string - var total uint64 - - // applySerial only errors if the closure returns an error. 
- _ = c.store.applySerial(func(k string, e *entry) error { - if !strings.HasPrefix(k, name) { - return nil - } - // TODO(edd): either use an unsafe conversion to []byte, or add a MatchesString - // method to tsm1.Predicate. - if pred != nil && !pred.Matches([]byte(k)) { - return nil - } - - total += uint64(e.size()) - - // if everything is being deleted, just stage it to be deleted and move on. - if min == math.MinInt64 && max == math.MaxInt64 { - toDelete = append(toDelete, k) - return nil - } - - // filter the values and subtract out the remaining bytes from the reduction. - e.filter(min, max) - total -= uint64(e.size()) - - // if it has no entries left, flag it to be deleted. - if e.count() == 0 { - toDelete = append(toDelete, k) - } - - return nil - }) - - for _, k := range toDelete { - total += uint64(len(k)) - // TODO(edd): either use unsafe conversion to []byte or add a removeString method. - c.store.remove([]byte(k)) - } - - c.tracker.DecCacheSize(total) - c.tracker.SetMemBytes(uint64(c.Size())) -} - -// SetMaxSize updates the memory limit of the cache. -func (c *Cache) SetMaxSize(size uint64) { - c.mu.Lock() - c.maxSize = size - c.mu.Unlock() -} - -// values returns the values for the key. It assumes the data is already sorted. -// It doesn't lock the cache but it does read-lock the entry if there is one for the key. -// values should only be used in compact.go in the CacheKeyIterator. -func (c *Cache) values(key []byte) Values { - e := c.store.entry(key) - if e == nil { - return nil - } - e.mu.RLock() - v := e.values - e.mu.RUnlock() - return v -} - -// ApplyEntryFn applies the function f to each entry in the Cache. -// ApplyEntryFn calls f on each entry in turn, within the same goroutine. -// It is safe for use by multiple goroutines. -func (c *Cache) ApplyEntryFn(f func(key string, entry *entry) error) error { - c.mu.RLock() - store := c.store - c.mu.RUnlock() - return store.applySerial(f) -} - -// CacheLoader processes a set of WAL segment files, and loads a cache with the data -// contained within those files. -type CacheLoader struct { - reader *wal.WALReader -} - -// NewCacheLoader returns a new instance of a CacheLoader. -func NewCacheLoader(files []string) *CacheLoader { - return &CacheLoader{ - reader: wal.NewWALReader(files), - } -} - -// Load returns a cache loaded with the data contained within the segment files. -func (cl *CacheLoader) Load(cache *Cache) error { - return cl.reader.Read(func(entry wal.WALEntry) error { - switch en := entry.(type) { - case *wal.WriteWALEntry: - return cache.WriteMulti(en.Values) - - case *wal.DeleteBucketRangeWALEntry: - var pred Predicate - if len(en.Predicate) > 0 { - var err error - pred, err = UnmarshalPredicate(en.Predicate) - if err != nil { - return err - } - } - - // TODO(edd): we need to clean up how we're encoding the prefix so that we - // don't have to remember to get it right everywhere we need to touch TSM data. - encoded := tsdb.EncodeName(en.OrgID, en.BucketID) - name := models.EscapeMeasurement(encoded[:]) - - cache.DeleteBucketRange(context.Background(), string(name), en.Min, en.Max, pred) - return nil - } - - return nil - }) -} - -// WithLogger sets the logger on the CacheLoader. -func (cl *CacheLoader) WithLogger(logger *zap.Logger) { - cl.reader.WithLogger(logger.With(zap.String("service", "cacheloader"))) -} - -// LastWriteTime returns the time that the cache was last written to. 
-func (c *Cache) LastWriteTime() time.Time { - c.mu.RLock() - defer c.mu.RUnlock() - return c.lastWriteTime -} - -// Age returns the age of the cache, which is the duration since it was last -// snapshotted. -func (c *Cache) Age() time.Duration { - c.mu.RLock() - defer c.mu.RUnlock() - return time.Since(c.lastSnapshot) -} - -// UpdateAge updates the age statistic based on the current time. -func (c *Cache) UpdateAge() { - c.mu.RLock() - defer c.mu.RUnlock() - c.tracker.SetAge(time.Since(c.lastSnapshot)) -} - -// cacheTracker tracks writes to the cache and snapshots. -// -// As well as being responsible for providing atomic reads and writes to the -// statistics, cacheTracker also mirrors any changes to the external prometheus -// metrics, which the Engine exposes. -// -// *NOTE* - cacheTracker fields should not be directory modified. Doing so -// could result in the Engine exposing inaccurate metrics. -type cacheTracker struct { - metrics *cacheMetrics - labels prometheus.Labels - snapshotsActive uint64 - snapshotSize uint64 - cacheSize uint64 - - // Used in testing. - memSizeBytes uint64 - snapshottedBytes uint64 - writesDropped uint64 - writesErr uint64 -} - -func newCacheTracker(metrics *cacheMetrics, defaultLabels prometheus.Labels) *cacheTracker { - return &cacheTracker{metrics: metrics, labels: defaultLabels} -} - -// Labels returns a copy of the default labels used by the tracker's metrics. -// The returned map is safe for modification. -func (t *cacheTracker) Labels() prometheus.Labels { - labels := make(prometheus.Labels, len(t.labels)) - for k, v := range t.labels { - labels[k] = v - } - return labels -} - -// AddMemBytes increases the number of in-memory cache bytes. -func (t *cacheTracker) AddMemBytes(bytes uint64) { - atomic.AddUint64(&t.memSizeBytes, bytes) - - labels := t.labels - t.metrics.MemSize.With(labels).Add(float64(bytes)) -} - -// SubMemBytes decreases the number of in-memory cache bytes. -func (t *cacheTracker) SubMemBytes(bytes uint64) { - atomic.AddUint64(&t.memSizeBytes, ^(bytes - 1)) - - labels := t.labels - t.metrics.MemSize.With(labels).Sub(float64(bytes)) -} - -// SetMemBytes sets the number of in-memory cache bytes. -func (t *cacheTracker) SetMemBytes(bytes uint64) { - atomic.StoreUint64(&t.memSizeBytes, bytes) - - labels := t.labels - t.metrics.MemSize.With(labels).Set(float64(bytes)) -} - -// AddBytesWritten increases the number of bytes written to the cache. -func (t *cacheTracker) AddBytesWritten(bytes uint64) { - labels := t.labels - t.metrics.MemSize.With(labels).Add(float64(bytes)) -} - -// AddSnapshottedBytes increases the number of bytes snapshotted. -func (t *cacheTracker) AddSnapshottedBytes(bytes uint64) { - atomic.AddUint64(&t.snapshottedBytes, bytes) - - labels := t.labels - t.metrics.SnapshottedBytes.With(labels).Add(float64(bytes)) -} - -// SetDiskBytes sets the number of bytes on disk used by snapshot data. -func (t *cacheTracker) SetDiskBytes(bytes uint64) { - labels := t.labels - t.metrics.DiskSize.With(labels).Set(float64(bytes)) -} - -// IncSnapshotsActive increases the number of active snapshots. -func (t *cacheTracker) IncSnapshotsActive() { - atomic.AddUint64(&t.snapshotsActive, 1) - - labels := t.labels - t.metrics.SnapshotsActive.With(labels).Inc() -} - -// SetSnapshotsActive sets the number of bytes on disk used by snapshot data. 
-func (t *cacheTracker) SetSnapshotsActive(n uint64) { - atomic.StoreUint64(&t.snapshotsActive, n) - - labels := t.labels - t.metrics.SnapshotsActive.With(labels).Set(float64(n)) -} - -// AddWrittenBytes increases the number of bytes written to the cache, with a required status. -func (t *cacheTracker) AddWrittenBytes(status string, bytes uint64) { - labels := t.Labels() - labels["status"] = status - t.metrics.WrittenBytes.With(labels).Add(float64(bytes)) -} - -// AddWrittenBytesOK increments the number of successful writes. -func (t *cacheTracker) AddWrittenBytesOK(bytes uint64) { t.AddWrittenBytes("ok", bytes) } - -// AddWrittenBytesError increments the number of writes that encountered an error. -func (t *cacheTracker) AddWrittenBytesErr(bytes uint64) { t.AddWrittenBytes("error", bytes) } - -// AddWrittenBytesDrop increments the number of writes that were dropped. -func (t *cacheTracker) AddWrittenBytesDrop(bytes uint64) { t.AddWrittenBytes("dropped", bytes) } - -// IncWrites increments the number of writes to the cache, with a required status. -func (t *cacheTracker) IncWrites(status string) { - labels := t.Labels() - labels["status"] = status - t.metrics.Writes.With(labels).Inc() -} - -// IncWritesOK increments the number of successful writes. -func (t *cacheTracker) IncWritesOK() { t.IncWrites("ok") } - -// IncWritesError increments the number of writes that encountered an error. -func (t *cacheTracker) IncWritesErr() { - atomic.AddUint64(&t.writesErr, 1) - - t.IncWrites("error") -} - -// IncWritesDrop increments the number of writes that were dropped. -func (t *cacheTracker) IncWritesDrop() { - atomic.AddUint64(&t.writesDropped, 1) - - t.IncWrites("dropped") -} - -// CacheSize returns the live cache size. -func (t *cacheTracker) CacheSize() uint64 { return atomic.LoadUint64(&t.cacheSize) } - -// IncCacheSize increases the live cache size by sz bytes. -func (t *cacheTracker) IncCacheSize(sz uint64) { atomic.AddUint64(&t.cacheSize, sz) } - -// DecCacheSize decreases the live cache size by sz bytes. -func (t *cacheTracker) DecCacheSize(sz uint64) { atomic.AddUint64(&t.cacheSize, ^(sz - 1)) } - -// SetCacheSize sets the live cache size to sz. -func (t *cacheTracker) SetCacheSize(sz uint64) { atomic.StoreUint64(&t.cacheSize, sz) } - -// SetSnapshotSize sets the last successful snapshot size. -func (t *cacheTracker) SetSnapshotSize(sz uint64) { atomic.StoreUint64(&t.snapshotSize, sz) } - -// SnapshotSize returns the last successful snapshot size. 
-func (t *cacheTracker) SnapshotSize() uint64 { return atomic.LoadUint64(&t.snapshotSize) } - -// SetAge sets the time since the last successful snapshot -func (t *cacheTracker) SetAge(d time.Duration) { - labels := t.Labels() - t.metrics.Age.With(labels).Set(d.Seconds()) -} - -const ( - valueTypeUndefined = 0 - valueTypeFloat64 = 1 - valueTypeInteger = 2 - valueTypeString = 3 - valueTypeBoolean = 4 - valueTypeUnsigned = 5 -) - -func valueType(v Value) byte { - switch v.(type) { - case FloatValue: - return valueTypeFloat64 - case IntegerValue: - return valueTypeInteger - case StringValue: - return valueTypeString - case BooleanValue: - return valueTypeBoolean - case UnsignedValue: - return valueTypeUnsigned - default: - return valueTypeUndefined - } -} - -var ( - valueTypeBlockType = [8]byte{ - valueTypeUndefined: BlockUndefined, - valueTypeFloat64: BlockFloat64, - valueTypeInteger: BlockInteger, - valueTypeString: BlockString, - valueTypeBoolean: BlockBoolean, - valueTypeUnsigned: BlockUnsigned, - 6: BlockUndefined, - 7: BlockUndefined, - } -) - -func valueTypeToBlockType(typ byte) byte { return valueTypeBlockType[typ&7] } diff --git a/tsdb/tsm1/cache_entry.go b/tsdb/tsm1/cache_entry.go deleted file mode 100644 index 6a4a74bff6..0000000000 --- a/tsdb/tsm1/cache_entry.go +++ /dev/null @@ -1,145 +0,0 @@ -package tsm1 - -import ( - "sync" - "sync/atomic" - - "github.com/influxdata/influxql" -) - -// entry is a set of values and some metadata. -type entry struct { - // Tracks the number of values in the entry. Must always be accessed via - // atomic; must be 8b aligned. - n int64 - - mu sync.RWMutex - values Values // All stored values. - - // The type of values stored. Read only so doesn't need to be protected by mu. - vtype byte -} - -// newEntryValues returns a new instance of entry with the given values. If the -// values are not valid, an error is returned. -func newEntryValues(values []Value) (*entry, error) { - e := &entry{ - values: make(Values, 0, len(values)), - n: int64(len(values)), - } - e.values = append(e.values, values...) - - // No values, don't check types and ordering - if len(values) == 0 { - return e, nil - } - - et := valueType(values[0]) - for _, v := range values { - // Make sure all the values are the same type - if et != valueType(v) { - return nil, errFieldTypeConflict - } - } - - // Set the type of values stored. - e.vtype = et - - return e, nil -} - -// add adds the given values to the entry. -func (e *entry) add(values []Value) error { - if len(values) == 0 { - return nil // Nothing to do. - } - - // Are any of the new values the wrong type? - if e.vtype != 0 { - for _, v := range values { - if e.vtype != valueType(v) { - return errFieldTypeConflict - } - } - } - - // entry currently has no values, so add the new ones and we're done. - e.mu.Lock() - if len(e.values) == 0 { - e.values = values - atomic.StoreInt64(&e.n, int64(len(e.values))) - e.vtype = valueType(values[0]) - e.mu.Unlock() - return nil - } - - // Append the new values to the existing ones... - e.values = append(e.values, values...) - atomic.StoreInt64(&e.n, int64(len(e.values))) - e.mu.Unlock() - return nil -} - -// deduplicate sorts and orders the entry's values. If values are already deduped and sorted, -// the function does no work and simply returns. 
-func (e *entry) deduplicate() { - e.mu.Lock() - defer e.mu.Unlock() - - if len(e.values) <= 1 { - return - } - e.values = e.values.Deduplicate() - atomic.StoreInt64(&e.n, int64(len(e.values))) -} - -// count returns the number of values in this entry. -func (e *entry) count() int { - return int(atomic.LoadInt64(&e.n)) -} - -// filter removes all values with timestamps between min and max inclusive. -func (e *entry) filter(min, max int64) { - e.mu.Lock() - if len(e.values) > 1 { - e.values = e.values.Deduplicate() - } - e.values = e.values.Exclude(min, max) - atomic.StoreInt64(&e.n, int64(len(e.values))) - e.mu.Unlock() -} - -// size returns the size of this entry in bytes. -func (e *entry) size() int { - e.mu.RLock() - sz := e.values.Size() - e.mu.RUnlock() - return sz -} - -// AppendTimestamps appends ts with the timestamps from the entry. -func (e *entry) AppendTimestamps(ts []int64) []int64 { - e.mu.RLock() - defer e.mu.RUnlock() - n := e.values.Len() - if n > 0 { - for i := range e.values { - ts = append(ts, e.values[i].UnixNano()) - } - } - return ts -} - -// InfluxQLType returns for the entry the data type of its values. -func (e *entry) InfluxQLType() (influxql.DataType, error) { - e.mu.RLock() - defer e.mu.RUnlock() - return e.values.InfluxQLType() -} - -// BlockType returns the data type for the entry as a block type. -func (e *entry) BlockType() byte { - // This value is mutated on create and does not need to be - // protected by a mutex. - return valueTypeToBlockType(e.vtype) -} diff --git a/tsdb/tsm1/cache_race_test.go b/tsdb/tsm1/cache_race_test.go deleted file mode 100644 index a3789c1645..0000000000 --- a/tsdb/tsm1/cache_race_test.go +++ /dev/null @@ -1,302 +0,0 @@ -package tsm1_test - -import ( - "fmt" - "math/rand" - "reflect" - "runtime" - "sort" - "sync" - "sync/atomic" - "testing" - "time" - - "github.com/influxdata/influxdb/v2/tsdb/tsm1" -) - -func TestCacheCheckConcurrentReadsAreSafe(t *testing.T) { - values := make(tsm1.Values, 1000) - timestamps := make([]int64, len(values)) - series := make([][]byte, 100) - for i := range timestamps { - timestamps[i] = int64(rand.Int63n(int64(len(values)))) - } - - for i := range values { - values[i] = tsm1.NewValue(timestamps[i*len(timestamps)/len(values)], float64(i)) - } - - for i := range series { - series[i] = []byte(fmt.Sprintf("series%d", i)) - } - - wg := sync.WaitGroup{} - c := tsm1.NewCache(1000000) - - ch := make(chan struct{}) - for _, s := range series { - for _, v := range values { - c.Write(s, tsm1.Values{v}) - } - wg.Add(3) - go func(s []byte) { - defer wg.Done() - <-ch - c.Values(s) - }(s) - go func(s []byte) { - defer wg.Done() - <-ch - c.Values(s) - }(s) - go func(s []byte) { - defer wg.Done() - <-ch - c.Values(s) - }(s) - } - close(ch) - wg.Wait() -} - -func TestCacheRace(t *testing.T) { - values := make(tsm1.Values, 1000) - timestamps := make([]int64, len(values)) - series := make([][]byte, 100) - for i := range timestamps { - timestamps[i] = int64(rand.Int63n(int64(len(values)))) - } - - for i := range values { - values[i] = tsm1.NewValue(timestamps[i*len(timestamps)/len(values)], float64(i)) - } - - for i := range series { - series[i] = []byte(fmt.Sprintf("series%d", i)) - } - - wg := sync.WaitGroup{} - c := tsm1.NewCache(1000000) - - ch := make(chan struct{}) - for _, s := range series { - for _, v := range values { - c.Write(s, tsm1.Values{v}) - } - wg.Add(1) - go func(s []byte) { - defer wg.Done() - <-ch - c.Values(s) - }(s) - } - - errC := make(chan error) - wg.Add(1) - go func() { - defer wg.Done() - 
<-ch - s, err := c.Snapshot() - if err == tsm1.ErrSnapshotInProgress { - return - } - - if err != nil { - errC <- fmt.Errorf("failed to snapshot cache: %v", err) - return - } - - s.Deduplicate() - c.ClearSnapshot(true) - }() - - close(ch) - - go func() { - wg.Wait() - close(errC) - }() - - for err := range errC { - if err != nil { - t.Error(err) - } - } -} - -func TestCacheRace2Compacters(t *testing.T) { - values := make(tsm1.Values, 1000) - timestamps := make([]int64, len(values)) - series := make([][]byte, 100) - for i := range timestamps { - timestamps[i] = int64(rand.Int63n(int64(len(values)))) - } - - for i := range values { - values[i] = tsm1.NewValue(timestamps[i*len(timestamps)/len(values)], float64(i)) - } - - for i := range series { - series[i] = []byte(fmt.Sprintf("series%d", i)) - } - - wg := sync.WaitGroup{} - c := tsm1.NewCache(1000000) - - ch := make(chan struct{}) - for _, s := range series { - for _, v := range values { - c.Write(s, tsm1.Values{v}) - } - wg.Add(1) - go func(s []byte) { - defer wg.Done() - <-ch - c.Values(s) - }(s) - } - fileCounter := 0 - mapFiles := map[int]bool{} - mu := sync.Mutex{} - errC := make(chan error) - for i := 0; i < 2; i++ { - wg.Add(1) - go func() { - defer wg.Done() - <-ch - s, err := c.Snapshot() - if err == tsm1.ErrSnapshotInProgress { - return - } - - if err != nil { - errC <- fmt.Errorf("failed to snapshot cache: %v", err) - return - } - - mu.Lock() - mapFiles[fileCounter] = true - fileCounter++ - myFiles := map[int]bool{} - for k, e := range mapFiles { - myFiles[k] = e - } - mu.Unlock() - s.Deduplicate() - c.ClearSnapshot(true) - mu.Lock() - defer mu.Unlock() - for k := range myFiles { - if _, ok := mapFiles[k]; !ok { - errC <- fmt.Errorf("something else deleted one of my files") - return - } else { - delete(mapFiles, k) - } - } - }() - } - close(ch) - - go func() { - wg.Wait() - close(errC) - }() - - for err := range errC { - if err != nil { - t.Error(err) - } - } -} - -func TestConcurrentReadAfterWrite(t *testing.T) { - t.Parallel() - - var starttime int64 = 1594785691 - series := [][]byte{[]byte("key1"), []byte("key2")} - - concurrency := runtime.GOMAXPROCS(0) * 2 - batch := 1024 - - errCh := make(chan error, concurrency) - closing := make(chan struct{}) - var wg sync.WaitGroup - - c := tsm1.NewCache(1024 * 1024 * 128) - for i := 0; i < concurrency; i++ { - wg.Add(1) - // read after read concurrently - go func() { - defer wg.Done() - for { - - select { - case <-closing: - errCh <- nil - return - default: - } - - ts := atomic.AddInt64(&starttime, int64(batch)) - writes := make(tsm1.Values, 0, batch) - for j := 0; j < batch; j++ { - writes = append(writes, - tsm1.NewValue(ts+int64(j), ts+int64(j))) - } - for _, key := range series { - if err := c.Write(key, writes); err != nil { - errCh <- err - return - } - } - for _, key := range series { - // check the read result - reads := c.Values(key) - - if len(reads) < len(writes) { - errCh <- fmt.Errorf("read count: %v less than write count: %v", len(reads), len(writes)) - return - } - - sort.Slice(reads, func(i, j int) bool { - return reads[i].UnixNano() < reads[j].UnixNano() - }) - - k := 0 - for j := range writes { - write := writes[j].Value() - - found := false - for k < len(reads) { - read := reads[k].Value() - if reflect.DeepEqual(read, write) { - found = true - break - } - k++ - } - - if !found { - errCh <- fmt.Errorf("write value: %v not found in reads", write) - return - } - } - } - } - }() - } - - // sleep for a little while and check - time.Sleep(time.Second * 20) - close(closing) 
- wg.Wait() - - for i := 0; i < concurrency; i++ { - err := <-errCh - if err != nil { - t.Fatal(err) - return - } - } -} diff --git a/tsdb/tsm1/cache_test.go b/tsdb/tsm1/cache_test.go deleted file mode 100644 index ef206da8ab..0000000000 --- a/tsdb/tsm1/cache_test.go +++ /dev/null @@ -1,872 +0,0 @@ -package tsm1 - -import ( - "context" - "errors" - "fmt" - "io/ioutil" - "math" - "math/rand" - "os" - "reflect" - "runtime" - "strings" - "sync" - "sync/atomic" - "testing" - - "github.com/golang/snappy" - "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/storage/wal" -) - -func TestCache_NewCache(t *testing.T) { - c := NewCache(100) - if c == nil { - t.Fatalf("failed to create new cache") - } - - if c.MaxSize() != 100 { - t.Fatalf("new cache max size not correct") - } - if c.Size() != 0 { - t.Fatalf("new cache size not correct") - } - if len(c.Keys()) != 0 { - t.Fatalf("new cache keys not correct: %v", c.Keys()) - } -} - -func TestCache_CacheWrite(t *testing.T) { - v0 := NewValue(1, 1.0) - v1 := NewValue(2, 2.0) - v2 := NewValue(3, 3.0) - values := Values{v0, v1, v2} - valuesSize := uint64(v0.Size() + v1.Size() + v2.Size()) - - c := NewCache(3 * valuesSize) - - if err := c.Write([]byte("foo"), values); err != nil { - t.Fatalf("failed to write key foo to cache: %s", err.Error()) - } - if err := c.Write([]byte("bar"), values); err != nil { - t.Fatalf("failed to write key foo to cache: %s", err.Error()) - } - if n := c.Size(); n != 2*valuesSize+6 { - t.Fatalf("cache size incorrect after 2 writes, exp %d, got %d", 2*valuesSize, n) - } - - if exp, keys := [][]byte{[]byte("bar"), []byte("foo")}, c.Keys(); !reflect.DeepEqual(keys, exp) { - t.Fatalf("cache keys incorrect after 2 writes, exp %v, got %v", exp, keys) - } -} - -func TestCache_CacheWrite_TypeConflict(t *testing.T) { - v0 := NewValue(1, 1.0) - v1 := NewValue(2, int(64)) - values := Values{v0, v1} - valuesSize := v0.Size() + v1.Size() - - c := NewCache(uint64(2 * valuesSize)) - - if err := c.Write([]byte("foo"), values[:1]); err != nil { - t.Fatalf("failed to write key foo to cache: %s", err.Error()) - } - - if err := c.Write([]byte("foo"), values[1:]); err == nil { - t.Fatalf("expected field type conflict") - } - - if exp, got := uint64(v0.Size())+3, c.Size(); exp != got { - t.Fatalf("cache size incorrect after 2 writes, exp %d, got %d", exp, got) - } -} - -func TestCache_CacheWriteMulti(t *testing.T) { - v0 := NewValue(1, 1.0) - v1 := NewValue(2, 2.0) - v2 := NewValue(3, 3.0) - values := Values{v0, v1, v2} - valuesSize := uint64(v0.Size() + v1.Size() + v2.Size()) - - c := NewCache(30 * valuesSize) - - if err := c.WriteMulti(map[string][]Value{"foo": values, "bar": values}); err != nil { - t.Fatalf("failed to write key foo to cache: %s", err.Error()) - } - if n := c.Size(); n != 2*valuesSize+6 { - t.Fatalf("cache size incorrect after 2 writes, exp %d, got %d", 2*valuesSize, n) - } - - if exp, keys := [][]byte{[]byte("bar"), []byte("foo")}, c.Keys(); !reflect.DeepEqual(keys, exp) { - t.Fatalf("cache keys incorrect after 2 writes, exp %v, got %v", exp, keys) - } -} - -// Tests that the cache stats and size are correctly maintained during writes. -func TestCache_WriteMulti_Stats(t *testing.T) { - vf := NewValue(1, 1.0) - vi := NewValue(1, int64(1)) - c := NewCache(60) - - // Fail one of the values in the write. - if err := c.WriteMulti(map[string][]Value{"foo": {vf}}); err != nil { - t.Fatalf("expected no error. 
got %v", err) - } - if err := c.WriteMulti(map[string][]Value{"foo": {vi}, "bar": {vf}}); err == nil { - t.Fatal("got no error") - } - - // Not enough room in the cache. - if err := c.WriteMulti(map[string][]Value{"foo": {vf, vf}}); err == nil { - t.Fatal("got no error") - } - - // Cache size decreased correctly. - if got, exp := c.Size(), uint64(3+3*8+3+8); got != exp { - t.Fatalf("got %v, expected %v", got, exp) - } - - // Write stats updated - if got, exp := atomic.LoadUint64(&c.tracker.writesDropped), uint64(1); got != exp { - t.Fatalf("got %v, expected %v", got, exp) - } else if got, exp := atomic.LoadUint64(&c.tracker.writesErr), uint64(2); got != exp { - t.Fatalf("got %v, expected %v", got, exp) - } -} - -func TestCache_CacheWriteMulti_TypeConflict(t *testing.T) { - v0 := NewValue(1, 1.0) - v1 := NewValue(2, 2.0) - v2 := NewValue(3, int64(3)) - values := Values{v0, v1, v2} - valuesSize := uint64(v0.Size() + v1.Size() + v2.Size()) - - c := NewCache(3 * valuesSize) - - if err := c.WriteMulti(map[string][]Value{"foo": values[:1], "bar": values[1:]}); err == nil { - t.Fatalf(" expected field type conflict") - } - - if exp, got := uint64(v0.Size())+3, c.Size(); exp != got { - t.Fatalf("cache size incorrect after 2 writes, exp %d, got %d", exp, got) - } - - if exp, keys := [][]byte{[]byte("foo")}, c.Keys(); !reflect.DeepEqual(keys, exp) { - t.Fatalf("cache keys incorrect after 2 writes, exp %v, got %v", exp, keys) - } -} - -func TestCache_Cache_DeleteBucketRange(t *testing.T) { - v0 := NewValue(1, 1.0) - v1 := NewValue(2, 2.0) - v2 := NewValue(3, 3.0) - values := Values{v0, v1, v2} - valuesSize := uint64(v0.Size() + v1.Size() + v2.Size()) - - c := NewCache(30 * valuesSize) - - if err := c.WriteMulti(map[string][]Value{"foo": values, "bar": values}); err != nil { - t.Fatalf("failed to write key foo to cache: %s", err.Error()) - } - if n := c.Size(); n != 2*valuesSize+6 { - t.Fatalf("cache size incorrect after 2 writes, exp %d, got %d", 2*valuesSize, n) - } - - if exp, keys := [][]byte{[]byte("bar"), []byte("foo")}, c.Keys(); !reflect.DeepEqual(keys, exp) { - t.Fatalf("cache keys incorrect after 2 writes, exp %v, got %v", exp, keys) - } - - c.DeleteBucketRange(context.Background(), "bar", 2, math.MaxInt64, nil) - - if exp, keys := [][]byte{[]byte("bar"), []byte("foo")}, c.Keys(); !reflect.DeepEqual(keys, exp) { - t.Fatalf("cache keys incorrect after delete, exp %v, got %v", exp, keys) - } - - if got, exp := c.Size(), valuesSize+uint64(v0.Size())+6; exp != got { - t.Fatalf("cache size incorrect after delete, exp %d, got %d", exp, got) - } - - if got, exp := len(c.Values([]byte("bar"))), 1; got != exp { - t.Fatalf("cache values mismatch: got %v, exp %v", got, exp) - } - - if got, exp := len(c.Values([]byte("foo"))), 3; got != exp { - t.Fatalf("cache values mismatch: got %v, exp %v", got, exp) - } -} - -func TestCache_DeleteBucketRange_NoValues(t *testing.T) { - v0 := NewValue(1, 1.0) - v1 := NewValue(2, 2.0) - v2 := NewValue(3, 3.0) - values := Values{v0, v1, v2} - valuesSize := uint64(v0.Size() + v1.Size() + v2.Size()) - - c := NewCache(3 * valuesSize) - - if err := c.WriteMulti(map[string][]Value{"foo": values}); err != nil { - t.Fatalf("failed to write key foo to cache: %s", err.Error()) - } - if n := c.Size(); n != valuesSize+3 { - t.Fatalf("cache size incorrect after 2 writes, exp %d, got %d", 2*valuesSize, n) - } - - if exp, keys := [][]byte{[]byte("foo")}, c.Keys(); !reflect.DeepEqual(keys, exp) { - t.Fatalf("cache keys incorrect after 2 writes, exp %v, got %v", exp, keys) - } - - 
c.DeleteBucketRange(context.Background(), "foo", math.MinInt64, math.MaxInt64, nil) - - if exp, keys := 0, len(c.Keys()); !reflect.DeepEqual(keys, exp) { - t.Fatalf("cache keys incorrect after 2 writes, exp %v, got %v", exp, keys) - } - - if got, exp := c.Size(), uint64(0); exp != got { - t.Fatalf("cache size incorrect after 2 writes, exp %d, got %d", exp, got) - } - - if got, exp := len(c.Values([]byte("foo"))), 0; got != exp { - t.Fatalf("cache values mismatch: got %v, exp %v", got, exp) - } -} - -func TestCache_DeleteBucketRange_NotSorted(t *testing.T) { - v0 := NewValue(1, 1.0) - v1 := NewValue(3, 3.0) - v2 := NewValue(2, 2.0) - values := Values{v0, v1, v2} - valuesSize := uint64(v0.Size() + v1.Size() + v2.Size()) - - c := NewCache(3 * valuesSize) - - if err := c.WriteMulti(map[string][]Value{"foo": values}); err != nil { - t.Fatalf("failed to write key foo to cache: %s", err.Error()) - } - if n := c.Size(); n != valuesSize+3 { - t.Fatalf("cache size incorrect after 2 writes, exp %d, got %d", 2*valuesSize, n) - } - - if exp, keys := [][]byte{[]byte("foo")}, c.Keys(); !reflect.DeepEqual(keys, exp) { - t.Fatalf("cache keys incorrect after 2 writes, exp %v, got %v", exp, keys) - } - - c.DeleteBucketRange(context.Background(), "foo", 1, 3, nil) - - if exp, keys := 0, len(c.Keys()); !reflect.DeepEqual(keys, exp) { - t.Fatalf("cache keys incorrect after delete, exp %v, got %v", exp, keys) - } - - if got, exp := c.Size(), uint64(0); exp != got { - t.Fatalf("cache size incorrect after delete, exp %d, got %d", exp, got) - } - - if got, exp := len(c.Values([]byte("foo"))), 0; got != exp { - t.Fatalf("cache values mismatch: got %v, exp %v", got, exp) - } -} - -func TestCache_DeleteBucketRange_NonExistent(t *testing.T) { - c := NewCache(1024) - - c.DeleteBucketRange(context.Background(), "bar", math.MinInt64, math.MaxInt64, nil) - - if got, exp := c.Size(), uint64(0); exp != got { - t.Fatalf("cache size incorrect exp %d, got %d", exp, got) - } -} - -type stringPredicate string - -func (s stringPredicate) Clone() influxdb.Predicate { return s } -func (s stringPredicate) Matches(k []byte) bool { return string(s) == string(k) } -func (s stringPredicate) Marshal() ([]byte, error) { return nil, errors.New("unused") } - -func TestCache_Cache_DeleteBucketRange_WithPredicate(t *testing.T) { - v0 := NewValue(1, 1.0) - v1 := NewValue(2, 2.0) - v2 := NewValue(3, 3.0) - values := Values{v0, v1, v2} - valuesSize := uint64(v0.Size() + v1.Size() + v2.Size()) - - c := NewCache(30 * valuesSize) - - if err := c.WriteMulti(map[string][]Value{"foo": values, "fee": values}); err != nil { - t.Fatalf("failed to write key foo to cache: %s", err.Error()) - } - if n := c.Size(); n != 2*valuesSize+6 { - t.Fatalf("cache size incorrect after 2 writes, exp %d, got %d", 2*valuesSize, n) - } - - if exp, keys := [][]byte{[]byte("fee"), []byte("foo")}, c.Keys(); !reflect.DeepEqual(keys, exp) { - t.Fatalf("cache keys incorrect after 2 writes, exp %v, got %v", exp, keys) - } - - c.DeleteBucketRange(context.Background(), "f", 2, math.MaxInt64, stringPredicate("fee")) - - if exp, keys := [][]byte{[]byte("fee"), []byte("foo")}, c.Keys(); !reflect.DeepEqual(keys, exp) { - t.Fatalf("cache keys incorrect after delete, exp %v, got %v", exp, keys) - } - - if got, exp := c.Size(), valuesSize+uint64(v0.Size())+6; exp != got { - t.Fatalf("cache size incorrect after delete, exp %d, got %d", exp, got) - } - - if got, exp := len(c.Values([]byte("fee"))), 1; got != exp { - t.Fatalf("cache values mismatch: got %v, exp %v", got, exp) - } - - if got, 
exp := len(c.Values([]byte("foo"))), 3; got != exp { - t.Fatalf("cache values mismatch: got %v, exp %v", got, exp) - } -} - -// This tests writing two batches to the same series. The first batch -// is sorted. The second batch is also sorted but contains duplicates. -func TestCache_CacheWriteMulti_Duplicates(t *testing.T) { - v0 := NewValue(2, 1.0) - v1 := NewValue(3, 1.0) - values0 := Values{v0, v1} - - v3 := NewValue(4, 2.0) - v4 := NewValue(5, 3.0) - v5 := NewValue(5, 3.0) - values1 := Values{v3, v4, v5} - - c := NewCache(0) - - if err := c.WriteMulti(map[string][]Value{"foo": values0}); err != nil { - t.Fatalf("failed to write key foo to cache: %s", err.Error()) - } - - if err := c.WriteMulti(map[string][]Value{"foo": values1}); err != nil { - t.Fatalf("failed to write key foo to cache: %s", err.Error()) - } - - if exp, keys := [][]byte{[]byte("foo")}, c.Keys(); !reflect.DeepEqual(keys, exp) { - t.Fatalf("cache keys incorrect after 2 writes, exp %v, got %v", exp, keys) - } - - expAscValues := Values{v0, v1, v3, v5} - if exp, got := len(expAscValues), len(c.Values([]byte("foo"))); exp != got { - t.Fatalf("value count mismatch: exp: %v, got %v", exp, got) - } - if deduped := c.Values([]byte("foo")); !reflect.DeepEqual(expAscValues, deduped) { - t.Fatalf("deduped ascending values for foo incorrect, exp: %v, got %v", expAscValues, deduped) - } -} - -func TestCache_CacheValues(t *testing.T) { - v0 := NewValue(1, 0.0) - v1 := NewValue(2, 2.0) - v2 := NewValue(3, 3.0) - v3 := NewValue(1, 1.0) - v4 := NewValue(4, 4.0) - - c := NewCache(512) - if deduped := c.Values([]byte("no such key")); deduped != nil { - t.Fatalf("Values returned for no such key") - } - - if err := c.Write([]byte("foo"), Values{v0, v1, v2, v3}); err != nil { - t.Fatalf("failed to write 3 values, key foo to cache: %s", err.Error()) - } - if err := c.Write([]byte("foo"), Values{v4}); err != nil { - t.Fatalf("failed to write 1 value, key foo to cache: %s", err.Error()) - } - - expAscValues := Values{v3, v1, v2, v4} - if deduped := c.Values([]byte("foo")); !reflect.DeepEqual(expAscValues, deduped) { - t.Fatalf("deduped ascending values for foo incorrect, exp: %v, got %v", expAscValues, deduped) - } -} - -func TestCache_CacheSnapshot(t *testing.T) { - v0 := NewValue(2, 0.0) - v1 := NewValue(3, 2.0) - v2 := NewValue(4, 3.0) - v3 := NewValue(5, 4.0) - v4 := NewValue(6, 5.0) - v5 := NewValue(1, 5.0) - v6 := NewValue(7, 5.0) - v7 := NewValue(2, 5.0) - - c := NewCache(512) - if err := c.Write([]byte("foo"), Values{v0, v1, v2, v3}); err != nil { - t.Fatalf("failed to write 3 values, key foo to cache: %s", err.Error()) - } - - // Grab snapshot, and ensure it's as expected. - snapshot, err := c.Snapshot() - if err != nil { - t.Fatalf("failed to snapshot cache: %v", err) - } - - expValues := Values{v0, v1, v2, v3} - if deduped := snapshot.values([]byte("foo")); !reflect.DeepEqual(expValues, deduped) { - t.Fatalf("snapshotted values for foo incorrect, exp: %v, got %v", expValues, deduped) - } - - // Ensure cache is still as expected. - if deduped := c.Values([]byte("foo")); !reflect.DeepEqual(expValues, deduped) { - t.Fatalf("post-snapshot values for foo incorrect, exp: %v, got %v", expValues, deduped) - } - - // Write a new value to the cache. 
- if err := c.Write([]byte("foo"), Values{v4}); err != nil { - t.Fatalf("failed to write post-snap value, key foo to cache: %s", err.Error()) - } - expValues = Values{v0, v1, v2, v3, v4} - if deduped := c.Values([]byte("foo")); !reflect.DeepEqual(expValues, deduped) { - t.Fatalf("post-snapshot write values for foo incorrect, exp: %v, got %v", expValues, deduped) - } - - // Write a new, out-of-order, value to the cache. - if err := c.Write([]byte("foo"), Values{v5}); err != nil { - t.Fatalf("failed to write post-snap value, key foo to cache: %s", err.Error()) - } - expValues = Values{v5, v0, v1, v2, v3, v4} - if deduped := c.Values([]byte("foo")); !reflect.DeepEqual(expValues, deduped) { - t.Fatalf("post-snapshot out-of-order write values for foo incorrect, exp: %v, got %v", expValues, deduped) - } - - // Clear snapshot, ensuring non-snapshot data untouched. - c.ClearSnapshot(true) - - expValues = Values{v5, v4} - if deduped := c.Values([]byte("foo")); !reflect.DeepEqual(expValues, deduped) { - t.Fatalf("post-clear values for foo incorrect, exp: %v, got %v", expValues, deduped) - } - - // Create another snapshot - _, err = c.Snapshot() - if err != nil { - t.Fatalf("failed to snapshot cache: %v", err) - } - - if err := c.Write([]byte("foo"), Values{v4, v5}); err != nil { - t.Fatalf("failed to write post-snap value, key foo to cache: %s", err.Error()) - } - - c.ClearSnapshot(true) - - _, err = c.Snapshot() - if err != nil { - t.Fatalf("failed to snapshot cache: %v", err) - } - - if err := c.Write([]byte("foo"), Values{v6, v7}); err != nil { - t.Fatalf("failed to write post-snap value, key foo to cache: %s", err.Error()) - } - - expValues = Values{v5, v7, v4, v6} - if deduped := c.Values([]byte("foo")); !reflect.DeepEqual(expValues, deduped) { - t.Fatalf("post-snapshot out-of-order write values for foo incorrect, exp: %v, got %v", expValues, deduped) - } -} - -// Tests that Snapshot updates statistics correctly. -func TestCache_Snapshot_Stats(t *testing.T) { - limit := uint64(16) - c := NewCache(limit) - - values := map[string][]Value{"foo": {NewValue(1, 1.0)}} - if err := c.WriteMulti(values); err != nil { - t.Fatal(err) - } - - if got, exp := atomic.LoadUint64(&c.tracker.memSizeBytes), uint64(16)+3; got != exp { - t.Fatalf("got %v, expected %v", got, exp) - } - - _, err := c.Snapshot() - if err != nil { - t.Fatal(err) - } - - // Store size should have been reset. - if got, exp := c.Size(), uint64(16)+3; got != exp { - t.Fatalf("got %v, expected %v", got, exp) - } - - // Cached bytes should have been increased. - if got, exp := atomic.LoadUint64(&c.tracker.snapshottedBytes), uint64(16)+3; got != exp { - t.Fatalf("got %v, expected %v", got, exp) - } - - if got, exp := atomic.LoadUint64(&c.tracker.memSizeBytes), uint64(16)+3; got != exp { - t.Fatalf("got %v, expected %v", got, exp) - } -} - -func TestCache_CacheEmptySnapshot(t *testing.T) { - c := NewCache(512) - - // Grab snapshot, and ensure it's as expected. - snapshot, err := c.Snapshot() - if err != nil { - t.Fatalf("failed to snapshot cache: %v", err) - } - if deduped := snapshot.values([]byte("foo")); !reflect.DeepEqual(Values(nil), deduped) { - t.Fatalf("snapshotted values for foo incorrect, exp: %v, got %v", nil, deduped) - } - - // Ensure cache is still as expected. - if deduped := c.Values([]byte("foo")); !reflect.DeepEqual(Values(nil), deduped) { - t.Fatalf("post-snapshotted values for foo incorrect, exp: %v, got %v", Values(nil), deduped) - } - - // Clear snapshot. 
- c.ClearSnapshot(true) - if deduped := c.Values([]byte("foo")); !reflect.DeepEqual(Values(nil), deduped) { - t.Fatalf("post-snapshot-clear values for foo incorrect, exp: %v, got %v", Values(nil), deduped) - } -} - -func TestCache_CacheWriteMemoryExceeded(t *testing.T) { - v0 := NewValue(1, 1.0) - v1 := NewValue(2, 2.0) - - c := NewCache(uint64(v1.Size())) - - if err := c.Write([]byte("foo"), Values{v0}); err != nil { - t.Fatalf("failed to write key foo to cache: %s", err.Error()) - } - if exp, keys := [][]byte{[]byte("foo")}, c.Keys(); !reflect.DeepEqual(keys, exp) { - t.Fatalf("cache keys incorrect after writes, exp %v, got %v", exp, keys) - } - if err := c.Write([]byte("bar"), Values{v1}); err == nil || !strings.Contains(err.Error(), "cache-max-memory-size") { - t.Fatalf("wrong error writing key bar to cache: %v", err) - } - - // Grab snapshot, write should still fail since we're still using the memory. - _, err := c.Snapshot() - if err != nil { - t.Fatalf("failed to snapshot cache: %v", err) - } - if err := c.Write([]byte("bar"), Values{v1}); err == nil || !strings.Contains(err.Error(), "cache-max-memory-size") { - t.Fatalf("wrong error writing key bar to cache: %v", err) - } - - // Clear the snapshot and the write should now succeed. - c.ClearSnapshot(true) - if err := c.Write([]byte("bar"), Values{v1}); err != nil { - t.Fatalf("failed to write key foo to cache: %s", err.Error()) - } - expAscValues := Values{v1} - if deduped := c.Values([]byte("bar")); !reflect.DeepEqual(expAscValues, deduped) { - t.Fatalf("deduped ascending values for bar incorrect, exp: %v, got %v", expAscValues, deduped) - } -} - -func TestCache_Deduplicate_Concurrent(t *testing.T) { - if testing.Short() || os.Getenv("GORACE") != "" || os.Getenv("APPVEYOR") != "" { - t.Skip("Skipping test in short, race, appveyor mode.") - } - - values := make(map[string][]Value) - - for i := 0; i < 1000; i++ { - for j := 0; j < 100; j++ { - values[fmt.Sprintf("cpu%d", i)] = []Value{NewValue(int64(i+j)+int64(rand.Intn(10)), float64(i))} - } - } - - wg := sync.WaitGroup{} - c := NewCache(1000000) - - wg.Add(1) - go func() { - defer wg.Done() - for i := 0; i < 1000; i++ { - c.WriteMulti(values) - } - }() - - wg.Add(1) - go func() { - defer wg.Done() - for i := 0; i < 1000; i++ { - c.Deduplicate() - } - }() - - wg.Wait() -} - -// Ensure the CacheLoader can correctly load from a single segment, even if it's corrupted. -func TestCacheLoader_LoadSingle(t *testing.T) { - // Create a WAL segment. - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - w := wal.NewWALSegmentWriter(f) - - p1 := NewValue(1, 1.1) - p2 := NewValue(1, int64(1)) - p3 := NewValue(1, true) - - values := map[string][]Value{ - "foo": {p1}, - "bar": {p2}, - "baz": {p3}, - } - - entry := &wal.WriteWALEntry{ - Values: values, - } - - if err := w.Write(mustMarshalEntry(entry)); err != nil { - t.Fatal("write points", err) - } - - if err := w.Flush(); err != nil { - t.Fatalf("flush error: %v", err) - } - - // Load the cache using the segment. - cache := NewCache(1024) - loader := NewCacheLoader([]string{f.Name()}) - if err := loader.Load(cache); err != nil { - t.Fatalf("failed to load cache: %s", err.Error()) - } - - // Check the cache. 
- if values := cache.Values([]byte("foo")); !reflect.DeepEqual(values, Values{p1}) { - t.Fatalf("cache key foo not as expected, got %v, exp %v", values, Values{p1}) - } - if values := cache.Values([]byte("bar")); !reflect.DeepEqual(values, Values{p2}) { - t.Fatalf("cache key foo not as expected, got %v, exp %v", values, Values{p2}) - } - if values := cache.Values([]byte("baz")); !reflect.DeepEqual(values, Values{p3}) { - t.Fatalf("cache key foo not as expected, got %v, exp %v", values, Values{p3}) - } - - // Corrupt the WAL segment. - if _, err := f.Write([]byte{1, 4, 0, 0, 0}); err != nil { - t.Fatalf("corrupt WAL segment: %s", err.Error()) - } - - // Reload the cache using the segment. - cache = NewCache(1024) - loader = NewCacheLoader([]string{f.Name()}) - if err := loader.Load(cache); err != nil { - t.Fatalf("failed to load cache: %s", err.Error()) - } - - // Check the cache. - if values := cache.Values([]byte("foo")); !reflect.DeepEqual(values, Values{p1}) { - t.Fatalf("cache key foo not as expected, got %v, exp %v", values, Values{p1}) - } - if values := cache.Values([]byte("bar")); !reflect.DeepEqual(values, Values{p2}) { - t.Fatalf("cache key bar not as expected, got %v, exp %v", values, Values{p2}) - } - if values := cache.Values([]byte("baz")); !reflect.DeepEqual(values, Values{p3}) { - t.Fatalf("cache key baz not as expected, got %v, exp %v", values, Values{p3}) - } -} - -// Ensure the CacheLoader can correctly load from two segments, even if one is corrupted. -func TestCacheLoader_LoadDouble(t *testing.T) { - // Create a WAL segment. - dir := mustTempDir() - defer os.RemoveAll(dir) - f1, f2 := mustTempFile(dir), mustTempFile(dir) - w1, w2 := wal.NewWALSegmentWriter(f1), wal.NewWALSegmentWriter(f2) - - p1 := NewValue(1, 1.1) - p2 := NewValue(1, int64(1)) - p3 := NewValue(1, true) - p4 := NewValue(1, "string") - - // Write first and second segment. - - segmentWrite := func(w *wal.WALSegmentWriter, values map[string][]Value) { - entry := &wal.WriteWALEntry{ - Values: values, - } - if err := w1.Write(mustMarshalEntry(entry)); err != nil { - t.Fatal("write points", err) - } - if err := w1.Flush(); err != nil { - t.Fatalf("flush error: %v", err) - } - } - - values := map[string][]Value{ - "foo": {p1}, - "bar": {p2}, - } - segmentWrite(w1, values) - values = map[string][]Value{ - "baz": {p3}, - "qux": {p4}, - } - segmentWrite(w2, values) - - // Corrupt the first WAL segment. - if _, err := f1.Write([]byte{1, 4, 0, 0, 0}); err != nil { - t.Fatalf("corrupt WAL segment: %s", err.Error()) - } - - // Load the cache using the segments. - cache := NewCache(1024) - loader := NewCacheLoader([]string{f1.Name(), f2.Name()}) - if err := loader.Load(cache); err != nil { - t.Fatalf("failed to load cache: %s", err.Error()) - } - - // Check the cache. 
- if values := cache.Values([]byte("foo")); !reflect.DeepEqual(values, Values{p1}) { - t.Fatalf("cache key foo not as expected, got %v, exp %v", values, Values{p1}) - } - if values := cache.Values([]byte("bar")); !reflect.DeepEqual(values, Values{p2}) { - t.Fatalf("cache key bar not as expected, got %v, exp %v", values, Values{p2}) - } - if values := cache.Values([]byte("baz")); !reflect.DeepEqual(values, Values{p3}) { - t.Fatalf("cache key baz not as expected, got %v, exp %v", values, Values{p3}) - } - if values := cache.Values([]byte("qux")); !reflect.DeepEqual(values, Values{p4}) { - t.Fatalf("cache key qux not as expected, got %v, exp %v", values, Values{p4}) - } -} - -func TestCache_Split(t *testing.T) { - v0 := NewValue(1, 1.0) - v1 := NewValue(2, 2.0) - v2 := NewValue(3, 3.0) - values := Values{v0, v1, v2} - valuesSize := uint64(v0.Size() + v1.Size() + v2.Size()) - - c := NewCache(0) - - if err := c.Write([]byte("foo"), values); err != nil { - t.Fatalf("failed to write key foo to cache: %s", err.Error()) - } - if err := c.Write([]byte("bar"), values); err != nil { - t.Fatalf("failed to write key foo to cache: %s", err.Error()) - } - - if err := c.Write([]byte("baz"), values); err != nil { - t.Fatalf("failed to write key foo to cache: %s", err.Error()) - } - - if n := c.Size(); n != 3*valuesSize+9 { - t.Fatalf("cache size incorrect after 3 writes, exp %d, got %d", 3*valuesSize*9, n) - } - - splits := c.Split(3) - keys := make(map[string]int) - for _, s := range splits { - for _, k := range s.Keys() { - keys[string(k)] = s.Values(k).Size() - } - } - - for _, key := range []string{"foo", "bar", "baz"} { - if _, ok := keys[key]; !ok { - t.Fatalf("missing key, exp %s, got %v", key, nil) - } - } -} - -func mustTempDir() string { - dir, err := ioutil.TempDir("", "tsm1-test") - if err != nil { - panic(fmt.Sprintf("failed to create temp dir: %v", err)) - } - return dir -} - -func mustTempFile(dir string) *os.File { - f, err := ioutil.TempFile(dir, "tsm1test") - if err != nil { - panic(fmt.Sprintf("failed to create temp file: %v", err)) - } - return f -} - -func mustMarshalEntry(entry wal.WALEntry) (wal.WalEntryType, []byte) { - bytes := make([]byte, 1024<<2) - - b, err := entry.Encode(bytes) - if err != nil { - panic(fmt.Sprintf("error encoding: %v", err)) - } - - return entry.Type(), snappy.Encode(b, b) -} - -var fvSize = uint64(NewValue(1, float64(1)).Size()) - -func BenchmarkCacheFloatEntries(b *testing.B) { - cache := NewCache(uint64(b.N)*fvSize + 4) - vals := make([][]Value, b.N) - for i := 0; i < b.N; i++ { - vals[i] = []Value{NewValue(1, float64(i))} - } - b.ResetTimer() - - for i := 0; i < b.N; i++ { - if err := cache.Write([]byte("test"), vals[i]); err != nil { - b.Fatal("err:", err, "i:", i, "N:", b.N) - } - } -} - -type points struct { - key []byte - vals []Value -} - -func BenchmarkCacheParallelFloatEntries(b *testing.B) { - c := b.N * runtime.GOMAXPROCS(0) - cache := NewCache(uint64(c)*fvSize*10 + 20*5) - vals := make([]points, c) - for i := 0; i < c; i++ { - v := make([]Value, 10) - for j := 0; j < 10; j++ { - v[j] = NewValue(1, float64(i+j)) - } - vals[i] = points{key: []byte(fmt.Sprintf("cpu%v", rand.Intn(20))), vals: v} - } - i := int32(-1) - b.ResetTimer() - - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - j := atomic.AddInt32(&i, 1) - v := vals[j] - if err := cache.Write(v.key, v.vals); err != nil { - b.Fatal("err:", err, "j:", j, "N:", b.N) - } - } - }) -} - -func BenchmarkEntry_add(b *testing.B) { - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - 
b.StopTimer() - values := make([]Value, 10) - for i := 0; i < 10; i++ { - values[i] = NewValue(int64(i+1), float64(i)) - } - - otherValues := make([]Value, 10) - for i := 0; i < 10; i++ { - otherValues[i] = NewValue(1, float64(i)) - } - - entry, err := newEntryValues(values) - if err != nil { - b.Fatal(err) - } - - b.StartTimer() - if err := entry.add(otherValues); err != nil { - b.Fatal(err) - } - } - }) -} diff --git a/tsdb/tsm1/cachestatus_string.go b/tsdb/tsm1/cachestatus_string.go deleted file mode 100644 index ab21010916..0000000000 --- a/tsdb/tsm1/cachestatus_string.go +++ /dev/null @@ -1,29 +0,0 @@ -// Code generated by "stringer -type=CacheStatus"; DO NOT EDIT. - -package tsm1 - -import "strconv" - -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. - var x [1]struct{} - _ = x[CacheStatusOkay-0] - _ = x[CacheStatusSizeExceeded-1] - _ = x[CacheStatusAgeExceeded-2] - _ = x[CacheStatusColdNoWrites-3] - _ = x[CacheStatusRetention-4] - _ = x[CacheStatusFullCompaction-5] - _ = x[CacheStatusBackup-6] -} - -const _CacheStatus_name = "CacheStatusOkayCacheStatusSizeExceededCacheStatusAgeExceededCacheStatusColdNoWritesCacheStatusRetentionCacheStatusFullCompactionCacheStatusBackup" - -var _CacheStatus_index = [...]uint8{0, 15, 38, 60, 83, 103, 128, 145} - -func (i CacheStatus) String() string { - if i < 0 || i >= CacheStatus(len(_CacheStatus_index)-1) { - return "CacheStatus(" + strconv.FormatInt(int64(i), 10) + ")" - } - return _CacheStatus_name[_CacheStatus_index[i]:_CacheStatus_index[i+1]] -} diff --git a/tsdb/tsm1/compact.gen.go b/tsdb/tsm1/compact.gen.go deleted file mode 100644 index 16c811d195..0000000000 --- a/tsdb/tsm1/compact.gen.go +++ /dev/null @@ -1,2103 +0,0 @@ -// Generated by tmpl -// https://github.com/benbjohnson/tmpl -// -// DO NOT EDIT! -// Source: compact.gen.go.tmpl - -package tsm1 - -import ( - "sort" - - "github.com/influxdata/influxdb/v2/tsdb/cursors" -) - -// merge combines the next set of blocks into merged blocks. -func (k *tsmKeyIterator) mergeFloat() { - // No blocks left, or pending merged values, we're done - if len(k.blocks) == 0 && len(k.merged) == 0 && len(k.mergedFloatValues) == 0 { - return - } - - sort.Stable(k.blocks) - - dedup := len(k.mergedFloatValues) != 0 - if len(k.blocks) > 0 && !dedup { - // If we have more than one block or any partially tombstoned blocks, we many need to dedup - dedup = len(k.blocks[0].tombstones) > 0 || k.blocks[0].partiallyRead() - - // Quickly scan each block to see if any overlap with the prior block, if they overlap then - // we need to dedup as there may be duplicate points now - for i := 1; !dedup && i < len(k.blocks); i++ { - dedup = k.blocks[i].partiallyRead() || - k.blocks[i].overlapsTimeRange(k.blocks[i-1].minTime, k.blocks[i-1].maxTime) || - len(k.blocks[i].tombstones) > 0 - } - - } - - k.merged = k.combineFloat(dedup) -} - -// combine returns a new set of blocks using the current blocks in the buffers. If dedup -// is true, all the blocks will be decoded, dedup and sorted in in order. If dedup is false, -// only blocks that are smaller than the chunk size will be decoded and combined. 
-func (k *tsmKeyIterator) combineFloat(dedup bool) blocks { - if dedup { - for len(k.mergedFloatValues) < k.size && len(k.blocks) > 0 { - for len(k.blocks) > 0 && k.blocks[0].read() { - k.blocks = k.blocks[1:] - } - - if len(k.blocks) == 0 { - break - } - first := k.blocks[0] - minTime := first.minTime - maxTime := first.maxTime - - // Adjust the min time to the start of any overlapping blocks. - for i := 0; i < len(k.blocks); i++ { - if k.blocks[i].overlapsTimeRange(minTime, maxTime) && !k.blocks[i].read() { - if k.blocks[i].minTime < minTime { - minTime = k.blocks[i].minTime - } - if k.blocks[i].maxTime > minTime && k.blocks[i].maxTime < maxTime { - maxTime = k.blocks[i].maxTime - } - } - } - - // We have some overlapping blocks so decode all, append in order and then dedup - for i := 0; i < len(k.blocks); i++ { - if !k.blocks[i].overlapsTimeRange(minTime, maxTime) || k.blocks[i].read() { - continue - } - - v, err := DecodeFloatBlock(k.blocks[i].b, &[]FloatValue{}) - if err != nil { - k.err = err - return nil - } - - // Remove values we already read - v = FloatValues(v).Exclude(k.blocks[i].readMin, k.blocks[i].readMax) - - // Filter out only the values for overlapping block - v = FloatValues(v).Include(minTime, maxTime) - if len(v) > 0 { - // Record that we read a subset of the block - k.blocks[i].markRead(v[0].UnixNano(), v[len(v)-1].UnixNano()) - } - - // Apply each tombstone to the block - for _, ts := range k.blocks[i].tombstones { - v = FloatValues(v).Exclude(ts.Min, ts.Max) - } - - k.mergedFloatValues = k.mergedFloatValues.Merge(v) - } - } - - // Since we combined multiple blocks, we could have more values than we should put into - // a single block. We need to chunk them up into groups and re-encode them. - return k.chunkFloat(nil) - } else { - var i int - - for i < len(k.blocks) { - - // skip this block if it's values were already read - if k.blocks[i].read() { - i++ - continue - } - // If we this block is already full, just add it as is - if BlockCount(k.blocks[i].b) >= k.size { - k.merged = append(k.merged, k.blocks[i]) - } else { - break - } - i++ - } - - if k.fast { - for i < len(k.blocks) { - // skip this block if it's values were already read - if k.blocks[i].read() { - i++ - continue - } - - k.merged = append(k.merged, k.blocks[i]) - i++ - } - } - - // If we only have 1 blocks left, just append it as is and avoid decoding/recoding - if i == len(k.blocks)-1 { - if !k.blocks[i].read() { - k.merged = append(k.merged, k.blocks[i]) - } - i++ - } - - // The remaining blocks can be combined and we know that they do not overlap and - // so we can just append each, sort and re-encode. 
- for i < len(k.blocks) && len(k.mergedFloatValues) < k.size { - if k.blocks[i].read() { - i++ - continue - } - - v, err := DecodeFloatBlock(k.blocks[i].b, &[]FloatValue{}) - if err != nil { - k.err = err - return nil - } - - // Apply each tombstone to the block - for _, ts := range k.blocks[i].tombstones { - v = FloatValues(v).Exclude(ts.Min, ts.Max) - } - - k.blocks[i].markRead(k.blocks[i].minTime, k.blocks[i].maxTime) - - k.mergedFloatValues = k.mergedFloatValues.Merge(v) - i++ - } - - k.blocks = k.blocks[i:] - - return k.chunkFloat(k.merged) - } -} - -func (k *tsmKeyIterator) chunkFloat(dst blocks) blocks { - if len(k.mergedFloatValues) > k.size { - values := k.mergedFloatValues[:k.size] - cb, err := FloatValues(values).Encode(nil) - if err != nil { - k.err = err - return nil - } - - dst = append(dst, &block{ - minTime: values[0].UnixNano(), - maxTime: values[len(values)-1].UnixNano(), - key: k.key, - b: cb, - }) - k.mergedFloatValues = k.mergedFloatValues[k.size:] - return dst - } - - // Re-encode the remaining values into the last block - if len(k.mergedFloatValues) > 0 { - cb, err := FloatValues(k.mergedFloatValues).Encode(nil) - if err != nil { - k.err = err - return nil - } - - dst = append(dst, &block{ - minTime: k.mergedFloatValues[0].UnixNano(), - maxTime: k.mergedFloatValues[len(k.mergedFloatValues)-1].UnixNano(), - key: k.key, - b: cb, - }) - k.mergedFloatValues = k.mergedFloatValues[:0] - } - return dst -} - -// merge combines the next set of blocks into merged blocks. -func (k *tsmKeyIterator) mergeInteger() { - // No blocks left, or pending merged values, we're done - if len(k.blocks) == 0 && len(k.merged) == 0 && len(k.mergedIntegerValues) == 0 { - return - } - - sort.Stable(k.blocks) - - dedup := len(k.mergedIntegerValues) != 0 - if len(k.blocks) > 0 && !dedup { - // If we have more than one block or any partially tombstoned blocks, we many need to dedup - dedup = len(k.blocks[0].tombstones) > 0 || k.blocks[0].partiallyRead() - - // Quickly scan each block to see if any overlap with the prior block, if they overlap then - // we need to dedup as there may be duplicate points now - for i := 1; !dedup && i < len(k.blocks); i++ { - dedup = k.blocks[i].partiallyRead() || - k.blocks[i].overlapsTimeRange(k.blocks[i-1].minTime, k.blocks[i-1].maxTime) || - len(k.blocks[i].tombstones) > 0 - } - - } - - k.merged = k.combineInteger(dedup) -} - -// combine returns a new set of blocks using the current blocks in the buffers. If dedup -// is true, all the blocks will be decoded, dedup and sorted in in order. If dedup is false, -// only blocks that are smaller than the chunk size will be decoded and combined. -func (k *tsmKeyIterator) combineInteger(dedup bool) blocks { - if dedup { - for len(k.mergedIntegerValues) < k.size && len(k.blocks) > 0 { - for len(k.blocks) > 0 && k.blocks[0].read() { - k.blocks = k.blocks[1:] - } - - if len(k.blocks) == 0 { - break - } - first := k.blocks[0] - minTime := first.minTime - maxTime := first.maxTime - - // Adjust the min time to the start of any overlapping blocks. 
- for i := 0; i < len(k.blocks); i++ { - if k.blocks[i].overlapsTimeRange(minTime, maxTime) && !k.blocks[i].read() { - if k.blocks[i].minTime < minTime { - minTime = k.blocks[i].minTime - } - if k.blocks[i].maxTime > minTime && k.blocks[i].maxTime < maxTime { - maxTime = k.blocks[i].maxTime - } - } - } - - // We have some overlapping blocks so decode all, append in order and then dedup - for i := 0; i < len(k.blocks); i++ { - if !k.blocks[i].overlapsTimeRange(minTime, maxTime) || k.blocks[i].read() { - continue - } - - v, err := DecodeIntegerBlock(k.blocks[i].b, &[]IntegerValue{}) - if err != nil { - k.err = err - return nil - } - - // Remove values we already read - v = IntegerValues(v).Exclude(k.blocks[i].readMin, k.blocks[i].readMax) - - // Filter out only the values for overlapping block - v = IntegerValues(v).Include(minTime, maxTime) - if len(v) > 0 { - // Record that we read a subset of the block - k.blocks[i].markRead(v[0].UnixNano(), v[len(v)-1].UnixNano()) - } - - // Apply each tombstone to the block - for _, ts := range k.blocks[i].tombstones { - v = IntegerValues(v).Exclude(ts.Min, ts.Max) - } - - k.mergedIntegerValues = k.mergedIntegerValues.Merge(v) - } - } - - // Since we combined multiple blocks, we could have more values than we should put into - // a single block. We need to chunk them up into groups and re-encode them. - return k.chunkInteger(nil) - } else { - var i int - - for i < len(k.blocks) { - - // skip this block if it's values were already read - if k.blocks[i].read() { - i++ - continue - } - // If we this block is already full, just add it as is - if BlockCount(k.blocks[i].b) >= k.size { - k.merged = append(k.merged, k.blocks[i]) - } else { - break - } - i++ - } - - if k.fast { - for i < len(k.blocks) { - // skip this block if it's values were already read - if k.blocks[i].read() { - i++ - continue - } - - k.merged = append(k.merged, k.blocks[i]) - i++ - } - } - - // If we only have 1 blocks left, just append it as is and avoid decoding/recoding - if i == len(k.blocks)-1 { - if !k.blocks[i].read() { - k.merged = append(k.merged, k.blocks[i]) - } - i++ - } - - // The remaining blocks can be combined and we know that they do not overlap and - // so we can just append each, sort and re-encode. 
- for i < len(k.blocks) && len(k.mergedIntegerValues) < k.size { - if k.blocks[i].read() { - i++ - continue - } - - v, err := DecodeIntegerBlock(k.blocks[i].b, &[]IntegerValue{}) - if err != nil { - k.err = err - return nil - } - - // Apply each tombstone to the block - for _, ts := range k.blocks[i].tombstones { - v = IntegerValues(v).Exclude(ts.Min, ts.Max) - } - - k.blocks[i].markRead(k.blocks[i].minTime, k.blocks[i].maxTime) - - k.mergedIntegerValues = k.mergedIntegerValues.Merge(v) - i++ - } - - k.blocks = k.blocks[i:] - - return k.chunkInteger(k.merged) - } -} - -func (k *tsmKeyIterator) chunkInteger(dst blocks) blocks { - if len(k.mergedIntegerValues) > k.size { - values := k.mergedIntegerValues[:k.size] - cb, err := IntegerValues(values).Encode(nil) - if err != nil { - k.err = err - return nil - } - - dst = append(dst, &block{ - minTime: values[0].UnixNano(), - maxTime: values[len(values)-1].UnixNano(), - key: k.key, - b: cb, - }) - k.mergedIntegerValues = k.mergedIntegerValues[k.size:] - return dst - } - - // Re-encode the remaining values into the last block - if len(k.mergedIntegerValues) > 0 { - cb, err := IntegerValues(k.mergedIntegerValues).Encode(nil) - if err != nil { - k.err = err - return nil - } - - dst = append(dst, &block{ - minTime: k.mergedIntegerValues[0].UnixNano(), - maxTime: k.mergedIntegerValues[len(k.mergedIntegerValues)-1].UnixNano(), - key: k.key, - b: cb, - }) - k.mergedIntegerValues = k.mergedIntegerValues[:0] - } - return dst -} - -// merge combines the next set of blocks into merged blocks. -func (k *tsmKeyIterator) mergeUnsigned() { - // No blocks left, or pending merged values, we're done - if len(k.blocks) == 0 && len(k.merged) == 0 && len(k.mergedUnsignedValues) == 0 { - return - } - - sort.Stable(k.blocks) - - dedup := len(k.mergedUnsignedValues) != 0 - if len(k.blocks) > 0 && !dedup { - // If we have more than one block or any partially tombstoned blocks, we many need to dedup - dedup = len(k.blocks[0].tombstones) > 0 || k.blocks[0].partiallyRead() - - // Quickly scan each block to see if any overlap with the prior block, if they overlap then - // we need to dedup as there may be duplicate points now - for i := 1; !dedup && i < len(k.blocks); i++ { - dedup = k.blocks[i].partiallyRead() || - k.blocks[i].overlapsTimeRange(k.blocks[i-1].minTime, k.blocks[i-1].maxTime) || - len(k.blocks[i].tombstones) > 0 - } - - } - - k.merged = k.combineUnsigned(dedup) -} - -// combine returns a new set of blocks using the current blocks in the buffers. If dedup -// is true, all the blocks will be decoded, dedup and sorted in in order. If dedup is false, -// only blocks that are smaller than the chunk size will be decoded and combined. -func (k *tsmKeyIterator) combineUnsigned(dedup bool) blocks { - if dedup { - for len(k.mergedUnsignedValues) < k.size && len(k.blocks) > 0 { - for len(k.blocks) > 0 && k.blocks[0].read() { - k.blocks = k.blocks[1:] - } - - if len(k.blocks) == 0 { - break - } - first := k.blocks[0] - minTime := first.minTime - maxTime := first.maxTime - - // Adjust the min time to the start of any overlapping blocks. 
- for i := 0; i < len(k.blocks); i++ { - if k.blocks[i].overlapsTimeRange(minTime, maxTime) && !k.blocks[i].read() { - if k.blocks[i].minTime < minTime { - minTime = k.blocks[i].minTime - } - if k.blocks[i].maxTime > minTime && k.blocks[i].maxTime < maxTime { - maxTime = k.blocks[i].maxTime - } - } - } - - // We have some overlapping blocks so decode all, append in order and then dedup - for i := 0; i < len(k.blocks); i++ { - if !k.blocks[i].overlapsTimeRange(minTime, maxTime) || k.blocks[i].read() { - continue - } - - v, err := DecodeUnsignedBlock(k.blocks[i].b, &[]UnsignedValue{}) - if err != nil { - k.err = err - return nil - } - - // Remove values we already read - v = UnsignedValues(v).Exclude(k.blocks[i].readMin, k.blocks[i].readMax) - - // Filter out only the values for overlapping block - v = UnsignedValues(v).Include(minTime, maxTime) - if len(v) > 0 { - // Record that we read a subset of the block - k.blocks[i].markRead(v[0].UnixNano(), v[len(v)-1].UnixNano()) - } - - // Apply each tombstone to the block - for _, ts := range k.blocks[i].tombstones { - v = UnsignedValues(v).Exclude(ts.Min, ts.Max) - } - - k.mergedUnsignedValues = k.mergedUnsignedValues.Merge(v) - } - } - - // Since we combined multiple blocks, we could have more values than we should put into - // a single block. We need to chunk them up into groups and re-encode them. - return k.chunkUnsigned(nil) - } else { - var i int - - for i < len(k.blocks) { - - // skip this block if it's values were already read - if k.blocks[i].read() { - i++ - continue - } - // If we this block is already full, just add it as is - if BlockCount(k.blocks[i].b) >= k.size { - k.merged = append(k.merged, k.blocks[i]) - } else { - break - } - i++ - } - - if k.fast { - for i < len(k.blocks) { - // skip this block if it's values were already read - if k.blocks[i].read() { - i++ - continue - } - - k.merged = append(k.merged, k.blocks[i]) - i++ - } - } - - // If we only have 1 blocks left, just append it as is and avoid decoding/recoding - if i == len(k.blocks)-1 { - if !k.blocks[i].read() { - k.merged = append(k.merged, k.blocks[i]) - } - i++ - } - - // The remaining blocks can be combined and we know that they do not overlap and - // so we can just append each, sort and re-encode. 
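The window computation at the top of each dedup pass seeds [minTime, maxTime] from the first unread block and then walks the remaining blocks, pulling the lower bound back to the earliest overlapping start and the upper bound down to the smallest overlapping end, exactly as in the loop above. A standalone sketch of that adjustment, assuming a non-empty, time-ordered input; the `span` type is a hypothetical stand-in for the iterator's block metadata:

```go
package main

import "fmt"

// span is a hypothetical stand-in for a block's time range and read state.
type span struct {
	minTime, maxTime int64
	read             bool
}

func (s span) overlaps(min, max int64) bool {
	return s.minTime <= max && s.maxTime >= min
}

// mergeWindow mirrors the adjustment in the combine* functions: start from the
// first block's range, then for every unread overlapping block widen minTime to
// the earliest start and shrink maxTime to the smallest end still past minTime.
func mergeWindow(blocks []span) (int64, int64) {
	minTime, maxTime := blocks[0].minTime, blocks[0].maxTime
	for _, b := range blocks {
		if !b.overlaps(minTime, maxTime) || b.read {
			continue
		}
		if b.minTime < minTime {
			minTime = b.minTime
		}
		if b.maxTime > minTime && b.maxTime < maxTime {
			maxTime = b.maxTime
		}
	}
	return minTime, maxTime
}

func main() {
	blocks := []span{
		{minTime: 100, maxTime: 500},
		{minTime: 50, maxTime: 300},  // overlaps: widens minTime, shrinks maxTime
		{minTime: 600, maxTime: 700}, // no overlap: ignored
	}
	min, max := mergeWindow(blocks)
	fmt.Println(min, max) // 50 300
}
```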
- for i < len(k.blocks) && len(k.mergedUnsignedValues) < k.size { - if k.blocks[i].read() { - i++ - continue - } - - v, err := DecodeUnsignedBlock(k.blocks[i].b, &[]UnsignedValue{}) - if err != nil { - k.err = err - return nil - } - - // Apply each tombstone to the block - for _, ts := range k.blocks[i].tombstones { - v = UnsignedValues(v).Exclude(ts.Min, ts.Max) - } - - k.blocks[i].markRead(k.blocks[i].minTime, k.blocks[i].maxTime) - - k.mergedUnsignedValues = k.mergedUnsignedValues.Merge(v) - i++ - } - - k.blocks = k.blocks[i:] - - return k.chunkUnsigned(k.merged) - } -} - -func (k *tsmKeyIterator) chunkUnsigned(dst blocks) blocks { - if len(k.mergedUnsignedValues) > k.size { - values := k.mergedUnsignedValues[:k.size] - cb, err := UnsignedValues(values).Encode(nil) - if err != nil { - k.err = err - return nil - } - - dst = append(dst, &block{ - minTime: values[0].UnixNano(), - maxTime: values[len(values)-1].UnixNano(), - key: k.key, - b: cb, - }) - k.mergedUnsignedValues = k.mergedUnsignedValues[k.size:] - return dst - } - - // Re-encode the remaining values into the last block - if len(k.mergedUnsignedValues) > 0 { - cb, err := UnsignedValues(k.mergedUnsignedValues).Encode(nil) - if err != nil { - k.err = err - return nil - } - - dst = append(dst, &block{ - minTime: k.mergedUnsignedValues[0].UnixNano(), - maxTime: k.mergedUnsignedValues[len(k.mergedUnsignedValues)-1].UnixNano(), - key: k.key, - b: cb, - }) - k.mergedUnsignedValues = k.mergedUnsignedValues[:0] - } - return dst -} - -// merge combines the next set of blocks into merged blocks. -func (k *tsmKeyIterator) mergeString() { - // No blocks left, or pending merged values, we're done - if len(k.blocks) == 0 && len(k.merged) == 0 && len(k.mergedStringValues) == 0 { - return - } - - sort.Stable(k.blocks) - - dedup := len(k.mergedStringValues) != 0 - if len(k.blocks) > 0 && !dedup { - // If we have more than one block or any partially tombstoned blocks, we many need to dedup - dedup = len(k.blocks[0].tombstones) > 0 || k.blocks[0].partiallyRead() - - // Quickly scan each block to see if any overlap with the prior block, if they overlap then - // we need to dedup as there may be duplicate points now - for i := 1; !dedup && i < len(k.blocks); i++ { - dedup = k.blocks[i].partiallyRead() || - k.blocks[i].overlapsTimeRange(k.blocks[i-1].minTime, k.blocks[i-1].maxTime) || - len(k.blocks[i].tombstones) > 0 - } - - } - - k.merged = k.combineString(dedup) -} - -// combine returns a new set of blocks using the current blocks in the buffers. If dedup -// is true, all the blocks will be decoded, dedup and sorted in in order. If dedup is false, -// only blocks that are smaller than the chunk size will be decoded and combined. -func (k *tsmKeyIterator) combineString(dedup bool) blocks { - if dedup { - for len(k.mergedStringValues) < k.size && len(k.blocks) > 0 { - for len(k.blocks) > 0 && k.blocks[0].read() { - k.blocks = k.blocks[1:] - } - - if len(k.blocks) == 0 { - break - } - first := k.blocks[0] - minTime := first.minTime - maxTime := first.maxTime - - // Adjust the min time to the start of any overlapping blocks. 
- for i := 0; i < len(k.blocks); i++ { - if k.blocks[i].overlapsTimeRange(minTime, maxTime) && !k.blocks[i].read() { - if k.blocks[i].minTime < minTime { - minTime = k.blocks[i].minTime - } - if k.blocks[i].maxTime > minTime && k.blocks[i].maxTime < maxTime { - maxTime = k.blocks[i].maxTime - } - } - } - - // We have some overlapping blocks so decode all, append in order and then dedup - for i := 0; i < len(k.blocks); i++ { - if !k.blocks[i].overlapsTimeRange(minTime, maxTime) || k.blocks[i].read() { - continue - } - - v, err := DecodeStringBlock(k.blocks[i].b, &[]StringValue{}) - if err != nil { - k.err = err - return nil - } - - // Remove values we already read - v = StringValues(v).Exclude(k.blocks[i].readMin, k.blocks[i].readMax) - - // Filter out only the values for overlapping block - v = StringValues(v).Include(minTime, maxTime) - if len(v) > 0 { - // Record that we read a subset of the block - k.blocks[i].markRead(v[0].UnixNano(), v[len(v)-1].UnixNano()) - } - - // Apply each tombstone to the block - for _, ts := range k.blocks[i].tombstones { - v = StringValues(v).Exclude(ts.Min, ts.Max) - } - - k.mergedStringValues = k.mergedStringValues.Merge(v) - } - } - - // Since we combined multiple blocks, we could have more values than we should put into - // a single block. We need to chunk them up into groups and re-encode them. - return k.chunkString(nil) - } else { - var i int - - for i < len(k.blocks) { - - // skip this block if it's values were already read - if k.blocks[i].read() { - i++ - continue - } - // If we this block is already full, just add it as is - if BlockCount(k.blocks[i].b) >= k.size { - k.merged = append(k.merged, k.blocks[i]) - } else { - break - } - i++ - } - - if k.fast { - for i < len(k.blocks) { - // skip this block if it's values were already read - if k.blocks[i].read() { - i++ - continue - } - - k.merged = append(k.merged, k.blocks[i]) - i++ - } - } - - // If we only have 1 blocks left, just append it as is and avoid decoding/recoding - if i == len(k.blocks)-1 { - if !k.blocks[i].read() { - k.merged = append(k.merged, k.blocks[i]) - } - i++ - } - - // The remaining blocks can be combined and we know that they do not overlap and - // so we can just append each, sort and re-encode. 
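In the non-dedup branch above, leading blocks that already hold at least k.size points are passed through untouched, the first undersized block stops the scan, and with the fast flag set every remaining block is passed through without decoding. A simplified sketch of that pass-through decision; the `rawBlock` type and the sizes in the example are illustrative assumptions, not the engine's defaults:

```go
package main

import "fmt"

// rawBlock is a hypothetical stand-in for an encoded TSM block.
type rawBlock struct {
	name  string
	count int // number of points encoded in the block
}

// splitPassThrough mirrors the non-dedup combine path: blocks at the front that
// are already "full" (>= chunkSize points) are appended verbatim; the first
// undersized block stops the scan, and everything after it would be decoded,
// merged and re-encoded. Fast compactions skip decoding entirely.
func splitPassThrough(blocks []rawBlock, chunkSize int, fast bool) (passThrough, recode []rawBlock) {
	i := 0
	for i < len(blocks) && blocks[i].count >= chunkSize {
		passThrough = append(passThrough, blocks[i])
		i++
	}
	if fast {
		return append(passThrough, blocks[i:]...), nil
	}
	return passThrough, blocks[i:]
}

func main() {
	blocks := []rawBlock{
		{"a", 1000}, {"b", 1000}, {"c", 250}, {"d", 400},
	}
	keep, recode := splitPassThrough(blocks, 1000, false)
	fmt.Println(len(keep), len(recode)) // 2 2
}
```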
- for i < len(k.blocks) && len(k.mergedStringValues) < k.size { - if k.blocks[i].read() { - i++ - continue - } - - v, err := DecodeStringBlock(k.blocks[i].b, &[]StringValue{}) - if err != nil { - k.err = err - return nil - } - - // Apply each tombstone to the block - for _, ts := range k.blocks[i].tombstones { - v = StringValues(v).Exclude(ts.Min, ts.Max) - } - - k.blocks[i].markRead(k.blocks[i].minTime, k.blocks[i].maxTime) - - k.mergedStringValues = k.mergedStringValues.Merge(v) - i++ - } - - k.blocks = k.blocks[i:] - - return k.chunkString(k.merged) - } -} - -func (k *tsmKeyIterator) chunkString(dst blocks) blocks { - if len(k.mergedStringValues) > k.size { - values := k.mergedStringValues[:k.size] - cb, err := StringValues(values).Encode(nil) - if err != nil { - k.err = err - return nil - } - - dst = append(dst, &block{ - minTime: values[0].UnixNano(), - maxTime: values[len(values)-1].UnixNano(), - key: k.key, - b: cb, - }) - k.mergedStringValues = k.mergedStringValues[k.size:] - return dst - } - - // Re-encode the remaining values into the last block - if len(k.mergedStringValues) > 0 { - cb, err := StringValues(k.mergedStringValues).Encode(nil) - if err != nil { - k.err = err - return nil - } - - dst = append(dst, &block{ - minTime: k.mergedStringValues[0].UnixNano(), - maxTime: k.mergedStringValues[len(k.mergedStringValues)-1].UnixNano(), - key: k.key, - b: cb, - }) - k.mergedStringValues = k.mergedStringValues[:0] - } - return dst -} - -// merge combines the next set of blocks into merged blocks. -func (k *tsmKeyIterator) mergeBoolean() { - // No blocks left, or pending merged values, we're done - if len(k.blocks) == 0 && len(k.merged) == 0 && len(k.mergedBooleanValues) == 0 { - return - } - - sort.Stable(k.blocks) - - dedup := len(k.mergedBooleanValues) != 0 - if len(k.blocks) > 0 && !dedup { - // If we have more than one block or any partially tombstoned blocks, we many need to dedup - dedup = len(k.blocks[0].tombstones) > 0 || k.blocks[0].partiallyRead() - - // Quickly scan each block to see if any overlap with the prior block, if they overlap then - // we need to dedup as there may be duplicate points now - for i := 1; !dedup && i < len(k.blocks); i++ { - dedup = k.blocks[i].partiallyRead() || - k.blocks[i].overlapsTimeRange(k.blocks[i-1].minTime, k.blocks[i-1].maxTime) || - len(k.blocks[i].tombstones) > 0 - } - - } - - k.merged = k.combineBoolean(dedup) -} - -// combine returns a new set of blocks using the current blocks in the buffers. If dedup -// is true, all the blocks will be decoded, dedup and sorted in in order. If dedup is false, -// only blocks that are smaller than the chunk size will be decoded and combined. -func (k *tsmKeyIterator) combineBoolean(dedup bool) blocks { - if dedup { - for len(k.mergedBooleanValues) < k.size && len(k.blocks) > 0 { - for len(k.blocks) > 0 && k.blocks[0].read() { - k.blocks = k.blocks[1:] - } - - if len(k.blocks) == 0 { - break - } - first := k.blocks[0] - minTime := first.minTime - maxTime := first.maxTime - - // Adjust the min time to the start of any overlapping blocks. 
- for i := 0; i < len(k.blocks); i++ { - if k.blocks[i].overlapsTimeRange(minTime, maxTime) && !k.blocks[i].read() { - if k.blocks[i].minTime < minTime { - minTime = k.blocks[i].minTime - } - if k.blocks[i].maxTime > minTime && k.blocks[i].maxTime < maxTime { - maxTime = k.blocks[i].maxTime - } - } - } - - // We have some overlapping blocks so decode all, append in order and then dedup - for i := 0; i < len(k.blocks); i++ { - if !k.blocks[i].overlapsTimeRange(minTime, maxTime) || k.blocks[i].read() { - continue - } - - v, err := DecodeBooleanBlock(k.blocks[i].b, &[]BooleanValue{}) - if err != nil { - k.err = err - return nil - } - - // Remove values we already read - v = BooleanValues(v).Exclude(k.blocks[i].readMin, k.blocks[i].readMax) - - // Filter out only the values for overlapping block - v = BooleanValues(v).Include(minTime, maxTime) - if len(v) > 0 { - // Record that we read a subset of the block - k.blocks[i].markRead(v[0].UnixNano(), v[len(v)-1].UnixNano()) - } - - // Apply each tombstone to the block - for _, ts := range k.blocks[i].tombstones { - v = BooleanValues(v).Exclude(ts.Min, ts.Max) - } - - k.mergedBooleanValues = k.mergedBooleanValues.Merge(v) - } - } - - // Since we combined multiple blocks, we could have more values than we should put into - // a single block. We need to chunk them up into groups and re-encode them. - return k.chunkBoolean(nil) - } else { - var i int - - for i < len(k.blocks) { - - // skip this block if it's values were already read - if k.blocks[i].read() { - i++ - continue - } - // If we this block is already full, just add it as is - if BlockCount(k.blocks[i].b) >= k.size { - k.merged = append(k.merged, k.blocks[i]) - } else { - break - } - i++ - } - - if k.fast { - for i < len(k.blocks) { - // skip this block if it's values were already read - if k.blocks[i].read() { - i++ - continue - } - - k.merged = append(k.merged, k.blocks[i]) - i++ - } - } - - // If we only have 1 blocks left, just append it as is and avoid decoding/recoding - if i == len(k.blocks)-1 { - if !k.blocks[i].read() { - k.merged = append(k.merged, k.blocks[i]) - } - i++ - } - - // The remaining blocks can be combined and we know that they do not overlap and - // so we can just append each, sort and re-encode. 
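Each merge* function above decides up front whether the slower dedup path is required: tombstones or a partial read on the first block, or, for any later block, a partial read, tombstones, or a time-range overlap with the previous block. A standalone sketch of that check; the `blockMeta` summary type is hypothetical:

```go
package main

import "fmt"

// blockMeta is a hypothetical summary of what the dedup check inspects.
type blockMeta struct {
	minTime, maxTime int64
	tombstones       int  // number of tombstone ranges attached to the block
	partiallyRead    bool // some, but not all, of the block was already returned
}

// needsDedup mirrors the scan in the merge* functions: the first block's
// tombstones or partial reads force dedup, and each later block forces it if it
// is partially read, carries tombstones, or overlaps the previous block's time
// range (which would leave duplicate points after a plain concatenation).
func needsDedup(blocks []blockMeta) bool {
	if len(blocks) == 0 {
		return false
	}
	dedup := blocks[0].tombstones > 0 || blocks[0].partiallyRead
	for i := 1; !dedup && i < len(blocks); i++ {
		prev, cur := blocks[i-1], blocks[i]
		overlaps := cur.minTime <= prev.maxTime && cur.maxTime >= prev.minTime
		dedup = cur.partiallyRead || overlaps || cur.tombstones > 0
	}
	return dedup
}

func main() {
	disjoint := []blockMeta{{minTime: 0, maxTime: 9}, {minTime: 10, maxTime: 19}}
	overlapping := []blockMeta{{minTime: 0, maxTime: 15}, {minTime: 10, maxTime: 19}}
	fmt.Println(needsDedup(disjoint), needsDedup(overlapping)) // false true
}
```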
- for i < len(k.blocks) && len(k.mergedBooleanValues) < k.size { - if k.blocks[i].read() { - i++ - continue - } - - v, err := DecodeBooleanBlock(k.blocks[i].b, &[]BooleanValue{}) - if err != nil { - k.err = err - return nil - } - - // Apply each tombstone to the block - for _, ts := range k.blocks[i].tombstones { - v = BooleanValues(v).Exclude(ts.Min, ts.Max) - } - - k.blocks[i].markRead(k.blocks[i].minTime, k.blocks[i].maxTime) - - k.mergedBooleanValues = k.mergedBooleanValues.Merge(v) - i++ - } - - k.blocks = k.blocks[i:] - - return k.chunkBoolean(k.merged) - } -} - -func (k *tsmKeyIterator) chunkBoolean(dst blocks) blocks { - if len(k.mergedBooleanValues) > k.size { - values := k.mergedBooleanValues[:k.size] - cb, err := BooleanValues(values).Encode(nil) - if err != nil { - k.err = err - return nil - } - - dst = append(dst, &block{ - minTime: values[0].UnixNano(), - maxTime: values[len(values)-1].UnixNano(), - key: k.key, - b: cb, - }) - k.mergedBooleanValues = k.mergedBooleanValues[k.size:] - return dst - } - - // Re-encode the remaining values into the last block - if len(k.mergedBooleanValues) > 0 { - cb, err := BooleanValues(k.mergedBooleanValues).Encode(nil) - if err != nil { - k.err = err - return nil - } - - dst = append(dst, &block{ - minTime: k.mergedBooleanValues[0].UnixNano(), - maxTime: k.mergedBooleanValues[len(k.mergedBooleanValues)-1].UnixNano(), - key: k.key, - b: cb, - }) - k.mergedBooleanValues = k.mergedBooleanValues[:0] - } - return dst -} - -// merge combines the next set of blocks into merged blocks. -func (k *tsmBatchKeyIterator) mergeFloat() { - // No blocks left, or pending merged values, we're done - if len(k.blocks) == 0 && len(k.merged) == 0 && k.mergedFloatValues.Len() == 0 { - return - } - - sort.Stable(k.blocks) - - dedup := k.mergedFloatValues.Len() != 0 - if len(k.blocks) > 0 && !dedup { - // If we have more than one block or any partially tombstoned blocks, we many need to dedup - dedup = len(k.blocks[0].tombstones) > 0 || k.blocks[0].partiallyRead() - - // Quickly scan each block to see if any overlap with the prior block, if they overlap then - // we need to dedup as there may be duplicate points now - for i := 1; !dedup && i < len(k.blocks); i++ { - dedup = k.blocks[i].partiallyRead() || - k.blocks[i].overlapsTimeRange(k.blocks[i-1].minTime, k.blocks[i-1].maxTime) || - len(k.blocks[i].tombstones) > 0 - } - - } - - k.merged = k.combineFloat(dedup) -} - -// combine returns a new set of blocks using the current blocks in the buffers. If dedup -// is true, all the blocks will be decoded, dedup and sorted in in order. If dedup is false, -// only blocks that are smaller than the chunk size will be decoded and combined. -func (k *tsmBatchKeyIterator) combineFloat(dedup bool) blocks { - if dedup { - for k.mergedFloatValues.Len() < k.size && len(k.blocks) > 0 { - for len(k.blocks) > 0 && k.blocks[0].read() { - k.blocks = k.blocks[1:] - } - - if len(k.blocks) == 0 { - break - } - first := k.blocks[0] - minTime := first.minTime - maxTime := first.maxTime - - // Adjust the min time to the start of any overlapping blocks. 
- for i := 0; i < len(k.blocks); i++ { - if k.blocks[i].overlapsTimeRange(minTime, maxTime) && !k.blocks[i].read() { - if k.blocks[i].minTime < minTime { - minTime = k.blocks[i].minTime - } - if k.blocks[i].maxTime > minTime && k.blocks[i].maxTime < maxTime { - maxTime = k.blocks[i].maxTime - } - } - } - - // We have some overlapping blocks so decode all, append in order and then dedup - for i := 0; i < len(k.blocks); i++ { - if !k.blocks[i].overlapsTimeRange(minTime, maxTime) || k.blocks[i].read() { - continue - } - - var v cursors.FloatArray - var err error - if err = DecodeFloatArrayBlock(k.blocks[i].b, &v); err != nil { - k.err = err - return nil - } - - // Invariant: v.MaxTime() == k.blocks[i].maxTime - if k.blocks[i].maxTime != v.MaxTime() { - if maxTime == k.blocks[i].maxTime { - maxTime = v.MaxTime() - } - k.blocks[i].maxTime = v.MaxTime() - } - - // Remove values we already read - v.Exclude(k.blocks[i].readMin, k.blocks[i].readMax) - - // Filter out only the values for overlapping block - v.Include(minTime, maxTime) - if v.Len() > 0 { - // Record that we read a subset of the block - k.blocks[i].markRead(v.MinTime(), v.MaxTime()) - } - - // Apply each tombstone to the block - for _, ts := range k.blocks[i].tombstones { - v.Exclude(ts.Min, ts.Max) - } - - k.mergedFloatValues.Merge(&v) - } - } - - // Since we combined multiple blocks, we could have more values than we should put into - // a single block. We need to chunk them up into groups and re-encode them. - return k.chunkFloat(nil) - } - var i int - - for i < len(k.blocks) { - - // skip this block if it's values were already read - if k.blocks[i].read() { - i++ - continue - } - // If we this block is already full, just add it as is - if BlockCount(k.blocks[i].b) >= k.size { - k.merged = append(k.merged, k.blocks[i]) - } else { - break - } - i++ - } - - if k.fast { - for i < len(k.blocks) { - // skip this block if it's values were already read - if k.blocks[i].read() { - i++ - continue - } - - k.merged = append(k.merged, k.blocks[i]) - i++ - } - } - - // If we only have 1 blocks left, just append it as is and avoid decoding/recoding - if i == len(k.blocks)-1 { - if !k.blocks[i].read() { - k.merged = append(k.merged, k.blocks[i]) - } - i++ - } - - // The remaining blocks can be combined and we know that they do not overlap and - // so we can just append each, sort and re-encode. 
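The tsmBatchKeyIterator variants above accumulate into columnar cursors.*Array buffers and rely on Merge to interleave two time-sorted arrays. A rough sketch of that merge on parallel timestamp/value slices, assuming (as the dedup path implies) that when both sides carry the same timestamp the incoming value wins; the real cursors implementation may differ in details such as in-place growth and buffer reuse:

```go
package main

import "fmt"

// floatArray is a hypothetical, simplified stand-in for cursors.FloatArray:
// parallel, time-sorted slices of timestamps and values.
type floatArray struct {
	Timestamps []int64
	Values     []float64
}

// merge interleaves src into dst by timestamp. On equal timestamps the value
// from src replaces the one already in dst, which is the behaviour the
// compaction dedup path depends on when blocks overlap.
func merge(dst, src floatArray) floatArray {
	out := floatArray{
		Timestamps: make([]int64, 0, len(dst.Timestamps)+len(src.Timestamps)),
		Values:     make([]float64, 0, len(dst.Values)+len(src.Values)),
	}
	i, j := 0, 0
	for i < len(dst.Timestamps) && j < len(src.Timestamps) {
		switch {
		case dst.Timestamps[i] < src.Timestamps[j]:
			out.Timestamps = append(out.Timestamps, dst.Timestamps[i])
			out.Values = append(out.Values, dst.Values[i])
			i++
		case dst.Timestamps[i] > src.Timestamps[j]:
			out.Timestamps = append(out.Timestamps, src.Timestamps[j])
			out.Values = append(out.Values, src.Values[j])
			j++
		default: // same timestamp: the newer (src) value wins
			out.Timestamps = append(out.Timestamps, src.Timestamps[j])
			out.Values = append(out.Values, src.Values[j])
			i++
			j++
		}
	}
	out.Timestamps = append(out.Timestamps, dst.Timestamps[i:]...)
	out.Values = append(out.Values, dst.Values[i:]...)
	out.Timestamps = append(out.Timestamps, src.Timestamps[j:]...)
	out.Values = append(out.Values, src.Values[j:]...)
	return out
}

func main() {
	a := floatArray{Timestamps: []int64{10, 20, 30}, Values: []float64{1, 2, 3}}
	b := floatArray{Timestamps: []int64{20, 40}, Values: []float64{2.5, 4}}
	m := merge(a, b)
	fmt.Println(m.Timestamps, m.Values) // [10 20 30 40] [1 2.5 3 4]
}
```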
- for i < len(k.blocks) && k.mergedFloatValues.Len() < k.size { - if k.blocks[i].read() { - i++ - continue - } - - var v cursors.FloatArray - if err := DecodeFloatArrayBlock(k.blocks[i].b, &v); err != nil { - k.err = err - return nil - } - - // Invariant: v.MaxTime() == k.blocks[i].maxTime - if k.blocks[i].maxTime != v.MaxTime() { - k.blocks[i].maxTime = v.MaxTime() - } - - // Apply each tombstone to the block - for _, ts := range k.blocks[i].tombstones { - v.Exclude(ts.Min, ts.Max) - } - - k.blocks[i].markRead(k.blocks[i].minTime, k.blocks[i].maxTime) - - k.mergedFloatValues.Merge(&v) - i++ - } - - k.blocks = k.blocks[i:] - - return k.chunkFloat(k.merged) -} - -func (k *tsmBatchKeyIterator) chunkFloat(dst blocks) blocks { - if k.mergedFloatValues.Len() > k.size { - var values cursors.FloatArray - values.Timestamps = k.mergedFloatValues.Timestamps[:k.size] - minTime, maxTime := values.Timestamps[0], values.Timestamps[len(values.Timestamps)-1] - values.Values = k.mergedFloatValues.Values[:k.size] - - cb, err := EncodeFloatArrayBlock(&values, nil) // TODO(edd): pool this buffer - if err != nil { - k.err = err - return nil - } - - dst = append(dst, &block{ - minTime: minTime, - maxTime: maxTime, - key: k.key, - b: cb, - }) - k.mergedFloatValues.Timestamps = k.mergedFloatValues.Timestamps[k.size:] - k.mergedFloatValues.Values = k.mergedFloatValues.Values[k.size:] - return dst - } - - // Re-encode the remaining values into the last block - if k.mergedFloatValues.Len() > 0 { - minTime, maxTime := k.mergedFloatValues.Timestamps[0], k.mergedFloatValues.Timestamps[len(k.mergedFloatValues.Timestamps)-1] - cb, err := EncodeFloatArrayBlock(k.mergedFloatValues, nil) // TODO(edd): pool this buffer - if err != nil { - k.err = err - return nil - } - - dst = append(dst, &block{ - minTime: minTime, - maxTime: maxTime, - key: k.key, - b: cb, - }) - k.mergedFloatValues.Timestamps = k.mergedFloatValues.Timestamps[:0] - k.mergedFloatValues.Values = k.mergedFloatValues.Values[:0] - } - return dst -} - -// merge combines the next set of blocks into merged blocks. -func (k *tsmBatchKeyIterator) mergeInteger() { - // No blocks left, or pending merged values, we're done - if len(k.blocks) == 0 && len(k.merged) == 0 && k.mergedIntegerValues.Len() == 0 { - return - } - - sort.Stable(k.blocks) - - dedup := k.mergedIntegerValues.Len() != 0 - if len(k.blocks) > 0 && !dedup { - // If we have more than one block or any partially tombstoned blocks, we many need to dedup - dedup = len(k.blocks[0].tombstones) > 0 || k.blocks[0].partiallyRead() - - // Quickly scan each block to see if any overlap with the prior block, if they overlap then - // we need to dedup as there may be duplicate points now - for i := 1; !dedup && i < len(k.blocks); i++ { - dedup = k.blocks[i].partiallyRead() || - k.blocks[i].overlapsTimeRange(k.blocks[i-1].minTime, k.blocks[i-1].maxTime) || - len(k.blocks[i].tombstones) > 0 - } - - } - - k.merged = k.combineInteger(dedup) -} - -// combine returns a new set of blocks using the current blocks in the buffers. If dedup -// is true, all the blocks will be decoded, dedup and sorted in in order. If dedup is false, -// only blocks that are smaller than the chunk size will be decoded and combined. 
-func (k *tsmBatchKeyIterator) combineInteger(dedup bool) blocks { - if dedup { - for k.mergedIntegerValues.Len() < k.size && len(k.blocks) > 0 { - for len(k.blocks) > 0 && k.blocks[0].read() { - k.blocks = k.blocks[1:] - } - - if len(k.blocks) == 0 { - break - } - first := k.blocks[0] - minTime := first.minTime - maxTime := first.maxTime - - // Adjust the min time to the start of any overlapping blocks. - for i := 0; i < len(k.blocks); i++ { - if k.blocks[i].overlapsTimeRange(minTime, maxTime) && !k.blocks[i].read() { - if k.blocks[i].minTime < minTime { - minTime = k.blocks[i].minTime - } - if k.blocks[i].maxTime > minTime && k.blocks[i].maxTime < maxTime { - maxTime = k.blocks[i].maxTime - } - } - } - - // We have some overlapping blocks so decode all, append in order and then dedup - for i := 0; i < len(k.blocks); i++ { - if !k.blocks[i].overlapsTimeRange(minTime, maxTime) || k.blocks[i].read() { - continue - } - - var v cursors.IntegerArray - var err error - if err = DecodeIntegerArrayBlock(k.blocks[i].b, &v); err != nil { - k.err = err - return nil - } - - // Invariant: v.MaxTime() == k.blocks[i].maxTime - if k.blocks[i].maxTime != v.MaxTime() { - if maxTime == k.blocks[i].maxTime { - maxTime = v.MaxTime() - } - k.blocks[i].maxTime = v.MaxTime() - } - - // Remove values we already read - v.Exclude(k.blocks[i].readMin, k.blocks[i].readMax) - - // Filter out only the values for overlapping block - v.Include(minTime, maxTime) - if v.Len() > 0 { - // Record that we read a subset of the block - k.blocks[i].markRead(v.MinTime(), v.MaxTime()) - } - - // Apply each tombstone to the block - for _, ts := range k.blocks[i].tombstones { - v.Exclude(ts.Min, ts.Max) - } - - k.mergedIntegerValues.Merge(&v) - } - } - - // Since we combined multiple blocks, we could have more values than we should put into - // a single block. We need to chunk them up into groups and re-encode them. - return k.chunkInteger(nil) - } - var i int - - for i < len(k.blocks) { - - // skip this block if it's values were already read - if k.blocks[i].read() { - i++ - continue - } - // If we this block is already full, just add it as is - if BlockCount(k.blocks[i].b) >= k.size { - k.merged = append(k.merged, k.blocks[i]) - } else { - break - } - i++ - } - - if k.fast { - for i < len(k.blocks) { - // skip this block if it's values were already read - if k.blocks[i].read() { - i++ - continue - } - - k.merged = append(k.merged, k.blocks[i]) - i++ - } - } - - // If we only have 1 blocks left, just append it as is and avoid decoding/recoding - if i == len(k.blocks)-1 { - if !k.blocks[i].read() { - k.merged = append(k.merged, k.blocks[i]) - } - i++ - } - - // The remaining blocks can be combined and we know that they do not overlap and - // so we can just append each, sort and re-encode. 
- for i < len(k.blocks) && k.mergedIntegerValues.Len() < k.size { - if k.blocks[i].read() { - i++ - continue - } - - var v cursors.IntegerArray - if err := DecodeIntegerArrayBlock(k.blocks[i].b, &v); err != nil { - k.err = err - return nil - } - - // Invariant: v.MaxTime() == k.blocks[i].maxTime - if k.blocks[i].maxTime != v.MaxTime() { - k.blocks[i].maxTime = v.MaxTime() - } - - // Apply each tombstone to the block - for _, ts := range k.blocks[i].tombstones { - v.Exclude(ts.Min, ts.Max) - } - - k.blocks[i].markRead(k.blocks[i].minTime, k.blocks[i].maxTime) - - k.mergedIntegerValues.Merge(&v) - i++ - } - - k.blocks = k.blocks[i:] - - return k.chunkInteger(k.merged) -} - -func (k *tsmBatchKeyIterator) chunkInteger(dst blocks) blocks { - if k.mergedIntegerValues.Len() > k.size { - var values cursors.IntegerArray - values.Timestamps = k.mergedIntegerValues.Timestamps[:k.size] - minTime, maxTime := values.Timestamps[0], values.Timestamps[len(values.Timestamps)-1] - values.Values = k.mergedIntegerValues.Values[:k.size] - - cb, err := EncodeIntegerArrayBlock(&values, nil) // TODO(edd): pool this buffer - if err != nil { - k.err = err - return nil - } - - dst = append(dst, &block{ - minTime: minTime, - maxTime: maxTime, - key: k.key, - b: cb, - }) - k.mergedIntegerValues.Timestamps = k.mergedIntegerValues.Timestamps[k.size:] - k.mergedIntegerValues.Values = k.mergedIntegerValues.Values[k.size:] - return dst - } - - // Re-encode the remaining values into the last block - if k.mergedIntegerValues.Len() > 0 { - minTime, maxTime := k.mergedIntegerValues.Timestamps[0], k.mergedIntegerValues.Timestamps[len(k.mergedIntegerValues.Timestamps)-1] - cb, err := EncodeIntegerArrayBlock(k.mergedIntegerValues, nil) // TODO(edd): pool this buffer - if err != nil { - k.err = err - return nil - } - - dst = append(dst, &block{ - minTime: minTime, - maxTime: maxTime, - key: k.key, - b: cb, - }) - k.mergedIntegerValues.Timestamps = k.mergedIntegerValues.Timestamps[:0] - k.mergedIntegerValues.Values = k.mergedIntegerValues.Values[:0] - } - return dst -} - -// merge combines the next set of blocks into merged blocks. -func (k *tsmBatchKeyIterator) mergeUnsigned() { - // No blocks left, or pending merged values, we're done - if len(k.blocks) == 0 && len(k.merged) == 0 && k.mergedUnsignedValues.Len() == 0 { - return - } - - sort.Stable(k.blocks) - - dedup := k.mergedUnsignedValues.Len() != 0 - if len(k.blocks) > 0 && !dedup { - // If we have more than one block or any partially tombstoned blocks, we many need to dedup - dedup = len(k.blocks[0].tombstones) > 0 || k.blocks[0].partiallyRead() - - // Quickly scan each block to see if any overlap with the prior block, if they overlap then - // we need to dedup as there may be duplicate points now - for i := 1; !dedup && i < len(k.blocks); i++ { - dedup = k.blocks[i].partiallyRead() || - k.blocks[i].overlapsTimeRange(k.blocks[i-1].minTime, k.blocks[i-1].maxTime) || - len(k.blocks[i].tombstones) > 0 - } - - } - - k.merged = k.combineUnsigned(dedup) -} - -// combine returns a new set of blocks using the current blocks in the buffers. If dedup -// is true, all the blocks will be decoded, dedup and sorted in in order. If dedup is false, -// only blocks that are smaller than the chunk size will be decoded and combined. 
-func (k *tsmBatchKeyIterator) combineUnsigned(dedup bool) blocks { - if dedup { - for k.mergedUnsignedValues.Len() < k.size && len(k.blocks) > 0 { - for len(k.blocks) > 0 && k.blocks[0].read() { - k.blocks = k.blocks[1:] - } - - if len(k.blocks) == 0 { - break - } - first := k.blocks[0] - minTime := first.minTime - maxTime := first.maxTime - - // Adjust the min time to the start of any overlapping blocks. - for i := 0; i < len(k.blocks); i++ { - if k.blocks[i].overlapsTimeRange(minTime, maxTime) && !k.blocks[i].read() { - if k.blocks[i].minTime < minTime { - minTime = k.blocks[i].minTime - } - if k.blocks[i].maxTime > minTime && k.blocks[i].maxTime < maxTime { - maxTime = k.blocks[i].maxTime - } - } - } - - // We have some overlapping blocks so decode all, append in order and then dedup - for i := 0; i < len(k.blocks); i++ { - if !k.blocks[i].overlapsTimeRange(minTime, maxTime) || k.blocks[i].read() { - continue - } - - var v cursors.UnsignedArray - var err error - if err = DecodeUnsignedArrayBlock(k.blocks[i].b, &v); err != nil { - k.err = err - return nil - } - - // Invariant: v.MaxTime() == k.blocks[i].maxTime - if k.blocks[i].maxTime != v.MaxTime() { - if maxTime == k.blocks[i].maxTime { - maxTime = v.MaxTime() - } - k.blocks[i].maxTime = v.MaxTime() - } - - // Remove values we already read - v.Exclude(k.blocks[i].readMin, k.blocks[i].readMax) - - // Filter out only the values for overlapping block - v.Include(minTime, maxTime) - if v.Len() > 0 { - // Record that we read a subset of the block - k.blocks[i].markRead(v.MinTime(), v.MaxTime()) - } - - // Apply each tombstone to the block - for _, ts := range k.blocks[i].tombstones { - v.Exclude(ts.Min, ts.Max) - } - - k.mergedUnsignedValues.Merge(&v) - } - } - - // Since we combined multiple blocks, we could have more values than we should put into - // a single block. We need to chunk them up into groups and re-encode them. - return k.chunkUnsigned(nil) - } - var i int - - for i < len(k.blocks) { - - // skip this block if it's values were already read - if k.blocks[i].read() { - i++ - continue - } - // If we this block is already full, just add it as is - if BlockCount(k.blocks[i].b) >= k.size { - k.merged = append(k.merged, k.blocks[i]) - } else { - break - } - i++ - } - - if k.fast { - for i < len(k.blocks) { - // skip this block if it's values were already read - if k.blocks[i].read() { - i++ - continue - } - - k.merged = append(k.merged, k.blocks[i]) - i++ - } - } - - // If we only have 1 blocks left, just append it as is and avoid decoding/recoding - if i == len(k.blocks)-1 { - if !k.blocks[i].read() { - k.merged = append(k.merged, k.blocks[i]) - } - i++ - } - - // The remaining blocks can be combined and we know that they do not overlap and - // so we can just append each, sort and re-encode. 
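Once values have been merged, the chunk* helpers (chunkFloat, chunkInteger, and the analogous functions around this hunk) peel off at most k.size points, re-encode them, and record the chunk's time range in the new block. Note that the real helpers emit only one oversized chunk per call and leave the remainder buffered for the next iteration; the sketch below drains everything in one pass for clarity, with a toy struct standing in for the encoded output:

```go
package main

import "fmt"

// chunk is a hypothetical stand-in for a re-encoded output block: its time
// range plus a count of the points it would contain.
type chunk struct {
	minTime, maxTime int64
	points           int
}

// chunkValues mirrors the chunk* helpers: emit full blocks of exactly `size`
// points while enough merged values remain, then flush whatever is left as a
// final, smaller block.
func chunkValues(timestamps []int64, size int) []chunk {
	var out []chunk
	for len(timestamps) > size {
		head := timestamps[:size]
		out = append(out, chunk{minTime: head[0], maxTime: head[len(head)-1], points: len(head)})
		timestamps = timestamps[size:]
	}
	if len(timestamps) > 0 {
		out = append(out, chunk{minTime: timestamps[0], maxTime: timestamps[len(timestamps)-1], points: len(timestamps)})
	}
	return out
}

func main() {
	ts := []int64{1, 2, 3, 4, 5, 6, 7}
	for _, c := range chunkValues(ts, 3) {
		fmt.Printf("block %d..%d (%d points)\n", c.minTime, c.maxTime, c.points)
	}
	// block 1..3 (3 points)
	// block 4..6 (3 points)
	// block 7..7 (1 point)
}
```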
- for i < len(k.blocks) && k.mergedUnsignedValues.Len() < k.size { - if k.blocks[i].read() { - i++ - continue - } - - var v cursors.UnsignedArray - if err := DecodeUnsignedArrayBlock(k.blocks[i].b, &v); err != nil { - k.err = err - return nil - } - - // Invariant: v.MaxTime() == k.blocks[i].maxTime - if k.blocks[i].maxTime != v.MaxTime() { - k.blocks[i].maxTime = v.MaxTime() - } - - // Apply each tombstone to the block - for _, ts := range k.blocks[i].tombstones { - v.Exclude(ts.Min, ts.Max) - } - - k.blocks[i].markRead(k.blocks[i].minTime, k.blocks[i].maxTime) - - k.mergedUnsignedValues.Merge(&v) - i++ - } - - k.blocks = k.blocks[i:] - - return k.chunkUnsigned(k.merged) -} - -func (k *tsmBatchKeyIterator) chunkUnsigned(dst blocks) blocks { - if k.mergedUnsignedValues.Len() > k.size { - var values cursors.UnsignedArray - values.Timestamps = k.mergedUnsignedValues.Timestamps[:k.size] - minTime, maxTime := values.Timestamps[0], values.Timestamps[len(values.Timestamps)-1] - values.Values = k.mergedUnsignedValues.Values[:k.size] - - cb, err := EncodeUnsignedArrayBlock(&values, nil) // TODO(edd): pool this buffer - if err != nil { - k.err = err - return nil - } - - dst = append(dst, &block{ - minTime: minTime, - maxTime: maxTime, - key: k.key, - b: cb, - }) - k.mergedUnsignedValues.Timestamps = k.mergedUnsignedValues.Timestamps[k.size:] - k.mergedUnsignedValues.Values = k.mergedUnsignedValues.Values[k.size:] - return dst - } - - // Re-encode the remaining values into the last block - if k.mergedUnsignedValues.Len() > 0 { - minTime, maxTime := k.mergedUnsignedValues.Timestamps[0], k.mergedUnsignedValues.Timestamps[len(k.mergedUnsignedValues.Timestamps)-1] - cb, err := EncodeUnsignedArrayBlock(k.mergedUnsignedValues, nil) // TODO(edd): pool this buffer - if err != nil { - k.err = err - return nil - } - - dst = append(dst, &block{ - minTime: minTime, - maxTime: maxTime, - key: k.key, - b: cb, - }) - k.mergedUnsignedValues.Timestamps = k.mergedUnsignedValues.Timestamps[:0] - k.mergedUnsignedValues.Values = k.mergedUnsignedValues.Values[:0] - } - return dst -} - -// merge combines the next set of blocks into merged blocks. -func (k *tsmBatchKeyIterator) mergeString() { - // No blocks left, or pending merged values, we're done - if len(k.blocks) == 0 && len(k.merged) == 0 && k.mergedStringValues.Len() == 0 { - return - } - - sort.Stable(k.blocks) - - dedup := k.mergedStringValues.Len() != 0 - if len(k.blocks) > 0 && !dedup { - // If we have more than one block or any partially tombstoned blocks, we many need to dedup - dedup = len(k.blocks[0].tombstones) > 0 || k.blocks[0].partiallyRead() - - // Quickly scan each block to see if any overlap with the prior block, if they overlap then - // we need to dedup as there may be duplicate points now - for i := 1; !dedup && i < len(k.blocks); i++ { - dedup = k.blocks[i].partiallyRead() || - k.blocks[i].overlapsTimeRange(k.blocks[i-1].minTime, k.blocks[i-1].maxTime) || - len(k.blocks[i].tombstones) > 0 - } - - } - - k.merged = k.combineString(dedup) -} - -// combine returns a new set of blocks using the current blocks in the buffers. If dedup -// is true, all the blocks will be decoded, dedup and sorted in in order. If dedup is false, -// only blocks that are smaller than the chunk size will be decoded and combined. 
-func (k *tsmBatchKeyIterator) combineString(dedup bool) blocks { - if dedup { - for k.mergedStringValues.Len() < k.size && len(k.blocks) > 0 { - for len(k.blocks) > 0 && k.blocks[0].read() { - k.blocks = k.blocks[1:] - } - - if len(k.blocks) == 0 { - break - } - first := k.blocks[0] - minTime := first.minTime - maxTime := first.maxTime - - // Adjust the min time to the start of any overlapping blocks. - for i := 0; i < len(k.blocks); i++ { - if k.blocks[i].overlapsTimeRange(minTime, maxTime) && !k.blocks[i].read() { - if k.blocks[i].minTime < minTime { - minTime = k.blocks[i].minTime - } - if k.blocks[i].maxTime > minTime && k.blocks[i].maxTime < maxTime { - maxTime = k.blocks[i].maxTime - } - } - } - - // We have some overlapping blocks so decode all, append in order and then dedup - for i := 0; i < len(k.blocks); i++ { - if !k.blocks[i].overlapsTimeRange(minTime, maxTime) || k.blocks[i].read() { - continue - } - - var v cursors.StringArray - var err error - if err = DecodeStringArrayBlock(k.blocks[i].b, &v); err != nil { - k.err = err - return nil - } - - // Invariant: v.MaxTime() == k.blocks[i].maxTime - if k.blocks[i].maxTime != v.MaxTime() { - if maxTime == k.blocks[i].maxTime { - maxTime = v.MaxTime() - } - k.blocks[i].maxTime = v.MaxTime() - } - - // Remove values we already read - v.Exclude(k.blocks[i].readMin, k.blocks[i].readMax) - - // Filter out only the values for overlapping block - v.Include(minTime, maxTime) - if v.Len() > 0 { - // Record that we read a subset of the block - k.blocks[i].markRead(v.MinTime(), v.MaxTime()) - } - - // Apply each tombstone to the block - for _, ts := range k.blocks[i].tombstones { - v.Exclude(ts.Min, ts.Max) - } - - k.mergedStringValues.Merge(&v) - } - } - - // Since we combined multiple blocks, we could have more values than we should put into - // a single block. We need to chunk them up into groups and re-encode them. - return k.chunkString(nil) - } - var i int - - for i < len(k.blocks) { - - // skip this block if it's values were already read - if k.blocks[i].read() { - i++ - continue - } - // If we this block is already full, just add it as is - if BlockCount(k.blocks[i].b) >= k.size { - k.merged = append(k.merged, k.blocks[i]) - } else { - break - } - i++ - } - - if k.fast { - for i < len(k.blocks) { - // skip this block if it's values were already read - if k.blocks[i].read() { - i++ - continue - } - - k.merged = append(k.merged, k.blocks[i]) - i++ - } - } - - // If we only have 1 blocks left, just append it as is and avoid decoding/recoding - if i == len(k.blocks)-1 { - if !k.blocks[i].read() { - k.merged = append(k.merged, k.blocks[i]) - } - i++ - } - - // The remaining blocks can be combined and we know that they do not overlap and - // so we can just append each, sort and re-encode. 
- for i < len(k.blocks) && k.mergedStringValues.Len() < k.size { - if k.blocks[i].read() { - i++ - continue - } - - var v cursors.StringArray - if err := DecodeStringArrayBlock(k.blocks[i].b, &v); err != nil { - k.err = err - return nil - } - - // Invariant: v.MaxTime() == k.blocks[i].maxTime - if k.blocks[i].maxTime != v.MaxTime() { - k.blocks[i].maxTime = v.MaxTime() - } - - // Apply each tombstone to the block - for _, ts := range k.blocks[i].tombstones { - v.Exclude(ts.Min, ts.Max) - } - - k.blocks[i].markRead(k.blocks[i].minTime, k.blocks[i].maxTime) - - k.mergedStringValues.Merge(&v) - i++ - } - - k.blocks = k.blocks[i:] - - return k.chunkString(k.merged) -} - -func (k *tsmBatchKeyIterator) chunkString(dst blocks) blocks { - if k.mergedStringValues.Len() > k.size { - var values cursors.StringArray - values.Timestamps = k.mergedStringValues.Timestamps[:k.size] - minTime, maxTime := values.Timestamps[0], values.Timestamps[len(values.Timestamps)-1] - values.Values = k.mergedStringValues.Values[:k.size] - - cb, err := EncodeStringArrayBlock(&values, nil) // TODO(edd): pool this buffer - if err != nil { - k.err = err - return nil - } - - dst = append(dst, &block{ - minTime: minTime, - maxTime: maxTime, - key: k.key, - b: cb, - }) - k.mergedStringValues.Timestamps = k.mergedStringValues.Timestamps[k.size:] - k.mergedStringValues.Values = k.mergedStringValues.Values[k.size:] - return dst - } - - // Re-encode the remaining values into the last block - if k.mergedStringValues.Len() > 0 { - minTime, maxTime := k.mergedStringValues.Timestamps[0], k.mergedStringValues.Timestamps[len(k.mergedStringValues.Timestamps)-1] - cb, err := EncodeStringArrayBlock(k.mergedStringValues, nil) // TODO(edd): pool this buffer - if err != nil { - k.err = err - return nil - } - - dst = append(dst, &block{ - minTime: minTime, - maxTime: maxTime, - key: k.key, - b: cb, - }) - k.mergedStringValues.Timestamps = k.mergedStringValues.Timestamps[:0] - k.mergedStringValues.Values = k.mergedStringValues.Values[:0] - } - return dst -} - -// merge combines the next set of blocks into merged blocks. -func (k *tsmBatchKeyIterator) mergeBoolean() { - // No blocks left, or pending merged values, we're done - if len(k.blocks) == 0 && len(k.merged) == 0 && k.mergedBooleanValues.Len() == 0 { - return - } - - sort.Stable(k.blocks) - - dedup := k.mergedBooleanValues.Len() != 0 - if len(k.blocks) > 0 && !dedup { - // If we have more than one block or any partially tombstoned blocks, we many need to dedup - dedup = len(k.blocks[0].tombstones) > 0 || k.blocks[0].partiallyRead() - - // Quickly scan each block to see if any overlap with the prior block, if they overlap then - // we need to dedup as there may be duplicate points now - for i := 1; !dedup && i < len(k.blocks); i++ { - dedup = k.blocks[i].partiallyRead() || - k.blocks[i].overlapsTimeRange(k.blocks[i-1].minTime, k.blocks[i-1].maxTime) || - len(k.blocks[i].tombstones) > 0 - } - - } - - k.merged = k.combineBoolean(dedup) -} - -// combine returns a new set of blocks using the current blocks in the buffers. If dedup -// is true, all the blocks will be decoded, dedup and sorted in in order. If dedup is false, -// only blocks that are smaller than the chunk size will be decoded and combined. 
-func (k *tsmBatchKeyIterator) combineBoolean(dedup bool) blocks { - if dedup { - for k.mergedBooleanValues.Len() < k.size && len(k.blocks) > 0 { - for len(k.blocks) > 0 && k.blocks[0].read() { - k.blocks = k.blocks[1:] - } - - if len(k.blocks) == 0 { - break - } - first := k.blocks[0] - minTime := first.minTime - maxTime := first.maxTime - - // Adjust the min time to the start of any overlapping blocks. - for i := 0; i < len(k.blocks); i++ { - if k.blocks[i].overlapsTimeRange(minTime, maxTime) && !k.blocks[i].read() { - if k.blocks[i].minTime < minTime { - minTime = k.blocks[i].minTime - } - if k.blocks[i].maxTime > minTime && k.blocks[i].maxTime < maxTime { - maxTime = k.blocks[i].maxTime - } - } - } - - // We have some overlapping blocks so decode all, append in order and then dedup - for i := 0; i < len(k.blocks); i++ { - if !k.blocks[i].overlapsTimeRange(minTime, maxTime) || k.blocks[i].read() { - continue - } - - var v cursors.BooleanArray - var err error - if err = DecodeBooleanArrayBlock(k.blocks[i].b, &v); err != nil { - k.err = err - return nil - } - - // Invariant: v.MaxTime() == k.blocks[i].maxTime - if k.blocks[i].maxTime != v.MaxTime() { - if maxTime == k.blocks[i].maxTime { - maxTime = v.MaxTime() - } - k.blocks[i].maxTime = v.MaxTime() - } - - // Remove values we already read - v.Exclude(k.blocks[i].readMin, k.blocks[i].readMax) - - // Filter out only the values for overlapping block - v.Include(minTime, maxTime) - if v.Len() > 0 { - // Record that we read a subset of the block - k.blocks[i].markRead(v.MinTime(), v.MaxTime()) - } - - // Apply each tombstone to the block - for _, ts := range k.blocks[i].tombstones { - v.Exclude(ts.Min, ts.Max) - } - - k.mergedBooleanValues.Merge(&v) - } - } - - // Since we combined multiple blocks, we could have more values than we should put into - // a single block. We need to chunk them up into groups and re-encode them. - return k.chunkBoolean(nil) - } - var i int - - for i < len(k.blocks) { - - // skip this block if it's values were already read - if k.blocks[i].read() { - i++ - continue - } - // If we this block is already full, just add it as is - if BlockCount(k.blocks[i].b) >= k.size { - k.merged = append(k.merged, k.blocks[i]) - } else { - break - } - i++ - } - - if k.fast { - for i < len(k.blocks) { - // skip this block if it's values were already read - if k.blocks[i].read() { - i++ - continue - } - - k.merged = append(k.merged, k.blocks[i]) - i++ - } - } - - // If we only have 1 blocks left, just append it as is and avoid decoding/recoding - if i == len(k.blocks)-1 { - if !k.blocks[i].read() { - k.merged = append(k.merged, k.blocks[i]) - } - i++ - } - - // The remaining blocks can be combined and we know that they do not overlap and - // so we can just append each, sort and re-encode. 
- for i < len(k.blocks) && k.mergedBooleanValues.Len() < k.size { - if k.blocks[i].read() { - i++ - continue - } - - var v cursors.BooleanArray - if err := DecodeBooleanArrayBlock(k.blocks[i].b, &v); err != nil { - k.err = err - return nil - } - - // Invariant: v.MaxTime() == k.blocks[i].maxTime - if k.blocks[i].maxTime != v.MaxTime() { - k.blocks[i].maxTime = v.MaxTime() - } - - // Apply each tombstone to the block - for _, ts := range k.blocks[i].tombstones { - v.Exclude(ts.Min, ts.Max) - } - - k.blocks[i].markRead(k.blocks[i].minTime, k.blocks[i].maxTime) - - k.mergedBooleanValues.Merge(&v) - i++ - } - - k.blocks = k.blocks[i:] - - return k.chunkBoolean(k.merged) -} - -func (k *tsmBatchKeyIterator) chunkBoolean(dst blocks) blocks { - if k.mergedBooleanValues.Len() > k.size { - var values cursors.BooleanArray - values.Timestamps = k.mergedBooleanValues.Timestamps[:k.size] - minTime, maxTime := values.Timestamps[0], values.Timestamps[len(values.Timestamps)-1] - values.Values = k.mergedBooleanValues.Values[:k.size] - - cb, err := EncodeBooleanArrayBlock(&values, nil) // TODO(edd): pool this buffer - if err != nil { - k.err = err - return nil - } - - dst = append(dst, &block{ - minTime: minTime, - maxTime: maxTime, - key: k.key, - b: cb, - }) - k.mergedBooleanValues.Timestamps = k.mergedBooleanValues.Timestamps[k.size:] - k.mergedBooleanValues.Values = k.mergedBooleanValues.Values[k.size:] - return dst - } - - // Re-encode the remaining values into the last block - if k.mergedBooleanValues.Len() > 0 { - minTime, maxTime := k.mergedBooleanValues.Timestamps[0], k.mergedBooleanValues.Timestamps[len(k.mergedBooleanValues.Timestamps)-1] - cb, err := EncodeBooleanArrayBlock(k.mergedBooleanValues, nil) // TODO(edd): pool this buffer - if err != nil { - k.err = err - return nil - } - - dst = append(dst, &block{ - minTime: minTime, - maxTime: maxTime, - key: k.key, - b: cb, - }) - k.mergedBooleanValues.Timestamps = k.mergedBooleanValues.Timestamps[:0] - k.mergedBooleanValues.Values = k.mergedBooleanValues.Values[:0] - } - return dst -} diff --git a/tsdb/tsm1/compact.gen.go.tmpl b/tsdb/tsm1/compact.gen.go.tmpl deleted file mode 100644 index f35fa2b0fb..0000000000 --- a/tsdb/tsm1/compact.gen.go.tmpl +++ /dev/null @@ -1,433 +0,0 @@ -package tsm1 - -import ( - "sort" - - "github.com/influxdata/influxdb/v2/tsdb/cursors" -) - -{{range .}} - -// merge combines the next set of blocks into merged blocks. -func (k *tsmKeyIterator) merge{{.Name}}() { - // No blocks left, or pending merged values, we're done - if len(k.blocks) == 0 && len(k.merged) == 0 && len(k.merged{{.Name}}Values) == 0 { - return - } - - sort.Stable(k.blocks) - - dedup := len(k.merged{{.Name}}Values) != 0 - if len(k.blocks) > 0 && !dedup { - // If we have more than one block or any partially tombstoned blocks, we many need to dedup - dedup = len(k.blocks[0].tombstones) > 0 || k.blocks[0].partiallyRead() - - // Quickly scan each block to see if any overlap with the prior block, if they overlap then - // we need to dedup as there may be duplicate points now - for i := 1; !dedup && i < len(k.blocks); i++ { - dedup = k.blocks[i].partiallyRead() || - k.blocks[i].overlapsTimeRange(k.blocks[i-1].minTime, k.blocks[i-1].maxTime) || - len(k.blocks[i].tombstones) > 0 - } - - } - - k.merged = k.combine{{.Name}}(dedup) -} - -// combine returns a new set of blocks using the current blocks in the buffers. If dedup -// is true, all the blocks will be decoded, dedup and sorted in in order. 
If dedup is false, -// only blocks that are smaller than the chunk size will be decoded and combined. -func (k *tsmKeyIterator) combine{{.Name}}(dedup bool) blocks { - if dedup { - for len(k.merged{{.Name}}Values) < k.size && len(k.blocks) > 0 { - for len(k.blocks) > 0 && k.blocks[0].read() { - k.blocks = k.blocks[1:] - } - - if len(k.blocks) == 0 { - break - } - first := k.blocks[0] - minTime := first.minTime - maxTime := first.maxTime - - // Adjust the min time to the start of any overlapping blocks. - for i := 0; i < len(k.blocks); i++ { - if k.blocks[i].overlapsTimeRange(minTime, maxTime) && !k.blocks[i].read() { - if k.blocks[i].minTime < minTime { - minTime = k.blocks[i].minTime - } - if k.blocks[i].maxTime > minTime && k.blocks[i].maxTime < maxTime { - maxTime = k.blocks[i].maxTime - } - } - } - - // We have some overlapping blocks so decode all, append in order and then dedup - for i := 0; i < len(k.blocks); i++ { - if !k.blocks[i].overlapsTimeRange(minTime, maxTime) || k.blocks[i].read() { - continue - } - - v, err := Decode{{.Name}}Block(k.blocks[i].b, &[]{{.Name}}Value{}) - if err != nil { - k.err = err - return nil - } - - // Remove values we already read - v = {{.Name}}Values(v).Exclude(k.blocks[i].readMin, k.blocks[i].readMax) - - // Filter out only the values for overlapping block - v = {{.Name}}Values(v).Include(minTime, maxTime) - if len(v) > 0 { - // Record that we read a subset of the block - k.blocks[i].markRead(v[0].UnixNano(), v[len(v)-1].UnixNano()) - } - - // Apply each tombstone to the block - for _, ts := range k.blocks[i].tombstones { - v = {{.Name}}Values(v).Exclude(ts.Min, ts.Max) - } - - k.merged{{.Name}}Values = k.merged{{.Name}}Values.Merge(v) - } - } - - // Since we combined multiple blocks, we could have more values than we should put into - // a single block. We need to chunk them up into groups and re-encode them. - return k.chunk{{.Name}}(nil) - } else { - var i int - - for i < len(k.blocks) { - - // skip this block if it's values were already read - if k.blocks[i].read() { - i++ - continue - } - // If we this block is already full, just add it as is - if BlockCount(k.blocks[i].b) >= k.size { - k.merged = append(k.merged, k.blocks[i]) - } else { - break - } - i++ - } - - if k.fast { - for i < len(k.blocks) { - // skip this block if it's values were already read - if k.blocks[i].read() { - i++ - continue - } - - k.merged = append(k.merged, k.blocks[i]) - i++ - } - } - - // If we only have 1 blocks left, just append it as is and avoid decoding/recoding - if i == len(k.blocks)-1 { - if !k.blocks[i].read() { - k.merged = append(k.merged, k.blocks[i]) - } - i++ - } - - // The remaining blocks can be combined and we know that they do not overlap and - // so we can just append each, sort and re-encode. 
- for i < len(k.blocks) && len(k.merged{{.Name}}Values) < k.size { - if k.blocks[i].read() { - i++ - continue - } - - v, err := Decode{{.Name}}Block(k.blocks[i].b, &[]{{.Name}}Value{}) - if err != nil { - k.err = err - return nil - } - - // Apply each tombstone to the block - for _, ts := range k.blocks[i].tombstones { - v = {{.Name}}Values(v).Exclude(ts.Min, ts.Max) - } - - k.blocks[i].markRead(k.blocks[i].minTime, k.blocks[i].maxTime) - - k.merged{{.Name}}Values = k.merged{{.Name}}Values.Merge(v) - i++ - } - - k.blocks = k.blocks[i:] - - return k.chunk{{.Name}}(k.merged) - } -} - -func (k *tsmKeyIterator) chunk{{.Name}}(dst blocks) blocks { - if len(k.merged{{.Name}}Values) > k.size { - values := k.merged{{.Name}}Values[:k.size] - cb, err := {{.Name}}Values(values).Encode(nil) - if err != nil { - k.err = err - return nil - } - - dst = append(dst, &block{ - minTime: values[0].UnixNano(), - maxTime: values[len(values)-1].UnixNano(), - key: k.key, - b: cb, - }) - k.merged{{.Name}}Values = k.merged{{.Name}}Values[k.size:] - return dst - } - - // Re-encode the remaining values into the last block - if len(k.merged{{.Name}}Values) > 0 { - cb, err := {{.Name}}Values(k.merged{{.Name}}Values).Encode(nil) - if err != nil { - k.err = err - return nil - } - - dst = append(dst, &block{ - minTime: k.merged{{.Name}}Values[0].UnixNano(), - maxTime: k.merged{{.Name}}Values[len(k.merged{{.Name}}Values)-1].UnixNano(), - key: k.key, - b: cb, - }) - k.merged{{.Name}}Values = k.merged{{.Name}}Values[:0] - } - return dst -} - -{{ end }} - -{{range .}} -// merge combines the next set of blocks into merged blocks. -func (k *tsmBatchKeyIterator) merge{{.Name}}() { - // No blocks left, or pending merged values, we're done - if len(k.blocks) == 0 && len(k.merged) == 0 && k.merged{{.Name}}Values.Len() == 0 { - return - } - - sort.Stable(k.blocks) - - dedup := k.merged{{.Name}}Values.Len() != 0 - if len(k.blocks) > 0 && !dedup { - // If we have more than one block or any partially tombstoned blocks, we many need to dedup - dedup = len(k.blocks[0].tombstones) > 0 || k.blocks[0].partiallyRead() - - // Quickly scan each block to see if any overlap with the prior block, if they overlap then - // we need to dedup as there may be duplicate points now - for i := 1; !dedup && i < len(k.blocks); i++ { - dedup = k.blocks[i].partiallyRead() || - k.blocks[i].overlapsTimeRange(k.blocks[i-1].minTime, k.blocks[i-1].maxTime) || - len(k.blocks[i].tombstones) > 0 - } - - } - - k.merged = k.combine{{.Name}}(dedup) -} - -// combine returns a new set of blocks using the current blocks in the buffers. If dedup -// is true, all the blocks will be decoded, dedup and sorted in in order. If dedup is false, -// only blocks that are smaller than the chunk size will be decoded and combined. -func (k *tsmBatchKeyIterator) combine{{.Name}}(dedup bool) blocks { - if dedup { - for k.merged{{.Name}}Values.Len() < k.size && len(k.blocks) > 0 { - for len(k.blocks) > 0 && k.blocks[0].read() { - k.blocks = k.blocks[1:] - } - - if len(k.blocks) == 0 { - break - } - first := k.blocks[0] - minTime := first.minTime - maxTime := first.maxTime - - // Adjust the min time to the start of any overlapping blocks. 
- for i := 0; i < len(k.blocks); i++ { - if k.blocks[i].overlapsTimeRange(minTime, maxTime) && !k.blocks[i].read() { - if k.blocks[i].minTime < minTime { - minTime = k.blocks[i].minTime - } - if k.blocks[i].maxTime > minTime && k.blocks[i].maxTime < maxTime { - maxTime = k.blocks[i].maxTime - } - } - } - - // We have some overlapping blocks so decode all, append in order and then dedup - for i := 0; i < len(k.blocks); i++ { - if !k.blocks[i].overlapsTimeRange(minTime, maxTime) || k.blocks[i].read() { - continue - } - - var v cursors.{{.Name}}Array - var err error - if err = Decode{{.Name}}ArrayBlock(k.blocks[i].b, &v); err != nil { - k.err = err - return nil - } - - // Invariant: v.MaxTime() == k.blocks[i].maxTime - if k.blocks[i].maxTime != v.MaxTime() { - if maxTime == k.blocks[i].maxTime { - maxTime = v.MaxTime() - } - k.blocks[i].maxTime = v.MaxTime() - } - - // Remove values we already read - v.Exclude(k.blocks[i].readMin, k.blocks[i].readMax) - - // Filter out only the values for overlapping block - v.Include(minTime, maxTime) - if v.Len() > 0 { - // Record that we read a subset of the block - k.blocks[i].markRead(v.MinTime(), v.MaxTime()) - } - - // Apply each tombstone to the block - for _, ts := range k.blocks[i].tombstones { - v.Exclude(ts.Min, ts.Max) - } - - k.merged{{.Name}}Values.Merge(&v) - } - } - - // Since we combined multiple blocks, we could have more values than we should put into - // a single block. We need to chunk them up into groups and re-encode them. - return k.chunk{{.Name}}(nil) - } - var i int - - for i < len(k.blocks) { - - // skip this block if it's values were already read - if k.blocks[i].read() { - i++ - continue - } - // If we this block is already full, just add it as is - if BlockCount(k.blocks[i].b) >= k.size { - k.merged = append(k.merged, k.blocks[i]) - } else { - break - } - i++ - } - - if k.fast { - for i < len(k.blocks) { - // skip this block if it's values were already read - if k.blocks[i].read() { - i++ - continue - } - - k.merged = append(k.merged, k.blocks[i]) - i++ - } - } - - // If we only have 1 blocks left, just append it as is and avoid decoding/recoding - if i == len(k.blocks)-1 { - if !k.blocks[i].read() { - k.merged = append(k.merged, k.blocks[i]) - } - i++ - } - - // The remaining blocks can be combined and we know that they do not overlap and - // so we can just append each, sort and re-encode. 
- for i < len(k.blocks) && k.merged{{.Name}}Values.Len() < k.size { - if k.blocks[i].read() { - i++ - continue - } - - var v cursors.{{.Name}}Array - if err := Decode{{.Name}}ArrayBlock(k.blocks[i].b, &v); err != nil { - k.err = err - return nil - } - - // Invariant: v.MaxTime() == k.blocks[i].maxTime - if k.blocks[i].maxTime != v.MaxTime() { - k.blocks[i].maxTime = v.MaxTime() - } - - // Apply each tombstone to the block - for _, ts := range k.blocks[i].tombstones { - v.Exclude(ts.Min, ts.Max) - } - - k.blocks[i].markRead(k.blocks[i].minTime, k.blocks[i].maxTime) - - k.merged{{.Name}}Values.Merge(&v) - i++ - } - - k.blocks = k.blocks[i:] - - return k.chunk{{.Name}}(k.merged) -} - -func (k *tsmBatchKeyIterator) chunk{{.Name}}(dst blocks) blocks { - if k.merged{{.Name}}Values.Len() > k.size { - var values cursors.{{.Name}}Array - values.Timestamps = k.merged{{.Name}}Values.Timestamps[:k.size] - minTime, maxTime := values.Timestamps[0], values.Timestamps[len(values.Timestamps)-1] - values.Values = k.merged{{.Name}}Values.Values[:k.size] - - cb, err := Encode{{.Name}}ArrayBlock(&values, nil) // TODO(edd): pool this buffer - if err != nil { - k.err = err - return nil - } - - dst = append(dst, &block{ - minTime: minTime, - maxTime: maxTime, - key: k.key, - b: cb, - }) - k.merged{{.Name}}Values.Timestamps = k.merged{{.Name}}Values.Timestamps[k.size:] - k.merged{{.Name}}Values.Values = k.merged{{.Name}}Values.Values[k.size:] - return dst - } - - // Re-encode the remaining values into the last block - if k.merged{{.Name}}Values.Len() > 0 { - minTime, maxTime := k.merged{{.Name}}Values.Timestamps[0], k.merged{{.Name}}Values.Timestamps[len(k.merged{{.Name}}Values.Timestamps)-1] - cb, err := Encode{{.Name}}ArrayBlock(k.merged{{.Name}}Values, nil) // TODO(edd): pool this buffer - if err != nil { - k.err = err - return nil - } - - dst = append(dst, &block{ - minTime: minTime, - maxTime: maxTime, - key: k.key, - b: cb, - }) - k.merged{{.Name}}Values.Timestamps = k.merged{{.Name}}Values.Timestamps[:0] - k.merged{{.Name}}Values.Values = k.merged{{.Name}}Values.Values[:0] - } - return dst -} - - -{{ end }} diff --git a/tsdb/tsm1/compact.gen.go.tmpldata b/tsdb/tsm1/compact.gen.go.tmpldata deleted file mode 100644 index 236ba310ba..0000000000 --- a/tsdb/tsm1/compact.gen.go.tmpldata +++ /dev/null @@ -1,22 +0,0 @@ -[ - { - "Name":"Float", - "name":"float" - }, - { - "Name":"Integer", - "name":"integer" - }, - { - "Name":"Unsigned", - "name":"unsigned" - }, - { - "Name":"String", - "name":"string" - }, - { - "Name":"Boolean", - "name":"boolean" - } -] diff --git a/tsdb/tsm1/compact.go b/tsdb/tsm1/compact.go deleted file mode 100644 index 7e8628473c..0000000000 --- a/tsdb/tsm1/compact.go +++ /dev/null @@ -1,2110 +0,0 @@ -package tsm1 - -// Compactions are the process of creating read-optimized TSM files. -// The files are created by converting write-optimized WAL entries -// to read-optimized TSM format. They can also be created from existing -// TSM files when there are tombstone records that neeed to be removed, points -// that were overwritten by later writes and need to updated, or multiple -// smaller TSM files need to be merged to reduce file counts and improve -// compression ratios. -// -// The compaction process is stream-oriented using multiple readers and -// iterators. The resulting stream is written sorted and chunked to allow for -// one-pass writing of a new TSM file. 
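The deleted compact.gen.go.tmpl and compact.gen.go.tmpldata above are the source of the repeated per-type functions earlier in this diff: the JSON array supplies one entry per value type (Float, Integer, Unsigned, String, Boolean) and the template is expanded once per entry, which is why the merge/combine/chunk logic appears verbatim for each type. A minimal sketch of that kind of expansion using only the standard library; the template text and data here are illustrative, not the project's actual generator invocation (such files are typically regenerated through a go:generate directive pointing at the template):

```go
package main

import (
	"encoding/json"
	"os"
	"text/template"
)

// tmplData mirrors the shape of compact.gen.go.tmpldata: one entry per value type.
const tmplData = `[
  {"Name": "Float",   "name": "float"},
  {"Name": "Integer", "name": "integer"}
]`

// tmplText is a toy stand-in for compact.gen.go.tmpl: it ranges over the same
// data and stamps out one function per type.
const tmplText = `{{range .}}
// merge{{.Name}} combines the next set of {{.name}} blocks into merged blocks.
func (k *tsmKeyIterator) merge{{.Name}}() { /* ... */ }
{{end}}`

func main() {
	var types []map[string]string
	if err := json.Unmarshal([]byte(tmplData), &types); err != nil {
		panic(err)
	}
	t := template.Must(template.New("compact").Parse(tmplText))
	if err := t.Execute(os.Stdout, types); err != nil {
		panic(err)
	}
}
```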
- -import ( - "bytes" - "context" - "fmt" - "io" - "math" - "os" - "path/filepath" - "runtime" - "sort" - "sync" - "sync/atomic" - "time" - - "github.com/influxdata/influxdb/v2/kit/tracing" - "github.com/influxdata/influxdb/v2/pkg/limiter" - "github.com/influxdata/influxdb/v2/tsdb/cursors" -) - -const maxTSMFileSize = uint32(2048 * 1024 * 1024) // 2GB - -const ( - // CompactionTempExtension is the extension used for temporary files created during compaction. - CompactionTempExtension = "tmp" - - // TSMFileExtension is the extension used for TSM files. - TSMFileExtension = "tsm" - - // TSSFileExtension is the extension used for TSM stats files. - TSSFileExtension = "tss" -) - -var ( - errMaxFileExceeded = fmt.Errorf("max file exceeded") - errSnapshotsDisabled = fmt.Errorf("snapshots disabled") - errCompactionsDisabled = fmt.Errorf("compactions disabled") -) - -type errCompactionInProgress struct { - err error -} - -// Error returns the string representation of the error, to satisfy the error interface. -func (e errCompactionInProgress) Error() string { - if e.err != nil { - return fmt.Sprintf("compaction in progress: %s", e.err) - } - return "compaction in progress" -} - -type errCompactionAborted struct { - err error -} - -func (e errCompactionAborted) Error() string { - if e.err != nil { - return fmt.Sprintf("compaction aborted: %s", e.err) - } - return "compaction aborted" -} - -// CompactionGroup represents a list of files eligible to be compacted together. -type CompactionGroup []string - -// CompactionPlanner determines what TSM files and WAL segments to include in a -// given compaction run. -type CompactionPlanner interface { - Plan(lastWrite time.Time) []CompactionGroup - PlanLevel(level int) []CompactionGroup - PlanOptimize() []CompactionGroup - Release(group []CompactionGroup) - FullyCompacted() bool - - // ForceFull causes the planner to return a full compaction plan the next - // time Plan() is called if there are files that could be compacted. - ForceFull() - - SetFileStore(fs *FileStore) -} - -// DefaultPlanner implements CompactionPlanner using a strategy to roll up -// multiple generations of TSM files into larger files in stages. It attempts -// to minimize the number of TSM files on disk while rolling up a bounder number -// of files. -type DefaultPlanner struct { - FileStore fileStore - - // compactFullWriteColdDuration specifies the length of time after - // which if no writes have been committed to the WAL, the engine will - // do a full compaction of the TSM files in this shard. This duration - // should always be greater than the CacheFlushWriteColdDuraion - compactFullWriteColdDuration time.Duration - - // lastPlanCheck is the last time Plan was called - lastPlanCheck time.Time - - mu sync.RWMutex - // lastFindGenerations is the last time findGenerations was run - lastFindGenerations time.Time - - // lastGenerations is the last set of generations found by findGenerations - lastGenerations tsmGenerations - - // forceFull causes the next full plan requests to plan any files - // that may need to be compacted. Normally, these files are skipped and scheduled - // infrequently as the plans are more expensive to run. - forceFull bool - - // filesInUse is the set of files that have been returned as part of a plan and might - // be being compacted. Two plans should not return the same file at any given time. 
- filesInUse map[string]struct{} -} - -type fileStore interface { - Stats() []FileStat - LastModified() time.Time - BlockCount(path string, idx int) int - ParseFileName(path string) (int, int, error) -} - -func NewDefaultPlanner(fs fileStore, writeColdDuration time.Duration) *DefaultPlanner { - return &DefaultPlanner{ - FileStore: fs, - compactFullWriteColdDuration: writeColdDuration, - filesInUse: make(map[string]struct{}), - } -} - -// tsmGeneration represents the TSM files within a generation. -// 000001-01.tsm, 000001-02.tsm would be in the same generation -// 000001 each with different sequence numbers. -type tsmGeneration struct { - id int - files []FileStat - parseFileName ParseFileNameFunc -} - -func newTsmGeneration(id int, parseFileNameFunc ParseFileNameFunc) *tsmGeneration { - return &tsmGeneration{ - id: id, - parseFileName: parseFileNameFunc, - } -} - -// size returns the total size of the files in the generation. -func (t *tsmGeneration) size() uint64 { - var n uint64 - for _, f := range t.files { - n += uint64(f.Size) - } - return n -} - -// compactionLevel returns the level of the files in this generation. -func (t *tsmGeneration) level() int { - // Level 0 is always created from the result of a cache compaction. It generates - // 1 file with a sequence num of 1. Level 2 is generated by compacting multiple - // level 1 files. Level 3 is generate by compacting multiple level 2 files. Level - // 4 is for anything else. - _, seq, _ := t.parseFileName(t.files[0].Path) - if seq < 4 { - return seq - } - - return 4 -} - -// count returns the number of files in the generation. -func (t *tsmGeneration) count() int { - return len(t.files) -} - -// hasTombstones returns true if there are keys removed for any of the files. -func (t *tsmGeneration) hasTombstones() bool { - for _, f := range t.files { - if f.HasTombstone { - return true - } - } - return false -} - -func (c *DefaultPlanner) SetFileStore(fs *FileStore) { - c.FileStore = fs -} - -func (c *DefaultPlanner) ParseFileName(path string) (int, int, error) { - return c.FileStore.ParseFileName(path) -} - -// FullyCompacted returns true if the shard is fully compacted. -func (c *DefaultPlanner) FullyCompacted() bool { - gens := c.findGenerations(false) - return len(gens) <= 1 && !gens.hasTombstones() -} - -// ForceFull causes the planner to return a full compaction plan the next time -// a plan is requested. When ForceFull is called, level and optimize plans will -// not return plans until a full plan is requested and released. -func (c *DefaultPlanner) ForceFull() { - c.mu.Lock() - defer c.mu.Unlock() - c.forceFull = true -} - -// PlanLevel returns a set of TSM files to rewrite for a specific level. -func (c *DefaultPlanner) PlanLevel(level int) []CompactionGroup { - // If a full plan has been requested, don't plan any levels which will prevent - // the full plan from acquiring them. - c.mu.RLock() - if c.forceFull { - c.mu.RUnlock() - return nil - } - c.mu.RUnlock() - - // Determine the generations from all files on disk. We need to treat - // a generation conceptually as a single file even though it may be - // split across several files in sequence. - generations := c.findGenerations(true) - - // If there is only one generation and no tombstones, then there's nothing to - // do. - if len(generations) <= 1 && !generations.hasTombstones() { - return nil - } - - // Group each generation by level such that two adjacent generations in the same - // level become part of the same group. 
- var currentGen tsmGenerations - var groups []tsmGenerations - for i := 0; i < len(generations); i++ { - cur := generations[i] - - // See if this generation is orphan'd which would prevent it from being further - // compacted until a final full compactin runs. - if i < len(generations)-1 { - if cur.level() < generations[i+1].level() { - currentGen = append(currentGen, cur) - continue - } - } - - if len(currentGen) == 0 || currentGen.level() == cur.level() { - currentGen = append(currentGen, cur) - continue - } - groups = append(groups, currentGen) - - currentGen = tsmGenerations{} - currentGen = append(currentGen, cur) - } - - if len(currentGen) > 0 { - groups = append(groups, currentGen) - } - - // Remove any groups in the wrong level - var levelGroups []tsmGenerations - for _, cur := range groups { - if cur.level() == level { - levelGroups = append(levelGroups, cur) - } - } - - minGenerations := 4 - if level == 1 { - minGenerations = 8 - } - - var cGroups []CompactionGroup - for _, group := range levelGroups { - for _, chunk := range group.chunk(minGenerations) { - var cGroup CompactionGroup - var hasTombstones bool - for _, gen := range chunk { - if gen.hasTombstones() { - hasTombstones = true - } - for _, file := range gen.files { - cGroup = append(cGroup, file.Path) - } - } - - if len(chunk) < minGenerations && !hasTombstones { - continue - } - - cGroups = append(cGroups, cGroup) - } - } - - if !c.acquire(cGroups) { - return nil - } - - return cGroups -} - -// PlanOptimize returns all TSM files if they are in different generations in order -// to optimize the index across TSM files. Each returned compaction group can be -// compacted concurrently. -func (c *DefaultPlanner) PlanOptimize() []CompactionGroup { - // If a full plan has been requested, don't plan any levels which will prevent - // the full plan from acquiring them. - c.mu.RLock() - if c.forceFull { - c.mu.RUnlock() - return nil - } - c.mu.RUnlock() - - // Determine the generations from all files on disk. We need to treat - // a generation conceptually as a single file even though it may be - // split across several files in sequence. - generations := c.findGenerations(true) - - // If there is only one generation and no tombstones, then there's nothing to - // do. - if len(generations) <= 1 && !generations.hasTombstones() { - return nil - } - - // Group each generation by level such that two adjacent generations in the same - // level become part of the same group. - var currentGen tsmGenerations - var groups []tsmGenerations - for i := 0; i < len(generations); i++ { - cur := generations[i] - - // Skip the file if it's over the max size and contains a full block and it does not have any tombstones - if cur.count() > 2 && cur.size() > uint64(maxTSMFileSize) && c.FileStore.BlockCount(cur.files[0].Path, 1) == MaxPointsPerBlock && !cur.hasTombstones() { - continue - } - - // See if this generation is orphan'd which would prevent it from being further - // compacted until a final full compactin runs. 
- if i < len(generations)-1 { - if cur.level() < generations[i+1].level() { - currentGen = append(currentGen, cur) - continue - } - } - - if len(currentGen) == 0 || currentGen.level() == cur.level() { - currentGen = append(currentGen, cur) - continue - } - groups = append(groups, currentGen) - - currentGen = tsmGenerations{} - currentGen = append(currentGen, cur) - } - - if len(currentGen) > 0 { - groups = append(groups, currentGen) - } - - // Only optimize level 4 files since using lower-levels will collide - // with the level planners - var levelGroups []tsmGenerations - for _, cur := range groups { - if cur.level() == 4 { - levelGroups = append(levelGroups, cur) - } - } - - var cGroups []CompactionGroup - for _, group := range levelGroups { - // Skip the group if it's not worthwhile to optimize it - if len(group) < 4 && !group.hasTombstones() { - continue - } - - var cGroup CompactionGroup - for _, gen := range group { - for _, file := range gen.files { - cGroup = append(cGroup, file.Path) - } - } - - cGroups = append(cGroups, cGroup) - } - - if !c.acquire(cGroups) { - return nil - } - - return cGroups -} - -// Plan returns a set of TSM files to rewrite for level 4 or higher. The planning returns -// multiple groups if possible to allow compactions to run concurrently. -func (c *DefaultPlanner) Plan(lastWrite time.Time) []CompactionGroup { - generations := c.findGenerations(true) - - c.mu.RLock() - forceFull := c.forceFull - c.mu.RUnlock() - - // first check if we should be doing a full compaction because nothing has been written in a long time - if forceFull || c.compactFullWriteColdDuration > 0 && time.Since(lastWrite) > c.compactFullWriteColdDuration && len(generations) > 1 { - - // Reset the full schedule if we planned because of it. - if forceFull { - c.mu.Lock() - c.forceFull = false - c.mu.Unlock() - } - - var tsmFiles []string - var genCount int - for i, group := range generations { - var skip bool - - // Skip the file if it's over the max size and contains a full block and it does not have any tombstones - if len(generations) > 2 && group.size() > uint64(maxTSMFileSize) && c.FileStore.BlockCount(group.files[0].Path, 1) == MaxPointsPerBlock && !group.hasTombstones() { - skip = true - } - - // We need to look at the level of the next file because it may need to be combined with this generation - // but won't get picked up on it's own if this generation is skipped. This allows the most recently - // created files to get picked up by the full compaction planner and avoids having a few less optimally - // compressed files. - if i < len(generations)-1 { - if generations[i+1].level() <= 3 { - skip = false - } - } - - if skip { - continue - } - - for _, f := range group.files { - tsmFiles = append(tsmFiles, f.Path) - } - genCount += 1 - } - sort.Strings(tsmFiles) - - // Make sure we have more than 1 file and more than 1 generation - if len(tsmFiles) <= 1 || genCount <= 1 { - return nil - } - - group := []CompactionGroup{tsmFiles} - if !c.acquire(group) { - return nil - } - return group - } - - // don't plan if nothing has changed in the filestore - if c.lastPlanCheck.After(c.FileStore.LastModified()) && !generations.hasTombstones() { - return nil - } - - c.lastPlanCheck = time.Now() - - // If there is only one generation, return early to avoid re-compacting the same file - // over and over again. - if len(generations) <= 1 && !generations.hasTombstones() { - return nil - } - - // Need to find the ending point for level 4 files. They will be the oldest files. 
We scan - // each generation in descending break once we see a file less than 4. - end := 0 - start := 0 - for i, g := range generations { - if g.level() <= 3 { - break - } - end = i + 1 - } - - // As compactions run, the oldest files get bigger. We don't want to re-compact them during - // this planning if they are maxed out so skip over any we see. - var hasTombstones bool - for i, g := range generations[:end] { - if g.hasTombstones() { - hasTombstones = true - } - - if hasTombstones { - continue - } - - // Skip the file if it's over the max size and contains a full block or the generation is split - // over multiple files. In the latter case, that would mean the data in the file spilled over - // the 2GB limit. - if g.size() > uint64(maxTSMFileSize) && c.FileStore.BlockCount(g.files[0].Path, 1) == MaxPointsPerBlock { - start = i + 1 - } - - // This is an edge case that can happen after multiple compactions run. The files at the beginning - // can become larger faster than ones after them. We want to skip those really big ones and just - // compact the smaller ones until they are closer in size. - if i > 0 { - if g.size()*2 < generations[i-1].size() { - start = i - break - } - } - } - - // step is how may files to compact in a group. We want to clamp it at 4 but also stil - // return groups smaller than 4. - step := 4 - if step > end { - step = end - } - - // slice off the generations that we'll examine - generations = generations[start:end] - - // Loop through the generations in groups of size step and see if we can compact all (or - // some of them as group) - groups := []tsmGenerations{} - for i := 0; i < len(generations); i += step { - var skipGroup bool - startIndex := i - - for j := i; j < i+step && j < len(generations); j++ { - gen := generations[j] - lvl := gen.level() - - // Skip compacting this group if there happens to be any lower level files in the - // middle. These will get picked up by the level compactors. - if lvl <= 3 { - skipGroup = true - break - } - - // Skip the file if it's over the max size and it contains a full block - if gen.size() >= uint64(maxTSMFileSize) && c.FileStore.BlockCount(gen.files[0].Path, 1) == MaxPointsPerBlock && !gen.hasTombstones() { - startIndex++ - continue - } - } - - if skipGroup { - continue - } - - endIndex := i + step - if endIndex > len(generations) { - endIndex = len(generations) - } - if endIndex-startIndex > 0 { - groups = append(groups, generations[startIndex:endIndex]) - } - } - - if len(groups) == 0 { - return nil - } - - // With the groups, we need to evaluate whether the group as a whole can be compacted - compactable := []tsmGenerations{} - for _, group := range groups { - //if we don't have enough generations to compact, skip it - if len(group) < 4 && !group.hasTombstones() { - continue - } - compactable = append(compactable, group) - } - - // All the files to be compacted must be compacted in order. We need to convert each - // group to the actual set of files in that group to be compacted. - var tsmFiles []CompactionGroup - for _, c := range compactable { - var cGroup CompactionGroup - for _, group := range c { - for _, f := range group.files { - cGroup = append(cGroup, f.Path) - } - } - sort.Strings(cGroup) - tsmFiles = append(tsmFiles, cGroup) - } - - if !c.acquire(tsmFiles) { - return nil - } - return tsmFiles -} - -// findGenerations groups all the TSM files by generation based -// on their filename, then returns the generations in descending order (newest first). 
-// If skipInUse is true, tsm files that are part of an existing compaction plan -// are not returned. -func (c *DefaultPlanner) findGenerations(skipInUse bool) tsmGenerations { - c.mu.Lock() - defer c.mu.Unlock() - - last := c.lastFindGenerations - lastGen := c.lastGenerations - - if !last.IsZero() && c.FileStore.LastModified().Equal(last) { - return lastGen - } - - genTime := c.FileStore.LastModified() - tsmStats := c.FileStore.Stats() - generations := make(map[int]*tsmGeneration, len(tsmStats)) - for _, f := range tsmStats { - gen, _, _ := c.ParseFileName(f.Path) - - // Skip any files that are assigned to a current compaction plan - if _, ok := c.filesInUse[f.Path]; skipInUse && ok { - continue - } - - group := generations[gen] - if group == nil { - group = newTsmGeneration(gen, c.ParseFileName) - generations[gen] = group - } - group.files = append(group.files, f) - } - - orderedGenerations := make(tsmGenerations, 0, len(generations)) - for _, g := range generations { - orderedGenerations = append(orderedGenerations, g) - } - if !orderedGenerations.IsSorted() { - sort.Sort(orderedGenerations) - } - - c.lastFindGenerations = genTime - c.lastGenerations = orderedGenerations - - return orderedGenerations -} - -func (c *DefaultPlanner) acquire(groups []CompactionGroup) bool { - c.mu.Lock() - defer c.mu.Unlock() - - // See if the new files are already in use - for _, g := range groups { - for _, f := range g { - if _, ok := c.filesInUse[f]; ok { - return false - } - } - } - - // Mark all the new files in use - for _, g := range groups { - for _, f := range g { - c.filesInUse[f] = struct{}{} - } - } - return true -} - -// Release removes the files reference in each compaction group allowing new plans -// to be able to use them. -func (c *DefaultPlanner) Release(groups []CompactionGroup) { - c.mu.Lock() - defer c.mu.Unlock() - for _, g := range groups { - for _, f := range g { - delete(c.filesInUse, f) - } - } -} - -// Compactor merges multiple TSM files into new files or -// writes a Cache into 1 or more TSM files. -type Compactor struct { - Dir string - Size int - - FileStore interface { - SetCurrentGenerationFunc(func() int) - NextGeneration() int - TSMReader(path string) *TSMReader - } - - // RateLimit is the limit for disk writes for all concurrent compactions. - RateLimit limiter.Rate - - formatFileName FormatFileNameFunc - parseFileName ParseFileNameFunc - - mu sync.RWMutex - snapshotsEnabled bool - compactionsEnabled bool - - // lastSnapshotDuration is the amount of time the last snapshot took to complete. - lastSnapshotDuration time.Duration - - snapshotLatencies *latencies - - // The channel to signal that any in progress snapshots should be aborted. - snapshotsInterrupt chan struct{} - // The channel to signal that any in progress level compactions should be aborted. - compactionsInterrupt chan struct{} - - files map[string]struct{} -} - -// NewCompactor returns a new instance of Compactor. -func NewCompactor() *Compactor { - return &Compactor{ - formatFileName: DefaultFormatFileName, - parseFileName: DefaultParseFileName, - } -} - -func (c *Compactor) WithFormatFileNameFunc(formatFileNameFunc FormatFileNameFunc) { - c.formatFileName = formatFileNameFunc -} - -func (c *Compactor) WithParseFileNameFunc(parseFileNameFunc ParseFileNameFunc) { - c.parseFileName = parseFileNameFunc -} - -// Open initializes the Compactor. 
-func (c *Compactor) Open() { - c.mu.Lock() - defer c.mu.Unlock() - if c.snapshotsEnabled || c.compactionsEnabled { - return - } - - c.snapshotsEnabled = true - c.compactionsEnabled = true - c.snapshotsInterrupt = make(chan struct{}) - c.compactionsInterrupt = make(chan struct{}) - c.snapshotLatencies = &latencies{values: make([]time.Duration, 4)} - - c.files = make(map[string]struct{}) -} - -// Close disables the Compactor. -func (c *Compactor) Close() { - c.mu.Lock() - defer c.mu.Unlock() - if !(c.snapshotsEnabled || c.compactionsEnabled) { - return - } - c.snapshotsEnabled = false - c.compactionsEnabled = false - if c.compactionsInterrupt != nil { - close(c.compactionsInterrupt) - } - if c.snapshotsInterrupt != nil { - close(c.snapshotsInterrupt) - } -} - -// DisableSnapshots disables the compactor from performing snapshots. -func (c *Compactor) DisableSnapshots() { - c.mu.Lock() - c.snapshotsEnabled = false - if c.snapshotsInterrupt != nil { - close(c.snapshotsInterrupt) - c.snapshotsInterrupt = nil - } - c.mu.Unlock() -} - -// EnableSnapshots allows the compactor to perform snapshots. -func (c *Compactor) EnableSnapshots() { - c.mu.Lock() - c.snapshotsEnabled = true - if c.snapshotsInterrupt == nil { - c.snapshotsInterrupt = make(chan struct{}) - } - c.mu.Unlock() -} - -// DisableSnapshots disables the compactor from performing compactions. -func (c *Compactor) DisableCompactions() { - c.mu.Lock() - c.compactionsEnabled = false - if c.compactionsInterrupt != nil { - close(c.compactionsInterrupt) - c.compactionsInterrupt = nil - } - c.mu.Unlock() -} - -// EnableCompactions allows the compactor to perform compactions. -func (c *Compactor) EnableCompactions() { - c.mu.Lock() - c.compactionsEnabled = true - if c.compactionsInterrupt == nil { - c.compactionsInterrupt = make(chan struct{}) - } - c.mu.Unlock() -} - -// WriteSnapshot writes a Cache snapshot to one or more new TSM files. -func (c *Compactor) WriteSnapshot(ctx context.Context, cache *Cache) ([]string, error) { - span, _ := tracing.StartSpanFromContext(ctx) - defer span.Finish() - - c.mu.RLock() - enabled := c.snapshotsEnabled - intC := c.snapshotsInterrupt - c.mu.RUnlock() - - if !enabled { - return nil, errSnapshotsDisabled - } - - start := time.Now() - card := cache.Count() - - // Enable throttling if we have lower cardinality or snapshots are going fast. - throttle := card < 3e6 && c.snapshotLatencies.avg() < 15*time.Second - - // Write snapshost concurrently if cardinality is relatively high. - concurrency := card / 2e6 - if concurrency < 1 { - concurrency = 1 - } - - // Special case very high cardinality, use max concurrency and don't throttle writes. - if card >= 3e6 { - concurrency = 4 - throttle = false - } - - splits := cache.Split(concurrency) - - type res struct { - files []string - err error - } - - resC := make(chan res, concurrency) - for i := 0; i < concurrency; i++ { - go func(sp *Cache) { - iter := NewCacheKeyIterator(sp, MaxPointsPerBlock, intC) - files, err := c.writeNewFiles(c.FileStore.NextGeneration(), 0, nil, iter, throttle) - resC <- res{files: files, err: err} - - }(splits[i]) - } - - var err error - files := make([]string, 0, concurrency) - for i := 0; i < concurrency; i++ { - result := <-resC - if result.err != nil { - err = result.err - } - files = append(files, result.files...) 
- } - - dur := time.Since(start).Truncate(time.Second) - - c.mu.Lock() - - // See if we were disabled while writing a snapshot - enabled = c.snapshotsEnabled - c.lastSnapshotDuration = dur - c.snapshotLatencies.add(time.Since(start)) - c.mu.Unlock() - - if !enabled { - return nil, errSnapshotsDisabled - } - - return files, err -} - -// compact writes multiple smaller TSM files into 1 or more larger files. -func (c *Compactor) compact(fast bool, tsmFiles []string) ([]string, error) { - size := c.Size - if size <= 0 { - size = MaxPointsPerBlock - } - - c.mu.RLock() - intC := c.compactionsInterrupt - c.mu.RUnlock() - - // The new compacted files need to added to the max generation in the - // set. We need to find that max generation as well as the max sequence - // number to ensure we write to the next unique location. - var maxGeneration, maxSequence int - for _, f := range tsmFiles { - gen, seq, err := c.parseFileName(f) - if err != nil { - return nil, err - } - - if gen > maxGeneration { - maxGeneration = gen - maxSequence = seq - } - - if gen == maxGeneration && seq > maxSequence { - maxSequence = seq - } - } - - // For each TSM file, create a TSM reader - var trs []*TSMReader - for _, file := range tsmFiles { - select { - case <-intC: - return nil, errCompactionAborted{} - default: - } - - tr := c.FileStore.TSMReader(file) - if tr == nil { - // This would be a bug if this occurred as tsmFiles passed in should only be - // assigned to one compaction at any one time. A nil tr would mean the file - // doesn't exist. - return nil, errCompactionAborted{fmt.Errorf("bad plan: %s", file)} - } - defer tr.Unref() // inform that we're done with this reader when this method returns. - trs = append(trs, tr) - } - - if len(trs) == 0 { - return nil, nil - } - - tsm, err := NewTSMBatchKeyIterator(size, fast, intC, trs...) - if err != nil { - return nil, err - } - - return c.writeNewFiles(maxGeneration, maxSequence, tsmFiles, tsm, true) -} - -// CompactFull writes multiple smaller TSM files into 1 or more larger files. -func (c *Compactor) CompactFull(tsmFiles []string) ([]string, error) { - c.mu.RLock() - enabled := c.compactionsEnabled - c.mu.RUnlock() - - if !enabled { - return nil, errCompactionsDisabled - } - - if !c.add(tsmFiles) { - return nil, errCompactionInProgress{} - } - defer c.remove(tsmFiles) - - files, err := c.compact(false, tsmFiles) - - // See if we were disabled while writing a snapshot - c.mu.RLock() - enabled = c.compactionsEnabled - c.mu.RUnlock() - - if !enabled { - if err := c.removeTmpFiles(files); err != nil { - return nil, err - } - return nil, errCompactionsDisabled - } - - return files, err -} - -// CompactFast writes multiple smaller TSM files into 1 or more larger files. -func (c *Compactor) CompactFast(tsmFiles []string) ([]string, error) { - c.mu.RLock() - enabled := c.compactionsEnabled - c.mu.RUnlock() - - if !enabled { - return nil, errCompactionsDisabled - } - - if !c.add(tsmFiles) { - return nil, errCompactionInProgress{} - } - defer c.remove(tsmFiles) - - files, err := c.compact(true, tsmFiles) - - // See if we were disabled while writing a snapshot - c.mu.RLock() - enabled = c.compactionsEnabled - c.mu.RUnlock() - - if !enabled { - if err := c.removeTmpFiles(files); err != nil { - return nil, err - } - return nil, errCompactionsDisabled - } - - return files, err - -} - -// removeTmpFiles is responsible for cleaning up a compaction that -// was started, but then abandoned before the temporary files were dealt with. 
-func (c *Compactor) removeTmpFiles(files []string) error { - for _, f := range files { - if err := os.Remove(f); err != nil { - return fmt.Errorf("error removing temp compaction file: %v", err) - } - } - return nil -} - -// writeNewFiles writes from the iterator into new TSM files, rotating -// to a new file once it has reached the max TSM file size. -func (c *Compactor) writeNewFiles(generation, sequence int, src []string, iter KeyIterator, throttle bool) ([]string, error) { - // These are the new TSM files written - var files []string - - for { - sequence++ - - // New TSM files are written to a temp file and renamed when fully completed. - fileName := filepath.Join(c.Dir, c.formatFileName(generation, sequence)+"."+TSMFileExtension+"."+TmpTSMFileExtension) - statsFileName := StatsFilename(fileName) - - // Write as much as possible to this file - err := c.write(fileName, iter, throttle) - - // We've hit the max file limit and there is more to write. Create a new file - // and continue. - if err == errMaxFileExceeded || err == ErrMaxBlocksExceeded { - files = append(files, fileName) - continue - } else if err == ErrNoValues { - // If the file only contained tombstoned entries, then it would be a 0 length - // file that we can drop. - if err := os.RemoveAll(fileName); err != nil { - return nil, err - } else if err := os.RemoveAll(statsFileName); err != nil && !os.IsNotExist(err) { - return nil, err - } - break - } else if _, ok := err.(errCompactionInProgress); ok { - // Don't clean up the file as another compaction is using it. This should not happen as the - // planner keeps track of which files are assigned to compaction plans now. - return nil, err - } else if err != nil { - // Remove any tmp files we already completed - for _, f := range files { - if err := os.RemoveAll(f); err != nil { - return nil, err - } else if err := os.RemoveAll(StatsFilename(f)); err != nil && !os.IsNotExist(err) { - return nil, err - } - } - // We hit an error and didn't finish the compaction. Remove the temp file and abort. - if err := os.RemoveAll(fileName); err != nil { - return nil, err - } else if err := os.RemoveAll(statsFileName); err != nil && !os.IsNotExist(err) { - return nil, err - } - return nil, err - } - - files = append(files, fileName) - break - } - - return files, nil -} - -func (c *Compactor) write(path string, iter KeyIterator, throttle bool) (err error) { - fd, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR|os.O_EXCL, 0666) - if err != nil { - return errCompactionInProgress{err: err} - } - - // syncingWriter ensures that whatever we wrap the above file descriptor in - // it will always be able to be synced by the tsm writer, since it does - // type assertions to attempt to sync. - type syncingWriter interface { - io.Writer - Sync() error - } - - // Create the write for the new TSM file. - var ( - w TSMWriter - limitWriter syncingWriter = fd - ) - - if c.RateLimit != nil && throttle { - limitWriter = limiter.NewWriterWithRate(fd, c.RateLimit) - } - - // Use a disk based TSM buffer if it looks like we might create a big index - // in memory. 
- if iter.EstimatedIndexSize() > 64*1024*1024 { - w, err = NewTSMWriterWithDiskBuffer(limitWriter) - if err != nil { - return err - } - } else { - w, err = NewTSMWriter(limitWriter) - if err != nil { - return err - } - } - - defer func() { - closeErr := w.Close() - if err == nil { - err = closeErr - } - - // Check for errors where we should not remove the file - _, inProgress := err.(errCompactionInProgress) - maxBlocks := err == ErrMaxBlocksExceeded - maxFileSize := err == errMaxFileExceeded - if inProgress || maxBlocks || maxFileSize { - return - } - - if err != nil { - w.Remove() - } - }() - - for iter.Next() { - c.mu.RLock() - enabled := c.snapshotsEnabled || c.compactionsEnabled - c.mu.RUnlock() - - if !enabled { - return errCompactionAborted{} - } - // Each call to read returns the next sorted key (or the prior one if there are - // more values to write). The size of values will be less than or equal to our - // chunk size (1000) - key, minTime, maxTime, block, err := iter.Read() - if err != nil { - return err - } - - if minTime > maxTime { - return fmt.Errorf("invalid index entry for block. min=%d, max=%d", minTime, maxTime) - } - - // Write the key and value - if err := w.WriteBlock(key, minTime, maxTime, block); err == ErrMaxBlocksExceeded { - if err := w.WriteIndex(); err != nil { - return err - } - return err - } else if err != nil { - return err - } - - // If we have a max file size configured and we're over it, close out the file - // and return the error. - if w.Size() > maxTSMFileSize { - if err := w.WriteIndex(); err != nil { - return err - } - - return errMaxFileExceeded - } - } - - // Were there any errors encountered during iteration? - if err := iter.Err(); err != nil { - return err - } - - // We're all done. Close out the file. - if err := w.WriteIndex(); err != nil { - return err - } - return nil -} - -func (c *Compactor) add(files []string) bool { - c.mu.Lock() - defer c.mu.Unlock() - - // See if the new files are already in use - for _, f := range files { - if _, ok := c.files[f]; ok { - return false - } - } - - // Mark all the new files in use - for _, f := range files { - c.files[f] = struct{}{} - } - return true -} - -func (c *Compactor) remove(files []string) { - c.mu.Lock() - defer c.mu.Unlock() - for _, f := range files { - delete(c.files, f) - } -} - -// KeyIterator allows iteration over set of keys and values in sorted order. -type KeyIterator interface { - // Next returns true if there are any values remaining in the iterator. - Next() bool - - // Read returns the key, time range, and raw data for the next block, - // or any error that occurred. - Read() (key []byte, minTime int64, maxTime int64, data []byte, err error) - - // Close closes the iterator. - Close() error - - // Err returns any errors encountered during iteration. - Err() error - - // EstimatedIndexSize returns the estimated size of the index that would - // be required to store all the series and entries in the KeyIterator. - EstimatedIndexSize() int -} - -// tsmKeyIterator implements the KeyIterator for set of TSMReaders. Iteration produces -// keys in sorted order and the values between the keys sorted and deduped. If any of -// the readers have associated tombstone entries, they are returned as part of iteration. 
-type tsmKeyIterator struct { - // readers is the set of readers it produce a sorted key run with - readers []*TSMReader - - // values is the temporary buffers for each key that is returned by a reader - values map[string][]Value - - // pos is the current key position within the corresponding readers slice. A value of - // pos[0] = 1, means the reader[0] is currently at key 1 in its ordered index. - pos []int - - // err is any error we received while iterating values. - err error - - // indicates whether the iterator should choose a faster merging strategy over a more - // optimally compressed one. If fast is true, multiple blocks will just be added as is - // and not combined. In some cases, a slower path will need to be utilized even when - // fast is true to prevent overlapping blocks of time for the same key. - // If false, the blocks will be decoded and duplicated (if needed) and - // then chunked into the maximally sized blocks. - fast bool - - // size is the maximum number of values to encode in a single block - size int - - // key is the current key lowest key across all readers that has not be fully exhausted - // of values. - key []byte - typ byte - - iterators []*BlockIterator - blocks blocks - - buf []blocks - - // mergeValues are decoded blocks that have been combined - mergedFloatValues FloatValues - mergedIntegerValues IntegerValues - mergedUnsignedValues UnsignedValues - mergedBooleanValues BooleanValues - mergedStringValues StringValues - - // merged are encoded blocks that have been combined or used as is - // without decode - merged blocks - interrupt chan struct{} -} - -type block struct { - key []byte - minTime, maxTime int64 - typ byte - b []byte - tombstones []TimeRange - - // readMin, readMax are the timestamps range of values have been - // read and encoded from this block. - readMin, readMax int64 -} - -func (b *block) overlapsTimeRange(min, max int64) bool { - return b.minTime <= max && b.maxTime >= min -} - -func (b *block) read() bool { - return b.readMin <= b.minTime && b.readMax >= b.maxTime -} - -func (b *block) markRead(min, max int64) { - if min < b.readMin { - b.readMin = min - } - - if max > b.readMax { - b.readMax = max - } -} - -func (b *block) partiallyRead() bool { - // If readMin and readMax are still the initial values, nothing has been read. - if b.readMin == int64(math.MaxInt64) && b.readMax == int64(math.MinInt64) { - return false - } - return b.readMin != b.minTime || b.readMax != b.maxTime -} - -type blocks []*block - -func (a blocks) Len() int { return len(a) } - -func (a blocks) Less(i, j int) bool { - cmp := bytes.Compare(a[i].key, a[j].key) - if cmp == 0 { - return a[i].minTime < a[j].minTime && a[i].maxTime < a[j].minTime - } - return cmp < 0 -} - -func (a blocks) Swap(i, j int) { a[i], a[j] = a[j], a[i] } - -// NewTSMKeyIterator returns a new TSM key iterator from readers. -// size indicates the maximum number of values to encode in a single block. 
-func NewTSMKeyIterator(size int, fast bool, interrupt chan struct{}, readers ...*TSMReader) (KeyIterator, error) { - var iter []*BlockIterator - for _, r := range readers { - iter = append(iter, r.BlockIterator()) - } - - return &tsmKeyIterator{ - readers: readers, - values: map[string][]Value{}, - pos: make([]int, len(readers)), - size: size, - iterators: iter, - fast: fast, - buf: make([]blocks, len(iter)), - interrupt: interrupt, - }, nil -} - -func (k *tsmKeyIterator) hasMergedValues() bool { - return len(k.mergedFloatValues) > 0 || - len(k.mergedIntegerValues) > 0 || - len(k.mergedUnsignedValues) > 0 || - len(k.mergedStringValues) > 0 || - len(k.mergedBooleanValues) > 0 -} - -func (k *tsmKeyIterator) EstimatedIndexSize() int { - var size uint32 - for _, r := range k.readers { - size += r.IndexSize() - } - return int(size) / len(k.readers) -} - -// Next returns true if there are any values remaining in the iterator. -func (k *tsmKeyIterator) Next() bool { -RETRY: - // Any merged blocks pending? - if len(k.merged) > 0 { - k.merged = k.merged[1:] - if len(k.merged) > 0 { - return true - } - } - - // Any merged values pending? - if k.hasMergedValues() { - k.merge() - if len(k.merged) > 0 || k.hasMergedValues() { - return true - } - } - - // If we still have blocks from the last read, merge them - if len(k.blocks) > 0 { - k.merge() - if len(k.merged) > 0 || k.hasMergedValues() { - return true - } - } - - // Read the next block from each TSM iterator - for i, v := range k.buf { - if len(v) == 0 { - iter := k.iterators[i] - if iter.Next() { - key, minTime, maxTime, typ, _, b, err := iter.Read() - if err != nil { - k.err = err - } - - var blk *block - if cap(k.buf[i]) > len(k.buf[i]) { - k.buf[i] = k.buf[i][:len(k.buf[i])+1] - blk = k.buf[i][len(k.buf[i])-1] - if blk == nil { - blk = &block{} - k.buf[i][len(k.buf[i])-1] = blk - } - } else { - blk = &block{} - k.buf[i] = append(k.buf[i], blk) - } - blk.minTime = minTime - blk.maxTime = maxTime - blk.key = key - blk.typ = typ - blk.b = b - blk.readMin = math.MaxInt64 - blk.readMax = math.MinInt64 - - // This block may have ranges of time removed from it that would - // reduce the block min and max time. - blk.tombstones = iter.r.TombstoneRange(key, blk.tombstones[:0]) - - blockKey := key - for bytes.Equal(iter.PeekNext(), blockKey) { - iter.Next() - key, minTime, maxTime, typ, _, b, err := iter.Read() - if err != nil { - k.err = err - } - - var blk *block - if cap(k.buf[i]) > len(k.buf[i]) { - k.buf[i] = k.buf[i][:len(k.buf[i])+1] - blk = k.buf[i][len(k.buf[i])-1] - if blk == nil { - blk = &block{} - k.buf[i][len(k.buf[i])-1] = blk - } - } else { - blk = &block{} - k.buf[i] = append(k.buf[i], blk) - } - - blk.minTime = minTime - blk.maxTime = maxTime - blk.key = key - blk.typ = typ - blk.b = b - blk.readMin = math.MaxInt64 - blk.readMax = math.MinInt64 - blk.tombstones = iter.r.TombstoneRange(key, blk.tombstones[:0]) - } - } - - if iter.Err() != nil { - k.err = iter.Err() - } - } - } - - // Each reader could have a different key that it's currently at, need to find - // the next smallest one to keep the sort ordering. 
- var minKey []byte - var minType byte - for _, b := range k.buf { - // block could be nil if the iterator has been exhausted for that file - if len(b) == 0 { - continue - } - if len(minKey) == 0 || bytes.Compare(b[0].key, minKey) < 0 { - minKey = b[0].key - minType = b[0].typ - } - } - k.key = minKey - k.typ = minType - - // Now we need to find all blocks that match the min key so we can combine and dedupe - // the blocks if necessary - for i, b := range k.buf { - if len(b) == 0 { - continue - } - if bytes.Equal(b[0].key, k.key) { - k.blocks = append(k.blocks, b...) - k.buf[i] = k.buf[i][:0] - } - } - - if len(k.blocks) == 0 { - return false - } - - k.merge() - - // After merging all the values for this key, we might not have any. (e.g. they were all deleted - // through many tombstones). In this case, move on to the next key instead of ending iteration. - if len(k.merged) == 0 { - goto RETRY - } - - return len(k.merged) > 0 -} - -// merge combines the next set of blocks into merged blocks. -func (k *tsmKeyIterator) merge() { - switch k.typ { - case BlockFloat64: - k.mergeFloat() - case BlockInteger: - k.mergeInteger() - case BlockUnsigned: - k.mergeUnsigned() - case BlockBoolean: - k.mergeBoolean() - case BlockString: - k.mergeString() - default: - k.err = fmt.Errorf("unknown block type: %v", k.typ) - } -} - -func (k *tsmKeyIterator) Read() ([]byte, int64, int64, []byte, error) { - // See if compactions were disabled while we were running. - select { - case <-k.interrupt: - return nil, 0, 0, nil, errCompactionAborted{} - default: - } - - if len(k.merged) == 0 { - return nil, 0, 0, nil, k.err - } - - block := k.merged[0] - return block.key, block.minTime, block.maxTime, block.b, k.err -} - -func (k *tsmKeyIterator) Close() error { - k.values = nil - k.pos = nil - k.iterators = nil - for _, r := range k.readers { - if err := r.Close(); err != nil { - return err - } - } - return nil -} - -// Error returns any errors encountered during iteration. -func (k *tsmKeyIterator) Err() error { - return k.err -} - -// tsmBatchKeyIterator implements the KeyIterator for set of TSMReaders. Iteration produces -// keys in sorted order and the values between the keys sorted and deduped. If any of -// the readers have associated tombstone entries, they are returned as part of iteration. -type tsmBatchKeyIterator struct { - // readers is the set of readers it produce a sorted key run with - readers []*TSMReader - - // values is the temporary buffers for each key that is returned by a reader - values map[string][]Value - - // pos is the current key position within the corresponding readers slice. A value of - // pos[0] = 1, means the reader[0] is currently at key 1 in its ordered index. - pos []int - - // err is any error we received while iterating values. - err error - - // indicates whether the iterator should choose a faster merging strategy over a more - // optimally compressed one. If fast is true, multiple blocks will just be added as is - // and not combined. In some cases, a slower path will need to be utilized even when - // fast is true to prevent overlapping blocks of time for the same key. - // If false, the blocks will be decoded and duplicated (if needed) and - // then chunked into the maximally sized blocks. - fast bool - - // size is the maximum number of values to encode in a single block - size int - - // key is the current key lowest key across all readers that has not be fully exhausted - // of values. 
- key []byte - typ byte - - iterators []*BlockIterator - blocks blocks - - buf []blocks - - // mergeValues are decoded blocks that have been combined - mergedFloatValues *cursors.FloatArray - mergedIntegerValues *cursors.IntegerArray - mergedUnsignedValues *cursors.UnsignedArray - mergedBooleanValues *cursors.BooleanArray - mergedStringValues *cursors.StringArray - - // merged are encoded blocks that have been combined or used as is - // without decode - merged blocks - interrupt chan struct{} -} - -// NewTSMBatchKeyIterator returns a new TSM key iterator from readers. -// size indicates the maximum number of values to encode in a single block. -func NewTSMBatchKeyIterator(size int, fast bool, interrupt chan struct{}, readers ...*TSMReader) (KeyIterator, error) { - var iter []*BlockIterator - for _, r := range readers { - iter = append(iter, r.BlockIterator()) - } - - return &tsmBatchKeyIterator{ - readers: readers, - values: map[string][]Value{}, - pos: make([]int, len(readers)), - size: size, - iterators: iter, - fast: fast, - buf: make([]blocks, len(iter)), - mergedFloatValues: &cursors.FloatArray{}, - mergedIntegerValues: &cursors.IntegerArray{}, - mergedUnsignedValues: &cursors.UnsignedArray{}, - mergedBooleanValues: &cursors.BooleanArray{}, - mergedStringValues: &cursors.StringArray{}, - interrupt: interrupt, - }, nil -} - -func (k *tsmBatchKeyIterator) hasMergedValues() bool { - return k.mergedFloatValues.Len() > 0 || - k.mergedIntegerValues.Len() > 0 || - k.mergedUnsignedValues.Len() > 0 || - k.mergedStringValues.Len() > 0 || - k.mergedBooleanValues.Len() > 0 -} - -func (k *tsmBatchKeyIterator) EstimatedIndexSize() int { - var size uint32 - for _, r := range k.readers { - size += r.IndexSize() - } - return int(size) / len(k.readers) -} - -// Next returns true if there are any values remaining in the iterator. -func (k *tsmBatchKeyIterator) Next() bool { -RETRY: - // Any merged blocks pending? - if len(k.merged) > 0 { - k.merged = k.merged[1:] - if len(k.merged) > 0 { - return true - } - } - - // Any merged values pending? - if k.hasMergedValues() { - k.merge() - if len(k.merged) > 0 || k.hasMergedValues() { - return true - } - } - - // If we still have blocks from the last read, merge them - if len(k.blocks) > 0 { - k.merge() - if len(k.merged) > 0 || k.hasMergedValues() { - return true - } - } - - // Read the next block from each TSM iterator - for i, v := range k.buf { - if len(v) != 0 { - continue - } - - iter := k.iterators[i] - if iter.Next() { - key, minTime, maxTime, typ, _, b, err := iter.Read() - if err != nil { - k.err = err - } - - var blk *block - if cap(k.buf[i]) > len(k.buf[i]) { - k.buf[i] = k.buf[i][:len(k.buf[i])+1] - blk = k.buf[i][len(k.buf[i])-1] - if blk == nil { - blk = &block{} - k.buf[i][len(k.buf[i])-1] = blk - } - } else { - blk = &block{} - k.buf[i] = append(k.buf[i], blk) - } - blk.minTime = minTime - blk.maxTime = maxTime - blk.key = key - blk.typ = typ - blk.b = b - blk.readMin = math.MaxInt64 - blk.readMax = math.MinInt64 - - // This block may have ranges of time removed from it that would - // reduce the block min and max time. 
- blk.tombstones = iter.r.TombstoneRange(key, blk.tombstones[:0]) - - blockKey := key - for bytes.Equal(iter.PeekNext(), blockKey) { - iter.Next() - key, minTime, maxTime, typ, _, b, err := iter.Read() - if err != nil { - k.err = err - } - - var blk *block - if cap(k.buf[i]) > len(k.buf[i]) { - k.buf[i] = k.buf[i][:len(k.buf[i])+1] - blk = k.buf[i][len(k.buf[i])-1] - if blk == nil { - blk = &block{} - k.buf[i][len(k.buf[i])-1] = blk - } - } else { - blk = &block{} - k.buf[i] = append(k.buf[i], blk) - } - - blk.minTime = minTime - blk.maxTime = maxTime - blk.key = key - blk.typ = typ - blk.b = b - blk.readMin = math.MaxInt64 - blk.readMax = math.MinInt64 - blk.tombstones = iter.r.TombstoneRange(key, blk.tombstones[:0]) - } - } - - if iter.Err() != nil { - k.err = iter.Err() - } - } - - // Each reader could have a different key that it's currently at, need to find - // the next smallest one to keep the sort ordering. - var minKey []byte - var minType byte - for _, b := range k.buf { - // block could be nil if the iterator has been exhausted for that file - if len(b) == 0 { - continue - } - if len(minKey) == 0 || bytes.Compare(b[0].key, minKey) < 0 { - minKey = b[0].key - minType = b[0].typ - } - } - k.key = minKey - k.typ = minType - - // Now we need to find all blocks that match the min key so we can combine and dedupe - // the blocks if necessary - for i, b := range k.buf { - if len(b) == 0 { - continue - } - if bytes.Equal(b[0].key, k.key) { - k.blocks = append(k.blocks, b...) - k.buf[i] = k.buf[i][:0] - } - } - - if len(k.blocks) == 0 { - return false - } - - k.merge() - - // After merging all the values for this key, we might not have any. (e.g. they were all deleted - // through many tombstones). In this case, move on to the next key instead of ending iteration. - if len(k.merged) == 0 { - goto RETRY - } - - return len(k.merged) > 0 -} - -// merge combines the next set of blocks into merged blocks. -func (k *tsmBatchKeyIterator) merge() { - switch k.typ { - case BlockFloat64: - k.mergeFloat() - case BlockInteger: - k.mergeInteger() - case BlockUnsigned: - k.mergeUnsigned() - case BlockBoolean: - k.mergeBoolean() - case BlockString: - k.mergeString() - default: - k.err = fmt.Errorf("unknown block type: %v", k.typ) - } -} - -func (k *tsmBatchKeyIterator) Read() ([]byte, int64, int64, []byte, error) { - // See if compactions were disabled while we were running. - select { - case <-k.interrupt: - return nil, 0, 0, nil, errCompactionAborted{} - default: - } - - if len(k.merged) == 0 { - return nil, 0, 0, nil, k.err - } - - block := k.merged[0] - return block.key, block.minTime, block.maxTime, block.b, k.err -} - -func (k *tsmBatchKeyIterator) Close() error { - k.values = nil - k.pos = nil - k.iterators = nil - for _, r := range k.readers { - if err := r.Close(); err != nil { - return err - } - } - return nil -} - -// Error returns any errors encountered during iteration. -func (k *tsmBatchKeyIterator) Err() error { - return k.err -} - -type cacheKeyIterator struct { - cache *Cache - size int - order [][]byte - - i int - blocks [][]cacheBlock - ready []chan struct{} - interrupt chan struct{} - err error -} - -type cacheBlock struct { - k []byte - minTime, maxTime int64 - b []byte - err error -} - -// NewCacheKeyIterator returns a new KeyIterator from a Cache. 
-func NewCacheKeyIterator(cache *Cache, size int, interrupt chan struct{}) KeyIterator { - keys := cache.Keys() - - chans := make([]chan struct{}, len(keys)) - for i := 0; i < len(keys); i++ { - chans[i] = make(chan struct{}, 1) - } - - cki := &cacheKeyIterator{ - i: -1, - size: size, - cache: cache, - order: keys, - ready: chans, - blocks: make([][]cacheBlock, len(keys)), - interrupt: interrupt, - } - go cki.encode() - return cki -} - -func (c *cacheKeyIterator) EstimatedIndexSize() int { - var n int - for _, v := range c.order { - n += len(v) - } - return n -} - -func (c *cacheKeyIterator) encode() { - concurrency := runtime.GOMAXPROCS(0) - n := len(c.ready) - - // Divide the keyset across each CPU - chunkSize := 1 - idx := uint64(0) - - for i := 0; i < concurrency; i++ { - // Run one goroutine per CPU and encode a section of the key space concurrently - go func() { - tenc := getTimeEncoder(MaxPointsPerBlock) - fenc := getFloatEncoder(MaxPointsPerBlock) - benc := getBooleanEncoder(MaxPointsPerBlock) - uenc := getUnsignedEncoder(MaxPointsPerBlock) - senc := getStringEncoder(MaxPointsPerBlock) - ienc := getIntegerEncoder(MaxPointsPerBlock) - - defer putTimeEncoder(tenc) - defer putFloatEncoder(fenc) - defer putBooleanEncoder(benc) - defer putUnsignedEncoder(uenc) - defer putStringEncoder(senc) - defer putIntegerEncoder(ienc) - - for { - i := int(atomic.AddUint64(&idx, uint64(chunkSize))) - chunkSize - - if i >= n { - break - } - - key := c.order[i] - values := c.cache.values(key) - - for len(values) > 0 { - - end := len(values) - if end > c.size { - end = c.size - } - - minTime, maxTime := values[0].UnixNano(), values[end-1].UnixNano() - var b []byte - var err error - - switch values[0].(type) { - case FloatValue: - b, err = encodeFloatBlockUsing(nil, values[:end], tenc, fenc) - case IntegerValue: - b, err = encodeIntegerBlockUsing(nil, values[:end], tenc, ienc) - case UnsignedValue: - b, err = encodeUnsignedBlockUsing(nil, values[:end], tenc, uenc) - case BooleanValue: - b, err = encodeBooleanBlockUsing(nil, values[:end], tenc, benc) - case StringValue: - b, err = encodeStringBlockUsing(nil, values[:end], tenc, senc) - default: - b, err = Values(values[:end]).Encode(nil) - } - - values = values[end:] - - c.blocks[i] = append(c.blocks[i], cacheBlock{ - k: key, - minTime: minTime, - maxTime: maxTime, - b: b, - err: err, - }) - - if err != nil { - c.err = err - } - } - // Notify this key is fully encoded - c.ready[i] <- struct{}{} - } - }() - } -} - -func (c *cacheKeyIterator) Next() bool { - if c.i >= 0 && c.i < len(c.ready) && len(c.blocks[c.i]) > 0 { - c.blocks[c.i] = c.blocks[c.i][1:] - if len(c.blocks[c.i]) > 0 { - return true - } - } - c.i++ - - if c.i >= len(c.ready) { - return false - } - - <-c.ready[c.i] - return true -} - -func (c *cacheKeyIterator) Read() ([]byte, int64, int64, []byte, error) { - // See if snapshot compactions were disabled while we were running. 
- select { - case <-c.interrupt: - c.err = errCompactionAborted{} - return nil, 0, 0, nil, c.err - default: - } - - blk := c.blocks[c.i][0] - return blk.k, blk.minTime, blk.maxTime, blk.b, blk.err -} - -func (c *cacheKeyIterator) Close() error { - return nil -} - -func (c *cacheKeyIterator) Err() error { - return c.err -} - -type tsmGenerations []*tsmGeneration - -func (a tsmGenerations) Len() int { return len(a) } -func (a tsmGenerations) Less(i, j int) bool { return a[i].id < a[j].id } -func (a tsmGenerations) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a tsmGenerations) hasTombstones() bool { - for _, g := range a { - if g.hasTombstones() { - return true - } - } - return false -} - -func (a tsmGenerations) level() int { - var level int - for _, g := range a { - lev := g.level() - if lev > level { - level = lev - } - } - return level -} - -func (a tsmGenerations) chunk(size int) []tsmGenerations { - var chunks []tsmGenerations - for len(a) > 0 { - if len(a) >= size { - chunks = append(chunks, a[:size]) - a = a[size:] - } else { - chunks = append(chunks, a) - a = a[len(a):] - } - } - return chunks -} - -func (a tsmGenerations) IsSorted() bool { - if len(a) == 1 { - return true - } - - for i := 1; i < len(a); i++ { - if a.Less(i, i-1) { - return false - } - } - return true -} - -type latencies struct { - i int - values []time.Duration -} - -func (l *latencies) add(t time.Duration) { - l.values[l.i%len(l.values)] = t - l.i++ -} - -func (l *latencies) avg() time.Duration { - var n int64 - var sum time.Duration - for _, v := range l.values { - if v == 0 { - continue - } - sum += v - n++ - } - - if n > 0 { - return time.Duration(int64(sum) / n) - } - return time.Duration(0) -} diff --git a/tsdb/tsm1/compact_test.go b/tsdb/tsm1/compact_test.go deleted file mode 100644 index 4e3dd3572b..0000000000 --- a/tsdb/tsm1/compact_test.go +++ /dev/null @@ -1,3023 +0,0 @@ -package tsm1_test - -import ( - "bufio" - "context" - "fmt" - "math" - "os" - "path/filepath" - "sort" - "testing" - "time" - - "github.com/google/go-cmp/cmp" - "github.com/influxdata/influxdb/v2/pkg/fs" - "github.com/influxdata/influxdb/v2/tsdb/cursors" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" -) - -// Tests compacting a Cache snapshot into a single TSM file -func TestCompactor_Snapshot(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - - v1 := tsm1.NewValue(1, float64(1)) - v2 := tsm1.NewValue(1, float64(1)) - v3 := tsm1.NewValue(2, float64(2)) - - points1 := map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {v1}, - "cpu,host=B#!~#value": {v2, v3}, - } - - c := tsm1.NewCache(0) - for k, v := range points1 { - if err := c.Write([]byte(k), v); err != nil { - t.Fatalf("failed to write key foo to cache: %s", err.Error()) - } - } - - compactor := tsm1.NewCompactor() - compactor.Dir = dir - compactor.FileStore = &fakeFileStore{} - - files, err := compactor.WriteSnapshot(context.Background(), c) - if err == nil { - t.Fatalf("expected error writing snapshot: %v", err) - } - if len(files) > 0 { - t.Fatalf("no files should be compacted: got %v", len(files)) - - } - - compactor.Open() - - files, err = compactor.WriteSnapshot(context.Background(), c) - if err != nil { - t.Fatalf("unexpected error writing snapshot: %v", err) - } - - if got, exp := len(files), 1; got != exp { - t.Fatalf("files length mismatch: got %v, exp %v", got, exp) - } - - r := MustOpenTSMReader(files[0]) - - if got, exp := r.KeyCount(), 2; got != exp { - t.Fatalf("keys length mismatch: got %v, exp %v", got, exp) - } - - var data = []struct { - key 
string - points []tsm1.Value - }{ - {"cpu,host=A#!~#value", []tsm1.Value{v1}}, - {"cpu,host=B#!~#value", []tsm1.Value{v2, v3}}, - } - - for _, p := range data { - values, err := r.ReadAll([]byte(p.key)) - if err != nil { - t.Fatalf("unexpected error reading: %v", err) - } - - if got, exp := len(values), len(p.points); got != exp { - t.Fatalf("values length mismatch: got %v, exp %v", got, exp) - } - - for i, point := range p.points { - assertValueEqual(t, values[i], point) - } - } -} - -func TestCompactor_CompactFullLastTimestamp(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - - var vals tsm1.Values - ts := int64(1e9) - for i := 0; i < 120; i++ { - vals = append(vals, tsm1.NewIntegerValue(ts, 1)) - ts += 1e9 - } - // 121st timestamp skips a second - ts += 1e9 - vals = append(vals, tsm1.NewIntegerValue(ts, 1)) - writes := map[string][]tsm1.Value{ - "cpu,host=A#!~#value": vals[:100], - } - f1 := MustWriteTSM(dir, 1, writes) - - writes = map[string][]tsm1.Value{ - "cpu,host=A#!~#value": vals[100:], - } - f2 := MustWriteTSM(dir, 2, writes) - - fs := &fakeFileStore{} - defer fs.Close() - compactor := tsm1.NewCompactor() - compactor.Dir = dir - compactor.FileStore = fs - compactor.Open() - - files, err := compactor.CompactFull([]string{f1, f2}) - if err != nil { - t.Fatalf("unexpected error writing snapshot: %v", err) - } - - r := MustOpenTSMReader(files[0]) - entries, err := r.ReadEntries([]byte("cpu,host=A#!~#value"), nil) - if err != nil { - t.Fatal(err) - } - _, b, err := r.ReadBytes(&entries[0], nil) - if err != nil { - t.Fatalf("ReadBytes: unexpected error %v", err) - } - var a cursors.IntegerArray - err = tsm1.DecodeIntegerArrayBlock(b, &a) - if err != nil { - t.Fatalf("DecodeIntegerArrayBlock: unexpected error %v", err) - } - - if a.MaxTime() != entries[0].MaxTime { - t.Fatalf("expected MaxTime == a.MaxTime()") - } -} - -// Ensures that a compaction will properly merge multiple TSM files -func TestCompactor_CompactFull(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - - // write 3 TSM files with different data and one new point - a1 := tsm1.NewValue(1, 1.1) - writes := map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {a1}, - } - f1 := MustWriteTSM(dir, 1, writes) - - a2 := tsm1.NewValue(2, 1.2) - b1 := tsm1.NewValue(1, 2.1) - writes = map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {a2}, - "cpu,host=B#!~#value": {b1}, - } - f2 := MustWriteTSM(dir, 2, writes) - - a3 := tsm1.NewValue(1, 1.3) - c1 := tsm1.NewValue(1, 3.1) - writes = map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {a3}, - "cpu,host=C#!~#value": {c1}, - } - f3 := MustWriteTSM(dir, 3, writes) - - fs := &fakeFileStore{} - defer fs.Close() - compactor := tsm1.NewCompactor() - compactor.Dir = dir - compactor.FileStore = fs - - files, err := compactor.CompactFull([]string{f1, f2, f3}) - if err == nil { - t.Fatalf("expected error writing snapshot: %v", err) - } - if len(files) > 0 { - t.Fatalf("no files should be compacted: got %v", len(files)) - - } - - compactor.Open() - - files, err = compactor.CompactFull([]string{f1, f2, f3}) - if err != nil { - t.Fatalf("unexpected error writing snapshot: %v", err) - } - - if got, exp := len(files), 1; got != exp { - t.Fatalf("files length mismatch: got %v, exp %v", got, exp) - } - - stats := tsm1.NewMeasurementStats() - if f, err := os.Open(tsm1.StatsFilename(files[0])); err != nil { - t.Fatal(err) - } else if _, err := stats.ReadFrom(bufio.NewReader(f)); err != nil { - t.Fatal(err) - } else if err := f.Close(); err != nil { - t.Fatal(err) - } else if 
diff := cmp.Diff(stats, tsm1.MeasurementStats{"cpu": 112}); diff != "" { - t.Fatal(diff) - } - - expGen, expSeq, err := tsm1.DefaultParseFileName(f3) - if err != nil { - t.Fatalf("unexpected error parsing file name: %v", err) - } - expSeq = expSeq + 1 - - gotGen, gotSeq, err := tsm1.DefaultParseFileName(files[0]) - if err != nil { - t.Fatalf("unexpected error parsing file name: %v", err) - } - - if gotGen != expGen { - t.Fatalf("wrong generation for new file: got %v, exp %v", gotGen, expGen) - } - - if gotSeq != expSeq { - t.Fatalf("wrong sequence for new file: got %v, exp %v", gotSeq, expSeq) - } - - r := MustOpenTSMReader(files[0]) - - if got, exp := r.KeyCount(), 3; got != exp { - t.Fatalf("keys length mismatch: got %v, exp %v", got, exp) - } - - var data = []struct { - key string - points []tsm1.Value - }{ - {"cpu,host=A#!~#value", []tsm1.Value{a3, a2}}, - {"cpu,host=B#!~#value", []tsm1.Value{b1}}, - {"cpu,host=C#!~#value", []tsm1.Value{c1}}, - } - - for _, p := range data { - values, err := r.ReadAll([]byte(p.key)) - if err != nil { - t.Fatalf("unexpected error reading: %v", err) - } - - if got, exp := len(values), len(p.points); got != exp { - t.Fatalf("values length mismatch %s: got %v, exp %v", p.key, got, exp) - } - - for i, point := range p.points { - assertValueEqual(t, values[i], point) - } - } -} - -// Ensures that a compaction will properly merge multiple TSM files -func TestCompactor_Compact_OverlappingBlocks(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - - // write 3 TSM files with different data and one new point - a1 := tsm1.NewValue(4, 1.1) - a2 := tsm1.NewValue(5, 1.1) - a3 := tsm1.NewValue(7, 1.1) - - writes := map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {a1, a2, a3}, - } - f1 := MustWriteTSM(dir, 1, writes) - - c1 := tsm1.NewValue(3, 1.2) - c2 := tsm1.NewValue(8, 1.2) - c3 := tsm1.NewValue(9, 1.2) - - writes = map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {c1, c2, c3}, - } - f3 := MustWriteTSM(dir, 3, writes) - - fs := &fakeFileStore{} - defer fs.Close() - compactor := tsm1.NewCompactor() - compactor.Dir = dir - compactor.FileStore = fs - compactor.Size = 2 - - compactor.Open() - - files, err := compactor.CompactFast([]string{f1, f3}) - if err != nil { - t.Fatalf("unexpected error writing snapshot: %v", err) - } - - if got, exp := len(files), 1; got != exp { - t.Fatalf("files length mismatch: got %v, exp %v", got, exp) - } - - stats := tsm1.NewMeasurementStats() - if f, err := os.Open(tsm1.StatsFilename(files[0])); err != nil { - t.Fatal(err) - } else if _, err := stats.ReadFrom(bufio.NewReader(f)); err != nil { - t.Fatal(err) - } else if err := f.Close(); err != nil { - t.Fatal(err) - } else if diff := cmp.Diff(stats, tsm1.MeasurementStats{"cpu": 116}); diff != "" { - t.Fatal(diff) - } - - r := MustOpenTSMReader(files[0]) - - if got, exp := r.KeyCount(), 1; got != exp { - t.Fatalf("keys length mismatch: got %v, exp %v", got, exp) - } - - var data = []struct { - key string - points []tsm1.Value - }{ - {"cpu,host=A#!~#value", []tsm1.Value{c1, a1, a2, a3, c2, c3}}, - } - - for _, p := range data { - values, err := r.ReadAll([]byte(p.key)) - if err != nil { - t.Fatalf("unexpected error reading: %v", err) - } - - if got, exp := len(values), len(p.points); got != exp { - t.Fatalf("values length mismatch %s: got %v, exp %v", p.key, got, exp) - } - - for i, point := range p.points { - assertValueEqual(t, values[i], point) - } - } -} - -// Ensures that a compaction will properly merge multiple TSM files -func 
TestCompactor_Compact_OverlappingBlocksMultiple(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - - // write 3 TSM files with different data and one new point - a1 := tsm1.NewValue(4, 1.1) - a2 := tsm1.NewValue(5, 1.1) - a3 := tsm1.NewValue(7, 1.1) - - writes := map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {a1, a2, a3}, - } - f1 := MustWriteTSM(dir, 1, writes) - - b1 := tsm1.NewValue(1, 1.2) - b2 := tsm1.NewValue(2, 1.2) - b3 := tsm1.NewValue(6, 1.2) - - writes = map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {b1, b2, b3}, - } - f2 := MustWriteTSM(dir, 2, writes) - - c1 := tsm1.NewValue(3, 1.2) - c2 := tsm1.NewValue(8, 1.2) - c3 := tsm1.NewValue(9, 1.2) - - writes = map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {c1, c2, c3}, - } - f3 := MustWriteTSM(dir, 3, writes) - - fs := &fakeFileStore{} - defer fs.Close() - compactor := tsm1.NewCompactor() - compactor.Dir = dir - compactor.FileStore = fs - compactor.Size = 2 - - compactor.Open() - - files, err := compactor.CompactFast([]string{f1, f2, f3}) - if err != nil { - t.Fatalf("unexpected error writing snapshot: %v", err) - } - - if got, exp := len(files), 1; got != exp { - t.Fatalf("files length mismatch: got %v, exp %v", got, exp) - } - - stats := tsm1.NewMeasurementStats() - if f, err := os.Open(tsm1.StatsFilename(files[0])); err != nil { - t.Fatal(err) - } else if _, err := stats.ReadFrom(bufio.NewReader(f)); err != nil { - t.Fatal(err) - } else if err := f.Close(); err != nil { - t.Fatal(err) - } else if diff := cmp.Diff(stats, tsm1.MeasurementStats{"cpu": 202}); diff != "" { - t.Fatal(diff) - } - - r := MustOpenTSMReader(files[0]) - - if got, exp := r.KeyCount(), 1; got != exp { - t.Fatalf("keys length mismatch: got %v, exp %v", got, exp) - } - - var data = []struct { - key string - points []tsm1.Value - }{ - {"cpu,host=A#!~#value", []tsm1.Value{b1, b2, c1, a1, a2, b3, a3, c2, c3}}, - } - - for _, p := range data { - values, err := r.ReadAll([]byte(p.key)) - if err != nil { - t.Fatalf("unexpected error reading: %v", err) - } - - if got, exp := len(values), len(p.points); got != exp { - t.Fatalf("values length mismatch %s: got %v, exp %v", p.key, got, exp) - } - - for i, point := range p.points { - assertValueEqual(t, values[i], point) - } - } -} - -func TestCompactor_Compact_UnsortedBlocks(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - - // write 2 TSM files with different data and one new point - a1 := tsm1.NewValue(4, 1.1) - a2 := tsm1.NewValue(5, 1.1) - a3 := tsm1.NewValue(6, 1.1) - - writes := map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {a1, a2, a3}, - } - f1 := MustWriteTSM(dir, 1, writes) - - b1 := tsm1.NewValue(1, 1.2) - b2 := tsm1.NewValue(2, 1.2) - b3 := tsm1.NewValue(3, 1.2) - - writes = map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {b1, b2, b3}, - } - f2 := MustWriteTSM(dir, 2, writes) - - compactor := tsm1.NewCompactor() - compactor.Dir = dir - compactor.FileStore = &fakeFileStore{} - compactor.Size = 2 - - compactor.Open() - - files, err := compactor.CompactFast([]string{f1, f2}) - if err != nil { - t.Fatalf("unexpected error writing snapshot: %v", err) - } - - if got, exp := len(files), 1; got != exp { - t.Fatalf("files length mismatch: got %v, exp %v", got, exp) - } - - r := MustOpenTSMReader(files[0]) - - if got, exp := r.KeyCount(), 1; got != exp { - t.Fatalf("keys length mismatch: got %v, exp %v", got, exp) - } - - var data = []struct { - key string - points []tsm1.Value - }{ - {"cpu,host=A#!~#value", []tsm1.Value{b1, b2, b3, a1, a2, a3}}, - } - - for _, p := 
range data { - values, err := r.ReadAll([]byte(p.key)) - if err != nil { - t.Fatalf("unexpected error reading: %v", err) - } - - if got, exp := len(values), len(p.points); got != exp { - t.Fatalf("values length mismatch %s: got %v, exp %v", p.key, got, exp) - } - - for i, point := range p.points { - assertValueEqual(t, values[i], point) - } - } -} - -func TestCompactor_Compact_UnsortedBlocksOverlapping(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - - // write 3 TSM files where two blocks are overlapping and with unsorted order - a1 := tsm1.NewValue(1, 1.1) - a2 := tsm1.NewValue(2, 1.1) - - writes := map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {a1, a2}, - } - f1 := MustWriteTSM(dir, 1, writes) - - b1 := tsm1.NewValue(3, 1.2) - b2 := tsm1.NewValue(4, 1.2) - - writes = map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {b1, b2}, - } - f2 := MustWriteTSM(dir, 2, writes) - - c1 := tsm1.NewValue(1, 1.1) - c2 := tsm1.NewValue(2, 1.1) - - writes = map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {c1, c2}, - } - f3 := MustWriteTSM(dir, 3, writes) - - compactor := tsm1.NewCompactor() - compactor.Dir = dir - compactor.FileStore = &fakeFileStore{} - compactor.Size = 2 - - compactor.Open() - - files, err := compactor.CompactFast([]string{f1, f2, f3}) - if err != nil { - t.Fatalf("unexpected error writing snapshot: %v", err) - } - - if got, exp := len(files), 1; got != exp { - t.Fatalf("files length mismatch: got %v, exp %v", got, exp) - } - - r := MustOpenTSMReader(files[0]) - - if got, exp := r.KeyCount(), 1; got != exp { - t.Fatalf("keys length mismatch: got %v, exp %v", got, exp) - } - - var data = []struct { - key string - points []tsm1.Value - }{ - {"cpu,host=A#!~#value", []tsm1.Value{a1, a2, b1, b2}}, - } - - for _, p := range data { - values, err := r.ReadAll([]byte(p.key)) - if err != nil { - t.Fatalf("unexpected error reading: %v", err) - } - - if got, exp := len(values), len(p.points); got != exp { - t.Fatalf("values length mismatch %s: got %v, exp %v", p.key, got, exp) - } - - for i, point := range p.points { - assertValueEqual(t, values[i], point) - } - } -} - -// Ensures that a compaction will properly merge multiple TSM files -func TestCompactor_CompactFull_SkipFullBlocks(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - - // write 3 TSM files with different data and one new point - a1 := tsm1.NewValue(1, 1.1) - a2 := tsm1.NewValue(2, 1.2) - writes := map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {a1, a2}, - } - f1 := MustWriteTSM(dir, 1, writes) - - a3 := tsm1.NewValue(3, 1.3) - writes = map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {a3}, - } - f2 := MustWriteTSM(dir, 2, writes) - - a4 := tsm1.NewValue(4, 1.4) - writes = map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {a4}, - } - f3 := MustWriteTSM(dir, 3, writes) - - fs := &fakeFileStore{} - defer fs.Close() - compactor := tsm1.NewCompactor() - compactor.Dir = dir - compactor.FileStore = fs - compactor.Size = 2 - compactor.Open() - - files, err := compactor.CompactFull([]string{f1, f2, f3}) - if err != nil { - t.Fatalf("unexpected error writing snapshot: %v", err) - } - - if got, exp := len(files), 1; got != exp { - t.Fatalf("files length mismatch: got %v, exp %v", got, exp) - } - - expGen, expSeq, err := tsm1.DefaultParseFileName(f3) - if err != nil { - t.Fatalf("unexpected error parsing file name: %v", err) - } - expSeq = expSeq + 1 - - gotGen, gotSeq, err := tsm1.DefaultParseFileName(files[0]) - if err != nil { - t.Fatalf("unexpected error parsing file name: %v", err) - } - - if 
gotGen != expGen { - t.Fatalf("wrong generation for new file: got %v, exp %v", gotGen, expGen) - } - - if gotSeq != expSeq { - t.Fatalf("wrong sequence for new file: got %v, exp %v", gotSeq, expSeq) - } - - r := MustOpenTSMReader(files[0]) - - if got, exp := r.KeyCount(), 1; got != exp { - t.Fatalf("keys length mismatch: got %v, exp %v", got, exp) - } - - var data = []struct { - key string - points []tsm1.Value - }{ - {"cpu,host=A#!~#value", []tsm1.Value{a1, a2, a3, a4}}, - } - - for _, p := range data { - values, err := r.ReadAll([]byte(p.key)) - if err != nil { - t.Fatalf("unexpected error reading: %v", err) - } - - if got, exp := len(values), len(p.points); got != exp { - t.Fatalf("values length mismatch %s: got %v, exp %v", p.key, got, exp) - } - - for i, point := range p.points { - assertValueEqual(t, values[i], point) - } - } - - entries, err := r.ReadEntries([]byte("cpu,host=A#!~#value"), nil) - if err != nil { - t.Fatal(err) - } - if got, exp := len(entries), 2; got != exp { - t.Fatalf("block count mismatch: got %v, exp %v", got, exp) - } -} - -// Ensures that a full compaction will skip over blocks that have the full -// range of time contained in the block tombstoned -func TestCompactor_CompactFull_TombstonedSkipBlock(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - - // write 3 TSM files with different data and one new point - a1 := tsm1.NewValue(1, 1.1) - a2 := tsm1.NewValue(2, 1.2) - writes := map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {a1, a2}, - } - f1 := MustWriteTSM(dir, 1, writes) - - ts := tsm1.NewTombstoner(f1, nil) - ts.AddRange([][]byte{[]byte("cpu,host=A#!~#value")}, math.MinInt64, math.MaxInt64) - - if err := ts.Flush(); err != nil { - t.Fatalf("unexpected error flushing tombstone: %v", err) - } - - a3 := tsm1.NewValue(3, 1.3) - writes = map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {a3}, - } - f2 := MustWriteTSM(dir, 2, writes) - - a4 := tsm1.NewValue(4, 1.4) - writes = map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {a4}, - } - f3 := MustWriteTSM(dir, 3, writes) - - fs := &fakeFileStore{} - defer fs.Close() - compactor := tsm1.NewCompactor() - compactor.Dir = dir - compactor.FileStore = fs - compactor.Size = 2 - compactor.Open() - - files, err := compactor.CompactFull([]string{f1, f2, f3}) - if err != nil { - t.Fatalf("unexpected error writing snapshot: %v", err) - } - - if got, exp := len(files), 1; got != exp { - t.Fatalf("files length mismatch: got %v, exp %v", got, exp) - } - - stats := tsm1.NewMeasurementStats() - if f, err := os.Open(tsm1.StatsFilename(files[0])); err != nil { - t.Fatal(err) - } else if _, err := stats.ReadFrom(bufio.NewReader(f)); err != nil { - t.Fatal(err) - } else if err := f.Close(); err != nil { - t.Fatal(err) - } else if diff := cmp.Diff(stats, tsm1.MeasurementStats{"cpu": 44}); diff != "" { - t.Fatal(diff) - } - - expGen, expSeq, err := tsm1.DefaultParseFileName(f3) - if err != nil { - t.Fatalf("unexpected error parsing file name: %v", err) - } - expSeq = expSeq + 1 - - gotGen, gotSeq, err := tsm1.DefaultParseFileName(files[0]) - if err != nil { - t.Fatalf("unexpected error parsing file name: %v", err) - } - - if gotGen != expGen { - t.Fatalf("wrong generation for new file: got %v, exp %v", gotGen, expGen) - } - - if gotSeq != expSeq { - t.Fatalf("wrong sequence for new file: got %v, exp %v", gotSeq, expSeq) - } - - r := MustOpenTSMReader(files[0]) - - if got, exp := r.KeyCount(), 1; got != exp { - t.Fatalf("keys length mismatch: got %v, exp %v", got, exp) - } - - var data = []struct { - key string - 
points []tsm1.Value - }{ - {"cpu,host=A#!~#value", []tsm1.Value{a3, a4}}, - } - - for _, p := range data { - values, err := r.ReadAll([]byte(p.key)) - if err != nil { - t.Fatalf("unexpected error reading: %v", err) - } - - if got, exp := len(values), len(p.points); got != exp { - t.Fatalf("values length mismatch %s: got %v, exp %v", p.key, got, exp) - } - - for i, point := range p.points { - assertValueEqual(t, values[i], point) - } - } - - entries, err := r.ReadEntries([]byte("cpu,host=A#!~#value"), nil) - if err != nil { - t.Fatal(err) - } - if got, exp := len(entries), 1; got != exp { - t.Fatalf("block count mismatch: got %v, exp %v", got, exp) - } -} - -// Ensures that a full compaction will decode and combine blocks with -// partial tombstoned values -func TestCompactor_CompactFull_TombstonedPartialBlock(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - - // write 3 TSM files with different data and one new point - a1 := tsm1.NewValue(1, 1.1) - a2 := tsm1.NewValue(2, 1.2) - writes := map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {a1, a2}, - } - f1 := MustWriteTSM(dir, 1, writes) - - ts := tsm1.NewTombstoner(f1, nil) - // a1 should remain after compaction - ts.AddRange([][]byte{[]byte("cpu,host=A#!~#value")}, 2, math.MaxInt64) - - if err := ts.Flush(); err != nil { - t.Fatalf("unexpected error flushing tombstone: %v", err) - } - - a3 := tsm1.NewValue(3, 1.3) - writes = map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {a3}, - } - f2 := MustWriteTSM(dir, 2, writes) - - a4 := tsm1.NewValue(4, 1.4) - writes = map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {a4}, - } - f3 := MustWriteTSM(dir, 3, writes) - - fs := &fakeFileStore{} - defer fs.Close() - compactor := tsm1.NewCompactor() - compactor.Dir = dir - compactor.FileStore = fs - compactor.Size = 2 - compactor.Open() - - files, err := compactor.CompactFull([]string{f1, f2, f3}) - if err != nil { - t.Fatalf("unexpected error writing snapshot: %v", err) - } - - if got, exp := len(files), 1; got != exp { - t.Fatalf("files length mismatch: got %v, exp %v", got, exp) - } - - stats := tsm1.NewMeasurementStats() - if f, err := os.Open(tsm1.StatsFilename(files[0])); err != nil { - t.Fatal(err) - } else if _, err := stats.ReadFrom(bufio.NewReader(f)); err != nil { - t.Fatal(err) - } else if err := f.Close(); err != nil { - t.Fatal(err) - } else if diff := cmp.Diff(stats, tsm1.MeasurementStats{"cpu": 78}); diff != "" { - t.Fatal(diff) - } - - expGen, expSeq, err := tsm1.DefaultParseFileName(f3) - if err != nil { - t.Fatalf("unexpected error parsing file name: %v", err) - } - expSeq = expSeq + 1 - - gotGen, gotSeq, err := tsm1.DefaultParseFileName(files[0]) - if err != nil { - t.Fatalf("unexpected error parsing file name: %v", err) - } - - if gotGen != expGen { - t.Fatalf("wrong generation for new file: got %v, exp %v", gotGen, expGen) - } - - if gotSeq != expSeq { - t.Fatalf("wrong sequence for new file: got %v, exp %v", gotSeq, expSeq) - } - - r := MustOpenTSMReader(files[0]) - - if got, exp := r.KeyCount(), 1; got != exp { - t.Fatalf("keys length mismatch: got %v, exp %v", got, exp) - } - - var data = []struct { - key string - points []tsm1.Value - }{ - {"cpu,host=A#!~#value", []tsm1.Value{a1, a3, a4}}, - } - - for _, p := range data { - values, err := r.ReadAll([]byte(p.key)) - if err != nil { - t.Fatalf("unexpected error reading: %v", err) - } - - if got, exp := len(values), len(p.points); got != exp { - t.Fatalf("values length mismatch %s: got %v, exp %v", p.key, got, exp) - } - - for i, point := range p.points { - 
assertValueEqual(t, values[i], point) - } - } - - entries, err := r.ReadEntries([]byte("cpu,host=A#!~#value"), nil) - if err != nil { - t.Fatal(err) - } - if got, exp := len(entries), 2; got != exp { - t.Fatalf("block count mismatch: got %v, exp %v", got, exp) - } -} - -// Ensures that a full compaction will decode and combine blocks with -// multiple tombstoned ranges within the block e.g. (t1, t2, t3, t4) -// having t2 and t3 removed -func TestCompactor_CompactFull_TombstonedMultipleRanges(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - - // write 3 TSM files with different data and one new point - a1 := tsm1.NewValue(1, 1.1) - a2 := tsm1.NewValue(2, 1.2) - a3 := tsm1.NewValue(3, 1.3) - a4 := tsm1.NewValue(4, 1.4) - - writes := map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {a1, a2, a3, a4}, - } - f1 := MustWriteTSM(dir, 1, writes) - - ts := tsm1.NewTombstoner(f1, nil) - // a1, a3 should remain after compaction - ts.AddRange([][]byte{[]byte("cpu,host=A#!~#value")}, 2, 2) - ts.AddRange([][]byte{[]byte("cpu,host=A#!~#value")}, 4, 4) - - if err := ts.Flush(); err != nil { - t.Fatalf("unexpected error flushing tombstone: %v", err) - } - - a5 := tsm1.NewValue(5, 1.5) - writes = map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {a5}, - } - f2 := MustWriteTSM(dir, 2, writes) - - a6 := tsm1.NewValue(6, 1.6) - writes = map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {a6}, - } - f3 := MustWriteTSM(dir, 3, writes) - - fs := &fakeFileStore{} - defer fs.Close() - compactor := tsm1.NewCompactor() - compactor.Dir = dir - compactor.FileStore = fs - compactor.Size = 2 - compactor.Open() - - files, err := compactor.CompactFull([]string{f1, f2, f3}) - if err != nil { - t.Fatalf("unexpected error writing snapshot: %v", err) - } - - if got, exp := len(files), 1; got != exp { - t.Fatalf("files length mismatch: got %v, exp %v", got, exp) - } - - expGen, expSeq, err := tsm1.DefaultParseFileName(f3) - if err != nil { - t.Fatalf("unexpected error parsing file name: %v", err) - } - expSeq = expSeq + 1 - - gotGen, gotSeq, err := tsm1.DefaultParseFileName(files[0]) - if err != nil { - t.Fatalf("unexpected error parsing file name: %v", err) - } - - if gotGen != expGen { - t.Fatalf("wrong generation for new file: got %v, exp %v", gotGen, expGen) - } - - if gotSeq != expSeq { - t.Fatalf("wrong sequence for new file: got %v, exp %v", gotSeq, expSeq) - } - - r := MustOpenTSMReader(files[0]) - - if got, exp := r.KeyCount(), 1; got != exp { - t.Fatalf("keys length mismatch: got %v, exp %v", got, exp) - } - - var data = []struct { - key string - points []tsm1.Value - }{ - {"cpu,host=A#!~#value", []tsm1.Value{a1, a3, a5, a6}}, - } - - for _, p := range data { - values, err := r.ReadAll([]byte(p.key)) - if err != nil { - t.Fatalf("unexpected error reading: %v", err) - } - - if got, exp := len(values), len(p.points); got != exp { - t.Fatalf("values length mismatch %s: got %v, exp %v", p.key, got, exp) - } - - for i, point := range p.points { - assertValueEqual(t, values[i], point) - } - } - - entries, err := r.ReadEntries([]byte("cpu,host=A#!~#value"), nil) - if err != nil { - t.Fatal(err) - } - if got, exp := len(entries), 2; got != exp { - t.Fatalf("block count mismatch: got %v, exp %v", got, exp) - } -} - -// Ensures that a compaction will properly rollover to a new file when the -// max keys per blocks is exceeded -func TestCompactor_CompactFull_MaxKeys(t *testing.T) { - // This test creates a lot of data and causes timeout failures for these envs - if testing.Short() || os.Getenv("CI") != "" || 
os.Getenv("GORACE") != "" { - t.Skip("Skipping max keys compaction test") - } - dir := MustTempDir() - defer os.RemoveAll(dir) - - // write two files where the first contains a single key with the maximum - // number of full blocks that can fit in a TSM file - f1, f1Name := MustTSMWriter(dir, 1) - values := make([]tsm1.Value, 1000) - for i := 0; i < 65534; i++ { - values = values[:0] - for j := 0; j < 1000; j++ { - values = append(values, tsm1.NewValue(int64(i*1000+j), int64(1))) - } - if err := f1.Write([]byte("cpu,host=A#!~#value"), values); err != nil { - t.Fatalf("write tsm f1: %v", err) - } - } - if err := f1.WriteIndex(); err != nil { - t.Fatalf("write index f1: %v", err) - } - f1.Close() - - // Write a new file with 2 blocks that when compacted would exceed the max - // blocks - f2, f2Name := MustTSMWriter(dir, 2) - for i := 0; i < 2; i++ { - lastTimeStamp := values[len(values)-1].UnixNano() + 1 - values = values[:0] - for j := lastTimeStamp; j < lastTimeStamp+1000; j++ { - values = append(values, tsm1.NewValue(int64(j), int64(1))) - } - if err := f2.Write([]byte("cpu,host=A#!~#value"), values); err != nil { - t.Fatalf("write tsm f1: %v", err) - } - } - - if err := f2.WriteIndex(); err != nil { - t.Fatalf("write index f2: %v", err) - } - f2.Close() - - fs := &fakeFileStore{} - defer fs.Close() - compactor := tsm1.NewCompactor() - compactor.Dir = dir - compactor.FileStore = fs - compactor.Open() - - // Compact both files, should get 2 files back - files, err := compactor.CompactFull([]string{f1Name, f2Name}) - if err != nil { - t.Fatalf("unexpected error writing snapshot: %v", err) - } - - if got, exp := len(files), 2; got != exp { - t.Fatalf("files length mismatch: got %v, exp %v", got, exp) - } - - expGen, expSeq, err := tsm1.DefaultParseFileName(f2Name) - if err != nil { - t.Fatalf("unexpected error parsing file name: %v", err) - } - expSeq = expSeq + 1 - - gotGen, gotSeq, err := tsm1.DefaultParseFileName(files[0]) - if err != nil { - t.Fatalf("unexpected error parsing file name: %v", err) - } - - if gotGen != expGen { - t.Fatalf("wrong generation for new file: got %v, exp %v", gotGen, expGen) - } - - if gotSeq != expSeq { - t.Fatalf("wrong sequence for new file: got %v, exp %v", gotSeq, expSeq) - } -} - -// Tests that a single TSM file can be read and iterated over -func TestTSMKeyIterator_Single(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - - v1 := tsm1.NewValue(1, 1.1) - writes := map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {v1}, - } - - r := MustTSMReader(dir, 1, writes) - - iter, err := tsm1.NewTSMKeyIterator(1, false, nil, r) - if err != nil { - t.Fatalf("unexpected error creating WALKeyIterator: %v", err) - } - - var readValues bool - for iter.Next() { - key, _, _, block, err := iter.Read() - if err != nil { - t.Fatalf("unexpected error read: %v", err) - } - - values, err := tsm1.DecodeBlock(block, nil) - if err != nil { - t.Fatalf("unexpected error decode: %v", err) - } - - if got, exp := string(key), "cpu,host=A#!~#value"; got != exp { - t.Fatalf("key mismatch: got %v, exp %v", got, exp) - } - - if got, exp := len(values), len(writes); got != exp { - t.Fatalf("values length mismatch: got %v, exp %v", got, exp) - } - - for _, v := range values { - readValues = true - assertValueEqual(t, v, v1) - } - } - - if !readValues { - t.Fatalf("failed to read any values") - } -} - -// Tests that duplicate point values are merged. 
There is only one case -// where this could happen and that is when a compaction completed and we replace -// the old TSM file with a new one and we crash just before deleting the old file. -// No data is lost but the same point time/value would exist in two files until -// compaction corrects it. -func TestTSMKeyIterator_Duplicate(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - - v1 := tsm1.NewValue(1, int64(1)) - v2 := tsm1.NewValue(1, int64(2)) - - writes1 := map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {v1}, - } - - r1 := MustTSMReader(dir, 1, writes1) - - writes2 := map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {v2}, - } - - r2 := MustTSMReader(dir, 2, writes2) - - iter, err := tsm1.NewTSMKeyIterator(1, false, nil, r1, r2) - if err != nil { - t.Fatalf("unexpected error creating WALKeyIterator: %v", err) - } - - var readValues bool - for iter.Next() { - key, _, _, block, err := iter.Read() - if err != nil { - t.Fatalf("unexpected error read: %v", err) - } - - values, err := tsm1.DecodeBlock(block, nil) - if err != nil { - t.Fatalf("unexpected error decode: %v", err) - } - - if got, exp := string(key), "cpu,host=A#!~#value"; got != exp { - t.Fatalf("key mismatch: got %v, exp %v", got, exp) - } - - if got, exp := len(values), 1; got != exp { - t.Fatalf("values length mismatch: got %v, exp %v", got, exp) - } - - readValues = true - assertValueEqual(t, values[0], v2) - } - - if !readValues { - t.Fatalf("failed to read any values") - } -} - -// Tests that deleted keys are not seen during iteration with -// TSM files. -func TestTSMKeyIterator_MultipleKeysDeleted(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - - v1 := tsm1.NewValue(2, int64(1)) - points1 := map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {v1}, - } - - r1 := MustTSMReader(dir, 1, points1) - if e := r1.Delete([][]byte{[]byte("cpu,host=A#!~#value")}); nil != e { - t.Fatal(e) - } - - v2 := tsm1.NewValue(1, float64(1)) - v3 := tsm1.NewValue(1, float64(1)) - - points2 := map[string][]tsm1.Value{ - "cpu,host=A#!~#count": {v2}, - "cpu,host=B#!~#value": {v3}, - } - - r2 := MustTSMReader(dir, 2, points2) - r2.Delete([][]byte{[]byte("cpu,host=A#!~#count")}) - - iter, err := tsm1.NewTSMKeyIterator(1, false, nil, r1, r2) - if err != nil { - t.Fatalf("unexpected error creating WALKeyIterator: %v", err) - } - - var readValues bool - var data = []struct { - key string - value tsm1.Value - }{ - {"cpu,host=B#!~#value", v3}, - } - - for iter.Next() { - key, _, _, block, err := iter.Read() - if err != nil { - t.Fatalf("unexpected error read: %v", err) - } - - values, err := tsm1.DecodeBlock(block, nil) - if err != nil { - t.Fatalf("unexpected error decode: %v", err) - } - - if got, exp := string(key), data[0].key; got != exp { - t.Fatalf("key mismatch: got %v, exp %v", got, exp) - } - - if got, exp := len(values), 1; got != exp { - t.Fatalf("values length mismatch: got %v, exp %v", got, exp) - } - readValues = true - - assertValueEqual(t, values[0], data[0].value) - data = data[1:] - } - - if !readValues { - t.Fatalf("failed to read any values") - } -} - -// Tests that deleted keys are not seen during iteration with -// TSM files. 
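// Illustrative sketch (added for clarity; not part of the change above). The
// surrounding tests exercise two different deletion mechanisms: TSMReader.Delete
// drops every block for the given keys, while DeleteRange tombstones only the
// points whose timestamps fall inside the given range, leaving the key's other
// values visible to TSMKeyIterator. The helper name below is hypothetical; the
// calls and signatures are exactly those used by the tests in this file.
func deleteSketch(r *tsm1.TSMReader) error {
	// Whole-key delete: the key is never yielded by an iterator over this reader.
	if err := r.Delete([][]byte{[]byte("cpu,host=A#!~#count")}); err != nil {
		return err
	}
	// Range delete: only the point at timestamp 50 is tombstoned; the key's
	// remaining values are still iterated and compacted.
	return r.DeleteRange([][]byte{[]byte("cpu,host=A#!~#value")}, 50, 50)
}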
-func TestTSMKeyIterator_SingleDeletes(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - - v1 := tsm1.NewValue(10, int64(1)) - v2 := tsm1.NewValue(20, int64(1)) - v3 := tsm1.NewValue(30, int64(1)) - v4 := tsm1.NewValue(40, int64(1)) - v5 := tsm1.NewValue(50, int64(1)) - v6 := tsm1.NewValue(60, int64(1)) - - points1 := map[string][]tsm1.Value{ - "cpu,host=0#!~#value": {v1, v2}, - "cpu,host=A#!~#value": {v5, v6}, - "cpu,host=B#!~#value": {v3, v4}, - "cpu,host=C#!~#value": {v1, v2}, - "cpu,host=D#!~#value": {v1, v2}, - } - - r1 := MustTSMReader(dir, 1, points1) - - if e := r1.DeleteRange([][]byte{[]byte("cpu,host=A#!~#value")}, 50, 50); nil != e { - t.Fatal(e) - } - if e := r1.DeleteRange([][]byte{[]byte("cpu,host=A#!~#value")}, 60, 60); nil != e { - t.Fatal(e) - } - if e := r1.DeleteRange([][]byte{[]byte("cpu,host=C#!~#value")}, 10, 10); nil != e { - t.Fatal(e) - } - if e := r1.DeleteRange([][]byte{[]byte("cpu,host=C#!~#value")}, 60, 60); nil != e { - t.Fatal(e) - } - if e := r1.DeleteRange([][]byte{[]byte("cpu,host=C#!~#value")}, 20, 20); nil != e { - t.Fatal(e) - } - - iter, err := tsm1.NewTSMKeyIterator(1, false, nil, r1) - if err != nil { - t.Fatalf("unexpected error creating WALKeyIterator: %v", err) - } - - var readValues int - var data = []struct { - key string - value tsm1.Value - }{ - {"cpu,host=0#!~#value", v1}, - {"cpu,host=B#!~#value", v3}, - {"cpu,host=D#!~#value", v1}, - } - - for iter.Next() { - key, _, _, block, err := iter.Read() - if err != nil { - t.Fatalf("unexpected error read: %v", err) - } - - values, err := tsm1.DecodeBlock(block, nil) - if err != nil { - t.Fatalf("unexpected error decode: %v", err) - } - - if exp, got := string(key), data[0].key; exp != got { - t.Fatalf("key mismatch: got %v, exp %v", exp, got) - } - - if exp, got := len(values), 2; exp != got { - t.Fatalf("values length mismatch: exp %v, got %v", exp, got) - } - readValues++ - - assertValueEqual(t, values[0], data[0].value) - data = data[1:] - } - - if exp, got := 3, readValues; exp != got { - t.Fatalf("failed to read expected values: exp %v, got %v", exp, got) - } -} - -// Tests that the TSMKeyIterator will abort if the interrupt channel is closed -func TestTSMKeyIterator_Abort(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - - v1 := tsm1.NewValue(1, 1.1) - writes := map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {v1}, - } - - r := MustTSMReader(dir, 1, writes) - - intC := make(chan struct{}) - iter, err := tsm1.NewTSMKeyIterator(1, false, intC, r) - if err != nil { - t.Fatalf("unexpected error creating WALKeyIterator: %v", err) - } - - var aborted bool - for iter.Next() { - // Abort - close(intC) - - _, _, _, _, err := iter.Read() - if err == nil { - t.Fatalf("unexpected error read: %v", err) - } - aborted = err != nil - } - - if !aborted { - t.Fatalf("iteration not aborted") - } -} - -func TestCacheKeyIterator_Single(t *testing.T) { - v0 := tsm1.NewValue(1, 1.0) - - writes := map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {v0}, - } - - c := tsm1.NewCache(0) - - for k, v := range writes { - if err := c.Write([]byte(k), v); err != nil { - t.Fatalf("failed to write key foo to cache: %s", err.Error()) - } - } - - iter := tsm1.NewCacheKeyIterator(c, 1, nil) - var readValues bool - for iter.Next() { - key, _, _, block, err := iter.Read() - if err != nil { - t.Fatalf("unexpected error read: %v", err) - } - - values, err := tsm1.DecodeBlock(block, nil) - if err != nil { - t.Fatalf("unexpected error decode: %v", err) - } - - if got, exp := string(key), 
"cpu,host=A#!~#value"; got != exp { - t.Fatalf("key mismatch: got %v, exp %v", got, exp) - } - - if got, exp := len(values), len(writes); got != exp { - t.Fatalf("values length mismatch: got %v, exp %v", got, exp) - } - - for _, v := range values { - readValues = true - assertValueEqual(t, v, v0) - } - } - - if !readValues { - t.Fatalf("failed to read any values") - } -} - -func TestCacheKeyIterator_Chunked(t *testing.T) { - v0 := tsm1.NewValue(1, 1.0) - v1 := tsm1.NewValue(2, 2.0) - - writes := map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {v0, v1}, - } - - c := tsm1.NewCache(0) - - for k, v := range writes { - if err := c.Write([]byte(k), v); err != nil { - t.Fatalf("failed to write key foo to cache: %s", err.Error()) - } - } - - iter := tsm1.NewCacheKeyIterator(c, 1, nil) - var readValues bool - var chunk int - for iter.Next() { - key, _, _, block, err := iter.Read() - if err != nil { - t.Fatalf("unexpected error read: %v", err) - } - - values, err := tsm1.DecodeBlock(block, nil) - if err != nil { - t.Fatalf("unexpected error decode: %v", err) - } - - if got, exp := string(key), "cpu,host=A#!~#value"; got != exp { - t.Fatalf("key mismatch: got %v, exp %v", got, exp) - } - - if got, exp := len(values), 1; got != exp { - t.Fatalf("values length mismatch: got %v, exp %v", got, exp) - } - - for _, v := range values { - readValues = true - assertValueEqual(t, v, writes["cpu,host=A#!~#value"][chunk]) - } - chunk++ - } - - if !readValues { - t.Fatalf("failed to read any values") - } -} - -// Tests that the CacheKeyIterator will abort if the interrupt channel is closed -func TestCacheKeyIterator_Abort(t *testing.T) { - v0 := tsm1.NewValue(1, 1.0) - - writes := map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {v0}, - } - - c := tsm1.NewCache(0) - - for k, v := range writes { - if err := c.Write([]byte(k), v); err != nil { - t.Fatalf("failed to write key foo to cache: %s", err.Error()) - } - } - - intC := make(chan struct{}) - - iter := tsm1.NewCacheKeyIterator(c, 1, intC) - - var aborted bool - for iter.Next() { - //Abort - close(intC) - - _, _, _, _, err := iter.Read() - if err == nil { - t.Fatalf("unexpected error read: %v", err) - } - aborted = err != nil - } - - if !aborted { - t.Fatalf("iteration not aborted") - } -} - -func TestDefaultPlanner_Plan_Min(t *testing.T) { - cp := tsm1.NewDefaultPlanner( - &fakeFileStore{ - PathsFn: func() []tsm1.FileStat { - return []tsm1.FileStat{ - { - Path: "01-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "02-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "03-1.tsm1", - Size: 251 * 1024 * 1024, - }, - } - }, - }, tsm1.DefaultCompactFullWriteColdDuration, - ) - - tsm := cp.Plan(time.Now()) - if exp, got := 0, len(tsm); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } -} - -// Ensure that if there are older files that can be compacted together but a newer -// file that is in a larger step, the older ones will get compacted. 
-func TestDefaultPlanner_Plan_CombineSequence(t *testing.T) { - data := []tsm1.FileStat{ - { - Path: "01-04.tsm1", - Size: 128 * 1024 * 1024, - }, - { - Path: "02-04.tsm1", - Size: 128 * 1024 * 1024, - }, - { - Path: "03-04.tsm1", - Size: 128 * 1024 * 1024, - }, - { - Path: "04-04.tsm1", - Size: 128 * 1024 * 1024, - }, - { - Path: "06-02.tsm1", - Size: 67 * 1024 * 1024, - }, - { - Path: "07-02.tsm1", - Size: 128 * 1024 * 1024, - }, - { - Path: "08-01.tsm1", - Size: 251 * 1024 * 1024, - }, - } - - cp := tsm1.NewDefaultPlanner( - &fakeFileStore{ - PathsFn: func() []tsm1.FileStat { - return data - }, - }, tsm1.DefaultCompactFullWriteColdDuration, - ) - - expFiles := []tsm1.FileStat{data[0], data[1], data[2], data[3]} - tsm := cp.Plan(time.Now()) - if exp, got := len(expFiles), len(tsm[0]); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - - for i, p := range expFiles { - if got, exp := tsm[0][i], p.Path; got != exp { - t.Fatalf("tsm file mismatch: got %v, exp %v", got, exp) - } - } -} - -// Ensure that the planner grabs the smallest compaction step -func TestDefaultPlanner_Plan_MultipleGroups(t *testing.T) { - data := []tsm1.FileStat{ - { - Path: "01-04.tsm1", - Size: 64 * 1024 * 1024, - }, - { - Path: "02-04.tsm1", - Size: 64 * 1024 * 1024, - }, - { - Path: "03-04.tsm1", - Size: 64 * 1024 * 1024, - }, - { - Path: "04-04.tsm1", - Size: 129 * 1024 * 1024, - }, - { - Path: "05-04.tsm1", - Size: 129 * 1024 * 1024, - }, - { - Path: "06-04.tsm1", - Size: 129 * 1024 * 1024, - }, - { - Path: "07-04.tsm1", - Size: 129 * 1024 * 1024, - }, - { - Path: "08-04.tsm1", - Size: 129 * 1024 * 1024, - }, - { - Path: "09-04.tsm1", // should be skipped - Size: 129 * 1024 * 1024, - }, - } - - cp := tsm1.NewDefaultPlanner(&fakeFileStore{ - PathsFn: func() []tsm1.FileStat { - return data - }, - }, tsm1.DefaultCompactFullWriteColdDuration) - - expFiles := []tsm1.FileStat{data[0], data[1], data[2], data[3], - data[4], data[5], data[6], data[7]} - tsm := cp.Plan(time.Now()) - - if got, exp := len(tsm), 2; got != exp { - t.Fatalf("compaction group length mismatch: got %v, exp %v", got, exp) - } - - if exp, got := len(expFiles[:4]), len(tsm[0]); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - - if exp, got := len(expFiles[4:]), len(tsm[1]); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - - for i, p := range expFiles[:4] { - if got, exp := tsm[0][i], p.Path; got != exp { - t.Fatalf("tsm file mismatch: got %v, exp %v", got, exp) - } - } - - for i, p := range expFiles[4:] { - if got, exp := tsm[1][i], p.Path; got != exp { - t.Fatalf("tsm file mismatch: got %v, exp %v", got, exp) - } - } - -} - -// Ensure that the planner grabs the smallest compaction step -func TestDefaultPlanner_PlanLevel_SmallestCompactionStep(t *testing.T) { - data := []tsm1.FileStat{ - { - Path: "01-03.tsm1", - Size: 251 * 1024 * 1024, - }, - { - Path: "02-03.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "03-03.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "04-03.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "05-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "06-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "07-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "08-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "09-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "10-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "11-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "12-01.tsm1", - Size: 1 * 1024 * 1024, - }, - } 
- - cp := tsm1.NewDefaultPlanner( - &fakeFileStore{ - PathsFn: func() []tsm1.FileStat { - return data - }, - }, tsm1.DefaultCompactFullWriteColdDuration, - ) - - expFiles := []tsm1.FileStat{data[4], data[5], data[6], data[7], data[8], data[9], data[10], data[11]} - tsm := cp.PlanLevel(1) - if exp, got := len(expFiles), len(tsm[0]); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - - for i, p := range expFiles { - if got, exp := tsm[0][i], p.Path; got != exp { - t.Fatalf("tsm file mismatch: got %v, exp %v", got, exp) - } - } -} - -func TestDefaultPlanner_PlanLevel_SplitFile(t *testing.T) { - data := []tsm1.FileStat{ - { - Path: "01-03.tsm1", - Size: 251 * 1024 * 1024, - }, - { - Path: "02-03.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "03-03.tsm1", - Size: 2 * 1024 * 1024 * 1024, - }, - { - Path: "03-04.tsm1", - Size: 10 * 1024 * 1024, - }, - { - Path: "04-03.tsm1", - Size: 10 * 1024 * 1024, - }, - { - Path: "05-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "06-01.tsm1", - Size: 1 * 1024 * 1024, - }, - } - - cp := tsm1.NewDefaultPlanner( - &fakeFileStore{ - PathsFn: func() []tsm1.FileStat { - return data - }, - }, tsm1.DefaultCompactFullWriteColdDuration, - ) - - expFiles := []tsm1.FileStat{data[0], data[1], data[2], data[3], data[4]} - tsm := cp.PlanLevel(3) - if exp, got := len(expFiles), len(tsm[0]); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - - for i, p := range expFiles { - if got, exp := tsm[0][i], p.Path; got != exp { - t.Fatalf("tsm file mismatch: got %v, exp %v", got, exp) - } - } -} - -func TestDefaultPlanner_PlanLevel_IsolatedHighLevel(t *testing.T) { - data := []tsm1.FileStat{ - { - Path: "01-02.tsm1", - Size: 251 * 1024 * 1024, - }, - { - Path: "02-02.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "03-03.tsm1", - Size: 2 * 1024 * 1024 * 1024, - }, - { - Path: "03-04.tsm1", - Size: 2 * 1024 * 1024 * 1024, - }, - { - Path: "04-02.tsm1", - Size: 10 * 1024 * 1024, - }, - { - Path: "05-02.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "06-02.tsm1", - Size: 1 * 1024 * 1024, - }, - } - - cp := tsm1.NewDefaultPlanner( - &fakeFileStore{ - PathsFn: func() []tsm1.FileStat { - return data - }, - }, tsm1.DefaultCompactFullWriteColdDuration, - ) - - expFiles := []tsm1.FileStat{} - tsm := cp.PlanLevel(3) - if exp, got := len(expFiles), len(tsm); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } -} - -func TestDefaultPlanner_PlanLevel3_MinFiles(t *testing.T) { - data := []tsm1.FileStat{ - { - Path: "01-03.tsm1", - Size: 251 * 1024 * 1024, - }, - { - Path: "02-03.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "03-01.tsm1", - Size: 2 * 1024 * 1024 * 1024, - }, - { - Path: "04-01.tsm1", - Size: 10 * 1024 * 1024, - }, - { - Path: "05-02.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "06-01.tsm1", - Size: 1 * 1024 * 1024, - }, - } - - cp := tsm1.NewDefaultPlanner( - &fakeFileStore{ - PathsFn: func() []tsm1.FileStat { - return data - }, - }, tsm1.DefaultCompactFullWriteColdDuration, - ) - - expFiles := []tsm1.FileStat{} - tsm := cp.PlanLevel(3) - if exp, got := len(expFiles), len(tsm); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } -} - -func TestDefaultPlanner_PlanLevel2_MinFiles(t *testing.T) { - data := []tsm1.FileStat{ - { - Path: "02-04.tsm1", - Size: 251 * 1024 * 1024, - }, - - { - Path: "03-02.tsm1", - Size: 251 * 1024 * 1024, - }, - { - Path: "03-03.tsm1", - Size: 1 * 1024 * 1024, - }, - } - - cp := tsm1.NewDefaultPlanner( - 
&fakeFileStore{ - PathsFn: func() []tsm1.FileStat { - return data - }, - }, tsm1.DefaultCompactFullWriteColdDuration, - ) - - expFiles := []tsm1.FileStat{} - tsm := cp.PlanLevel(2) - if exp, got := len(expFiles), len(tsm); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } -} - -func TestDefaultPlanner_PlanLevel_Tombstone(t *testing.T) { - data := []tsm1.FileStat{ - { - Path: "01-03.tsm1", - Size: 251 * 1024 * 1024, - HasTombstone: true, - }, - { - Path: "02-03.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "03-01.tsm1", - Size: 2 * 1024 * 1024 * 1024, - }, - { - Path: "04-01.tsm1", - Size: 10 * 1024 * 1024, - }, - { - Path: "05-02.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "06-01.tsm1", - Size: 1 * 1024 * 1024, - }, - } - - cp := tsm1.NewDefaultPlanner( - &fakeFileStore{ - PathsFn: func() []tsm1.FileStat { - return data - }, - }, tsm1.DefaultCompactFullWriteColdDuration, - ) - - expFiles := []tsm1.FileStat{data[0], data[1]} - tsm := cp.PlanLevel(3) - if exp, got := len(expFiles), len(tsm[0]); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - - for i, p := range expFiles { - if got, exp := tsm[0][i], p.Path; got != exp { - t.Fatalf("tsm file mismatch: got %v, exp %v", got, exp) - } - } -} - -func TestDefaultPlanner_PlanLevel_Multiple(t *testing.T) { - data := []tsm1.FileStat{ - { - Path: "01-01.tsm1", - Size: 251 * 1024 * 1024, - }, - { - Path: "02-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "03-01.tsm1", - Size: 2 * 1024 * 1024 * 1024, - }, - { - Path: "04-01.tsm1", - Size: 10 * 1024 * 1024, - }, - { - Path: "05-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "06-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "07-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "08-01.tsm1", - Size: 1 * 1024 * 1024, - }, - } - - cp := tsm1.NewDefaultPlanner( - &fakeFileStore{ - PathsFn: func() []tsm1.FileStat { - return data - }, - }, tsm1.DefaultCompactFullWriteColdDuration, - ) - - expFiles1 := []tsm1.FileStat{data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7]} - - tsm := cp.PlanLevel(1) - if exp, got := len(expFiles1), len(tsm[0]); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - - for i, p := range expFiles1 { - if got, exp := tsm[0][i], p.Path; got != exp { - t.Fatalf("tsm file mismatch: got %v, exp %v", got, exp) - } - } -} - -func TestDefaultPlanner_PlanLevel_InUse(t *testing.T) { - data := []tsm1.FileStat{ - { - Path: "01-01.tsm1", - Size: 251 * 1024 * 1024, - }, - { - Path: "02-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "03-01.tsm1", - Size: 2 * 1024 * 1024 * 1024, - }, - { - Path: "04-01.tsm1", - Size: 10 * 1024 * 1024, - }, - { - Path: "05-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "06-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "07-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "08-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "09-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "10-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "11-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "12-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "13-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "14-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "15-01.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "16-01.tsm1", - Size: 1 * 1024 * 1024, - }, - } - - cp := tsm1.NewDefaultPlanner( - &fakeFileStore{ - PathsFn: func() []tsm1.FileStat { - return data - }, - }, tsm1.DefaultCompactFullWriteColdDuration, - 
) - - expFiles1 := data[0:8] - expFiles2 := data[8:16] - - tsm := cp.PlanLevel(1) - if exp, got := len(expFiles1), len(tsm[0]); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - - for i, p := range expFiles1 { - if got, exp := tsm[0][i], p.Path; got != exp { - t.Fatalf("tsm file mismatch: got %v, exp %v", got, exp) - } - } - - if exp, got := len(expFiles2), len(tsm[1]); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - - for i, p := range expFiles2 { - if got, exp := tsm[1][i], p.Path; got != exp { - t.Fatalf("tsm file mismatch: got %v, exp %v", got, exp) - } - } - - cp.Release(tsm[1:]) - - tsm = cp.PlanLevel(1) - if exp, got := len(expFiles2), len(tsm[0]); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - - for i, p := range expFiles2 { - if got, exp := tsm[0][i], p.Path; got != exp { - t.Fatalf("tsm file mismatch: got %v, exp %v", got, exp) - } - } -} - -func TestDefaultPlanner_PlanOptimize_NoLevel4(t *testing.T) { - data := []tsm1.FileStat{ - { - Path: "01-03.tsm1", - Size: 251 * 1024 * 1024, - }, - { - Path: "02-03.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "03-03.tsm1", - Size: 2 * 1024 * 1024 * 1024, - }, - } - - cp := tsm1.NewDefaultPlanner( - &fakeFileStore{ - PathsFn: func() []tsm1.FileStat { - return data - }, - }, tsm1.DefaultCompactFullWriteColdDuration, - ) - - expFiles := []tsm1.FileStat{} - tsm := cp.PlanOptimize() - if exp, got := len(expFiles), len(tsm); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } -} - -func TestDefaultPlanner_PlanOptimize_Level4(t *testing.T) { - data := []tsm1.FileStat{ - { - Path: "01-04.tsm1", - Size: 251 * 1024 * 1024, - }, - { - Path: "02-04.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "03-04.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "04-04.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "05-03.tsm1", - Size: 2 * 1024 * 1024 * 1024, - }, - { - Path: "06-04.tsm1", - Size: 2 * 1024 * 1024 * 1024, - }, - { - Path: "07-03.tsm1", - Size: 2 * 1024 * 1024 * 1024, - }, - } - - cp := tsm1.NewDefaultPlanner( - &fakeFileStore{ - PathsFn: func() []tsm1.FileStat { - return data - }, - }, tsm1.DefaultCompactFullWriteColdDuration, - ) - - expFiles1 := []tsm1.FileStat{data[0], data[1], data[2], data[3], data[4], data[5]} - tsm := cp.PlanOptimize() - if exp, got := 1, len(tsm); exp != got { - t.Fatalf("group length mismatch: got %v, exp %v", got, exp) - } - - if exp, got := len(expFiles1), len(tsm[0]); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - - for i, p := range expFiles1 { - if got, exp := tsm[0][i], p.Path; got != exp { - t.Fatalf("tsm file mismatch: got %v, exp %v", got, exp) - } - } -} - -func TestDefaultPlanner_PlanOptimize_Multiple(t *testing.T) { - data := []tsm1.FileStat{ - { - Path: "01-04.tsm1", - Size: 251 * 1024 * 1024, - }, - { - Path: "02-04.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "03-04.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "04-04.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "05-03.tsm1", - Size: 2 * 1024 * 1024 * 1024, - }, - { - Path: "06-03.tsm1", - Size: 2 * 1024 * 1024 * 1024, - }, - { - Path: "07-04.tsm1", - Size: 2 * 1024 * 1024 * 1024, - }, - { - Path: "08-04.tsm1", - Size: 2 * 1024 * 1024 * 1024, - }, - { - Path: "09-04.tsm1", - Size: 2 * 1024 * 1024 * 1024, - }, - { - Path: "10-04.tsm1", - Size: 2 * 1024 * 1024 * 1024, - }, - } - - cp := tsm1.NewDefaultPlanner( - &fakeFileStore{ - PathsFn: func() []tsm1.FileStat { - 
return data - }, - }, tsm1.DefaultCompactFullWriteColdDuration, - ) - - expFiles1 := []tsm1.FileStat{data[0], data[1], data[2], data[3]} - expFiles2 := []tsm1.FileStat{data[6], data[7], data[8], data[9]} - - tsm := cp.PlanOptimize() - if exp, got := 2, len(tsm); exp != got { - t.Fatalf("group length mismatch: got %v, exp %v", got, exp) - } - - if exp, got := len(expFiles1), len(tsm[0]); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - - for i, p := range expFiles1 { - if got, exp := tsm[0][i], p.Path; got != exp { - t.Fatalf("tsm file mismatch: got %v, exp %v", got, exp) - } - } - - if exp, got := len(expFiles2), len(tsm[1]); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - - for i, p := range expFiles2 { - if got, exp := tsm[1][i], p.Path; got != exp { - t.Fatalf("tsm file mismatch: got %v, exp %v", got, exp) - } - } -} - -func TestDefaultPlanner_PlanOptimize_Optimized(t *testing.T) { - data := []tsm1.FileStat{ - { - Path: "01-03.tsm1", - Size: 251 * 1024 * 1024, - }, - { - Path: "01-04.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "01-05.tsm1", - Size: 2 * 1024 * 1024 * 1024, - }, - } - - cp := tsm1.NewDefaultPlanner( - &fakeFileStore{ - PathsFn: func() []tsm1.FileStat { - return data - }, - }, tsm1.DefaultCompactFullWriteColdDuration, - ) - - expFiles := []tsm1.FileStat{} - tsm := cp.PlanOptimize() - if exp, got := len(expFiles), len(tsm); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } -} - -func TestDefaultPlanner_PlanOptimize_Tombstones(t *testing.T) { - data := []tsm1.FileStat{ - { - Path: "01-04.tsm1", - Size: 251 * 1024 * 1024, - }, - { - Path: "01-05.tsm1", - Size: 1 * 1024 * 1024, - HasTombstone: true, - }, - { - Path: "02-06.tsm1", - Size: 2 * 1024 * 1024 * 1024, - }, - } - - cp := tsm1.NewDefaultPlanner( - &fakeFileStore{ - PathsFn: func() []tsm1.FileStat { - return data - }, - }, tsm1.DefaultCompactFullWriteColdDuration, - ) - - expFiles := []tsm1.FileStat{data[0], data[1], data[2]} - tsm := cp.PlanOptimize() - if exp, got := len(expFiles), len(tsm[0]); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - - for i, p := range expFiles { - if got, exp := tsm[0][i], p.Path; got != exp { - t.Fatalf("tsm file mismatch: got %v, exp %v", got, exp) - } - } - -} - -// Ensure that the planner will compact all files if no writes -// have happened in some interval -func TestDefaultPlanner_Plan_FullOnCold(t *testing.T) { - data := []tsm1.FileStat{ - { - Path: "01-01.tsm1", - Size: 513 * 1024 * 1024, - }, - { - Path: "02-02.tsm1", - Size: 129 * 1024 * 1024, - }, - { - Path: "03-02.tsm1", - Size: 33 * 1024 * 1024, - }, - { - Path: "04-02.tsm1", - Size: 1 * 1024 * 1024, - }, - { - Path: "05-02.tsm1", - Size: 10 * 1024 * 1024, - }, - { - Path: "06-01.tsm1", - Size: 2 * 1024 * 1024, - }, - } - - cp := tsm1.NewDefaultPlanner( - &fakeFileStore{ - PathsFn: func() []tsm1.FileStat { - return data - }, - }, - time.Nanosecond, - ) - - tsm := cp.Plan(time.Now().Add(-time.Second)) - if exp, got := len(data), len(tsm[0]); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - - for i, p := range data { - if got, exp := tsm[0][i], p.Path; got != exp { - t.Fatalf("tsm file mismatch: got %v, exp %v", got, exp) - } - } -} - -// Ensure that the planner will not return files that are over the max -// allowable size -func TestDefaultPlanner_Plan_SkipMaxSizeFiles(t *testing.T) { - data := []tsm1.FileStat{ - { - Path: "01-01.tsm1", - Size: 2049 * 
1024 * 1024, - }, - { - Path: "02-02.tsm1", - Size: 2049 * 1024 * 1024, - }, - } - - cp := tsm1.NewDefaultPlanner( - &fakeFileStore{ - PathsFn: func() []tsm1.FileStat { - return data - }, - }, tsm1.DefaultCompactFullWriteColdDuration, - ) - - tsm := cp.Plan(time.Now()) - if exp, got := 0, len(tsm); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } -} - -// Ensure that the planner will not return files that are over the max -// allowable size -func TestDefaultPlanner_Plan_SkipPlanningAfterFull(t *testing.T) { - testSet := []tsm1.FileStat{ - { - Path: "01-05.tsm1", - Size: 256 * 1024 * 1024, - }, - { - Path: "02-05.tsm1", - Size: 256 * 1024 * 1024, - }, - { - Path: "03-05.tsm1", - Size: 256 * 1024 * 1024, - }, - { - Path: "04-04.tsm1", - Size: 256 * 1024 * 1024, - }, - } - - fs := &fakeFileStore{ - PathsFn: func() []tsm1.FileStat { - return testSet - }, - blockCount: 1000, - } - - cp := tsm1.NewDefaultPlanner(fs, time.Nanosecond) - plan := cp.Plan(time.Now().Add(-time.Second)) - // first verify that our test set would return files - if exp, got := 4, len(plan[0]); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - cp.Release(plan) - - // skip planning if all files are over the limit - over := []tsm1.FileStat{ - { - Path: "01-05.tsm1", - Size: 2049 * 1024 * 1024, - }, - { - Path: "02-05.tsm1", - Size: 2049 * 1024 * 1024, - }, - { - Path: "03-05.tsm1", - Size: 2049 * 1024 * 1024, - }, - { - Path: "04-05.tsm1", - Size: 2049 * 1024 * 1024, - }, - { - Path: "05-05.tsm1", - Size: 2049 * 1024 * 1024, - }, - } - - overFs := &fakeFileStore{ - PathsFn: func() []tsm1.FileStat { - return over - }, - blockCount: 1000, - } - - cp.FileStore = overFs - plan = cp.Plan(time.Now().Add(-time.Second)) - if exp, got := 0, len(plan); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - cp.Release(plan) - - plan = cp.PlanOptimize() - // ensure the optimize planner would pick this up - if exp, got := 1, len(plan); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - cp.Release(plan) - - cp.FileStore = fs - // ensure that it will plan if last modified has changed - fs.lastModified = time.Now() - - if exp, got := 4, len(cp.Plan(time.Now())[0]); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } -} - -// Tests that 2 generations, each over 2 GB and the second in level 2 does -// not return just the first generation. This was a case where full planning -// would get repeatedly plan the same files and never stop. 
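// Illustrative sketch (added for clarity; not part of the change above). Plan,
// PlanLevel and PlanOptimize return groups of file paths that remain "in use"
// until Release is called (see TestDefaultPlanner_PlanLevel_InUse above), and
// ForceFull makes level plans defer until the next full Plan has run (see
// TestDefaultPlanner_Plan_ForceFull below). The helper name is hypothetical;
// the construction and calls mirror the tests in this file.
func plannerLifecycleSketch(fs *fakeFileStore) {
	cp := tsm1.NewDefaultPlanner(fs, tsm1.DefaultCompactFullWriteColdDuration)

	groups := cp.Plan(time.Now())
	// ... hand the groups to a Compactor ...
	cp.Release(groups) // the files become eligible for planning again

	cp.ForceFull()          // level plans now defer to the next full Plan
	_ = cp.PlanLevel(1)     // empty while the forced full compaction is pending
	_ = cp.Plan(time.Now()) // the forced full plan over all generations
}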
-func TestDefaultPlanner_Plan_TwoGenLevel3(t *testing.T) { - data := []tsm1.FileStat{ - { - Path: "000002245-000001666.tsm", - Size: 2049 * 1024 * 1024, - }, - { - Path: "000002245-000001667.tsm", - Size: 2049 * 1024 * 1024, - }, - { - Path: "000002245-000001668.tsm", - Size: 2049 * 1024 * 1024, - }, - { - Path: "000002245-000001669.tsm", - Size: 2049 * 1024 * 1024, - }, - { - Path: "000002245-000001670.tsm", - Size: 2049 * 1024 * 1024, - }, - { - Path: "000002245-000001671.tsm", - Size: 2049 * 1024 * 1024, - }, - { - Path: "000002245-000001672.tsm", - Size: 2049 * 1024 * 1024, - }, - { - Path: "000002245-000001673.tsm", - Size: 192631258, - }, - { - Path: "000002246-000000002.tsm", - Size: 2049 * 1024 * 1024, - }, - { - Path: "000002246-000000003.tsm", - Size: 192631258, - }, - } - - cp := tsm1.NewDefaultPlanner( - &fakeFileStore{ - blockCount: 1000, - PathsFn: func() []tsm1.FileStat { - return data - }, - }, - time.Hour) - - tsm := cp.Plan(time.Now().Add(-24 * time.Hour)) - if exp, got := 1, len(tsm); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } -} - -// Ensure that the planner will return files over the max file -// size, but do not contain full blocks -func TestDefaultPlanner_Plan_NotFullOverMaxsize(t *testing.T) { - testSet := []tsm1.FileStat{ - { - Path: "01-05.tsm1", - Size: 256 * 1024 * 1024, - }, - { - Path: "02-05.tsm1", - Size: 256 * 1024 * 1024, - }, - { - Path: "03-05.tsm1", - Size: 256 * 1024 * 1024, - }, - { - Path: "04-04.tsm1", - Size: 256 * 1024 * 1024, - }, - } - - fs := &fakeFileStore{ - PathsFn: func() []tsm1.FileStat { - return testSet - }, - blockCount: 100, - } - - cp := tsm1.NewDefaultPlanner( - fs, - time.Nanosecond, - ) - - plan := cp.Plan(time.Now().Add(-time.Second)) - // first verify that our test set would return files - if exp, got := 4, len(plan[0]); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - cp.Release(plan) - - // skip planning if all files are over the limit - over := []tsm1.FileStat{ - { - Path: "01-05.tsm1", - Size: 2049 * 1024 * 1024, - }, - { - Path: "02-05.tsm1", - Size: 2049 * 1024 * 1024, - }, - } - - overFs := &fakeFileStore{ - PathsFn: func() []tsm1.FileStat { - return over - }, - blockCount: 100, - } - - cp.FileStore = overFs - if exp, got := 1, len(cp.Plan(time.Now().Add(-time.Second))); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } -} - -// Ensure that the planner will compact files that are past the smallest step -// size even if there is a single file in the smaller step size -func TestDefaultPlanner_Plan_CompactsMiddleSteps(t *testing.T) { - data := []tsm1.FileStat{ - { - Path: "01-04.tsm1", - Size: 64 * 1024 * 1024, - }, - { - Path: "02-04.tsm1", - Size: 64 * 1024 * 1024, - }, - { - Path: "03-04.tsm1", - Size: 64 * 1024 * 1024, - }, - { - Path: "04-04.tsm1", - Size: 64 * 1024 * 1024, - }, - { - Path: "05-02.tsm1", - Size: 2 * 1024 * 1024, - }, - } - - cp := tsm1.NewDefaultPlanner( - &fakeFileStore{ - PathsFn: func() []tsm1.FileStat { - return data - }, - }, tsm1.DefaultCompactFullWriteColdDuration, - ) - - expFiles := []tsm1.FileStat{data[0], data[1], data[2], data[3]} - tsm := cp.Plan(time.Now()) - if exp, got := len(expFiles), len(tsm[0]); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - - for i, p := range expFiles { - if got, exp := tsm[0][i], p.Path; got != exp { - t.Fatalf("tsm file mismatch: got %v, exp %v", got, exp) - } - } -} - -func TestDefaultPlanner_Plan_LargeGeneration(t 
*testing.T) { - cp := tsm1.NewDefaultPlanner( - &fakeFileStore{ - PathsFn: func() []tsm1.FileStat { - return []tsm1.FileStat{ - { - Path: "000000278-000000006.tsm", - Size: 2148340232, - }, - { - Path: "000000278-000000007.tsm", - Size: 2148356556, - }, - { - Path: "000000278-000000008.tsm", - Size: 167780181, - }, - { - Path: "000000278-000047040.tsm", - Size: 2148728539, - }, - { - Path: "000000278-000047041.tsm", - Size: 701863692, - }, - } - }, - }, tsm1.DefaultCompactFullWriteColdDuration, - ) - - tsm := cp.Plan(time.Now()) - if exp, got := 0, len(tsm); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } -} - -func TestDefaultPlanner_Plan_ForceFull(t *testing.T) { - cp := tsm1.NewDefaultPlanner( - &fakeFileStore{ - PathsFn: func() []tsm1.FileStat { - return []tsm1.FileStat{ - { - Path: "000000001-000000001.tsm", - Size: 2148340232, - }, - { - Path: "000000002-000000001.tsm", - Size: 2148356556, - }, - { - Path: "000000003-000000001.tsm", - Size: 167780181, - }, - { - Path: "000000004-000000001.tsm", - Size: 2148728539, - }, - { - Path: "000000005-000000001.tsm", - Size: 2148340232, - }, - { - Path: "000000006-000000001.tsm", - Size: 2148356556, - }, - { - Path: "000000007-000000001.tsm", - Size: 167780181, - }, - { - Path: "000000008-000000001.tsm", - Size: 2148728539, - }, - { - Path: "000000009-000000002.tsm", - Size: 701863692, - }, - { - Path: "000000010-000000002.tsm", - Size: 701863692, - }, - { - Path: "000000011-000000002.tsm", - Size: 701863692, - }, - { - Path: "000000012-000000002.tsm", - Size: 701863692, - }, - { - Path: "000000013-000000002.tsm", - Size: 701863692, - }, - } - }, - }, tsm1.DefaultCompactFullWriteColdDuration, - ) - - tsm := cp.PlanLevel(1) - if exp, got := 1, len(tsm); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - cp.Release(tsm) - - tsm = cp.PlanLevel(2) - if exp, got := 1, len(tsm); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - cp.Release(tsm) - - cp.ForceFull() - - // Level plans should not return any plans - tsm = cp.PlanLevel(1) - if exp, got := 0, len(tsm); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - cp.Release(tsm) - - tsm = cp.PlanLevel(2) - if exp, got := 0, len(tsm); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - cp.Release(tsm) - - tsm = cp.Plan(time.Now()) - if exp, got := 1, len(tsm); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - - if got, exp := len(tsm[0]), 13; got != exp { - t.Fatalf("plan length mismatch: got %v, exp %v", got, exp) - } - cp.Release(tsm) - - // Level plans should return plans now that Plan has been called - tsm = cp.PlanLevel(1) - if exp, got := 1, len(tsm); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - cp.Release(tsm) - - tsm = cp.PlanLevel(2) - if exp, got := 1, len(tsm); got != exp { - t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp) - } - cp.Release(tsm) - -} - -func assertValueEqual(t *testing.T, a, b tsm1.Value) { - if got, exp := a.UnixNano(), b.UnixNano(); got != exp { - t.Fatalf("time mismatch: got %v, exp %v", got, exp) - } - if got, exp := a.Value(), b.Value(); got != exp { - t.Fatalf("value mismatch: got %v, exp %v", got, exp) - } -} - -func MustTSMWriter(dir string, gen int) (tsm1.TSMWriter, string) { - f := MustTempFile(dir) - oldName := f.Name() - - // Windows can't rename a file while it's open. 
Close first, rename and - // then re-open - if err := f.Close(); err != nil { - panic(fmt.Sprintf("close temp file: %v", err)) - } - - newName := filepath.Join(filepath.Dir(oldName), tsm1.DefaultFormatFileName(gen, 1)+".tsm") - if err := fs.RenameFile(oldName, newName); err != nil { - panic(fmt.Sprintf("create tsm file: %v", err)) - } - - var err error - f, err = os.OpenFile(newName, os.O_RDWR, 0666) - if err != nil { - panic(fmt.Sprintf("open tsm files: %v", err)) - } - - w, err := tsm1.NewTSMWriter(f) - if err != nil { - panic(fmt.Sprintf("create TSM writer: %v", err)) - } - - return w, newName -} - -func MustWriteTSM(dir string, gen int, values map[string][]tsm1.Value) string { - w, name := MustTSMWriter(dir, gen) - - keys := make([]string, 0, len(values)) - for k := range values { - keys = append(keys, k) - } - sort.Strings(keys) - - for _, k := range keys { - if err := w.Write([]byte(k), values[k]); err != nil { - panic(fmt.Sprintf("write TSM value: %v", err)) - } - } - - if err := w.WriteIndex(); err != nil { - panic(fmt.Sprintf("write TSM index: %v", err)) - } - - if err := w.Close(); err != nil { - panic(fmt.Sprintf("write TSM close: %v", err)) - } - - return name -} - -func MustTSMReader(dir string, gen int, values map[string][]tsm1.Value) *tsm1.TSMReader { - return MustOpenTSMReader(MustWriteTSM(dir, gen, values)) -} - -func MustOpenTSMReader(name string) *tsm1.TSMReader { - f, err := os.Open(name) - if err != nil { - panic(fmt.Sprintf("open file: %v", err)) - } - - r, err := tsm1.NewTSMReader(f) - if err != nil { - panic(fmt.Sprintf("new reader: %v", err)) - } - return r -} - -type fakeFileStore struct { - PathsFn func() []tsm1.FileStat - lastModified time.Time - blockCount int - readers []*tsm1.TSMReader -} - -func (w *fakeFileStore) Stats() []tsm1.FileStat { - return w.PathsFn() -} - -func (w *fakeFileStore) NextGeneration() int { - return 1 -} - -func (w *fakeFileStore) SetCurrentGenerationFunc(fn func() int) {} - -func (w *fakeFileStore) LastModified() time.Time { - return w.lastModified -} - -func (w *fakeFileStore) BlockCount(path string, idx int) int { - return w.blockCount -} - -func (w *fakeFileStore) TSMReader(path string) *tsm1.TSMReader { - r := MustOpenTSMReader(path) - w.readers = append(w.readers, r) - r.Ref() - return r -} - -func (w *fakeFileStore) Close() { - for _, r := range w.readers { - r.Close() - } - w.readers = nil -} - -func (w *fakeFileStore) ParseFileName(path string) (int, int, error) { - return tsm1.DefaultParseFileName(path) -} diff --git a/tsdb/tsm1/config.go b/tsdb/tsm1/config.go deleted file mode 100644 index b1175e93eb..0000000000 --- a/tsdb/tsm1/config.go +++ /dev/null @@ -1,151 +0,0 @@ -package tsm1 - -import ( - "runtime" - "time" - - "github.com/influxdata/influxdb/v2/toml" -) - -var DefaultMaxConcurrentOpens = runtime.GOMAXPROCS(0) - -const ( - DefaultMADVWillNeed = false - - // DefaultLargeSeriesWriteThreshold is the number of series per write - // that requires the series index be pregrown before insert. - DefaultLargeSeriesWriteThreshold = 10000 -) - -// Config contains all of the configuration necessary to run a tsm1 engine. -type Config struct { - // MacConcurrentOpens controls the concurrency of opening tsm files during - // engine opening. - MaxConcurrentOpens int `toml:"max-concurrent-opens"` - - // MADVWillNeed controls whether we hint to the kernel that we intend to page - // in mmap'd sections of TSM files. This setting defaults to off, as it has - // been found to be problematic in some cases. 
It may help users who have - // slow disks. - MADVWillNeed bool `toml:"use-madv-willneed"` - - // LargeSeriesWriteThreshold is the threshold before a write requires - // preallocation to improve throughput. Currently used in the series file. - LargeSeriesWriteThreshold int `toml:"large-series-write-threshold"` - - Compaction CompactionConfig `toml:"compaction"` - Cache CacheConfig `toml:"cache"` -} - -// NewConfig constructs a Config with the default values. -func NewConfig() Config { - return Config{ - MaxConcurrentOpens: DefaultMaxConcurrentOpens, - MADVWillNeed: DefaultMADVWillNeed, - LargeSeriesWriteThreshold: DefaultLargeSeriesWriteThreshold, - - Cache: NewCacheConfig(), - Compaction: CompactionConfig{ - FullWriteColdDuration: toml.Duration(DefaultCompactFullWriteColdDuration), - Throughput: toml.Size(DefaultCompactThroughput), - ThroughputBurst: toml.Size(DefaultCompactThroughputBurst), - MaxConcurrent: DefaultCompactMaxConcurrent, - }, - } -} - -const ( - DefaultCompactFullWriteColdDuration = time.Duration(4 * time.Hour) - DefaultCompactThroughput = 48 * 1024 * 1024 - DefaultCompactThroughputBurst = 48 * 1024 * 1024 - DefaultCompactMaxConcurrent = 0 -) - -// CompactionConfing holds all of the configuration for compactions. Eventually we want -// to move this out of tsm1 so that it can be scheduled more intelligently. -type CompactionConfig struct { - // FullWriteColdDuration is the duration at which the engine will compact all TSM - // files in a shard if it hasn't received a write or delete - FullWriteColdDuration toml.Duration `toml:"full-write-cold-duration"` - - // Throughput is the rate limit in bytes per second that we will allow TSM compactions - // to write to disk. Not that short bursts are allowed to happen at a possibly larger - // value, set by CompactThroughputBurst. A value of 0 here will disable compaction rate - // limiting - Throughput toml.Size `toml:"throughput"` - - // ThroughputBurst is the rate limit in bytes per second that we will allow TSM compactions - // to write to disk. If this is not set, the burst value will be set to equal the normal - // throughput - ThroughputBurst toml.Size `toml:"throughput-burst"` - - // MaxConcurrent is the maximum number of concurrent full and level compactions that can - // run at one time. A value of 0 results in 50% of runtime.GOMAXPROCS(0) used at runtime. - MaxConcurrent int `toml:"max-concurrent"` -} - -// Default Cache configuration values. -const ( - DefaultCacheMaxMemorySize = toml.Size(1024 << 20) // 1GB - DefaultCacheSnapshotMemorySize = toml.Size(25 << 20) // 25MB - DefaultCacheSnapshotAgeDuration = toml.Duration(0) // Defaults to off. - DefaultCacheSnapshotWriteColdDuration = toml.Duration(10 * time.Minute) // Ten minutes -) - -// CacheConfig holds all of the configuration for the in memory cache of values that -// are waiting to be snapshot. -type CacheConfig struct { - // MaxMemorySize is the maximum size a shard's cache can reach before it starts - // rejecting writes. - MaxMemorySize toml.Size `toml:"max-memory-size"` - - // SnapshotMemorySize is the size at which the engine will snapshot the cache and - // write it to a TSM file, freeing up memory - SnapshotMemorySize toml.Size `toml:"snapshot-memory-size"` - - // SnapshotAgeDuration, when set, will ensure that the cache is always snapshotted - // if it's age is greater than this duration, regardless of the cache's size. 
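A caller-side sketch (assumed code, not from the repository) of how the defaults in this file compose: start from NewConfig and override individual fields, using the field names exactly as declared here.

package tsm1

import (
	"time"

	"github.com/influxdata/influxdb/v2/toml"
)

// exampleConfig is illustrative only: it raises the cache ceiling and caps
// compaction concurrency, leaving every other default from NewConfig intact.
func exampleConfig() Config {
	cfg := NewConfig()
	cfg.Cache.MaxMemorySize = toml.Size(2048 << 20)                      // default is 1GB
	cfg.Cache.SnapshotWriteColdDuration = toml.Duration(5 * time.Minute) // default is ten minutes
	cfg.Compaction.MaxConcurrent = 2                                     // 0 means 50% of GOMAXPROCS
	return cfg
}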
- SnapshotAgeDuration toml.Duration `toml:"snapshot-age-duration"` - - // SnapshotWriteColdDuration is the length of time at which the engine will snapshot - // the cache and write it to a new TSM file if the shard hasn't received writes or - // deletes. - // - // SnapshotWriteColdDuration should not be larger than SnapshotAgeDuration - SnapshotWriteColdDuration toml.Duration `toml:"snapshot-write-cold-duration"` -} - -// NewCacheConfig initialises a new CacheConfig with default values. -func NewCacheConfig() CacheConfig { - return CacheConfig{ - MaxMemorySize: DefaultCacheMaxMemorySize, - SnapshotMemorySize: DefaultCacheSnapshotMemorySize, - SnapshotAgeDuration: DefaultCacheSnapshotAgeDuration, - SnapshotWriteColdDuration: DefaultCacheSnapshotWriteColdDuration, - } -} - -// Default WAL configuration values. -const ( - DefaultWALEnabled = true - DefaultWALFsyncDelay = time.Duration(0) -) - -// WALConfig holds all of the configuration about the WAL. -type WALConfig struct { - // Enabled controls if the WAL is enabled. - Enabled bool `toml:"enabled"` - - // WALFsyncDelay is the amount of time that a write will wait before fsyncing. A - // duration greater than 0 can be used to batch up multiple fsync calls. This is - // useful for slower disks or when WAL write contention is seen. A value of 0 fsyncs - // every write to the WAL. - FsyncDelay toml.Duration `toml:"fsync-delay"` -} - -func NewWALConfig() WALConfig { - return WALConfig{ - Enabled: DefaultWALEnabled, - FsyncDelay: toml.Duration(DefaultWALFsyncDelay), - } -} diff --git a/tsdb/tsm1/encoding.gen.go b/tsdb/tsm1/encoding.gen.go deleted file mode 100644 index 7c20d26eb8..0000000000 --- a/tsdb/tsm1/encoding.gen.go +++ /dev/null @@ -1,1557 +0,0 @@ -// Generated by tmpl -// https://github.com/benbjohnson/tmpl -// -// DO NOT EDIT! -// Source: encoding.gen.go.tmpl - -package tsm1 - -import ( - "sort" - - "github.com/influxdata/influxdb/v2/tsdb/cursors" -) - -// Values represents a slice of values. -type Values []Value - -func (a Values) MinTime() int64 { - return a[0].UnixNano() -} - -func (a Values) MaxTime() int64 { - return a[len(a)-1].UnixNano() -} - -func (a Values) Size() int { - sz := 0 - for _, v := range a { - sz += v.Size() - } - return sz -} - -// Deduplicate returns a new slice with any values that have the same timestamp removed. -// The Value that appears last in the slice is the one that is kept. The returned -// Values are sorted if necessary. -func (a Values) Deduplicate() Values { - if len(a) <= 1 { - return a - } - - // See if we're already sorted and deduped - var needSort bool - for i := 1; i < len(a); i++ { - if a[i-1].UnixNano() >= a[i].UnixNano() { - needSort = true - break - } - } - - if !needSort { - return a - } - - sort.Stable(a) - var i int - for j := 1; j < len(a); j++ { - v := a[j] - if v.UnixNano() != a[i].UnixNano() { - i++ - } - a[i] = v - - } - return a[:i+1] -} - -// Exclude returns the subset of values not in [min, max]. The values must -// be deduplicated and sorted before calling Exclude or the results are undefined. -func (a Values) Exclude(min, max int64) Values { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - return a - } - - // a[rmin].UnixNano() ≥ min - // a[rmax].UnixNano() ≥ max - - if rmax < len(a) { - if a[rmax].UnixNano() == max { - rmax++ - } - rest := len(a) - rmax - if rest > 0 { - b := a[:rmin+rest] - copy(b[rmin:], a[rmax:]) - return b - } - } - - return a[:rmin] -} - -// Include returns the subset values between min and max inclusive. 
The values must -// be deduplicated and sorted before calling Exclude or the results are undefined. -func (a Values) Include(min, max int64) Values { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - return nil - } - - // a[rmin].UnixNano() ≥ min - // a[rmax].UnixNano() ≥ max - - if rmax < len(a) && a[rmax].UnixNano() == max { - rmax++ - } - - if rmin > -1 { - b := a[:rmax-rmin] - copy(b, a[rmin:rmax]) - return b - } - - return a[:rmax] -} - -// search performs a binary search for UnixNano() v in a -// and returns the position, i, where v would be inserted. -// An additional check of a[i].UnixNano() == v is necessary -// to determine if the value v exists. -func (a Values) search(v int64) int { - // Define: f(x) → a[x].UnixNano() < v - // Define: f(-1) == true, f(n) == false - // Invariant: f(lo-1) == true, f(hi) == false - lo := 0 - hi := len(a) - for lo < hi { - mid := int(uint(lo+hi) >> 1) - if a[mid].UnixNano() < v { - lo = mid + 1 // preserves f(lo-1) == true - } else { - hi = mid // preserves f(hi) == false - } - } - - // lo == hi - return lo -} - -// FindRange returns the positions where min and max would be -// inserted into the array. If a[0].UnixNano() > max or -// a[len-1].UnixNano() < min then FindRange returns (-1, -1) -// indicating the array is outside the [min, max]. The values must -// be deduplicated and sorted before calling Exclude or the results -// are undefined. -func (a Values) FindRange(min, max int64) (int, int) { - if len(a) == 0 || min > max { - return -1, -1 - } - - minVal := a[0].UnixNano() - maxVal := a[len(a)-1].UnixNano() - - if maxVal < min || minVal > max { - return -1, -1 - } - - return a.search(min), a.search(max) -} - -// Merge overlays b to top of a. If two values conflict with -// the same timestamp, b is used. Both a and b must be sorted -// in ascending order. -func (a Values) Merge(b Values) Values { - if len(a) == 0 { - return b - } - - if len(b) == 0 { - return a - } - - // Normally, both a and b should not contain duplicates. Due to a bug in older versions, it's - // possible stored blocks might contain duplicate values. Remove them if they exists before - // merging. - a = a.Deduplicate() - b = b.Deduplicate() - - if a[len(a)-1].UnixNano() < b[0].UnixNano() { - return append(a, b...) - } - - if b[len(b)-1].UnixNano() < a[0].UnixNano() { - return append(b, a...) - } - - out := make(Values, 0, len(a)+len(b)) - for len(a) > 0 && len(b) > 0 { - if a[0].UnixNano() < b[0].UnixNano() { - out, a = append(out, a[0]), a[1:] - } else if len(b) > 0 && a[0].UnixNano() == b[0].UnixNano() { - a = a[1:] - } else { - out, b = append(out, b[0]), b[1:] - } - } - if len(a) > 0 { - return append(out, a...) - } - return append(out, b...) -} - -// Sort methods -func (a Values) Len() int { return len(a) } -func (a Values) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a Values) Less(i, j int) bool { return a[i].UnixNano() < a[j].UnixNano() } - -// FloatValues represents a slice of Float values. 
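search and FindRange above implement a lower-bound binary search over timestamps, and every generated type below inherits the same behaviour. The test below is an illustrative sketch, not part of the original suite; it uses IntegerValues because its raw constructor already appears in the package's own tests.

package tsm1

import "testing"

// TestFindRangeSketch: FindRange returns the insertion points for min and max,
// or (-1, -1) when [min, max] lies entirely outside the slice. Illustrative only.
func TestFindRangeSketch(t *testing.T) {
	vals := IntegerValues{
		NewRawIntegerValue(10, 0),
		NewRawIntegerValue(13, 0),
		NewRawIntegerValue(15, 0),
		NewRawIntegerValue(20, 0),
	}
	if l, r := vals.FindRange(12, 20); l != 1 || r != 3 {
		t.Fatalf("FindRange(12, 20): got (%d, %d), exp (1, 3)", l, r)
	}
	if l, r := vals.FindRange(30, 40); l != -1 || r != -1 {
		t.Fatalf("FindRange(30, 40): got (%d, %d), exp (-1, -1)", l, r)
	}
}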
-type FloatValues []FloatValue - -func NewFloatArrayFromValues(v FloatValues) *cursors.FloatArray { - a := cursors.NewFloatArrayLen(len(v)) - for i, val := range v { - a.Timestamps[i] = val.UnixNano() - a.Values[i] = val.RawValue() - } - return a -} - -func (a FloatValues) MinTime() int64 { - return a[0].UnixNano() -} - -func (a FloatValues) MaxTime() int64 { - return a[len(a)-1].UnixNano() -} - -func (a FloatValues) Size() int { - sz := 0 - for _, v := range a { - sz += v.Size() - } - return sz -} - -// Deduplicate returns a new slice with any values that have the same timestamp removed. -// The Value that appears last in the slice is the one that is kept. The returned -// Values are sorted if necessary. -func (a FloatValues) Deduplicate() FloatValues { - if len(a) <= 1 { - return a - } - - // See if we're already sorted and deduped - var needSort bool - for i := 1; i < len(a); i++ { - if a[i-1].UnixNano() >= a[i].UnixNano() { - needSort = true - break - } - } - - if !needSort { - return a - } - - sort.Stable(a) - var i int - for j := 1; j < len(a); j++ { - v := a[j] - if v.UnixNano() != a[i].UnixNano() { - i++ - } - a[i] = v - - } - return a[:i+1] -} - -// Exclude returns the subset of values not in [min, max]. The values must -// be deduplicated and sorted before calling Exclude or the results are undefined. -func (a FloatValues) Exclude(min, max int64) FloatValues { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - return a - } - - // a[rmin].UnixNano() ≥ min - // a[rmax].UnixNano() ≥ max - - if rmax < len(a) { - if a[rmax].UnixNano() == max { - rmax++ - } - rest := len(a) - rmax - if rest > 0 { - b := a[:rmin+rest] - copy(b[rmin:], a[rmax:]) - return b - } - } - - return a[:rmin] -} - -// Include returns the subset values between min and max inclusive. The values must -// be deduplicated and sorted before calling Exclude or the results are undefined. -func (a FloatValues) Include(min, max int64) FloatValues { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - return nil - } - - // a[rmin].UnixNano() ≥ min - // a[rmax].UnixNano() ≥ max - - if rmax < len(a) && a[rmax].UnixNano() == max { - rmax++ - } - - if rmin > -1 { - b := a[:rmax-rmin] - copy(b, a[rmin:rmax]) - return b - } - - return a[:rmax] -} - -// search performs a binary search for UnixNano() v in a -// and returns the position, i, where v would be inserted. -// An additional check of a[i].UnixNano() == v is necessary -// to determine if the value v exists. -func (a FloatValues) search(v int64) int { - // Define: f(x) → a[x].UnixNano() < v - // Define: f(-1) == true, f(n) == false - // Invariant: f(lo-1) == true, f(hi) == false - lo := 0 - hi := len(a) - for lo < hi { - mid := int(uint(lo+hi) >> 1) - if a[mid].UnixNano() < v { - lo = mid + 1 // preserves f(lo-1) == true - } else { - hi = mid // preserves f(hi) == false - } - } - - // lo == hi - return lo -} - -// FindRange returns the positions where min and max would be -// inserted into the array. If a[0].UnixNano() > max or -// a[len-1].UnixNano() < min then FindRange returns (-1, -1) -// indicating the array is outside the [min, max]. The values must -// be deduplicated and sorted before calling Exclude or the results -// are undefined. 
-func (a FloatValues) FindRange(min, max int64) (int, int) { - if len(a) == 0 || min > max { - return -1, -1 - } - - minVal := a[0].UnixNano() - maxVal := a[len(a)-1].UnixNano() - - if maxVal < min || minVal > max { - return -1, -1 - } - - return a.search(min), a.search(max) -} - -// Merge overlays b to top of a. If two values conflict with -// the same timestamp, b is used. Both a and b must be sorted -// in ascending order. -func (a FloatValues) Merge(b FloatValues) FloatValues { - if len(a) == 0 { - return b - } - - if len(b) == 0 { - return a - } - - // Normally, both a and b should not contain duplicates. Due to a bug in older versions, it's - // possible stored blocks might contain duplicate values. Remove them if they exists before - // merging. - a = a.Deduplicate() - b = b.Deduplicate() - - if a[len(a)-1].UnixNano() < b[0].UnixNano() { - return append(a, b...) - } - - if b[len(b)-1].UnixNano() < a[0].UnixNano() { - return append(b, a...) - } - - out := make(FloatValues, 0, len(a)+len(b)) - for len(a) > 0 && len(b) > 0 { - if a[0].UnixNano() < b[0].UnixNano() { - out, a = append(out, a[0]), a[1:] - } else if len(b) > 0 && a[0].UnixNano() == b[0].UnixNano() { - a = a[1:] - } else { - out, b = append(out, b[0]), b[1:] - } - } - if len(a) > 0 { - return append(out, a...) - } - return append(out, b...) -} - -func (a FloatValues) Encode(buf []byte) ([]byte, error) { - return encodeFloatValuesBlock(buf, a) -} - -func EncodeFloatArrayBlock(a *cursors.FloatArray, b []byte) ([]byte, error) { - if a.Len() == 0 { - return nil, nil - } - - // TODO(edd): These need to be pooled. - var vb []byte - var tb []byte - var err error - - if vb, err = FloatArrayEncodeAll(a.Values, vb); err != nil { - return nil, err - } - - if tb, err = TimeArrayEncodeAll(a.Timestamps, tb); err != nil { - return nil, err - } - - // Prepend the first timestamp of the block in the first 8 bytes and the block - // in the next byte, followed by the block - return packBlock(b, BlockFloat64, tb, vb), nil -} - -func encodeFloatValuesBlock(buf []byte, values []FloatValue) ([]byte, error) { - if len(values) == 0 { - return nil, nil - } - - venc := getFloatEncoder(len(values)) - tsenc := getTimeEncoder(len(values)) - - var b []byte - err := func() error { - for _, v := range values { - tsenc.Write(v.UnixNano()) - venc.Write(v.RawValue()) - } - venc.Flush() - - // Encoded timestamp values - tb, err := tsenc.Bytes() - if err != nil { - return err - } - // Encoded values - vb, err := venc.Bytes() - if err != nil { - return err - } - - // Prepend the first timestamp of the block in the first 8 bytes and the block - // in the next byte, followed by the block - b = packBlock(buf, BlockFloat64, tb, vb) - - return nil - }() - - putTimeEncoder(tsenc) - putFloatEncoder(venc) - - return b, err -} - -// Sort methods -func (a FloatValues) Len() int { return len(a) } -func (a FloatValues) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a FloatValues) Less(i, j int) bool { return a[i].UnixNano() < a[j].UnixNano() } - -// IntegerValues represents a slice of Integer values. 
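A minimal caller-side sketch (assumed code, not from the repository) for the per-type array-block encoders defined above, using the float variant:

package tsm1

import "github.com/influxdata/influxdb/v2/tsdb/cursors"

// encodeTwoFloats builds a tiny column-oriented array and runs it through
// EncodeFloatArrayBlock. Illustrative only; real callers reuse the byte buffer.
func encodeTwoFloats() ([]byte, error) {
	arr := cursors.NewFloatArrayLen(2)
	arr.Timestamps[0], arr.Values[0] = 1, 1.5
	arr.Timestamps[1], arr.Values[1] = 2, 2.5
	return EncodeFloatArrayBlock(arr, nil)
}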
-type IntegerValues []IntegerValue - -func NewIntegerArrayFromValues(v IntegerValues) *cursors.IntegerArray { - a := cursors.NewIntegerArrayLen(len(v)) - for i, val := range v { - a.Timestamps[i] = val.UnixNano() - a.Values[i] = val.RawValue() - } - return a -} - -func (a IntegerValues) MinTime() int64 { - return a[0].UnixNano() -} - -func (a IntegerValues) MaxTime() int64 { - return a[len(a)-1].UnixNano() -} - -func (a IntegerValues) Size() int { - sz := 0 - for _, v := range a { - sz += v.Size() - } - return sz -} - -// Deduplicate returns a new slice with any values that have the same timestamp removed. -// The Value that appears last in the slice is the one that is kept. The returned -// Values are sorted if necessary. -func (a IntegerValues) Deduplicate() IntegerValues { - if len(a) <= 1 { - return a - } - - // See if we're already sorted and deduped - var needSort bool - for i := 1; i < len(a); i++ { - if a[i-1].UnixNano() >= a[i].UnixNano() { - needSort = true - break - } - } - - if !needSort { - return a - } - - sort.Stable(a) - var i int - for j := 1; j < len(a); j++ { - v := a[j] - if v.UnixNano() != a[i].UnixNano() { - i++ - } - a[i] = v - - } - return a[:i+1] -} - -// Exclude returns the subset of values not in [min, max]. The values must -// be deduplicated and sorted before calling Exclude or the results are undefined. -func (a IntegerValues) Exclude(min, max int64) IntegerValues { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - return a - } - - // a[rmin].UnixNano() ≥ min - // a[rmax].UnixNano() ≥ max - - if rmax < len(a) { - if a[rmax].UnixNano() == max { - rmax++ - } - rest := len(a) - rmax - if rest > 0 { - b := a[:rmin+rest] - copy(b[rmin:], a[rmax:]) - return b - } - } - - return a[:rmin] -} - -// Include returns the subset values between min and max inclusive. The values must -// be deduplicated and sorted before calling Exclude or the results are undefined. -func (a IntegerValues) Include(min, max int64) IntegerValues { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - return nil - } - - // a[rmin].UnixNano() ≥ min - // a[rmax].UnixNano() ≥ max - - if rmax < len(a) && a[rmax].UnixNano() == max { - rmax++ - } - - if rmin > -1 { - b := a[:rmax-rmin] - copy(b, a[rmin:rmax]) - return b - } - - return a[:rmax] -} - -// search performs a binary search for UnixNano() v in a -// and returns the position, i, where v would be inserted. -// An additional check of a[i].UnixNano() == v is necessary -// to determine if the value v exists. -func (a IntegerValues) search(v int64) int { - // Define: f(x) → a[x].UnixNano() < v - // Define: f(-1) == true, f(n) == false - // Invariant: f(lo-1) == true, f(hi) == false - lo := 0 - hi := len(a) - for lo < hi { - mid := int(uint(lo+hi) >> 1) - if a[mid].UnixNano() < v { - lo = mid + 1 // preserves f(lo-1) == true - } else { - hi = mid // preserves f(hi) == false - } - } - - // lo == hi - return lo -} - -// FindRange returns the positions where min and max would be -// inserted into the array. If a[0].UnixNano() > max or -// a[len-1].UnixNano() < min then FindRange returns (-1, -1) -// indicating the array is outside the [min, max]. The values must -// be deduplicated and sorted before calling Exclude or the results -// are undefined. 
-func (a IntegerValues) FindRange(min, max int64) (int, int) { - if len(a) == 0 || min > max { - return -1, -1 - } - - minVal := a[0].UnixNano() - maxVal := a[len(a)-1].UnixNano() - - if maxVal < min || minVal > max { - return -1, -1 - } - - return a.search(min), a.search(max) -} - -// Merge overlays b to top of a. If two values conflict with -// the same timestamp, b is used. Both a and b must be sorted -// in ascending order. -func (a IntegerValues) Merge(b IntegerValues) IntegerValues { - if len(a) == 0 { - return b - } - - if len(b) == 0 { - return a - } - - // Normally, both a and b should not contain duplicates. Due to a bug in older versions, it's - // possible stored blocks might contain duplicate values. Remove them if they exists before - // merging. - a = a.Deduplicate() - b = b.Deduplicate() - - if a[len(a)-1].UnixNano() < b[0].UnixNano() { - return append(a, b...) - } - - if b[len(b)-1].UnixNano() < a[0].UnixNano() { - return append(b, a...) - } - - out := make(IntegerValues, 0, len(a)+len(b)) - for len(a) > 0 && len(b) > 0 { - if a[0].UnixNano() < b[0].UnixNano() { - out, a = append(out, a[0]), a[1:] - } else if len(b) > 0 && a[0].UnixNano() == b[0].UnixNano() { - a = a[1:] - } else { - out, b = append(out, b[0]), b[1:] - } - } - if len(a) > 0 { - return append(out, a...) - } - return append(out, b...) -} - -func (a IntegerValues) Encode(buf []byte) ([]byte, error) { - return encodeIntegerValuesBlock(buf, a) -} - -func EncodeIntegerArrayBlock(a *cursors.IntegerArray, b []byte) ([]byte, error) { - if a.Len() == 0 { - return nil, nil - } - - // TODO(edd): These need to be pooled. - var vb []byte - var tb []byte - var err error - - if vb, err = IntegerArrayEncodeAll(a.Values, vb); err != nil { - return nil, err - } - - if tb, err = TimeArrayEncodeAll(a.Timestamps, tb); err != nil { - return nil, err - } - - // Prepend the first timestamp of the block in the first 8 bytes and the block - // in the next byte, followed by the block - return packBlock(b, BlockInteger, tb, vb), nil -} - -func encodeIntegerValuesBlock(buf []byte, values []IntegerValue) ([]byte, error) { - if len(values) == 0 { - return nil, nil - } - - venc := getIntegerEncoder(len(values)) - tsenc := getTimeEncoder(len(values)) - - var b []byte - err := func() error { - for _, v := range values { - tsenc.Write(v.UnixNano()) - venc.Write(v.RawValue()) - } - venc.Flush() - - // Encoded timestamp values - tb, err := tsenc.Bytes() - if err != nil { - return err - } - // Encoded values - vb, err := venc.Bytes() - if err != nil { - return err - } - - // Prepend the first timestamp of the block in the first 8 bytes and the block - // in the next byte, followed by the block - b = packBlock(buf, BlockInteger, tb, vb) - - return nil - }() - - putTimeEncoder(tsenc) - putIntegerEncoder(venc) - - return b, err -} - -// Sort methods -func (a IntegerValues) Len() int { return len(a) } -func (a IntegerValues) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a IntegerValues) Less(i, j int) bool { return a[i].UnixNano() < a[j].UnixNano() } - -// UnsignedValues represents a slice of Unsigned values. 
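An illustrative test (not from the original suite) of the Merge contract documented above: when both inputs carry the same timestamp, the point from b wins.

package tsm1

import "testing"

func TestIntegerValues_MergeSketch(t *testing.T) {
	a := IntegerValues{NewRawIntegerValue(1, 10), NewRawIntegerValue(2, 20)}
	b := IntegerValues{NewRawIntegerValue(2, 200), NewRawIntegerValue(3, 30)}

	got := a.Merge(b)
	want := []int64{10, 200, 30} // b's 200 replaces a's 20 at timestamp 2
	if len(got) != len(want) {
		t.Fatalf("length mismatch: got %d, exp %d", len(got), len(want))
	}
	for i := range want {
		if got[i].RawValue() != want[i] {
			t.Fatalf("value mismatch at %d: got %v, exp %v", i, got[i].RawValue(), want[i])
		}
	}
}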
-type UnsignedValues []UnsignedValue - -func NewUnsignedArrayFromValues(v UnsignedValues) *cursors.UnsignedArray { - a := cursors.NewUnsignedArrayLen(len(v)) - for i, val := range v { - a.Timestamps[i] = val.UnixNano() - a.Values[i] = val.RawValue() - } - return a -} - -func (a UnsignedValues) MinTime() int64 { - return a[0].UnixNano() -} - -func (a UnsignedValues) MaxTime() int64 { - return a[len(a)-1].UnixNano() -} - -func (a UnsignedValues) Size() int { - sz := 0 - for _, v := range a { - sz += v.Size() - } - return sz -} - -// Deduplicate returns a new slice with any values that have the same timestamp removed. -// The Value that appears last in the slice is the one that is kept. The returned -// Values are sorted if necessary. -func (a UnsignedValues) Deduplicate() UnsignedValues { - if len(a) <= 1 { - return a - } - - // See if we're already sorted and deduped - var needSort bool - for i := 1; i < len(a); i++ { - if a[i-1].UnixNano() >= a[i].UnixNano() { - needSort = true - break - } - } - - if !needSort { - return a - } - - sort.Stable(a) - var i int - for j := 1; j < len(a); j++ { - v := a[j] - if v.UnixNano() != a[i].UnixNano() { - i++ - } - a[i] = v - - } - return a[:i+1] -} - -// Exclude returns the subset of values not in [min, max]. The values must -// be deduplicated and sorted before calling Exclude or the results are undefined. -func (a UnsignedValues) Exclude(min, max int64) UnsignedValues { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - return a - } - - // a[rmin].UnixNano() ≥ min - // a[rmax].UnixNano() ≥ max - - if rmax < len(a) { - if a[rmax].UnixNano() == max { - rmax++ - } - rest := len(a) - rmax - if rest > 0 { - b := a[:rmin+rest] - copy(b[rmin:], a[rmax:]) - return b - } - } - - return a[:rmin] -} - -// Include returns the subset values between min and max inclusive. The values must -// be deduplicated and sorted before calling Exclude or the results are undefined. -func (a UnsignedValues) Include(min, max int64) UnsignedValues { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - return nil - } - - // a[rmin].UnixNano() ≥ min - // a[rmax].UnixNano() ≥ max - - if rmax < len(a) && a[rmax].UnixNano() == max { - rmax++ - } - - if rmin > -1 { - b := a[:rmax-rmin] - copy(b, a[rmin:rmax]) - return b - } - - return a[:rmax] -} - -// search performs a binary search for UnixNano() v in a -// and returns the position, i, where v would be inserted. -// An additional check of a[i].UnixNano() == v is necessary -// to determine if the value v exists. -func (a UnsignedValues) search(v int64) int { - // Define: f(x) → a[x].UnixNano() < v - // Define: f(-1) == true, f(n) == false - // Invariant: f(lo-1) == true, f(hi) == false - lo := 0 - hi := len(a) - for lo < hi { - mid := int(uint(lo+hi) >> 1) - if a[mid].UnixNano() < v { - lo = mid + 1 // preserves f(lo-1) == true - } else { - hi = mid // preserves f(hi) == false - } - } - - // lo == hi - return lo -} - -// FindRange returns the positions where min and max would be -// inserted into the array. If a[0].UnixNano() > max or -// a[len-1].UnixNano() < min then FindRange returns (-1, -1) -// indicating the array is outside the [min, max]. The values must -// be deduplicated and sorted before calling Exclude or the results -// are undefined. 
-func (a UnsignedValues) FindRange(min, max int64) (int, int) { - if len(a) == 0 || min > max { - return -1, -1 - } - - minVal := a[0].UnixNano() - maxVal := a[len(a)-1].UnixNano() - - if maxVal < min || minVal > max { - return -1, -1 - } - - return a.search(min), a.search(max) -} - -// Merge overlays b to top of a. If two values conflict with -// the same timestamp, b is used. Both a and b must be sorted -// in ascending order. -func (a UnsignedValues) Merge(b UnsignedValues) UnsignedValues { - if len(a) == 0 { - return b - } - - if len(b) == 0 { - return a - } - - // Normally, both a and b should not contain duplicates. Due to a bug in older versions, it's - // possible stored blocks might contain duplicate values. Remove them if they exists before - // merging. - a = a.Deduplicate() - b = b.Deduplicate() - - if a[len(a)-1].UnixNano() < b[0].UnixNano() { - return append(a, b...) - } - - if b[len(b)-1].UnixNano() < a[0].UnixNano() { - return append(b, a...) - } - - out := make(UnsignedValues, 0, len(a)+len(b)) - for len(a) > 0 && len(b) > 0 { - if a[0].UnixNano() < b[0].UnixNano() { - out, a = append(out, a[0]), a[1:] - } else if len(b) > 0 && a[0].UnixNano() == b[0].UnixNano() { - a = a[1:] - } else { - out, b = append(out, b[0]), b[1:] - } - } - if len(a) > 0 { - return append(out, a...) - } - return append(out, b...) -} - -func (a UnsignedValues) Encode(buf []byte) ([]byte, error) { - return encodeUnsignedValuesBlock(buf, a) -} - -func EncodeUnsignedArrayBlock(a *cursors.UnsignedArray, b []byte) ([]byte, error) { - if a.Len() == 0 { - return nil, nil - } - - // TODO(edd): These need to be pooled. - var vb []byte - var tb []byte - var err error - - if vb, err = UnsignedArrayEncodeAll(a.Values, vb); err != nil { - return nil, err - } - - if tb, err = TimeArrayEncodeAll(a.Timestamps, tb); err != nil { - return nil, err - } - - // Prepend the first timestamp of the block in the first 8 bytes and the block - // in the next byte, followed by the block - return packBlock(b, BlockUnsigned, tb, vb), nil -} - -func encodeUnsignedValuesBlock(buf []byte, values []UnsignedValue) ([]byte, error) { - if len(values) == 0 { - return nil, nil - } - - venc := getUnsignedEncoder(len(values)) - tsenc := getTimeEncoder(len(values)) - - var b []byte - err := func() error { - for _, v := range values { - tsenc.Write(v.UnixNano()) - venc.Write(int64(v.RawValue())) - } - venc.Flush() - - // Encoded timestamp values - tb, err := tsenc.Bytes() - if err != nil { - return err - } - // Encoded values - vb, err := venc.Bytes() - if err != nil { - return err - } - - // Prepend the first timestamp of the block in the first 8 bytes and the block - // in the next byte, followed by the block - b = packBlock(buf, BlockUnsigned, tb, vb) - - return nil - }() - - putTimeEncoder(tsenc) - putUnsignedEncoder(venc) - - return b, err -} - -// Sort methods -func (a UnsignedValues) Len() int { return len(a) } -func (a UnsignedValues) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a UnsignedValues) Less(i, j int) bool { return a[i].UnixNano() < a[j].UnixNano() } - -// StringValues represents a slice of String values. 
-type StringValues []StringValue - -func NewStringArrayFromValues(v StringValues) *cursors.StringArray { - a := cursors.NewStringArrayLen(len(v)) - for i, val := range v { - a.Timestamps[i] = val.UnixNano() - a.Values[i] = val.RawValue() - } - return a -} - -func (a StringValues) MinTime() int64 { - return a[0].UnixNano() -} - -func (a StringValues) MaxTime() int64 { - return a[len(a)-1].UnixNano() -} - -func (a StringValues) Size() int { - sz := 0 - for _, v := range a { - sz += v.Size() - } - return sz -} - -// Deduplicate returns a new slice with any values that have the same timestamp removed. -// The Value that appears last in the slice is the one that is kept. The returned -// Values are sorted if necessary. -func (a StringValues) Deduplicate() StringValues { - if len(a) <= 1 { - return a - } - - // See if we're already sorted and deduped - var needSort bool - for i := 1; i < len(a); i++ { - if a[i-1].UnixNano() >= a[i].UnixNano() { - needSort = true - break - } - } - - if !needSort { - return a - } - - sort.Stable(a) - var i int - for j := 1; j < len(a); j++ { - v := a[j] - if v.UnixNano() != a[i].UnixNano() { - i++ - } - a[i] = v - - } - return a[:i+1] -} - -// Exclude returns the subset of values not in [min, max]. The values must -// be deduplicated and sorted before calling Exclude or the results are undefined. -func (a StringValues) Exclude(min, max int64) StringValues { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - return a - } - - // a[rmin].UnixNano() ≥ min - // a[rmax].UnixNano() ≥ max - - if rmax < len(a) { - if a[rmax].UnixNano() == max { - rmax++ - } - rest := len(a) - rmax - if rest > 0 { - b := a[:rmin+rest] - copy(b[rmin:], a[rmax:]) - return b - } - } - - return a[:rmin] -} - -// Include returns the subset values between min and max inclusive. The values must -// be deduplicated and sorted before calling Exclude or the results are undefined. -func (a StringValues) Include(min, max int64) StringValues { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - return nil - } - - // a[rmin].UnixNano() ≥ min - // a[rmax].UnixNano() ≥ max - - if rmax < len(a) && a[rmax].UnixNano() == max { - rmax++ - } - - if rmin > -1 { - b := a[:rmax-rmin] - copy(b, a[rmin:rmax]) - return b - } - - return a[:rmax] -} - -// search performs a binary search for UnixNano() v in a -// and returns the position, i, where v would be inserted. -// An additional check of a[i].UnixNano() == v is necessary -// to determine if the value v exists. -func (a StringValues) search(v int64) int { - // Define: f(x) → a[x].UnixNano() < v - // Define: f(-1) == true, f(n) == false - // Invariant: f(lo-1) == true, f(hi) == false - lo := 0 - hi := len(a) - for lo < hi { - mid := int(uint(lo+hi) >> 1) - if a[mid].UnixNano() < v { - lo = mid + 1 // preserves f(lo-1) == true - } else { - hi = mid // preserves f(hi) == false - } - } - - // lo == hi - return lo -} - -// FindRange returns the positions where min and max would be -// inserted into the array. If a[0].UnixNano() > max or -// a[len-1].UnixNano() < min then FindRange returns (-1, -1) -// indicating the array is outside the [min, max]. The values must -// be deduplicated and sorted before calling Exclude or the results -// are undefined. 
-func (a StringValues) FindRange(min, max int64) (int, int) { - if len(a) == 0 || min > max { - return -1, -1 - } - - minVal := a[0].UnixNano() - maxVal := a[len(a)-1].UnixNano() - - if maxVal < min || minVal > max { - return -1, -1 - } - - return a.search(min), a.search(max) -} - -// Merge overlays b to top of a. If two values conflict with -// the same timestamp, b is used. Both a and b must be sorted -// in ascending order. -func (a StringValues) Merge(b StringValues) StringValues { - if len(a) == 0 { - return b - } - - if len(b) == 0 { - return a - } - - // Normally, both a and b should not contain duplicates. Due to a bug in older versions, it's - // possible stored blocks might contain duplicate values. Remove them if they exists before - // merging. - a = a.Deduplicate() - b = b.Deduplicate() - - if a[len(a)-1].UnixNano() < b[0].UnixNano() { - return append(a, b...) - } - - if b[len(b)-1].UnixNano() < a[0].UnixNano() { - return append(b, a...) - } - - out := make(StringValues, 0, len(a)+len(b)) - for len(a) > 0 && len(b) > 0 { - if a[0].UnixNano() < b[0].UnixNano() { - out, a = append(out, a[0]), a[1:] - } else if len(b) > 0 && a[0].UnixNano() == b[0].UnixNano() { - a = a[1:] - } else { - out, b = append(out, b[0]), b[1:] - } - } - if len(a) > 0 { - return append(out, a...) - } - return append(out, b...) -} - -func (a StringValues) Encode(buf []byte) ([]byte, error) { - return encodeStringValuesBlock(buf, a) -} - -func EncodeStringArrayBlock(a *cursors.StringArray, b []byte) ([]byte, error) { - if a.Len() == 0 { - return nil, nil - } - - // TODO(edd): These need to be pooled. - var vb []byte - var tb []byte - var err error - - if vb, err = StringArrayEncodeAll(a.Values, vb); err != nil { - return nil, err - } - - if tb, err = TimeArrayEncodeAll(a.Timestamps, tb); err != nil { - return nil, err - } - - // Prepend the first timestamp of the block in the first 8 bytes and the block - // in the next byte, followed by the block - return packBlock(b, BlockString, tb, vb), nil -} - -func encodeStringValuesBlock(buf []byte, values []StringValue) ([]byte, error) { - if len(values) == 0 { - return nil, nil - } - - venc := getStringEncoder(len(values)) - tsenc := getTimeEncoder(len(values)) - - var b []byte - err := func() error { - for _, v := range values { - tsenc.Write(v.UnixNano()) - venc.Write(v.RawValue()) - } - venc.Flush() - - // Encoded timestamp values - tb, err := tsenc.Bytes() - if err != nil { - return err - } - // Encoded values - vb, err := venc.Bytes() - if err != nil { - return err - } - - // Prepend the first timestamp of the block in the first 8 bytes and the block - // in the next byte, followed by the block - b = packBlock(buf, BlockString, tb, vb) - - return nil - }() - - putTimeEncoder(tsenc) - putStringEncoder(venc) - - return b, err -} - -// Sort methods -func (a StringValues) Len() int { return len(a) } -func (a StringValues) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a StringValues) Less(i, j int) bool { return a[i].UnixNano() < a[j].UnixNano() } - -// BooleanValues represents a slice of Boolean values. 
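Deduplicate, shared by every generated type, keeps the later of two points with the same timestamp and only re-sorts when the input is out of order. A small illustrative test (not from the original suite), using integers for brevity:

package tsm1

import "testing"

func TestDeduplicateSketch(t *testing.T) {
	vals := IntegerValues{
		NewRawIntegerValue(2, 20),
		NewRawIntegerValue(1, 10),
		NewRawIntegerValue(2, 200), // duplicates the first timestamp; this point survives
	}
	out := vals.Deduplicate()
	if len(out) != 2 || out[0].RawValue() != int64(10) || out[1].RawValue() != int64(200) {
		t.Fatalf("unexpected result: %v", out)
	}
}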
-type BooleanValues []BooleanValue - -func NewBooleanArrayFromValues(v BooleanValues) *cursors.BooleanArray { - a := cursors.NewBooleanArrayLen(len(v)) - for i, val := range v { - a.Timestamps[i] = val.UnixNano() - a.Values[i] = val.RawValue() - } - return a -} - -func (a BooleanValues) MinTime() int64 { - return a[0].UnixNano() -} - -func (a BooleanValues) MaxTime() int64 { - return a[len(a)-1].UnixNano() -} - -func (a BooleanValues) Size() int { - sz := 0 - for _, v := range a { - sz += v.Size() - } - return sz -} - -// Deduplicate returns a new slice with any values that have the same timestamp removed. -// The Value that appears last in the slice is the one that is kept. The returned -// Values are sorted if necessary. -func (a BooleanValues) Deduplicate() BooleanValues { - if len(a) <= 1 { - return a - } - - // See if we're already sorted and deduped - var needSort bool - for i := 1; i < len(a); i++ { - if a[i-1].UnixNano() >= a[i].UnixNano() { - needSort = true - break - } - } - - if !needSort { - return a - } - - sort.Stable(a) - var i int - for j := 1; j < len(a); j++ { - v := a[j] - if v.UnixNano() != a[i].UnixNano() { - i++ - } - a[i] = v - - } - return a[:i+1] -} - -// Exclude returns the subset of values not in [min, max]. The values must -// be deduplicated and sorted before calling Exclude or the results are undefined. -func (a BooleanValues) Exclude(min, max int64) BooleanValues { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - return a - } - - // a[rmin].UnixNano() ≥ min - // a[rmax].UnixNano() ≥ max - - if rmax < len(a) { - if a[rmax].UnixNano() == max { - rmax++ - } - rest := len(a) - rmax - if rest > 0 { - b := a[:rmin+rest] - copy(b[rmin:], a[rmax:]) - return b - } - } - - return a[:rmin] -} - -// Include returns the subset values between min and max inclusive. The values must -// be deduplicated and sorted before calling Exclude or the results are undefined. -func (a BooleanValues) Include(min, max int64) BooleanValues { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - return nil - } - - // a[rmin].UnixNano() ≥ min - // a[rmax].UnixNano() ≥ max - - if rmax < len(a) && a[rmax].UnixNano() == max { - rmax++ - } - - if rmin > -1 { - b := a[:rmax-rmin] - copy(b, a[rmin:rmax]) - return b - } - - return a[:rmax] -} - -// search performs a binary search for UnixNano() v in a -// and returns the position, i, where v would be inserted. -// An additional check of a[i].UnixNano() == v is necessary -// to determine if the value v exists. -func (a BooleanValues) search(v int64) int { - // Define: f(x) → a[x].UnixNano() < v - // Define: f(-1) == true, f(n) == false - // Invariant: f(lo-1) == true, f(hi) == false - lo := 0 - hi := len(a) - for lo < hi { - mid := int(uint(lo+hi) >> 1) - if a[mid].UnixNano() < v { - lo = mid + 1 // preserves f(lo-1) == true - } else { - hi = mid // preserves f(hi) == false - } - } - - // lo == hi - return lo -} - -// FindRange returns the positions where min and max would be -// inserted into the array. If a[0].UnixNano() > max or -// a[len-1].UnixNano() < min then FindRange returns (-1, -1) -// indicating the array is outside the [min, max]. The values must -// be deduplicated and sorted before calling Exclude or the results -// are undefined. 
-func (a BooleanValues) FindRange(min, max int64) (int, int) { - if len(a) == 0 || min > max { - return -1, -1 - } - - minVal := a[0].UnixNano() - maxVal := a[len(a)-1].UnixNano() - - if maxVal < min || minVal > max { - return -1, -1 - } - - return a.search(min), a.search(max) -} - -// Merge overlays b to top of a. If two values conflict with -// the same timestamp, b is used. Both a and b must be sorted -// in ascending order. -func (a BooleanValues) Merge(b BooleanValues) BooleanValues { - if len(a) == 0 { - return b - } - - if len(b) == 0 { - return a - } - - // Normally, both a and b should not contain duplicates. Due to a bug in older versions, it's - // possible stored blocks might contain duplicate values. Remove them if they exists before - // merging. - a = a.Deduplicate() - b = b.Deduplicate() - - if a[len(a)-1].UnixNano() < b[0].UnixNano() { - return append(a, b...) - } - - if b[len(b)-1].UnixNano() < a[0].UnixNano() { - return append(b, a...) - } - - out := make(BooleanValues, 0, len(a)+len(b)) - for len(a) > 0 && len(b) > 0 { - if a[0].UnixNano() < b[0].UnixNano() { - out, a = append(out, a[0]), a[1:] - } else if len(b) > 0 && a[0].UnixNano() == b[0].UnixNano() { - a = a[1:] - } else { - out, b = append(out, b[0]), b[1:] - } - } - if len(a) > 0 { - return append(out, a...) - } - return append(out, b...) -} - -func (a BooleanValues) Encode(buf []byte) ([]byte, error) { - return encodeBooleanValuesBlock(buf, a) -} - -func EncodeBooleanArrayBlock(a *cursors.BooleanArray, b []byte) ([]byte, error) { - if a.Len() == 0 { - return nil, nil - } - - // TODO(edd): These need to be pooled. - var vb []byte - var tb []byte - var err error - - if vb, err = BooleanArrayEncodeAll(a.Values, vb); err != nil { - return nil, err - } - - if tb, err = TimeArrayEncodeAll(a.Timestamps, tb); err != nil { - return nil, err - } - - // Prepend the first timestamp of the block in the first 8 bytes and the block - // in the next byte, followed by the block - return packBlock(b, BlockBoolean, tb, vb), nil -} - -func encodeBooleanValuesBlock(buf []byte, values []BooleanValue) ([]byte, error) { - if len(values) == 0 { - return nil, nil - } - - venc := getBooleanEncoder(len(values)) - tsenc := getTimeEncoder(len(values)) - - var b []byte - err := func() error { - for _, v := range values { - tsenc.Write(v.UnixNano()) - venc.Write(v.RawValue()) - } - venc.Flush() - - // Encoded timestamp values - tb, err := tsenc.Bytes() - if err != nil { - return err - } - // Encoded values - vb, err := venc.Bytes() - if err != nil { - return err - } - - // Prepend the first timestamp of the block in the first 8 bytes and the block - // in the next byte, followed by the block - b = packBlock(buf, BlockBoolean, tb, vb) - - return nil - }() - - putTimeEncoder(tsenc) - putBooleanEncoder(venc) - - return b, err -} - -// Sort methods -func (a BooleanValues) Len() int { return len(a) } -func (a BooleanValues) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a BooleanValues) Less(i, j int) bool { return a[i].UnixNano() < a[j].UnixNano() } diff --git a/tsdb/tsm1/encoding.gen.go.tmpl b/tsdb/tsm1/encoding.gen.go.tmpl deleted file mode 100644 index f4fc3c1831..0000000000 --- a/tsdb/tsm1/encoding.gen.go.tmpl +++ /dev/null @@ -1,287 +0,0 @@ -package tsm1 - -import ( - "sort" - - "github.com/influxdata/influxdb/v2/tsdb/cursors" -) - -{{range .}} - -// {{.Name}}Values represents a slice of {{.Name}} values. 
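The per-type file removed above is generated from this template with benbjohnson/tmpl, driven by the tmpldata file that follows. The directive below is an assumption about the invocation (the flags used in the repository may differ); it is shown only to connect the template to its output.

//go:generate go run github.com/benbjohnson/tmpl -data=@encoding.gen.go.tmpldata encoding.gen.go.tmpl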
-type {{.Name}}Values []{{.Name}}Value - -{{if ne .Name ""}} -func New{{.Name}}ArrayFromValues(v {{.Name}}Values) *cursors.{{.Name}}Array { - a := cursors.New{{.Name}}ArrayLen(len(v)) - for i, val := range v { - a.Timestamps[i] = val.UnixNano() - a.Values[i] = val.RawValue() - } - return a -} -{{end}} - -func (a {{.Name}}Values) MinTime() int64 { - return a[0].UnixNano() -} - -func (a {{.Name}}Values) MaxTime() int64 { - return a[len(a)-1].UnixNano() -} - -func (a {{.Name}}Values) Size() int { - sz := 0 - for _, v := range a { - sz += v.Size() - } - return sz -} - -// Deduplicate returns a new slice with any values that have the same timestamp removed. -// The Value that appears last in the slice is the one that is kept. The returned -// Values are sorted if necessary. -func (a {{.Name}}Values) Deduplicate() {{.Name}}Values { - if len(a) <= 1 { - return a - } - - // See if we're already sorted and deduped - var needSort bool - for i := 1; i < len(a); i++ { - if a[i-1].UnixNano() >= a[i].UnixNano() { - needSort = true - break - } - } - - if !needSort { - return a - } - - sort.Stable(a) - var i int - for j := 1; j < len(a); j++ { - v := a[j] - if v.UnixNano() != a[i].UnixNano() { - i++ - } - a[i] = v - - } - return a[:i+1] -} - -// Exclude returns the subset of values not in [min, max]. The values must -// be deduplicated and sorted before calling Exclude or the results are undefined. -func (a {{.Name}}Values) Exclude(min, max int64) {{.Name}}Values { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - return a - } - - // a[rmin].UnixNano() ≥ min - // a[rmax].UnixNano() ≥ max - - if rmax < len(a) { - if a[rmax].UnixNano() == max { - rmax++ - } - rest := len(a)-rmax - if rest > 0 { - b := a[:rmin+rest] - copy(b[rmin:], a[rmax:]) - return b - } - } - - return a[:rmin] -} - -// Include returns the subset values between min and max inclusive. The values must -// be deduplicated and sorted before calling Exclude or the results are undefined. -func (a {{.Name}}Values) Include(min, max int64) {{.Name}}Values { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - return nil - } - - // a[rmin].UnixNano() ≥ min - // a[rmax].UnixNano() ≥ max - - if rmax < len(a) && a[rmax].UnixNano() == max { - rmax++ - } - - if rmin > -1 { - b := a[:rmax-rmin] - copy(b, a[rmin:rmax]) - return b - } - - return a[:rmax] -} - -// search performs a binary search for UnixNano() v in a -// and returns the position, i, where v would be inserted. -// An additional check of a[i].UnixNano() == v is necessary -// to determine if the value v exists. -func (a {{.Name}}Values) search(v int64) int { - // Define: f(x) → a[x].UnixNano() < v - // Define: f(-1) == true, f(n) == false - // Invariant: f(lo-1) == true, f(hi) == false - lo := 0 - hi := len(a) - for lo < hi { - mid := int(uint(lo+hi) >> 1) - if a[mid].UnixNano() < v { - lo = mid + 1 // preserves f(lo-1) == true - } else { - hi = mid // preserves f(hi) == false - } - } - - // lo == hi - return lo -} - -// FindRange returns the positions where min and max would be -// inserted into the array. If a[0].UnixNano() > max or -// a[len-1].UnixNano() < min then FindRange returns (-1, -1) -// indicating the array is outside the [min, max]. The values must -// be deduplicated and sorted before calling Exclude or the results -// are undefined. 
-func (a {{.Name}}Values) FindRange(min, max int64) (int, int) { - if len(a) == 0 || min > max { - return -1, -1 - } - - minVal := a[0].UnixNano() - maxVal := a[len(a)-1].UnixNano() - - if maxVal < min || minVal > max { - return -1, -1 - } - - return a.search(min), a.search(max) -} - -// Merge overlays b to top of a. If two values conflict with -// the same timestamp, b is used. Both a and b must be sorted -// in ascending order. -func (a {{.Name}}Values) Merge(b {{.Name}}Values) {{.Name}}Values { - if len(a) == 0 { - return b - } - - if len(b) == 0 { - return a - } - - // Normally, both a and b should not contain duplicates. Due to a bug in older versions, it's - // possible stored blocks might contain duplicate values. Remove them if they exists before - // merging. - a = a.Deduplicate() - b = b.Deduplicate() - - if a[len(a)-1].UnixNano() < b[0].UnixNano() { - return append(a, b...) - } - - if b[len(b)-1].UnixNano() < a[0].UnixNano() { - return append(b, a...) - } - - out := make({{.Name}}Values, 0, len(a)+len(b)) - for len(a) > 0 && len(b) > 0 { - if a[0].UnixNano() < b[0].UnixNano() { - out, a = append(out, a[0]), a[1:] - } else if len(b) > 0 && a[0].UnixNano() == b[0].UnixNano() { - a = a[1:] - } else { - out, b = append(out, b[0]), b[1:] - } - } - if len(a) > 0 { - return append(out, a...) - } - return append(out, b...) -} - -{{ if ne .Name "" }} -func (a {{.Name}}Values) Encode(buf []byte) ([]byte, error) { - return encode{{.Name}}ValuesBlock(buf, a) -} - -func Encode{{ .Name }}ArrayBlock(a *cursors.{{ .Name }}Array, b []byte) ([]byte, error) { - if a.Len() == 0 { - return nil, nil - } - - // TODO(edd): These need to be pooled. - var vb []byte - var tb []byte - var err error - - if vb, err = {{ .Name }}ArrayEncodeAll(a.Values, vb); err != nil { - return nil, err - } - - if tb, err = TimeArrayEncodeAll(a.Timestamps, tb); err != nil { - return nil, err - } - - // Prepend the first timestamp of the block in the first 8 bytes and the block - // in the next byte, followed by the block - return packBlock(b, {{ .Type }}, tb, vb), nil -} - -func encode{{ .Name }}ValuesBlock(buf []byte, values []{{.Name}}Value) ([]byte, error) { - if len(values) == 0 { - return nil, nil - } - - venc := get{{ .Name }}Encoder(len(values)) - tsenc := getTimeEncoder(len(values)) - - var b []byte - err := func() error { - for _, v := range values { - tsenc.Write(v.UnixNano()) - venc.Write({{if .CastType}}{{.CastType}}(v.RawValue()){{else}}v.RawValue(){{end}}) - } - venc.Flush() - - // Encoded timestamp values - tb, err := tsenc.Bytes() - if err != nil { - return err - } - // Encoded values - vb, err := venc.Bytes() - if err != nil { - return err - } - - // Prepend the first timestamp of the block in the first 8 bytes and the block - // in the next byte, followed by the block - b = packBlock(buf, {{ .Type }}, tb, vb) - - return nil - }() - - putTimeEncoder(tsenc) - put{{.Name}}Encoder(venc) - - return b, err -} - -{{ end }} - -// Sort methods -func (a {{.Name}}Values) Len() int { return len(a) } -func (a {{.Name}}Values) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a {{.Name}}Values) Less(i, j int) bool { return a[i].UnixNano() < a[j].UnixNano() } - - -{{ end }} diff --git a/tsdb/tsm1/encoding.gen.go.tmpldata b/tsdb/tsm1/encoding.gen.go.tmpldata deleted file mode 100644 index 8da0286951..0000000000 --- a/tsdb/tsm1/encoding.gen.go.tmpldata +++ /dev/null @@ -1,38 +0,0 @@ -[ - { - "Name":"", - "name":"", - "Type":"", - "CastType":"" - }, - { - "Name":"Float", - "name":"float", - "Type":"BlockFloat64", - 
"CastType":"" - }, - { - "Name":"Integer", - "name":"integer", - "Type":"BlockInteger", - "CastType":"" - }, - { - "Name":"Unsigned", - "name":"unsigned", - "Type":"BlockUnsigned", - "CastType":"int64" - }, - { - "Name":"String", - "name":"string", - "Type":"BlockString", - "CastType":"" - }, - { - "Name":"Boolean", - "name":"boolean", - "Type":"BlockBoolean", - "CastType":"" - } -] diff --git a/tsdb/tsm1/encoding.gen_test.go b/tsdb/tsm1/encoding.gen_test.go deleted file mode 100644 index a14c15d71c..0000000000 --- a/tsdb/tsm1/encoding.gen_test.go +++ /dev/null @@ -1,217 +0,0 @@ -package tsm1 - -import ( - "fmt" - "testing" - - "github.com/google/go-cmp/cmp" -) - -func makeIntegerValues(count int, min, max int64) IntegerValues { - vals := make(IntegerValues, count) - - ts := min - inc := (max - min) / int64(count) - - for i := 0; i < count; i++ { - vals[i] = NewRawIntegerValue(ts, 0) - ts += inc - } - - return vals -} - -func makeIntegerValuesFromSlice(t []int64) IntegerValues { - iv := make(IntegerValues, len(t)) - for i, v := range t { - iv[i] = NewRawIntegerValue(v, 0) - } - return iv -} - -func TestIntegerValues_FindRangeNoValues(t *testing.T) { - var vals IntegerValues - l, r := vals.FindRange(0, 100) - if exp := -1; l != exp { - t.Errorf("invalid l; exp=%d, got=%d", exp, l) - } - if exp := -1; r != exp { - t.Errorf("invalid r; exp=%d, got=%d", exp, r) - } -} - -func TestIntegerValues_FindRange(t *testing.T) { - vals := makeIntegerValuesFromSlice([]int64{10, 11, 13, 15, 17, 20, 21}) - - cases := []struct { - min, max int64 - l, r int - }{ - {12, 20, 2, 5}, - {22, 40, -1, -1}, - {1, 9, -1, -1}, - {1, 10, 0, 0}, - {1, 11, 0, 1}, - {15, 15, 3, 3}, - } - - for _, tc := range cases { - t.Run(fmt.Sprintf("%d→%d", tc.min, tc.max), func(t *testing.T) { - l, r := vals.FindRange(tc.min, tc.max) - if l != tc.l { - t.Errorf("left: got %d, exp %d", l, tc.l) - } - if r != tc.r { - t.Errorf("right: got %d, exp %d", r, tc.r) - } - }) - } -} - -func TestIntegerValues_Exclude(t *testing.T) { - cases := []struct { - n string - min, max int64 - exp []int64 - }{ - {"excl bad range", 18, 11, []int64{10, 12, 14, 16, 18}}, - {"excl none-lo", 0, 9, []int64{10, 12, 14, 16, 18}}, - {"excl none-hi", 19, 30, []int64{10, 12, 14, 16, 18}}, - {"excl first", 0, 10, []int64{12, 14, 16, 18}}, - {"excl last", 18, 20, []int64{10, 12, 14, 16}}, - {"excl all but first and last", 12, 16, []int64{10, 18}}, - {"excl none in middle", 13, 13, []int64{10, 12, 14, 16, 18}}, - {"excl middle", 14, 14, []int64{10, 12, 16, 18}}, - {"excl suffix", 16, 18, []int64{10, 12, 14}}, - } - - for _, tc := range cases { - t.Run(fmt.Sprintf("%s[%d,%d]", tc.n, tc.min, tc.max), func(t *testing.T) { - vals := makeIntegerValues(5, 10, 20) - vals = vals.Exclude(tc.min, tc.max) - var got []int64 - for _, v := range vals { - got = append(got, v.UnixNano()) - } - opt := cmp.AllowUnexported(IntegerValue{}) - if !cmp.Equal(tc.exp, got, opt) { - t.Error(cmp.Diff(tc.exp, got, opt)) - } - }) - } -} - -func TestIntegerValues_Include(t *testing.T) { - cases := []struct { - n string - min, max int64 - exp []int64 - }{ - {"incl none-lo", 0, 9, nil}, - {"incl none-hi", 19, 30, nil}, - {"incl first", 0, 10, []int64{10}}, - {"incl last", 18, 20, []int64{18}}, - {"incl all but first and last", 12, 16, []int64{12, 14, 16}}, - {"incl none in middle", 13, 13, nil}, - {"incl middle", 14, 14, []int64{14}}, - } - - for _, tc := range cases { - t.Run(fmt.Sprintf("%s[%d,%d]", tc.n, tc.min, tc.max), func(t *testing.T) { - vals := makeIntegerValues(5, 10, 20) - vals = 
vals.Include(tc.min, tc.max) - var got []int64 - for _, v := range vals { - got = append(got, v.UnixNano()) - } - opt := cmp.AllowUnexported(IntegerValue{}) - if !cmp.Equal(tc.exp, got, opt) { - t.Error(cmp.Diff(tc.exp, got, opt)) - } - }) - } -} - -func benchExclude(b *testing.B, vals IntegerValues, min, max int64) { - b.ResetTimer() - - for i := 0; i < b.N; i++ { - vals.Exclude(min, max) - } -} - -func BenchmarkIntegerValues_ExcludeNone_1000(b *testing.B) { - benchExclude(b, makeIntegerValues(1000, 1000, 2000), 0, 500) -} - -func BenchmarkIntegerValues_ExcludeMiddleHalf_1000(b *testing.B) { - benchExclude(b, makeIntegerValues(1000, 1000, 2000), 1250, 1750) -} - -func BenchmarkIntegerValues_ExcludeFirst_1000(b *testing.B) { - benchExclude(b, makeIntegerValues(1000, 1000, 2000), 0, 1000) -} - -func BenchmarkIntegerValues_ExcludeLast_1000(b *testing.B) { - benchExclude(b, makeIntegerValues(1000, 1000, 2000), 1999, 2000) -} - -func BenchmarkIntegerValues_ExcludeNone_10000(b *testing.B) { - benchExclude(b, makeIntegerValues(10000, 10000, 20000), 00, 5000) -} - -func BenchmarkIntegerValues_ExcludeMiddleHalf_10000(b *testing.B) { - benchExclude(b, makeIntegerValues(10000, 10000, 20000), 12500, 17500) -} - -func BenchmarkIntegerValues_ExcludeFirst_10000(b *testing.B) { - benchExclude(b, makeIntegerValues(10000, 10000, 20000), 0, 10000) -} - -func BenchmarkIntegerValues_ExcludeLast_10000(b *testing.B) { - benchExclude(b, makeIntegerValues(10000, 10000, 20000), 19999, 20000) -} - -func benchInclude(b *testing.B, vals IntegerValues, min, max int64) { - tmp := append(IntegerValues{}, vals...) - n := len(vals) - b.ResetTimer() - - for i := 0; i < b.N; i++ { - vals.Include(min, max) - vals = vals[:n] - copy(vals, tmp) - } -} - -func BenchmarkIntegerValues_IncludeNone_1000(b *testing.B) { - benchInclude(b, makeIntegerValues(1000, 1000, 2000), 0, 500) -} - -func BenchmarkIntegerValues_IncludeMiddleHalf_1000(b *testing.B) { - benchInclude(b, makeIntegerValues(1000, 1000, 2000), 1250, 1750) -} - -func BenchmarkIntegerValues_IncludeFirst_1000(b *testing.B) { - benchInclude(b, makeIntegerValues(1000, 1000, 2000), 0, 1000) -} - -func BenchmarkIntegerValues_IncludeLast_1000(b *testing.B) { - benchInclude(b, makeIntegerValues(1000, 1000, 2000), 1999, 2000) -} - -func BenchmarkIntegerValues_IncludeNone_10000(b *testing.B) { - benchInclude(b, makeIntegerValues(10000, 10000, 20000), 00, 5000) -} - -func BenchmarkIntegerValues_IncludeMiddleHalf_10000(b *testing.B) { - benchInclude(b, makeIntegerValues(10000, 10000, 20000), 12500, 17500) -} - -func BenchmarkIntegerValues_IncludeFirst_10000(b *testing.B) { - benchInclude(b, makeIntegerValues(10000, 10000, 20000), 0, 10000) -} - -func BenchmarkIntegerValues_IncludeLast_10000(b *testing.B) { - benchInclude(b, makeIntegerValues(10000, 10000, 20000), 19999, 20000) -} diff --git a/tsdb/tsm1/encoding.go b/tsdb/tsm1/encoding.go deleted file mode 100644 index 8e47e22cc1..0000000000 --- a/tsdb/tsm1/encoding.go +++ /dev/null @@ -1,885 +0,0 @@ -package tsm1 - -import ( - "encoding/binary" - "fmt" - "runtime" - - "github.com/influxdata/influxdb/v2/pkg/pool" - "github.com/influxdata/influxql" -) - -const ( - // BlockFloat64 designates a block encodes float64 values. - BlockFloat64 = byte(0) - - // BlockInteger designates a block encodes int64 values. - BlockInteger = byte(1) - - // BlockBoolean designates a block encodes boolean values. - BlockBoolean = byte(2) - - // BlockString designates a block encodes string values. 
- BlockString = byte(3) - - // BlockUnsigned designates a block encodes uint64 values. - BlockUnsigned = byte(4) - - // BlockUndefined represents an undefined block type value. - BlockUndefined = BlockUnsigned + 1 - - // encodedBlockHeaderSize is the size of the header for an encoded block. There is one - // byte encoding the type of the block. - encodedBlockHeaderSize = 1 -) - -func init() { - // Prime the pools with one encoder/decoder for each available CPU. - vals := make([]interface{}, 0, runtime.NumCPU()) - for _, p := range []*pool.Generic{ - timeEncoderPool, timeDecoderPool, - integerEncoderPool, integerDecoderPool, - floatDecoderPool, floatDecoderPool, - stringEncoderPool, stringEncoderPool, - booleanEncoderPool, booleanDecoderPool, - } { - vals = vals[:0] - // Check one out to force the allocation now and hold onto it - for i := 0; i < runtime.NumCPU(); i++ { - v := p.Get(MaxPointsPerBlock) - vals = append(vals, v) - } - // Add them all back - for _, v := range vals { - p.Put(v) - } - } -} - -var ( - // encoder pools - - timeEncoderPool = pool.NewGeneric(runtime.NumCPU(), func(sz int) interface{} { - return NewTimeEncoder(sz) - }) - integerEncoderPool = pool.NewGeneric(runtime.NumCPU(), func(sz int) interface{} { - return NewIntegerEncoder(sz) - }) - floatEncoderPool = pool.NewGeneric(runtime.NumCPU(), func(sz int) interface{} { - return NewFloatEncoder() - }) - stringEncoderPool = pool.NewGeneric(runtime.NumCPU(), func(sz int) interface{} { - return NewStringEncoder(sz) - }) - booleanEncoderPool = pool.NewGeneric(runtime.NumCPU(), func(sz int) interface{} { - return NewBooleanEncoder(sz) - }) - - // decoder pools - - timeDecoderPool = pool.NewGeneric(runtime.NumCPU(), func(sz int) interface{} { - return &TimeDecoder{} - }) - integerDecoderPool = pool.NewGeneric(runtime.NumCPU(), func(sz int) interface{} { - return &IntegerDecoder{} - }) - floatDecoderPool = pool.NewGeneric(runtime.NumCPU(), func(sz int) interface{} { - return &FloatDecoder{} - }) - stringDecoderPool = pool.NewGeneric(runtime.NumCPU(), func(sz int) interface{} { - return &StringDecoder{} - }) - booleanDecoderPool = pool.NewGeneric(runtime.NumCPU(), func(sz int) interface{} { - return &BooleanDecoder{} - }) -) - -// Encode converts the values to a byte slice. If there are no values, -// this function panics. -func (a Values) Encode(buf []byte) ([]byte, error) { - if len(a) == 0 { - panic("unable to encode block type") - } - - switch a[0].(type) { - case FloatValue: - return encodeFloatBlock(buf, a) - case IntegerValue: - return encodeIntegerBlock(buf, a) - case UnsignedValue: - return encodeUnsignedBlock(buf, a) - case BooleanValue: - return encodeBooleanBlock(buf, a) - case StringValue: - return encodeStringBlock(buf, a) - } - - return nil, fmt.Errorf("unsupported value type %T", a[0]) -} - -// Contains returns true if values exist for the time interval [min, max] -// inclusive. The values must be sorted before calling Contains or the -// results are undefined. -func (a Values) Contains(min, max int64) bool { - rmin, rmax := a.FindRange(min, max) - if rmin == -1 && rmax == -1 { - return false - } - - // a[rmin].UnixNano() ≥ min - // a[rmax].UnixNano() ≥ max - - if a[rmin].UnixNano() == min { - return true - } - - if rmax < a.Len() && a[rmax].UnixNano() == max { - return true - } - - return rmax-rmin > 0 -} - -// InfluxQLType returns the influxql.DataType the values map to. 
-func (a Values) InfluxQLType() (influxql.DataType, error) { - if len(a) == 0 { - return influxql.Unknown, fmt.Errorf("no values to infer type") - } - - switch a[0].(type) { - case FloatValue: - return influxql.Float, nil - case IntegerValue: - return influxql.Integer, nil - case UnsignedValue: - return influxql.Unsigned, nil - case BooleanValue: - return influxql.Boolean, nil - case StringValue: - return influxql.String, nil - } - - return influxql.Unknown, fmt.Errorf("unsupported value type %T", a[0]) -} - -// BlockType returns the TSM block type the values map to. -func (a Values) BlockType() byte { - if len(a) == 0 { - return BlockUndefined - } - - switch a[0].(type) { - case FloatValue: - return BlockFloat64 - case IntegerValue: - return BlockInteger - case UnsignedValue: - return BlockUnsigned - case BooleanValue: - return BlockBoolean - case StringValue: - return BlockString - } - - return BlockUndefined -} - -// BlockType returns the type of value encoded in a block or an error -// if the block type is unknown. -func BlockType(block []byte) (byte, error) { - blockType := block[0] - switch blockType { - case BlockFloat64, BlockInteger, BlockUnsigned, BlockBoolean, BlockString: - return blockType, nil - default: - return 0, fmt.Errorf("unknown block type: %d", blockType) - } -} - -// BlockCount returns the number of timestamps encoded in block. -func BlockCount(block []byte) int { - if len(block) <= encodedBlockHeaderSize { - panic(fmt.Sprintf("count of short block: got %v, exp %v", len(block), encodedBlockHeaderSize)) - } - // first byte is the block type - tb, _, err := unpackBlock(block[1:]) - if err != nil { - panic(fmt.Sprintf("BlockCount: error unpacking block: %s", err.Error())) - } - return CountTimestamps(tb) -} - -// DecodeBlock takes a byte slice and decodes it into values of the appropriate type -// based on the block. 
-func DecodeBlock(block []byte, vals []Value) ([]Value, error) { - if len(block) <= encodedBlockHeaderSize { - panic(fmt.Sprintf("decode of short block: got %v, exp %v", len(block), encodedBlockHeaderSize)) - } - - blockType, err := BlockType(block) - if err != nil { - return nil, err - } - - switch blockType { - case BlockFloat64: - var buf []FloatValue - decoded, err := DecodeFloatBlock(block, &buf) - if len(vals) < len(decoded) { - vals = make([]Value, len(decoded)) - } - for i := range decoded { - vals[i] = decoded[i] - } - return vals[:len(decoded)], err - case BlockInteger: - var buf []IntegerValue - decoded, err := DecodeIntegerBlock(block, &buf) - if len(vals) < len(decoded) { - vals = make([]Value, len(decoded)) - } - for i := range decoded { - vals[i] = decoded[i] - } - return vals[:len(decoded)], err - - case BlockUnsigned: - var buf []UnsignedValue - decoded, err := DecodeUnsignedBlock(block, &buf) - if len(vals) < len(decoded) { - vals = make([]Value, len(decoded)) - } - for i := range decoded { - vals[i] = decoded[i] - } - return vals[:len(decoded)], err - - case BlockBoolean: - var buf []BooleanValue - decoded, err := DecodeBooleanBlock(block, &buf) - if len(vals) < len(decoded) { - vals = make([]Value, len(decoded)) - } - for i := range decoded { - vals[i] = decoded[i] - } - return vals[:len(decoded)], err - - case BlockString: - var buf []StringValue - decoded, err := DecodeStringBlock(block, &buf) - if len(vals) < len(decoded) { - vals = make([]Value, len(decoded)) - } - for i := range decoded { - vals[i] = decoded[i] - } - return vals[:len(decoded)], err - - default: - panic(fmt.Sprintf("unknown block type: %d", blockType)) - } -} - -func encodeFloatBlock(buf []byte, values []Value) ([]byte, error) { - if len(values) == 0 { - return nil, nil - } - - // A float block is encoded using different compression strategies - // for timestamps and values. - - // Encode values using Gorilla float compression - venc := getFloatEncoder(len(values)) - - // Encode timestamps using an adaptive encoder that uses delta-encoding, - // frame-or-reference and run length encoding. - tsenc := getTimeEncoder(len(values)) - - b, err := encodeFloatBlockUsing(buf, values, tsenc, venc) - - putTimeEncoder(tsenc) - putFloatEncoder(venc) - - return b, err -} - -func encodeFloatBlockUsing(buf []byte, values []Value, tsenc TimeEncoder, venc *FloatEncoder) ([]byte, error) { - tsenc.Reset() - venc.Reset() - - for _, v := range values { - vv := v.(FloatValue) - tsenc.Write(vv.UnixNano()) - venc.Write(vv.RawValue()) - } - venc.Flush() - - // Encoded timestamp values - tb, err := tsenc.Bytes() - if err != nil { - return nil, err - } - // Encoded float values - vb, err := venc.Bytes() - if err != nil { - return nil, err - } - - // Prepend the first timestamp of the block in the first 8 bytes and the block - // in the next byte, followed by the block - return packBlock(buf, BlockFloat64, tb, vb), nil -} - -// DecodeFloatBlock decodes the float block from the byte slice -// and appends the float values to a. 
-func DecodeFloatBlock(block []byte, a *[]FloatValue) ([]FloatValue, error) { - // Block type is the next block, make sure we actually have a float block - blockType := block[0] - if blockType != BlockFloat64 { - return nil, fmt.Errorf("invalid block type: exp %d, got %d", BlockFloat64, blockType) - } - block = block[1:] - - tb, vb, err := unpackBlock(block) - if err != nil { - return nil, err - } - - sz := CountTimestamps(tb) - - if cap(*a) < sz { - *a = make([]FloatValue, sz) - } else { - *a = (*a)[:sz] - } - - tdec := timeDecoderPool.Get(0).(*TimeDecoder) - vdec := floatDecoderPool.Get(0).(*FloatDecoder) - - var i int - err = func(a []FloatValue) error { - // Setup our timestamp and value decoders - tdec.Init(tb) - err = vdec.SetBytes(vb) - if err != nil { - return err - } - - // Decode both a timestamp and value - j := 0 - for j < len(a) && tdec.Next() && vdec.Next() { - a[j] = NewRawFloatValue(tdec.Read(), vdec.Values()) - j++ - } - i = j - - // Did timestamp decoding have an error? - err = tdec.Error() - if err != nil { - return err - } - - // Did float decoding have an error? - return vdec.Error() - }(*a) - - timeDecoderPool.Put(tdec) - floatDecoderPool.Put(vdec) - - return (*a)[:i], err -} - -func encodeBooleanBlock(buf []byte, values []Value) ([]byte, error) { - if len(values) == 0 { - return nil, nil - } - - // A boolean block is encoded using different compression strategies - // for timestamps and values. - venc := getBooleanEncoder(len(values)) - - // Encode timestamps using an adaptive encoder - tsenc := getTimeEncoder(len(values)) - - b, err := encodeBooleanBlockUsing(buf, values, tsenc, venc) - - putTimeEncoder(tsenc) - putBooleanEncoder(venc) - - return b, err -} - -func encodeBooleanBlockUsing(buf []byte, values []Value, tenc TimeEncoder, venc BooleanEncoder) ([]byte, error) { - tenc.Reset() - venc.Reset() - - for _, v := range values { - vv := v.(BooleanValue) - tenc.Write(vv.UnixNano()) - venc.Write(vv.RawValue()) - } - - // Encoded timestamp values - tb, err := tenc.Bytes() - if err != nil { - return nil, err - } - // Encoded float values - vb, err := venc.Bytes() - if err != nil { - return nil, err - } - - // Prepend the first timestamp of the block in the first 8 bytes and the block - // in the next byte, followed by the block - return packBlock(buf, BlockBoolean, tb, vb), nil -} - -// DecodeBooleanBlock decodes the boolean block from the byte slice -// and appends the boolean values to a. -func DecodeBooleanBlock(block []byte, a *[]BooleanValue) ([]BooleanValue, error) { - // Block type is the next block, make sure we actually have a float block - blockType := block[0] - if blockType != BlockBoolean { - return nil, fmt.Errorf("invalid block type: exp %d, got %d", BlockBoolean, blockType) - } - block = block[1:] - - tb, vb, err := unpackBlock(block) - if err != nil { - return nil, err - } - - sz := CountTimestamps(tb) - - if cap(*a) < sz { - *a = make([]BooleanValue, sz) - } else { - *a = (*a)[:sz] - } - - tdec := timeDecoderPool.Get(0).(*TimeDecoder) - vdec := booleanDecoderPool.Get(0).(*BooleanDecoder) - - var i int - err = func(a []BooleanValue) error { - // Setup our timestamp and value decoders - tdec.Init(tb) - vdec.SetBytes(vb) - - // Decode both a timestamp and value - j := 0 - for j < len(a) && tdec.Next() && vdec.Next() { - a[j] = NewRawBooleanValue(tdec.Read(), vdec.Read()) - j++ - } - i = j - - // Did timestamp decoding have an error? - err = tdec.Error() - if err != nil { - return err - } - // Did boolean decoding have an error? 
- return vdec.Error() - }(*a) - - timeDecoderPool.Put(tdec) - booleanDecoderPool.Put(vdec) - - return (*a)[:i], err -} - -func encodeIntegerBlock(buf []byte, values []Value) ([]byte, error) { - tenc := getTimeEncoder(len(values)) - venc := getIntegerEncoder(len(values)) - - b, err := encodeIntegerBlockUsing(buf, values, tenc, venc) - - putTimeEncoder(tenc) - putIntegerEncoder(venc) - - return b, err -} - -func encodeIntegerBlockUsing(buf []byte, values []Value, tenc TimeEncoder, venc IntegerEncoder) ([]byte, error) { - tenc.Reset() - venc.Reset() - - for _, v := range values { - vv := v.(IntegerValue) - tenc.Write(vv.UnixNano()) - venc.Write(vv.RawValue()) - } - - // Encoded timestamp values - tb, err := tenc.Bytes() - if err != nil { - return nil, err - } - // Encoded int64 values - vb, err := venc.Bytes() - if err != nil { - return nil, err - } - - // Prepend the first timestamp of the block in the first 8 bytes - return packBlock(buf, BlockInteger, tb, vb), nil -} - -// DecodeIntegerBlock decodes the integer block from the byte slice -// and appends the integer values to a. -func DecodeIntegerBlock(block []byte, a *[]IntegerValue) ([]IntegerValue, error) { - blockType := block[0] - if blockType != BlockInteger { - return nil, fmt.Errorf("invalid block type: exp %d, got %d", BlockInteger, blockType) - } - - block = block[1:] - - // The first 8 bytes is the minimum timestamp of the block - tb, vb, err := unpackBlock(block) - if err != nil { - return nil, err - } - - sz := CountTimestamps(tb) - - if cap(*a) < sz { - *a = make([]IntegerValue, sz) - } else { - *a = (*a)[:sz] - } - - tdec := timeDecoderPool.Get(0).(*TimeDecoder) - vdec := integerDecoderPool.Get(0).(*IntegerDecoder) - - var i int - err = func(a []IntegerValue) error { - // Setup our timestamp and value decoders - tdec.Init(tb) - vdec.SetBytes(vb) - - // Decode both a timestamp and value - j := 0 - for j < len(a) && tdec.Next() && vdec.Next() { - a[j] = NewRawIntegerValue(tdec.Read(), vdec.Read()) - j++ - } - i = j - - // Did timestamp decoding have an error? - err = tdec.Error() - if err != nil { - return err - } - // Did int64 decoding have an error? - return vdec.Error() - }(*a) - - timeDecoderPool.Put(tdec) - integerDecoderPool.Put(vdec) - - return (*a)[:i], err -} - -func encodeUnsignedBlock(buf []byte, values []Value) ([]byte, error) { - tenc := getTimeEncoder(len(values)) - venc := getUnsignedEncoder(len(values)) - - b, err := encodeUnsignedBlockUsing(buf, values, tenc, venc) - - putTimeEncoder(tenc) - putUnsignedEncoder(venc) - - return b, err -} - -func encodeUnsignedBlockUsing(buf []byte, values []Value, tenc TimeEncoder, venc IntegerEncoder) ([]byte, error) { - tenc.Reset() - venc.Reset() - - for _, v := range values { - vv := v.(UnsignedValue) - tenc.Write(vv.UnixNano()) - venc.Write(int64(vv.RawValue())) - } - - // Encoded timestamp values - tb, err := tenc.Bytes() - if err != nil { - return nil, err - } - // Encoded int64 values - vb, err := venc.Bytes() - if err != nil { - return nil, err - } - - // Prepend the first timestamp of the block in the first 8 bytes - return packBlock(buf, BlockUnsigned, tb, vb), nil -} - -// DecodeUnsignedBlock decodes the unsigned integer block from the byte slice -// and appends the unsigned integer values to a. 
-func DecodeUnsignedBlock(block []byte, a *[]UnsignedValue) ([]UnsignedValue, error) { - blockType := block[0] - if blockType != BlockUnsigned { - return nil, fmt.Errorf("invalid block type: exp %d, got %d", BlockUnsigned, blockType) - } - - block = block[1:] - - // The first 8 bytes is the minimum timestamp of the block - tb, vb, err := unpackBlock(block) - if err != nil { - return nil, err - } - - sz := CountTimestamps(tb) - - if cap(*a) < sz { - *a = make([]UnsignedValue, sz) - } else { - *a = (*a)[:sz] - } - - tdec := timeDecoderPool.Get(0).(*TimeDecoder) - vdec := integerDecoderPool.Get(0).(*IntegerDecoder) - - var i int - err = func(a []UnsignedValue) error { - // Setup our timestamp and value decoders - tdec.Init(tb) - vdec.SetBytes(vb) - - // Decode both a timestamp and value - j := 0 - for j < len(a) && tdec.Next() && vdec.Next() { - a[j] = NewRawUnsignedValue(tdec.Read(), uint64(vdec.Read())) - j++ - } - i = j - - // Did timestamp decoding have an error? - err = tdec.Error() - if err != nil { - return err - } - // Did int64 decoding have an error? - return vdec.Error() - }(*a) - - timeDecoderPool.Put(tdec) - integerDecoderPool.Put(vdec) - - return (*a)[:i], err -} - -func encodeStringBlock(buf []byte, values []Value) ([]byte, error) { - tenc := getTimeEncoder(len(values)) - venc := getStringEncoder(len(values) * len(values[0].(StringValue).RawValue())) - - b, err := encodeStringBlockUsing(buf, values, tenc, venc) - - putTimeEncoder(tenc) - putStringEncoder(venc) - - return b, err -} - -func encodeStringBlockUsing(buf []byte, values []Value, tenc TimeEncoder, venc StringEncoder) ([]byte, error) { - tenc.Reset() - venc.Reset() - - for _, v := range values { - vv := v.(StringValue) - tenc.Write(vv.UnixNano()) - venc.Write(vv.RawValue()) - } - - // Encoded timestamp values - tb, err := tenc.Bytes() - if err != nil { - return nil, err - } - // Encoded string values - vb, err := venc.Bytes() - if err != nil { - return nil, err - } - - // Prepend the first timestamp of the block in the first 8 bytes - return packBlock(buf, BlockString, tb, vb), nil -} - -// DecodeStringBlock decodes the string block from the byte slice -// and appends the string values to a. -func DecodeStringBlock(block []byte, a *[]StringValue) ([]StringValue, error) { - blockType := block[0] - if blockType != BlockString { - return nil, fmt.Errorf("invalid block type: exp %d, got %d", BlockString, blockType) - } - - block = block[1:] - - // The first 8 bytes is the minimum timestamp of the block - tb, vb, err := unpackBlock(block) - if err != nil { - return nil, err - } - - sz := CountTimestamps(tb) - - if cap(*a) < sz { - *a = make([]StringValue, sz) - } else { - *a = (*a)[:sz] - } - - tdec := timeDecoderPool.Get(0).(*TimeDecoder) - vdec := stringDecoderPool.Get(0).(*StringDecoder) - - var i int - err = func(a []StringValue) error { - // Setup our timestamp and value decoders - tdec.Init(tb) - err = vdec.SetBytes(vb) - if err != nil { - return err - } - - // Decode both a timestamp and value - j := 0 - for j < len(a) && tdec.Next() && vdec.Next() { - a[j] = NewRawStringValue(tdec.Read(), vdec.Read()) - j++ - } - i = j - - // Did timestamp decoding have an error? - err = tdec.Error() - if err != nil { - return err - } - // Did string decoding have an error? 
- return vdec.Error() - }(*a) - - timeDecoderPool.Put(tdec) - stringDecoderPool.Put(vdec) - - return (*a)[:i], err -} - -func packBlock(buf []byte, typ byte, ts []byte, values []byte) []byte { - // We encode the length of the timestamp block using a variable byte encoding. - // This allows small byte slices to take up 1 byte while larger ones use 2 or more. - sz := 1 + binary.MaxVarintLen64 + len(ts) + len(values) - if cap(buf) < sz { - buf = make([]byte, sz) - } - b := buf[:sz] - b[0] = typ - i := binary.PutUvarint(b[1:1+binary.MaxVarintLen64], uint64(len(ts))) - i += 1 - - // block is , , - copy(b[i:], ts) - // We don't encode the value length because we know it's the rest of the block after - // the timestamp block. - copy(b[i+len(ts):], values) - return b[:i+len(ts)+len(values)] -} - -func unpackBlock(buf []byte) (ts, values []byte, err error) { - // Unpack the timestamp block length - tsLen, i := binary.Uvarint(buf) - if i <= 0 { - err = fmt.Errorf("unpackBlock: unable to read timestamp block length") - return - } - - // Unpack the timestamp bytes - tsIdx := int(i) + int(tsLen) - if tsIdx > len(buf) { - err = fmt.Errorf("unpackBlock: not enough data for timestamp") - return - } - ts = buf[int(i):tsIdx] - - // Unpack the value bytes - values = buf[tsIdx:] - return -} - -// ZigZagEncode converts a int64 to a uint64 by zig zagging negative and positive values -// across even and odd numbers. Eg. [0,-1,1,-2] becomes [0, 1, 2, 3]. -func ZigZagEncode(x int64) uint64 { - return uint64(uint64(x<<1) ^ uint64((int64(x) >> 63))) -} - -// ZigZagDecode converts a previously zigzag encoded uint64 back to a int64. -func ZigZagDecode(v uint64) int64 { - return int64((v >> 1) ^ uint64((int64(v&1)<<63)>>63)) -} -func getTimeEncoder(sz int) TimeEncoder { - x := timeEncoderPool.Get(sz).(TimeEncoder) - x.Reset() - return x -} -func putTimeEncoder(enc TimeEncoder) { timeEncoderPool.Put(enc) } - -func getIntegerEncoder(sz int) IntegerEncoder { - x := integerEncoderPool.Get(sz).(IntegerEncoder) - x.Reset() - return x -} -func putIntegerEncoder(enc IntegerEncoder) { integerEncoderPool.Put(enc) } - -func getUnsignedEncoder(sz int) IntegerEncoder { - x := integerEncoderPool.Get(sz).(IntegerEncoder) - x.Reset() - return x -} -func putUnsignedEncoder(enc IntegerEncoder) { integerEncoderPool.Put(enc) } - -func getFloatEncoder(sz int) *FloatEncoder { - x := floatEncoderPool.Get(sz).(*FloatEncoder) - x.Reset() - return x -} -func putFloatEncoder(enc *FloatEncoder) { floatEncoderPool.Put(enc) } - -func getStringEncoder(sz int) StringEncoder { - x := stringEncoderPool.Get(sz).(StringEncoder) - x.Reset() - return x -} -func putStringEncoder(enc StringEncoder) { stringEncoderPool.Put(enc) } - -func getBooleanEncoder(sz int) BooleanEncoder { - x := booleanEncoderPool.Get(sz).(BooleanEncoder) - x.Reset() - return x -} -func putBooleanEncoder(enc BooleanEncoder) { booleanEncoderPool.Put(enc) } - -// BlockTypeName returns a string name for the block type. 
-func BlockTypeName(typ byte) string { - switch typ { - case BlockFloat64: - return "float64" - case BlockInteger: - return "integer" - case BlockBoolean: - return "boolean" - case BlockString: - return "string" - case BlockUnsigned: - return "unsigned" - default: - return fmt.Sprintf("unknown(%d)", typ) - } -} diff --git a/tsdb/tsm1/encoding_test.go b/tsdb/tsm1/encoding_test.go deleted file mode 100644 index e7fa8dd636..0000000000 --- a/tsdb/tsm1/encoding_test.go +++ /dev/null @@ -1,1941 +0,0 @@ -package tsm1_test - -import ( - "fmt" - "math/rand" - "reflect" - "testing" - "time" - - "github.com/davecgh/go-spew/spew" - "github.com/google/go-cmp/cmp" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" -) - -func TestEncoding_FloatBlock(t *testing.T) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, float64(i)) - } - - b, err := tsm1.Values(values).Encode(nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var decodedValues []tsm1.Value - decodedValues, err = tsm1.DecodeBlock(b, decodedValues) - if err != nil { - t.Fatalf("unexpected error decoding block: %v", err) - } - - if !reflect.DeepEqual(decodedValues, values) { - t.Fatalf("unexpected results:\n\tgot: %s\n\texp: %s\n", spew.Sdump(decodedValues), spew.Sdump(values)) - } -} - -func TestEncoding_FloatBlock_ZeroTime(t *testing.T) { - values := make([]tsm1.Value, 3) - for i := 0; i < 3; i++ { - values[i] = tsm1.NewValue(0, float64(i)) - } - - b, err := tsm1.Values(values).Encode(nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var decodedValues []tsm1.Value - decodedValues, err = tsm1.DecodeBlock(b, decodedValues) - if err != nil { - t.Fatalf("unexpected error decoding block: %v", err) - } - - if !reflect.DeepEqual(decodedValues, values) { - t.Fatalf("unexpected results:\n\tgot: %v\n\texp: %v\n", decodedValues, values) - } -} - -func TestEncoding_FloatBlock_SimilarFloats(t *testing.T) { - values := make([]tsm1.Value, 5) - values[0] = tsm1.NewValue(1444238178437870000, 6.00065e+06) - values[1] = tsm1.NewValue(1444238185286830000, 6.000656e+06) - values[2] = tsm1.NewValue(1444238188441501000, 6.000657e+06) - values[3] = tsm1.NewValue(1444238195286811000, 6.000659e+06) - values[4] = tsm1.NewValue(1444238198439917000, 6.000661e+06) - - b, err := tsm1.Values(values).Encode(nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var decodedValues []tsm1.Value - decodedValues, err = tsm1.DecodeBlock(b, decodedValues) - if err != nil { - t.Fatalf("unexpected error decoding block: %v", err) - } - - if !reflect.DeepEqual(decodedValues, values) { - t.Fatalf("unexpected results:\n\tgot: %v\n\texp: %v\n", decodedValues, values) - } -} - -func TestEncoding_IntBlock_Basic(t *testing.T) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, int64(i)) - } - - b, err := tsm1.Values(values).Encode(nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var decodedValues []tsm1.Value - decodedValues, err = tsm1.DecodeBlock(b, decodedValues) - if err != nil { - t.Fatalf("unexpected error decoding block: %v", err) - } - - if len(decodedValues) != len(values) { - t.Fatalf("unexpected results length:\n\tgot: %v\n\texp: %v\n", len(decodedValues), len(values)) - } - - for i := 0; i < len(decodedValues); i++ { - if decodedValues[i].UnixNano() != 
values[i].UnixNano() { - t.Fatalf("unexpected results:\n\tgot: %v\n\texp: %v\n", decodedValues[i].UnixNano(), values[i].UnixNano()) - } - - if decodedValues[i].Value() != values[i].Value() { - t.Fatalf("unexpected results:\n\tgot: %v\n\texp: %v\n", decodedValues[i].Value(), values[i].Value()) - } - } -} - -func TestEncoding_IntBlock_Negatives(t *testing.T) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - v := int64(i) - if i%2 == 0 { - v = -v - } - values[i] = tsm1.NewValue(t, int64(v)) - } - - b, err := tsm1.Values(values).Encode(nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var decodedValues []tsm1.Value - decodedValues, err = tsm1.DecodeBlock(b, decodedValues) - if err != nil { - t.Fatalf("unexpected error decoding block: %v", err) - } - - if !reflect.DeepEqual(decodedValues, values) { - t.Fatalf("unexpected results:\n\tgot: %v\n\texp: %v\n", decodedValues, values) - } -} - -func TestEncoding_UIntBlock_Basic(t *testing.T) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, uint64(i)) - } - - b, err := tsm1.Values(values).Encode(nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var decodedValues []tsm1.Value - decodedValues, err = tsm1.DecodeBlock(b, decodedValues) - if err != nil { - t.Fatalf("unexpected error decoding block: %v", err) - } - - if len(decodedValues) != len(values) { - t.Fatalf("unexpected results length:\n\tgot: %v\n\texp: %v\n", len(decodedValues), len(values)) - } - - for i := 0; i < len(decodedValues); i++ { - if decodedValues[i].UnixNano() != values[i].UnixNano() { - t.Fatalf("unexpected results:\n\tgot: %v\n\texp: %v\n", decodedValues[i].UnixNano(), values[i].UnixNano()) - } - - if decodedValues[i].Value() != values[i].Value() { - t.Fatalf("unexpected results:\n\tgot: %v\n\texp: %v\n", decodedValues[i].Value(), values[i].Value()) - } - } -} - -// TestEncoding_UIntBlock_MaxValues encodes uint64 numbers starting at max (18446744073709551615) -// down to 18446744073709550616 -func TestEncoding_UIntBlock_MaxValues(t *testing.T) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, ^uint64(i)) - } - - b, err := tsm1.Values(values).Encode(nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var decodedValues []tsm1.Value - decodedValues, err = tsm1.DecodeBlock(b, decodedValues) - if err != nil { - t.Fatalf("unexpected error decoding block: %v", err) - } - - if !reflect.DeepEqual(decodedValues, values) { - t.Fatalf("unexpected results:\n\tgot: %v\n\texp: %v\n", decodedValues, values) - } -} - -func TestEncoding_BooleanBlock_Basic(t *testing.T) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - v := true - if i%2 == 0 { - v = false - } - values[i] = tsm1.NewValue(t, v) - } - - b, err := tsm1.Values(values).Encode(nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var decodedValues []tsm1.Value - decodedValues, err = tsm1.DecodeBlock(b, decodedValues) - if err != nil { - t.Fatalf("unexpected error decoding block: %v", err) - } - - if !reflect.DeepEqual(decodedValues, values) { - t.Fatalf("unexpected results:\n\tgot: %v\n\texp: %v\n", decodedValues, values) - } -} - 
-func TestEncoding_StringBlock_Basic(t *testing.T) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, fmt.Sprintf("value %d", i)) - } - - b, err := tsm1.Values(values).Encode(nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var decodedValues []tsm1.Value - decodedValues, err = tsm1.DecodeBlock(b, decodedValues) - if err != nil { - t.Fatalf("unexpected error decoding block: %v", err) - } - - if !reflect.DeepEqual(decodedValues, values) { - t.Fatalf("unexpected results:\n\tgot: %v\n\texp: %v\n", decodedValues, values) - } -} - -func TestEncoding_BlockType(t *testing.T) { - tests := []struct { - value interface{} - blockType byte - }{ - {value: float64(1.0), blockType: tsm1.BlockFloat64}, - {value: int64(1), blockType: tsm1.BlockInteger}, - {value: uint64(1), blockType: tsm1.BlockUnsigned}, - {value: true, blockType: tsm1.BlockBoolean}, - {value: "string", blockType: tsm1.BlockString}, - } - - for _, test := range tests { - var values []tsm1.Value - values = append(values, tsm1.NewValue(0, test.value)) - - b, err := tsm1.Values(values).Encode(nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - bt, err := tsm1.BlockType(b) - if err != nil { - t.Fatalf("unexpected error decoding block type: %v", err) - } - - if got, exp := bt, test.blockType; got != exp { - t.Fatalf("block type mismatch: got %v, exp %v", got, exp) - } - } - - _, err := tsm1.BlockType([]byte{10}) - if err == nil { - t.Fatalf("expected error decoding block type, got nil") - } -} - -func TestEncoding_Count(t *testing.T) { - tests := []struct { - value interface{} - blockType byte - }{ - {value: float64(1.0), blockType: tsm1.BlockFloat64}, - {value: int64(1), blockType: tsm1.BlockInteger}, - {value: uint64(1), blockType: tsm1.BlockUnsigned}, - {value: true, blockType: tsm1.BlockBoolean}, - {value: "string", blockType: tsm1.BlockString}, - } - - for _, test := range tests { - var values []tsm1.Value - values = append(values, tsm1.NewValue(0, test.value)) - - b, err := tsm1.Values(values).Encode(nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got, exp := tsm1.BlockCount(b), 1; got != exp { - t.Fatalf("block count mismatch: got %v, exp %v", got, exp) - } - } -} - -func TestValues_MergeFloat(t *testing.T) { - tests := []struct { - a, b, exp []tsm1.Value - }{ - - { // empty a - a: []tsm1.Value{}, - - b: []tsm1.Value{ - tsm1.NewValue(1, 1.2), - tsm1.NewValue(2, 2.2), - }, - exp: []tsm1.Value{ - tsm1.NewValue(1, 1.2), - tsm1.NewValue(2, 2.2), - }, - }, - { // empty b - a: []tsm1.Value{ - tsm1.NewValue(1, 1.1), - tsm1.NewValue(2, 2.1), - }, - - b: []tsm1.Value{}, - exp: []tsm1.Value{ - tsm1.NewValue(1, 1.1), - tsm1.NewValue(2, 2.1), - }, - }, - { - a: []tsm1.Value{ - tsm1.NewValue(0, 0.0), - tsm1.NewValue(1, 1.1), - tsm1.NewValue(2, 2.1), - }, - b: []tsm1.Value{ - tsm1.NewValue(2, 2.2), - tsm1.NewValue(2, 2.2), // duplicate data - }, - exp: []tsm1.Value{ - tsm1.NewValue(0, 0.0), - tsm1.NewValue(1, 1.1), - tsm1.NewValue(2, 2.2), - }, - }, - { - a: []tsm1.Value{ - tsm1.NewValue(0, 0.0), - tsm1.NewValue(1, 1.1), - tsm1.NewValue(1, 1.1), // duplicate data - tsm1.NewValue(2, 2.1), - }, - b: []tsm1.Value{ - tsm1.NewValue(2, 2.2), - tsm1.NewValue(2, 2.2), // duplicate data - }, - exp: []tsm1.Value{ - tsm1.NewValue(0, 0.0), - tsm1.NewValue(1, 1.1), - tsm1.NewValue(2, 2.2), - }, - }, - - { - a: []tsm1.Value{ - tsm1.NewValue(1, 1.1), - }, - b: []tsm1.Value{ - 
tsm1.NewValue(0, 0.0), - tsm1.NewValue(1, 1.2), // overwrites a - tsm1.NewValue(2, 2.2), - tsm1.NewValue(3, 3.2), - tsm1.NewValue(4, 4.2), - }, - exp: []tsm1.Value{ - tsm1.NewValue(0, 0.0), - tsm1.NewValue(1, 1.2), - tsm1.NewValue(2, 2.2), - tsm1.NewValue(3, 3.2), - tsm1.NewValue(4, 4.2), - }, - }, - { - a: []tsm1.Value{ - tsm1.NewValue(1, 1.1), - tsm1.NewValue(2, 2.1), - tsm1.NewValue(3, 3.1), - tsm1.NewValue(4, 4.1), - }, - - b: []tsm1.Value{ - tsm1.NewValue(1, 1.2), // overwrites a - tsm1.NewValue(2, 2.2), // overwrites a - }, - exp: []tsm1.Value{ - tsm1.NewValue(1, 1.2), - tsm1.NewValue(2, 2.2), - tsm1.NewValue(3, 3.1), - tsm1.NewValue(4, 4.1), - }, - }, - { - a: []tsm1.Value{ - tsm1.NewValue(1, 1.1), - tsm1.NewValue(2, 2.1), - tsm1.NewValue(3, 3.1), - tsm1.NewValue(4, 4.1), - }, - - b: []tsm1.Value{ - tsm1.NewValue(1, 1.2), // overwrites a - tsm1.NewValue(2, 2.2), // overwrites a - tsm1.NewValue(3, 3.2), - tsm1.NewValue(4, 4.2), - }, - exp: []tsm1.Value{ - tsm1.NewValue(1, 1.2), - tsm1.NewValue(2, 2.2), - tsm1.NewValue(3, 3.2), - tsm1.NewValue(4, 4.2), - }, - }, - { - a: []tsm1.Value{ - tsm1.NewValue(0, 0.0), - tsm1.NewValue(1, 1.1), - tsm1.NewValue(2, 2.1), - tsm1.NewValue(3, 3.1), - tsm1.NewValue(4, 4.1), - }, - b: []tsm1.Value{ - tsm1.NewValue(0, 0.0), - tsm1.NewValue(2, 2.2), - tsm1.NewValue(4, 4.2), - }, - exp: []tsm1.Value{ - tsm1.NewValue(0, 0.0), - tsm1.NewValue(1, 1.1), - tsm1.NewValue(2, 2.2), - tsm1.NewValue(3, 3.1), - tsm1.NewValue(4, 4.2), - }, - }, - - { - a: []tsm1.Value{ - tsm1.NewValue(1462498658242869207, 0.0), - tsm1.NewValue(1462498658288956853, 1.1), - }, - b: []tsm1.Value{ - tsm1.NewValue(1462498658242870810, 0.0), - tsm1.NewValue(1462498658262911238, 2.2), - tsm1.NewValue(1462498658282415038, 4.2), - tsm1.NewValue(1462498658282417760, 4.2), - }, - exp: []tsm1.Value{ - tsm1.NewValue(1462498658242869207, 0.0), - tsm1.NewValue(1462498658242870810, 0.0), - tsm1.NewValue(1462498658262911238, 2.2), - tsm1.NewValue(1462498658282415038, 4.2), - tsm1.NewValue(1462498658282417760, 4.2), - tsm1.NewValue(1462498658288956853, 1.1), - }, - }, - { - a: []tsm1.Value{ - tsm1.NewValue(4, 4.0), - tsm1.NewValue(5, 5.0), - tsm1.NewValue(6, 6.0), - }, - b: []tsm1.Value{ - tsm1.NewValue(1, 1.0), - tsm1.NewValue(2, 2.0), - tsm1.NewValue(3, 3.0), - }, - exp: []tsm1.Value{ - tsm1.NewValue(1, 1.0), - tsm1.NewValue(2, 2.0), - tsm1.NewValue(3, 3.0), - tsm1.NewValue(4, 4.0), - tsm1.NewValue(5, 5.0), - tsm1.NewValue(6, 6.0), - }, - }, - { - a: []tsm1.Value{ - tsm1.NewValue(5, 5.0), - tsm1.NewValue(6, 6.0), - }, - b: []tsm1.Value{ - tsm1.NewValue(1, 1.0), - tsm1.NewValue(2, 2.0), - tsm1.NewValue(3, 3.0), - tsm1.NewValue(4, 4.0), - tsm1.NewValue(7, 7.0), - tsm1.NewValue(8, 8.0), - }, - exp: []tsm1.Value{ - tsm1.NewValue(1, 1.0), - tsm1.NewValue(2, 2.0), - tsm1.NewValue(3, 3.0), - tsm1.NewValue(4, 4.0), - tsm1.NewValue(5, 5.0), - tsm1.NewValue(6, 6.0), - tsm1.NewValue(7, 7.0), - tsm1.NewValue(8, 8.0), - }, - }, - { - a: []tsm1.Value{ - tsm1.NewValue(1, 1.0), - tsm1.NewValue(2, 2.0), - tsm1.NewValue(3, 3.0), - }, - b: []tsm1.Value{ - tsm1.NewValue(4, 4.0), - tsm1.NewValue(5, 5.0), - tsm1.NewValue(6, 6.0), - }, - exp: []tsm1.Value{ - tsm1.NewValue(1, 1.0), - tsm1.NewValue(2, 2.0), - tsm1.NewValue(3, 3.0), - tsm1.NewValue(4, 4.0), - tsm1.NewValue(5, 5.0), - tsm1.NewValue(6, 6.0), - }, - }, - } - - for i, test := range tests { - got := tsm1.Values(test.a).Merge(test.b) - if exp, got := len(test.exp), len(got); exp != got { - t.Fatalf("test(%d): value length mismatch: exp %v, got %v", i, exp, got) - 
} - - dedup := tsm1.Values(append(test.a, test.b...)).Deduplicate() - - for i := range test.exp { - if exp, got := test.exp[i].String(), got[i].String(); exp != got { - t.Fatalf("value mismatch:\n exp %v\n got %v", exp, got) - } - - if exp, got := test.exp[i].String(), dedup[i].String(); exp != got { - t.Fatalf("value mismatch:\n exp %v\n got %v", exp, got) - } - } - } -} - -func TestValues_Contains(t *testing.T) { - makeValues := func(count int, min, max int64) tsm1.Values { - vals := make(tsm1.Values, count) - - ts := min - inc := (max - min) / int64(count) - - for i := 0; i < count; i++ { - vals[i] = tsm1.NewRawIntegerValue(ts, 0) - ts += inc - } - - return vals - } - - cases := []struct { - n string - min, max int64 - exp bool - }{ - {"no/lo", 0, 9, false}, - {"no/hi", 19, 30, false}, - {"no/middle", 13, 13, false}, - - {"yes/first", 0, 10, true}, - {"yes/first-eq", 10, 10, true}, - {"yes/last", 18, 20, true}, - {"yes/last-eq", 18, 18, true}, - {"yes/all but first and last", 12, 16, true}, - {"yes/middle-eq", 14, 14, true}, - {"yes/middle-overlap", 13, 15, true}, - {"yes/covers", 8, 22, true}, - } - - for _, tc := range cases { - t.Run(fmt.Sprintf("%s[%d,%d]", tc.n, tc.min, tc.max), func(t *testing.T) { - vals := makeValues(5, 10, 20) - if got := vals.Contains(tc.min, tc.max); got != tc.exp { - t.Errorf("Contains -got/+exp\n%s", cmp.Diff(got, tc.exp)) - } - }) - } -} - -func TestIntegerValues_Merge(t *testing.T) { - integerValue := func(t int64, f int64) tsm1.IntegerValue { - return tsm1.NewValue(t, f).(tsm1.IntegerValue) - } - - tests := []struct { - a, b, exp []tsm1.IntegerValue - }{ - - { // empty a - a: []tsm1.IntegerValue{}, - - b: []tsm1.IntegerValue{ - integerValue(1, 10), - integerValue(2, 20), - }, - exp: []tsm1.IntegerValue{ - integerValue(1, 10), - integerValue(2, 20), - }, - }, - { // empty b - a: []tsm1.IntegerValue{ - integerValue(1, 1), - integerValue(2, 2), - }, - - b: []tsm1.IntegerValue{}, - exp: []tsm1.IntegerValue{ - integerValue(1, 1), - integerValue(2, 2), - }, - }, - { - a: []tsm1.IntegerValue{ - integerValue(1, 1), - }, - b: []tsm1.IntegerValue{ - integerValue(0, 0), - integerValue(1, 10), // overwrites a - integerValue(2, 20), - integerValue(3, 30), - integerValue(4, 40), - }, - exp: []tsm1.IntegerValue{ - integerValue(0, 0), - integerValue(1, 10), - integerValue(2, 20), - integerValue(3, 30), - integerValue(4, 40), - }, - }, - { - a: []tsm1.IntegerValue{ - integerValue(1, 1), - integerValue(2, 2), - integerValue(3, 3), - integerValue(4, 4), - }, - - b: []tsm1.IntegerValue{ - integerValue(1, 10), // overwrites a - integerValue(2, 20), // overwrites a - }, - exp: []tsm1.IntegerValue{ - integerValue(1, 10), - integerValue(2, 20), - integerValue(3, 3), - integerValue(4, 4), - }, - }, - { - a: []tsm1.IntegerValue{ - integerValue(1, 1), - integerValue(2, 2), - integerValue(3, 3), - integerValue(4, 4), - }, - - b: []tsm1.IntegerValue{ - integerValue(1, 10), // overwrites a - integerValue(2, 20), // overwrites a - integerValue(3, 30), - integerValue(4, 40), - }, - exp: []tsm1.IntegerValue{ - integerValue(1, 10), - integerValue(2, 20), - integerValue(3, 30), - integerValue(4, 40), - }, - }, - { - a: []tsm1.IntegerValue{ - integerValue(0, 0), - integerValue(1, 1), - integerValue(2, 2), - integerValue(3, 3), - integerValue(4, 4), - }, - b: []tsm1.IntegerValue{ - integerValue(0, 0), - integerValue(2, 20), - integerValue(4, 40), - }, - exp: []tsm1.IntegerValue{ - integerValue(0, 0.0), - integerValue(1, 1), - integerValue(2, 20), - integerValue(3, 3), - integerValue(4, 40), 
- }, - }, - } - - for i, test := range tests { - if i != 2 { - continue - } - - got := tsm1.IntegerValues(test.a).Merge(test.b) - if exp, got := len(test.exp), len(got); exp != got { - t.Fatalf("test(%d): value length mismatch: exp %v, got %v", i, exp, got) - } - - dedup := tsm1.IntegerValues(append(test.a, test.b...)).Deduplicate() - - for i := range test.exp { - if exp, got := test.exp[i].String(), got[i].String(); exp != got { - t.Fatalf("value mismatch:\n exp %v\n got %v", exp, got) - } - - if exp, got := test.exp[i].String(), dedup[i].String(); exp != got { - t.Fatalf("value mismatch:\n exp %v\n got %v", exp, got) - } - } - } -} - -func TestUnsignedValues_Merge(t *testing.T) { - uintValue := func(t int64, f uint64) tsm1.UnsignedValue { - return tsm1.NewValue(t, f).(tsm1.UnsignedValue) - } - - tests := []struct { - a, b, exp []tsm1.UnsignedValue - }{ - - { // empty a - a: []tsm1.UnsignedValue{}, - - b: []tsm1.UnsignedValue{ - uintValue(1, 10), - uintValue(2, 20), - }, - exp: []tsm1.UnsignedValue{ - uintValue(1, 10), - uintValue(2, 20), - }, - }, - { // empty b - a: []tsm1.UnsignedValue{ - uintValue(1, 1), - uintValue(2, 2), - }, - - b: []tsm1.UnsignedValue{}, - exp: []tsm1.UnsignedValue{ - uintValue(1, 1), - uintValue(2, 2), - }, - }, - { - a: []tsm1.UnsignedValue{ - uintValue(1, 1), - }, - b: []tsm1.UnsignedValue{ - uintValue(0, 0), - uintValue(1, 10), // overwrites a - uintValue(2, 20), - uintValue(3, 30), - uintValue(4, 40), - }, - exp: []tsm1.UnsignedValue{ - uintValue(0, 0), - uintValue(1, 10), - uintValue(2, 20), - uintValue(3, 30), - uintValue(4, 40), - }, - }, - { - a: []tsm1.UnsignedValue{ - uintValue(1, 1), - uintValue(2, 2), - uintValue(3, 3), - uintValue(4, 4), - }, - - b: []tsm1.UnsignedValue{ - uintValue(1, ^uint64(0)), // overwrites a - uintValue(2, 20), // overwrites a - }, - exp: []tsm1.UnsignedValue{ - uintValue(1, ^uint64(0)), - uintValue(2, 20), - uintValue(3, 3), - uintValue(4, 4), - }, - }, - { - a: []tsm1.UnsignedValue{ - uintValue(1, 1), - uintValue(2, 2), - uintValue(3, 3), - uintValue(4, 4), - }, - - b: []tsm1.UnsignedValue{ - uintValue(1, 10), // overwrites a - uintValue(2, 20), // overwrites a - uintValue(3, 30), - uintValue(4, 40), - }, - exp: []tsm1.UnsignedValue{ - uintValue(1, 10), - uintValue(2, 20), - uintValue(3, 30), - uintValue(4, 40), - }, - }, - { - a: []tsm1.UnsignedValue{ - uintValue(0, 0), - uintValue(1, 1), - uintValue(2, 2), - uintValue(3, 3), - uintValue(4, 4), - }, - b: []tsm1.UnsignedValue{ - uintValue(0, 0), - uintValue(2, 20), - uintValue(4, 40), - }, - exp: []tsm1.UnsignedValue{ - uintValue(0, 0.0), - uintValue(1, 1), - uintValue(2, 20), - uintValue(3, 3), - uintValue(4, 40), - }, - }, - } - - for i, test := range tests { - if i != 2 { - continue - } - - got := tsm1.UnsignedValues(test.a).Merge(test.b) - if exp, got := len(test.exp), len(got); exp != got { - t.Fatalf("test(%d): value length mismatch: exp %v, got %v", i, exp, got) - } - - dedup := tsm1.UnsignedValues(append(test.a, test.b...)).Deduplicate() - - for i := range test.exp { - if exp, got := test.exp[i].String(), got[i].String(); exp != got { - t.Fatalf("value mismatch:\n exp %v\n got %v", exp, got) - } - - if exp, got := test.exp[i].String(), dedup[i].String(); exp != got { - t.Fatalf("value mismatch:\n exp %v\n got %v", exp, got) - } - } - } -} - -func TestFloatValues_Merge(t *testing.T) { - floatValue := func(t int64, f float64) tsm1.FloatValue { - return tsm1.NewValue(t, f).(tsm1.FloatValue) - } - - tests := []struct { - a, b, exp []tsm1.FloatValue - }{ - - { // empty a - 
a: []tsm1.FloatValue{}, - - b: []tsm1.FloatValue{ - floatValue(1, 1.2), - floatValue(2, 2.2), - }, - exp: []tsm1.FloatValue{ - floatValue(1, 1.2), - floatValue(2, 2.2), - }, - }, - { // empty b - a: []tsm1.FloatValue{ - floatValue(1, 1.1), - floatValue(2, 2.1), - }, - - b: []tsm1.FloatValue{}, - exp: []tsm1.FloatValue{ - floatValue(1, 1.1), - floatValue(2, 2.1), - }, - }, - { - a: []tsm1.FloatValue{ - floatValue(1, 1.1), - }, - b: []tsm1.FloatValue{ - floatValue(0, 0.0), - floatValue(1, 1.2), // overwrites a - floatValue(2, 2.2), - floatValue(3, 3.2), - floatValue(4, 4.2), - }, - exp: []tsm1.FloatValue{ - floatValue(0, 0.0), - floatValue(1, 1.2), - floatValue(2, 2.2), - floatValue(3, 3.2), - floatValue(4, 4.2), - }, - }, - { - a: []tsm1.FloatValue{ - floatValue(1, 1.1), - floatValue(2, 2.1), - floatValue(3, 3.1), - floatValue(4, 4.1), - }, - - b: []tsm1.FloatValue{ - floatValue(1, 1.2), // overwrites a - floatValue(2, 2.2), // overwrites a - }, - exp: []tsm1.FloatValue{ - floatValue(1, 1.2), - floatValue(2, 2.2), - floatValue(3, 3.1), - floatValue(4, 4.1), - }, - }, - { - a: []tsm1.FloatValue{ - floatValue(1, 1.1), - floatValue(2, 2.1), - floatValue(3, 3.1), - floatValue(4, 4.1), - }, - - b: []tsm1.FloatValue{ - floatValue(1, 1.2), // overwrites a - floatValue(2, 2.2), // overwrites a - floatValue(3, 3.2), - floatValue(4, 4.2), - }, - exp: []tsm1.FloatValue{ - floatValue(1, 1.2), - floatValue(2, 2.2), - floatValue(3, 3.2), - floatValue(4, 4.2), - }, - }, - { - a: []tsm1.FloatValue{ - floatValue(0, 0.0), - floatValue(1, 1.1), - floatValue(2, 2.1), - floatValue(3, 3.1), - floatValue(4, 4.1), - }, - b: []tsm1.FloatValue{ - floatValue(0, 0.0), - floatValue(2, 2.2), - floatValue(4, 4.2), - }, - exp: []tsm1.FloatValue{ - floatValue(0, 0.0), - floatValue(1, 1.1), - floatValue(2, 2.2), - floatValue(3, 3.1), - floatValue(4, 4.2), - }, - }, - } - - for i, test := range tests { - got := tsm1.FloatValues(test.a).Merge(test.b) - if exp, got := len(test.exp), len(got); exp != got { - t.Fatalf("test(%d): value length mismatch: exp %v, got %v", i, exp, got) - } - - dedup := tsm1.FloatValues(append(test.a, test.b...)).Deduplicate() - - for i := range test.exp { - if exp, got := test.exp[i].String(), got[i].String(); exp != got { - t.Fatalf("value mismatch:\n exp %v\n got %v", exp, got) - } - - if exp, got := test.exp[i].String(), dedup[i].String(); exp != got { - t.Fatalf("value mismatch:\n exp %v\n got %v", exp, got) - } - } - } -} - -func TestBooleanValues_Merge(t *testing.T) { - booleanValue := func(t int64, f bool) tsm1.BooleanValue { - return tsm1.NewValue(t, f).(tsm1.BooleanValue) - } - - tests := []struct { - a, b, exp []tsm1.BooleanValue - }{ - - { // empty a - a: []tsm1.BooleanValue{}, - - b: []tsm1.BooleanValue{ - booleanValue(1, true), - booleanValue(2, true), - }, - exp: []tsm1.BooleanValue{ - booleanValue(1, true), - booleanValue(2, true), - }, - }, - { // empty b - a: []tsm1.BooleanValue{ - booleanValue(1, true), - booleanValue(2, true), - }, - - b: []tsm1.BooleanValue{}, - exp: []tsm1.BooleanValue{ - booleanValue(1, true), - booleanValue(2, true), - }, - }, - { - a: []tsm1.BooleanValue{ - booleanValue(1, true), - }, - b: []tsm1.BooleanValue{ - booleanValue(0, false), - booleanValue(1, false), // overwrites a - booleanValue(2, false), - booleanValue(3, false), - booleanValue(4, false), - }, - exp: []tsm1.BooleanValue{ - booleanValue(0, false), - booleanValue(1, false), - booleanValue(2, false), - booleanValue(3, false), - booleanValue(4, false), - }, - }, - { - a: []tsm1.BooleanValue{ - 
booleanValue(1, true), - booleanValue(2, true), - booleanValue(3, true), - booleanValue(4, true), - }, - - b: []tsm1.BooleanValue{ - booleanValue(1, false), // overwrites a - booleanValue(2, false), // overwrites a - }, - exp: []tsm1.BooleanValue{ - booleanValue(1, false), // overwrites a - booleanValue(2, false), // overwrites a - booleanValue(3, true), - booleanValue(4, true), - }, - }, - { - a: []tsm1.BooleanValue{ - booleanValue(1, true), - booleanValue(2, true), - booleanValue(3, true), - booleanValue(4, true), - }, - - b: []tsm1.BooleanValue{ - booleanValue(1, false), // overwrites a - booleanValue(2, false), // overwrites a - booleanValue(3, false), - booleanValue(4, false), - }, - exp: []tsm1.BooleanValue{ - booleanValue(1, false), - booleanValue(2, false), - booleanValue(3, false), - booleanValue(4, false), - }, - }, - { - a: []tsm1.BooleanValue{ - booleanValue(0, true), - booleanValue(1, true), - booleanValue(2, true), - booleanValue(3, true), - booleanValue(4, true), - }, - b: []tsm1.BooleanValue{ - booleanValue(0, false), - booleanValue(2, false), - booleanValue(4, false), - }, - exp: []tsm1.BooleanValue{ - booleanValue(0, false), - booleanValue(1, true), - booleanValue(2, false), - booleanValue(3, true), - booleanValue(4, false), - }, - }, - } - - for i, test := range tests { - got := tsm1.BooleanValues(test.a).Merge(test.b) - if exp, got := len(test.exp), len(got); exp != got { - t.Fatalf("test(%d): value length mismatch: exp %v, got %v", i, exp, got) - } - - dedup := tsm1.BooleanValues(append(test.a, test.b...)).Deduplicate() - - for i := range test.exp { - if exp, got := test.exp[i].String(), got[i].String(); exp != got { - t.Fatalf("value mismatch:\n exp %v\n got %v", exp, got) - } - - if exp, got := test.exp[i].String(), dedup[i].String(); exp != got { - t.Fatalf("value mismatch:\n exp %v\n got %v", exp, got) - } - } - } -} - -func TestStringValues_Merge(t *testing.T) { - stringValue := func(t int64, f string) tsm1.StringValue { - return tsm1.NewValue(t, f).(tsm1.StringValue) - } - - tests := []struct { - a, b, exp []tsm1.StringValue - }{ - - { // empty a - a: []tsm1.StringValue{}, - - b: []tsm1.StringValue{ - stringValue(1, "10"), - stringValue(2, "20"), - }, - exp: []tsm1.StringValue{ - stringValue(1, "10"), - stringValue(2, "20"), - }, - }, - { // empty b - a: []tsm1.StringValue{ - stringValue(1, "1"), - stringValue(2, "2"), - }, - - b: []tsm1.StringValue{}, - exp: []tsm1.StringValue{ - stringValue(1, "1"), - stringValue(2, "2"), - }, - }, - { - a: []tsm1.StringValue{ - stringValue(1, "1"), - }, - b: []tsm1.StringValue{ - stringValue(0, "0"), - stringValue(1, "10"), // overwrites a - stringValue(2, "20"), - stringValue(3, "30"), - stringValue(4, "40"), - }, - exp: []tsm1.StringValue{ - stringValue(0, "0"), - stringValue(1, "10"), - stringValue(2, "20"), - stringValue(3, "30"), - stringValue(4, "40"), - }, - }, - { - a: []tsm1.StringValue{ - stringValue(1, "1"), - stringValue(2, "2"), - stringValue(3, "3"), - stringValue(4, "4"), - }, - - b: []tsm1.StringValue{ - stringValue(1, "10"), // overwrites a - stringValue(2, "20"), // overwrites a - }, - exp: []tsm1.StringValue{ - stringValue(1, "10"), - stringValue(2, "20"), - stringValue(3, "3"), - stringValue(4, "4"), - }, - }, - { - a: []tsm1.StringValue{ - stringValue(1, "1"), - stringValue(2, "2"), - stringValue(3, "3"), - stringValue(4, "4"), - }, - - b: []tsm1.StringValue{ - stringValue(1, "10"), // overwrites a - stringValue(2, "20"), // overwrites a - stringValue(3, "30"), - stringValue(4, "40"), - }, - exp: 
[]tsm1.StringValue{ - stringValue(1, "10"), - stringValue(2, "20"), - stringValue(3, "30"), - stringValue(4, "40"), - }, - }, - { - a: []tsm1.StringValue{ - stringValue(0, "0"), - stringValue(1, "1"), - stringValue(2, "2"), - stringValue(3, "3"), - stringValue(4, "4"), - }, - b: []tsm1.StringValue{ - stringValue(0, "0"), - stringValue(2, "20"), - stringValue(4, "40"), - }, - exp: []tsm1.StringValue{ - stringValue(0, "0.0"), - stringValue(1, "1"), - stringValue(2, "20"), - stringValue(3, "3"), - stringValue(4, "40"), - }, - }, - } - - for i, test := range tests { - if i != 2 { - continue - } - - got := tsm1.StringValues(test.a).Merge(test.b) - if exp, got := len(test.exp), len(got); exp != got { - t.Fatalf("test(%d): value length mismatch: exp %v, got %v", i, exp, got) - } - - dedup := tsm1.StringValues(append(test.a, test.b...)).Deduplicate() - - for i := range test.exp { - if exp, got := test.exp[i].String(), got[i].String(); exp != got { - t.Fatalf("value mismatch:\n exp %v\n got %v", exp, got) - } - - if exp, got := test.exp[i].String(), dedup[i].String(); exp != got { - t.Fatalf("value mismatch:\n exp %v\n got %v", exp, got) - } - } - } -} -func getTimes(n, step int, precision time.Duration) []int64 { - t := time.Now().Round(precision).UnixNano() - a := make([]int64, n) - for i := 0; i < n; i++ { - a[i] = t + (time.Duration(i*60) * precision).Nanoseconds() - } - return a -} - -func BenchmarkDecodeBlock_Float_Empty(b *testing.B) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, float64(i)) - } - - bytes, err := tsm1.Values(values).Encode(nil) - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - var decodedValues []tsm1.Value - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, err = tsm1.DecodeBlock(bytes, decodedValues) - if err != nil { - b.Fatalf("unexpected error decoding block: %v", err) - } - } -} - -func BenchmarkDecodeBlock_Float_EqualSize(b *testing.B) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, float64(i)) - } - - bytes, err := tsm1.Values(values).Encode(nil) - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - decodedValues := make([]tsm1.Value, len(values)) - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, err = tsm1.DecodeBlock(bytes, decodedValues) - if err != nil { - b.Fatalf("unexpected error decoding block: %v", err) - } - } -} - -func BenchmarkDecodeBlock_Float_TypeSpecific(b *testing.B) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, float64(i)) - } - - bytes, err := tsm1.Values(values).Encode(nil) - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - decodedValues := make([]tsm1.FloatValue, len(values)) - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, err = tsm1.DecodeFloatBlock(bytes, &decodedValues) - if err != nil { - b.Fatalf("unexpected error decoding block: %v", err) - } - } -} - -func BenchmarkDecodeBlock_Integer_Empty(b *testing.B) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, int64(i)) - } - - bytes, err := tsm1.Values(values).Encode(nil) - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - var decodedValues 
[]tsm1.Value - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, err = tsm1.DecodeBlock(bytes, decodedValues) - if err != nil { - b.Fatalf("unexpected error decoding block: %v", err) - } - } -} - -func BenchmarkDecodeBlock_Integer_EqualSize(b *testing.B) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, int64(i)) - } - - bytes, err := tsm1.Values(values).Encode(nil) - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - decodedValues := make([]tsm1.Value, len(values)) - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, err = tsm1.DecodeBlock(bytes, decodedValues) - if err != nil { - b.Fatalf("unexpected error decoding block: %v", err) - } - } -} - -func BenchmarkDecodeBlock_Integer_TypeSpecific(b *testing.B) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, int64(i)) - } - - bytes, err := tsm1.Values(values).Encode(nil) - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - decodedValues := make([]tsm1.IntegerValue, len(values)) - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, err = tsm1.DecodeIntegerBlock(bytes, &decodedValues) - if err != nil { - b.Fatalf("unexpected error decoding block: %v", err) - } - } -} - -func BenchmarkDecodeBlock_Boolean_Empty(b *testing.B) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, true) - } - - bytes, err := tsm1.Values(values).Encode(nil) - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - var decodedValues []tsm1.Value - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, err = tsm1.DecodeBlock(bytes, decodedValues) - if err != nil { - b.Fatalf("unexpected error decoding block: %v", err) - } - } -} - -func BenchmarkDecodeBlock_Boolean_EqualSize(b *testing.B) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, true) - } - - bytes, err := tsm1.Values(values).Encode(nil) - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - decodedValues := make([]tsm1.Value, len(values)) - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, err = tsm1.DecodeBlock(bytes, decodedValues) - if err != nil { - b.Fatalf("unexpected error decoding block: %v", err) - } - } -} - -func BenchmarkDecodeBlock_Boolean_TypeSpecific(b *testing.B) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, true) - } - - bytes, err := tsm1.Values(values).Encode(nil) - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - decodedValues := make([]tsm1.BooleanValue, len(values)) - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, err = tsm1.DecodeBooleanBlock(bytes, &decodedValues) - if err != nil { - b.Fatalf("unexpected error decoding block: %v", err) - } - } -} - -func BenchmarkDecodeBooleanBlock(b *testing.B) { - cases := []int{ - 5, - 55, - 555, - 1000, - } - for _, n := range cases { - b.Run(fmt.Sprintf("%d", n), func(b *testing.B) { - valueCount := n - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, true) - } - - bytes, err := 
tsm1.Values(values).Encode(nil) - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - b.ResetTimer() - b.ReportAllocs() - b.SetBytes(int64(tsm1.Values(values).Size())) - - b.RunParallel(func(pb *testing.PB) { - decodedValues := make([]tsm1.BooleanValue, len(values)) - - for pb.Next() { - _, err = tsm1.DecodeBooleanBlock(bytes, &decodedValues) - if err != nil { - b.Fatalf("unexpected error decoding block: %v", err) - } - } - }) - }) - } -} - -func BenchmarkDecodeFloatBlock(b *testing.B) { - cases := []int{ - 5, - 55, - 555, - 1000, - } - for _, n := range cases { - b.Run(fmt.Sprintf("%d", n), func(b *testing.B) { - valueCount := n - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, float64(i)) - } - - bytes, err := tsm1.Values(values).Encode(nil) - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - b.ResetTimer() - b.ReportAllocs() - b.SetBytes(int64(tsm1.Values(values).Size())) - - b.RunParallel(func(pb *testing.PB) { - decodedValues := make([]tsm1.FloatValue, len(values)) - - for pb.Next() { - _, err = tsm1.DecodeFloatBlock(bytes, &decodedValues) - if err != nil { - b.Fatalf("unexpected error decoding block: %v", err) - } - } - }) - }) - } -} - -func BenchmarkDecodeIntegerBlock(b *testing.B) { - rle := func(i int) int64 { return int64(i) } - s8b := func(i int) int64 { return int64(i + int(rand.Int31n(10))) } - - cases := []struct { - enc string - gen func(i int) int64 - n int - }{ - {enc: "rle", gen: rle, n: 5}, - {enc: "rle", gen: rle, n: 55}, - {enc: "rle", gen: rle, n: 555}, - {enc: "rle", gen: rle, n: 1000}, - {enc: "s8b", gen: s8b, n: 5}, - {enc: "s8b", gen: s8b, n: 55}, - {enc: "s8b", gen: s8b, n: 555}, - {enc: "s8b", gen: s8b, n: 1000}, - } - for _, bm := range cases { - b.Run(fmt.Sprintf("%s_%d", bm.enc, bm.n), func(b *testing.B) { - rand.Seed(int64(bm.n * 1e3)) - - valueCount := bm.n - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, bm.gen(i)) - } - - bytes, err := tsm1.Values(values).Encode(nil) - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - b.ResetTimer() - b.ReportAllocs() - b.SetBytes(int64(tsm1.Values(values).Size())) - - b.RunParallel(func(pb *testing.PB) { - decodedValues := make([]tsm1.IntegerValue, len(values)) - - for pb.Next() { - _, err = tsm1.DecodeIntegerBlock(bytes, &decodedValues) - if err != nil { - b.Fatalf("unexpected error decoding block: %v", err) - } - } - }) - }) - } -} - -func BenchmarkDecodeStringBlock(b *testing.B) { - cases := []int{ - 5, - 55, - 555, - 1000, - } - for _, n := range cases { - b.Run(fmt.Sprintf("%d", n), func(b *testing.B) { - valueCount := n - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, fmt.Sprintf("value %d", i)) - } - - bytes, err := tsm1.Values(values).Encode(nil) - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - b.ResetTimer() - b.ReportAllocs() - b.SetBytes(int64(tsm1.Values(values).Size())) - - b.RunParallel(func(pb *testing.PB) { - decodedValues := make([]tsm1.StringValue, len(values)) - - for pb.Next() { - _, err = tsm1.DecodeStringBlock(bytes, &decodedValues) - if err != nil { - b.Fatalf("unexpected error decoding block: %v", err) - } - } - }) - }) - } -} - -func BenchmarkDecodeBlock_String_Empty(b *testing.B) { - valueCount := 1000 - times := getTimes(valueCount, 60, 
time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, fmt.Sprintf("value %d", i)) - } - - bytes, err := tsm1.Values(values).Encode(nil) - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - var decodedValues []tsm1.Value - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, err = tsm1.DecodeBlock(bytes, decodedValues) - if err != nil { - b.Fatalf("unexpected error decoding block: %v", err) - } - } -} - -func BenchmarkDecodeBlock_String_EqualSize(b *testing.B) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, fmt.Sprintf("value %d", i)) - } - - bytes, err := tsm1.Values(values).Encode(nil) - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - decodedValues := make([]tsm1.Value, len(values)) - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, err = tsm1.DecodeBlock(bytes, decodedValues) - if err != nil { - b.Fatalf("unexpected error decoding block: %v", err) - } - } -} - -func BenchmarkDecodeBlock_String_TypeSpecific(b *testing.B) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, fmt.Sprintf("value %d", i)) - } - - bytes, err := tsm1.Values(values).Encode(nil) - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - decodedValues := make([]tsm1.StringValue, len(values)) - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, err = tsm1.DecodeStringBlock(bytes, &decodedValues) - if err != nil { - b.Fatalf("unexpected error decoding block: %v", err) - } - } -} - -func BenchmarkValues_Deduplicate(b *testing.B) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - values := make([]tsm1.Value, len(times)) - for i, t := range times { - values[i] = tsm1.NewValue(t, fmt.Sprintf("value %d", i)) - } - values = append(values, values...) 
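The deleted benchmarks above and below all share one timing discipline: build inputs once, call b.ResetTimer() so setup is not charged to the measurement, and (in benchmarkMerge further down) bracket per-iteration copies with StopTimer/StartTimer. A minimal, self-contained sketch of that pattern follows; the sortInts helper is a stand-in, not code from this repository.

// Placed in a *_test.go file and run with: go test -bench SortInts
package bench_sketch

import (
	"math/rand"
	"sort"
	"testing"
)

// sortInts is a stand-in for the operation being measured.
func sortInts(a []int) { sort.Ints(a) }

func BenchmarkSortInts(b *testing.B) {
	src := rand.Perm(1 << 12) // one-time setup, excluded from timing
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		b.StopTimer() // the per-iteration copy is not what we measure
		a := make([]int, len(src))
		copy(a, src)
		b.StartTimer()
		sortInts(a)
	}
}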
- - b.ResetTimer() - for i := 0; i < b.N; i++ { - tsm1.Values(values).Deduplicate() - } -} - -func BenchmarkValues_Merge(b *testing.B) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - a := make([]tsm1.Value, len(times)) - c := make([]tsm1.Value, len(times)) - - for i, t := range times { - a[i] = tsm1.NewValue(t, float64(i)) - c[i] = tsm1.NewValue(t+1, float64(i)) - } - - b.ResetTimer() - benchmarkMerge(a, c, b) -} - -func BenchmarkValues_MergeDisjoint(b *testing.B) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - a := make([]tsm1.Value, len(times)) - c := make([]tsm1.Value, len(times)) - - for i, t := range times { - a[i] = tsm1.NewValue(t, float64(i)) - c[i] = tsm1.NewValue(times[len(times)-1]+int64((i+1)*1e9), float64(i)) - } - - b.ResetTimer() - benchmarkMerge(a, c, b) -} - -func BenchmarkValues_MergeSame(b *testing.B) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - a := make([]tsm1.Value, len(times)) - c := make([]tsm1.Value, len(times)) - - for i, t := range times { - a[i] = tsm1.NewValue(t, float64(i)) - c[i] = tsm1.NewValue(t, float64(i)) - } - - b.ResetTimer() - benchmarkMerge(a, c, b) -} - -func BenchmarkValues_MergeSimilar(b *testing.B) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - a := make([]tsm1.Value, len(times)) - c := make([]tsm1.Value, len(times)) - - for i, t := range times { - a[i] = tsm1.NewValue(t, float64(i)) - if i == 0 { - t++ - } - c[i] = tsm1.NewValue(t, float64(i)) - } - - b.ResetTimer() - benchmarkMerge(a, c, b) -} - -func BenchmarkValues_MergeUnevenA(b *testing.B) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - a := make([]tsm1.Value, len(times)) - c := make([]tsm1.Value, len(times)) - - for i, t := range times { - a[i] = tsm1.NewValue(t, float64(i)) - c[i] = tsm1.NewValue(t, float64(i)) - } - - b.ResetTimer() - benchmarkMerge(a[:700], c[:10], b) -} - -func BenchmarkValues_MergeUnevenB(b *testing.B) { - valueCount := 1000 - times := getTimes(valueCount, 60, time.Second) - a := make([]tsm1.Value, len(times)) - c := make([]tsm1.Value, len(times)) - - for i, t := range times { - a[i] = tsm1.NewValue(t, float64(i)) - c[i] = tsm1.NewValue(t, float64(i)) - } - - b.ResetTimer() - benchmarkMerge(a[:10], c[:700], b) -} - -func benchmarkMerge(a, c tsm1.Values, b *testing.B) { - for i := 0; i < b.N; i++ { - b.StopTimer() - aa := make(tsm1.Values, len(a)) - copy(aa, a) - cc := make(tsm1.Values, len(c)) - copy(cc, c) - b.StartTimer() - tsm1.Values(aa).Merge(tsm1.Values(cc)) - } -} - -func BenchmarkValues_EncodeInteger(b *testing.B) { - valueCount := 1024 - times := getTimes(valueCount, 60, time.Second) - a := make([]tsm1.Value, len(times)) - - for i, t := range times { - a[i] = tsm1.NewValue(t, int64(i)) - } - - buf := make([]byte, 1024*8) - b.ResetTimer() - for i := 0; i < b.N; i++ { - tsm1.Values(a).Encode(buf) - } -} - -func BenchmarkValues_EncodeFloat(b *testing.B) { - valueCount := 1024 - times := getTimes(valueCount, 60, time.Second) - a := make([]tsm1.Value, len(times)) - - for i, t := range times { - a[i] = tsm1.NewValue(t, float64(i)) - } - - buf := make([]byte, 1024*8) - b.ResetTimer() - for i := 0; i < b.N; i++ { - tsm1.Values(a).Encode(buf) - } -} -func BenchmarkValues_EncodeString(b *testing.B) { - valueCount := 1024 - times := getTimes(valueCount, 60, time.Second) - a := make([]tsm1.Value, len(times)) - - for i, t := range times { - a[i] = tsm1.NewValue(t, fmt.Sprintf("%d", i)) - } - - buf := make([]byte, 1024*8) - b.ResetTimer() - 
for i := 0; i < b.N; i++ { - tsm1.Values(a).Encode(buf) - } -} -func BenchmarkValues_EncodeBool(b *testing.B) { - valueCount := 1024 - times := getTimes(valueCount, 60, time.Second) - a := make([]tsm1.Value, len(times)) - - for i, t := range times { - if i%2 == 0 { - a[i] = tsm1.NewValue(t, true) - } else { - a[i] = tsm1.NewValue(t, false) - } - } - - buf := make([]byte, 1024*8) - b.ResetTimer() - for i := 0; i < b.N; i++ { - tsm1.Values(a).Encode(buf) - } -} diff --git a/tsdb/tsm1/engine.go b/tsdb/tsm1/engine.go deleted file mode 100644 index 5a69636094..0000000000 --- a/tsdb/tsm1/engine.go +++ /dev/null @@ -1,1541 +0,0 @@ -// Package tsm1 provides a TSDB in the Time Structured Merge tree format. -package tsm1 - -import ( - "bytes" - "context" - "fmt" - "io/ioutil" - "os" - "path/filepath" - "regexp" - "runtime" - "strings" - "sync" - "sync/atomic" - "time" - - "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/kit/tracing" - "github.com/influxdata/influxdb/v2/logger" - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/pkg/lifecycle" - "github.com/influxdata/influxdb/v2/pkg/limiter" - "github.com/influxdata/influxdb/v2/pkg/metrics" - "github.com/influxdata/influxdb/v2/query" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/cursors" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" - "github.com/influxdata/influxdb/v2/tsdb/tsi1" - "github.com/influxdata/influxql" - "github.com/prometheus/client_golang/prometheus" - "go.uber.org/zap" - "golang.org/x/time/rate" -) - -//go:generate env GO111MODULE=on go run github.com/benbjohnson/tmpl -data=@array_cursor.gen.go.tmpldata array_cursor.gen.go.tmpl array_cursor_iterator.gen.go.tmpl -//go:generate env GO111MODULE=on go run github.com/influxdata/influxdb/v2/tools/tmpl -i -data=file_store.gen.go.tmpldata file_store.gen.go.tmpl=file_store.gen.go -//go:generate env GO111MODULE=on go run github.com/influxdata/influxdb/v2/tools/tmpl -i -d isArray=y -data=file_store.gen.go.tmpldata file_store.gen.go.tmpl=file_store_array.gen.go -//go:generate env GO111MODULE=on go run github.com/benbjohnson/tmpl -data=@encoding.gen.go.tmpldata encoding.gen.go.tmpl -//go:generate env GO111MODULE=on go run github.com/benbjohnson/tmpl -data=@compact.gen.go.tmpldata compact.gen.go.tmpl -//go:generate env GO111MODULE=on go run github.com/benbjohnson/tmpl -data=@reader.gen.go.tmpldata reader.gen.go.tmpl -//go:generate stringer -type=CacheStatus - -var ( - // Static objects to prevent small allocs. - KeyFieldSeparatorBytes = []byte(keyFieldSeparator) -) - -var ( - tsmGroup = metrics.MustRegisterGroup("platform-tsm1") - numberOfRefCursorsCounter = metrics.MustRegisterCounter("cursors_ref", metrics.WithGroup(tsmGroup)) -) - -// NewContextWithMetricsGroup creates a new context with a tsm1 metrics.Group for tracking -// various metrics when accessing TSM data. -func NewContextWithMetricsGroup(ctx context.Context) context.Context { - group := metrics.NewGroup(tsmGroup) - return metrics.NewContextWithGroup(ctx, group) -} - -// MetricsGroupFromContext returns the tsm1 metrics.Group associated with the context -// or nil if no group has been assigned. 
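The NewContextWithMetricsGroup / MetricsGroupFromContext pair deleted in this file is the usual Go idiom of stashing a per-request accumulator in a context. A self-contained sketch of the same idea using plain context.WithValue; the Group type below is a placeholder, not the real metrics.Group.

package main

import (
	"context"
	"fmt"
)

// Group is a stand-in for the per-request metrics accumulator.
type Group struct{ CursorsRef int }

type ctxKey struct{}

// NewContextWithGroup attaches a fresh Group to the context.
func NewContextWithGroup(ctx context.Context) context.Context {
	return context.WithValue(ctx, ctxKey{}, &Group{})
}

// GroupFromContext returns the attached Group, or nil if none was attached.
func GroupFromContext(ctx context.Context) *Group {
	g, _ := ctx.Value(ctxKey{}).(*Group)
	return g
}

func main() {
	ctx := NewContextWithGroup(context.Background())
	if g := GroupFromContext(ctx); g != nil {
		g.CursorsRef++ // a read path would bump counters like this
	}
	fmt.Println(GroupFromContext(ctx).CursorsRef) // 1
}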
-func MetricsGroupFromContext(ctx context.Context) *metrics.Group { - return metrics.GroupFromContext(ctx) -} - -const ( - // keyFieldSeparator separates the series key from the field name in the composite key - // that identifies a specific field in series - keyFieldSeparator = "#!~#" - - // MaxPointsPerBlock is the maximum number of points in an encoded block in a TSM file - MaxPointsPerBlock = 1000 -) - -// An EngineOption is a functional option for changing the configuration of -// an Engine. -type EngineOption func(i *Engine) - -// WithCompactionPlanner sets the compaction planner for the engine. -func WithCompactionPlanner(planner CompactionPlanner) EngineOption { - return func(e *Engine) { - planner.SetFileStore(e.FileStore) - e.CompactionPlan = planner - } -} - -// Snapshotter allows upward signaling of the tsm1 engine to the storage engine. Hopefully -// it can be removed one day. The weird interface is due to the weird inversion of locking -// that has to happen. -type Snapshotter interface { - AcquireSegments(context.Context, func(segments []string) error) error - CommitSegments(ctx context.Context, segments []string, fn func() error) error -} - -type noSnapshotter struct{} - -func (noSnapshotter) AcquireSegments(_ context.Context, fn func([]string) error) error { - return fn(nil) -} -func (noSnapshotter) CommitSegments(_ context.Context, _ []string, fn func() error) error { - return fn() -} - -// WithSnapshotter sets the callbacks for the engine to use when creating snapshots. -func WithSnapshotter(snapshotter Snapshotter) EngineOption { - return func(e *Engine) { - e.snapshotter = snapshotter - } -} - -// Engine represents a storage engine with compressed blocks. -type Engine struct { - mu sync.RWMutex - - index *tsi1.Index - indexref *lifecycle.Reference - - // The following group of fields is used to track the state of level compactions within the - // Engine. The WaitGroup is used to monitor the compaction goroutines, the 'done' channel is - // used to signal those goroutines to shutdown. Every request to disable level compactions will - // call 'Wait' on 'wg', with the first goroutine to arrive (levelWorkers == 0 while holding the - // lock) will close the done channel and re-assign 'nil' to the variable. Re-enabling will - // decrease 'levelWorkers', and when it decreases to zero, level compactions will be started - // back up again. - - wg *sync.WaitGroup // waitgroup for active level compaction goroutines - done chan struct{} // channel to signal level compactions to stop - levelWorkers int // Number of "workers" that expect compactions to be in a disabled state - - snapDone chan struct{} // channel to signal snapshot compactions to stop - snapWG *sync.WaitGroup // waitgroup for running snapshot compactions - - path string - sfile *seriesfile.SeriesFile - sfileref *lifecycle.Reference - logger *zap.Logger // Logger to be used for important messages - - Cache *Cache - Compactor *Compactor - CompactionPlan CompactionPlanner - FileStore *FileStore - - MaxPointsPerBlock int - - // CacheFlushMemorySizeThreshold specifies the minimum size threshold for - // the cache when the engine should write a snapshot to a TSM file - CacheFlushMemorySizeThreshold uint64 - - // CacheFlushAgeDurationThreshold specified the maximum age a cache can reach - // before it is snapshotted, regardless of its size. 
- CacheFlushAgeDurationThreshold time.Duration - - // CacheFlushWriteColdDuration specifies the length of time after which if - // no writes have been committed to the WAL, the engine will write - // a snapshot of the cache to a TSM file - CacheFlushWriteColdDuration time.Duration - - // Invoked when creating a backup file "as new". - formatFileName FormatFileNameFunc - - // Controls whether to enabled compactions when the engine is open - enableCompactionsOnOpen bool - - compactionTracker *compactionTracker // Used to track state of compactions. - readTracker *readTracker // Used to track number of reads. - defaultMetricLabels prometheus.Labels // N.B this must not be mutated after Open is called. - - // Limiter for concurrent compactions. - compactionLimiter limiter.Fixed - // A semaphore for limiting full compactions across multiple engines. - fullCompactionSemaphore influxdb.Semaphore - // Tracks how long the last full compaction took. Should be accessed atomically. - lastFullCompactionDuration int64 - - scheduler *scheduler - snapshotter Snapshotter -} - -// NewEngine returns a new instance of Engine. -func NewEngine(path string, idx *tsi1.Index, config Config, options ...EngineOption) *Engine { - fs := NewFileStore(path) - fs.openLimiter = limiter.NewFixed(config.MaxConcurrentOpens) - fs.tsmMMAPWillNeed = config.MADVWillNeed - - cache := NewCache(uint64(config.Cache.MaxMemorySize)) - - c := NewCompactor() - c.Dir = path - c.FileStore = fs - c.RateLimit = limiter.NewRate( - int(config.Compaction.Throughput), - int(config.Compaction.ThroughputBurst)) - - // determine max concurrent compactions informed by the system - maxCompactions := config.Compaction.MaxConcurrent - if maxCompactions == 0 { - maxCompactions = runtime.GOMAXPROCS(0) / 2 // Default to 50% of cores for compactions - - // On systems with more cores, cap at 4 to reduce disk utilization. - if maxCompactions > 4 { - maxCompactions = 4 - } - - if maxCompactions < 1 { - maxCompactions = 1 - } - } - - // Don't allow more compactions to run than cores. - if maxCompactions > runtime.GOMAXPROCS(0) { - maxCompactions = runtime.GOMAXPROCS(0) - } - - logger := zap.NewNop() - e := &Engine{ - path: path, - index: idx, - sfile: idx.SeriesFile(), - logger: logger, - - Cache: cache, - - FileStore: fs, - Compactor: c, - CompactionPlan: NewDefaultPlanner(fs, - time.Duration(config.Compaction.FullWriteColdDuration)), - - CacheFlushMemorySizeThreshold: uint64(config.Cache.SnapshotMemorySize), - CacheFlushWriteColdDuration: time.Duration(config.Cache.SnapshotWriteColdDuration), - CacheFlushAgeDurationThreshold: time.Duration(config.Cache.SnapshotAgeDuration), - enableCompactionsOnOpen: true, - formatFileName: DefaultFormatFileName, - compactionLimiter: limiter.NewFixed(maxCompactions), - fullCompactionSemaphore: influxdb.NopSemaphore, - scheduler: newScheduler(maxCompactions), - snapshotter: new(noSnapshotter), - } - - for _, option := range options { - option(e) - } - - return e -} - -// SetSemaphore sets the semaphore used to coordinate full compactions across -// multiple engines. -func (e *Engine) SetSemaphore(s influxdb.Semaphore) { - e.fullCompactionSemaphore = s -} - -// WithCompactionLimiter sets the compaction limiter, which is used to limit the -// number of concurrent compactions. 
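The compaction-concurrency default computed in NewEngine above is small enough to restate on its own. A hedged sketch; the function name is invented, the constants are the ones in the deleted code.

package main

import (
	"fmt"
	"runtime"
)

// defaultMaxCompactions restates the sizing logic from NewEngine: half the
// cores when unconfigured, capped at 4, never below 1, and never more than
// GOMAXPROCS.
func defaultMaxCompactions(configured int) int {
	n := configured
	if n == 0 {
		n = runtime.GOMAXPROCS(0) / 2
		if n > 4 {
			n = 4
		}
		if n < 1 {
			n = 1
		}
	}
	if n > runtime.GOMAXPROCS(0) {
		n = runtime.GOMAXPROCS(0)
	}
	return n
}

func main() {
	fmt.Println(defaultMaxCompactions(0)) // depends on the host's core count
}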
-func (e *Engine) WithCompactionLimiter(limiter limiter.Fixed) { - e.compactionLimiter = limiter -} - -func (e *Engine) WithFormatFileNameFunc(formatFileNameFunc FormatFileNameFunc) { - e.Compactor.WithFormatFileNameFunc(formatFileNameFunc) - e.formatFileName = formatFileNameFunc -} - -func (e *Engine) WithParseFileNameFunc(parseFileNameFunc ParseFileNameFunc) { - e.FileStore.WithParseFileNameFunc(parseFileNameFunc) - e.Compactor.WithParseFileNameFunc(parseFileNameFunc) -} - -func (e *Engine) WithCurrentGenerationFunc(fn func() int) { - e.Compactor.FileStore.SetCurrentGenerationFunc(fn) -} - -func (e *Engine) WithFileStoreObserver(obs FileStoreObserver) { - e.FileStore.WithObserver(obs) -} - -func (e *Engine) WithPageFaultLimiter(limiter *rate.Limiter) { - e.FileStore.WithPageFaultLimiter(limiter) -} - -func (e *Engine) WithCompactionPlanner(planner CompactionPlanner) { - planner.SetFileStore(e.FileStore) - e.CompactionPlan = planner -} - -// SetDefaultMetricLabels sets the default labels for metrics on the engine. -// It must be called before the Engine is opened. -func (e *Engine) SetDefaultMetricLabels(labels prometheus.Labels) { - e.defaultMetricLabels = labels -} - -// SetEnabled sets whether the engine is enabled. -func (e *Engine) SetEnabled(enabled bool) { - e.enableCompactionsOnOpen = enabled - e.SetCompactionsEnabled(enabled) -} - -// SetCompactionsEnabled enables compactions on the engine. When disabled -// all running compactions are aborted and new compactions stop running. -func (e *Engine) SetCompactionsEnabled(enabled bool) { - if enabled { - e.enableSnapshotCompactions() - e.enableLevelCompactions(false) - } else { - e.disableSnapshotCompactions() - e.disableLevelCompactions(false) - } -} - -// enableLevelCompactions will request that level compactions start back up again -// -// 'wait' signifies that a corresponding call to disableLevelCompactions(true) was made at some -// point, and the associated task that required disabled compactions is now complete -func (e *Engine) enableLevelCompactions(wait bool) { - // If we don't need to wait, see if we're already enabled - if !wait { - e.mu.RLock() - if e.done != nil { - e.mu.RUnlock() - return - } - e.mu.RUnlock() - } - - e.mu.Lock() - if wait { - e.levelWorkers -= 1 - } - if e.levelWorkers != 0 || e.done != nil { - // still waiting on more workers or already enabled - e.mu.Unlock() - return - } - - // last one to enable, start things back up - e.Compactor.EnableCompactions() - e.done = make(chan struct{}) - wg := new(sync.WaitGroup) - wg.Add(1) - e.wg = wg - e.mu.Unlock() - - go func() { defer wg.Done(); e.compact(wg) }() -} - -// disableLevelCompactions will stop level compactions before returning. -// -// If 'wait' is set to true, then a corresponding call to enableLevelCompactions(true) will be -// required before level compactions will start back up again. -func (e *Engine) disableLevelCompactions(wait bool) { - e.mu.Lock() - old := e.levelWorkers - if wait { - e.levelWorkers += 1 - } - - // Hold onto the current done channel so we can wait on it if necessary - waitCh := e.done - wg := e.wg - - if old == 0 && e.done != nil { - // It's possible we have closed the done channel and released the lock and another - // goroutine has attempted to disable compactions. We're current in the process of - // disabling them so check for this and wait until the original completes. 
- select { - case <-e.done: - e.mu.Unlock() - return - default: - } - - // Prevent new compactions from starting - e.Compactor.DisableCompactions() - - // Stop all background compaction goroutines - close(e.done) - e.mu.Unlock() - wg.Wait() - - // Signal that all goroutines have exited. - e.mu.Lock() - e.done = nil - e.mu.Unlock() - return - } - e.mu.Unlock() - - // Compaction were already disabled. - if waitCh == nil { - return - } - - // We were not the first caller to disable compactions and they were in the process - // of being disabled. Wait for them to complete before returning. - <-waitCh - wg.Wait() -} - -func (e *Engine) enableSnapshotCompactions() { - // Check if already enabled under read lock - e.mu.RLock() - if e.snapDone != nil { - e.mu.RUnlock() - return - } - e.mu.RUnlock() - - // Check again under write lock - e.mu.Lock() - if e.snapDone != nil { - e.mu.Unlock() - return - } - - e.Compactor.EnableSnapshots() - e.snapDone = make(chan struct{}) - wg := new(sync.WaitGroup) - wg.Add(1) - e.snapWG = wg - e.mu.Unlock() - - go func() { defer wg.Done(); e.compactCache() }() -} - -func (e *Engine) disableSnapshotCompactions() { - e.mu.Lock() - if e.snapDone == nil { - e.mu.Unlock() - return - } - - // We may be in the process of stopping snapshots. See if the channel - // was closed. - select { - case <-e.snapDone: - e.mu.Unlock() - return - default: - } - - // first one here, disable and wait for completion - close(e.snapDone) - e.Compactor.DisableSnapshots() - wg := e.snapWG - e.mu.Unlock() - - // Wait for the snapshot goroutine to exit. - wg.Wait() - - // Signal that the goroutines are exit and everything is stopped by setting - // snapDone to nil. - e.mu.Lock() - e.snapDone = nil - e.mu.Unlock() -} - -// ScheduleFullCompaction will force the engine to fully compact all data stored. -// This will cancel and running compactions and snapshot any data in the cache to -// TSM files. This is an expensive operation. -func (e *Engine) ScheduleFullCompaction(ctx context.Context) error { - // Snapshot any data in the cache - if err := e.WriteSnapshot(ctx, CacheStatusFullCompaction); err != nil { - return err - } - - // Cancel running compactions - e.SetCompactionsEnabled(false) - - // Ensure compactions are restarted - defer e.SetCompactionsEnabled(true) - - // Force the planner to only create a full plan. - e.CompactionPlan.ForceFull() - return nil -} - -// Path returns the path the engine was opened with. -func (e *Engine) Path() string { return e.path } - -func (e *Engine) SetFieldName(measurement []byte, name string) { - e.index.SetFieldName(measurement, name) -} - -func (e *Engine) MeasurementExists(name []byte) (bool, error) { - return e.index.MeasurementExists(name) -} - -func (e *Engine) MeasurementNamesByRegex(re *regexp.Regexp) ([][]byte, error) { - return e.index.MeasurementNamesByRegex(re) -} - -func (e *Engine) HasTagKey(name, key []byte) (bool, error) { - return e.index.HasTagKey(name, key) -} - -func (e *Engine) MeasurementTagKeysByExpr(name []byte, expr influxql.Expr) (map[string]struct{}, error) { - return e.index.MeasurementTagKeysByExpr(name, expr) -} - -func (e *Engine) TagKeyCardinality(name, key []byte) int { - return e.index.TagKeyCardinality(name, key) -} - -// SeriesN returns the unique number of series in the index. -func (e *Engine) SeriesN() int64 { - return e.index.SeriesN() -} - -// MeasurementStats returns the current measurement stats for the engine. 
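enableSnapshotCompactions and disableSnapshotCompactions above coordinate the background goroutine with a close-once done channel plus a WaitGroup, guarded by the engine mutex. A stripped-down, self-contained sketch of that start/stop handshake; the worker type and its timings are illustrative only.

package main

import (
	"fmt"
	"sync"
	"time"
)

type worker struct {
	mu   sync.Mutex
	done chan struct{}
	wg   *sync.WaitGroup
}

// start launches the background loop if it is not already running.
func (w *worker) start() {
	w.mu.Lock()
	defer w.mu.Unlock()
	if w.done != nil {
		return // already running
	}
	w.done = make(chan struct{})
	w.wg = &sync.WaitGroup{}
	w.wg.Add(1)
	done := w.done
	go func() {
		defer w.wg.Done()
		t := time.NewTicker(10 * time.Millisecond)
		defer t.Stop()
		for {
			select {
			case <-done:
				return
			case <-t.C:
				// periodic work goes here
			}
		}
	}()
}

// stop signals the loop, waits for it to exit, then marks it stopped.
func (w *worker) stop() {
	w.mu.Lock()
	if w.done == nil {
		w.mu.Unlock()
		return
	}
	close(w.done)
	wg := w.wg
	w.mu.Unlock()

	wg.Wait()

	w.mu.Lock()
	w.done = nil
	w.mu.Unlock()
}

func main() {
	var w worker
	w.start()
	time.Sleep(50 * time.Millisecond)
	w.stop()
	fmt.Println("stopped cleanly")
}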
-func (e *Engine) MeasurementStats() (MeasurementStats, error) { - return e.FileStore.MeasurementStats() -} - -func (e *Engine) initTrackers() { - mmu.Lock() - defer mmu.Unlock() - - if bms == nil { - // Initialise metrics if an engine has not done so already. - bms = newBlockMetrics(e.defaultMetricLabels) - } - - // Propagate prometheus metrics down into trackers. - e.compactionTracker = newCompactionTracker(bms.compactionMetrics, e.defaultMetricLabels) - e.FileStore.tracker = newFileTracker(bms.fileMetrics, e.defaultMetricLabels) - e.Cache.tracker = newCacheTracker(bms.cacheMetrics, e.defaultMetricLabels) - e.readTracker = newReadTracker(bms.readMetrics, e.defaultMetricLabels) - - e.scheduler.setCompactionTracker(e.compactionTracker) -} - -// Open opens and initializes the engine. -func (e *Engine) Open(ctx context.Context) (err error) { - span, ctx := tracing.StartSpanFromContext(ctx) - defer span.Finish() - - defer func() { - if err != nil { - e.Close() - } - }() - - e.indexref, err = e.index.Acquire() - if err != nil { - return err - } - - e.sfileref, err = e.sfile.Acquire() - if err != nil { - return err - } - - e.initTrackers() - - if err := os.MkdirAll(e.path, 0777); err != nil { - return err - } - - if err := e.cleanup(); err != nil { - return err - } - - if err := e.FileStore.Open(ctx); err != nil { - return err - } - - e.Compactor.Open() - - if e.enableCompactionsOnOpen { - e.SetCompactionsEnabled(true) - } - - return nil -} - -// Close closes the engine. Subsequent calls to Close are a nop. -func (e *Engine) Close() error { - e.SetCompactionsEnabled(false) - - // Lock now and close everything else down. - e.mu.Lock() - defer e.mu.Unlock() - - // Ensures that the channel will not be closed again. - e.done = nil - - if err := e.FileStore.Close(); err != nil { - return err - } - - // Release our references. - if e.sfileref != nil { - e.sfileref.Release() - e.sfileref = nil - } - - if e.indexref != nil { - e.indexref.Release() - e.indexref = nil - } - - return nil -} - -// WithLogger sets the logger for the engine. -func (e *Engine) WithLogger(log *zap.Logger) { - e.logger = log.With(zap.String("engine", "tsm1")) - - e.FileStore.WithLogger(e.logger) -} - -// IsIdle returns true if the cache is empty, there are no running compactions and the -// shard is fully compacted. -func (e *Engine) IsIdle() bool { - cacheEmpty := e.Cache.Size() == 0 - return cacheEmpty && e.compactionTracker.AllActive() == 0 && e.CompactionPlan.FullyCompacted() -} - -// WritePoints saves the set of points in the engine. -func (e *Engine) WritePoints(points []models.Point) error { - collection := tsdb.NewSeriesCollection(points) - - values, err := CollectionToValues(collection) - if err != nil { - return err - } - - if err := e.WriteValues(values); err != nil { - return err - } - - return collection.PartialWriteError() -} - -// WriteValues saves the set of values in the engine. -func (e *Engine) WriteValues(values map[string][]Value) error { - e.mu.RLock() - defer e.mu.RUnlock() - - if err := e.Cache.WriteMulti(values); err != nil { - return err - } - - return nil -} - -// ForEachMeasurementName iterates over each measurement name in the engine. -func (e *Engine) ForEachMeasurementName(fn func(name []byte) error) error { - return e.index.ForEachMeasurementName(fn) -} - -// compactionLevel describes a snapshot or levelled compaction. 
-type compactionLevel int - -func (l compactionLevel) String() string { - switch l { - case 0: - return "snapshot" - case 1, 2, 3: - return fmt.Sprint(int(l)) - case 4: - return "optimize" - case 5: - return "full" - default: - panic("unsupported compaction level") - } -} - -// compactionTracker tracks compactions and snapshots within the Engine. -// -// As well as being responsible for providing atomic reads and writes to the -// statistics tracking the various compaction operations, compactionTracker also -// mirrors any writes to the prometheus block metrics, which the Engine exposes. -// -// *NOTE* - compactionTracker fields should not be directory modified. Doing so -// could result in the Engine exposing inaccurate metrics. -type compactionTracker struct { - metrics *compactionMetrics - labels prometheus.Labels - // Note: Compactions are levelled as follows: - // 0 – Snapshots - // 1-3 – Levelled compactions - // 4 – Optimize compactions - // 5 – Full compactions - - ok [6]uint64 // Counter of TSM compactions (by level) that have successfully completed. - active [6]uint64 // Gauge of TSM compactions (by level) currently running. - errors [6]uint64 // Counter of TSM compcations (by level) that have failed due to error. - queue [6]uint64 // Gauge of TSM compactions queues (by level). -} - -func newCompactionTracker(metrics *compactionMetrics, defaultLables prometheus.Labels) *compactionTracker { - return &compactionTracker{metrics: metrics, labels: defaultLables} -} - -// Labels returns a copy of the default labels used by the tracker's metrics. -// The returned map is safe for modification. -func (t *compactionTracker) Labels(level compactionLevel) prometheus.Labels { - labels := make(prometheus.Labels, len(t.labels)) - for k, v := range t.labels { - labels[k] = v - } - - // All metrics have a level label. - labels["level"] = fmt.Sprint(level) - return labels -} - -// Completed returns the total number of compactions for the provided level. -func (t *compactionTracker) Completed(level int) uint64 { return atomic.LoadUint64(&t.ok[level]) } - -// Active returns the number of active snapshots (level 0), -// level 1, 2 or 3 compactions, optimize compactions (level 4), or full -// compactions (level 5). -func (t *compactionTracker) Active(level int) uint64 { - return atomic.LoadUint64(&t.active[level]) -} - -// AllActive returns the number of active snapshots and compactions. -func (t *compactionTracker) AllActive() uint64 { - var total uint64 - for i := 0; i < len(t.active); i++ { - total += atomic.LoadUint64(&t.active[i]) - } - return total -} - -// ActiveOptimise returns the number of active Optimise compactions. -// -// ActiveOptimise is a helper for Active(4). -func (t *compactionTracker) ActiveOptimise() uint64 { return t.Active(4) } - -// ActiveFull returns the number of active Full compactions. -// -// ActiveFull is a helper for Active(5). -func (t *compactionTracker) ActiveFull() uint64 { return t.Active(5) } - -// Errors returns the total number of errors encountered attempting compactions -// for the provided level. -func (t *compactionTracker) Errors(level int) uint64 { return atomic.LoadUint64(&t.errors[level]) } - -// IncActive increments the number of active compactions for the provided level. -func (t *compactionTracker) IncActive(level compactionLevel) { - atomic.AddUint64(&t.active[level], 1) - - labels := t.Labels(level) - t.metrics.CompactionsActive.With(labels).Inc() -} - -// IncFullActive increments the number of active Full compactions. 
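Labels above copies the shared default label map before adding the per-call "level" entry, so the defaults are never mutated. A minimal sketch of that copy-then-extend step, using a plain map[string]string in place of prometheus.Labels.

package main

import "fmt"

// withLevel returns a copy of the defaults with a "level" label added,
// leaving the shared defaults untouched.
func withLevel(defaults map[string]string, level int) map[string]string {
	labels := make(map[string]string, len(defaults)+1)
	for k, v := range defaults {
		labels[k] = v
	}
	labels["level"] = fmt.Sprint(level)
	return labels
}

func main() {
	defaults := map[string]string{"engine": "tsm1"}
	fmt.Println(withLevel(defaults, 5)) // map[engine:tsm1 level:5]
	fmt.Println(defaults)               // unchanged: map[engine:tsm1]
}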
-func (t *compactionTracker) IncFullActive() { t.IncActive(5) } - -// DecActive decrements the number of active compactions for the provided level. -func (t *compactionTracker) DecActive(level compactionLevel) { - atomic.AddUint64(&t.active[level], ^uint64(0)) - - labels := t.Labels(level) - t.metrics.CompactionsActive.With(labels).Dec() -} - -// DecFullActive decrements the number of active Full compactions. -func (t *compactionTracker) DecFullActive() { t.DecActive(5) } - -// Attempted updates the number of compactions attempted for the provided level. -func (t *compactionTracker) Attempted(level compactionLevel, success bool, reason string, duration time.Duration) { - if success { - atomic.AddUint64(&t.ok[level], 1) - - labels := t.Labels(level) - t.metrics.CompactionDuration.With(labels).Observe(duration.Seconds()) - - // Total compactions metric has reason and status. - labels["reason"] = reason - labels["status"] = "ok" - t.metrics.Compactions.With(labels).Inc() - return - } - - atomic.AddUint64(&t.errors[level], 1) - - labels := t.Labels(level) - labels["status"] = "error" - labels["reason"] = reason - t.metrics.Compactions.With(labels).Inc() -} - -// SnapshotAttempted updates the number of snapshots attempted. -func (t *compactionTracker) SnapshotAttempted(success bool, reason CacheStatus, duration time.Duration) { - t.Attempted(0, success, reason.String(), duration) -} - -// SetQueue sets the compaction queue depth for the provided level. -func (t *compactionTracker) SetQueue(level compactionLevel, length uint64) { - atomic.StoreUint64(&t.queue[level], length) - - labels := t.Labels(level) - t.metrics.CompactionQueue.With(labels).Set(float64(length)) -} - -// SetOptimiseQueue sets the queue depth for Optimisation compactions. -func (t *compactionTracker) SetOptimiseQueue(length uint64) { t.SetQueue(4, length) } - -// SetFullQueue sets the queue depth for Full compactions. -func (t *compactionTracker) SetFullQueue(length uint64) { t.SetQueue(5, length) } - -func (e *Engine) WriteSnapshot(ctx context.Context, status CacheStatus) error { - start := time.Now() - err := e.writeSnapshot(ctx) - if err != nil && err != errCompactionsDisabled { - e.logger.Info("Error writing snapshot", zap.Error(err)) - } - e.compactionTracker.SnapshotAttempted( - err == nil || err == errCompactionsDisabled || err == ErrSnapshotInProgress, - status, time.Since(start)) - - if err != nil { - return err - } - return nil -} - -// WriteSnapshot will snapshot the cache and write a new TSM file with its contents, releasing the snapshot when done. -func (e *Engine) writeSnapshot(ctx context.Context) error { - span, ctx := tracing.StartSpanFromContext(ctx) - defer span.Finish() - - // Lock and grab the cache snapshot along with all the closed WAL - // filenames associated with the snapshot - - started := time.Now() - - log, logEnd := logger.NewOperation(ctx, e.logger, "Cache snapshot", "tsm1_cache_snapshot") - defer func() { - elapsed := time.Since(started) - log.Info("Snapshot for path written", - zap.String("path", e.path), - zap.Duration("duration", elapsed)) - logEnd() - }() - - var ( - snapshot *Cache - segments []string - ) - if err := e.snapshotter.AcquireSegments(ctx, func(segs []string) (err error) { - segments = segs - - e.mu.Lock() - snapshot, err = e.Cache.Snapshot() - e.mu.Unlock() - return err - }); err != nil { - return err - } - - if snapshot.Size() == 0 { - e.Cache.ClearSnapshot(true) - return nil - } - - // The snapshotted cache may have duplicate points and unsorted data. 
We need to deduplicate - // it before writing the snapshot. This can be very expensive so it's done while we are not - // holding the engine write lock. - snapshot.Deduplicate() - - return e.writeSnapshotAndCommit(ctx, log, snapshot, segments) -} - -// writeSnapshotAndCommit will write the passed cache to a new TSM file and remove the closed WAL segments. -func (e *Engine) writeSnapshotAndCommit(ctx context.Context, log *zap.Logger, snapshot *Cache, segments []string) (err error) { - defer func() { - if err != nil { - e.Cache.ClearSnapshot(false) - } - }() - - // write the new snapshot files - newFiles, err := e.Compactor.WriteSnapshot(ctx, snapshot) - if err != nil { - log.Info("Error writing snapshot from compactor", zap.Error(err)) - return err - } - - return e.snapshotter.CommitSegments(ctx, segments, func() error { - e.mu.RLock() - defer e.mu.RUnlock() - - // update the file store with these new files - if err := e.FileStore.Replace(nil, newFiles); err != nil { - log.Info("Error adding new TSM files from snapshot", zap.Error(err)) - return err - } - - // clear the snapshot from the in-memory cache - e.Cache.ClearSnapshot(true) - return nil - }) -} - -// compactCache checks once per second if the in-memory cache should be -// snapshotted to a TSM file. -func (e *Engine) compactCache() { - t := time.NewTicker(time.Second) - defer t.Stop() - for { - e.mu.RLock() - quit := e.snapDone - e.mu.RUnlock() - - select { - case <-quit: - return - - case <-t.C: - e.Cache.UpdateAge() - status := e.ShouldCompactCache(time.Now()) - if status == CacheStatusOkay { - continue - } - - span, ctx := tracing.StartSpanFromContextWithOperationName(context.Background(), "compact cache") - span.LogKV("path", e.path) - - err := e.WriteSnapshot(ctx, status) - if err != nil && err != errCompactionsDisabled && err != ErrSnapshotInProgress { - e.logger.Info("Error writing snapshot", zap.Error(err)) - } - - span.Finish() - } - } -} - -// CacheStatus describes the current state of the cache, with respect to whether -// it is ready to be snapshotted or not. -type CacheStatus int - -// Possible types of Cache status -const ( - CacheStatusOkay CacheStatus = iota // Cache is Okay - do not snapshot. - CacheStatusSizeExceeded // The cache is large enough to be snapshotted. - CacheStatusAgeExceeded // The cache is past the age threshold to be snapshotted. - CacheStatusColdNoWrites // The cache has not been written to for long enough that it should be snapshotted. - CacheStatusRetention // The cache was snapshotted before running retention. - CacheStatusFullCompaction // The cache was snapshotted as part of a full compaction. - CacheStatusBackup // The cache was snapshotted before running backup. -) - -// ShouldCompactCache returns a status indicating if the Cache should be -// snapshotted. There are three situations when the cache should be snapshotted: -// -// - the Cache size is over its flush size threshold; -// - the Cache has not been snapshotted for longer than its flush time threshold; or -// - the Cache has not been written since the write cold threshold. -// -func (e *Engine) ShouldCompactCache(t time.Time) CacheStatus { - sz := e.Cache.Size() - if sz == 0 { - return 0 - } - - // Cache is now big enough to snapshot. - if sz > e.CacheFlushMemorySizeThreshold { - return CacheStatusSizeExceeded - } - - // Cache is now old enough to snapshot, regardless of last write or age. 
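Taken together, the checks above and immediately below order the snapshot triggers as size first, then age, then write-cold. A standalone restatement of that ordering; field names are abbreviated and the thresholds are parameters rather than engine state.

package main

import (
	"fmt"
	"time"
)

type cacheStatus int

const (
	statusOkay cacheStatus = iota
	statusSizeExceeded
	statusAgeExceeded
	statusColdNoWrites
)

// shouldSnapshot mirrors the ordering in ShouldCompactCache: an empty cache
// never snapshots, size wins over age, and age wins over write-cold.
func shouldSnapshot(size, sizeLimit uint64, age, ageLimit, sinceLastWrite, coldLimit time.Duration) cacheStatus {
	if size == 0 {
		return statusOkay
	}
	if size > sizeLimit {
		return statusSizeExceeded
	}
	if ageLimit > 0 && age > ageLimit {
		return statusAgeExceeded
	}
	if sinceLastWrite > coldLimit {
		return statusColdNoWrites
	}
	return statusOkay
}

func main() {
	status := shouldSnapshot(32<<20, 25<<20, time.Minute, 10*time.Minute, time.Second, 10*time.Minute)
	fmt.Println(status) // 1 (size exceeded)
}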
- if e.CacheFlushAgeDurationThreshold > 0 && e.Cache.Age() > e.CacheFlushAgeDurationThreshold { - return CacheStatusAgeExceeded - } - - // Cache has not been written to for a long time. - if t.Sub(e.Cache.LastWriteTime()) > e.CacheFlushWriteColdDuration { - return CacheStatusColdNoWrites - } - return CacheStatusOkay -} - -func (e *Engine) lastModified() time.Time { - fsTime := e.FileStore.LastModified() - cacheTime := e.Cache.LastWriteTime() - - if cacheTime.After(fsTime) { - return cacheTime - } - - return fsTime -} - -func (e *Engine) compact(wg *sync.WaitGroup) { - t := time.NewTicker(time.Second) - defer t.Stop() - - for { - e.mu.RLock() - quit := e.done - e.mu.RUnlock() - - select { - case <-quit: - return - - case <-t.C: - - span, ctx := tracing.StartSpanFromContext(context.Background()) - - // Find our compaction plans - level1Groups := e.CompactionPlan.PlanLevel(1) - level2Groups := e.CompactionPlan.PlanLevel(2) - level3Groups := e.CompactionPlan.PlanLevel(3) - level4Groups := e.CompactionPlan.Plan(e.lastModified()) - e.compactionTracker.SetOptimiseQueue(uint64(len(level4Groups))) - - // If no full compactions are need, see if an optimize is needed - if len(level4Groups) == 0 { - level4Groups = e.CompactionPlan.PlanOptimize() - e.compactionTracker.SetOptimiseQueue(uint64(len(level4Groups))) - } - - // Update the level plan queue stats - e.compactionTracker.SetQueue(1, uint64(len(level1Groups))) - e.compactionTracker.SetQueue(2, uint64(len(level2Groups))) - e.compactionTracker.SetQueue(3, uint64(len(level3Groups))) - - // Set the queue depths on the scheduler - e.scheduler.setDepth(1, len(level1Groups)) - e.scheduler.setDepth(2, len(level2Groups)) - e.scheduler.setDepth(3, len(level3Groups)) - e.scheduler.setDepth(4, len(level4Groups)) - - // Find the next compaction that can run and try to kick it off - level, runnable := e.scheduler.next() - if runnable { - span.LogKV("level", level) - switch level { - case 1: - if e.compactHiPriorityLevel(ctx, level1Groups[0], 1, false, wg) { - level1Groups = level1Groups[1:] - } - case 2: - if e.compactHiPriorityLevel(ctx, level2Groups[0], 2, false, wg) { - level2Groups = level2Groups[1:] - } - case 3: - if e.compactLoPriorityLevel(ctx, level3Groups[0], 3, true, wg) { - level3Groups = level3Groups[1:] - } - case 4: - if e.compactFull(ctx, level4Groups[0], wg) { - level4Groups = level4Groups[1:] - } - } - } - - // Release all the plans we didn't start. - e.CompactionPlan.Release(level1Groups) - e.CompactionPlan.Release(level2Groups) - e.CompactionPlan.Release(level3Groups) - e.CompactionPlan.Release(level4Groups) - - if runnable { - span.Finish() - } - } - } -} - -// compactHiPriorityLevel kicks off compactions using the high priority policy. It returns -// true if the compaction was started -func (e *Engine) compactHiPriorityLevel(ctx context.Context, grp CompactionGroup, level compactionLevel, fast bool, wg *sync.WaitGroup) bool { - s := e.levelCompactionStrategy(grp, fast, level) - if s == nil { - return false - } - - // Try hi priority limiter, otherwise steal a little from the low priority if we can. 
- if e.compactionLimiter.TryTake() { - e.compactionTracker.IncActive(level) - - wg.Add(1) - go func() { - defer wg.Done() - defer e.compactionTracker.DecActive(level) - defer e.compactionLimiter.Release() - s.Apply(ctx) - // Release the files in the compaction plan - e.CompactionPlan.Release([]CompactionGroup{s.group}) - }() - return true - } - - // Return the unused plans - return false -} - -// compactLoPriorityLevel kicks off compactions using the lo priority policy. It returns -// the plans that were not able to be started -func (e *Engine) compactLoPriorityLevel(ctx context.Context, grp CompactionGroup, level compactionLevel, fast bool, wg *sync.WaitGroup) bool { - s := e.levelCompactionStrategy(grp, fast, level) - if s == nil { - return false - } - - // Try the lo priority limiter, otherwise steal a little from the high priority if we can. - if e.compactionLimiter.TryTake() { - e.compactionTracker.IncActive(level) - wg.Add(1) - go func() { - defer wg.Done() - defer e.compactionTracker.DecActive(level) - defer e.compactionLimiter.Release() - s.Apply(ctx) - // Release the files in the compaction plan - e.CompactionPlan.Release([]CompactionGroup{s.group}) - }() - return true - } - return false -} - -// compactFull kicks off full and optimize compactions using the lo priority policy. It returns -// the plans that were not able to be started. -func (e *Engine) compactFull(ctx context.Context, grp CompactionGroup, wg *sync.WaitGroup) bool { - s := e.fullCompactionStrategy(grp, false) - if s == nil { - return false - } - - // Try the lo priority limiter, otherwise steal a little from the high priority if we can. - if e.compactionLimiter.TryTake() { - // Attempt to get ownership of the semaphore for this engine. If the - // default semaphore is in use then ownership will always be granted. - ttl := influxdb.DefaultLeaseTTL - lastCompaction := time.Duration(atomic.LoadInt64(&e.lastFullCompactionDuration)) - if lastCompaction > ttl { - ttl = lastCompaction // If the last full compaction took > default ttl then set a new TTL - } - - lease, err := e.fullCompactionSemaphore.TryAcquire(ctx, ttl) - if err == influxdb.ErrNoAcquire { - e.logger.Info("Cannot acquire semaphore ownership to carry out full compaction", zap.Duration("semaphore_requested_ttl", ttl)) - e.compactionLimiter.Release() - return false - } else if err != nil { - e.logger.Warn("Failed to execute full compaction", zap.Error(err), zap.Duration("semaphore_requested_ttl", ttl)) - e.compactionLimiter.Release() - return false - } else if e.fullCompactionSemaphore != influxdb.NopSemaphore { - e.logger.Info("Acquired semaphore ownership for full compaction", zap.Duration("semaphore_requested_ttl", ttl)) - } - - ctx, cancel := context.WithCancel(ctx) - go e.keepLeaseAlive(ctx, lease) // context cancelled when compaction finished. - - e.compactionTracker.IncFullActive() - wg.Add(1) - go func() { - defer wg.Done() - defer e.compactionTracker.DecFullActive() - defer e.compactionLimiter.Release() - - now := time.Now() // Track how long compaction takes - s.Apply(ctx) - atomic.StoreInt64(&e.lastFullCompactionDuration, int64(time.Since(now))) - - // Release the files in the compaction plan - e.CompactionPlan.Release([]CompactionGroup{s.group}) - cancel() - }() - return true - } - return false -} - -// keepLeaseAlive blocks, keeping a lease alive until the context is cancelled. 
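keepLeaseAlive, defined just below, renews the semaphore lease on a ticker at half the TTL and releases it once the compaction's context is cancelled. A self-contained sketch of the same renew-until-cancelled loop against a toy lease interface; nothing here is the real influxdb.Lease API.

package main

import (
	"context"
	"fmt"
	"time"
)

// lease is a stand-in for the semaphore lease being kept alive.
type lease interface {
	KeepAlive(ctx context.Context) error
	Release(ctx context.Context) error
}

type printLease struct{}

func (printLease) KeepAlive(context.Context) error { fmt.Println("renewed"); return nil }
func (printLease) Release(context.Context) error   { fmt.Println("released"); return nil }

// keepAlive renews l every ttl/2 until ctx is cancelled, then releases it.
func keepAlive(ctx context.Context, l lease, ttl time.Duration) {
	ticker := time.NewTicker(ttl / 2)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			_ = l.Release(context.Background())
			return
		case <-ticker.C:
			_ = l.KeepAlive(ctx)
		}
	}
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	go keepAlive(ctx, printLease{}, 40*time.Millisecond)
	time.Sleep(100 * time.Millisecond)
	cancel()
	time.Sleep(20 * time.Millisecond)
}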
-func (e *Engine) keepLeaseAlive(ctx context.Context, lease influxdb.Lease) { - ttl, err := lease.TTL(ctx) - if err != nil { - e.logger.Warn("Unable to get TTL for lease on semaphore", zap.Error(err)) - ttl = influxdb.DefaultLeaseTTL // This is probably a reasonable fallback. - } - - // Renew the lease when ttl is halved - ticker := time.NewTicker(ttl / 2) - for { - select { - case <-ctx.Done(): - ticker.Stop() - if err := lease.Release(ctx); err != nil { - e.logger.Warn("Lease on sempahore was not released", zap.Error(err)) - } - return - case <-ticker.C: - if err := lease.KeepAlive(ctx); err != nil { - e.logger.Warn("Unable to extend lease", zap.Error(err)) - } else { - e.logger.Info("Extended lease on semaphore") - } - } - } -} - -// compactionStrategy holds the details of what to do in a compaction. -type compactionStrategy struct { - group CompactionGroup - - fast bool - level compactionLevel - - tracker *compactionTracker - - logger *zap.Logger - compactor *Compactor - fileStore *FileStore - - engine *Engine -} - -// Apply concurrently compacts all the groups in a compaction strategy. -func (s *compactionStrategy) Apply(ctx context.Context) { - s.compactGroup(ctx) -} - -// compactGroup executes the compaction strategy against a single CompactionGroup. -func (s *compactionStrategy) compactGroup(ctx context.Context) { - span, ctx := tracing.StartSpanFromContext(ctx) - defer span.Finish() - - now := time.Now() - group := s.group - log, logEnd := logger.NewOperation(ctx, s.logger, "TSM compaction", "tsm1_compact_group") - defer logEnd() - - log.Info("Beginning compaction", zap.Int("tsm1_files_n", len(group))) - span.LogKV("file qty", len(group), "fast", s.fast) - for i, f := range group { - log.Info("Compacting file", zap.Int("tsm1_index", i), zap.String("tsm1_file", f)) - span.LogKV("compact file", "start", "tsm1_index", i, "tsm1_file", f) - } - - var ( - err error - files []string - ) - - if s.fast { - files, err = s.compactor.CompactFast(group) - } else { - files, err = s.compactor.CompactFull(group) - } - - if err != nil { - tracing.LogError(span, err) - _, inProgress := err.(errCompactionInProgress) - if err == errCompactionsDisabled || inProgress { - log.Info("Aborted compaction", zap.Error(err)) - - if _, ok := err.(errCompactionInProgress); ok { - time.Sleep(time.Second) - } - return - } - - log.Info("Error compacting TSM files", zap.Error(err)) - s.tracker.Attempted(s.level, false, "", 0) - time.Sleep(time.Second) - return - } - - if err := s.fileStore.ReplaceWithCallback(group, files, nil); err != nil { - tracing.LogError(span, err) - log.Info("Error replacing new TSM files", zap.Error(err)) - s.tracker.Attempted(s.level, false, "", 0) - time.Sleep(time.Second) - - // Remove the new snapshot files. We will try again. - for _, file := range files { - if err := os.Remove(file); err != nil { - log.Error("Unable to remove file", zap.String("path", file), zap.Error(err)) - } - } - - return - } - - for i, f := range files { - log.Info("Compacted file", zap.Int("tsm1_index", i), zap.String("tsm1_file", f)) - span.LogKV("compact file", "end", "tsm1_index", i, "tsm1_file", f) - } - log.Info("Finished compacting files", zap.Int("tsm1_files_n", len(files))) - s.tracker.Attempted(s.level, true, "", time.Since(now)) -} - -// levelCompactionStrategy returns a compactionStrategy for the given level. -// It returns nil if there are no TSM files to compact. 
-func (e *Engine) levelCompactionStrategy(group CompactionGroup, fast bool, level compactionLevel) *compactionStrategy { - return &compactionStrategy{ - group: group, - logger: e.logger.With(zap.Int("tsm1_level", int(level)), zap.String("tsm1_strategy", "level")), - fileStore: e.FileStore, - compactor: e.Compactor, - fast: fast, - engine: e, - level: level, - tracker: e.compactionTracker, - } -} - -// fullCompactionStrategy returns a compactionStrategy for higher level generations of TSM files. -// It returns nil if there are no TSM files to compact. -func (e *Engine) fullCompactionStrategy(group CompactionGroup, optimize bool) *compactionStrategy { - s := &compactionStrategy{ - group: group, - logger: e.logger.With(zap.String("tsm1_strategy", "full"), zap.Bool("tsm1_optimize", optimize)), - fileStore: e.FileStore, - compactor: e.Compactor, - fast: optimize, - engine: e, - level: 5, - tracker: e.compactionTracker, - } - - if optimize { - s.level = 4 - } - return s -} - -// cleanup removes all temp files and dirs that exist on disk. This is should only be run at startup to avoid -// removing tmp files that are still in use. -func (e *Engine) cleanup() error { - allfiles, err := ioutil.ReadDir(e.path) - if os.IsNotExist(err) { - return nil - } else if err != nil { - return err - } - - ext := fmt.Sprintf(".%s", TmpTSMFileExtension) - for _, f := range allfiles { - // Check to see if there are any `.tmp` directories that were left over from failed shard snapshots - if f.IsDir() && strings.HasSuffix(f.Name(), ext) { - if err := os.RemoveAll(filepath.Join(e.path, f.Name())); err != nil { - return fmt.Errorf("error removing tmp snapshot directory %q: %s", f.Name(), err) - } - } - } - - return e.cleanupTempTSMFiles() -} - -func (e *Engine) cleanupTempTSMFiles() error { - files, err := filepath.Glob(filepath.Join(e.path, fmt.Sprintf("*.%s", CompactionTempExtension))) - if err != nil { - return fmt.Errorf("error getting compaction temp files: %s", err.Error()) - } - - for _, f := range files { - if err := os.Remove(f); err != nil { - return fmt.Errorf("error removing temp compaction files: %v", err) - } - } - return nil -} - -// KeyCursor returns a KeyCursor for the given key starting at time t. -func (e *Engine) KeyCursor(ctx context.Context, key []byte, t int64, ascending bool) *KeyCursor { - return e.FileStore.KeyCursor(ctx, key, t, ascending) -} - -// IteratorCost produces the cost of an iterator. -func (e *Engine) IteratorCost(measurement string, opt query.IteratorOptions) (query.IteratorCost, error) { - // Determine if this measurement exists. If it does not, then no shards are - // accessed to begin with. - if exists, err := e.index.MeasurementExists([]byte(measurement)); err != nil { - return query.IteratorCost{}, err - } else if !exists { - return query.IteratorCost{}, nil - } - - tagSets, err := e.index.TagSets([]byte(measurement), opt) - if err != nil { - return query.IteratorCost{}, err - } - - // Attempt to retrieve the ref from the main expression (if it exists). - var ref *influxql.VarRef - if opt.Expr != nil { - if v, ok := opt.Expr.(*influxql.VarRef); ok { - ref = v - } else if call, ok := opt.Expr.(*influxql.Call); ok { - if len(call.Args) > 0 { - ref, _ = call.Args[0].(*influxql.VarRef) - } - } - } - - // Count the number of series concatenated from the tag set. - cost := query.IteratorCost{NumShards: 1} - for _, t := range tagSets { - cost.NumSeries += int64(len(t.SeriesKeys)) - for i, key := range t.SeriesKeys { - // Retrieve the cost for the main expression (if it exists). 
- if ref != nil { - c := e.seriesCost(key, ref.Val, opt.StartTime, opt.EndTime) - cost = cost.Combine(c) - } - - // Retrieve the cost for every auxiliary field since these are also - // iterators that we may have to look through. - // We may want to separate these though as we are unlikely to incur - // anywhere close to the full costs of the auxiliary iterators because - // many of the selected values are usually skipped. - for _, ref := range opt.Aux { - c := e.seriesCost(key, ref.Val, opt.StartTime, opt.EndTime) - cost = cost.Combine(c) - } - - // Retrieve the expression names in the condition (if there is a condition). - // We will also create cursors for these too. - if t.Filters[i] != nil { - refs := influxql.ExprNames(t.Filters[i]) - for _, ref := range refs { - c := e.seriesCost(key, ref.Val, opt.StartTime, opt.EndTime) - cost = cost.Combine(c) - } - } - } - } - return cost, nil -} - -func (e *Engine) seriesCost(seriesKey, field string, tmin, tmax int64) query.IteratorCost { - key := SeriesFieldKeyBytes(seriesKey, field) - c := e.FileStore.Cost(key, tmin, tmax) - - // Retrieve the range of values within the cache. - cacheValues := e.Cache.Values(key) - c.CachedValues = int64(len(cacheValues.Include(tmin, tmax))) - return c -} - -// SeriesFieldKey combine a series key and field name for a unique string to be hashed to a numeric ID. -func SeriesFieldKey(seriesKey, field string) string { - return seriesKey + keyFieldSeparator + field -} - -func SeriesFieldKeyBytes(seriesKey, field string) []byte { - b := make([]byte, len(seriesKey)+len(keyFieldSeparator)+len(field)) - i := copy(b[:], seriesKey) - i += copy(b[i:], KeyFieldSeparatorBytes) - copy(b[i:], field) - return b -} - -// AppendSeriesFieldKeyBytes combines seriesKey and field such -// that can be used to search a TSM index. The value is appended to dst and -// the extended buffer returned. -func AppendSeriesFieldKeyBytes(dst, seriesKey, field []byte) []byte { - dst = append(dst, seriesKey...) - dst = append(dst, KeyFieldSeparatorBytes...) - return append(dst, field...) -} - -var ( - blockToFieldType = [8]influxql.DataType{ - BlockFloat64: influxql.Float, - BlockInteger: influxql.Integer, - BlockBoolean: influxql.Boolean, - BlockString: influxql.String, - BlockUnsigned: influxql.Unsigned, - BlockUndefined: influxql.Unknown, - 6: influxql.Unknown, - 7: influxql.Unknown, - } -) - -func BlockTypeToInfluxQLDataType(typ byte) influxql.DataType { return blockToFieldType[typ&7] } - -var ( - blockTypeFieldType = [8]cursors.FieldType{ - BlockFloat64: cursors.Float, - BlockInteger: cursors.Integer, - BlockBoolean: cursors.Boolean, - BlockString: cursors.String, - BlockUnsigned: cursors.Unsigned, - BlockUndefined: cursors.Undefined, - 6: cursors.Undefined, - 7: cursors.Undefined, - } -) - -func BlockTypeToFieldType(typ byte) cursors.FieldType { return blockTypeFieldType[typ&7] } - -// SeriesAndFieldFromCompositeKey returns the series key and the field key extracted from the composite key. -func SeriesAndFieldFromCompositeKey(key []byte) ([]byte, []byte) { - sep := bytes.Index(key, KeyFieldSeparatorBytes) - if sep == -1 { - // No field??? - return key, nil - } - return key[:sep], key[sep+len(keyFieldSeparator):] -} - -// readTracker tracks reads from the engine. 
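The key helpers above all revolve around the "#!~#" separator. A self-contained round trip showing the join and the split; the helpers are re-implemented locally rather than imported, since this change relocates the package.

package main

import (
	"bytes"
	"fmt"
)

const keyFieldSeparator = "#!~#"

// seriesFieldKey joins a series key and a field name, as SeriesFieldKey does.
func seriesFieldKey(seriesKey, field string) string {
	return seriesKey + keyFieldSeparator + field
}

// splitCompositeKey reverses the join, as SeriesAndFieldFromCompositeKey does.
func splitCompositeKey(key []byte) (series, field []byte) {
	sep := bytes.Index(key, []byte(keyFieldSeparator))
	if sep == -1 {
		return key, nil
	}
	return key[:sep], key[sep+len(keyFieldSeparator):]
}

func main() {
	key := seriesFieldKey("cpu,host=a", "value")
	s, f := splitCompositeKey([]byte(key))
	fmt.Printf("%s | %s\n", s, f) // cpu,host=a | value
}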
-type readTracker struct { - metrics *readMetrics - labels prometheus.Labels - cursors uint64 - seeks uint64 -} - -func newReadTracker(metrics *readMetrics, defaultLabels prometheus.Labels) *readTracker { - t := &readTracker{metrics: metrics, labels: defaultLabels} - t.AddCursors(0) - t.AddSeeks(0) - return t -} - -// Labels returns a copy of the default labels used by the tracker's metrics. -// The returned map is safe for modification. -func (t *readTracker) Labels() prometheus.Labels { - labels := make(prometheus.Labels, len(t.labels)) - for k, v := range t.labels { - labels[k] = v - } - return labels -} - -// AddCursors increases the number of cursors. -func (t *readTracker) AddCursors(n uint64) { - atomic.AddUint64(&t.cursors, n) - t.metrics.Cursors.With(t.labels).Add(float64(n)) -} - -// AddSeeks increases the number of location seeks. -func (t *readTracker) AddSeeks(n uint64) { - atomic.AddUint64(&t.seeks, n) - t.metrics.Seeks.With(t.labels).Add(float64(n)) -} diff --git a/tsdb/tsm1/engine_cursor.go b/tsdb/tsm1/engine_cursor.go deleted file mode 100644 index fbd3c6b8dd..0000000000 --- a/tsdb/tsm1/engine_cursor.go +++ /dev/null @@ -1,11 +0,0 @@ -package tsm1 - -import ( - "context" - - "github.com/influxdata/influxdb/v2/tsdb/cursors" -) - -func (e *Engine) CreateCursorIterator(ctx context.Context) (cursors.CursorIterator, error) { - return &arrayCursorIterator{e: e}, nil -} diff --git a/tsdb/tsm1/engine_cursor_test.go b/tsdb/tsm1/engine_cursor_test.go deleted file mode 100644 index 321a8de22d..0000000000 --- a/tsdb/tsm1/engine_cursor_test.go +++ /dev/null @@ -1,117 +0,0 @@ -package tsm1_test - -import ( - "context" - "testing" - "time" - - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/cursors" -) - -func TestEngine_CursorIterator_Stats(t *testing.T) { - e := MustOpenEngine(t) - defer e.Close() - - points := []models.Point{ - models.MustNewPoint("cpu", - models.Tags{ - {Key: []byte("a"), Value: []byte("b")}, - }, - models.Fields{"value": 4.6}, - time.Now().UTC(), - ), - models.MustNewPoint("cpu", - models.Tags{ - {Key: []byte("a"), Value: []byte("b")}, - }, - models.Fields{"value": 3.2}, - time.Now().UTC(), - ), - models.MustNewPoint("mem", - models.Tags{ - {Key: []byte("b"), Value: []byte("c")}, - }, - models.Fields{"value": int64(3)}, - time.Now().UTC(), - ), - } - - // Write into the index. 
- collection := tsdb.NewSeriesCollection(points) - if err := e.index.CreateSeriesListIfNotExists(collection); err != nil { - t.Fatal(err) - } - - if err := e.WritePoints(points); err != nil { - t.Fatal(err) - } - - e.MustWriteSnapshot() - - ctx := context.Background() - cursorIterator, err := e.CreateCursorIterator(ctx) - if err != nil { - t.Fatal(err) - } - - cur, err := cursorIterator.Next(ctx, &cursors.CursorRequest{ - Name: []byte("cpu"), - Tags: []models.Tag{{Key: []byte("a"), Value: []byte("b")}}, - Field: "value", - EndTime: time.Now().UTC().UnixNano(), - Ascending: true, - }) - - if err != nil { - t.Fatal(err) - } - - if cur == nil { - t.Fatal("expected cursor to be present") - } - - fc, ok := cur.(cursors.FloatArrayCursor) - if !ok { - t.Fatalf("unexpected cursor type: expected FloatArrayCursor, got %#v", cur) - } - - // drain the cursor - for a := fc.Next(); a.Len() > 0; a = fc.Next() { - } - - cur.Close() - - cur, err = cursorIterator.Next(ctx, &cursors.CursorRequest{ - Name: []byte("mem"), - Tags: []models.Tag{{Key: []byte("b"), Value: []byte("c")}}, - Field: "value", - EndTime: time.Now().UTC().UnixNano(), - Ascending: true, - }) - - if err != nil { - t.Fatal(err) - } - - if cur == nil { - t.Fatal("expected cursor to be present") - } - - defer cur.Close() - - ic, ok := cur.(cursors.IntegerArrayCursor) - if !ok { - t.Fatalf("unexpected cursor type: expected FloatArrayCursor, got %#v", cur) - } - - // drain the cursor - for a := ic.Next(); a.Len() > 0; a = ic.Next() { - } - - // iterator should report integer array stats - if got, exp := cursorIterator.Stats(), (cursors.CursorStats{ScannedValues: 3, ScannedBytes: 24}); exp != got { - t.Fatalf("expected %v, got %v", exp, got) - } -} diff --git a/tsdb/tsm1/engine_delete_prefix.go b/tsdb/tsm1/engine_delete_prefix.go deleted file mode 100644 index 378ebd8c9a..0000000000 --- a/tsdb/tsm1/engine_delete_prefix.go +++ /dev/null @@ -1,303 +0,0 @@ -package tsm1 - -import ( - "bytes" - "context" - "fmt" - "math" - "strings" - "sync" - "time" - - "github.com/influxdata/influxdb/v2/kit/tracing" - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/tsi1" - "github.com/influxdata/influxql" -) - -// DeletePrefixRange removes all TSM data belonging to a bucket, and removes all index -// and series file data associated with the bucket. The provided time range ensures -// that only bucket data for that range is removed. -func (e *Engine) DeletePrefixRange(rootCtx context.Context, name []byte, min, max int64, pred Predicate) error { - span, ctx := tracing.StartSpanFromContext(rootCtx) - span.LogKV("name_prefix", fmt.Sprintf("%x", name), - "min", time.Unix(0, min), "max", time.Unix(0, max), - "has_pred", pred != nil, - ) - defer span.Finish() - // TODO(jeff): we need to block writes to this prefix while deletes are in progress - // otherwise we can end up in a situation where we have staged data in the cache or - // WAL that was deleted from the index, or worse. This needs to happen at a higher - // layer. - - // TODO(jeff): ensure the engine is not closed while we're running this. At least - // now we know that the series file or index won't be closed out from underneath - // of us. - - // Ensure that the index does not compact away the measurement or series we're - // going to delete before we're done with them. 
- span, _ = tracing.StartSpanFromContextWithOperationName(rootCtx, "disable index compactions") - e.index.DisableCompactions() - defer e.index.EnableCompactions() - e.index.Wait() - span.Finish() - - // Disable and abort running compactions so that tombstones added existing tsm - // files don't get removed. This would cause deleted measurements/series to - // re-appear once the compaction completed. We only disable the level compactions - // so that snapshotting does not stop while writing out tombstones. If it is stopped, - // and writing tombstones takes a long time, writes can get rejected due to the cache - // filling up. - span, _ = tracing.StartSpanFromContextWithOperationName(rootCtx, "disable tsm compactions") - e.disableLevelCompactions(true) - defer e.enableLevelCompactions(true) - span.Finish() - - span, _ = tracing.StartSpanFromContextWithOperationName(rootCtx, "disable series file compactions") - e.sfile.DisableCompactions() - defer e.sfile.EnableCompactions() - span.Finish() - - // TODO(jeff): are the query language values still a thing? - // Min and max time in the engine are slightly different from the query language values. - if min == influxql.MinTime { - min = math.MinInt64 - } - if max == influxql.MaxTime { - max = math.MaxInt64 - } - - // Run the delete on each TSM file in parallel and keep track of possibly dead keys. - - // TODO(jeff): keep a set of keys for each file to avoid contention. - // TODO(jeff): come up with a better way to figure out what keys we need to delete - // from the index. - - var possiblyDead struct { - sync.RWMutex - keys map[string]struct{} - } - possiblyDead.keys = make(map[string]struct{}) - - if err := e.FileStore.Apply(func(r TSMFile) error { - var predClone Predicate // Apply executes concurrently across files. - if pred != nil { - predClone = pred.Clone() - } - - // TODO(edd): tracing this deep down is currently speculative, so I have - // not added the tracing into the TSMReader API. - span, _ := tracing.StartSpanFromContextWithOperationName(rootCtx, "TSMFile delete prefix") - span.LogKV("file_path", r.Path()) - defer span.Finish() - - return r.DeletePrefix(name, min, max, predClone, func(key []byte) { - possiblyDead.Lock() - possiblyDead.keys[string(key)] = struct{}{} - possiblyDead.Unlock() - }) - }); err != nil { - return err - } - - span, _ = tracing.StartSpanFromContextWithOperationName(rootCtx, "Cache find delete keys") - span.LogKV("cache_size", e.Cache.Size()) - var keysChecked int // For tracing information. - // ApplySerialEntryFn cannot return an error in this invocation. - nameStr := string(name) - _ = e.Cache.ApplyEntryFn(func(k string, _ *entry) error { - keysChecked++ - if !strings.HasPrefix(k, nameStr) { - return nil - } - // TODO(edd): either use an unsafe conversion to []byte, or add a MatchesString - // method to tsm1.Predicate. - if pred != nil && !pred.Matches([]byte(k)) { - return nil - } - - // we have to double check every key in the cache because maybe - // it exists in the index but not yet on disk. - possiblyDead.keys[k] = struct{}{} - - return nil - }) - span.LogKV("cache_cardinality", keysChecked) - span.Finish() - - // Delete from the cache (traced in cache). - e.Cache.DeleteBucketRange(ctx, nameStr, min, max, pred) - - // Now that all of the data is purged, we need to find if some keys are fully deleted - // and if so, remove them from the index. - if err := e.FileStore.Apply(func(r TSMFile) error { - var predClone Predicate // Apply executes concurrently across files. 
- if pred != nil { - predClone = pred.Clone() - } - - // TODO(edd): tracing this deep down is currently speculative, so I have - // not added the tracing into the Engine API. - span, _ := tracing.StartSpanFromContextWithOperationName(rootCtx, "TSMFile determine fully deleted") - span.LogKV("file_path", r.Path()) - defer span.Finish() - - possiblyDead.RLock() - defer possiblyDead.RUnlock() - - var keysChecked int - iter := r.Iterator(name) - for i := 0; iter.Next(); i++ { - key := iter.Key() - if !bytes.HasPrefix(key, name) { - break - } - if predClone != nil && !predClone.Matches(key) { - continue - } - - // TODO(jeff): benchmark the locking here. - if i%1024 == 0 { // allow writes to proceed. - possiblyDead.RUnlock() - possiblyDead.RLock() - } - - if _, ok := possiblyDead.keys[string(key)]; ok { - possiblyDead.RUnlock() - possiblyDead.Lock() - delete(possiblyDead.keys, string(key)) - possiblyDead.Unlock() - possiblyDead.RLock() - } - } - span.LogKV("keys_checked", keysChecked) - return iter.Err() - }); err != nil { - return err - } - - span, _ = tracing.StartSpanFromContextWithOperationName(rootCtx, "Cache find delete keys") - span.LogKV("cache_size", e.Cache.Size()) - keysChecked = 0 - // ApplySerialEntryFn cannot return an error in this invocation. - _ = e.Cache.ApplyEntryFn(func(k string, _ *entry) error { - keysChecked++ - if !strings.HasPrefix(k, nameStr) { - return nil - } - // TODO(edd): either use an unsafe conversion to []byte, or add a MatchesString - // method to tsm1.Predicate. - if pred != nil && !pred.Matches([]byte(k)) { - return nil - } - - delete(possiblyDead.keys, k) - return nil - }) - span.LogKV("cache_cardinality", keysChecked) - span.Finish() - - if len(possiblyDead.keys) > 0 { - buf := make([]byte, 1024) - - // TODO(jeff): all of these methods have possible errors which opens us to partial - // failure scenarios. we need to either ensure that partial errors here are ok or - // do something to fix it. - // TODO(jeff): it's also important that all of the deletes happen atomically with - // the deletes of the data in the tsm files. - - // In this case the entire measurement (bucket) can be removed from the index. - if min == math.MinInt64 && max == math.MaxInt64 && pred == nil { - // The TSI index and Series File do not store series data in escaped form. - name = models.UnescapeMeasurement(name) - - // Build up a set of series IDs that we need to remove from the series file. - set := tsdb.NewSeriesIDSet() - itr, err := e.index.MeasurementSeriesIDIterator(name) - if err != nil { - return err - } - - var elem tsdb.SeriesIDElem - for elem, err = itr.Next(); err != nil; elem, err = itr.Next() { - if elem.SeriesID.IsZero() { - break - } - - set.AddNoLock(elem.SeriesID) - } - - if err != nil { - return err - } else if err := itr.Close(); err != nil { - return err - } - - // Remove the measurement from the index before the series file. - span, _ = tracing.StartSpanFromContextWithOperationName(rootCtx, "TSI drop measurement") - span.LogKV("measurement_name", fmt.Sprintf("%x", name)) - if err := e.index.DropMeasurement(name); err != nil { - return err - } - span.Finish() - - // Iterate over the series ids we previously extracted from the index - // and remove from the series file. 
- span, _ = tracing.StartSpanFromContextWithOperationName(rootCtx, "SFile Delete Series IDs") - span.LogKV("measurement_name", fmt.Sprintf("%x", name), "series_id_set_size", set.Cardinality()) - var ids []tsdb.SeriesID - set.ForEachNoLock(func(id tsdb.SeriesID) { ids = append(ids, id) }) - if err = e.sfile.DeleteSeriesIDs(ids); err != nil { - return err - } - span.Finish() - return err - } - - // This is the slow path, when not dropping the entire bucket (measurement) - span, _ = tracing.StartSpanFromContextWithOperationName(rootCtx, "TSI/SFile Delete keys") - span.LogKV("measurement_name", fmt.Sprintf("%x", name), "keys_to_delete", len(possiblyDead.keys)) - - // Convert key map to a slice. - possiblyDeadKeysSlice := make([][]byte, 0, len(possiblyDead.keys)) - for key := range possiblyDead.keys { - possiblyDeadKeysSlice = append(possiblyDeadKeysSlice, []byte(key)) - } - - const batchSize = 1000 - batch := make([]tsi1.DropSeriesItem, 0, batchSize) - ids := make([]tsdb.SeriesID, 0, batchSize) - for i := 0; i < len(possiblyDeadKeysSlice); i += batchSize { - isLastBatch := i+batchSize > len(possiblyDeadKeysSlice) - batch, ids = batch[:0], ids[:0] - - for j := 0; (i*batchSize)+j < len(possiblyDeadKeysSlice) && j < batchSize; j++ { - var item tsi1.DropSeriesItem - - // TODO(jeff): ugh reduce copies here - key := possiblyDeadKeysSlice[(i*batchSize)+j] - item.Key = []byte(key) - item.Key, _ = SeriesAndFieldFromCompositeKey(item.Key) - - name, tags := models.ParseKeyBytes(item.Key) - item.SeriesID = e.sfile.SeriesID(name, tags, buf) - if item.SeriesID.IsZero() { - continue - } - batch = append(batch, item) - ids = append(ids, item.SeriesID) - } - - // Remove from index & series file. - if err := e.index.DropSeries(batch, isLastBatch); err != nil { - return err - } else if err := e.sfile.DeleteSeriesIDs(ids); err != nil { - return err - } - } - span.Finish() - } - - return nil -} diff --git a/tsdb/tsm1/engine_delete_prefix_test.go b/tsdb/tsm1/engine_delete_prefix_test.go deleted file mode 100644 index 8cc010a3d6..0000000000 --- a/tsdb/tsm1/engine_delete_prefix_test.go +++ /dev/null @@ -1,158 +0,0 @@ -package tsm1_test - -import ( - "bytes" - "context" - "fmt" - "reflect" - "testing" - - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" -) - -func TestEngine_DeletePrefix(t *testing.T) { - // Create a few points. 
- p1 := MustParsePointString("cpu,host=0 value=1.1 6", "mm0") - p2 := MustParsePointString("cpu,host=A value=1.2 2", "mm0") - p3 := MustParsePointString("cpu,host=A value=1.3 3", "mm0") - p4 := MustParsePointString("cpu,host=B value=1.3 4", "mm0") - p5 := MustParsePointString("cpu,host=B value=1.3 5", "mm0") - p6 := MustParsePointString("cpu,host=C value=1.3 1", "mm0") - p7 := MustParsePointString("mem,host=C value=1.3 1", "mm1") - p8 := MustParsePointString("disk,host=C value=1.3 1", "mm2") - - e, err := NewEngine(tsm1.NewConfig(), t) - if err != nil { - t.Fatal(err) - } - if err := e.Open(context.Background()); err != nil { - t.Fatal(err) - } - defer e.Close() - - if err := e.writePoints(p1, p2, p3, p4, p5, p6, p7, p8); err != nil { - t.Fatalf("failed to write points: %s", err.Error()) - } - - if err := e.WriteSnapshot(context.Background(), tsm1.CacheStatusColdNoWrites); err != nil { - t.Fatalf("failed to snapshot: %s", err.Error()) - } - - keys := e.FileStore.Keys() - if exp, got := 6, len(keys); exp != got { - t.Fatalf("series count mismatch: exp %v, got %v", exp, got) - } - - if err := e.DeletePrefixRange(context.Background(), []byte("mm0"), 0, 3, nil); err != nil { - t.Fatalf("failed to delete series: %v", err) - } - - keys = e.FileStore.Keys() - if exp, got := 4, len(keys); exp != got { - t.Fatalf("series count mismatch: exp %v, got %v", exp, got) - } - - exp := map[string]byte{ - "mm0,\x00=cpu,host=0,\xff=value#!~#value": 0, - "mm0,\x00=cpu,host=B,\xff=value#!~#value": 0, - "mm1,\x00=mem,host=C,\xff=value#!~#value": 0, - "mm2,\x00=disk,host=C,\xff=value#!~#value": 0, - } - if !reflect.DeepEqual(keys, exp) { - t.Fatalf("unexpected series in file store: %v != %v", keys, exp) - } - - // Check that the series still exists in the index - iter, err := e.index.MeasurementSeriesIDIterator([]byte("mm0")) - if err != nil { - t.Fatalf("iterator error: %v", err) - } - defer iter.Close() - - elem, err := iter.Next() - if err != nil { - t.Fatal(err) - } - if elem.SeriesID.IsZero() { - t.Fatalf("series index mismatch: EOF, exp 2 series") - } - - // Lookup series. - name, tags := e.sfile.Series(elem.SeriesID) - if got, exp := name, []byte("mm0"); !bytes.Equal(got, exp) { - t.Fatalf("series mismatch: got %s, exp %s", got, exp) - } - - if !tags.Equal(models.NewTags(map[string]string{models.FieldKeyTagKey: "value", models.MeasurementTagKey: "cpu", "host": "0"})) && !tags.Equal(models.NewTags(map[string]string{models.FieldKeyTagKey: "value", models.MeasurementTagKey: "cpu", "host": "B"})) { - t.Fatalf(`series mismatch: got %s, exp either "host=0" or "host=B"`, tags) - } - iter.Close() - - // Deleting remaining series should remove them from the series. 
- if err := e.DeletePrefixRange(context.Background(), []byte("mm0"), 0, 9, nil); err != nil { - t.Fatalf("failed to delete series: %v", err) - } - - keys = e.FileStore.Keys() - if exp, got := 2, len(keys); exp != got { - t.Fatalf("series count mismatch: exp %v, got %v", exp, got) - } - - exp = map[string]byte{ - "mm1,\x00=mem,host=C,\xff=value#!~#value": 0, - "mm2,\x00=disk,host=C,\xff=value#!~#value": 0, - } - if !reflect.DeepEqual(keys, exp) { - t.Fatalf("unexpected series in file store: %v != %v", keys, exp) - } - - if iter, err = e.index.MeasurementSeriesIDIterator([]byte("mm0")); err != nil { - t.Fatalf("iterator error: %v", err) - } - if iter != nil { - defer iter.Close() - if elem, err = iter.Next(); err != nil { - t.Fatal(err) - } - if !elem.SeriesID.IsZero() { - t.Fatalf("got an undeleted series id, but series should be dropped from index") - } - } -} - -func BenchmarkEngine_DeletePrefixRange(b *testing.B) { - for i := 0; i < b.N; i++ { - b.StopTimer() - e, err := NewEngine(tsm1.NewConfig(), b) - if err != nil { - b.Fatal(err) - } else if err := e.Open(context.Background()); err != nil { - b.Fatal(err) - } - defer e.Close() - - const n = 100000 - var points []models.Point - for i := 0; i < n; i++ { - points = append(points, MustParsePointString(fmt.Sprintf("cpu,host=A%d value=1", i), "mm0")) - points = append(points, MustParsePointString(fmt.Sprintf("cpu,host=B%d value=1", i), "mm1")) - } - if err := e.writePoints(points...); err != nil { - b.Fatal(err) - } - - if err := e.WriteSnapshot(context.Background(), tsm1.CacheStatusColdNoWrites); err != nil { - b.Fatal(err) - } else if got, want := len(e.FileStore.Keys()), n*2; got != want { - b.Fatalf("len(Keys())=%d, want %d", got, want) - } - b.StartTimer() - - if err := e.DeletePrefixRange(context.Background(), []byte("mm0"), 0, 3, nil); err != nil { - b.Fatal(err) - } else if err := e.Close(); err != nil { - b.Fatal(err) - } - } -} diff --git a/tsdb/tsm1/engine_measurement_notime_schema.go b/tsdb/tsm1/engine_measurement_notime_schema.go deleted file mode 100644 index 953e19c50a..0000000000 --- a/tsdb/tsm1/engine_measurement_notime_schema.go +++ /dev/null @@ -1,350 +0,0 @@ -package tsm1 - -import ( - "context" - "sort" - - "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/kit/tracing" - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/cursors" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" - "github.com/influxdata/influxql" - "github.com/opentracing/opentracing-go" - "github.com/opentracing/opentracing-go/log" -) - -// MeasurementNamesNoTime returns an iterator which enumerates the measurements for the given -// bucket. -// -// MeasurementNamesNoTime will always return a StringIterator if there is no error. -// -// If the context is canceled before MeasurementNamesNoTime has finished processing, a non-nil -// error will be returned along with statistics for the already scanned data. -func (e *Engine) MeasurementNamesNoTime(ctx context.Context, orgID, bucketID influxdb.ID, predicate influxql.Expr) (cursors.StringIterator, error) { - span, ctx := tracing.StartSpanFromContext(ctx) - defer span.Finish() - - return e.tagValuesNoTime(ctx, orgID, bucketID, models.MeasurementTagKeyBytes, predicate) -} - -// MeasurementTagValuesNoTime returns an iterator which enumerates the tag values for the given -// bucket, measurement and tag key and filtered using the optional the predicate. 
-// -// MeasurementTagValuesNoTime will always return a StringIterator if there is no error. -// -// If the context is canceled before MeasurementTagValuesNoTime has finished processing, a non-nil -// error will be returned along with statistics for the already scanned data. -func (e *Engine) MeasurementTagValuesNoTime(ctx context.Context, orgID, bucketID influxdb.ID, measurement, tagKey string, predicate influxql.Expr) (cursors.StringIterator, error) { - predicate = AddMeasurementToExpr(measurement, predicate) - - return e.tagValuesNoTime(ctx, orgID, bucketID, []byte(tagKey), predicate) -} - -func (e *Engine) tagValuesNoTime(ctx context.Context, orgID, bucketID influxdb.ID, tagKeyBytes []byte, predicate influxql.Expr) (cursors.StringIterator, error) { - if err := ValidateTagPredicate(predicate); err != nil { - return nil, err - } - - orgBucket := tsdb.EncodeName(orgID, bucketID) - - // fetch distinct values for tag key in bucket - itr, err := e.index.TagValueIterator(orgBucket[:], tagKeyBytes) - if err != nil { - return nil, err - } else if itr == nil { - return cursors.NewStringSliceIterator(nil), err - } - defer itr.Close() - - var ( - vals = make([]string, 0, 128) - ) - - span := opentracing.SpanFromContext(ctx) - if span != nil { - defer func() { - span.LogFields( - log.Int("values_count", len(vals)), - ) - }() - } - - // reusable buffers - var ( - tagKey = string(tagKeyBytes) - ) - - for i := 0; ; i++ { - // to keep cache scans fast, check context every 'cancelCheckInterval' iterations - if i%cancelCheckInterval == 0 { - select { - case <-ctx.Done(): - return cursors.NewStringSliceIterator(nil), ctx.Err() - default: - } - } - - val, err := itr.Next() - if err != nil { - return cursors.NewStringSliceIterator(nil), err - } else if len(val) == 0 { - break - } - - // = val - var expr influxql.Expr = &influxql.BinaryExpr{ - LHS: &influxql.VarRef{Val: tagKey, Type: influxql.Tag}, - Op: influxql.EQ, - RHS: &influxql.StringLiteral{Val: string(val)}, - } - - if predicate != nil { - // = val AND (expr) - expr = &influxql.BinaryExpr{ - LHS: expr, - Op: influxql.AND, - RHS: &influxql.ParenExpr{ - Expr: predicate, - }, - } - } - - if err := func() error { - sitr, err := e.index.MeasurementSeriesByExprIterator(orgBucket[:], expr) - if err != nil { - return err - } - defer sitr.Close() - - if elem, err := sitr.Next(); err != nil { - return err - } else if !elem.SeriesID.IsZero() { - vals = append(vals, string(val)) - } - return nil - }(); err != nil { - return cursors.NewStringSliceIterator(nil), err - } - } - - sort.Strings(vals) - return cursors.NewStringSliceIterator(vals), err -} - -// MeasurementFieldsNoTime returns an iterator which enumerates the field schema for the given -// bucket and measurement, filtered using the optional the predicate. -// -// MeasurementFieldsNoTime will always return a MeasurementFieldsIterator if there is no error. -// -// If the context is canceled before MeasurementFieldsNoTime has finished processing, a non-nil -// error will be returned along with statistics for the already scanned data. 
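Both tagValuesNoTime above and fieldsNoTime below verify each candidate value the same way: they AND a tag-equality clause onto the caller's predicate and ask the index for at least one matching series. A minimal, self-contained sketch of that expression construction follows; it uses only the influxql AST types already present in the deleted code, and the helper name buildTagEqualExpr is illustrative rather than part of the codebase.

    package main

    import (
        "fmt"

        "github.com/influxdata/influxql"
    )

    // buildTagEqualExpr mirrors the pattern used by tagValuesNoTime/fieldsNoTime:
    // it builds `tagKey = 'val'` and, when a caller predicate is supplied, ANDs it
    // on wrapped in parentheses so operator precedence is preserved.
    func buildTagEqualExpr(tagKey, val string, predicate influxql.Expr) influxql.Expr {
        var expr influxql.Expr = &influxql.BinaryExpr{
            LHS: &influxql.VarRef{Val: tagKey, Type: influxql.Tag},
            Op:  influxql.EQ,
            RHS: &influxql.StringLiteral{Val: val},
        }
        if predicate != nil {
            expr = &influxql.BinaryExpr{
                LHS: expr,
                Op:  influxql.AND,
                RHS: &influxql.ParenExpr{Expr: predicate},
            }
        }
        return expr
    }

    func main() {
        pred := influxql.MustParseExpr("os = 'linux'")
        // Prints: host = 'AA' AND (os = 'linux')
        fmt.Println(buildTagEqualExpr("host", "AA", pred))
    }
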
-func (e *Engine) MeasurementFieldsNoTime(ctx context.Context, orgID, bucketID influxdb.ID, measurement string, predicate influxql.Expr) (cursors.MeasurementFieldsIterator, error) { - predicate = AddMeasurementToExpr(measurement, predicate) - - return e.fieldsNoTime(ctx, orgID, bucketID, []byte(measurement), predicate) -} - -func (e *Engine) fieldsNoTime(ctx context.Context, orgID, bucketID influxdb.ID, measurement []byte, predicate influxql.Expr) (cursors.MeasurementFieldsIterator, error) { - type fieldKeyType struct { - key []byte - typ cursors.FieldType - } - - if err := ValidateTagPredicate(predicate); err != nil { - return nil, err - } - - orgBucket := tsdb.EncodeName(orgID, bucketID) - - // fetch distinct values for field, which may be a superset of the measurement - itr, err := e.index.TagValueIterator(orgBucket[:], models.FieldKeyTagKeyBytes) - if err != nil { - return nil, err - } - defer itr.Close() - - var ( - fieldTypes = make([]fieldKeyType, 0, 128) - ) - - span := opentracing.SpanFromContext(ctx) - if span != nil { - defer func() { - span.LogFields( - log.Int("values_count", len(fieldTypes)), - ) - }() - } - - for i := 0; ; i++ { - // to keep cache scans fast, check context every 'cancelCheckInterval' iterations - if i%cancelCheckInterval == 0 { - select { - case <-ctx.Done(): - return cursors.NewMeasurementFieldsSliceIterator(nil), ctx.Err() - default: - } - } - - val, err := itr.Next() - if err != nil { - return cursors.NewMeasurementFieldsSliceIterator(nil), err - } else if len(val) == 0 { - break - } - - // = val - var expr influxql.Expr = &influxql.BinaryExpr{ - LHS: &influxql.VarRef{Val: models.FieldKeyTagKey, Type: influxql.Tag}, - Op: influxql.EQ, - RHS: &influxql.StringLiteral{Val: string(val)}, - } - - if predicate != nil { - // = val AND (expr) - expr = &influxql.BinaryExpr{ - LHS: expr, - Op: influxql.AND, - RHS: &influxql.ParenExpr{ - Expr: predicate, - }, - } - } - - if err := func() error { - sitr, err := e.index.MeasurementSeriesByExprIterator(orgBucket[:], expr) - if err != nil { - return err - } - defer sitr.Close() - - if elem, err := sitr.Next(); err != nil { - return err - } else if !elem.SeriesID.IsZero() { - key := e.sfile.SeriesKey(elem.SeriesID) - typedID := e.sfile.SeriesIDTypedBySeriesKey(key) - fieldTypes = append(fieldTypes, fieldKeyType{key: val, typ: cursors.ModelsFieldTypeToFieldType(typedID.Type())}) - } - return nil - }(); err != nil { - return cursors.NewMeasurementFieldsSliceIterator(nil), err - } - } - - vals := make([]cursors.MeasurementField, 0, len(fieldTypes)) - for i := range fieldTypes { - val := &fieldTypes[i] - vals = append(vals, cursors.MeasurementField{Key: string(val.key), Type: val.typ, Timestamp: 0}) - } - - return cursors.NewMeasurementFieldsSliceIterator([]cursors.MeasurementFields{{Fields: vals}}), nil -} - -// MeasurementTagKeysNoTime returns an iterator which enumerates the tag keys -// for the given bucket, measurement and tag key and filtered using the optional -// the predicate. -// -// MeasurementTagKeysNoTime will always return a StringIterator if there is no error. -// -// If the context is canceled before MeasurementTagKeysNoTime has finished -// processing, a non-nil error will be returned along with statistics for the -// already scanned data. 
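For orientation, a hedged usage sketch of MeasurementFieldsNoTime as exercised by the tests further down in this diff. It assumes an already-opened *tsm1.Engine (here eng) and known org/bucket IDs, uses the pre-move import path from the left-hand side of this diff, and relies on MeasurementFieldsIteratorFlatMap, the same helper the tests use to flatten the iterator; it is a sketch, not part of the change.

    package tsmexample

    import (
        "context"
        "fmt"

        "github.com/influxdata/influxdb/v2"
        "github.com/influxdata/influxdb/v2/tsdb/cursors"
        "github.com/influxdata/influxdb/v2/tsdb/tsm1"
    )

    // listFields prints the field schema of one measurement using the
    // index/series-file backed ("NoTime") variant removed by this diff.
    func listFields(eng *tsm1.Engine, org, bucket influxdb.ID, measurement string) error {
        iter, err := eng.MeasurementFieldsNoTime(context.Background(), org, bucket, measurement, nil)
        if err != nil {
            return err
        }
        // Each MeasurementField carries the field key and its cursors.FieldType
        // (Float, Integer, Boolean, String, Unsigned or Undefined).
        for _, f := range cursors.MeasurementFieldsIteratorFlatMap(iter) {
            fmt.Printf("%s\t%v\n", f.Key, f.Type)
        }
        return nil
    }
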
-func (e *Engine) MeasurementTagKeysNoTime(ctx context.Context, orgID, bucketID influxdb.ID, measurement string, predicate influxql.Expr) (cursors.StringIterator, error) { - if measurement != "" { - predicate = AddMeasurementToExpr(measurement, predicate) - } - return e.tagKeysNoTime(ctx, orgID, bucketID, predicate) -} - -func (e *Engine) tagKeysNoTime(ctx context.Context, orgID, bucketID influxdb.ID, predicate influxql.Expr) (cursors.StringIterator, error) { - if err := ValidateTagPredicate(predicate); err != nil { - return nil, err - } - - orgBucket := tsdb.EncodeName(orgID, bucketID) - - vals := make([]string, 0, 32) - - span := opentracing.SpanFromContext(ctx) - if span != nil { - defer func() { - span.LogFields( - log.Int("values_count", len(vals)), - ) - }() - } - - var ( - km keyMerger - keys = make([][]byte, 0, 32) - ) - - if err := func() error { - sitr, err := e.index.MeasurementSeriesByExprIterator(orgBucket[:], predicate) - if err != nil { - return err - } - defer sitr.Close() - - for i := 0; ; i++ { - // to keep cache scans fast, check context every 'cancelCheckInterval' iterations - if i%cancelCheckInterval == 0 { - select { - case <-ctx.Done(): - return ctx.Err() - default: - } - } - - elem, err := sitr.Next() - if err != nil { - return err - } else if elem.SeriesID.IsZero() { - return nil - } - - sf := e.index.SeriesFile() - if sf == nil { - return nil - } - - skey := sf.SeriesKey(elem.SeriesID) - if len(skey) == 0 { - continue - } - - keys = parseSeriesKeys(skey, keys) - km.MergeKeys(keys) - } - }(); err != nil { - return cursors.NewStringSliceIterator(nil), err - } - - for _, v := range km.Get() { - vals = append(vals, string(v)) - } - - return cursors.NewStringSliceIterator(vals), nil -} - -// parseSeriesKeys is adapted from seriesfile.ParseSeriesKeyInto. Instead of -// returning the full tag information, it only returns the keys. 
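The parseSeriesKeys helper that follows keeps only the tag keys. As a hedged illustration of the underlying series-key layout (total length, measurement name, tag count, then key/value pairs), here is a sketch that also keeps the values; it assumes the same seriesfile.Read* signatures that parseSeriesKeys relies on and is not part of the codebase.

    package tsmexample

    import (
        "fmt"

        "github.com/influxdata/influxdb/v2/tsdb/seriesfile"
    )

    // printSeriesKeyTags walks a raw series-file key in the same order as
    // parseSeriesKeys: total length, measurement name, tag count, then each
    // tag key/value pair.
    func printSeriesKeyTags(data []byte) {
        _, data = seriesfile.ReadSeriesKeyLen(data)
        name, data := seriesfile.ReadSeriesKeyMeasurement(data)
        tagN, data := seriesfile.ReadSeriesKeyTagN(data)
        fmt.Printf("measurement=%s tags=%d\n", name, tagN)
        for i := 0; i < tagN; i++ {
            var key, value []byte
            key, value, data = seriesfile.ReadSeriesKeyTag(data)
            fmt.Printf("  %s=%s\n", key, value)
        }
    }
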
-func parseSeriesKeys(data []byte, dst [][]byte) [][]byte { - _, data = seriesfile.ReadSeriesKeyLen(data) - _, data = seriesfile.ReadSeriesKeyMeasurement(data) - tagN, data := seriesfile.ReadSeriesKeyTagN(data) - - if cap(dst) < tagN { - dst = make([][]byte, tagN) - } else { - dst = dst[:tagN] - } - - for i := 0; i < tagN; i++ { - dst[i], _, data = seriesfile.ReadSeriesKeyTag(data) - } - - return dst -} diff --git a/tsdb/tsm1/engine_measurement_notime_schema_test.go b/tsdb/tsm1/engine_measurement_notime_schema_test.go deleted file mode 100644 index 5b14816942..0000000000 --- a/tsdb/tsm1/engine_measurement_notime_schema_test.go +++ /dev/null @@ -1,492 +0,0 @@ -package tsm1_test - -import ( - "context" - "testing" - - "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/tsdb/cursors" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" - "github.com/influxdata/influxql" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func strL(s ...string) []string { return s } - -func TestEngine_MeasurementNamesNoTime(t *testing.T) { - e, err := NewEngine(tsm1.NewConfig(), t) - if err != nil { - t.Fatal(err) - } - if err := e.Open(context.Background()); err != nil { - t.Fatal(err) - } - defer e.Close() - - orgs := []struct { - org, bucket influxdb.ID - }{ - { - org: 0x5020, - bucket: 0x5100, - }, - { - org: 0x6000, - bucket: 0x6100, - }, - } - - // this org will require escaping the 0x20 byte - e.MustWritePointsString(orgs[0].org, orgs[0].bucket, ` -cpu,cpu0=v,cpu1=v,cpu2=v f=1 101 -cpu,cpu1=v f=1 103 -cpu,cpu2=v f=1 105 -cpu,cpu0=v,cpu2=v f=1 107 -cpu,cpu2=v,cpu3=v,other=c f=1 109 -mem,mem0=v,mem1=v,other=m f=1 101`) - e.MustWritePointsString(orgs[1].org, orgs[1].bucket, ` -cpu2,cpu0=v,cpu1=v,cpu2=v f=1 101 -cpu2,cpu1=v f=1 103 -cpu2,cpu2=v f=1 105 -cpu2,cpu0=v,cpu2=v f=1 107 -cpu2,cpu2=v,cpu3=v,other=c f=1 109 -mem2,mem0=v,mem1=v,other=m f=1 101`) - - // this test verifies the index is immediately queryable before TSM is written - t.Run("gets all measurements before snapshot", func(t *testing.T) { - iter, err := e.MeasurementNamesNoTime(context.Background(), orgs[0].org, orgs[0].bucket, nil) - require.NoError(t, err) - assert.Equal(t, cursors.StringIteratorToSlice(iter), strL("cpu", "mem")) - }) - - // this test verifies the index is immediately queryable before TSM is written - t.Run("verify subset of measurements with predicate", func(t *testing.T) { - iter, err := e.MeasurementNamesNoTime(context.Background(), orgs[0].org, orgs[0].bucket, influxql.MustParseExpr("other = 'c'")) - require.NoError(t, err) - assert.Equal(t, cursors.StringIteratorToSlice(iter), strL("cpu")) - }) - - // delete some data from the first bucket - e.MustDeleteBucketRange(orgs[0].org, orgs[0].bucket, 0, 105) - - // this test verifies measurement disappears if deleted whilst in cache - t.Run("only contains cpu measurement", func(t *testing.T) { - iter, err := e.MeasurementNamesNoTime(context.Background(), orgs[0].org, orgs[0].bucket, nil) - require.NoError(t, err) - assert.Equal(t, cursors.StringIteratorToSlice(iter), strL("cpu")) - }) - - // write the values back - e.MustWritePointsString(orgs[0].org, orgs[0].bucket, ` -cpu,cpu0=v,cpu1=v,cpu2=v f=1 101 -cpu,cpu1=v f=1 103 -cpu,cpu2=v f=1 105 -mem,mem0=v,mem1=v,other=m f=1 101`) - - // send some points to TSM data - e.MustWriteSnapshot() - - // this test verifies the index is immediately queryable before TSM is written - t.Run("contains cpu and mem measurement in TSM", func(t *testing.T) { - iter, err := 
e.MeasurementNamesNoTime(context.Background(), orgs[0].org, orgs[0].bucket, nil) - require.NoError(t, err) - assert.Equal(t, cursors.StringIteratorToSlice(iter), strL("cpu", "mem")) - }) - - // delete some data from the first bucket - e.MustDeleteBucketRange(orgs[0].org, orgs[0].bucket, 0, 105) - - // this test verifies measurement disappears if deleted from TSM - t.Run("only contains cpu measurement in TSM", func(t *testing.T) { - iter, err := e.MeasurementNamesNoTime(context.Background(), orgs[0].org, orgs[0].bucket, nil) - require.NoError(t, err) - assert.Equal(t, cursors.StringIteratorToSlice(iter), strL("cpu")) - }) - - e.MustDeleteBucketRange(orgs[0].org, orgs[0].bucket, 0, 1000) - - // this test verifies all measurements disappears if deleted - t.Run("no measurements", func(t *testing.T) { - iter, err := e.MeasurementNamesNoTime(context.Background(), orgs[0].org, orgs[0].bucket, nil) - require.NoError(t, err) - assert.Equal(t, cursors.StringIteratorToSlice(iter), strL()) - }) - -} - -func TestEngine_MeasurementTagValuesNoTime(t *testing.T) { - e, err := NewEngine(tsm1.NewConfig(), t) - if err != nil { - t.Fatal(err) - } - if err := e.Open(context.Background()); err != nil { - t.Fatal(err) - } - defer e.Close() - - orgs := []struct { - org, bucket influxdb.ID - }{ - { - org: 0x5020, - bucket: 0x5100, - }, - { - org: 0x6000, - bucket: 0x6100, - }, - } - - // this org will require escaping the 0x20 byte - e.MustWritePointsString(orgs[0].org, orgs[0].bucket, ` -cpuA,host=0A,os=linux value=1.1 101 -cpuA,host=AA,os=linux value=1.2 102 -cpuA,host=AA,os=linux value=1.3 104 -cpuA,host=CA,os=linux value=1.3 104 -cpuA,host=CA,os=linux value=1.3 105 -cpuA,host=DA,os=macOS value=1.3 106 -memA,host=DA,os=macOS value=1.3 101`) - e.MustWritePointsString(orgs[1].org, orgs[1].bucket, ` -cpuB,host=0B,os=linux value=1.1 101 -cpuB,host=AB,os=linux value=1.2 102 -cpuB,host=AB,os=linux value=1.3 104 -cpuB,host=CB,os=linux value=1.3 104 -cpuB,host=CB,os=linux value=1.3 105 -cpuB,host=DB,os=macOS value=1.3 106 -memB,host=DB,os=macOS value=1.3 101`) - - t.Run("before snapshot", func(t *testing.T) { - t.Run("cpuA", func(t *testing.T) { - t.Run("host tag returns all values", func(t *testing.T) { - iter, err := e.MeasurementTagValuesNoTime(context.Background(), orgs[0].org, orgs[0].bucket, "cpuA", "host", nil) - require.NoError(t, err) - assert.Equal(t, cursors.StringIteratorToSlice(iter), strL("0A", "AA", "CA", "DA")) - }) - - t.Run("host tag returns subset with predicate", func(t *testing.T) { - iter, err := e.MeasurementTagValuesNoTime(context.Background(), orgs[0].org, orgs[0].bucket, "cpuA", "host", influxql.MustParseExpr("os = 'macOS'")) - require.NoError(t, err) - assert.Equal(t, cursors.StringIteratorToSlice(iter), strL("DA")) - }) - }) - - t.Run("memA", func(t *testing.T) { - t.Run("host tag returns all values", func(t *testing.T) { - iter, err := e.MeasurementTagValuesNoTime(context.Background(), orgs[0].org, orgs[0].bucket, "memA", "host", nil) - require.NoError(t, err) - assert.Equal(t, cursors.StringIteratorToSlice(iter), strL("DA")) - }) - t.Run("os tag returns all values", func(t *testing.T) { - iter, err := e.MeasurementTagValuesNoTime(context.Background(), orgs[0].org, orgs[0].bucket, "memA", "os", nil) - require.NoError(t, err) - assert.Equal(t, cursors.StringIteratorToSlice(iter), strL("macOS")) - }) - }) - }) - - e.MustDeleteBucketRange(orgs[0].org, orgs[0].bucket, 102, 105) - - t.Run("before snapshot after delete", func(t *testing.T) { - t.Run("cpuA", func(t *testing.T) { - t.Run("host tag 
returns all values", func(t *testing.T) { - iter, err := e.MeasurementTagValuesNoTime(context.Background(), orgs[0].org, orgs[0].bucket, "cpuA", "host", nil) - require.NoError(t, err) - assert.Equal(t, cursors.StringIteratorToSlice(iter), strL("0A", "DA")) - }) - - t.Run("host tag returns subset with predicate", func(t *testing.T) { - iter, err := e.MeasurementTagValuesNoTime(context.Background(), orgs[0].org, orgs[0].bucket, "cpuA", "host", influxql.MustParseExpr("os = 'macOS'")) - require.NoError(t, err) - assert.Equal(t, cursors.StringIteratorToSlice(iter), strL("DA")) - }) - }) - - t.Run("memA", func(t *testing.T) { - t.Run("host tag returns all values", func(t *testing.T) { - iter, err := e.MeasurementTagValuesNoTime(context.Background(), orgs[0].org, orgs[0].bucket, "memA", "host", nil) - require.NoError(t, err) - assert.Equal(t, cursors.StringIteratorToSlice(iter), strL("DA")) - }) - t.Run("os tag returns all values", func(t *testing.T) { - iter, err := e.MeasurementTagValuesNoTime(context.Background(), orgs[0].org, orgs[0].bucket, "memA", "os", nil) - require.NoError(t, err) - assert.Equal(t, cursors.StringIteratorToSlice(iter), strL("macOS")) - }) - }) - }) - - // send some points to TSM data - e.MustWriteSnapshot() - - // leave some points in the cache - e.MustWritePointsString(orgs[0].org, orgs[0].bucket, ` -cpuA,host=0A,os=linux value=1.1 201 -cpuA,host=AA,os=linux value=1.2 202 -cpuA,host=AA,os=linux value=1.3 204 -cpuA,host=BA,os=macOS value=1.3 204 -cpuA,host=BA,os=macOS value=1.3 205 -cpuA,host=EA,os=linux value=1.3 206 -memA,host=EA,os=linux value=1.3 201`) - e.MustWritePointsString(orgs[1].org, orgs[1].bucket, ` -cpuB,host=0B,os=linux value=1.1 201 -cpuB,host=AB,os=linux value=1.2 202 -cpuB,host=AB,os=linux value=1.3 204 -cpuB,host=BB,os=linux value=1.3 204 -cpuB,host=BB,os=linux value=1.3 205 -cpuB,host=EB,os=macOS value=1.3 206 -memB,host=EB,os=macOS value=1.3 201`) - - t.Run("after snapshot", func(t *testing.T) { - t.Run("cpuA", func(t *testing.T) { - t.Run("host tag returns all values", func(t *testing.T) { - iter, err := e.MeasurementTagValuesNoTime(context.Background(), orgs[0].org, orgs[0].bucket, "cpuA", "host", nil) - require.NoError(t, err) - assert.Equal(t, cursors.StringIteratorToSlice(iter), strL("0A", "AA", "BA", "DA", "EA")) - }) - - t.Run("host tag returns subset with predicate", func(t *testing.T) { - iter, err := e.MeasurementTagValuesNoTime(context.Background(), orgs[0].org, orgs[0].bucket, "cpuA", "host", influxql.MustParseExpr("os = 'macOS'")) - require.NoError(t, err) - assert.Equal(t, cursors.StringIteratorToSlice(iter), strL("BA", "DA")) - }) - }) - }) - - e.MustDeleteBucketRange(orgs[0].org, orgs[0].bucket, 0, 1000) - - t.Run("returns no data after deleting everything", func(t *testing.T) { - iter, err := e.MeasurementTagValuesNoTime(context.Background(), orgs[0].org, orgs[0].bucket, "cpuA", "host", nil) - require.NoError(t, err) - assert.Equal(t, cursors.StringIteratorToSlice(iter), strL()) - }) -} - -func TestEngine_MeasurementFieldsNoTime(t *testing.T) { - e, err := NewEngine(tsm1.NewConfig(), t) - if err != nil { - t.Fatal(err) - } - if err := e.Open(context.Background()); err != nil { - t.Fatal(err) - } - defer e.Close() - - orgs := []struct { - org, bucket influxdb.ID - }{ - { - org: 0x5020, - bucket: 0x5100, - }, - { - org: 0x6000, - bucket: 0x6100, - }, - } - - // this org will require escaping the 0x20 byte - e.MustWritePointsString(orgs[0].org, orgs[0].bucket, ` -m00,tag00=v00,tag10=v10 i=1i 101 -m00,tag00=v00,tag10=v11 i=1i 102 
-m00,tag00=v00,tag10=v12 f=1 101 -m00,tag00=v00,tag10=v13 i=1i 108 -m00,tag00=v00,tag10=v14 f=1 109 -m00,tag00=v00,tag10=v15 i=1i 109 -m01,tag00=v00,tag10=v10 b=true 101 -`) - e.MustWritePointsString(orgs[1].org, orgs[1].bucket, ` -m10,foo=v barF=50 101 -`) - - fldL := func(t *testing.T, kv ...interface{}) []cursors.MeasurementField { - t.Helper() - if len(kv)&1 == 1 { - panic("uneven kv slice") - } - - res := make([]cursors.MeasurementField, 0, len(kv)/2) - for i := 0; i < len(kv); i += 2 { - res = append(res, cursors.MeasurementField{ - Key: kv[i].(string), - Type: kv[i+1].(cursors.FieldType), - }) - } - return res - } - - t.Run("first writes", func(t *testing.T) { - t.Run("m00 no predicate", func(t *testing.T) { - iter, err := e.MeasurementFieldsNoTime(context.Background(), orgs[0].org, orgs[0].bucket, "m00", nil) - require.NoError(t, err) - assert.Equal(t, cursors.MeasurementFieldsIteratorFlatMap(iter), fldL(t, "f", cursors.Float, "i", cursors.Integer)) - }) - - t.Run("m00 with predicate", func(t *testing.T) { - iter, err := e.MeasurementFieldsNoTime(context.Background(), orgs[0].org, orgs[0].bucket, "m00", influxql.MustParseExpr("tag10 = 'v15'")) - require.NoError(t, err) - assert.Equal(t, cursors.MeasurementFieldsIteratorFlatMap(iter), fldL(t, "i", cursors.Integer)) - }) - - t.Run("m01 no predicate", func(t *testing.T) { - iter, err := e.MeasurementFieldsNoTime(context.Background(), orgs[0].org, orgs[0].bucket, "m01", nil) - require.NoError(t, err) - assert.Equal(t, cursors.MeasurementFieldsIteratorFlatMap(iter), fldL(t, "b", cursors.Boolean)) - }) - }) - - // change type of field i (which is not expected, and won't be supported in the future) - e.MustWritePointsString(orgs[0].org, orgs[0].bucket, ` -m00,tag00=v00,tag10=v22 f=1 201 -m00,tag00=v00,tag10=v21 i="s" 202 -m00,tag00=v00,tag10=v20 b=true 210 -`) - - t.Run("i is still integer", func(t *testing.T) { - iter, err := e.MeasurementFieldsNoTime(context.Background(), orgs[0].org, orgs[0].bucket, "m00", nil) - require.NoError(t, err) - assert.Equal(t, cursors.MeasurementFieldsIteratorFlatMap(iter), fldL(t, "b", cursors.Boolean, "f", cursors.Float, "i", cursors.Integer)) - }) - - // delete earlier data - e.MustDeleteBucketRange(orgs[0].org, orgs[0].bucket, 0, 200) - - t.Run("i is now a string", func(t *testing.T) { - iter, err := e.MeasurementFieldsNoTime(context.Background(), orgs[0].org, orgs[0].bucket, "m00", nil) - require.NoError(t, err) - assert.Equal(t, cursors.MeasurementFieldsIteratorFlatMap(iter), fldL(t, "b", cursors.Boolean, "f", cursors.Float, "i", cursors.String)) - }) -} - -func TestEngine_MeasurementTagKeysNoTime(t *testing.T) { - e, err := NewEngine(tsm1.NewConfig(), t) - if err != nil { - t.Fatal(err) - } - if err := e.Open(context.Background()); err != nil { - t.Fatal(err) - } - defer e.Close() - - orgs := []struct { - org, bucket influxdb.ID - }{ - { - org: 0x5020, - bucket: 0x5100, - }, - { - org: 0x6000, - bucket: 0x6100, - }, - } - - // this org will require escaping the 0x20 byte - e.MustWritePointsString(orgs[0].org, orgs[0].bucket, ` -cpuA,host=0A,os=linux value=1.1 101 -cpuA,host=AA,os=linux value=1.2 102 -cpuA,host=AA,os=linux value=1.3 104 -cpuA,host=CA,os=linux value=1.3 104 -cpuA,host=CA,os=linux value=1.3 105 -cpuA,host=DA,os=macOS,release=10.15 value=1.3 106 -memA,host=DA,os=macOS,release=10.15 value=1.3 101`) - e.MustWritePointsString(orgs[1].org, orgs[1].bucket, ` -cpuB,host=0B,os=linux value=1.1 101 -cpuB,host=AB,os=linux value=1.2 102 -cpuB,host=AB,os=linux value=1.3 104 -cpuB,host=CB,os=linux 
value=1.3 104 -cpuB,host=CB,os=linux value=1.3 105 -cpuB,host=DB,os=macOS,release=10.15 value=1.3 106 -memB,host=DB,os=macOS,release=10.15 value=1.3 101`) - - t.Run("before snapshot", func(t *testing.T) { - t.Run("cpuA", func(t *testing.T) { - t.Run("measurement name returns all keys", func(t *testing.T) { - iter, err := e.MeasurementTagKeysNoTime(context.Background(), orgs[0].org, orgs[0].bucket, "cpuA", nil) - require.NoError(t, err) - assert.Equal(t, strL("\x00", "host", "os", "release", "\xff"), cursors.StringIteratorToSlice(iter)) - }) - }) - }) - - e.MustDeleteBucketRange(orgs[0].org, orgs[0].bucket, 102, 105) - - t.Run("before snapshot after delete", func(t *testing.T) { - t.Run("cpuA", func(t *testing.T) { - t.Run("measurement name returns all keys", func(t *testing.T) { - iter, err := e.MeasurementTagKeysNoTime(context.Background(), orgs[0].org, orgs[0].bucket, "cpuA", nil) - require.NoError(t, err) - assert.Equal(t, strL("\x00", "host", "os", "release", "\xff"), cursors.StringIteratorToSlice(iter)) - }) - - t.Run("measurement name returns subset with predicate", func(t *testing.T) { - iter, err := e.MeasurementTagKeysNoTime(context.Background(), orgs[0].org, orgs[0].bucket, "cpuA", influxql.MustParseExpr("os = 'linux'")) - require.NoError(t, err) - assert.Equal(t, strL("\x00", "host", "os", "\xff"), cursors.StringIteratorToSlice(iter)) - }) - }) - }) - - // send some points to TSM data - e.MustWriteSnapshot() - - // leave some points in the cache - e.MustWritePointsString(orgs[0].org, orgs[0].bucket, ` -cpuA,host=0A,os=linux value=1.1 201 -cpuA,host=AA,os=linux value=1.2 202 -cpuA,host=AA,os=linux value=1.3 204 -cpuA,host=BA,os=macOS,release=10.15,shell=zsh value=1.3 204 -cpuA,host=BA,os=macOS,release=10.15,shell=zsh value=1.3 205 -cpuA,host=EA,os=linux value=1.3 206 -memA,host=EA,os=linux value=1.3 201`) - e.MustWritePointsString(orgs[1].org, orgs[1].bucket, ` -cpuB,host=0B,os=linux value=1.1 201 -cpuB,host=AB,os=linux value=1.2 202 -cpuB,host=AB,os=linux value=1.3 204 -cpuB,host=BB,os=linux value=1.3 204 -cpuB,host=BB,os=linux value=1.3 205 -cpuB,host=EB,os=macOS,release=10.15,shell=zsh value=1.3 206 -memB,host=EB,os=macOS,release=10.15,shell=zsh value=1.3 201`) - - t.Run("after snapshot", func(t *testing.T) { - t.Run("cpuA", func(t *testing.T) { - t.Run("measurement name returns all keys", func(t *testing.T) { - iter, err := e.MeasurementTagKeysNoTime(context.Background(), orgs[0].org, orgs[0].bucket, "cpuA", nil) - require.NoError(t, err) - assert.Equal(t, strL("\x00", "host", "os", "release", "shell", "\xff"), cursors.StringIteratorToSlice(iter)) - }) - - t.Run("measurement name returns subset with predicate", func(t *testing.T) { - iter, err := e.MeasurementTagKeysNoTime(context.Background(), orgs[0].org, orgs[0].bucket, "cpuA", influxql.MustParseExpr("os = 'linux'")) - require.NoError(t, err) - assert.Equal(t, strL("\x00", "host", "os", "\xff"), cursors.StringIteratorToSlice(iter)) - }) - - t.Run("measurement name returns subset with composite predicate", func(t *testing.T) { - iter, err := e.MeasurementTagKeysNoTime(context.Background(), orgs[0].org, orgs[0].bucket, "cpuA", influxql.MustParseExpr("os = 'linux' AND host = 'AA'")) - require.NoError(t, err) - assert.Equal(t, strL("\x00", "host", "os", "\xff"), cursors.StringIteratorToSlice(iter)) - }) - - t.Run("measurement name returns no results with bad predicate", func(t *testing.T) { - iter, err := e.MeasurementTagKeysNoTime(context.Background(), orgs[0].org, orgs[0].bucket, "cpuA", influxql.MustParseExpr("os = 
'darwin'")) - require.NoError(t, err) - assert.Equal(t, strL(), cursors.StringIteratorToSlice(iter)) - }) - - t.Run("bad measurement name returns no results", func(t *testing.T) { - iter, err := e.MeasurementTagKeysNoTime(context.Background(), orgs[0].org, orgs[0].bucket, "cpuC", nil) - require.NoError(t, err) - assert.Equal(t, strL(), cursors.StringIteratorToSlice(iter)) - }) - }) - }) - - e.MustDeleteBucketRange(orgs[0].org, orgs[0].bucket, 0, 1000) - - t.Run("returns no data after deleting everything", func(t *testing.T) { - iter, err := e.MeasurementTagKeysNoTime(context.Background(), orgs[0].org, orgs[0].bucket, "cpuA", nil) - require.NoError(t, err) - assert.Equal(t, strL(), cursors.StringIteratorToSlice(iter)) - }) -} diff --git a/tsdb/tsm1/engine_measurement_schema.go b/tsdb/tsm1/engine_measurement_schema.go deleted file mode 100644 index c2e37abb4b..0000000000 --- a/tsdb/tsm1/engine_measurement_schema.go +++ /dev/null @@ -1,582 +0,0 @@ -package tsm1 - -import ( - "bytes" - "context" - "sort" - "strings" - - "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/cursors" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" - "github.com/influxdata/influxql" - "go.uber.org/zap" -) - -// MeasurementNames returns an iterator which enumerates the measurements for the given -// bucket and limited to the time range [start, end]. -// -// MeasurementNames will always return a StringIterator if there is no error. -// -// If the context is canceled before MeasurementNames has finished processing, a non-nil -// error will be returned along with statistics for the already scanned data. -func (e *Engine) MeasurementNames(ctx context.Context, orgID, bucketID influxdb.ID, start, end int64, predicate influxql.Expr) (cursors.StringIterator, error) { - if predicate == nil { - return e.measurementNamesNoPredicate(ctx, orgID, bucketID, start, end) - } - return e.measurementNamesPredicate(ctx, orgID, bucketID, start, end, predicate) -} - -func (e *Engine) measurementNamesNoPredicate(ctx context.Context, orgID, bucketID influxdb.ID, start, end int64) (cursors.StringIterator, error) { - orgBucket := tsdb.EncodeName(orgID, bucketID) - // TODO(edd): we need to clean up how we're encoding the prefix so that we - // don't have to remember to get it right everywhere we need to touch TSM data. 
- prefix := models.EscapeMeasurement(orgBucket[:]) - - var ( - tsmValues = make(map[string]struct{}) - stats cursors.CursorStats - canceled bool - ) - - e.FileStore.ForEachFile(func(f TSMFile) bool { - // Check the context before accessing each tsm file - select { - case <-ctx.Done(): - canceled = true - return false - default: - } - if f.OverlapsTimeRange(start, end) && f.OverlapsKeyPrefixRange(prefix, prefix) { - iter := f.TimeRangeIterator(prefix, start, end) - for i := 0; iter.Next(); i++ { - sfkey := iter.Key() - if !bytes.HasPrefix(sfkey, prefix) { - // end of org+bucket - break - } - - key, _ := SeriesAndFieldFromCompositeKey(sfkey) - name, err := models.ParseMeasurement(key) - if err != nil { - e.logger.Error("Invalid series key in TSM index", zap.Error(err), zap.Binary("key", key)) - continue - } - - if _, ok := tsmValues[string(name)]; ok { - continue - } - - if iter.HasData() { - tsmValues[string(name)] = struct{}{} - } - } - stats.Add(iter.Stats()) - } - return true - }) - - if canceled { - return cursors.NewStringSliceIteratorWithStats(nil, stats), ctx.Err() - } - - var ts cursors.TimestampArray - - // With performance in mind, we explicitly do not check the context - // while scanning the entries in the cache. - prefixStr := string(prefix) - _ = e.Cache.ApplyEntryFn(func(sfkey string, entry *entry) error { - if !strings.HasPrefix(sfkey, prefixStr) { - return nil - } - - // TODO(edd): consider the []byte() conversion here. - key, _ := SeriesAndFieldFromCompositeKey([]byte(sfkey)) - name, err := models.ParseMeasurement(key) - if err != nil { - e.logger.Error("Invalid series key in cache", zap.Error(err), zap.Binary("key", key)) - return nil - } - - if _, ok := tsmValues[string(name)]; ok { - return nil - } - - ts.Timestamps = entry.AppendTimestamps(ts.Timestamps[:0]) - if ts.Len() == 0 { - return nil - } - - sort.Sort(&ts) - - stats.ScannedValues += ts.Len() - stats.ScannedBytes += ts.Len() * 8 // sizeof timestamp - - if ts.Contains(start, end) { - tsmValues[string(name)] = struct{}{} - } - return nil - }) - - vals := make([]string, 0, len(tsmValues)) - for val := range tsmValues { - vals = append(vals, val) - } - sort.Strings(vals) - - return cursors.NewStringSliceIteratorWithStats(vals, stats), nil -} - -func (e *Engine) measurementNamesPredicate(ctx context.Context, orgID, bucketID influxdb.ID, start, end int64, predicate influxql.Expr) (cursors.StringIterator, error) { - if err := ValidateMeasurementNamesTagPredicate(predicate); err != nil { - return nil, err - } - - orgBucket := tsdb.EncodeName(orgID, bucketID) - - keys, err := e.findCandidateKeys(ctx, orgBucket[:], predicate) - if err != nil { - return cursors.EmptyStringIterator, err - } - - if len(keys) == 0 { - return cursors.EmptyStringIterator, nil - } - - var files []TSMFile - defer func() { - for _, f := range files { - f.Unref() - } - }() - var iters []*TimeRangeIterator - - // TODO(edd): we need to clean up how we're encoding the prefix so that we - // don't have to remember to get it right everywhere we need to touch TSM data. 
- tsmKeyPrefix := models.EscapeMeasurement(orgBucket[:]) - - var canceled bool - - e.FileStore.ForEachFile(func(f TSMFile) bool { - // Check the context before accessing each tsm file - select { - case <-ctx.Done(): - canceled = true - return false - default: - } - if f.OverlapsTimeRange(start, end) && f.OverlapsKeyPrefixRange(tsmKeyPrefix, tsmKeyPrefix) { - f.Ref() - files = append(files, f) - iters = append(iters, f.TimeRangeIterator(tsmKeyPrefix, start, end)) - } - return true - }) - - var stats cursors.CursorStats - - if canceled { - stats = statsFromIters(stats, iters) - return cursors.NewStringSliceIteratorWithStats(nil, stats), ctx.Err() - } - - tsmValues := make(map[string]struct{}) - - // reusable buffers - var ( - tags models.Tags - keybuf []byte - sfkey []byte - ts cursors.TimestampArray - ) - - for i := range keys { - // to keep cache scans fast, check context every 'cancelCheckInterval' iteratons - if i%cancelCheckInterval == 0 { - select { - case <-ctx.Done(): - stats = statsFromIters(stats, iters) - return cursors.NewStringSliceIteratorWithStats(nil, stats), ctx.Err() - default: - } - } - - _, tags = seriesfile.ParseSeriesKeyInto(keys[i], tags[:0]) - - // orgBucketEsc is already escaped, so no need to use models.AppendMakeKey, which - // unescapes and escapes the value again. The degenerate case is if the orgBucketEsc - // has escaped values, causing two allocations per key - keybuf = append(keybuf[:0], tsmKeyPrefix...) - keybuf = tags.AppendHashKey(keybuf) - sfkey = AppendSeriesFieldKeyBytes(sfkey[:0], keybuf, tags.Get(models.FieldKeyTagKeyBytes)) - - key, _ := SeriesAndFieldFromCompositeKey(sfkey) - name, err := models.ParseMeasurement(key) - if err != nil { - e.logger.Error("Invalid series key in TSM index", zap.Error(err), zap.Binary("key", key)) - continue - } - - if _, ok := tsmValues[string(name)]; ok { - continue - } - - ts.Timestamps = e.Cache.AppendTimestamps(sfkey, ts.Timestamps[:0]) - if ts.Len() > 0 { - sort.Sort(&ts) - - stats.ScannedValues += ts.Len() - stats.ScannedBytes += ts.Len() * 8 // sizeof timestamp - - if ts.Contains(start, end) { - tsmValues[string(name)] = struct{}{} - } - continue - } - - for _, iter := range iters { - if exact, _ := iter.Seek(sfkey); !exact { - continue - } - - if iter.HasData() { - tsmValues[string(name)] = struct{}{} - break - } - } - } - - vals := make([]string, 0, len(tsmValues)) - for val := range tsmValues { - vals = append(vals, val) - } - sort.Strings(vals) - - stats = statsFromIters(stats, iters) - return cursors.NewStringSliceIteratorWithStats(vals, stats), err -} - -// MeasurementTagValues returns an iterator which enumerates the tag values for the given -// bucket, measurement and tag key, filtered using the optional the predicate and limited to the -// time range [start, end]. -// -// MeasurementTagValues will always return a StringIterator if there is no error. -// -// If the context is canceled before MeasurementTagValues has finished processing, a non-nil -// error will be returned along with statistics for the already scanned data. 
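The predicate paths above, and the field variants below, all consult the cache the same way before touching the TSM iterators: append the cached timestamps for the composite key, sort them, and test whether any fall inside [start, end]. A hedged sketch of that check follows, assuming the tsm1.Cache.AppendTimestamps call as used by measurementNamesPredicate; the helper name hasCachedData is illustrative only.

    package tsmexample

    import (
        "sort"

        "github.com/influxdata/influxdb/v2/tsdb/cursors"
        "github.com/influxdata/influxdb/v2/tsdb/tsm1"
    )

    // hasCachedData reports whether the cache holds at least one value for
    // sfkey within [start, end], accumulating scan statistics the same way
    // the schema methods in this file do.
    func hasCachedData(c *tsm1.Cache, sfkey []byte, start, end int64, stats *cursors.CursorStats) bool {
        var ts cursors.TimestampArray
        ts.Timestamps = c.AppendTimestamps(sfkey, ts.Timestamps[:0])
        if ts.Len() == 0 {
            return false
        }
        sort.Sort(&ts)

        stats.ScannedValues += ts.Len()
        stats.ScannedBytes += ts.Len() * 8 // sizeof timestamp

        return ts.Contains(start, end)
    }
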
-func (e *Engine) MeasurementTagValues(ctx context.Context, orgID, bucketID influxdb.ID, measurement, tagKey string, start, end int64, predicate influxql.Expr) (cursors.StringIterator, error) { - if predicate == nil { - return e.tagValuesNoPredicate(ctx, orgID, bucketID, []byte(measurement), []byte(tagKey), start, end) - } - - predicate = AddMeasurementToExpr(measurement, predicate) - - return e.tagValuesPredicate(ctx, orgID, bucketID, []byte(measurement), []byte(tagKey), start, end, predicate) - -} - -// MeasurementTagKeys returns an iterator which enumerates the tag keys for the given -// bucket and measurement, filtered using the optional the predicate and limited to the -//// time range [start, end]. -// -// MeasurementTagKeys will always return a StringIterator if there is no error. -// -// If the context is canceled before MeasurementTagKeys has finished processing, a non-nil -// error will be returned along with statistics for the already scanned data. -func (e *Engine) MeasurementTagKeys(ctx context.Context, orgID, bucketID influxdb.ID, measurement string, start, end int64, predicate influxql.Expr) (cursors.StringIterator, error) { - if predicate == nil { - return e.tagKeysNoPredicate(ctx, orgID, bucketID, []byte(measurement), start, end) - } - - predicate = AddMeasurementToExpr(measurement, predicate) - - return e.tagKeysPredicate(ctx, orgID, bucketID, []byte(measurement), start, end, predicate) -} - -// MeasurementFields returns an iterator which enumerates the field schema for the given -// bucket and measurement, filtered using the optional the predicate and limited to the -// time range [start, end]. -// -// MeasurementFields will always return a MeasurementFieldsIterator if there is no error. -// -// If the context is canceled before MeasurementFields has finished processing, a non-nil -// error will be returned along with statistics for the already scanned data. -func (e *Engine) MeasurementFields(ctx context.Context, orgID, bucketID influxdb.ID, measurement string, start, end int64, predicate influxql.Expr) (cursors.MeasurementFieldsIterator, error) { - if predicate == nil { - return e.fieldsNoPredicate(ctx, orgID, bucketID, []byte(measurement), start, end) - } - - predicate = AddMeasurementToExpr(measurement, predicate) - - return e.fieldsPredicate(ctx, orgID, bucketID, []byte(measurement), start, end, predicate) -} - -type fieldTypeTime struct { - typ cursors.FieldType - max int64 -} - -func (e *Engine) fieldsPredicate(ctx context.Context, orgID influxdb.ID, bucketID influxdb.ID, measurement []byte, start int64, end int64, predicate influxql.Expr) (cursors.MeasurementFieldsIterator, error) { - if err := ValidateTagPredicate(predicate); err != nil { - return nil, err - } - - orgBucket := tsdb.EncodeName(orgID, bucketID) - - keys, err := e.findCandidateKeys(ctx, orgBucket[:], predicate) - if err != nil { - return cursors.EmptyMeasurementFieldsIterator, err - } - - if len(keys) == 0 { - return cursors.EmptyMeasurementFieldsIterator, nil - } - - var files []TSMFile - defer func() { - for _, f := range files { - f.Unref() - } - }() - var iters []*TimeRangeMaxTimeIterator - - // TODO(edd): we need to clean up how we're encoding the prefix so that we - // don't have to remember to get it right everywhere we need to touch TSM data. 
- orgBucketEsc := models.EscapeMeasurement(orgBucket[:]) - - mt := models.Tags{models.NewTag(models.MeasurementTagKeyBytes, measurement)} - tsmKeyPrefix := mt.AppendHashKey(orgBucketEsc) - tsmKeyPrefix = append(tsmKeyPrefix, ',') - - var canceled bool - - e.FileStore.ForEachFile(func(f TSMFile) bool { - // Check the context before accessing each tsm file - select { - case <-ctx.Done(): - canceled = true - return false - default: - } - if f.OverlapsTimeRange(start, end) && f.OverlapsKeyPrefixRange(tsmKeyPrefix, tsmKeyPrefix) { - f.Ref() - files = append(files, f) - iters = append(iters, f.TimeRangeMaxTimeIterator(tsmKeyPrefix, start, end)) - } - return true - }) - - var stats cursors.CursorStats - - if canceled { - stats = statsFromTimeRangeMaxTimeIters(stats, iters) - return cursors.NewMeasurementFieldsSliceIteratorWithStats(nil, stats), ctx.Err() - } - - tsmValues := make(map[string]fieldTypeTime) - - // reusable buffers - var ( - tags models.Tags - keybuf []byte - sfkey []byte - ts cursors.TimestampArray - ) - - for i := range keys { - // to keep cache scans fast, check context every 'cancelCheckInterval' iteratons - if i%cancelCheckInterval == 0 { - select { - case <-ctx.Done(): - stats = statsFromTimeRangeMaxTimeIters(stats, iters) - return cursors.NewMeasurementFieldsSliceIteratorWithStats(nil, stats), ctx.Err() - default: - } - } - - _, tags = seriesfile.ParseSeriesKeyInto(keys[i], tags[:0]) - fieldKey := tags.Get(models.FieldKeyTagKeyBytes) - keybuf = models.AppendMakeKey(keybuf[:0], orgBucketEsc, tags) - sfkey = AppendSeriesFieldKeyBytes(sfkey[:0], keybuf, fieldKey) - - cur := fieldTypeTime{max: InvalidMinNanoTime} - - ts.Timestamps = e.Cache.AppendTimestamps(sfkey, ts.Timestamps[:0]) - if ts.Len() > 0 { - sort.Sort(&ts) - - stats.ScannedValues += ts.Len() - stats.ScannedBytes += ts.Len() * 8 // sizeof timestamp - - if ts.Contains(start, end) { - max := ts.MaxTime() - if max > cur.max { - cur.max = max - cur.typ = BlockTypeToFieldType(e.Cache.BlockType(sfkey)) - } - } - } - - for _, iter := range iters { - if exact, _ := iter.Seek(sfkey); !exact { - continue - } - - max := iter.MaxTime() - if max > cur.max { - cur.max = max - cur.typ = BlockTypeToFieldType(iter.Type()) - } - } - - if cur.max != InvalidMinNanoTime { - tsmValues[string(fieldKey)] = cur - } - } - - vals := make([]cursors.MeasurementField, 0, len(tsmValues)) - for key, val := range tsmValues { - vals = append(vals, cursors.MeasurementField{Key: key, Type: val.typ, Timestamp: val.max}) - } - - return cursors.NewMeasurementFieldsSliceIteratorWithStats([]cursors.MeasurementFields{{Fields: vals}}, stats), nil -} - -func (e *Engine) fieldsNoPredicate(ctx context.Context, orgID influxdb.ID, bucketID influxdb.ID, measurement []byte, start int64, end int64) (cursors.MeasurementFieldsIterator, error) { - tsmValues := make(map[string]fieldTypeTime) - orgBucket := tsdb.EncodeName(orgID, bucketID) - - // TODO(edd): we need to clean up how we're encoding the prefix so that we - // don't have to remember to get it right everywhere we need to touch TSM data. 
- orgBucketEsc := models.EscapeMeasurement(orgBucket[:]) - - mt := models.Tags{models.NewTag(models.MeasurementTagKeyBytes, measurement)} - tsmKeyPrefix := mt.AppendHashKey(orgBucketEsc) - tsmKeyPrefix = append(tsmKeyPrefix, ',') - - var stats cursors.CursorStats - var canceled bool - - e.FileStore.ForEachFile(func(f TSMFile) bool { - // Check the context before touching each tsm file - select { - case <-ctx.Done(): - canceled = true - return false - default: - } - if f.OverlapsTimeRange(start, end) && f.OverlapsKeyPrefixRange(tsmKeyPrefix, tsmKeyPrefix) { - // TODO(sgc): create f.TimeRangeIterator(minKey, maxKey, start, end) - iter := f.TimeRangeMaxTimeIterator(tsmKeyPrefix, start, end) - for i := 0; iter.Next(); i++ { - sfkey := iter.Key() - if !bytes.HasPrefix(sfkey, tsmKeyPrefix) { - // end of prefix - break - } - - max := iter.MaxTime() - if max == InvalidMinNanoTime { - continue - } - - _, fieldKey := SeriesAndFieldFromCompositeKey(sfkey) - v, ok := tsmValues[string(fieldKey)] - if !ok || v.max < max { - tsmValues[string(fieldKey)] = fieldTypeTime{ - typ: BlockTypeToFieldType(iter.Type()), - max: max, - } - } - } - stats.Add(iter.Stats()) - } - return true - }) - - if canceled { - return cursors.NewMeasurementFieldsSliceIteratorWithStats(nil, stats), ctx.Err() - } - - var ts cursors.TimestampArray - - // With performance in mind, we explicitly do not check the context - // while scanning the entries in the cache. - tsmKeyPrefixStr := string(tsmKeyPrefix) - _ = e.Cache.ApplyEntryFn(func(sfkey string, entry *entry) error { - if !strings.HasPrefix(sfkey, tsmKeyPrefixStr) { - return nil - } - - ts.Timestamps = entry.AppendTimestamps(ts.Timestamps[:0]) - if ts.Len() == 0 { - return nil - } - - sort.Sort(&ts) - - stats.ScannedValues += ts.Len() - stats.ScannedBytes += ts.Len() * 8 // sizeof timestamp - - if !ts.Contains(start, end) { - return nil - } - - max := ts.MaxTime() - - // TODO(edd): consider the []byte() conversion here. 
- _, fieldKey := SeriesAndFieldFromCompositeKey([]byte(sfkey)) - v, ok := tsmValues[string(fieldKey)] - if !ok || v.max < max { - tsmValues[string(fieldKey)] = fieldTypeTime{ - typ: BlockTypeToFieldType(entry.BlockType()), - max: max, - } - } - - return nil - }) - - vals := make([]cursors.MeasurementField, 0, len(tsmValues)) - for key, val := range tsmValues { - vals = append(vals, cursors.MeasurementField{Key: key, Type: val.typ, Timestamp: val.max}) - } - - return cursors.NewMeasurementFieldsSliceIteratorWithStats([]cursors.MeasurementFields{{Fields: vals}}, stats), nil -} - -func AddMeasurementToExpr(measurement string, base influxql.Expr) influxql.Expr { - // \x00 = '' - expr := &influxql.BinaryExpr{ - LHS: &influxql.VarRef{ - Val: models.MeasurementTagKey, - Type: influxql.Tag, - }, - Op: influxql.EQ, - RHS: &influxql.StringLiteral{ - Val: measurement, - }, - } - - if base != nil { - // \x00 = '' AND (base) - expr = &influxql.BinaryExpr{ - LHS: expr, - Op: influxql.AND, - RHS: &influxql.ParenExpr{ - Expr: base, - }, - } - } - - return expr -} - -func statsFromTimeRangeMaxTimeIters(stats cursors.CursorStats, iters []*TimeRangeMaxTimeIterator) cursors.CursorStats { - for _, iter := range iters { - stats.Add(iter.Stats()) - } - return stats -} diff --git a/tsdb/tsm1/engine_measurement_schema_test.go b/tsdb/tsm1/engine_measurement_schema_test.go deleted file mode 100644 index adbcd27275..0000000000 --- a/tsdb/tsm1/engine_measurement_schema_test.go +++ /dev/null @@ -1,1199 +0,0 @@ -package tsm1_test - -import ( - "context" - "fmt" - "math" - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb/cursors" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" - "github.com/influxdata/influxql" - "github.com/stretchr/testify/assert" -) - -func TestEngine_MeasurementCancelContext(t *testing.T) { - e, err := NewEngine(tsm1.NewConfig(), t) - if err != nil { - t.Fatal(err) - } - if err := e.Open(context.Background()); err != nil { - t.Fatal(err) - } - defer e.Close() - - var ( - org influxdb.ID = 0x6000 - bucket influxdb.ID = 0x6100 - ) - - e.MustWritePointsString(org, bucket, ` -cpuB,host=0B,os=linux value=1.1 101 -cpuB,host=AB,os=linux value=1.2 102 -cpuB,host=AB,os=linux value=1.3 104 -cpuB,host=CB,os=linux value=1.3 104 -cpuB,host=CB,os=linux value=1.3 105 -cpuB,host=DB,os=macOS value=1.3 106 -memB,host=DB,os=macOS value=1.3 101`) - - // send some points to TSM data - e.MustWriteSnapshot() - - e.MustWritePointsString(org, bucket, ` -cpuB,host=0B,os=linux value=1.1 201 -cpuB,host=AB,os=linux value=1.2 202 -cpuB,host=AB,os=linux value=1.3 204 -cpuB,host=BB,os=linux value=1.3 204 -cpuB,host=BB,os=linux value=1.3 205 -cpuB,host=EB,os=macOS value=1.3 206 -memB,host=EB,os=macOS value=1.3 201`) - - t.Run("cancel MeasurementNames", func(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - cancel() - - iter, err := e.MeasurementNames(ctx, org, bucket, 0, math.MaxInt64, nil) - if err == nil { - t.Fatal("MeasurementNames: expected error but got nothing") - } else if err.Error() != "context canceled" { - t.Fatalf("MeasurementNames: error %v", err) - } - - if got := iter.Stats(); !cmp.Equal(got, cursors.CursorStats{}) { - t.Errorf("unexpected Stats: -got/+exp\n%v", cmp.Diff(got, cursors.CursorStats{})) - } - }) -} - -func TestEngine_MeasurementNames(t *testing.T) { - e, err := NewEngine(tsm1.NewConfig(), t) - if err != nil { - t.Fatal(err) - } - if err := 
e.Open(context.Background()); err != nil { - t.Fatal(err) - } - defer e.Close() - - orgs := []struct { - org, bucket influxdb.ID - }{ - { - org: 0x5020, - bucket: 0x5100, - }, - { - org: 0x6000, - bucket: 0x6100, - }, - } - - // this org will require escaping the 0x20 byte - e.MustWritePointsString(orgs[0].org, orgs[0].bucket, ` -cpu,cpu0=v,cpu1=v,cpu2=v f=1 101 -cpu,cpu1=v f=1 103 -cpu,cpu2=v f=1 105 -cpu,cpu0=v,cpu2=v f=1 107 -cpu,cpu2=v,cpu3=v,other=c f=1 109 -mem,mem0=v,mem1=v,other=m f=1 101`) - e.MustWritePointsString(orgs[1].org, orgs[1].bucket, ` -cpu,cpu0=v,cpu1=v,cpu2=v f=1 101 -cpu,cpu1=v f=1 103 -cpu,cpu2=v f=1 105 -cpu,cpu0=v,cpu2=v f=1 107 -cpu,cpu2=v,cpu3=v,other=c f=1 109 -mem,mem0=v,mem1=v,other=m f=1 101`) - - // send some points to TSM data - e.MustWriteSnapshot() - - // delete some data from the first bucket - e.MustDeleteBucketRange(orgs[0].org, orgs[0].bucket, 0, 105) - - // leave some points in the cache - e.MustWritePointsString(orgs[0].org, orgs[0].bucket, ` -cpu,cpu3=v,cpu4=v,cpu5=v f=1 201 -cpu,cpu4=v f=1 203 -cpu,cpu3=v f=1 205 -cpu,cpu3=v,cpu4=v f=1 207 -cpu,cpu4=v,cpu5=v,other=c f=1 209 -mem,mem1=v,mem2=v,other=m f=1 201`) - e.MustWritePointsString(orgs[1].org, orgs[1].bucket, ` -cpu,cpu3=v,cpu4=v,cpu5=v f=1 201 -cpu,cpu4=v f=1 203 -cpu,cpu3=v f=1 205 -cpu,cpu3=v,cpu4=v f=1 207 -cpu,cpu4=v,cpu5=v,other=c f=1 209 -mem,mem1=v,mem2=v,other=m f=1 201`) - - type args struct { - org int - min, max int64 - expr string - } - - var tests = []struct { - name string - args args - exp []string - expStats cursors.CursorStats - }{ - // *********************** - // * queries for the first org, which has some deleted data - // *********************** - - { - name: "TSM and cache", - args: args{ - org: 0, - min: 0, - max: 300, - }, - exp: []string{"cpu", "mem"}, - expStats: cursors.CursorStats{ScannedValues: 1, ScannedBytes: 8}, - }, - { - name: "only TSM", - args: args{ - org: 0, - min: 0, - max: 199, - }, - exp: []string{"cpu"}, - expStats: cursors.CursorStats{ScannedValues: 1, ScannedBytes: 8}, - }, - { - name: "only cache", - args: args{ - org: 0, - min: 200, - max: 299, - }, - exp: []string{"cpu", "mem"}, - expStats: cursors.CursorStats{ScannedValues: 2, ScannedBytes: 16}, - }, - { - name: "one timestamp TSM/data", - args: args{ - org: 0, - min: 107, - max: 107, - }, - exp: []string{"cpu"}, - expStats: cursors.CursorStats{ScannedValues: 1, ScannedBytes: 8}, - }, - { - name: "one timestamp cache/data", - args: args{ - org: 0, - min: 207, - max: 207, - }, - exp: []string{"cpu"}, - expStats: cursors.CursorStats{ScannedValues: 3, ScannedBytes: 24}, - }, - { - name: "one timestamp TSM/nodata", - args: args{ - org: 0, - min: 102, - max: 102, - }, - exp: nil, - expStats: cursors.CursorStats{ScannedValues: 6, ScannedBytes: 48}, - }, - { - name: "one timestamp cache/nodata", - args: args{ - org: 0, - min: 202, - max: 202, - }, - exp: nil, - expStats: cursors.CursorStats{ScannedValues: 6, ScannedBytes: 48}, - }, - - // queries with predicates - { - name: "predicate/equal", - args: args{ - org: 0, - min: 0, - max: 300, - expr: `cpu4 = 'v'`, - }, - exp: []string{"cpu"}, - expStats: cursors.CursorStats{ScannedValues: 1, ScannedBytes: 8}, - }, - { - name: "predicate/regexp", - args: args{ - org: 0, - min: 0, - max: 300, - expr: `other =~ /c|m/`, - }, - exp: []string{"cpu", "mem"}, - expStats: cursors.CursorStats{ScannedValues: 1, ScannedBytes: 8}, - }, - - // *********************** - // * queries for the second org, which has no deleted data - // *********************** - { - name: 
"TSM and cache", - args: args{ - org: 1, - min: 0, - max: 300, - }, - exp: []string{"cpu", "mem"}, - expStats: cursors.CursorStats{ScannedValues: 0, ScannedBytes: 0}, - }, - } - for _, tc := range tests { - t.Run(fmt.Sprintf("org%d/%s", tc.args.org, tc.name), func(t *testing.T) { - a := tc.args - var expr influxql.Expr - if len(a.expr) > 0 { - expr = influxql.MustParseExpr(a.expr) - } - - iter, err := e.MeasurementNames(context.Background(), orgs[a.org].org, orgs[a.org].bucket, a.min, a.max, expr) - if err != nil { - t.Fatalf("MeasurementNames: error %v", err) - } - - if got := cursors.StringIteratorToSlice(iter); !cmp.Equal(got, tc.exp) { - t.Errorf("unexpected MeasurementNames: -got/+exp\n%v", cmp.Diff(got, tc.exp)) - } - - if got := iter.Stats(); !cmp.Equal(got, tc.expStats) { - t.Errorf("unexpected Stats: -got/+exp\n%v", cmp.Diff(got, tc.expStats)) - } - }) - } -} - -func TestEngine_MeasurementTagValues(t *testing.T) { - e, err := NewEngine(tsm1.NewConfig(), t) - if err != nil { - t.Fatal(err) - } - if err := e.Open(context.Background()); err != nil { - t.Fatal(err) - } - defer e.Close() - - orgs := []struct { - org, bucket influxdb.ID - }{ - { - org: 0x5020, - bucket: 0x5100, - }, - { - org: 0x6000, - bucket: 0x6100, - }, - } - - // this org will require escaping the 0x20 byte - e.MustWritePointsString(orgs[0].org, orgs[0].bucket, ` -cpuA,host=0A,os=linux value=1.1 101 -cpuA,host=AA,os=linux value=1.2 102 -cpuA,host=AA,os=linux value=1.3 104 -cpuA,host=CA,os=linux value=1.3 104 -cpuA,host=CA,os=linux value=1.3 105 -cpuA,host=DA,os=macOS value=1.3 106 -memA,host=DA,os=macOS value=1.3 101`) - e.MustWritePointsString(orgs[1].org, orgs[1].bucket, ` -cpuB,host=0B,os=linux value=1.1 101 -cpuB,host=AB,os=linux value=1.2 102 -cpuB,host=AB,os=linux value=1.3 104 -cpuB,host=CB,os=linux value=1.3 104 -cpuB,host=CB,os=linux value=1.3 105 -cpuB,host=DB,os=macOS value=1.3 106 -memB,host=DB,os=macOS value=1.3 101`) - - // send some points to TSM data - e.MustWriteSnapshot() - - // delete some data from the first bucket - e.MustDeleteBucketRange(orgs[0].org, orgs[0].bucket, 0, 105) - - // leave some points in the cache - e.MustWritePointsString(orgs[0].org, orgs[0].bucket, ` -cpuA,host=0A,os=linux value=1.1 201 -cpuA,host=AA,os=linux value=1.2 202 -cpuA,host=AA,os=linux value=1.3 204 -cpuA,host=BA,os=macOS value=1.3 204 -cpuA,host=BA,os=macOS value=1.3 205 -cpuA,host=EA,os=linux value=1.3 206 -memA,host=EA,os=linux value=1.3 201`) - e.MustWritePointsString(orgs[1].org, orgs[1].bucket, ` -cpuB,host=0B,os=linux value=1.1 201 -cpuB,host=AB,os=linux value=1.2 202 -cpuB,host=AB,os=linux value=1.3 204 -cpuB,host=BB,os=linux value=1.3 204 -cpuB,host=BB,os=linux value=1.3 205 -cpuB,host=EB,os=macOS value=1.3 206 -memB,host=EB,os=macOS value=1.3 201`) - - type args struct { - org int - m string - key string - min, max int64 - expr string - } - - var tests = []struct { - name string - args args - exp []string - expStats cursors.CursorStats - }{ - // *********************** - // * queries for the first org, which has some deleted data - // *********************** - - // host tag - { - name: "TSM and cache", - args: args{ - org: 0, - m: "cpuA", - key: "host", - min: 0, - max: 300, - }, - exp: []string{"0A", "AA", "BA", "DA", "EA"}, - expStats: cursors.CursorStats{ScannedValues: 6, ScannedBytes: 48}, - }, - { - name: "cpuA only TSM", - args: args{ - org: 0, - m: "cpuA", - key: "host", - min: 0, - max: 199, - }, - exp: []string{"DA"}, - expStats: cursors.CursorStats{ScannedValues: 6, ScannedBytes: 48}, - }, - { - 
name: "memA all time", - args: args{ - org: 0, - m: "memA", - key: "host", - min: 0, - max: 1000, - }, - exp: []string{"EA"}, - expStats: cursors.CursorStats{ScannedValues: 1, ScannedBytes: 8}, - }, - { - name: "cpuB only TSM", - args: args{ - org: 1, - m: "cpuB", - key: "host", - min: 0, - max: 199, - }, - exp: []string{"0B", "AB", "CB", "DB"}, - expStats: cursors.CursorStats{ScannedValues: 3, ScannedBytes: 24}, - }, - { - name: "only cache", - args: args{ - org: 0, - m: "cpuA", - key: "host", - min: 200, - max: 299, - }, - exp: []string{"0A", "AA", "BA", "EA"}, - expStats: cursors.CursorStats{ScannedValues: 6, ScannedBytes: 48}, - }, - { - name: "one timestamp TSM/data", - args: args{ - org: 0, - m: "cpuA", - key: "host", - min: 106, - max: 106, - }, - exp: []string{"DA"}, - expStats: cursors.CursorStats{ScannedValues: 6, ScannedBytes: 48}, - }, - { - name: "one timestamp cache/data", - args: args{ - org: 0, - m: "cpuA", - key: "host", - min: 201, - max: 201, - }, - exp: []string{"0A"}, - expStats: cursors.CursorStats{ScannedValues: 6, ScannedBytes: 48}, - }, - { - name: "one timestamp TSM/nodata", - args: args{ - org: 0, - m: "cpuA", - key: "host", - min: 103, - max: 103, - }, - exp: nil, - expStats: cursors.CursorStats{ScannedValues: 6, ScannedBytes: 48}, - }, - { - name: "one timestamp cache/nodata", - args: args{ - org: 0, - m: "cpuA", - key: "host", - min: 203, - max: 203, - }, - exp: nil, - expStats: cursors.CursorStats{ScannedValues: 6, ScannedBytes: 48}, - }, - - // queries with predicates - { - name: "predicate/macOS", - args: args{ - org: 0, - m: "cpuA", - key: "host", - min: 0, - max: 300, - expr: `os = 'macOS'`, - }, - exp: []string{"BA", "DA"}, - expStats: cursors.CursorStats{ScannedValues: 2, ScannedBytes: 16}, - }, - { - name: "predicate/linux", - args: args{ - org: 0, - m: "cpuA", - key: "host", - min: 0, - max: 300, - expr: `os = 'linux'`, - }, - exp: []string{"0A", "AA", "EA"}, - expStats: cursors.CursorStats{ScannedValues: 4, ScannedBytes: 32}, - }, - - // *********************** - // * queries for the second org, which has no deleted data - // *********************** - { - name: "all data", - args: args{ - org: 1, - m: "cpuB", - key: "host", - min: 0, - max: 1000, - }, - exp: []string{"0B", "AB", "BB", "CB", "DB", "EB"}, - expStats: cursors.CursorStats{ScannedValues: 3, ScannedBytes: 24}, - }, - - // *********************** - // * other scenarios - // *********************** - { - // ensure StringIterator is never nil - name: "predicate/no candidate series", - args: args{ - org: 1, - m: "cpuB", - key: "host", - min: 0, - max: 1000, - expr: `foo = 'bar'`, - }, - exp: nil, - expStats: cursors.CursorStats{ScannedValues: 0, ScannedBytes: 0}, - }, - { - name: "prefix substring without predicate", - args: args{ - org: 1, - m: "cpu", - key: "host", - min: 0, - max: 1000, - }, - expStats: cursors.CursorStats{}, - }, - { - name: "prefix substring with predicate", - args: args{ - org: 1, - m: "cpu", - key: "host", - min: 0, - max: 1000, - expr: `os = 'linux'`, - }, - expStats: cursors.CursorStats{}, - }, - } - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - a := tc.args - var expr influxql.Expr - if len(a.expr) > 0 { - expr = influxql.MustParseExpr(a.expr) - } - - iter, err := e.MeasurementTagValues(context.Background(), orgs[a.org].org, orgs[a.org].bucket, a.m, a.key, a.min, a.max, expr) - if err != nil { - t.Fatalf("TagValues: error %v", err) - } - - if got := cursors.StringIteratorToSlice(iter); !cmp.Equal(got, tc.exp) { - t.Errorf("unexpected TagValues: 
-got/+exp\n%v", cmp.Diff(got, tc.exp)) - } - - if got := iter.Stats(); !cmp.Equal(got, tc.expStats) { - t.Errorf("unexpected Stats: -got/+exp\n%v", cmp.Diff(got, tc.expStats)) - } - }) - } -} - -func TestEngine_MeasurementTagKeys(t *testing.T) { - e, err := NewEngine(tsm1.NewConfig(), t) - if err != nil { - t.Fatal(err) - } - if err := e.Open(context.Background()); err != nil { - t.Fatal(err) - } - defer e.Close() - - orgs := []struct { - org, bucket influxdb.ID - }{ - { - org: 0x5020, - bucket: 0x5100, - }, - { - org: 0x6000, - bucket: 0x6100, - }, - } - - // this org will require escaping the 0x20 byte - e.MustWritePointsString(orgs[0].org, orgs[0].bucket, ` -cpu,cpu0=v,cpu1=v,cpu2=v f=1 101 -cpu,cpu1=v f=1 103 -cpu,cpu2=v f=1 105 -cpu,cpu0=v,cpu2=v f=1 107 -cpu,cpu2=v,cpu3=v f=1 109 -mem,mem0=v,mem1=v f=1 101`) - e.MustWritePointsString(orgs[1].org, orgs[1].bucket, ` -cpu,cpu0=v,cpu1=v,cpu2=v f=1 101 -cpu,cpu1=v f=1 103 -cpu,cpu2=v f=1 105 -cpu,cpu0=v,cpu2=v f=1 107 -cpu,cpu2=v,cpu3=v f=1 109 -mem,mem0=v,mem1=v f=1 101`) - - // send some points to TSM data - e.MustWriteSnapshot() - - // delete some data from the first bucket - e.MustDeleteBucketRange(orgs[0].org, orgs[0].bucket, 0, 105) - - // leave some points in the cache - e.MustWritePointsString(orgs[0].org, orgs[0].bucket, ` -cpu,cpu3=v,cpu4=v,cpu5=v f=1 201 -cpu,cpu4=v f=1 203 -cpu,cpu3=v f=1 205 -cpu,cpu3=v,cpu4=v f=1 207 -cpu,cpu4=v,cpu5=v f=1 209 -mem,mem1=v,mem2=v f=1 201`) - e.MustWritePointsString(orgs[1].org, orgs[1].bucket, ` -cpu,cpu3=v,cpu4=v,cpu5=v f=1 201 -cpu,cpu4=v f=1 203 -cpu,cpu3=v f=1 205 -cpu,cpu3=v,cpu4=v f=1 207 -cpu,cpu4=v,cpu5=v f=1 209 -mem,mem1=v,mem2=v f=1 201`) - - type args struct { - org int - m string - min, max int64 - expr string - } - - var tests = []struct { - name string - args args - exp []string - expStats cursors.CursorStats - }{ - // *********************** - // * queries for the first org, which has some deleted data - // *********************** - - { - name: "TSM and cache", - args: args{ - org: 0, - m: "cpu", - min: 0, - max: 300, - }, - exp: []string{models.MeasurementTagKey, "cpu0", "cpu2", "cpu3", "cpu4", "cpu5", models.FieldKeyTagKey}, - expStats: cursors.CursorStats{ScannedValues: 2, ScannedBytes: 16}, - }, - { - name: "only TSM", - args: args{ - org: 0, - m: "cpu", - min: 0, - max: 199, - }, - exp: []string{models.MeasurementTagKey, "cpu0", "cpu2", "cpu3", models.FieldKeyTagKey}, - expStats: cursors.CursorStats{ScannedValues: 4, ScannedBytes: 32}, - }, - { - name: "only cache", - args: args{ - org: 0, - m: "cpu", - min: 200, - max: 299, - }, - exp: []string{models.MeasurementTagKey, "cpu3", "cpu4", "cpu5", models.FieldKeyTagKey}, - expStats: cursors.CursorStats{ScannedValues: 3, ScannedBytes: 24}, - }, - { - name: "one timestamp TSM/data", - args: args{ - org: 0, - m: "cpu", - min: 107, - max: 107, - }, - exp: []string{models.MeasurementTagKey, "cpu0", "cpu2", models.FieldKeyTagKey}, - expStats: cursors.CursorStats{ScannedValues: 5, ScannedBytes: 40}, - }, - { - name: "one timestamp cache/data", - args: args{ - org: 0, - m: "cpu", - min: 207, - max: 207, - }, - exp: []string{models.MeasurementTagKey, "cpu3", "cpu4", models.FieldKeyTagKey}, - expStats: cursors.CursorStats{ScannedValues: 4, ScannedBytes: 32}, - }, - { - name: "one timestamp TSM/nodata", - args: args{ - org: 0, - m: "cpu", - min: 102, - max: 102, - }, - exp: nil, - expStats: cursors.CursorStats{ScannedValues: 5, ScannedBytes: 40}, - }, - { - name: "one timestamp cache/nodata", - args: args{ - org: 0, - m: "cpu", - min: 
202, - max: 202, - }, - exp: nil, - expStats: cursors.CursorStats{ScannedValues: 5, ScannedBytes: 40}, - }, - - // queries with predicates - { - name: "predicate/all time/cpu", - args: args{ - org: 0, - m: "cpu", - min: 0, - max: 300, - expr: `cpu0 = 'v' OR cpu4 = 'v'`, - }, - exp: []string{models.MeasurementTagKey, "cpu0", "cpu2", "cpu3", "cpu4", "cpu5", models.FieldKeyTagKey}, - expStats: cursors.CursorStats{ScannedValues: 2, ScannedBytes: 16}, - }, - { - name: "predicate/all time/mem", - args: args{ - org: 0, - m: "mem", - min: 0, - max: 300, - expr: `mem1 = 'v'`, - }, - exp: []string{models.MeasurementTagKey, "mem1", "mem2", models.FieldKeyTagKey}, - expStats: cursors.CursorStats{ScannedValues: 1, ScannedBytes: 8}, - }, - { - name: "predicate/all time/cpu0", - args: args{ - org: 0, - m: "cpu", - min: 0, - max: 300, - expr: "cpu0 = 'v'", - }, - exp: []string{models.MeasurementTagKey, "cpu0", "cpu2", models.FieldKeyTagKey}, - expStats: cursors.CursorStats{ScannedValues: 0, ScannedBytes: 0}, - }, - { - name: "predicate/all time/cpu3", - args: args{ - org: 0, - m: "cpu", - min: 0, - max: 300, - expr: "cpu3 = 'v'", - }, - exp: []string{models.MeasurementTagKey, "cpu2", "cpu3", "cpu4", "cpu5", models.FieldKeyTagKey}, - expStats: cursors.CursorStats{ScannedValues: 2, ScannedBytes: 16}, - }, - - // *********************** - // * queries for the second org, which has no deleted data - // *********************** - { - name: "TSM and cache", - args: args{ - org: 1, - m: "mem", - min: 0, - max: 300, - }, - exp: []string{models.MeasurementTagKey, "mem0", "mem1", "mem2", models.FieldKeyTagKey}, - expStats: cursors.CursorStats{ScannedValues: 1, ScannedBytes: 8}, - }, - - // *********************** - // * other scenarios - // *********************** - { - // ensure StringIterator is never nil - name: "predicate/no candidate series", - args: args{ - org: 0, - min: 0, - max: 300, - expr: "foo = 'bar'", - }, - exp: nil, - expStats: cursors.CursorStats{ScannedValues: 0, ScannedBytes: 0}, - }, - { - name: "prefix substring without predicate", - args: args{ - org: 0, - m: "cp", - min: 0, - max: 1000, - }, - expStats: cursors.CursorStats{}, - }, - { - name: "prefix substring with predicate", - args: args{ - org: 0, - m: "cp", - min: 0, - max: 1000, - expr: `cpu = 'v'`, - }, - expStats: cursors.CursorStats{}, - }, - } - for _, tc := range tests { - t.Run(fmt.Sprintf("org%d/%s", tc.args.org, tc.name), func(t *testing.T) { - a := tc.args - var expr influxql.Expr - if len(a.expr) > 0 { - expr = influxql.MustParseExpr(a.expr) - } - - iter, err := e.MeasurementTagKeys(context.Background(), orgs[a.org].org, orgs[a.org].bucket, a.m, a.min, a.max, expr) - if err != nil { - t.Fatalf("TagKeys: error %v", err) - } - - if got := cursors.StringIteratorToSlice(iter); !cmp.Equal(got, tc.exp) { - t.Errorf("unexpected TagKeys: -got/+exp\n%v", cmp.Diff(got, tc.exp)) - } - - if got := iter.Stats(); !cmp.Equal(got, tc.expStats) { - t.Errorf("unexpected Stats: -got/+exp\n%v", cmp.Diff(got, tc.expStats)) - } - }) - } -} - -func TestEngine_MeasurementFields(t *testing.T) { - e, err := NewEngine(tsm1.NewConfig(), t) - if err != nil { - t.Fatal(err) - } - if err := e.Open(context.Background()); err != nil { - t.Fatal(err) - } - defer e.Close() - - orgs := []struct { - org, bucket influxdb.ID - }{ - { - org: 0x5020, - bucket: 0x5100, - }, - { - org: 0x6000, - bucket: 0x6100, - }, - } - - // this org will require escaping the 0x20 byte - e.MustWritePointsString(orgs[0].org, orgs[0].bucket, ` -m00,tag00=v00,tag10=v10 i=1i 101 
-m00,tag00=v00,tag10=v10 i=1i 102 -m00,tag00=v00,tag10=v10 f=1 101 -m00,tag00=v00,tag10=v10 i=1i 108 -m00,tag00=v00,tag10=v10 f=1 109 -m00,tag00=v00,tag10=v10 i=1i 109 -m01,tag00=v00,tag10=v10 b=true 101 -`) - e.MustWritePointsString(orgs[1].org, orgs[1].bucket, ` -m10,foo=v barF=50 101 -`) - - // send some points to TSM data - e.MustWriteSnapshot() - - // delete some data from the first bucket - e.MustDeleteBucketRange(orgs[0].org, orgs[0].bucket, 0, 105) - - // leave some points in the cache - e.MustWritePointsString(orgs[0].org, orgs[0].bucket, ` -m00,tag00=v00,tag10=v10 i=2i 201 -m00,tag00=v00,tag10=v10 i=2i 202 -m00,tag00=v00,tag10=v10 f=2 201 -m00,tag00=v00,tag10=v11 i="s" 202 -m00,tag00=v00,tag10=v11 i="s" 208 -m00,tag00=v00,tag10=v11 i="s" 209 -m01,tag00=v00,tag10=v10 b=true 201 -`) - e.MustWritePointsString(orgs[1].org, orgs[1].bucket, ` -m10,foo=v barS="60" 501 -`) - - type args struct { - org int - m string - min, max int64 - expr string - } - - makeStats := func(v int) cursors.CursorStats { - return cursors.CursorStats{ - ScannedValues: v, - ScannedBytes: v * 8, - } - } - - var tests = []struct { - name string - args args - exp []cursors.MeasurementField - expStats cursors.CursorStats - }{ - // *********************** - // * queries for the first org, which has some deleted data - // *********************** - { - name: "TSM and cache", - args: args{ - org: 0, - m: "m00", - min: 0, - max: 300, - }, - exp: []cursors.MeasurementField{{Key: "i", Type: cursors.String, Timestamp: 209}, {Key: "f", Type: cursors.Float, Timestamp: 201}}, - expStats: makeStats(12), - }, - { - name: "m00 only TSM", - args: args{ - org: 0, - m: "m00", - min: 0, - max: 199, - }, - exp: []cursors.MeasurementField{{Key: "i", Type: cursors.Integer, Timestamp: 109}, {Key: "f", Type: cursors.Float, Timestamp: 109}}, - expStats: makeStats(12), - }, - { - name: "m01 all time", - args: args{ - org: 0, - m: "m01", - min: 0, - max: 1000, - }, - exp: []cursors.MeasurementField{{Key: "b", Type: cursors.Boolean, Timestamp: 201}}, - expStats: makeStats(1), - }, - { - name: "m10 only TSM", - args: args{ - org: 1, - m: "m10", - min: 0, - max: 199, - }, - exp: []cursors.MeasurementField{{Key: "barF", Type: cursors.Float, Timestamp: 101}}, - expStats: makeStats(1), - }, - { - name: "only cache", - args: args{ - org: 0, - m: "m00", - min: 200, - max: 299, - }, - exp: []cursors.MeasurementField{{Key: "i", Type: cursors.String, Timestamp: 209}, {Key: "f", Type: cursors.Float, Timestamp: 201}}, - expStats: makeStats(6), - }, - { - name: "one timestamp TSM/data", - args: args{ - org: 0, - m: "m00", - min: 109, - max: 109, - }, - exp: []cursors.MeasurementField{{Key: "i", Type: cursors.Integer, Timestamp: 109}, {Key: "f", Type: cursors.Float, Timestamp: 109}}, - expStats: makeStats(6), - }, - { - name: "one timestamp cache/data", - args: args{ - org: 0, - m: "m00", - min: 201, - max: 201, - }, - exp: []cursors.MeasurementField{{Key: "i", Type: cursors.Integer, Timestamp: 202}, {Key: "f", Type: cursors.Float, Timestamp: 201}}, - expStats: makeStats(6), - }, - { - name: "one timestamp change type cache/data", - args: args{ - org: 0, - m: "m00", - min: 202, - max: 202, - }, - exp: []cursors.MeasurementField{{Key: "i", Type: cursors.String, Timestamp: 209}}, - expStats: makeStats(6), - }, - { - name: "one timestamp TSM/nodata", - args: args{ - org: 0, - m: "m00", - min: 103, - max: 103, - }, - exp: nil, - expStats: makeStats(12), - }, - { - name: "one timestamp cache/nodata", - args: args{ - org: 0, - m: "m00", - min: 203, - max: 203, 
- }, - exp: nil, - expStats: makeStats(6), - }, - - // queries with predicates - { - name: "predicate/v10", - args: args{ - org: 0, - m: "m00", - min: 0, - max: 300, - expr: `tag10 = 'v10'`, - }, - exp: []cursors.MeasurementField{{Key: "i", Type: cursors.Integer, Timestamp: 202}, {Key: "f", Type: cursors.Float, Timestamp: 201}}, - expStats: makeStats(3), - }, - { - name: "predicate/v11", - args: args{ - org: 0, - m: "m00", - min: 0, - max: 300, - expr: `tag10 = 'v11'`, - }, - exp: []cursors.MeasurementField{{Key: "i", Type: cursors.String, Timestamp: 209}}, - expStats: makeStats(3), - }, - - // *********************** - // * queries for the second org, which has no deleted data - // *********************** - { - name: "all data", - args: args{ - org: 1, - m: "m10", - min: 0, - max: 1000, - }, - exp: []cursors.MeasurementField{{Key: "barF", Type: cursors.Float, Timestamp: 101}, {Key: "barS", Type: cursors.String, Timestamp: 501}}, - expStats: makeStats(1), - }, - - // *********************** - // * other scenarios - // *********************** - { - // ensure StringIterator is never nil - name: "predicate/no candidate series", - args: args{ - org: 1, - m: "m10", - min: 0, - max: 1000, - expr: `foo = 'nonexistent'`, - }, - exp: nil, - expStats: makeStats(0), - }, - { - name: "prefix substring without predicate", - args: args{ - org: 0, - m: "m0", - min: 0, - max: 1000, - }, - exp: nil, - expStats: makeStats(0), - }, - { - name: "prefix substring with predicate", - args: args{ - org: 0, - m: "m0", - min: 0, - max: 1000, - expr: `tag10 = 'v10'`, - }, - exp: nil, - expStats: makeStats(0), - }, - } - for _, tc := range tests { - t.Run(fmt.Sprintf("org%d/%s", tc.args.org, tc.name), func(t *testing.T) { - a := tc.args - var expr influxql.Expr - if len(a.expr) > 0 { - expr = influxql.MustParseExpr(a.expr) - } - - iter, err := e.MeasurementFields(context.Background(), orgs[a.org].org, orgs[a.org].bucket, a.m, a.min, a.max, expr) - assert.NoError(t, err) - - if got := cursors.MeasurementFieldsIteratorFlatMap(iter); !assert.ElementsMatch(t, tc.exp, got) { - return - } - - if got := iter.Stats(); !assert.Equal(t, tc.expStats, got) { - return - } - }) - } -} - -// Verifies AddMeasurementToExpr amends the given influxql.Expr -// with a predicate to restrict results to a single measurement -func TestAddMeasurementToExpr(t *testing.T) { - tests := []struct { - name string - measurement string - expr influxql.Expr - exp string - }{ - { - name: "no expression", - measurement: "foo", - expr: nil, - exp: "\"\x00\"::tag = 'foo'", - }, - { - name: "simple expression", - measurement: "foo", - expr: influxql.MustParseExpr(`bar::tag = 'v1'`), - exp: "\"\x00\"::tag = 'foo' AND (bar::tag = 'v1')", - }, - { - name: "regex expression", - measurement: "foo", - expr: influxql.MustParseExpr(`bar::tag =~ /v1/`), - exp: "\"\x00\"::tag = 'foo' AND (bar::tag =~ /v1/)", - }, - { - name: "multiple binary expressions", - measurement: "foo", - expr: influxql.MustParseExpr(`(bar = 'a1' OR bar = 'a2') AND cpu = 'cpu0'`), - exp: "\"\x00\"::tag = 'foo' AND ((bar = 'a1' OR bar = 'a2') AND cpu = 'cpu0')", - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := tsm1.AddMeasurementToExpr(tt.measurement, tt.expr).String() - assert.Equal(t, tt.exp, got, "unexpected value for expression") - }) - } -} diff --git a/tsdb/tsm1/engine_schema.go b/tsdb/tsm1/engine_schema.go deleted file mode 100644 index 8f032d915f..0000000000 --- a/tsdb/tsm1/engine_schema.go +++ /dev/null @@ -1,652 +0,0 @@ -package tsm1 - -import ( - 
"bytes" - "context" - "errors" - "fmt" - "sort" - "strings" - - "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/cursors" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" - "github.com/influxdata/influxql" -) - -// cancelCheckInterval represents the period at which various schema calls -// will check for a canceled context. It is important this -// is not too frequent, or it could cause expensive context switches in -// tight loops. -const cancelCheckInterval = 5000 - -// TagValues returns an iterator which enumerates the values for the specific -// tagKey in the given bucket matching the predicate within the -// time range [start, end]. -// -// TagValues will always return a StringIterator if there is no error. -// -// If the context is canceled before TagValues has finished processing, a non-nil -// error will be returned along with a partial result of the already scanned values. -func (e *Engine) TagValues(ctx context.Context, orgID, bucketID influxdb.ID, tagKey string, start, end int64, predicate influxql.Expr) (cursors.StringIterator, error) { - if predicate == nil { - return e.tagValuesNoPredicate(ctx, orgID, bucketID, nil, []byte(tagKey), start, end) - } - - return e.tagValuesPredicate(ctx, orgID, bucketID, nil, []byte(tagKey), start, end, predicate) -} - -func (e *Engine) tagValuesNoPredicate(ctx context.Context, orgID, bucketID influxdb.ID, measurement, tagKeyBytes []byte, start, end int64) (cursors.StringIterator, error) { - tsmValues := make(map[string]struct{}) - var tags models.Tags - - orgBucket := tsdb.EncodeName(orgID, bucketID) - - // TODO(edd): we need to clean up how we're encoding the prefix so that we - // don't have to remember to get it right everywhere we need to touch TSM data. - orgBucketEsc := models.EscapeMeasurement(orgBucket[:]) - - tsmKeyPrefix := orgBucketEsc - if len(measurement) > 0 { - // append the measurement tag key to the prefix - mt := models.Tags{models.NewTag(models.MeasurementTagKeyBytes, measurement)} - tsmKeyPrefix = mt.AppendHashKey(tsmKeyPrefix) - tsmKeyPrefix = append(tsmKeyPrefix, ',') - } - - // TODO(sgc): extend prefix when filtering by \x00 == - - var stats cursors.CursorStats - var canceled bool - - e.FileStore.ForEachFile(func(f TSMFile) bool { - // Check the context before accessing each tsm file - select { - case <-ctx.Done(): - canceled = true - return false - default: - } - if f.OverlapsTimeRange(start, end) && f.OverlapsKeyPrefixRange(tsmKeyPrefix, tsmKeyPrefix) { - // TODO(sgc): create f.TimeRangeIterator(minKey, maxKey, start, end) - iter := f.TimeRangeIterator(tsmKeyPrefix, start, end) - for i := 0; iter.Next(); i++ { - sfkey := iter.Key() - if !bytes.HasPrefix(sfkey, tsmKeyPrefix) { - // end of prefix - break - } - - key, _ := SeriesAndFieldFromCompositeKey(sfkey) - tags = models.ParseTagsWithTags(key, tags[:0]) - curVal := tags.Get(tagKeyBytes) - if len(curVal) == 0 { - continue - } - - if _, ok := tsmValues[string(curVal)]; ok { - continue - } - - if iter.HasData() { - tsmValues[string(curVal)] = struct{}{} - } - } - stats.Add(iter.Stats()) - } - return true - }) - - if canceled { - return cursors.NewStringSliceIteratorWithStats(nil, stats), ctx.Err() - } - - var ts cursors.TimestampArray - - // With performance in mind, we explicitly do not check the context - // while scanning the entries in the cache. 
- tsmKeyprefixStr := string(tsmKeyPrefix) - _ = e.Cache.ApplyEntryFn(func(sfkey string, entry *entry) error { - if !strings.HasPrefix(sfkey, tsmKeyprefixStr) { - return nil - } - - // TODO(edd): consider the []byte() conversion here. - key, _ := SeriesAndFieldFromCompositeKey([]byte(sfkey)) - tags = models.ParseTagsWithTags(key, tags[:0]) - curVal := tags.Get(tagKeyBytes) - if len(curVal) == 0 { - return nil - } - - if _, ok := tsmValues[string(curVal)]; ok { - return nil - } - - ts.Timestamps = entry.AppendTimestamps(ts.Timestamps[:0]) - if ts.Len() > 0 { - sort.Sort(&ts) - - stats.ScannedValues += ts.Len() - stats.ScannedBytes += ts.Len() * 8 // sizeof timestamp - - if ts.Contains(start, end) { - tsmValues[string(curVal)] = struct{}{} - } - } - - return nil - }) - - vals := make([]string, 0, len(tsmValues)) - for val := range tsmValues { - vals = append(vals, val) - } - sort.Strings(vals) - - return cursors.NewStringSliceIteratorWithStats(vals, stats), nil -} - -func (e *Engine) tagValuesPredicate(ctx context.Context, orgID, bucketID influxdb.ID, measurement, tagKeyBytes []byte, start, end int64, predicate influxql.Expr) (cursors.StringIterator, error) { - if err := ValidateTagPredicate(predicate); err != nil { - return nil, err - } - - orgBucket := tsdb.EncodeName(orgID, bucketID) - - keys, err := e.findCandidateKeys(ctx, orgBucket[:], predicate) - if err != nil { - return cursors.EmptyStringIterator, err - } - - if len(keys) == 0 { - return cursors.EmptyStringIterator, nil - } - - var files []TSMFile - defer func() { - for _, f := range files { - f.Unref() - } - }() - var iters []*TimeRangeIterator - - // TODO(edd): we need to clean up how we're encoding the prefix so that we - // don't have to remember to get it right everywhere we need to touch TSM data. - orgBucketEsc := models.EscapeMeasurement(orgBucket[:]) - - tsmKeyPrefix := orgBucketEsc - if len(measurement) > 0 { - // append the measurement tag key to the prefix - mt := models.Tags{models.NewTag(models.MeasurementTagKeyBytes, measurement)} - tsmKeyPrefix = mt.AppendHashKey(tsmKeyPrefix) - tsmKeyPrefix = append(tsmKeyPrefix, ',') - } - - var canceled bool - - e.FileStore.ForEachFile(func(f TSMFile) bool { - // Check the context before accessing each tsm file - select { - case <-ctx.Done(): - canceled = true - return false - default: - } - if f.OverlapsTimeRange(start, end) && f.OverlapsKeyPrefixRange(tsmKeyPrefix, tsmKeyPrefix) { - f.Ref() - files = append(files, f) - iters = append(iters, f.TimeRangeIterator(tsmKeyPrefix, start, end)) - } - return true - }) - - var stats cursors.CursorStats - - if canceled { - stats = statsFromIters(stats, iters) - return cursors.NewStringSliceIteratorWithStats(nil, stats), ctx.Err() - } - - tsmValues := make(map[string]struct{}) - - // reusable buffers - var ( - tags models.Tags - keybuf []byte - sfkey []byte - ts cursors.TimestampArray - ) - - for i := range keys { - // to keep cache scans fast, check context every 'cancelCheckInterval' iteratons - if i%cancelCheckInterval == 0 { - select { - case <-ctx.Done(): - stats = statsFromIters(stats, iters) - return cursors.NewStringSliceIteratorWithStats(nil, stats), ctx.Err() - default: - } - } - - _, tags = seriesfile.ParseSeriesKeyInto(keys[i], tags[:0]) - curVal := tags.Get(tagKeyBytes) - if len(curVal) == 0 { - continue - } - - if _, ok := tsmValues[string(curVal)]; ok { - continue - } - - // orgBucketEsc is already escaped, so no need to use models.AppendMakeKey, which - // unescapes and escapes the value again. 
The degenerate case is if the orgBucketEsc - // has escaped values, causing two allocations per key - keybuf = append(keybuf[:0], orgBucketEsc...) - keybuf = tags.AppendHashKey(keybuf) - sfkey = AppendSeriesFieldKeyBytes(sfkey[:0], keybuf, tags.Get(models.FieldKeyTagKeyBytes)) - - ts.Timestamps = e.Cache.AppendTimestamps(sfkey, ts.Timestamps[:0]) - if ts.Len() > 0 { - sort.Sort(&ts) - - stats.ScannedValues += ts.Len() - stats.ScannedBytes += ts.Len() * 8 // sizeof timestamp - - if ts.Contains(start, end) { - tsmValues[string(curVal)] = struct{}{} - } - continue - } - - for _, iter := range iters { - if exact, _ := iter.Seek(sfkey); !exact { - continue - } - - if iter.HasData() { - tsmValues[string(curVal)] = struct{}{} - break - } - } - } - - vals := make([]string, 0, len(tsmValues)) - for val := range tsmValues { - vals = append(vals, val) - } - sort.Strings(vals) - - stats = statsFromIters(stats, iters) - return cursors.NewStringSliceIteratorWithStats(vals, stats), err -} - -func (e *Engine) findCandidateKeys(ctx context.Context, orgBucket []byte, predicate influxql.Expr) ([][]byte, error) { - // determine candidate series keys - sitr, err := e.index.MeasurementSeriesByExprIterator(orgBucket, predicate) - if err != nil { - return nil, err - } else if sitr == nil { - return nil, nil - } - defer sitr.Close() - - var keys [][]byte - for i := 0; ; i++ { - // to keep series file index scans fast, - // check context every 'cancelCheckInterval' iteratons - if i%cancelCheckInterval == 0 { - select { - case <-ctx.Done(): - return keys, ctx.Err() - default: - } - } - - elem, err := sitr.Next() - if err != nil { - return nil, err - } else if elem.SeriesID.IsZero() { - break - } - - key := e.sfile.SeriesKey(elem.SeriesID) - if len(key) == 0 { - continue - } - keys = append(keys, key) - } - - return keys, nil -} - -// TagKeys returns an iterator which enumerates the tag keys for the given -// bucket matching the predicate within the time range [start, end]. -// -// TagKeys will always return a StringIterator if there is no error. -// -// If the context is canceled before TagKeys has finished processing, a non-nil -// error will be returned along with a partial result of the already scanned keys. -func (e *Engine) TagKeys(ctx context.Context, orgID, bucketID influxdb.ID, start, end int64, predicate influxql.Expr) (cursors.StringIterator, error) { - if predicate == nil { - return e.tagKeysNoPredicate(ctx, orgID, bucketID, nil, start, end) - } - - return e.tagKeysPredicate(ctx, orgID, bucketID, nil, start, end, predicate) -} - -func (e *Engine) tagKeysNoPredicate(ctx context.Context, orgID, bucketID influxdb.ID, measurement []byte, start, end int64) (cursors.StringIterator, error) { - var tags models.Tags - - orgBucket := tsdb.EncodeName(orgID, bucketID) - - // TODO(edd): we need to clean up how we're encoding the prefix so that we - // don't have to remember to get it right everywhere we need to touch TSM data. 
- orgBucketEsc := models.EscapeMeasurement(orgBucket[:]) - - tsmKeyPrefix := orgBucketEsc - if len(measurement) > 0 { - // append the measurement tag key to the prefix - mt := models.Tags{models.NewTag(models.MeasurementTagKeyBytes, measurement)} - tsmKeyPrefix = mt.AppendHashKey(tsmKeyPrefix) - tsmKeyPrefix = append(tsmKeyPrefix, ',') - } - - var keyset models.TagKeysSet - - // TODO(sgc): extend prefix when filtering by \x00 == - - var stats cursors.CursorStats - var canceled bool - - var files unrefs - defer func() { files.Unref() }() - - e.FileStore.ForEachFile(func(f TSMFile) bool { - // Check the context before touching each tsm file - select { - case <-ctx.Done(): - canceled = true - return false - default: - } - - var hasRef bool - if f.OverlapsTimeRange(start, end) && f.OverlapsKeyPrefixRange(tsmKeyPrefix, tsmKeyPrefix) { - // TODO(sgc): create f.TimeRangeIterator(minKey, maxKey, start, end) - iter := f.TimeRangeIterator(tsmKeyPrefix, start, end) - for i := 0; iter.Next(); i++ { - sfkey := iter.Key() - if !bytes.HasPrefix(sfkey, tsmKeyPrefix) { - // end of prefix - break - } - - key, _ := SeriesAndFieldFromCompositeKey(sfkey) - tags = models.ParseTagsWithTags(key, tags[:0]) - if keyset.IsSupersetKeys(tags) { - continue - } - - if iter.HasData() { - keyset.UnionKeys(tags) - - // Add reference to ensure tags are valid for the outer function. - if !hasRef { - f.Ref() - files, hasRef = append(files, f), true - } - } - } - stats.Add(iter.Stats()) - } - return true - }) - - if canceled { - return cursors.NewStringSliceIteratorWithStats(nil, stats), ctx.Err() - } - - var ts cursors.TimestampArray - - // With performance in mind, we explicitly do not check the context - // while scanning the entries in the cache. - tsmKeyprefixStr := string(tsmKeyPrefix) - _ = e.Cache.ApplyEntryFn(func(sfkey string, entry *entry) error { - if !strings.HasPrefix(sfkey, tsmKeyprefixStr) { - return nil - } - - // TODO(edd): consider []byte conversion here. - key, _ := SeriesAndFieldFromCompositeKey([]byte(sfkey)) - tags = models.ParseTagsWithTags(key, tags[:0]) - if keyset.IsSupersetKeys(tags) { - return nil - } - - ts.Timestamps = entry.AppendTimestamps(ts.Timestamps[:0]) - if ts.Len() > 0 { - sort.Sort(&ts) - - stats.ScannedValues += ts.Len() - stats.ScannedBytes += ts.Len() * 8 // sizeof timestamp - - if ts.Contains(start, end) { - keyset.UnionKeys(tags) - } - } - - return nil - }) - - return cursors.NewStringSliceIteratorWithStats(keyset.Keys(), stats), nil -} - -func (e *Engine) tagKeysPredicate(ctx context.Context, orgID, bucketID influxdb.ID, measurement []byte, start, end int64, predicate influxql.Expr) (cursors.StringIterator, error) { - if err := ValidateTagPredicate(predicate); err != nil { - return nil, err - } - - orgBucket := tsdb.EncodeName(orgID, bucketID) - - keys, err := e.findCandidateKeys(ctx, orgBucket[:], predicate) - if err != nil { - return cursors.EmptyStringIterator, err - } - - if len(keys) == 0 { - return cursors.EmptyStringIterator, nil - } - - var files []TSMFile - defer func() { - for _, f := range files { - f.Unref() - } - }() - var iters []*TimeRangeIterator - - // TODO(edd): we need to clean up how we're encoding the prefix so that we - // don't have to remember to get it right everywhere we need to touch TSM data. 
- orgBucketEsc := models.EscapeMeasurement(orgBucket[:]) - - tsmKeyPrefix := orgBucketEsc - if len(measurement) > 0 { - // append the measurement tag key to the prefix - mt := models.Tags{models.NewTag(models.MeasurementTagKeyBytes, measurement)} - tsmKeyPrefix = mt.AppendHashKey(tsmKeyPrefix) - tsmKeyPrefix = append(tsmKeyPrefix, ',') - } - - var canceled bool - - e.FileStore.ForEachFile(func(f TSMFile) bool { - // Check the context before touching each tsm file - select { - case <-ctx.Done(): - canceled = true - return false - default: - } - if f.OverlapsTimeRange(start, end) && f.OverlapsKeyPrefixRange(tsmKeyPrefix, tsmKeyPrefix) { - f.Ref() - files = append(files, f) - iters = append(iters, f.TimeRangeIterator(tsmKeyPrefix, start, end)) - } - return true - }) - - var stats cursors.CursorStats - - if canceled { - stats = statsFromIters(stats, iters) - return cursors.NewStringSliceIteratorWithStats(nil, stats), ctx.Err() - } - - var keyset models.TagKeysSet - - // reusable buffers - var ( - tags models.Tags - keybuf []byte - sfkey []byte - ts cursors.TimestampArray - ) - - for i := range keys { - // to keep cache scans fast, check context every 'cancelCheckInterval' iteratons - if i%cancelCheckInterval == 0 { - select { - case <-ctx.Done(): - stats = statsFromIters(stats, iters) - return cursors.NewStringSliceIteratorWithStats(nil, stats), ctx.Err() - default: - } - } - - _, tags = seriesfile.ParseSeriesKeyInto(keys[i], tags[:0]) - if keyset.IsSupersetKeys(tags) { - continue - } - - // orgBucketEsc is already escaped, so no need to use models.AppendMakeKey, which - // unescapes and escapes the value again. The degenerate case is if the orgBucketEsc - // has escaped values, causing two allocations per key - keybuf = append(keybuf[:0], orgBucketEsc...) 
- keybuf = tags.AppendHashKey(keybuf) - sfkey = AppendSeriesFieldKeyBytes(sfkey[:0], keybuf, tags.Get(models.FieldKeyTagKeyBytes)) - - ts.Timestamps = e.Cache.AppendTimestamps(sfkey, ts.Timestamps[:0]) - if ts.Len() > 0 { - sort.Sort(&ts) - - stats.ScannedValues += ts.Len() - stats.ScannedBytes += ts.Len() * 8 // sizeof timestamp - - if ts.Contains(start, end) { - keyset.UnionKeys(tags) - continue - } - } - - for _, iter := range iters { - if exact, _ := iter.Seek(sfkey); !exact { - continue - } - - if iter.HasData() { - keyset.UnionKeys(tags) - break - } - } - } - - stats = statsFromIters(stats, iters) - return cursors.NewStringSliceIteratorWithStats(keyset.Keys(), stats), err -} - -func statsFromIters(stats cursors.CursorStats, iters []*TimeRangeIterator) cursors.CursorStats { - for _, iter := range iters { - stats.Add(iter.Stats()) - } - return stats -} - -var ( - errUnexpectedTagComparisonOperator = errors.New("unexpected tag comparison operator") - errNotImplemented = errors.New("not implemented") -) - -func ValidateTagPredicate(expr influxql.Expr) (err error) { - influxql.WalkFunc(expr, func(node influxql.Node) { - if err != nil { - return - } - - switch n := node.(type) { - case *influxql.BinaryExpr: - switch n.Op { - case influxql.EQ, influxql.EQREGEX, influxql.NEQREGEX, influxql.NEQ, influxql.OR, influxql.AND: - default: - err = errUnexpectedTagComparisonOperator - } - - switch r := n.LHS.(type) { - case *influxql.VarRef: - case *influxql.BinaryExpr: - case *influxql.ParenExpr: - default: - err = fmt.Errorf("binary expression: LHS must be tag key reference, got: %T", r) - } - - switch r := n.RHS.(type) { - case *influxql.StringLiteral: - case *influxql.RegexLiteral: - case *influxql.BinaryExpr: - case *influxql.ParenExpr: - default: - err = fmt.Errorf("binary expression: RHS must be string or regex, got: %T", r) - } - } - }) - return err -} - -func ValidateMeasurementNamesTagPredicate(expr influxql.Expr) (err error) { - influxql.WalkFunc(expr, func(node influxql.Node) { - if err != nil { - return - } - - switch n := node.(type) { - case *influxql.BinaryExpr: - switch n.Op { - case influxql.EQ, influxql.EQREGEX, influxql.OR, influxql.AND: - case influxql.NEQREGEX, influxql.NEQ: - err = errNotImplemented - default: - err = errUnexpectedTagComparisonOperator - } - - switch r := n.LHS.(type) { - case *influxql.VarRef: - case *influxql.BinaryExpr: - case *influxql.ParenExpr: - default: - err = fmt.Errorf("binary expression: LHS must be tag key reference, got: %T", r) - } - - switch r := n.RHS.(type) { - case *influxql.StringLiteral: - case *influxql.RegexLiteral: - case *influxql.BinaryExpr: - case *influxql.ParenExpr: - default: - err = fmt.Errorf("binary expression: RHS must be string or regex, got: %T", r) - } - } - }) - return err -} diff --git a/tsdb/tsm1/engine_schema_test.go b/tsdb/tsm1/engine_schema_test.go deleted file mode 100644 index 0869da149e..0000000000 --- a/tsdb/tsm1/engine_schema_test.go +++ /dev/null @@ -1,685 +0,0 @@ -package tsm1_test - -import ( - "context" - "fmt" - "math" - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb/cursors" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" - "github.com/influxdata/influxql" -) - -func TestEngine_CancelContext(t *testing.T) { - e, err := NewEngine(tsm1.NewConfig(), t) - if err != nil { - t.Fatal(err) - } - if err := e.Open(context.Background()); err != nil { - t.Fatal(err) - } - defer e.Close() - - var ( - 
org influxdb.ID = 0x6000 - bucket influxdb.ID = 0x6100 - ) - - e.MustWritePointsString(org, bucket, ` -cpuB,host=0B,os=linux value=1.1 101 -cpuB,host=AB,os=linux value=1.2 102 -cpuB,host=AB,os=linux value=1.3 104 -cpuB,host=CB,os=linux value=1.3 104 -cpuB,host=CB,os=linux value=1.3 105 -cpuB,host=DB,os=macOS value=1.3 106 -memB,host=DB,os=macOS value=1.3 101`) - - // send some points to TSM data - e.MustWriteSnapshot() - - e.MustWritePointsString(org, bucket, ` -cpuB,host=0B,os=linux value=1.1 201 -cpuB,host=AB,os=linux value=1.2 202 -cpuB,host=AB,os=linux value=1.3 204 -cpuB,host=BB,os=linux value=1.3 204 -cpuB,host=BB,os=linux value=1.3 205 -cpuB,host=EB,os=macOS value=1.3 206 -memB,host=EB,os=macOS value=1.3 201`) - - t.Run("cancel tag values no predicate", func(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - cancel() - - key := "host" - - iter, err := e.TagValues(ctx, org, bucket, key, 0, math.MaxInt64, nil) - if err == nil { - t.Fatal("TagValues: expected error but got nothing") - } else if err.Error() != "context canceled" { - t.Fatalf("TagValues: error %v", err) - } - - if got := iter.Stats(); !cmp.Equal(got, cursors.CursorStats{}) { - t.Errorf("unexpected Stats: -got/+exp\n%v", cmp.Diff(got, cursors.CursorStats{})) - } - }) - - t.Run("cancel tag values with predicate", func(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - cancel() - - key := "host" - predicate := influxql.MustParseExpr(`os = 'linux'`) - - iter, err := e.TagValues(ctx, org, bucket, key, 0, math.MaxInt64, predicate) - if err == nil { - t.Fatal("TagValues: expected error but got nothing") - } else if err.Error() != "context canceled" { - t.Fatalf("TagValues: error %v", err) - } - - if got := iter.Stats(); !cmp.Equal(got, cursors.CursorStats{}) { - t.Errorf("unexpected Stats: -got/+exp\n%v", cmp.Diff(got, cursors.CursorStats{})) - } - }) - - t.Run("cancel tag keys no predicate", func(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - cancel() - - iter, err := e.TagKeys(ctx, org, bucket, 0, math.MaxInt64, nil) - if err == nil { - t.Fatal("TagKeys: expected error but got nothing") - } else if err.Error() != "context canceled" { - t.Fatalf("TagKeys: error %v", err) - } - - if got := iter.Stats(); !cmp.Equal(got, cursors.CursorStats{}) { - t.Errorf("unexpected Stats: -got/+exp\n%v", cmp.Diff(got, cursors.CursorStats{})) - } - }) - - t.Run("cancel tag keys with predicate", func(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - cancel() - - predicate := influxql.MustParseExpr(`os = 'linux'`) - - iter, err := e.TagKeys(ctx, org, bucket, 0, math.MaxInt64, predicate) - if err == nil { - t.Fatal("TagKeys: expected error but got nothing") - } else if err.Error() != "context canceled" { - t.Fatalf("TagKeys: error %v", err) - } - - if got := iter.Stats(); !cmp.Equal(got, cursors.CursorStats{}) { - t.Errorf("unexpected Stats: -got/+exp\n%v", cmp.Diff(got, cursors.CursorStats{})) - } - }) -} - -func TestEngine_TagValues(t *testing.T) { - e, err := NewEngine(tsm1.NewConfig(), t) - if err != nil { - t.Fatal(err) - } - if err := e.Open(context.Background()); err != nil { - t.Fatal(err) - } - defer e.Close() - - orgs := []struct { - org, bucket influxdb.ID - }{ - { - org: 0x5020, - bucket: 0x5100, - }, - { - org: 0x6000, - bucket: 0x6100, - }, - } - - // this org will require escaping the 0x20 byte - e.MustWritePointsString(orgs[0].org, orgs[0].bucket, ` -cpuA,host=0A,os=linux value=1.1 101 -cpuA,host=AA,os=linux value=1.2 102 
-cpuA,host=AA,os=linux value=1.3 104 -cpuA,host=CA,os=linux value=1.3 104 -cpuA,host=CA,os=linux value=1.3 105 -cpuA,host=DA,os=macOS value=1.3 106 -memA,host=DA,os=macOS value=1.3 101`) - e.MustWritePointsString(orgs[1].org, orgs[1].bucket, ` -cpuB,host=0B,os=linux value=1.1 101 -cpuB,host=AB,os=linux value=1.2 102 -cpuB,host=AB,os=linux value=1.3 104 -cpuB,host=CB,os=linux value=1.3 104 -cpuB,host=CB,os=linux value=1.3 105 -cpuB,host=DB,os=macOS value=1.3 106 -memB,host=DB,os=macOS value=1.3 101`) - - // send some points to TSM data - e.MustWriteSnapshot() - - // delete some data from the first bucket - e.MustDeleteBucketRange(orgs[0].org, orgs[0].bucket, 0, 105) - - // leave some points in the cache - e.MustWritePointsString(orgs[0].org, orgs[0].bucket, ` -cpuA,host=0A,os=linux value=1.1 201 -cpuA,host=AA,os=linux value=1.2 202 -cpuA,host=AA,os=linux value=1.3 204 -cpuA,host=BA,os=macOS value=1.3 204 -cpuA,host=BA,os=macOS value=1.3 205 -cpuA,host=EA,os=linux value=1.3 206 -memA,host=EA,os=linux value=1.3 201`) - e.MustWritePointsString(orgs[1].org, orgs[1].bucket, ` -cpuB,host=0B,os=linux value=1.1 201 -cpuB,host=AB,os=linux value=1.2 202 -cpuB,host=AB,os=linux value=1.3 204 -cpuB,host=BB,os=linux value=1.3 204 -cpuB,host=BB,os=linux value=1.3 205 -cpuB,host=EB,os=macOS value=1.3 206 -memB,host=EB,os=macOS value=1.3 201`) - - type args struct { - org int - key string - min, max int64 - expr string - } - - var tests = []struct { - name string - args args - exp []string - expStats cursors.CursorStats - }{ - // *********************** - // * queries for the first org, which has some deleted data - // *********************** - - // host tag - { - name: "TSM and cache", - args: args{ - org: 0, - key: "host", - min: 0, - max: 300, - }, - exp: []string{"0A", "AA", "BA", "DA", "EA"}, - expStats: cursors.CursorStats{ScannedValues: 6, ScannedBytes: 48}, - }, - { - name: "only TSM", - args: args{ - org: 0, - key: "host", - min: 0, - max: 199, - }, - exp: []string{"DA"}, - expStats: cursors.CursorStats{ScannedValues: 7, ScannedBytes: 56}, - }, - { - name: "only cache", - args: args{ - org: 0, - key: "host", - min: 200, - max: 299, - }, - exp: []string{"0A", "AA", "BA", "EA"}, - expStats: cursors.CursorStats{ScannedValues: 6, ScannedBytes: 48}, - }, - { - name: "one timestamp TSM/data", - args: args{ - org: 0, - key: "host", - min: 106, - max: 106, - }, - exp: []string{"DA"}, - expStats: cursors.CursorStats{ScannedValues: 7, ScannedBytes: 56}, - }, - { - name: "one timestamp cache/data", - args: args{ - org: 0, - key: "host", - min: 201, - max: 201, - }, - exp: []string{"0A", "EA"}, - expStats: cursors.CursorStats{ScannedValues: 7, ScannedBytes: 56}, - }, - { - name: "one timestamp TSM/nodata", - args: args{ - org: 0, - key: "host", - min: 103, - max: 103, - }, - exp: nil, - expStats: cursors.CursorStats{ScannedValues: 7, ScannedBytes: 56}, - }, - { - name: "one timestamp cache/nodata", - args: args{ - org: 0, - key: "host", - min: 203, - max: 203, - }, - exp: nil, - expStats: cursors.CursorStats{ScannedValues: 7, ScannedBytes: 56}, - }, - - // models.MeasurementTagKey tag - { - name: "_measurement/all", - args: args{ - org: 0, - key: models.MeasurementTagKey, - min: 0, - max: 399, - }, - exp: []string{"cpuA", "memA"}, - expStats: cursors.CursorStats{ScannedValues: 1, ScannedBytes: 8}, - }, - { - name: "_measurement/some", - args: args{ - org: 0, - key: models.MeasurementTagKey, - min: 205, - max: 399, - }, - exp: []string{"cpuA"}, - expStats: cursors.CursorStats{ScannedValues: 3, ScannedBytes: 
24}, - }, - - // queries with predicates - { - name: "predicate/macOS", - args: args{ - org: 0, - key: "host", - min: 0, - max: 300, - expr: `os = 'macOS'`, - }, - exp: []string{"BA", "DA"}, - expStats: cursors.CursorStats{ScannedValues: 2, ScannedBytes: 16}, - }, - { - name: "predicate/linux", - args: args{ - org: 0, - key: "host", - min: 0, - max: 300, - expr: `os = 'linux'`, - }, - exp: []string{"0A", "AA", "EA"}, - expStats: cursors.CursorStats{ScannedValues: 4, ScannedBytes: 32}, - }, - - // *********************** - // * queries for the second org, which has no deleted data - // *********************** - { - name: "all data", - args: args{ - org: 1, - key: "host", - min: 0, - max: 1000, - }, - exp: []string{"0B", "AB", "BB", "CB", "DB", "EB"}, - expStats: cursors.CursorStats{ScannedValues: 3, ScannedBytes: 24}, - }, - - // *********************** - // * other scenarios - // *********************** - { - // ensure StringIterator is never nil - name: "predicate/no candidate series", - args: args{ - org: 1, - key: "host", - min: 0, - max: 1000, - expr: `foo = 'bar'`, - }, - exp: nil, - expStats: cursors.CursorStats{ScannedValues: 0, ScannedBytes: 0}, - }, - } - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - a := tc.args - var expr influxql.Expr - if len(a.expr) > 0 { - expr = influxql.MustParseExpr(a.expr) - } - - iter, err := e.TagValues(context.Background(), orgs[a.org].org, orgs[a.org].bucket, a.key, a.min, a.max, expr) - if err != nil { - t.Fatalf("TagValues: error %v", err) - } - - if got := cursors.StringIteratorToSlice(iter); !cmp.Equal(got, tc.exp) { - t.Errorf("unexpected TagValues: -got/+exp\n%v", cmp.Diff(got, tc.exp)) - } - - if got := iter.Stats(); !cmp.Equal(got, tc.expStats) { - t.Errorf("unexpected Stats: -got/+exp\n%v", cmp.Diff(got, tc.expStats)) - } - }) - } -} - -func TestEngine_TagKeys(t *testing.T) { - e, err := NewEngine(tsm1.NewConfig(), t) - if err != nil { - t.Fatal(err) - } - if err := e.Open(context.Background()); err != nil { - t.Fatal(err) - } - defer e.Close() - - orgs := []struct { - org, bucket influxdb.ID - }{ - { - org: 0x5020, - bucket: 0x5100, - }, - { - org: 0x6000, - bucket: 0x6100, - }, - } - - // this org will require escaping the 0x20 byte - e.MustWritePointsString(orgs[0].org, orgs[0].bucket, ` -cpu,cpu0=v,cpu1=v,cpu2=v f=1 101 -cpu,cpu1=v f=1 103 -cpu,cpu2=v f=1 105 -cpu,cpu0=v,cpu2=v f=1 107 -cpu,cpu2=v,cpu3=v f=1 109 -mem,mem0=v,mem1=v f=1 101`) - e.MustWritePointsString(orgs[1].org, orgs[1].bucket, ` -cpu,cpu0=v,cpu1=v,cpu2=v f=1 101 -cpu,cpu1=v f=1 103 -cpu,cpu2=v f=1 105 -cpu,cpu0=v,cpu2=v f=1 107 -cpu,cpu2=v,cpu3=v f=1 109 -mem,mem0=v,mem1=v f=1 101`) - - // send some points to TSM data - e.MustWriteSnapshot() - - // delete some data from the first bucket - e.MustDeleteBucketRange(orgs[0].org, orgs[0].bucket, 0, 105) - - // leave some points in the cache - e.MustWritePointsString(orgs[0].org, orgs[0].bucket, ` -cpu,cpu3=v,cpu4=v,cpu5=v f=1 201 -cpu,cpu4=v f=1 203 -cpu,cpu3=v f=1 205 -cpu,cpu3=v,cpu4=v f=1 207 -cpu,cpu4=v,cpu5=v f=1 209 -mem,mem1=v,mem2=v f=1 201`) - e.MustWritePointsString(orgs[1].org, orgs[1].bucket, ` -cpu,cpu3=v,cpu4=v,cpu5=v f=1 201 -cpu,cpu4=v f=1 203 -cpu,cpu3=v f=1 205 -cpu,cpu3=v,cpu4=v f=1 207 -cpu,cpu4=v,cpu5=v f=1 209 -mem,mem1=v,mem2=v f=1 201`) - - type args struct { - org int - min, max int64 - expr string - } - - var tests = []struct { - name string - args args - exp []string - expStats cursors.CursorStats - }{ - // *********************** - // * queries for the first org, which has 
some deleted data - // *********************** - - { - name: "TSM and cache", - args: args{ - org: 0, - min: 0, - max: 300, - }, - exp: []string{models.MeasurementTagKey, "cpu0", "cpu2", "cpu3", "cpu4", "cpu5", "mem1", "mem2", models.FieldKeyTagKey}, - expStats: cursors.CursorStats{ScannedValues: 3, ScannedBytes: 24}, - }, - { - name: "only TSM", - args: args{ - org: 0, - min: 0, - max: 199, - }, - exp: []string{models.MeasurementTagKey, "cpu0", "cpu2", "cpu3", models.FieldKeyTagKey}, - expStats: cursors.CursorStats{ScannedValues: 5, ScannedBytes: 40}, - }, - { - name: "only cache", - args: args{ - org: 0, - min: 200, - max: 299, - }, - exp: []string{models.MeasurementTagKey, "cpu3", "cpu4", "cpu5", "mem1", "mem2", models.FieldKeyTagKey}, - expStats: cursors.CursorStats{ScannedValues: 4, ScannedBytes: 32}, - }, - { - name: "one timestamp TSM/data", - args: args{ - org: 0, - min: 107, - max: 107, - }, - exp: []string{models.MeasurementTagKey, "cpu0", "cpu2", models.FieldKeyTagKey}, - expStats: cursors.CursorStats{ScannedValues: 6, ScannedBytes: 48}, - }, - { - name: "one timestamp cache/data", - args: args{ - org: 0, - min: 207, - max: 207, - }, - exp: []string{models.MeasurementTagKey, "cpu3", "cpu4", models.FieldKeyTagKey}, - expStats: cursors.CursorStats{ScannedValues: 5, ScannedBytes: 40}, - }, - { - name: "one timestamp TSM/nodata", - args: args{ - org: 0, - min: 102, - max: 102, - }, - exp: nil, - expStats: cursors.CursorStats{ScannedValues: 6, ScannedBytes: 48}, - }, - { - name: "one timestamp cache/nodata", - args: args{ - org: 0, - min: 202, - max: 202, - }, - exp: nil, - expStats: cursors.CursorStats{ScannedValues: 6, ScannedBytes: 48}, - }, - - // queries with predicates - { - name: "predicate/all time/cpu", - args: args{ - org: 0, - min: 0, - max: 300, - expr: "_m = 'cpu'", - }, - exp: []string{models.MeasurementTagKey, "cpu0", "cpu2", "cpu3", "cpu4", "cpu5", models.FieldKeyTagKey}, - expStats: cursors.CursorStats{ScannedValues: 2, ScannedBytes: 16}, - }, - { - name: "predicate/all time/mem", - args: args{ - org: 0, - min: 0, - max: 300, - expr: "_m = 'mem'", - }, - exp: []string{models.MeasurementTagKey, "mem1", "mem2", models.FieldKeyTagKey}, - expStats: cursors.CursorStats{ScannedValues: 1, ScannedBytes: 8}, - }, - { - name: "predicate/all time/cpu0", - args: args{ - org: 0, - min: 0, - max: 300, - expr: "cpu0 = 'v'", - }, - exp: []string{models.MeasurementTagKey, "cpu0", "cpu2", models.FieldKeyTagKey}, - expStats: cursors.CursorStats{ScannedValues: 0, ScannedBytes: 0}, - }, - { - name: "predicate/all time/cpu3", - args: args{ - org: 0, - min: 0, - max: 300, - expr: "cpu3 = 'v'", - }, - exp: []string{models.MeasurementTagKey, "cpu2", "cpu3", "cpu4", "cpu5", models.FieldKeyTagKey}, - expStats: cursors.CursorStats{ScannedValues: 2, ScannedBytes: 16}, - }, - - // *********************** - // * queries for the second org, which has no deleted data - // *********************** - { - name: "TSM and cache", - args: args{ - org: 1, - min: 0, - max: 300, - }, - exp: []string{models.MeasurementTagKey, "cpu0", "cpu1", "cpu2", "cpu3", "cpu4", "cpu5", "mem0", "mem1", "mem2", models.FieldKeyTagKey}, - expStats: cursors.CursorStats{ScannedValues: 2, ScannedBytes: 16}, - }, - - // *********************** - // * other scenarios - // *********************** - { - // ensure StringIterator is never nil - name: "predicate/no candidate series", - args: args{ - org: 0, - min: 0, - max: 300, - expr: "foo = 'bar'", - }, - exp: nil, - expStats: cursors.CursorStats{ScannedValues: 0, ScannedBytes: 0}, - 
}, - } - for _, tc := range tests { - t.Run(fmt.Sprintf("org%d/%s", tc.args.org, tc.name), func(t *testing.T) { - a := tc.args - var expr influxql.Expr - if len(a.expr) > 0 { - expr = influxql.MustParseExpr(a.expr) - expr = influxql.RewriteExpr(expr, func(expr influxql.Expr) influxql.Expr { - switch n := expr.(type) { - case *influxql.BinaryExpr: - if r, ok := n.LHS.(*influxql.VarRef); ok { - if r.Val == "_m" { - r.Val = models.MeasurementTagKey - } - } - } - return expr - }) - } - - iter, err := e.TagKeys(context.Background(), orgs[a.org].org, orgs[a.org].bucket, a.min, a.max, expr) - if err != nil { - t.Fatalf("TagKeys: error %v", err) - } - - if got := cursors.StringIteratorToSlice(iter); !cmp.Equal(got, tc.exp) { - t.Errorf("unexpected TagKeys: -got/+exp\n%v", cmp.Diff(got, tc.exp)) - } - - if got := iter.Stats(); !cmp.Equal(got, tc.expStats) { - t.Errorf("unexpected Stats: -got/+exp\n%v", cmp.Diff(got, tc.expStats)) - } - }) - } -} - -func TestValidateTagPredicate(t *testing.T) { - tests := []struct { - name string - expr string - wantErr bool - }{ - { - expr: `"_m" = 'foo'`, - wantErr: false, - }, - { - expr: `_m = 'foo'`, - wantErr: false, - }, - { - expr: `_m = foo`, - wantErr: true, - }, - { - expr: `_m = 5`, - wantErr: true, - }, - { - expr: `_m =~ //`, - wantErr: false, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if err := tsm1.ValidateTagPredicate(influxql.MustParseExpr(tt.expr)); (err != nil) != tt.wantErr { - t.Errorf("ValidateTagPredicate() error = %v, wantErr %v", err, tt.wantErr) - } - }) - } -} diff --git a/tsdb/tsm1/engine_test.go b/tsdb/tsm1/engine_test.go deleted file mode 100644 index 2c147228dd..0000000000 --- a/tsdb/tsm1/engine_test.go +++ /dev/null @@ -1,614 +0,0 @@ -package tsm1_test - -import ( - "context" - "fmt" - "io/ioutil" - "math" - "os" - "path/filepath" - "runtime" - "strings" - "sync" - "testing" - "time" - - "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/logger" - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/toml" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/seriesfile" - "github.com/influxdata/influxdb/v2/tsdb/tsi1" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" - "github.com/influxdata/influxql" - "go.uber.org/zap/zaptest" -) - -// Test that series id set gets updated and returned appropriately. -func TestIndex_SeriesIDSet(t *testing.T) { - engine := MustOpenEngine(t) - defer engine.Close() - - // Add some series. - engine.MustAddSeries("cpu", map[string]string{"host": "a", "region": "west"}) - engine.MustAddSeries("cpu", map[string]string{"host": "b", "region": "west"}) - engine.MustAddSeries("cpu", map[string]string{"host": "b"}) - engine.MustAddSeries("gpu", nil) - engine.MustAddSeries("gpu", map[string]string{"host": "b"}) - engine.MustAddSeries("mem", map[string]string{"host": "z"}) - - // Collect series IDs. - seriesIDMap := map[string]tsdb.SeriesID{} - for _, seriesID := range engine.sfile.SeriesIDs() { - if seriesID.IsZero() { - break - } - - name, tags := seriesfile.ParseSeriesKey(engine.sfile.SeriesKey(seriesID)) - key := fmt.Sprintf("%s%s", name, tags.HashKey()) - seriesIDMap[key] = seriesID - } - - for _, id := range seriesIDMap { - if !engine.SeriesIDSet().Contains(id) { - t.Fatalf("bitmap does not contain ID: %d", id) - } - } - - // Drop all the series for the gpu measurement and they should no longer - // be in the series ID set. 
- if err := engine.DeletePrefixRange(context.Background(), []byte("gpu"), math.MinInt64, math.MaxInt64, nil); err != nil { - t.Fatal(err) - } - - if engine.SeriesIDSet().Contains(seriesIDMap["gpu"]) { - t.Fatalf("bitmap does not contain ID: %d for key %s, but should", seriesIDMap["gpu"], "gpu") - } else if engine.SeriesIDSet().Contains(seriesIDMap["gpu,host=b"]) { - t.Fatalf("bitmap does not contain ID: %d for key %s, but should", seriesIDMap["gpu,host=b"], "gpu,host=b") - } - delete(seriesIDMap, "gpu") - delete(seriesIDMap, "gpu,host=b") - - // The rest of the keys should still be in the set. - for key, id := range seriesIDMap { - if !engine.SeriesIDSet().Contains(id) { - t.Fatalf("bitmap does not contain ID: %d for key %s, but should", id, key) - } - } - - // Reopen the engine, and the series should be re-added to the bitmap. - if err := engine.Reopen(); err != nil { - t.Fatal(err) - } - - // Check bitset is expected. - expected := tsdb.NewSeriesIDSet() - for _, id := range seriesIDMap { - expected.Add(id) - } - - if !engine.SeriesIDSet().Equals(expected) { - t.Fatalf("got bitset %s, expected %s", engine.SeriesIDSet().String(), expected.String()) - } -} - -func TestEngine_SnapshotsDisabled(t *testing.T) { - sfile := MustOpenSeriesFile() - defer sfile.Close() - - // Generate temporary file. - dir, _ := ioutil.TempDir("", "tsm") - defer os.RemoveAll(dir) - - // Create a tsm1 engine. - idx := MustOpenIndex(filepath.Join(dir, "index"), tsdb.NewSeriesIDSet(), sfile.SeriesFile) - defer idx.Close() - - config := tsm1.NewConfig() - e := tsm1.NewEngine(filepath.Join(dir, "data"), idx, config, - tsm1.WithCompactionPlanner(newMockPlanner())) - - e.SetEnabled(false) - if err := e.Open(context.Background()); err != nil { - t.Fatalf("failed to open tsm1 engine: %s", err.Error()) - } - defer e.Close() - - // Make sure Snapshots are disabled. - e.SetCompactionsEnabled(false) - e.Compactor.DisableSnapshots() - - // Writing a snapshot should not fail when the snapshot is empty - // even if snapshots are disabled. 
- if err := e.WriteSnapshot(context.Background(), tsm1.CacheStatusColdNoWrites); err != nil { - t.Fatalf("failed to snapshot: %s", err.Error()) - } -} - -func TestEngine_ShouldCompactCache(t *testing.T) { - nowTime := time.Now() - - e, err := NewEngine(tsm1.NewConfig(), t) - if err != nil { - t.Fatal(err) - } - - // mock the planner so compactions don't run during the test - e.CompactionPlan = &mockPlanner{} - e.SetEnabled(false) - if err := e.Open(context.Background()); err != nil { - t.Fatalf("failed to open tsm1 engine: %s", err.Error()) - } - defer e.Close() - - if got, exp := e.ShouldCompactCache(nowTime), tsm1.CacheStatusOkay; got != exp { - t.Fatalf("got status %v, exp status %v - nothing written to cache, so should not compact", got, exp) - } - - if err := e.WritePointsString("mm", "m,k=v f=3i"); err != nil { - t.Fatal(err) - } - - if got, exp := e.ShouldCompactCache(nowTime), tsm1.CacheStatusOkay; got != exp { - t.Fatalf("got status %v, exp status %v - cache size < flush threshold and nothing written to FileStore, so should not compact", got, exp) - } - - if got, exp := e.ShouldCompactCache(nowTime.Add(time.Hour)), tsm1.CacheStatusColdNoWrites; got != exp { - t.Fatalf("got status %v, exp status %v - last compaction was longer than flush write cold threshold, so should compact", got, exp) - } - - e.CacheFlushMemorySizeThreshold = 1 - if got, exp := e.ShouldCompactCache(nowTime), tsm1.CacheStatusSizeExceeded; got != exp { - t.Fatalf("got status %v, exp status %v - cache size > flush threshold, so should compact", got, exp) - } - - e.CacheFlushMemorySizeThreshold = 1024 // Reset. - if got, exp := e.ShouldCompactCache(nowTime), tsm1.CacheStatusOkay; got != exp { - t.Fatalf("got status %v, exp status %v - nothing written to cache, so should not compact", got, exp) - } - - e.CacheFlushAgeDurationThreshold = 100 * time.Millisecond - time.Sleep(250 * time.Millisecond) - if got, exp := e.ShouldCompactCache(nowTime), tsm1.CacheStatusAgeExceeded; got != exp { - t.Fatalf("got status %v, exp status %v - cache age > max age threshold, so should compact", got, exp) - } -} - -func makeBlockTypeSlice(n int) []byte { - r := make([]byte, n) - b := tsm1.BlockFloat64 - m := tsm1.BlockUnsigned + 1 - for i := 0; i < len(r); i++ { - r[i] = b % m - } - return r -} - -var blockType = influxql.Unknown - -func BenchmarkBlockTypeToInfluxQLDataType(b *testing.B) { - t := makeBlockTypeSlice(1000) - for i := 0; i < b.N; i++ { - for j := 0; j < len(t); j++ { - blockType = tsm1.BlockTypeToInfluxQLDataType(t[j]) - } - } -} - -// This test ensures that "sync: WaitGroup is reused before previous Wait has returned" is -// is not raised. -func TestEngine_DisableEnableCompactions_Concurrent(t *testing.T) { - e := MustOpenEngine(t) - defer e.Close() - - var wg sync.WaitGroup - wg.Add(2) - - go func() { - defer wg.Done() - for i := 0; i < 1000; i++ { - e.SetCompactionsEnabled(true) - e.SetCompactionsEnabled(false) - } - }() - - go func() { - defer wg.Done() - for i := 0; i < 1000; i++ { - e.SetCompactionsEnabled(false) - e.SetCompactionsEnabled(true) - } - }() - - done := make(chan struct{}) - go func() { - wg.Wait() - close(done) - }() - - // Wait for waitgroup or fail if it takes too long. 
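The wait in the deleted concurrency test is the standard Go idiom of funneling a WaitGroup into a channel so it can be raced against a timer. A self-contained sketch of just that idiom (the worker goroutines here are stand-ins, not engine calls):

package main

import (
	"fmt"
	"sync"
	"time"
)

func main() {
	var wg sync.WaitGroup
	for i := 0; i < 2; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			// stand-in for the SetCompactionsEnabled toggling loops above
		}()
	}

	// Convert the WaitGroup into a channel so it can appear in a select.
	done := make(chan struct{})
	go func() {
		wg.Wait()
		close(done)
	}()

	select {
	case <-time.NewTimer(30 * time.Second).C:
		fmt.Println("timed out waiting for goroutines")
	case <-done:
		fmt.Println("all goroutines finished")
	}
}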
- select { - case <-time.NewTimer(30 * time.Second).C: - t.Fatalf("timed out after 30 seconds waiting for waitgroup") - case <-done: - } -} - -func BenchmarkEngine_WritePoints(b *testing.B) { - batchSizes := []int{10, 100, 1000, 5000, 10000} - for _, sz := range batchSizes { - e := MustOpenEngine(b) - pp := make([]models.Point, 0, sz) - for i := 0; i < sz; i++ { - p := MustParsePointString(fmt.Sprintf("cpu,host=%d value=1.2", i), "mm") - pp = append(pp, p) - } - - b.Run(fmt.Sprintf("%d", sz), func(b *testing.B) { - b.ReportAllocs() - for i := 0; i < b.N; i++ { - err := e.WritePoints(pp) - if err != nil { - b.Fatal(err) - } - } - }) - e.Close() - } -} - -func BenchmarkEngine_WritePoints_Parallel(b *testing.B) { - batchSizes := []int{1000, 5000, 10000, 25000, 50000, 75000, 100000, 200000} - for _, sz := range batchSizes { - e := MustOpenEngine(b) - - cpus := runtime.GOMAXPROCS(0) - pp := make([]models.Point, 0, sz*cpus) - for i := 0; i < sz*cpus; i++ { - p := MustParsePointString(fmt.Sprintf("cpu,host=%d value=1.2,other=%di", i, i), "mm") - pp = append(pp, p) - } - - b.Run(fmt.Sprintf("%d", sz), func(b *testing.B) { - b.ReportAllocs() - for i := 0; i < b.N; i++ { - var wg sync.WaitGroup - errC := make(chan error) - for i := 0; i < cpus; i++ { - wg.Add(1) - go func(i int) { - defer wg.Done() - from, to := i*sz, (i+1)*sz - err := e.WritePoints(pp[from:to]) - if err != nil { - errC <- err - return - } - }(i) - } - - go func() { - wg.Wait() - close(errC) - }() - - for err := range errC { - if err != nil { - b.Error(err) - } - } - } - }) - e.Close() - } -} - -func BenchmarkEngine_DeletePrefixRange_Cache(b *testing.B) { - config := tsm1.NewConfig() - config.Cache.SnapshotMemorySize = toml.Size(256 * 1024 * 1024) - e, err := NewEngine(config, b) - if err != nil { - b.Fatal(err) - } - - if err := e.Open(context.Background()); err != nil { - b.Fatal(err) - } - - pp := make([]models.Point, 0, 100000) - for i := 0; i < 100000; i++ { - p := MustParsePointString(fmt.Sprintf("cpu-%d,host=%d value=1.2", i%1000, i), fmt.Sprintf("000000001122111100000000112211%d", i%1000)) - pp = append(pp, p) - } - - b.Run("exists", func(b *testing.B) { - b.ReportAllocs() - for i := 0; i < b.N; i++ { - b.StopTimer() - if err = e.WritePoints(pp); err != nil { - b.Fatal(err) - } - b.StartTimer() - - if err := e.DeletePrefixRange(context.Background(), []byte("0000000011221111000000001122112"), 0, math.MaxInt64, nil); err != nil { - b.Fatal(err) - } - } - }) - - b.Run("not_exists", func(b *testing.B) { - b.ReportAllocs() - for i := 0; i < b.N; i++ { - b.StopTimer() - if err = e.WritePoints(pp); err != nil { - b.Fatal(err) - } - b.StartTimer() - - if err := e.DeletePrefixRange(context.Background(), []byte("fooasdasdasdasdasd"), 0, math.MaxInt64, nil); err != nil { - b.Fatal(err) - } - } - }) - e.Close() -} - -// Engine is a test wrapper for tsm1.Engine. -type Engine struct { - *tsm1.Engine - root string - indexPath string - index *tsi1.Index - sfile *seriesfile.SeriesFile -} - -// NewEngine returns a new instance of Engine at a temporary location. -func NewEngine(config tsm1.Config, tb testing.TB) (*Engine, error) { - root, err := ioutil.TempDir("", "tsm1-") - if err != nil { - panic(err) - } - - // Setup series file. 
- sfile := seriesfile.NewSeriesFile(filepath.Join(root, "_series")) - sfile.Logger = zaptest.NewLogger(tb) - if testing.Verbose() { - sfile.Logger = logger.New(os.Stdout) - } - if err = sfile.Open(context.Background()); err != nil { - return nil, err - } - - idxPath := filepath.Join(root, "index") - idx := MustOpenIndex(idxPath, tsdb.NewSeriesIDSet(), sfile) - - tsm1Engine := tsm1.NewEngine(filepath.Join(root, "data"), idx, config, - tsm1.WithCompactionPlanner(newMockPlanner())) - - return &Engine{ - Engine: tsm1Engine, - root: root, - indexPath: idxPath, - index: idx, - sfile: sfile, - }, nil -} - -// MustOpenEngine returns a new, open instance of Engine. -func MustOpenEngine(tb testing.TB) *Engine { - e, err := NewEngine(tsm1.NewConfig(), tb) - if err != nil { - panic(err) - } - - if err := e.Open(context.Background()); err != nil { - panic(err) - } - return e -} - -// Close closes the engine and removes all underlying data. -func (e *Engine) Close() error { - return e.close(true) -} - -func (e *Engine) close(cleanup bool) error { - err := e.Engine.Close() - if err != nil { - return err - } - - if e.index != nil { - e.index.Close() - } - - if e.sfile != nil { - e.sfile.Close() - } - - if cleanup { - os.RemoveAll(e.root) - } - - return nil -} - -// Reopen closes and reopens the engine. -func (e *Engine) Reopen() error { - // Close engine without removing underlying engine data. - if err := e.close(false); err != nil { - return err - } - - // Re-open series file. Must create a new series file using the same data. - e.sfile = seriesfile.NewSeriesFile(e.sfile.Path()) - if err := e.sfile.Open(context.Background()); err != nil { - return err - } - - // Re-open index. - e.index = MustOpenIndex(e.indexPath, tsdb.NewSeriesIDSet(), e.sfile) - - // Re-initialize engine. - config := tsm1.NewConfig() - e.Engine = tsm1.NewEngine(filepath.Join(e.root, "data"), e.index, config, - tsm1.WithCompactionPlanner(newMockPlanner())) - - // Reopen engine - if err := e.Engine.Open(context.Background()); err != nil { - return err - } - - // Reload series data into index (no-op on TSI). - return nil -} - -// SeriesIDSet provides access to the underlying series id bitset in the engine's -// index. It will panic if the underlying index does not have a SeriesIDSet -// method. -func (e *Engine) SeriesIDSet() *tsdb.SeriesIDSet { - return e.index.SeriesIDSet() -} - -// AddSeries adds the provided series data to the index and writes a point to -// the engine with default values for a field and a time of now. -func (e *Engine) AddSeries(name string, tags map[string]string) error { - point, err := models.NewPoint(name, models.NewTags(tags), models.Fields{"v": 1.0}, time.Now()) - if err != nil { - return err - } - return e.writePoints(point) -} - -// WritePointsString calls WritePointsString on the underlying engine, but also -// adds the associated series to the index. -func (e *Engine) WritePointsString(mm string, ptstr ...string) error { - points, err := models.ParsePointsString(strings.Join(ptstr, "\n"), mm) - if err != nil { - return err - } - return e.writePoints(points...) -} - -// writePoints adds the series for the provided points to the index, and writes -// the point data to the engine. -func (e *Engine) writePoints(points ...models.Point) error { - // Write into the index. - collection := tsdb.NewSeriesCollection(points) - if err := e.index.CreateSeriesListIfNotExists(collection); err != nil { - return err - } - // Write the points into the cache/wal. 
- return e.WritePoints(points) -} - -// MustAddSeries calls AddSeries, panicking if there is an error. -func (e *Engine) MustAddSeries(name string, tags map[string]string) { - if err := e.AddSeries(name, tags); err != nil { - panic(err) - } -} - -// MustWriteSnapshot forces a snapshot of the engine. Panic on error. -func (e *Engine) MustWriteSnapshot() { - if err := e.WriteSnapshot(context.Background(), tsm1.CacheStatusColdNoWrites); err != nil { - panic(err) - } -} - -// MustWritePointsString parses and writes the specified points to the -// provided org and bucket. Panic on error. -func (e *Engine) MustWritePointsString(org, bucket influxdb.ID, buf string) { - err := e.writePoints(MustParseExplodePoints(org, bucket, buf)...) - if err != nil { - panic(err) - } -} - -// MustDeleteBucketRange calls DeletePrefixRange using the org and bucket for -// the prefix. Panic on error. -func (e *Engine) MustDeleteBucketRange(orgID, bucketID influxdb.ID, min, max int64) { - // TODO(edd): we need to clean up how we're encoding the prefix so that we - // don't have to remember to get it right everywhere we need to touch TSM data. - encoded := tsdb.EncodeName(orgID, bucketID) - name := models.EscapeMeasurement(encoded[:]) - - err := e.DeletePrefixRange(context.Background(), name, min, max, nil) - if err != nil { - panic(err) - } -} - -func MustOpenIndex(path string, seriesIDSet *tsdb.SeriesIDSet, sfile *seriesfile.SeriesFile) *tsi1.Index { - idx := tsi1.NewIndex(sfile, tsi1.NewConfig(), tsi1.WithPath(path)) - if err := idx.Open(context.Background()); err != nil { - panic(err) - } - return idx -} - -// SeriesFile is a test wrapper for tsdb.SeriesFile. -type SeriesFile struct { - *seriesfile.SeriesFile -} - -// NewSeriesFile returns a new instance of SeriesFile with a temporary file path. -func NewSeriesFile() *SeriesFile { - dir, err := ioutil.TempDir("", "tsdb-series-file-") - if err != nil { - panic(err) - } - return &SeriesFile{SeriesFile: seriesfile.NewSeriesFile(dir)} -} - -// MustOpenSeriesFile returns a new, open instance of SeriesFile. Panic on error. -func MustOpenSeriesFile() *SeriesFile { - f := NewSeriesFile() - if err := f.Open(context.Background()); err != nil { - panic(err) - } - return f -} - -// Close closes the log file and removes it from disk. -func (f *SeriesFile) Close() { - defer os.RemoveAll(f.Path()) - if err := f.SeriesFile.Close(); err != nil { - panic(err) - } -} - -// MustParsePointsString parses points from a string. Panic on error. -func MustParsePointsString(buf, mm string) []models.Point { - a, err := models.ParsePointsString(buf, mm) - if err != nil { - panic(err) - } - return a -} - -// MustParseExplodePoints parses points from a string and transforms using -// ExplodePoints using the provided org and bucket. Panic on error. -func MustParseExplodePoints(org, bucket influxdb.ID, buf string) []models.Point { - encoded := tsdb.EncodeName(org, bucket) - name := models.EscapeMeasurement(encoded[:]) - return MustParsePointsString(buf, string(name)) -} - -// MustParsePointString parses the first point from a string. Panic on error. 
-func MustParsePointString(buf, mm string) models.Point { return MustParsePointsString(buf, mm)[0] } - -type mockPlanner struct{} - -func newMockPlanner() tsm1.CompactionPlanner { - return &mockPlanner{} -} - -func (m *mockPlanner) Plan(lastWrite time.Time) []tsm1.CompactionGroup { return nil } -func (m *mockPlanner) PlanLevel(level int) []tsm1.CompactionGroup { return nil } -func (m *mockPlanner) PlanOptimize() []tsm1.CompactionGroup { return nil } -func (m *mockPlanner) Release(groups []tsm1.CompactionGroup) {} -func (m *mockPlanner) FullyCompacted() bool { return false } -func (m *mockPlanner) ForceFull() {} -func (m *mockPlanner) SetFileStore(fs *tsm1.FileStore) {} diff --git a/tsdb/tsm1/errors.go b/tsdb/tsm1/errors.go deleted file mode 100644 index 27f09b1d89..0000000000 --- a/tsdb/tsm1/errors.go +++ /dev/null @@ -1,11 +0,0 @@ -package tsm1 - -import "errors" - -var ( - // errFieldTypeConflict is returned when a new field already exists with a different type. - errFieldTypeConflict = errors.New("field type conflict") - - // errUnknownFieldType is returned when the type of a field cannot be determined. - errUnknownFieldType = errors.New("unknown field type") -) diff --git a/tsdb/tsm1/file_store.gen.go b/tsdb/tsm1/file_store.gen.go deleted file mode 100644 index 1db9b6731c..0000000000 --- a/tsdb/tsm1/file_store.gen.go +++ /dev/null @@ -1,933 +0,0 @@ -// Code generated by file_store.gen.go.tmpl. DO NOT EDIT. - -package tsm1 - -// ReadFloatBlock reads the next block as a set of float values. -func (c *KeyCursor) ReadFloatBlock(buf *[]FloatValue) ([]FloatValue, error) { -LOOP: - // No matching blocks to decode - if len(c.current) == 0 { - return nil, nil - } - - // First block is the oldest block containing the points we're searching for. - first := c.current[0] - *buf = (*buf)[:0] - var values FloatValues - values, err := first.r.ReadFloatBlockAt(&first.entry, buf) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(floatBlocksDecodedCounter).Add(1) - c.col.GetCounter(floatBlocksSizeCounter).Add(int64(first.entry.Size)) - } - - // Remove values we already read - values = values.Exclude(first.readMin, first.readMax) - - // Remove any tombstones - c.trbuf = first.r.TombstoneRange(c.key, c.trbuf[:0]) - values = excludeTombstonesFloatValues(c.trbuf, values) - // If there are no values in this first block (all tombstoned or previously read) and - // we have more potential blocks too search. Try again. 
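The tombstone filtering just above (excludeTombstonesFloatValues and its siblings at the end of each reader) is a plain range-exclusion over timestamps. A minimal sketch with illustrative types, assuming inclusive bounds as in the TimeRange pairs used here:

package main

import "fmt"

// timeRange stands in for the tombstone TimeRange pairs used above.
type timeRange struct{ Min, Max int64 }

// exclude drops every timestamp that falls inside any of the given ranges.
func exclude(ts []int64, ranges []timeRange) []int64 {
	var out []int64
	for _, t := range ts {
		tombstoned := false
		for _, r := range ranges {
			if t >= r.Min && t <= r.Max {
				tombstoned = true
				break
			}
		}
		if !tombstoned {
			out = append(out, t)
		}
	}
	return out
}

func main() {
	fmt.Println(exclude([]int64{1, 2, 3, 4, 5}, []timeRange{{Min: 2, Max: 3}})) // [1 4 5]
}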
- if values.Len() == 0 && len(c.current) > 0 { - c.current = c.current[1:] - goto LOOP - } - - // Only one block with this key and time range so return it - if len(c.current) == 1 { - if values.Len() > 0 { - first.markRead(values.MinTime(), values.MaxTime()) - } - return values, nil - } - - // Use the current block time range as our overlapping window - minT, maxT := first.readMin, first.readMax - if values.Len() > 0 { - minT, maxT = values.MinTime(), values.MaxTime() - } - if c.ascending { - // Blocks are ordered by generation, we may have values in the past in later blocks, if so, - // expand the window to include the min time range to ensure values are returned in ascending - // order - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.MinTime < minT && !cur.read() { - minT = cur.entry.MinTime - } - } - - // Find first block that overlaps our window - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.OverlapsTimeRange(minT, maxT) && !cur.read() { - // Shrink our window so it's the intersection of the first overlapping block and the - // first block. We do this to minimize the region that overlaps and needs to - // be merged. - if cur.entry.MaxTime > maxT { - maxT = cur.entry.MaxTime - } - values = values.Include(minT, maxT) - break - } - } - - // Search the remaining blocks that overlap our window and append their values so we can - // merge them. - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - // Skip this block if it doesn't contain points we looking for or they have already been read - if !cur.entry.OverlapsTimeRange(minT, maxT) || cur.read() { - cur.markRead(minT, maxT) - continue - } - - var a []FloatValue - var v FloatValues - v, err := cur.r.ReadFloatBlockAt(&cur.entry, &a) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(floatBlocksDecodedCounter).Add(1) - c.col.GetCounter(floatBlocksSizeCounter).Add(int64(cur.entry.Size)) - } - - c.trbuf = cur.r.TombstoneRange(c.key, c.trbuf[:0]) - // Remove any tombstoned values - v = excludeTombstonesFloatValues(c.trbuf, v) - - // Remove values we already read - v = v.Exclude(cur.readMin, cur.readMax) - - if v.Len() > 0 { - // Only use values in the overlapping window - v = v.Include(minT, maxT) - // Merge the remaining values with the existing - values = values.Merge(v) - } - cur.markRead(minT, maxT) - } - - } else { - // Blocks are ordered by generation, we may have values in the past in later blocks, if so, - // expand the window to include the max time range to ensure values are returned in descending - // order - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.MaxTime > maxT && !cur.read() { - maxT = cur.entry.MaxTime - } - } - - // Find first block that overlaps our window - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.OverlapsTimeRange(minT, maxT) && !cur.read() { - // Shrink our window so it's the intersection of the first overlapping block and the - // first block. We do this to minimize the region that overlaps and needs to - // be merged. - if cur.entry.MinTime < minT { - minT = cur.entry.MinTime - } - values = values.Include(minT, maxT) - break - } - } - - // Search the remaining blocks that overlap our window and append their values so we can - // merge them. 
- for i := 1; i < len(c.current); i++ { - cur := c.current[i] - // Skip this block if it doesn't contain points we looking for or they have already been read - if !cur.entry.OverlapsTimeRange(minT, maxT) || cur.read() { - cur.markRead(minT, maxT) - continue - } - - var a []FloatValue - var v FloatValues - v, err := cur.r.ReadFloatBlockAt(&cur.entry, &a) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(floatBlocksDecodedCounter).Add(1) - c.col.GetCounter(floatBlocksSizeCounter).Add(int64(cur.entry.Size)) - } - c.trbuf = cur.r.TombstoneRange(c.key, c.trbuf[:0]) - // Remove any tombstoned values - v = excludeTombstonesFloatValues(c.trbuf, v) - - // Remove values we already read - v = v.Exclude(cur.readMin, cur.readMax) - - // If the block we decoded should have all of it's values included, mark it as read so we - // don't use it again. - if v.Len() > 0 { - v = v.Include(minT, maxT) - // Merge the remaining values with the existing - values = v.Merge(values) - } - cur.markRead(minT, maxT) - } - } - - first.markRead(minT, maxT) - - return values, err -} - -func excludeTombstonesFloatValues(t []TimeRange, values FloatValues) FloatValues { - for i := range t { - values = values.Exclude(t[i].Min, t[i].Max) - } - return values -} - -// ReadIntegerBlock reads the next block as a set of integer values. -func (c *KeyCursor) ReadIntegerBlock(buf *[]IntegerValue) ([]IntegerValue, error) { -LOOP: - // No matching blocks to decode - if len(c.current) == 0 { - return nil, nil - } - - // First block is the oldest block containing the points we're searching for. - first := c.current[0] - *buf = (*buf)[:0] - var values IntegerValues - values, err := first.r.ReadIntegerBlockAt(&first.entry, buf) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(integerBlocksDecodedCounter).Add(1) - c.col.GetCounter(integerBlocksSizeCounter).Add(int64(first.entry.Size)) - } - - // Remove values we already read - values = values.Exclude(first.readMin, first.readMax) - - // Remove any tombstones - c.trbuf = first.r.TombstoneRange(c.key, c.trbuf[:0]) - values = excludeTombstonesIntegerValues(c.trbuf, values) - // If there are no values in this first block (all tombstoned or previously read) and - // we have more potential blocks too search. Try again. - if values.Len() == 0 && len(c.current) > 0 { - c.current = c.current[1:] - goto LOOP - } - - // Only one block with this key and time range so return it - if len(c.current) == 1 { - if values.Len() > 0 { - first.markRead(values.MinTime(), values.MaxTime()) - } - return values, nil - } - - // Use the current block time range as our overlapping window - minT, maxT := first.readMin, first.readMax - if values.Len() > 0 { - minT, maxT = values.MinTime(), values.MaxTime() - } - if c.ascending { - // Blocks are ordered by generation, we may have values in the past in later blocks, if so, - // expand the window to include the min time range to ensure values are returned in ascending - // order - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.MinTime < minT && !cur.read() { - minT = cur.entry.MinTime - } - } - - // Find first block that overlaps our window - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.OverlapsTimeRange(minT, maxT) && !cur.read() { - // Shrink our window so it's the intersection of the first overlapping block and the - // first block. We do this to minimize the region that overlaps and needs to - // be merged. 
- if cur.entry.MaxTime > maxT { - maxT = cur.entry.MaxTime - } - values = values.Include(minT, maxT) - break - } - } - - // Search the remaining blocks that overlap our window and append their values so we can - // merge them. - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - // Skip this block if it doesn't contain points we looking for or they have already been read - if !cur.entry.OverlapsTimeRange(minT, maxT) || cur.read() { - cur.markRead(minT, maxT) - continue - } - - var a []IntegerValue - var v IntegerValues - v, err := cur.r.ReadIntegerBlockAt(&cur.entry, &a) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(integerBlocksDecodedCounter).Add(1) - c.col.GetCounter(integerBlocksSizeCounter).Add(int64(cur.entry.Size)) - } - - c.trbuf = cur.r.TombstoneRange(c.key, c.trbuf[:0]) - // Remove any tombstoned values - v = excludeTombstonesIntegerValues(c.trbuf, v) - - // Remove values we already read - v = v.Exclude(cur.readMin, cur.readMax) - - if v.Len() > 0 { - // Only use values in the overlapping window - v = v.Include(minT, maxT) - // Merge the remaining values with the existing - values = values.Merge(v) - } - cur.markRead(minT, maxT) - } - - } else { - // Blocks are ordered by generation, we may have values in the past in later blocks, if so, - // expand the window to include the max time range to ensure values are returned in descending - // order - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.MaxTime > maxT && !cur.read() { - maxT = cur.entry.MaxTime - } - } - - // Find first block that overlaps our window - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.OverlapsTimeRange(minT, maxT) && !cur.read() { - // Shrink our window so it's the intersection of the first overlapping block and the - // first block. We do this to minimize the region that overlaps and needs to - // be merged. - if cur.entry.MinTime < minT { - minT = cur.entry.MinTime - } - values = values.Include(minT, maxT) - break - } - } - - // Search the remaining blocks that overlap our window and append their values so we can - // merge them. - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - // Skip this block if it doesn't contain points we looking for or they have already been read - if !cur.entry.OverlapsTimeRange(minT, maxT) || cur.read() { - cur.markRead(minT, maxT) - continue - } - - var a []IntegerValue - var v IntegerValues - v, err := cur.r.ReadIntegerBlockAt(&cur.entry, &a) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(integerBlocksDecodedCounter).Add(1) - c.col.GetCounter(integerBlocksSizeCounter).Add(int64(cur.entry.Size)) - } - c.trbuf = cur.r.TombstoneRange(c.key, c.trbuf[:0]) - // Remove any tombstoned values - v = excludeTombstonesIntegerValues(c.trbuf, v) - - // Remove values we already read - v = v.Exclude(cur.readMin, cur.readMax) - - // If the block we decoded should have all of it's values included, mark it as read so we - // don't use it again. - if v.Len() > 0 { - v = v.Include(minT, maxT) - // Merge the remaining values with the existing - values = v.Merge(values) - } - cur.markRead(minT, maxT) - } - } - - first.markRead(minT, maxT) - - return values, err -} - -func excludeTombstonesIntegerValues(t []TimeRange, values IntegerValues) IntegerValues { - for i := range t { - values = values.Exclude(t[i].Min, t[i].Max) - } - return values -} - -// ReadUnsignedBlock reads the next block as a set of unsigned values. 
-func (c *KeyCursor) ReadUnsignedBlock(buf *[]UnsignedValue) ([]UnsignedValue, error) { -LOOP: - // No matching blocks to decode - if len(c.current) == 0 { - return nil, nil - } - - // First block is the oldest block containing the points we're searching for. - first := c.current[0] - *buf = (*buf)[:0] - var values UnsignedValues - values, err := first.r.ReadUnsignedBlockAt(&first.entry, buf) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(unsignedBlocksDecodedCounter).Add(1) - c.col.GetCounter(unsignedBlocksSizeCounter).Add(int64(first.entry.Size)) - } - - // Remove values we already read - values = values.Exclude(first.readMin, first.readMax) - - // Remove any tombstones - c.trbuf = first.r.TombstoneRange(c.key, c.trbuf[:0]) - values = excludeTombstonesUnsignedValues(c.trbuf, values) - // If there are no values in this first block (all tombstoned or previously read) and - // we have more potential blocks too search. Try again. - if values.Len() == 0 && len(c.current) > 0 { - c.current = c.current[1:] - goto LOOP - } - - // Only one block with this key and time range so return it - if len(c.current) == 1 { - if values.Len() > 0 { - first.markRead(values.MinTime(), values.MaxTime()) - } - return values, nil - } - - // Use the current block time range as our overlapping window - minT, maxT := first.readMin, first.readMax - if values.Len() > 0 { - minT, maxT = values.MinTime(), values.MaxTime() - } - if c.ascending { - // Blocks are ordered by generation, we may have values in the past in later blocks, if so, - // expand the window to include the min time range to ensure values are returned in ascending - // order - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.MinTime < minT && !cur.read() { - minT = cur.entry.MinTime - } - } - - // Find first block that overlaps our window - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.OverlapsTimeRange(minT, maxT) && !cur.read() { - // Shrink our window so it's the intersection of the first overlapping block and the - // first block. We do this to minimize the region that overlaps and needs to - // be merged. - if cur.entry.MaxTime > maxT { - maxT = cur.entry.MaxTime - } - values = values.Include(minT, maxT) - break - } - } - - // Search the remaining blocks that overlap our window and append their values so we can - // merge them. 
- for i := 1; i < len(c.current); i++ { - cur := c.current[i] - // Skip this block if it doesn't contain points we looking for or they have already been read - if !cur.entry.OverlapsTimeRange(minT, maxT) || cur.read() { - cur.markRead(minT, maxT) - continue - } - - var a []UnsignedValue - var v UnsignedValues - v, err := cur.r.ReadUnsignedBlockAt(&cur.entry, &a) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(unsignedBlocksDecodedCounter).Add(1) - c.col.GetCounter(unsignedBlocksSizeCounter).Add(int64(cur.entry.Size)) - } - - c.trbuf = cur.r.TombstoneRange(c.key, c.trbuf[:0]) - // Remove any tombstoned values - v = excludeTombstonesUnsignedValues(c.trbuf, v) - - // Remove values we already read - v = v.Exclude(cur.readMin, cur.readMax) - - if v.Len() > 0 { - // Only use values in the overlapping window - v = v.Include(minT, maxT) - // Merge the remaining values with the existing - values = values.Merge(v) - } - cur.markRead(minT, maxT) - } - - } else { - // Blocks are ordered by generation, we may have values in the past in later blocks, if so, - // expand the window to include the max time range to ensure values are returned in descending - // order - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.MaxTime > maxT && !cur.read() { - maxT = cur.entry.MaxTime - } - } - - // Find first block that overlaps our window - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.OverlapsTimeRange(minT, maxT) && !cur.read() { - // Shrink our window so it's the intersection of the first overlapping block and the - // first block. We do this to minimize the region that overlaps and needs to - // be merged. - if cur.entry.MinTime < minT { - minT = cur.entry.MinTime - } - values = values.Include(minT, maxT) - break - } - } - - // Search the remaining blocks that overlap our window and append their values so we can - // merge them. - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - // Skip this block if it doesn't contain points we looking for or they have already been read - if !cur.entry.OverlapsTimeRange(minT, maxT) || cur.read() { - cur.markRead(minT, maxT) - continue - } - - var a []UnsignedValue - var v UnsignedValues - v, err := cur.r.ReadUnsignedBlockAt(&cur.entry, &a) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(unsignedBlocksDecodedCounter).Add(1) - c.col.GetCounter(unsignedBlocksSizeCounter).Add(int64(cur.entry.Size)) - } - c.trbuf = cur.r.TombstoneRange(c.key, c.trbuf[:0]) - // Remove any tombstoned values - v = excludeTombstonesUnsignedValues(c.trbuf, v) - - // Remove values we already read - v = v.Exclude(cur.readMin, cur.readMax) - - // If the block we decoded should have all of it's values included, mark it as read so we - // don't use it again. - if v.Len() > 0 { - v = v.Include(minT, maxT) - // Merge the remaining values with the existing - values = v.Merge(values) - } - cur.markRead(minT, maxT) - } - } - - first.markRead(minT, maxT) - - return values, err -} - -func excludeTombstonesUnsignedValues(t []TimeRange, values UnsignedValues) UnsignedValues { - for i := range t { - values = values.Exclude(t[i].Min, t[i].Max) - } - return values -} - -// ReadStringBlock reads the next block as a set of string values. -func (c *KeyCursor) ReadStringBlock(buf *[]StringValue) ([]StringValue, error) { -LOOP: - // No matching blocks to decode - if len(c.current) == 0 { - return nil, nil - } - - // First block is the oldest block containing the points we're searching for. 
- first := c.current[0] - *buf = (*buf)[:0] - var values StringValues - values, err := first.r.ReadStringBlockAt(&first.entry, buf) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(stringBlocksDecodedCounter).Add(1) - c.col.GetCounter(stringBlocksSizeCounter).Add(int64(first.entry.Size)) - } - - // Remove values we already read - values = values.Exclude(first.readMin, first.readMax) - - // Remove any tombstones - c.trbuf = first.r.TombstoneRange(c.key, c.trbuf[:0]) - values = excludeTombstonesStringValues(c.trbuf, values) - // If there are no values in this first block (all tombstoned or previously read) and - // we have more potential blocks too search. Try again. - if values.Len() == 0 && len(c.current) > 0 { - c.current = c.current[1:] - goto LOOP - } - - // Only one block with this key and time range so return it - if len(c.current) == 1 { - if values.Len() > 0 { - first.markRead(values.MinTime(), values.MaxTime()) - } - return values, nil - } - - // Use the current block time range as our overlapping window - minT, maxT := first.readMin, first.readMax - if values.Len() > 0 { - minT, maxT = values.MinTime(), values.MaxTime() - } - if c.ascending { - // Blocks are ordered by generation, we may have values in the past in later blocks, if so, - // expand the window to include the min time range to ensure values are returned in ascending - // order - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.MinTime < minT && !cur.read() { - minT = cur.entry.MinTime - } - } - - // Find first block that overlaps our window - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.OverlapsTimeRange(minT, maxT) && !cur.read() { - // Shrink our window so it's the intersection of the first overlapping block and the - // first block. We do this to minimize the region that overlaps and needs to - // be merged. - if cur.entry.MaxTime > maxT { - maxT = cur.entry.MaxTime - } - values = values.Include(minT, maxT) - break - } - } - - // Search the remaining blocks that overlap our window and append their values so we can - // merge them. 
- for i := 1; i < len(c.current); i++ { - cur := c.current[i] - // Skip this block if it doesn't contain points we looking for or they have already been read - if !cur.entry.OverlapsTimeRange(minT, maxT) || cur.read() { - cur.markRead(minT, maxT) - continue - } - - var a []StringValue - var v StringValues - v, err := cur.r.ReadStringBlockAt(&cur.entry, &a) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(stringBlocksDecodedCounter).Add(1) - c.col.GetCounter(stringBlocksSizeCounter).Add(int64(cur.entry.Size)) - } - - c.trbuf = cur.r.TombstoneRange(c.key, c.trbuf[:0]) - // Remove any tombstoned values - v = excludeTombstonesStringValues(c.trbuf, v) - - // Remove values we already read - v = v.Exclude(cur.readMin, cur.readMax) - - if v.Len() > 0 { - // Only use values in the overlapping window - v = v.Include(minT, maxT) - // Merge the remaining values with the existing - values = values.Merge(v) - } - cur.markRead(minT, maxT) - } - - } else { - // Blocks are ordered by generation, we may have values in the past in later blocks, if so, - // expand the window to include the max time range to ensure values are returned in descending - // order - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.MaxTime > maxT && !cur.read() { - maxT = cur.entry.MaxTime - } - } - - // Find first block that overlaps our window - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.OverlapsTimeRange(minT, maxT) && !cur.read() { - // Shrink our window so it's the intersection of the first overlapping block and the - // first block. We do this to minimize the region that overlaps and needs to - // be merged. - if cur.entry.MinTime < minT { - minT = cur.entry.MinTime - } - values = values.Include(minT, maxT) - break - } - } - - // Search the remaining blocks that overlap our window and append their values so we can - // merge them. - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - // Skip this block if it doesn't contain points we looking for or they have already been read - if !cur.entry.OverlapsTimeRange(minT, maxT) || cur.read() { - cur.markRead(minT, maxT) - continue - } - - var a []StringValue - var v StringValues - v, err := cur.r.ReadStringBlockAt(&cur.entry, &a) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(stringBlocksDecodedCounter).Add(1) - c.col.GetCounter(stringBlocksSizeCounter).Add(int64(cur.entry.Size)) - } - c.trbuf = cur.r.TombstoneRange(c.key, c.trbuf[:0]) - // Remove any tombstoned values - v = excludeTombstonesStringValues(c.trbuf, v) - - // Remove values we already read - v = v.Exclude(cur.readMin, cur.readMax) - - // If the block we decoded should have all of it's values included, mark it as read so we - // don't use it again. - if v.Len() > 0 { - v = v.Include(minT, maxT) - // Merge the remaining values with the existing - values = v.Merge(values) - } - cur.markRead(minT, maxT) - } - } - - first.markRead(minT, maxT) - - return values, err -} - -func excludeTombstonesStringValues(t []TimeRange, values StringValues) StringValues { - for i := range t { - values = values.Exclude(t[i].Min, t[i].Max) - } - return values -} - -// ReadBooleanBlock reads the next block as a set of boolean values. -func (c *KeyCursor) ReadBooleanBlock(buf *[]BooleanValue) ([]BooleanValue, error) { -LOOP: - // No matching blocks to decode - if len(c.current) == 0 { - return nil, nil - } - - // First block is the oldest block containing the points we're searching for. 
- first := c.current[0] - *buf = (*buf)[:0] - var values BooleanValues - values, err := first.r.ReadBooleanBlockAt(&first.entry, buf) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(booleanBlocksDecodedCounter).Add(1) - c.col.GetCounter(booleanBlocksSizeCounter).Add(int64(first.entry.Size)) - } - - // Remove values we already read - values = values.Exclude(first.readMin, first.readMax) - - // Remove any tombstones - c.trbuf = first.r.TombstoneRange(c.key, c.trbuf[:0]) - values = excludeTombstonesBooleanValues(c.trbuf, values) - // If there are no values in this first block (all tombstoned or previously read) and - // we have more potential blocks too search. Try again. - if values.Len() == 0 && len(c.current) > 0 { - c.current = c.current[1:] - goto LOOP - } - - // Only one block with this key and time range so return it - if len(c.current) == 1 { - if values.Len() > 0 { - first.markRead(values.MinTime(), values.MaxTime()) - } - return values, nil - } - - // Use the current block time range as our overlapping window - minT, maxT := first.readMin, first.readMax - if values.Len() > 0 { - minT, maxT = values.MinTime(), values.MaxTime() - } - if c.ascending { - // Blocks are ordered by generation, we may have values in the past in later blocks, if so, - // expand the window to include the min time range to ensure values are returned in ascending - // order - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.MinTime < minT && !cur.read() { - minT = cur.entry.MinTime - } - } - - // Find first block that overlaps our window - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.OverlapsTimeRange(minT, maxT) && !cur.read() { - // Shrink our window so it's the intersection of the first overlapping block and the - // first block. We do this to minimize the region that overlaps and needs to - // be merged. - if cur.entry.MaxTime > maxT { - maxT = cur.entry.MaxTime - } - values = values.Include(minT, maxT) - break - } - } - - // Search the remaining blocks that overlap our window and append their values so we can - // merge them. 
- for i := 1; i < len(c.current); i++ { - cur := c.current[i] - // Skip this block if it doesn't contain points we looking for or they have already been read - if !cur.entry.OverlapsTimeRange(minT, maxT) || cur.read() { - cur.markRead(minT, maxT) - continue - } - - var a []BooleanValue - var v BooleanValues - v, err := cur.r.ReadBooleanBlockAt(&cur.entry, &a) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(booleanBlocksDecodedCounter).Add(1) - c.col.GetCounter(booleanBlocksSizeCounter).Add(int64(cur.entry.Size)) - } - - c.trbuf = cur.r.TombstoneRange(c.key, c.trbuf[:0]) - // Remove any tombstoned values - v = excludeTombstonesBooleanValues(c.trbuf, v) - - // Remove values we already read - v = v.Exclude(cur.readMin, cur.readMax) - - if v.Len() > 0 { - // Only use values in the overlapping window - v = v.Include(minT, maxT) - // Merge the remaining values with the existing - values = values.Merge(v) - } - cur.markRead(minT, maxT) - } - - } else { - // Blocks are ordered by generation, we may have values in the past in later blocks, if so, - // expand the window to include the max time range to ensure values are returned in descending - // order - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.MaxTime > maxT && !cur.read() { - maxT = cur.entry.MaxTime - } - } - - // Find first block that overlaps our window - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.OverlapsTimeRange(minT, maxT) && !cur.read() { - // Shrink our window so it's the intersection of the first overlapping block and the - // first block. We do this to minimize the region that overlaps and needs to - // be merged. - if cur.entry.MinTime < minT { - minT = cur.entry.MinTime - } - values = values.Include(minT, maxT) - break - } - } - - // Search the remaining blocks that overlap our window and append their values so we can - // merge them. - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - // Skip this block if it doesn't contain points we looking for or they have already been read - if !cur.entry.OverlapsTimeRange(minT, maxT) || cur.read() { - cur.markRead(minT, maxT) - continue - } - - var a []BooleanValue - var v BooleanValues - v, err := cur.r.ReadBooleanBlockAt(&cur.entry, &a) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(booleanBlocksDecodedCounter).Add(1) - c.col.GetCounter(booleanBlocksSizeCounter).Add(int64(cur.entry.Size)) - } - c.trbuf = cur.r.TombstoneRange(c.key, c.trbuf[:0]) - // Remove any tombstoned values - v = excludeTombstonesBooleanValues(c.trbuf, v) - - // Remove values we already read - v = v.Exclude(cur.readMin, cur.readMax) - - // If the block we decoded should have all of it's values included, mark it as read so we - // don't use it again. 
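Every one of these generated readers repeats the same two primitives: an inclusive time-range overlap test (entry.OverlapsTimeRange) and a time-ordered merge of the overlapping values. Reduced to bare timestamps the shape is roughly as below (a sketch only; the engine's Merge also resolves duplicate timestamps, which is omitted here):

package main

import "fmt"

// overlaps reports whether the inclusive ranges [aMin, aMax] and [bMin, bMax] intersect.
func overlaps(aMin, aMax, bMin, bMax int64) bool {
	return aMin <= bMax && bMin <= aMax
}

// mergeAsc merges two ascending timestamp slices into one ascending slice.
func mergeAsc(a, b []int64) []int64 {
	out := make([]int64, 0, len(a)+len(b))
	for len(a) > 0 && len(b) > 0 {
		if a[0] <= b[0] {
			out = append(out, a[0])
			a = a[1:]
		} else {
			out = append(out, b[0])
			b = b[1:]
		}
	}
	out = append(out, a...)
	return append(out, b...)
}

func main() {
	fmt.Println(overlaps(0, 10, 5, 20))                        // true
	fmt.Println(mergeAsc([]int64{1, 4, 9}, []int64{2, 3, 10})) // [1 2 3 4 9 10]
}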
- if v.Len() > 0 { - v = v.Include(minT, maxT) - // Merge the remaining values with the existing - values = v.Merge(values) - } - cur.markRead(minT, maxT) - } - } - - first.markRead(minT, maxT) - - return values, err -} - -func excludeTombstonesBooleanValues(t []TimeRange, values BooleanValues) BooleanValues { - for i := range t { - values = values.Exclude(t[i].Min, t[i].Max) - } - return values -} diff --git a/tsdb/tsm1/file_store.gen.go.tmpl b/tsdb/tsm1/file_store.gen.go.tmpl deleted file mode 100644 index e96186d61d..0000000000 --- a/tsdb/tsm1/file_store.gen.go.tmpl +++ /dev/null @@ -1,277 +0,0 @@ -package tsm1 - -import ( - "github.com/influxdata/influxdb/v2/tsdb/cursors" -) -{{$isArray := .D.isArray}} -{{$isNotArray := not $isArray}} -{{range .In}} -{{if $isArray -}} -// Read{{.Name}}ArrayBlock reads the next block as a set of {{.name}} values. -func (c *KeyCursor) Read{{.Name}}ArrayBlock(values *cursors.{{.Name}}Array) (*cursors.{{.Name}}Array, error) { -LOOP: - // No matching blocks to decode - if len(c.current) == 0 { - values.Timestamps = values.Timestamps[:0] - values.Values = values.Values[:0] - return values, nil - } -{{else}} -// Read{{.Name}}Block reads the next block as a set of {{.name}} values. -func (c *KeyCursor) Read{{.Name}}Block(buf *[]{{.Name}}Value) ([]{{.Name}}Value, error) { -LOOP: - // No matching blocks to decode - if len(c.current) == 0 { - return nil, nil - } -{{end}} - - // First block is the oldest block containing the points we're searching for. - first := c.current[0] -{{if $isArray -}} - err := first.r.Read{{.Name}}ArrayBlockAt(&first.entry, values) -{{else -}} - *buf = (*buf)[:0] - var values {{.Name}}Values - values, err := first.r.Read{{.Name}}BlockAt(&first.entry, buf) -{{end -}} - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter({{.name}}BlocksDecodedCounter).Add(1) - c.col.GetCounter({{.name}}BlocksSizeCounter).Add(int64(first.entry.Size)) - } - - // Remove values we already read -{{if $isArray -}} - values.Exclude(first.readMin, first.readMax) -{{else -}} - values = values.Exclude(first.readMin, first.readMax) -{{end}} - - // Remove any tombstones - c.trbuf = first.r.TombstoneRange(c.key, c.trbuf[:0]) -{{if $isArray -}} - excludeTombstones{{.Name}}Array(c.trbuf, values) -{{else -}} - values = excludeTombstones{{.Name}}Values(c.trbuf, values) -{{end -}} - - // If there are no values in this first block (all tombstoned or previously read) and - // we have more potential blocks too search. Try again. 
- if values.Len() == 0 && len(c.current) > 0 { - c.current = c.current[1:] - goto LOOP - } - - // Only one block with this key and time range so return it - if len(c.current) == 1 { - if values.Len() > 0 { - first.markRead(values.MinTime(), values.MaxTime()) - } - return values, nil - } - - // Use the current block time range as our overlapping window - minT, maxT := first.readMin, first.readMax - if values.Len() > 0 { - minT, maxT = values.MinTime(), values.MaxTime() - } - if c.ascending { - // Blocks are ordered by generation, we may have values in the past in later blocks, if so, - // expand the window to include the min time range to ensure values are returned in ascending - // order - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.MinTime < minT && !cur.read() { - minT = cur.entry.MinTime - } - } - - // Find first block that overlaps our window - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.OverlapsTimeRange(minT, maxT) && !cur.read() { - // Shrink our window so it's the intersection of the first overlapping block and the - // first block. We do this to minimize the region that overlaps and needs to - // be merged. - if cur.entry.MaxTime > maxT { - maxT = cur.entry.MaxTime - } -{{if $isArray -}} - values.Include(minT, maxT) -{{else -}} - values = values.Include(minT, maxT) -{{end -}} - break - } - } - - // Search the remaining blocks that overlap our window and append their values so we can - // merge them. - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - // Skip this block if it doesn't contain points we looking for or they have already been read - if !cur.entry.OverlapsTimeRange(minT, maxT) || cur.read() { - cur.markRead(minT, maxT) - continue - } - -{{if $isArray -}} - v := &cursors.{{.Name}}Array{} - err := cur.r.Read{{.Name}}ArrayBlockAt(&cur.entry, v) -{{else -}} - var a []{{.Name}}Value - var v {{.Name}}Values - v, err := cur.r.Read{{.Name}}BlockAt(&cur.entry, &a) -{{end -}} - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter({{.name}}BlocksDecodedCounter).Add(1) - c.col.GetCounter({{.name}}BlocksSizeCounter).Add(int64(cur.entry.Size)) - } - - c.trbuf = cur.r.TombstoneRange(c.key, c.trbuf[:0]) -{{if $isArray -}} - // Remove any tombstoned values - excludeTombstones{{.Name}}Array(c.trbuf, v) - - // Remove values we already read - v.Exclude(cur.readMin, cur.readMax) - - if v.Len() > 0 { - // Only use values in the overlapping window - v.Include(minT, maxT) - // Merge the remaining values with the existing - values.Merge(v) - } -{{else -}} - // Remove any tombstoned values - v = excludeTombstones{{.Name}}Values(c.trbuf, v) - - // Remove values we already read - v = v.Exclude(cur.readMin, cur.readMax) - - if v.Len() > 0 { - // Only use values in the overlapping window - v = v.Include(minT, maxT) - // Merge the remaining values with the existing - values = values.Merge(v) - } -{{end -}} - cur.markRead(minT, maxT) - } - - } else { - // Blocks are ordered by generation, we may have values in the past in later blocks, if so, - // expand the window to include the max time range to ensure values are returned in descending - // order - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.MaxTime > maxT && !cur.read() { - maxT = cur.entry.MaxTime - } - } - - // Find first block that overlaps our window - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.OverlapsTimeRange(minT, maxT) && !cur.read() { - // Shrink our window so it's the 
intersection of the first overlapping block and the - // first block. We do this to minimize the region that overlaps and needs to - // be merged. - if cur.entry.MinTime < minT { - minT = cur.entry.MinTime - } -{{if $isArray -}} - values.Include(minT, maxT) -{{else -}} - values = values.Include(minT, maxT) -{{end -}} - break - } - } - - // Search the remaining blocks that overlap our window and append their values so we can - // merge them. - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - // Skip this block if it doesn't contain points we looking for or they have already been read - if !cur.entry.OverlapsTimeRange(minT, maxT) || cur.read() { - cur.markRead(minT, maxT) - continue - } - -{{if $isArray -}} - v := &cursors.{{.Name}}Array{} - err := cur.r.Read{{.Name}}ArrayBlockAt(&cur.entry, v) -{{else -}} - var a []{{.Name}}Value - var v {{.Name}}Values - v, err := cur.r.Read{{.Name}}BlockAt(&cur.entry, &a) -{{end -}} - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter({{.name}}BlocksDecodedCounter).Add(1) - c.col.GetCounter({{.name}}BlocksSizeCounter).Add(int64(cur.entry.Size)) - } - c.trbuf = cur.r.TombstoneRange(c.key, c.trbuf[:0]) -{{if $isArray -}} - // Remove any tombstoned values - excludeTombstones{{.Name}}Array(c.trbuf, v) - - // Remove values we already read - v.Exclude(cur.readMin, cur.readMax) - - // If the block we decoded should have all of it's values included, mark it as read so we - // don't use it again. - if v.Len() > 0 { - v.Include(minT, maxT) - // Merge the remaining values with the existing - v.Merge(values) - *values = *v - } -{{else -}} - // Remove any tombstoned values - v = excludeTombstones{{.Name}}Values(c.trbuf, v) - - // Remove values we already read - v = v.Exclude(cur.readMin, cur.readMax) - - // If the block we decoded should have all of it's values included, mark it as read so we - // don't use it again. 
- if v.Len() > 0 { - v = v.Include(minT, maxT) - // Merge the remaining values with the existing - values = v.Merge(values) - } -{{end -}} - cur.markRead(minT, maxT) - } - } - - first.markRead(minT, maxT) - - return values, err -} - -{{if $isArray -}} -func excludeTombstones{{.Name}}Array(t []TimeRange, values *cursors.{{.Name}}Array) { - for i := range t { - values.Exclude(t[i].Min, t[i].Max) - } -} -{{else -}} -func excludeTombstones{{.Name}}Values(t []TimeRange, values {{.Name}}Values) {{.Name}}Values { - for i := range t { - values = values.Exclude(t[i].Min, t[i].Max) - } - return values -} -{{end -}} -{{ end }} diff --git a/tsdb/tsm1/file_store.gen.go.tmpldata b/tsdb/tsm1/file_store.gen.go.tmpldata deleted file mode 100644 index 236ba310ba..0000000000 --- a/tsdb/tsm1/file_store.gen.go.tmpldata +++ /dev/null @@ -1,22 +0,0 @@ -[ - { - "Name":"Float", - "name":"float" - }, - { - "Name":"Integer", - "name":"integer" - }, - { - "Name":"Unsigned", - "name":"unsigned" - }, - { - "Name":"String", - "name":"string" - }, - { - "Name":"Boolean", - "name":"boolean" - } -] diff --git a/tsdb/tsm1/file_store.go b/tsdb/tsm1/file_store.go deleted file mode 100644 index 656cefe2f6..0000000000 --- a/tsdb/tsm1/file_store.go +++ /dev/null @@ -1,1630 +0,0 @@ -package tsm1 - -import ( - "bytes" - "context" - "errors" - "fmt" - "io/ioutil" - "math" - "os" - "path/filepath" - "runtime" - "sort" - "strconv" - "strings" - "sync" - "sync/atomic" - "time" - - "github.com/influxdata/influxdb/v2/kit/tracing" - "github.com/influxdata/influxdb/v2/pkg/fs" - "github.com/influxdata/influxdb/v2/pkg/limiter" - "github.com/influxdata/influxdb/v2/pkg/metrics" - "github.com/influxdata/influxdb/v2/query" - "github.com/influxdata/influxdb/v2/tsdb/cursors" - "github.com/prometheus/client_golang/prometheus" - "go.uber.org/zap" - "golang.org/x/time/rate" -) - -const ( - // The extension used to describe temporary snapshot files. - TmpTSMFileExtension = "tmp" - - // The extension used to describe corrupt snapshot files. - BadTSMFileExtension = "bad" -) - -type TSMIterator interface { - Next() bool - Peek() []byte - Key() []byte - Type() byte - Entries() []IndexEntry - Err() error -} - -// TSMFile represents an on-disk TSM file. -type TSMFile interface { - // Path returns the underlying file path for the TSMFile. If the file - // has not be written or loaded from disk, the zero value is returned. - Path() string - - // Read returns all the values in the block where time t resides. - Read(key []byte, t int64) ([]Value, error) - - // ReadAt returns all the values in the block identified by entry. 
- ReadAt(entry *IndexEntry, values []Value) ([]Value, error) - ReadFloatBlockAt(entry *IndexEntry, values *[]FloatValue) ([]FloatValue, error) - ReadFloatArrayBlockAt(entry *IndexEntry, values *cursors.FloatArray) error - ReadIntegerBlockAt(entry *IndexEntry, values *[]IntegerValue) ([]IntegerValue, error) - ReadIntegerArrayBlockAt(entry *IndexEntry, values *cursors.IntegerArray) error - ReadUnsignedBlockAt(entry *IndexEntry, values *[]UnsignedValue) ([]UnsignedValue, error) - ReadUnsignedArrayBlockAt(entry *IndexEntry, values *cursors.UnsignedArray) error - ReadStringBlockAt(entry *IndexEntry, values *[]StringValue) ([]StringValue, error) - ReadStringArrayBlockAt(entry *IndexEntry, values *cursors.StringArray) error - ReadBooleanBlockAt(entry *IndexEntry, values *[]BooleanValue) ([]BooleanValue, error) - ReadBooleanArrayBlockAt(entry *IndexEntry, values *cursors.BooleanArray) error - - // Entries returns the index entries for all blocks for the given key. - ReadEntries(key []byte, entries []IndexEntry) ([]IndexEntry, error) - - // Contains returns true if the file contains any values for the given - // key. - Contains(key []byte) bool - - // OverlapsTimeRange returns true if the time range of the file intersect min and max. - OverlapsTimeRange(min, max int64) bool - - // OverlapsKeyRange returns true if the key range of the file intersects min and max. - OverlapsKeyRange(min, max []byte) bool - - // OverlapsKeyPrefixRange returns true if the key range of the file - // intersects min and max, evaluating up to the length of min and max - // of the key range. - OverlapsKeyPrefixRange(min, max []byte) bool - - // TimeRange returns the min and max time across all keys in the file. - TimeRange() (int64, int64) - - // TombstoneRange returns ranges of time that are deleted for the given key. - TombstoneRange(key []byte, buf []TimeRange) []TimeRange - - // KeyRange returns the min and max keys in the file. - KeyRange() ([]byte, []byte) - - // KeyCount returns the number of distinct keys in the file. - KeyCount() int - - // Iterator returns an iterator over the keys starting at the provided key. You must - // call Next before calling any of the accessors. - Iterator([]byte) TSMIterator - - // Type returns the block type of the values stored for the key. Returns one of - // BlockFloat64, BlockInt64, BlockBoolean, BlockString. If key does not exist, - // an error is returned. - Type(key []byte) (byte, error) - - // BatchDelete return a BatchDeleter that allows for multiple deletes in batches - // and group commit or rollback. - BatchDelete() BatchDeleter - - // Delete removes the keys from the set of keys available in this file. - Delete(keys [][]byte) error - - // DeleteRange removes the values for keys between timestamps min and max. - DeleteRange(keys [][]byte, min, max int64) error - - // DeletePrefix removes the values for keys beginning with prefix. It calls dead with - // any keys that became dead as a result of this call. - DeletePrefix(prefix []byte, min, max int64, pred Predicate, dead func([]byte)) error - - // HasTombstones returns true if file contains values that have been deleted. - HasTombstones() bool - - // TombstoneFiles returns the tombstone filestats if there are any tombstones - // written for this file. - TombstoneFiles() []FileStat - - // Close closes the underlying file resources. - Close() error - - // Size returns the size of the file on disk in bytes. 
- Size() uint32 - - // Rename renames the existing TSM file to a new name and replaces the mmap backing slice using the new - // file name. Index and Reader state are not re-initialized. - Rename(path string) error - - // Remove deletes the file from the filesystem. - Remove() error - - // InUse returns true if the file is currently in use by queries. - InUse() bool - - // Ref records that this file is actively in use. - Ref() - - // Unref records that this file is no longer in use. - Unref() - - // Stats returns summary information about the TSM file. - Stats() FileStat - - // BlockIterator returns an iterator pointing to the first block in the file and - // allows sequential iteration to each and every block. - BlockIterator() *BlockIterator - - // TimeRangeIterator returns an iterator over the keys, starting at the provided - // key. Calling the HasData accessor will return true if data exists for the - // interval [min, max] for the current key. - // Next must be called before calling any of the accessors. - TimeRangeIterator(key []byte, min, max int64) *TimeRangeIterator - - // TimeRangeMaxTimeIterator returns an iterator over the keys, starting at the provided - // key. Calling the HasData and MaxTime accessors will be restricted to the - // interval [min, max] for the current key. - // Next must be called before calling any of the accessors. - TimeRangeMaxTimeIterator(key []byte, min, max int64) *TimeRangeMaxTimeIterator - - // Free releases any resources held by the FileStore to free up system resources. - Free() error - - // Stats returns the statistics for the file. - MeasurementStats() (MeasurementStats, error) -} - -// FileStoreObserver is passed notifications before the file store adds or deletes files. In this way, it can -// be sure to observe every file that is added or removed even in the presence of process death. -type FileStoreObserver interface { - // FileFinishing is called before a file is renamed to it's final name. - FileFinishing(path string) error - - // FileUnlinking is called before a file is unlinked. - FileUnlinking(path string) error -} - -var ( - floatBlocksDecodedCounter = metrics.MustRegisterCounter("float_blocks_decoded", metrics.WithGroup(tsmGroup)) - floatBlocksSizeCounter = metrics.MustRegisterCounter("float_blocks_size_bytes", metrics.WithGroup(tsmGroup)) - integerBlocksDecodedCounter = metrics.MustRegisterCounter("integer_blocks_decoded", metrics.WithGroup(tsmGroup)) - integerBlocksSizeCounter = metrics.MustRegisterCounter("integer_blocks_size_bytes", metrics.WithGroup(tsmGroup)) - unsignedBlocksDecodedCounter = metrics.MustRegisterCounter("unsigned_blocks_decoded", metrics.WithGroup(tsmGroup)) - unsignedBlocksSizeCounter = metrics.MustRegisterCounter("unsigned_blocks_size_bytes", metrics.WithGroup(tsmGroup)) - stringBlocksDecodedCounter = metrics.MustRegisterCounter("string_blocks_decoded", metrics.WithGroup(tsmGroup)) - stringBlocksSizeCounter = metrics.MustRegisterCounter("string_blocks_size_bytes", metrics.WithGroup(tsmGroup)) - booleanBlocksDecodedCounter = metrics.MustRegisterCounter("boolean_blocks_decoded", metrics.WithGroup(tsmGroup)) - booleanBlocksSizeCounter = metrics.MustRegisterCounter("boolean_blocks_size_bytes", metrics.WithGroup(tsmGroup)) -) - -// FileStore is an abstraction around multiple TSM files. -type FileStore struct { - mu sync.RWMutex - lastModified time.Time - // Most recently known file stats. 
If nil then stats will need to be - // recalculated - lastFileStats []FileStat - - currentGeneration int // internally maintained generation - currentGenerationFunc func() int // external generation - dir string - - files []TSMFile - tsmMMAPWillNeed bool // If true then the kernel will be advised MMAP_WILLNEED for TSM files. - openLimiter limiter.Fixed // limit the number of concurrent opening TSM files. - - logger *zap.Logger // Logger to be used for important messages - - tracker *fileTracker - purger *purger - - currentTempDirID int - - parseFileName ParseFileNameFunc - - obs FileStoreObserver - - pageFaultLimiter *rate.Limiter -} - -// FileStat holds information about a TSM file on disk. -type FileStat struct { - Path string - HasTombstone bool - Size uint32 - CreatedAt int64 - LastModified int64 - MinTime, MaxTime int64 - MinKey, MaxKey []byte -} - -// OverlapsTimeRange returns true if the time range of the file intersect min and max. -func (f FileStat) OverlapsTimeRange(min, max int64) bool { - return f.MinTime <= max && f.MaxTime >= min -} - -// OverlapsKeyRange returns true if the min and max keys of the file overlap the arguments min and max. -func (f FileStat) OverlapsKeyRange(min, max []byte) bool { - return len(min) != 0 && len(max) != 0 && bytes.Compare(f.MinKey, max) <= 0 && bytes.Compare(f.MaxKey, min) >= 0 -} - -// ContainsKey returns true if the min and max keys of the file overlap the arguments min and max. -func (f FileStat) MaybeContainsKey(key []byte) bool { - return bytes.Compare(f.MinKey, key) >= 0 || bytes.Compare(key, f.MaxKey) <= 0 -} - -// NewFileStore returns a new instance of FileStore based on the given directory. -func NewFileStore(dir string) *FileStore { - logger := zap.NewNop() - fs := &FileStore{ - dir: dir, - lastModified: time.Time{}, - logger: logger, - openLimiter: limiter.NewFixed(runtime.GOMAXPROCS(0)), - purger: &purger{ - files: map[string]TSMFile{}, - logger: logger, - }, - obs: noFileStoreObserver{}, - parseFileName: DefaultParseFileName, - tracker: newFileTracker(newFileMetrics(nil), nil), - } - fs.purger.fileStore = fs - return fs -} - -// WithObserver sets the observer for the file store. -func (f *FileStore) WithObserver(obs FileStoreObserver) { - if obs == nil { - obs = noFileStoreObserver{} - } - f.obs = obs -} - -func (f *FileStore) WithParseFileNameFunc(parseFileNameFunc ParseFileNameFunc) { - f.parseFileName = parseFileNameFunc -} - -func (f *FileStore) ParseFileName(path string) (int, int, error) { - return f.parseFileName(path) -} - -// SetCurrentGenerationFunc must be set before using FileStore. -func (f *FileStore) SetCurrentGenerationFunc(fn func() int) { - f.currentGenerationFunc = fn -} - -// WithPageFaultLimiter sets the rate limiter used for limiting page faults. -func (f *FileStore) WithPageFaultLimiter(limiter *rate.Limiter) { - f.pageFaultLimiter = limiter -} - -// WithLogger sets the logger on the file store. -func (f *FileStore) WithLogger(log *zap.Logger) { - f.logger = log.With(zap.String("service", "filestore")) - f.purger.logger = f.logger -} - -// FileStoreStatistics keeps statistics about the file store. -type FileStoreStatistics struct { - SDiskBytes int64 - SFileCount int64 -} - -// fileTracker tracks file counts and sizes within the FileStore. -// -// As well as being responsible for providing atomic reads and writes to the -// statistics, fileTracker also mirrors any changes to the external prometheus -// metrics, which the Engine exposes. -// -// *NOTE* - fileTracker fields should not be directory modified. 
Doing so -// could result in the Engine exposing inaccurate metrics. -type fileTracker struct { - metrics *fileMetrics - labels prometheus.Labels - diskBytes uint64 -} - -func newFileTracker(metrics *fileMetrics, defaultLabels prometheus.Labels) *fileTracker { - return &fileTracker{metrics: metrics, labels: defaultLabels} -} - -// Labels returns a copy of the default labels used by the tracker's metrics. -// The returned map is safe for modification. -func (t *fileTracker) Labels() prometheus.Labels { - labels := make(prometheus.Labels, len(t.labels)) - for k, v := range t.labels { - labels[k] = v - } - return labels -} - -// Bytes returns the number of bytes in use on disk. -func (t *fileTracker) Bytes() uint64 { return atomic.LoadUint64(&t.diskBytes) } - -// SetBytes sets the number of bytes in use on disk. -func (t *fileTracker) SetBytes(bytes map[int]uint64) { - total := uint64(0) - labels := t.Labels() - sizes := make(map[string]uint64) - for k, v := range bytes { - label := formatLevel(uint64(k)) - sizes[label] += v - total += v - } - for k, v := range sizes { - labels["level"] = k - t.metrics.DiskSize.With(labels).Set(float64(v)) - } - atomic.StoreUint64(&t.diskBytes, total) -} - -// AddBytes increases the number of bytes. -func (t *fileTracker) AddBytes(bytes uint64, level int) { - atomic.AddUint64(&t.diskBytes, bytes) - - labels := t.Labels() - labels["level"] = formatLevel(uint64(level)) - t.metrics.DiskSize.With(labels).Add(float64(bytes)) -} - -// SetFileCount sets the number of files in the FileStore. -func (t *fileTracker) SetFileCount(files map[int]uint64) { - labels := t.Labels() - counts := make(map[string]uint64) - for k, v := range files { - label := formatLevel(uint64(k)) - counts[label] += v - } - for k, v := range counts { - labels["level"] = k - t.metrics.Files.With(labels).Set(float64(v)) - } -} - -func (t *fileTracker) ClearFileCounts() { - labels := t.Labels() - for i := uint64(1); i <= 4; i++ { - labels["level"] = formatLevel(i) - t.metrics.Files.With(labels).Set(float64(0)) - } -} - -func (t *fileTracker) ClearDiskSizes() { - labels := t.Labels() - for i := uint64(1); i <= 4; i++ { - labels["level"] = formatLevel(i) - t.metrics.DiskSize.With(labels).Set(float64(0)) - } -} - -func formatLevel(level uint64) string { - if level >= 4 { - return "4+" - } else { - return fmt.Sprintf("%d", level) - } -} - -// Count returns the number of TSM files currently loaded. -func (f *FileStore) Count() int { - f.mu.RLock() - defer f.mu.RUnlock() - return len(f.files) -} - -// Files returns the slice of TSM files currently loaded. This is only used for -// tests, and the files aren't guaranteed to stay valid in the presence of compactions. -func (f *FileStore) Files() []TSMFile { - f.mu.RLock() - defer f.mu.RUnlock() - return f.files -} - -// CurrentGeneration returns the current generation of the TSM files. -// Delegates to currentGenerationFunc, if set. Only called by tests. -func (f *FileStore) CurrentGeneration() int { - if fn := f.currentGenerationFunc; fn != nil { - return fn() - } - - f.mu.RLock() - defer f.mu.RUnlock() - return f.currentGeneration -} - -// NextGeneration increments the max file ID and returns the new value. -// Delegates to currentGenerationFunc, if set. -func (f *FileStore) NextGeneration() int { - if fn := f.currentGenerationFunc; fn != nil { - return fn() - } - - f.mu.Lock() - defer f.mu.Unlock() - f.currentGeneration++ - return f.currentGeneration -} - -// WalkKeys calls fn for every key in every TSM file known to the FileStore. 
If the key -// exists in multiple files, it will be invoked for each file. -func (f *FileStore) WalkKeys(seek []byte, fn func(key []byte, typ byte) error) error { - f.mu.RLock() - if len(f.files) == 0 { - f.mu.RUnlock() - return nil - } - - // Ensure files are not unmapped while we're iterating over them. - for _, r := range f.files { - r.Ref() - defer r.Unref() - } - - ki := newMergeKeyIterator(f.files, seek) - f.mu.RUnlock() - for ki.Next() { - key, typ := ki.Read() - if err := fn(key, typ); err != nil { - return err - } - } - - return ki.Err() -} - -// Keys returns all keys and types for all files in the file store. -func (f *FileStore) Keys() map[string]byte { - f.mu.RLock() - defer f.mu.RUnlock() - - uniqueKeys := map[string]byte{} - if err := f.WalkKeys(nil, func(key []byte, typ byte) error { - uniqueKeys[string(key)] = typ - return nil - }); err != nil { - return nil - } - - return uniqueKeys -} - -// Type returns the type of values store at the block for key. -func (f *FileStore) Type(key []byte) (byte, error) { - f.mu.RLock() - defer f.mu.RUnlock() - - for _, f := range f.files { - if f.Contains(key) { - return f.Type(key) - } - } - return 0, fmt.Errorf("unknown type for %v", key) -} - -// Delete removes the keys from the set of keys available in this file. -func (f *FileStore) Delete(keys [][]byte) error { - return f.DeleteRange(keys, math.MinInt64, math.MaxInt64) -} - -type unrefs []TSMFile - -func (u *unrefs) Unref() { - for _, f := range *u { - f.Unref() - } -} - -// ForEachFile calls fn for all TSM files or until fn returns false. -// fn is called on the same goroutine as the caller. -func (f *FileStore) ForEachFile(fn func(f TSMFile) bool) { - f.mu.RLock() - files := make(unrefs, 0, len(f.files)) - defer files.Unref() - - for _, f := range f.files { - f.Ref() - files = append(files, f) - if !fn(f) { - break - } - } - f.mu.RUnlock() -} - -// Apply calls fn on each TSMFile in the store concurrently. The level of -// concurrency is set to GOMAXPROCS. -func (f *FileStore) Apply(fn func(r TSMFile) error) error { - // Limit apply fn to number of cores - limiter := limiter.NewFixed(runtime.GOMAXPROCS(0)) - - f.mu.RLock() - errC := make(chan error, len(f.files)) - - for _, f := range f.files { - go func(r TSMFile) { - limiter.Take() - defer limiter.Release() - - r.Ref() - defer r.Unref() - errC <- fn(r) - }(f) - } - - var applyErr error - for i := 0; i < cap(errC); i++ { - if err := <-errC; err != nil { - applyErr = err - } - } - f.mu.RUnlock() - - f.mu.Lock() - f.lastModified = time.Now().UTC() - f.lastFileStats = nil - f.mu.Unlock() - - return applyErr -} - -// DeleteRange removes the values for keys between timestamps min and max. This should only -// be used with smaller batches of series keys. -func (f *FileStore) DeleteRange(keys [][]byte, min, max int64) error { - var batches BatchDeleters - f.mu.RLock() - for _, f := range f.files { - if f.OverlapsTimeRange(min, max) { - batches = append(batches, f.BatchDelete()) - } - } - f.mu.RUnlock() - - if len(batches) == 0 { - return nil - } - - if err := func() error { - if err := batches.DeleteRange(keys, min, max); err != nil { - return err - } - - return batches.Commit() - }(); err != nil { - // Rollback the deletes - _ = batches.Rollback() - return err - } - - f.mu.Lock() - f.lastModified = time.Now().UTC() - f.lastFileStats = nil - f.mu.Unlock() - return nil -} - -// Open loads all the TSM files in the configured directory. 
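ForEachFile and Apply above are the two traversal helpers over the loaded TSM files: ForEachFile runs fn on the caller's goroutine and stops as soon as fn returns false, while Apply fans the work out across up to GOMAXPROCS goroutines. A minimal sketch of the ForEachFile pattern, assuming a *FileStore named fs (the totalKeys tally is hypothetical, purely for illustration):

    // Sum the distinct key count across every loaded TSM file. Each file is
    // Ref'd for the duration of fn, so it cannot be unmapped mid-iteration.
    var totalKeys int
    fs.ForEachFile(func(f TSMFile) bool {
        totalKeys += f.KeyCount()
        return true // returning false would stop the walk early
    })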
-func (f *FileStore) Open(ctx context.Context) error { - f.mu.Lock() - defer f.mu.Unlock() - - // Not loading files from disk so nothing to do - if f.dir == "" { - return nil - } - - if f.openLimiter == nil { - return errors.New("cannot open FileStore without an OpenLimiter (is EngineOptions.OpenLimiter set?)") - } - - span, _ := tracing.StartSpanFromContext(ctx) - defer span.Finish() - - // find the current max ID for temp directories - tmpfiles, err := ioutil.ReadDir(f.dir) - if err != nil { - return err - } - ext := fmt.Sprintf(".%s", TmpTSMFileExtension) - for _, fi := range tmpfiles { - if fi.IsDir() && strings.HasSuffix(fi.Name(), ext) { - ss := strings.Split(filepath.Base(fi.Name()), ".") - if len(ss) == 2 { - if i, err := strconv.Atoi(ss[0]); err != nil { - if i > f.currentTempDirID { - f.currentTempDirID = i - } - } - } - } - } - - files, err := filepath.Glob(filepath.Join(f.dir, fmt.Sprintf("*.%s", TSMFileExtension))) - if err != nil { - return err - } - - // struct to hold the result of opening each reader in a goroutine - type res struct { - r *TSMReader - err error - } - - readerC := make(chan *res) - for i, fn := range files { - // Keep track of the latest ID - generation, _, err := f.parseFileName(fn) - if err != nil { - return err - } - - if f.currentGenerationFunc == nil && generation >= f.currentGeneration { - f.currentGeneration = generation + 1 - } - - file, err := os.OpenFile(fn, os.O_RDONLY, 0666) - if err != nil { - return fmt.Errorf("error opening file %s: %v", fn, err) - } - - go func(idx int, file *os.File) { - // Ensure a limited number of TSM files are loaded at once. - // Systems which have very large datasets (1TB+) can have thousands - // of TSM files which can cause extremely long load times. - f.openLimiter.Take() - defer f.openLimiter.Release() - - start := time.Now() - df, err := NewTSMReader(file, - WithMadviseWillNeed(f.tsmMMAPWillNeed), - WithTSMReaderPageFaultLimiter(f.pageFaultLimiter), - WithTSMReaderLogger(f.logger)) - f.logger.Info("Opened file", - zap.String("path", file.Name()), - zap.Int("id", idx), - zap.Duration("duration", time.Since(start))) - - // If we are unable to read a TSM file then log the error, rename - // the file, and continue loading the shard without it. 
- if err != nil { - f.logger.Error("Cannot read corrupt tsm file, renaming", zap.String("path", file.Name()), zap.Int("id", idx), zap.Error(err)) - if e := fs.RenameFile(file.Name(), file.Name()+"."+BadTSMFileExtension); e != nil { - f.logger.Error("Cannot rename corrupt tsm file", zap.String("path", file.Name()), zap.Int("id", idx), zap.Error(e)) - readerC <- &res{r: df, err: fmt.Errorf("cannot rename corrupt file %s: %v", file.Name(), e)} - return - } - } - - df.WithObserver(f.obs) - readerC <- &res{r: df} - }(i, file) - } - - var lm int64 - counts := make(map[int]uint64, 4) - sizes := make(map[int]uint64, 4) - for i := 1; i <= 4; i++ { - counts[i] = 0 - sizes[i] = 0 - } - for range files { - res := <-readerC - if res.err != nil { - return res.err - } else if res.r == nil { - continue - } - f.files = append(f.files, res.r) - name := filepath.Base(res.r.Stats().Path) - _, seq, err := f.parseFileName(name) - if err != nil { - return err - } - counts[seq]++ - - // Accumulate file store size stats - totalSize := uint64(res.r.Size()) - for _, ts := range res.r.TombstoneFiles() { - totalSize += uint64(ts.Size) - } - sizes[seq] += totalSize - - // Re-initialize the lastModified time for the file store - if res.r.LastModified() > lm { - lm = res.r.LastModified() - } - - } - f.lastModified = time.Unix(0, lm).UTC() - close(readerC) - - sort.Sort(tsmReaders(f.files)) - f.tracker.SetBytes(sizes) - f.tracker.SetFileCount(counts) - return nil -} - -// Close closes the file store. -func (f *FileStore) Close() error { - // Make the object appear closed to other method calls. - f.mu.Lock() - - files := f.files - - f.lastFileStats = nil - f.files = nil - f.tracker.ClearFileCounts() - - // Let other methods access this closed object while we do the actual closing. - f.mu.Unlock() - - for _, file := range files { - err := file.Close() - if err != nil { - return err - } - } - - return nil -} - -// DiskSizeBytes returns the total number of bytes consumed by the files in the FileStore. -func (f *FileStore) DiskSizeBytes() int64 { return int64(f.tracker.Bytes()) } - -// Read returns the slice of values for the given key and the given timestamp, -// if any file matches those constraints. -func (f *FileStore) Read(key []byte, t int64) ([]Value, error) { - f.mu.RLock() - defer f.mu.RUnlock() - - for _, f := range f.files { - // Can this file possibly contain this key and timestamp? - if !f.Contains(key) { - continue - } - - // May have the key and time we are looking for so try to find - v, err := f.Read(key, t) - if err != nil { - return nil, err - } - - if len(v) > 0 { - return v, nil - } - } - return nil, nil -} - -func (f *FileStore) Cost(key []byte, min, max int64) query.IteratorCost { - f.mu.RLock() - defer f.mu.RUnlock() - return f.cost(key, min, max) -} - -// Reader returns a TSMReader for path if one is currently managed by the FileStore. -// Otherwise it returns nil. If it returns a file, you must call Unref on it when -// you are done, and never use it after that. -func (f *FileStore) TSMReader(path string) *TSMReader { - f.mu.RLock() - defer f.mu.RUnlock() - for _, r := range f.files { - if r.Path() == path { - r.Ref() - return r.(*TSMReader) - } - } - return nil -} - -// KeyCursor returns a KeyCursor for key and t across the files in the FileStore. 
-func (f *FileStore) KeyCursor(ctx context.Context, key []byte, t int64, ascending bool) *KeyCursor {
-    f.mu.RLock()
-    defer f.mu.RUnlock()
-    return newKeyCursor(ctx, f, key, t, ascending)
-}
-
-// Stats returns the stats of the underlying files, preferring the cached version if it is still valid.
-func (f *FileStore) Stats() []FileStat {
-    f.mu.RLock()
-    if len(f.lastFileStats) > 0 {
-        defer f.mu.RUnlock()
-        return f.lastFileStats
-    }
-    f.mu.RUnlock()
-
-    // The file stats cache is invalid due to changes to files. Need to
-    // recalculate.
-    f.mu.Lock()
-    defer f.mu.Unlock()
-
-    if len(f.lastFileStats) > 0 {
-        return f.lastFileStats
-    }
-
-    // If lastFileStats's capacity is far away from the number of entries
-    // we need to add, then we'll reallocate.
-    if cap(f.lastFileStats) < len(f.files)/2 {
-        f.lastFileStats = make([]FileStat, 0, len(f.files))
-    }
-
-    for _, fd := range f.files {
-        f.lastFileStats = append(f.lastFileStats, fd.Stats())
-    }
-    return f.lastFileStats
-}
-
-// ReplaceWithCallback replaces oldFiles with newFiles and calls updatedFn with the files to be added to the FileStore.
-func (f *FileStore) ReplaceWithCallback(oldFiles, newFiles []string, updatedFn func(r []TSMFile)) error {
-    return f.replace(oldFiles, newFiles, updatedFn)
-}
-
-// Replace replaces oldFiles with newFiles.
-func (f *FileStore) Replace(oldFiles, newFiles []string) error {
-    return f.replace(oldFiles, newFiles, nil)
-}
-
-func (f *FileStore) replace(oldFiles, newFiles []string, updatedFn func(r []TSMFile)) error {
-    if len(oldFiles) == 0 && len(newFiles) == 0 {
-        return nil
-    }
-
-    f.mu.RLock()
-    maxTime := f.lastModified
-    f.mu.RUnlock()
-
-    updated := make([]TSMFile, 0, len(newFiles))
-    tsmTmpExt := fmt.Sprintf("%s.%s", TSMFileExtension, TmpTSMFileExtension)
-
-    // Rename all the new files to make them live on restart
-    for _, file := range newFiles {
-        if !strings.HasSuffix(file, tsmTmpExt) && !strings.HasSuffix(file, TSMFileExtension) {
-            // This isn't a .tsm or .tsm.tmp file.
-            continue
-        }
-
-        // give the observer a chance to process the file first.
-        if err := f.obs.FileFinishing(file); err != nil {
-            return err
-        }
-
-        // Observe the associated statistics file, if available.
-        statsFile := StatsFilename(file)
-        if _, err := os.Stat(statsFile); err == nil {
-            if err := f.obs.FileFinishing(statsFile); err != nil {
-                return err
-            }
-        }
-
-        var newName = file
-        if strings.HasSuffix(file, tsmTmpExt) {
-            // The new TSM files have a tmp extension. First rename them.
-            newName = file[:len(file)-4]
-            if err := fs.RenameFile(file, newName); err != nil {
-                return err
-            }
-        }
-
-        fd, err := os.Open(newName)
-        if err != nil {
-            return err
-        }
-
-        // Keep track of the new mod time
-        if stat, err := fd.Stat(); err == nil {
-            if maxTime.IsZero() || stat.ModTime().UTC().After(maxTime) {
-                maxTime = stat.ModTime().UTC()
-            }
-        }
-
-        tsm, err := NewTSMReader(fd,
-            WithMadviseWillNeed(f.tsmMMAPWillNeed),
-            WithTSMReaderPageFaultLimiter(f.pageFaultLimiter),
-            WithTSMReaderLogger(f.logger))
-        if err != nil {
-            return err
-        }
-        tsm.WithObserver(f.obs)
-
-        updated = append(updated, tsm)
-    }
-
-    if updatedFn != nil {
-        updatedFn(updated)
-    }
-
-    f.mu.Lock()
-    defer f.mu.Unlock()
-
-    // Copy the current set of active files while we rename
-    // and load the new files. We copy the pointers here to minimize
-    // the time that locks are held as well as to ensure that the replacement
-    // is atomic.
-
-    updated = append(updated, f.files...)
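The replace path above hands every incoming file to the FileStoreObserver (FileFinishing before a file takes its final name; FileUnlinking later, when the old files are pruned), so an external component can track each file transition even across a crash. A minimal sketch of an observer, assuming only the FileStoreObserver interface shown earlier (loggingObserver is a hypothetical name):

    // loggingObserver records each notification; returning an error from
    // either hook aborts the corresponding replace step.
    type loggingObserver struct{ log *zap.Logger }

    func (o loggingObserver) FileFinishing(path string) error {
        o.log.Info("tsm file finishing", zap.String("path", path))
        return nil
    }

    func (o loggingObserver) FileUnlinking(path string) error {
        o.log.Info("tsm file unlinking", zap.String("path", path))
        return nil
    }

It would typically be wired in with fs.WithObserver(loggingObserver{log: logger}) before the store is opened.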
- - // We need to prune our set of active files now - var active, inuse []TSMFile - for _, file := range updated { - keep := true - for _, remove := range oldFiles { - if remove == file.Path() { - keep = false - - // give the observer a chance to process the file first. - if err := f.obs.FileUnlinking(file.Path()); err != nil { - return err - } - - // Remove associated stats file. - statsFile := StatsFilename(file.Path()) - if _, err := os.Stat(statsFile); err == nil { - if err := f.obs.FileUnlinking(statsFile); err != nil { - return err - } - } - - for _, t := range file.TombstoneFiles() { - if err := f.obs.FileUnlinking(t.Path); err != nil { - return err - } - } - - // If queries are running against this file, then we need to move it out of the - // way and let them complete. We'll then delete the original file to avoid - // blocking callers upstream. If the process crashes, the temp file is - // cleaned up at startup automatically. - // - // In order to ensure that there are no races with this (file held externally calls Ref - // after we check InUse), we need to maintain the invariant that every handle to a file - // is handed out in use (Ref'd), and handlers only ever relinquish the file once (call Unref - // exactly once, and never use it again). InUse is only valid during a write lock, since - // we allow calls to Ref and Unref under the read lock and no lock at all respectively. - if file.InUse() { - // Copy all the tombstones related to this TSM file - var deletes []string - for _, t := range file.TombstoneFiles() { - deletes = append(deletes, t.Path) - } - - // Rename the TSM file used by this reader - tempPath := fmt.Sprintf("%s.%s", file.Path(), TmpTSMFileExtension) - if err := file.Rename(tempPath); err != nil { - return err - } - - // Remove the old file and tombstones. We can't use the normal TSMReader.Remove() - // because it now refers to our temp file which we can't remove. - for _, f := range deletes { - if err := os.Remove(f); err != nil { - return err - } - } - - inuse = append(inuse, file) - continue - } - - if err := file.Close(); err != nil { - return err - } - - if err := file.Remove(); err != nil { - return err - } - break - } - } - - if keep { - active = append(active, file) - } - } - - if err := fs.SyncDir(f.dir); err != nil { - return err - } - - // Tell the purger about our in-use files we need to remove - f.purger.add(inuse) - - // If times didn't change (which can happen since file mod times are second level), - // then add a ns to the time to ensure that lastModified changes since files on disk - // actually did change - if maxTime.Equal(f.lastModified) || maxTime.Before(f.lastModified) { - maxTime = f.lastModified.UTC().Add(1) - } - - f.lastModified = maxTime.UTC() - - f.lastFileStats = nil - f.files = active - sort.Sort(tsmReaders(f.files)) - f.tracker.ClearFileCounts() - f.tracker.ClearDiskSizes() - - // Recalculate the disk size stat - sizes := make(map[int]uint64, 4) - counts := make(map[int]uint64, 4) - for _, file := range f.files { - size := uint64(file.Size()) - for _, ts := range file.TombstoneFiles() { - size += uint64(ts.Size) - } - _, seq, err := f.parseFileName(file.Path()) - if err != nil { - return err - } - sizes[seq] += size - counts[seq]++ - } - f.tracker.SetBytes(sizes) - f.tracker.SetFileCount(counts) - - return nil -} - -// LastModified returns the last time the file store was updated with new -// TSM files or a delete. 
-func (f *FileStore) LastModified() time.Time {
-    f.mu.RLock()
-    defer f.mu.RUnlock()
-
-    return f.lastModified
-}
-
-// BlockCount returns the number of values stored in the block at location idx
-// in the file at path. If path does not match any file in the store, 0 is
-// returned. If idx is out of range for the number of blocks in the file,
-// 0 is returned.
-func (f *FileStore) BlockCount(path string, idx int) int {
-    f.mu.RLock()
-    defer f.mu.RUnlock()
-
-    if idx < 0 {
-        return 0
-    }
-
-    for _, fd := range f.files {
-        if fd.Path() == path {
-            iter := fd.BlockIterator()
-            for i := 0; i < idx; i++ {
-                if !iter.Next() {
-                    return 0
-                }
-            }
-            _, _, _, _, _, block, _ := iter.Read()
-            return BlockCount(block)
-        }
-    }
-    return 0
-}
-
-// We need to determine the possible files that may be accessed by this query given
-// the time range.
-func (f *FileStore) cost(key []byte, min, max int64) query.IteratorCost {
-    var entries []IndexEntry
-    var err error
-    var trbuf []TimeRange
-
-    cost := query.IteratorCost{}
-    for _, fd := range f.files {
-        minTime, maxTime := fd.TimeRange()
-        if !(maxTime > min && minTime < max) {
-            continue
-        }
-        skipped := true
-        trbuf = fd.TombstoneRange(key, trbuf[:0])
-
-        entries, err = fd.ReadEntries(key, entries)
-        if err != nil {
-            // TODO(jeff): log this somehow? we have an invalid entry in the tsm index
-            continue
-        }
-
-    ENTRIES:
-        for i := 0; i < len(entries); i++ {
-            ie := entries[i]
-
-            if !(ie.MaxTime > min && ie.MinTime < max) {
-                continue
-            }
-
-            // Skip any blocks that only contain values that are tombstoned.
-            for _, t := range trbuf {
-                if t.Min <= ie.MinTime && t.Max >= ie.MaxTime {
-                    continue ENTRIES
-                }
-            }
-
-            cost.BlocksRead++
-            cost.BlockSize += int64(ie.Size)
-            skipped = false
-        }
-
-        if !skipped {
-            cost.NumFiles++
-        }
-    }
-    return cost
-}
-
-// locations returns the files and index blocks for a key and time. ascending indicates
-// whether the key will be scanned in ascending time order or descending time order.
-// This function assumes the read-lock has been taken.
-func (f *FileStore) locations(key []byte, t int64, ascending bool) []*location {
-    var entries []IndexEntry
-    var err error
-    var trbuf []TimeRange
-
-    locations := make([]*location, 0, len(f.files))
-    for _, fd := range f.files {
-        minTime, maxTime := fd.TimeRange()
-
-        // If we are ascending and the max time of the file is before where we want to start,
-        // skip it.
-        if ascending && maxTime < t {
-            continue
-            // If we are descending and the min time of the file is after where we want to start,
-            // then skip it.
-        } else if !ascending && minTime > t {
-            continue
-        }
-        trbuf = fd.TombstoneRange(key, trbuf[:0])
-
-        // This file could potentially contain points we are looking for, so find the blocks for
-        // the given key.
-        entries, err = fd.ReadEntries(key, entries)
-        if err != nil {
-            // TODO(jeff): log this somehow? we have an invalid entry in the tsm index
-            continue
-        }
-
-    LOOP:
-        for i := 0; i < len(entries); i++ {
-            ie := entries[i]
-
-            // Skip any blocks that only contain values that are tombstoned.
-            for _, t := range trbuf {
-                if t.Min <= ie.MinTime && t.Max >= ie.MaxTime {
-                    continue LOOP
-                }
-            }
-
-            // If we are ascending and the max time of a block is before where we are looking, skip
-            // it since the data is out of our range
-            if ascending && ie.MaxTime < t {
-                continue
-                // If we are descending and the min time of a block is after where we are looking, skip
-                // it since the data is out of our range
-            } else if !ascending && ie.MinTime > t {
-                continue
-            }
-
-            location := &location{
-                r:     fd,
-                entry: ie,
-            }
-
-            if ascending {
-                // For an ascending cursor, mark everything before the seek time as read
-                // so we can filter it out at query time
-                location.readMin = math.MinInt64
-                location.readMax = t - 1
-            } else {
-                // For a descending cursor, mark everything after the seek time as read
-                // so we can filter it out at query time
-                location.readMin = t + 1
-                location.readMax = math.MaxInt64
-            }
-            // Otherwise, add this file and block location
-            locations = append(locations, location)
-        }
-    }
-    return locations
-}
-
-// CreateSnapshot creates hardlinks for all tsm and tombstone files
-// in the path provided.
-func (f *FileStore) CreateSnapshot(ctx context.Context) (backupID int, backupDirFullPath string, err error) {
-    span, _ := tracing.StartSpanFromContext(ctx)
-    defer span.Finish()
-
-    span.LogKV("dir", f.dir)
-
-    f.mu.Lock()
-    // create a copy of the files slice and ensure they aren't closed out from
-    // under us, nor the slice mutated.
-    files := make([]TSMFile, len(f.files))
-    copy(files, f.files)
-
-    for _, tsmf := range files {
-        tsmf.Ref()
-        defer tsmf.Unref()
-    }
-
-    // increment and keep track of the current temp dir for when we drop the lock.
-    // this ensures we are the only writer to the directory.
-    f.currentTempDirID += 1
-    backupID = f.currentTempDirID
-    f.mu.Unlock()
-
-    backupDirFullPath = f.InternalBackupPath(backupID)
-
-    // create the tmp directory and add the hard links. there is no longer any shared
-    // mutable state.
-    err = os.Mkdir(backupDirFullPath, 0777)
-    if err != nil {
-        return 0, "", err
-    }
-    for _, tsmf := range files {
-        newpath := filepath.Join(backupDirFullPath, filepath.Base(tsmf.Path()))
-        if err := os.Link(tsmf.Path(), newpath); err != nil {
-            return 0, "", fmt.Errorf("error creating tsm hard link: %q", err)
-        }
-        for _, tf := range tsmf.TombstoneFiles() {
-            newpath := filepath.Join(backupDirFullPath, filepath.Base(tf.Path))
-            if err := os.Link(tf.Path, newpath); err != nil {
-                return 0, "", fmt.Errorf("error creating tombstone hard link: %q", err)
-            }
-        }
-    }
-
-    return backupID, backupDirFullPath, nil
-}
-
-func (f *FileStore) InternalBackupPath(backupID int) string {
-    return filepath.Join(f.dir, fmt.Sprintf("%d.%s", backupID, TmpTSMFileExtension))
-}
-
-// MeasurementStats returns the sum of all measurement stats within the store.
-func (f *FileStore) MeasurementStats() (MeasurementStats, error) {
-    f.mu.RLock()
-    defer f.mu.RUnlock()
-
-    stats := NewMeasurementStats()
-    for _, file := range f.files {
-        s, err := file.MeasurementStats()
-        if err != nil {
-            return nil, err
-        }
-        stats.Add(s)
-    }
-    return stats, nil
-}
-
-// FormatFileNameFunc is executed when generating a new TSM filename.
-// Source filenames are provided via src.
-type FormatFileNameFunc func(generation, sequence int) string
-
-// DefaultFormatFileName is the default implementation to format TSM filenames.
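For concreteness, the naming convention that the two default implementations below encode is the generation zero-padded to 15 digits, a dash, and the sequence zero-padded to 9 digits, followed by the TSM extension. An illustrative round trip (values chosen arbitrarily, and assuming the usual .tsm extension):

    name := fmt.Sprintf("%s.%s", DefaultFormatFileName(3, 1), TSMFileExtension)
    // name == "000000000000003-000000001.tsm"
    gen, seq, err := DefaultParseFileName(name)
    // gen == 3, seq == 1, err == nil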
-func DefaultFormatFileName(generation, sequence int) string { - return fmt.Sprintf("%015d-%09d", generation, sequence) -} - -// ParseFileNameFunc is executed when parsing a TSM filename into generation & sequence. -type ParseFileNameFunc func(name string) (generation, sequence int, err error) - -// DefaultParseFileName is used to parse the filenames of TSM files. -func DefaultParseFileName(name string) (int, int, error) { - base := filepath.Base(name) - idx := strings.Index(base, ".") - if idx == -1 { - return 0, 0, fmt.Errorf("file %s is named incorrectly", name) - } - - id := base[:idx] - - idx = strings.Index(id, "-") - if idx == -1 { - return 0, 0, fmt.Errorf("file %s is named incorrectly", name) - } - - generation, err := strconv.ParseUint(id[:idx], 10, 64) - if err != nil { - return 0, 0, fmt.Errorf("file %s is named incorrectly", name) - } - - sequence, err := strconv.ParseUint(id[idx+1:], 10, 32) - if err != nil { - return 0, 0, fmt.Errorf("file %s is named incorrectly", name) - } - - return int(generation), int(sequence), nil -} - -// KeyCursor allows iteration through keys in a set of files within a FileStore. -type KeyCursor struct { - key []byte - - // trbuf is scratch allocation space for tombstones - trbuf []TimeRange - - // seeks is all the file locations that we need to return during iteration. - seeks []*location - - // current is the set of blocks possibly containing the next set of points. - // Normally this is just one entry, but there may be multiple if points have - // been overwritten. - current []*location - buf []Value - - ctx context.Context - col *metrics.Group - - // pos is the index within seeks. Based on ascending, it will increment or - // decrement through the size of seeks slice. - pos int - ascending bool -} - -type location struct { - r TSMFile - entry IndexEntry - - readMin, readMax int64 -} - -func (l *location) read() bool { - return l.readMin <= l.entry.MinTime && l.readMax >= l.entry.MaxTime -} - -func (l *location) markRead(min, max int64) { - if min < l.readMin { - l.readMin = min - } - - if max > l.readMax { - l.readMax = max - } -} - -type descLocations []*location - -// Sort methods -func (a descLocations) Len() int { return len(a) } -func (a descLocations) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a descLocations) Less(i, j int) bool { - if a[i].entry.OverlapsTimeRange(a[j].entry.MinTime, a[j].entry.MaxTime) { - return a[i].r.Path() < a[j].r.Path() - } - return a[i].entry.MaxTime < a[j].entry.MaxTime -} - -type ascLocations []*location - -// Sort methods -func (a ascLocations) Len() int { return len(a) } -func (a ascLocations) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a ascLocations) Less(i, j int) bool { - if a[i].entry.OverlapsTimeRange(a[j].entry.MinTime, a[j].entry.MaxTime) { - return a[i].r.Path() < a[j].r.Path() - } - return a[i].entry.MinTime < a[j].entry.MinTime -} - -// newKeyCursor returns a new instance of KeyCursor. -// This function assumes the read-lock has been taken. 
-func newKeyCursor(ctx context.Context, fs *FileStore, key []byte, t int64, ascending bool) *KeyCursor { - c := &KeyCursor{ - key: key, - seeks: fs.locations(key, t, ascending), - ctx: ctx, - col: metrics.GroupFromContext(ctx), - ascending: ascending, - } - - if ascending { - sort.Sort(ascLocations(c.seeks)) - } else { - sort.Sort(descLocations(c.seeks)) - } - - // Determine the distinct set of TSM files in use and mark then as in-use - for _, f := range c.seeks { - f.r.Ref() - } - - c.seek(t) - return c -} - -// Close removes all references on the cursor. -func (c *KeyCursor) Close() { - // Remove all of our in-use references since we're done - for _, f := range c.seeks { - f.r.Unref() - } - - c.buf = nil - c.seeks = nil - c.current = nil -} - -// seek positions the cursor at the given time. -func (c *KeyCursor) seek(t int64) { - if len(c.seeks) == 0 { - return - } - c.current = nil - - if c.ascending { - c.seekAscending(t) - } else { - c.seekDescending(t) - } -} - -func (c *KeyCursor) seekAscending(t int64) { - for i, e := range c.seeks { - if t < e.entry.MinTime || e.entry.Contains(t) { - // Record the position of the first block matching our seek time - if len(c.current) == 0 { - c.pos = i - } - - c.current = append(c.current, e) - } - } -} - -func (c *KeyCursor) seekDescending(t int64) { - for i := len(c.seeks) - 1; i >= 0; i-- { - e := c.seeks[i] - if t > e.entry.MaxTime || e.entry.Contains(t) { - // Record the position of the first block matching our seek time - if len(c.current) == 0 { - c.pos = i - } - c.current = append(c.current, e) - } - } -} - -// seekN returns the number of seek locations. -func (c *KeyCursor) seekN() int { - return len(c.seeks) -} - -// Next moves the cursor to the next position. -// Data should be read by the ReadBlock functions. 
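Taken together, the cursor API is driven by seeking once at construction and then alternating a typed Read*Block call with Next until an empty block comes back. A rough usage sketch (scanFloats and its arguments are hypothetical; the typed block readers appear in the generated code below):

    func scanFloats(ctx context.Context, fs *FileStore, key []byte, start int64) error {
        cur := fs.KeyCursor(ctx, key, start, true) // ascending scan from start
        defer cur.Close()

        buf := &cursors.FloatArray{}
        for {
            a, err := cur.ReadFloatArrayBlock(buf)
            if err != nil {
                return err
            }
            if a.Len() == 0 {
                return nil // no more blocks for this key
            }
            // consume a.Timestamps / a.Values here
            cur.Next()
        }
    }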
-func (c *KeyCursor) Next() { - if len(c.current) == 0 { - return - } - // Do we still have unread values in the current block - if !c.current[0].read() { - return - } - c.current = c.current[:0] - if c.ascending { - c.nextAscending() - } else { - c.nextDescending() - } -} - -func (c *KeyCursor) nextAscending() { - for { - c.pos++ - if c.pos >= len(c.seeks) { - return - } else if !c.seeks[c.pos].read() { - break - } - } - - // Append the first matching block - if len(c.current) == 0 { - c.current = append(c.current, nil) - } else { - c.current = c.current[:1] - } - c.current[0] = c.seeks[c.pos] - - // If we have overlapping blocks, append all their values so we can dedup - for i := c.pos + 1; i < len(c.seeks); i++ { - if c.seeks[i].read() { - continue - } - - c.current = append(c.current, c.seeks[i]) - } -} - -func (c *KeyCursor) nextDescending() { - for { - c.pos-- - if c.pos < 0 { - return - } else if !c.seeks[c.pos].read() { - break - } - } - - // Append the first matching block - if len(c.current) == 0 { - c.current = append(c.current, nil) - } else { - c.current = c.current[:1] - } - c.current[0] = c.seeks[c.pos] - - // If we have overlapping blocks, append all their values so we can dedup - for i := c.pos; i >= 0; i-- { - if c.seeks[i].read() { - continue - } - - c.current = append(c.current, c.seeks[i]) - } -} - -type purger struct { - mu sync.RWMutex - fileStore *FileStore - files map[string]TSMFile - running bool - - logger *zap.Logger -} - -func (p *purger) add(files []TSMFile) { - p.mu.Lock() - for _, f := range files { - p.files[f.Path()] = f - } - p.mu.Unlock() - p.purge() -} - -func (p *purger) purge() { - p.mu.Lock() - if p.running { - p.mu.Unlock() - return - } - p.running = true - p.mu.Unlock() - - go func() { - for { - p.mu.Lock() - for k, v := range p.files { - // In order to ensure that there are no races with this (file held externally calls Ref - // after we check InUse), we need to maintain the invariant that every handle to a file - // is handed out in use (Ref'd), and handlers only ever relinquish the file once (call Unref - // exactly once, and never use it again). InUse is only valid during a write lock, since - // we allow calls to Ref and Unref under the read lock and no lock at all respectively. - if !v.InUse() { - if err := v.Close(); err != nil { - p.logger.Info("Purge: close file", zap.Error(err)) - continue - } - - if err := v.Remove(); err != nil { - p.logger.Info("Purge: remove file", zap.Error(err)) - continue - } - delete(p.files, k) - } - } - - if len(p.files) == 0 { - p.running = false - p.mu.Unlock() - return - } - - p.mu.Unlock() - time.Sleep(time.Second) - } - }() -} - -type tsmReaders []TSMFile - -func (a tsmReaders) Len() int { return len(a) } -func (a tsmReaders) Less(i, j int) bool { return a[i].Path() < a[j].Path() } -func (a tsmReaders) Swap(i, j int) { a[i], a[j] = a[j], a[i] } diff --git a/tsdb/tsm1/file_store_array.gen.go b/tsdb/tsm1/file_store_array.gen.go deleted file mode 100644 index 0a2cfd1b2c..0000000000 --- a/tsdb/tsm1/file_store_array.gen.go +++ /dev/null @@ -1,927 +0,0 @@ -// Code generated by file_store.gen.go.tmpl. DO NOT EDIT. - -package tsm1 - -import ( - "github.com/influxdata/influxdb/v2/tsdb/cursors" -) - -// ReadFloatArrayBlock reads the next block as a set of float values. 
-func (c *KeyCursor) ReadFloatArrayBlock(values *cursors.FloatArray) (*cursors.FloatArray, error) { -LOOP: - // No matching blocks to decode - if len(c.current) == 0 { - values.Timestamps = values.Timestamps[:0] - values.Values = values.Values[:0] - return values, nil - } - - // First block is the oldest block containing the points we're searching for. - first := c.current[0] - err := first.r.ReadFloatArrayBlockAt(&first.entry, values) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(floatBlocksDecodedCounter).Add(1) - c.col.GetCounter(floatBlocksSizeCounter).Add(int64(first.entry.Size)) - } - - // Remove values we already read - values.Exclude(first.readMin, first.readMax) - - // Remove any tombstones - c.trbuf = first.r.TombstoneRange(c.key, c.trbuf[:0]) - excludeTombstonesFloatArray(c.trbuf, values) - // If there are no values in this first block (all tombstoned or previously read) and - // we have more potential blocks too search. Try again. - if values.Len() == 0 && len(c.current) > 0 { - c.current = c.current[1:] - goto LOOP - } - - // Only one block with this key and time range so return it - if len(c.current) == 1 { - if values.Len() > 0 { - first.markRead(values.MinTime(), values.MaxTime()) - } - return values, nil - } - - // Use the current block time range as our overlapping window - minT, maxT := first.readMin, first.readMax - if values.Len() > 0 { - minT, maxT = values.MinTime(), values.MaxTime() - } - if c.ascending { - // Blocks are ordered by generation, we may have values in the past in later blocks, if so, - // expand the window to include the min time range to ensure values are returned in ascending - // order - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.MinTime < minT && !cur.read() { - minT = cur.entry.MinTime - } - } - - // Find first block that overlaps our window - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.OverlapsTimeRange(minT, maxT) && !cur.read() { - // Shrink our window so it's the intersection of the first overlapping block and the - // first block. We do this to minimize the region that overlaps and needs to - // be merged. - if cur.entry.MaxTime > maxT { - maxT = cur.entry.MaxTime - } - values.Include(minT, maxT) - break - } - } - - // Search the remaining blocks that overlap our window and append their values so we can - // merge them. 
- for i := 1; i < len(c.current); i++ { - cur := c.current[i] - // Skip this block if it doesn't contain points we looking for or they have already been read - if !cur.entry.OverlapsTimeRange(minT, maxT) || cur.read() { - cur.markRead(minT, maxT) - continue - } - - v := &cursors.FloatArray{} - err := cur.r.ReadFloatArrayBlockAt(&cur.entry, v) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(floatBlocksDecodedCounter).Add(1) - c.col.GetCounter(floatBlocksSizeCounter).Add(int64(cur.entry.Size)) - } - - c.trbuf = cur.r.TombstoneRange(c.key, c.trbuf[:0]) - // Remove any tombstoned values - excludeTombstonesFloatArray(c.trbuf, v) - - // Remove values we already read - v.Exclude(cur.readMin, cur.readMax) - - if v.Len() > 0 { - // Only use values in the overlapping window - v.Include(minT, maxT) - // Merge the remaining values with the existing - values.Merge(v) - } - cur.markRead(minT, maxT) - } - - } else { - // Blocks are ordered by generation, we may have values in the past in later blocks, if so, - // expand the window to include the max time range to ensure values are returned in descending - // order - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.MaxTime > maxT && !cur.read() { - maxT = cur.entry.MaxTime - } - } - - // Find first block that overlaps our window - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.OverlapsTimeRange(minT, maxT) && !cur.read() { - // Shrink our window so it's the intersection of the first overlapping block and the - // first block. We do this to minimize the region that overlaps and needs to - // be merged. - if cur.entry.MinTime < minT { - minT = cur.entry.MinTime - } - values.Include(minT, maxT) - break - } - } - - // Search the remaining blocks that overlap our window and append their values so we can - // merge them. - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - // Skip this block if it doesn't contain points we looking for or they have already been read - if !cur.entry.OverlapsTimeRange(minT, maxT) || cur.read() { - cur.markRead(minT, maxT) - continue - } - - v := &cursors.FloatArray{} - err := cur.r.ReadFloatArrayBlockAt(&cur.entry, v) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(floatBlocksDecodedCounter).Add(1) - c.col.GetCounter(floatBlocksSizeCounter).Add(int64(cur.entry.Size)) - } - c.trbuf = cur.r.TombstoneRange(c.key, c.trbuf[:0]) - // Remove any tombstoned values - excludeTombstonesFloatArray(c.trbuf, v) - - // Remove values we already read - v.Exclude(cur.readMin, cur.readMax) - - // If the block we decoded should have all of it's values included, mark it as read so we - // don't use it again. - if v.Len() > 0 { - v.Include(minT, maxT) - // Merge the remaining values with the existing - v.Merge(values) - *values = *v - } - cur.markRead(minT, maxT) - } - } - - first.markRead(minT, maxT) - - return values, err -} - -func excludeTombstonesFloatArray(t []TimeRange, values *cursors.FloatArray) { - for i := range t { - values.Exclude(t[i].Min, t[i].Max) - } -} - -// ReadIntegerArrayBlock reads the next block as a set of integer values. -func (c *KeyCursor) ReadIntegerArrayBlock(values *cursors.IntegerArray) (*cursors.IntegerArray, error) { -LOOP: - // No matching blocks to decode - if len(c.current) == 0 { - values.Timestamps = values.Timestamps[:0] - values.Values = values.Values[:0] - return values, nil - } - - // First block is the oldest block containing the points we're searching for. 
- first := c.current[0] - err := first.r.ReadIntegerArrayBlockAt(&first.entry, values) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(integerBlocksDecodedCounter).Add(1) - c.col.GetCounter(integerBlocksSizeCounter).Add(int64(first.entry.Size)) - } - - // Remove values we already read - values.Exclude(first.readMin, first.readMax) - - // Remove any tombstones - c.trbuf = first.r.TombstoneRange(c.key, c.trbuf[:0]) - excludeTombstonesIntegerArray(c.trbuf, values) - // If there are no values in this first block (all tombstoned or previously read) and - // we have more potential blocks too search. Try again. - if values.Len() == 0 && len(c.current) > 0 { - c.current = c.current[1:] - goto LOOP - } - - // Only one block with this key and time range so return it - if len(c.current) == 1 { - if values.Len() > 0 { - first.markRead(values.MinTime(), values.MaxTime()) - } - return values, nil - } - - // Use the current block time range as our overlapping window - minT, maxT := first.readMin, first.readMax - if values.Len() > 0 { - minT, maxT = values.MinTime(), values.MaxTime() - } - if c.ascending { - // Blocks are ordered by generation, we may have values in the past in later blocks, if so, - // expand the window to include the min time range to ensure values are returned in ascending - // order - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.MinTime < minT && !cur.read() { - minT = cur.entry.MinTime - } - } - - // Find first block that overlaps our window - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.OverlapsTimeRange(minT, maxT) && !cur.read() { - // Shrink our window so it's the intersection of the first overlapping block and the - // first block. We do this to minimize the region that overlaps and needs to - // be merged. - if cur.entry.MaxTime > maxT { - maxT = cur.entry.MaxTime - } - values.Include(minT, maxT) - break - } - } - - // Search the remaining blocks that overlap our window and append their values so we can - // merge them. 
- for i := 1; i < len(c.current); i++ { - cur := c.current[i] - // Skip this block if it doesn't contain points we looking for or they have already been read - if !cur.entry.OverlapsTimeRange(minT, maxT) || cur.read() { - cur.markRead(minT, maxT) - continue - } - - v := &cursors.IntegerArray{} - err := cur.r.ReadIntegerArrayBlockAt(&cur.entry, v) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(integerBlocksDecodedCounter).Add(1) - c.col.GetCounter(integerBlocksSizeCounter).Add(int64(cur.entry.Size)) - } - - c.trbuf = cur.r.TombstoneRange(c.key, c.trbuf[:0]) - // Remove any tombstoned values - excludeTombstonesIntegerArray(c.trbuf, v) - - // Remove values we already read - v.Exclude(cur.readMin, cur.readMax) - - if v.Len() > 0 { - // Only use values in the overlapping window - v.Include(minT, maxT) - // Merge the remaining values with the existing - values.Merge(v) - } - cur.markRead(minT, maxT) - } - - } else { - // Blocks are ordered by generation, we may have values in the past in later blocks, if so, - // expand the window to include the max time range to ensure values are returned in descending - // order - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.MaxTime > maxT && !cur.read() { - maxT = cur.entry.MaxTime - } - } - - // Find first block that overlaps our window - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.OverlapsTimeRange(minT, maxT) && !cur.read() { - // Shrink our window so it's the intersection of the first overlapping block and the - // first block. We do this to minimize the region that overlaps and needs to - // be merged. - if cur.entry.MinTime < minT { - minT = cur.entry.MinTime - } - values.Include(minT, maxT) - break - } - } - - // Search the remaining blocks that overlap our window and append their values so we can - // merge them. - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - // Skip this block if it doesn't contain points we looking for or they have already been read - if !cur.entry.OverlapsTimeRange(minT, maxT) || cur.read() { - cur.markRead(minT, maxT) - continue - } - - v := &cursors.IntegerArray{} - err := cur.r.ReadIntegerArrayBlockAt(&cur.entry, v) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(integerBlocksDecodedCounter).Add(1) - c.col.GetCounter(integerBlocksSizeCounter).Add(int64(cur.entry.Size)) - } - c.trbuf = cur.r.TombstoneRange(c.key, c.trbuf[:0]) - // Remove any tombstoned values - excludeTombstonesIntegerArray(c.trbuf, v) - - // Remove values we already read - v.Exclude(cur.readMin, cur.readMax) - - // If the block we decoded should have all of it's values included, mark it as read so we - // don't use it again. - if v.Len() > 0 { - v.Include(minT, maxT) - // Merge the remaining values with the existing - v.Merge(values) - *values = *v - } - cur.markRead(minT, maxT) - } - } - - first.markRead(minT, maxT) - - return values, err -} - -func excludeTombstonesIntegerArray(t []TimeRange, values *cursors.IntegerArray) { - for i := range t { - values.Exclude(t[i].Min, t[i].Max) - } -} - -// ReadUnsignedArrayBlock reads the next block as a set of unsigned values. -func (c *KeyCursor) ReadUnsignedArrayBlock(values *cursors.UnsignedArray) (*cursors.UnsignedArray, error) { -LOOP: - // No matching blocks to decode - if len(c.current) == 0 { - values.Timestamps = values.Timestamps[:0] - values.Values = values.Values[:0] - return values, nil - } - - // First block is the oldest block containing the points we're searching for. 
- first := c.current[0] - err := first.r.ReadUnsignedArrayBlockAt(&first.entry, values) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(unsignedBlocksDecodedCounter).Add(1) - c.col.GetCounter(unsignedBlocksSizeCounter).Add(int64(first.entry.Size)) - } - - // Remove values we already read - values.Exclude(first.readMin, first.readMax) - - // Remove any tombstones - c.trbuf = first.r.TombstoneRange(c.key, c.trbuf[:0]) - excludeTombstonesUnsignedArray(c.trbuf, values) - // If there are no values in this first block (all tombstoned or previously read) and - // we have more potential blocks too search. Try again. - if values.Len() == 0 && len(c.current) > 0 { - c.current = c.current[1:] - goto LOOP - } - - // Only one block with this key and time range so return it - if len(c.current) == 1 { - if values.Len() > 0 { - first.markRead(values.MinTime(), values.MaxTime()) - } - return values, nil - } - - // Use the current block time range as our overlapping window - minT, maxT := first.readMin, first.readMax - if values.Len() > 0 { - minT, maxT = values.MinTime(), values.MaxTime() - } - if c.ascending { - // Blocks are ordered by generation, we may have values in the past in later blocks, if so, - // expand the window to include the min time range to ensure values are returned in ascending - // order - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.MinTime < minT && !cur.read() { - minT = cur.entry.MinTime - } - } - - // Find first block that overlaps our window - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.OverlapsTimeRange(minT, maxT) && !cur.read() { - // Shrink our window so it's the intersection of the first overlapping block and the - // first block. We do this to minimize the region that overlaps and needs to - // be merged. - if cur.entry.MaxTime > maxT { - maxT = cur.entry.MaxTime - } - values.Include(minT, maxT) - break - } - } - - // Search the remaining blocks that overlap our window and append their values so we can - // merge them. 
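Each later overlapping block then goes through the same pipeline before it is merged: decode, drop tombstoned ranges, drop what earlier reads already handed out (cur.readMin/readMax), clamp to the overlap window, and merge into the result; non-overlapping or already-read blocks are simply marked read. A compact illustration of that control flow follows, using plain timestamp/value slices rather than the cursors arrays; the "newer block wins on duplicate timestamps" rule is an assumption of this sketch.

    package main

    import "fmt"

    type point struct {
        T int64
        V float64
    }

    // exclude drops points inside [min, max]; include keeps only points inside it.
    func exclude(pts []point, min, max int64) []point {
        out := pts[:0]
        for _, p := range pts {
            if p.T < min || p.T > max {
                out = append(out, p)
            }
        }
        return out
    }

    func include(pts []point, min, max int64) []point {
        out := pts[:0]
        for _, p := range pts {
            if p.T >= min && p.T <= max {
                out = append(out, p)
            }
        }
        return out
    }

    // merge combines two ascending slices, preferring b on duplicate timestamps
    // (an assumption here, mirroring how later generations override earlier ones).
    func merge(a, b []point) []point {
        out := make([]point, 0, len(a)+len(b))
        i, j := 0, 0
        for i < len(a) && j < len(b) {
            switch {
            case a[i].T < b[j].T:
                out = append(out, a[i])
                i++
            case a[i].T > b[j].T:
                out = append(out, b[j])
                j++
            default:
                out = append(out, b[j])
                i, j = i+1, j+1
            }
        }
        out = append(out, a[i:]...)
        return append(out, b[j:]...)
    }

    func main() {
        values := []point{{1, 1}, {3, 3}}          // decoded first block
        block := []point{{2, 2}, {3, 3.3}, {9, 9}} // a later overlapping block
        block = exclude(block, 9, 9)               // drop the already-read range
        block = include(block, 1, 3)               // clamp to the merge window
        fmt.Println(merge(values, block))          // [{1 1} {2 2} {3 3.3}]
    }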
- for i := 1; i < len(c.current); i++ { - cur := c.current[i] - // Skip this block if it doesn't contain points we looking for or they have already been read - if !cur.entry.OverlapsTimeRange(minT, maxT) || cur.read() { - cur.markRead(minT, maxT) - continue - } - - v := &cursors.UnsignedArray{} - err := cur.r.ReadUnsignedArrayBlockAt(&cur.entry, v) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(unsignedBlocksDecodedCounter).Add(1) - c.col.GetCounter(unsignedBlocksSizeCounter).Add(int64(cur.entry.Size)) - } - - c.trbuf = cur.r.TombstoneRange(c.key, c.trbuf[:0]) - // Remove any tombstoned values - excludeTombstonesUnsignedArray(c.trbuf, v) - - // Remove values we already read - v.Exclude(cur.readMin, cur.readMax) - - if v.Len() > 0 { - // Only use values in the overlapping window - v.Include(minT, maxT) - // Merge the remaining values with the existing - values.Merge(v) - } - cur.markRead(minT, maxT) - } - - } else { - // Blocks are ordered by generation, we may have values in the past in later blocks, if so, - // expand the window to include the max time range to ensure values are returned in descending - // order - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.MaxTime > maxT && !cur.read() { - maxT = cur.entry.MaxTime - } - } - - // Find first block that overlaps our window - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.OverlapsTimeRange(minT, maxT) && !cur.read() { - // Shrink our window so it's the intersection of the first overlapping block and the - // first block. We do this to minimize the region that overlaps and needs to - // be merged. - if cur.entry.MinTime < minT { - minT = cur.entry.MinTime - } - values.Include(minT, maxT) - break - } - } - - // Search the remaining blocks that overlap our window and append their values so we can - // merge them. - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - // Skip this block if it doesn't contain points we looking for or they have already been read - if !cur.entry.OverlapsTimeRange(minT, maxT) || cur.read() { - cur.markRead(minT, maxT) - continue - } - - v := &cursors.UnsignedArray{} - err := cur.r.ReadUnsignedArrayBlockAt(&cur.entry, v) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(unsignedBlocksDecodedCounter).Add(1) - c.col.GetCounter(unsignedBlocksSizeCounter).Add(int64(cur.entry.Size)) - } - c.trbuf = cur.r.TombstoneRange(c.key, c.trbuf[:0]) - // Remove any tombstoned values - excludeTombstonesUnsignedArray(c.trbuf, v) - - // Remove values we already read - v.Exclude(cur.readMin, cur.readMax) - - // If the block we decoded should have all of it's values included, mark it as read so we - // don't use it again. - if v.Len() > 0 { - v.Include(minT, maxT) - // Merge the remaining values with the existing - v.Merge(values) - *values = *v - } - cur.markRead(minT, maxT) - } - } - - first.markRead(minT, maxT) - - return values, err -} - -func excludeTombstonesUnsignedArray(t []TimeRange, values *cursors.UnsignedArray) { - for i := range t { - values.Exclude(t[i].Min, t[i].Max) - } -} - -// ReadStringArrayBlock reads the next block as a set of string values. 
-func (c *KeyCursor) ReadStringArrayBlock(values *cursors.StringArray) (*cursors.StringArray, error) { -LOOP: - // No matching blocks to decode - if len(c.current) == 0 { - values.Timestamps = values.Timestamps[:0] - values.Values = values.Values[:0] - return values, nil - } - - // First block is the oldest block containing the points we're searching for. - first := c.current[0] - err := first.r.ReadStringArrayBlockAt(&first.entry, values) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(stringBlocksDecodedCounter).Add(1) - c.col.GetCounter(stringBlocksSizeCounter).Add(int64(first.entry.Size)) - } - - // Remove values we already read - values.Exclude(first.readMin, first.readMax) - - // Remove any tombstones - c.trbuf = first.r.TombstoneRange(c.key, c.trbuf[:0]) - excludeTombstonesStringArray(c.trbuf, values) - // If there are no values in this first block (all tombstoned or previously read) and - // we have more potential blocks too search. Try again. - if values.Len() == 0 && len(c.current) > 0 { - c.current = c.current[1:] - goto LOOP - } - - // Only one block with this key and time range so return it - if len(c.current) == 1 { - if values.Len() > 0 { - first.markRead(values.MinTime(), values.MaxTime()) - } - return values, nil - } - - // Use the current block time range as our overlapping window - minT, maxT := first.readMin, first.readMax - if values.Len() > 0 { - minT, maxT = values.MinTime(), values.MaxTime() - } - if c.ascending { - // Blocks are ordered by generation, we may have values in the past in later blocks, if so, - // expand the window to include the min time range to ensure values are returned in ascending - // order - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.MinTime < minT && !cur.read() { - minT = cur.entry.MinTime - } - } - - // Find first block that overlaps our window - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.OverlapsTimeRange(minT, maxT) && !cur.read() { - // Shrink our window so it's the intersection of the first overlapping block and the - // first block. We do this to minimize the region that overlaps and needs to - // be merged. - if cur.entry.MaxTime > maxT { - maxT = cur.entry.MaxTime - } - values.Include(minT, maxT) - break - } - } - - // Search the remaining blocks that overlap our window and append their values so we can - // merge them. 
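The descending branches in this removal (including the one that follows) mirror the ascending case with two differences: the window grows toward later MaxTimes instead of earlier MinTimes, and the freshly decoded block becomes the merge receiver (v.Merge(values); *values = *v), which keeps the combined output sorted newest-first without a separate re-sort. A small sketch of that merge direction, again with simplified types and with the duplicate-timestamp rule treated as an assumption:

    package main

    import "fmt"

    type point struct {
        T int64
        V float64
    }

    // mergeDesc merges two time-descending slices. In the removed descending
    // branch the freshly decoded block (b) is the receiver and the previously
    // accumulated result is merged into it.
    func mergeDesc(b, prev []point) []point {
        out := make([]point, 0, len(b)+len(prev))
        i, j := 0, 0
        for i < len(b) && j < len(prev) {
            switch {
            case b[i].T > prev[j].T:
                out = append(out, b[i])
                i++
            case b[i].T < prev[j].T:
                out = append(out, prev[j])
                j++
            default:
                out = append(out, b[i]) // assumption: later-generation block wins
                i, j = i+1, j+1
            }
        }
        out = append(out, b[i:]...)
        return append(out, prev[j:]...)
    }

    func main() {
        prev := []point{{3, 3}, {1, 1}}     // already accumulated (descending)
        block := []point{{4, 4}, {2, 2}}    // newly decoded overlapping block
        fmt.Println(mergeDesc(block, prev)) // [{4 4} {3 3} {2 2} {1 1}]
    }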
- for i := 1; i < len(c.current); i++ { - cur := c.current[i] - // Skip this block if it doesn't contain points we looking for or they have already been read - if !cur.entry.OverlapsTimeRange(minT, maxT) || cur.read() { - cur.markRead(minT, maxT) - continue - } - - v := &cursors.StringArray{} - err := cur.r.ReadStringArrayBlockAt(&cur.entry, v) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(stringBlocksDecodedCounter).Add(1) - c.col.GetCounter(stringBlocksSizeCounter).Add(int64(cur.entry.Size)) - } - - c.trbuf = cur.r.TombstoneRange(c.key, c.trbuf[:0]) - // Remove any tombstoned values - excludeTombstonesStringArray(c.trbuf, v) - - // Remove values we already read - v.Exclude(cur.readMin, cur.readMax) - - if v.Len() > 0 { - // Only use values in the overlapping window - v.Include(minT, maxT) - // Merge the remaining values with the existing - values.Merge(v) - } - cur.markRead(minT, maxT) - } - - } else { - // Blocks are ordered by generation, we may have values in the past in later blocks, if so, - // expand the window to include the max time range to ensure values are returned in descending - // order - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.MaxTime > maxT && !cur.read() { - maxT = cur.entry.MaxTime - } - } - - // Find first block that overlaps our window - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.OverlapsTimeRange(minT, maxT) && !cur.read() { - // Shrink our window so it's the intersection of the first overlapping block and the - // first block. We do this to minimize the region that overlaps and needs to - // be merged. - if cur.entry.MinTime < minT { - minT = cur.entry.MinTime - } - values.Include(minT, maxT) - break - } - } - - // Search the remaining blocks that overlap our window and append their values so we can - // merge them. - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - // Skip this block if it doesn't contain points we looking for or they have already been read - if !cur.entry.OverlapsTimeRange(minT, maxT) || cur.read() { - cur.markRead(minT, maxT) - continue - } - - v := &cursors.StringArray{} - err := cur.r.ReadStringArrayBlockAt(&cur.entry, v) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(stringBlocksDecodedCounter).Add(1) - c.col.GetCounter(stringBlocksSizeCounter).Add(int64(cur.entry.Size)) - } - c.trbuf = cur.r.TombstoneRange(c.key, c.trbuf[:0]) - // Remove any tombstoned values - excludeTombstonesStringArray(c.trbuf, v) - - // Remove values we already read - v.Exclude(cur.readMin, cur.readMax) - - // If the block we decoded should have all of it's values included, mark it as read so we - // don't use it again. - if v.Len() > 0 { - v.Include(minT, maxT) - // Merge the remaining values with the existing - v.Merge(values) - *values = *v - } - cur.markRead(minT, maxT) - } - } - - first.markRead(minT, maxT) - - return values, err -} - -func excludeTombstonesStringArray(t []TimeRange, values *cursors.StringArray) { - for i := range t { - values.Exclude(t[i].Min, t[i].Max) - } -} - -// ReadBooleanArrayBlock reads the next block as a set of boolean values. -func (c *KeyCursor) ReadBooleanArrayBlock(values *cursors.BooleanArray) (*cursors.BooleanArray, error) { -LOOP: - // No matching blocks to decode - if len(c.current) == 0 { - values.Timestamps = values.Timestamps[:0] - values.Values = values.Values[:0] - return values, nil - } - - // First block is the oldest block containing the points we're searching for. 
- first := c.current[0] - err := first.r.ReadBooleanArrayBlockAt(&first.entry, values) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(booleanBlocksDecodedCounter).Add(1) - c.col.GetCounter(booleanBlocksSizeCounter).Add(int64(first.entry.Size)) - } - - // Remove values we already read - values.Exclude(first.readMin, first.readMax) - - // Remove any tombstones - c.trbuf = first.r.TombstoneRange(c.key, c.trbuf[:0]) - excludeTombstonesBooleanArray(c.trbuf, values) - // If there are no values in this first block (all tombstoned or previously read) and - // we have more potential blocks too search. Try again. - if values.Len() == 0 && len(c.current) > 0 { - c.current = c.current[1:] - goto LOOP - } - - // Only one block with this key and time range so return it - if len(c.current) == 1 { - if values.Len() > 0 { - first.markRead(values.MinTime(), values.MaxTime()) - } - return values, nil - } - - // Use the current block time range as our overlapping window - minT, maxT := first.readMin, first.readMax - if values.Len() > 0 { - minT, maxT = values.MinTime(), values.MaxTime() - } - if c.ascending { - // Blocks are ordered by generation, we may have values in the past in later blocks, if so, - // expand the window to include the min time range to ensure values are returned in ascending - // order - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.MinTime < minT && !cur.read() { - minT = cur.entry.MinTime - } - } - - // Find first block that overlaps our window - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.OverlapsTimeRange(minT, maxT) && !cur.read() { - // Shrink our window so it's the intersection of the first overlapping block and the - // first block. We do this to minimize the region that overlaps and needs to - // be merged. - if cur.entry.MaxTime > maxT { - maxT = cur.entry.MaxTime - } - values.Include(minT, maxT) - break - } - } - - // Search the remaining blocks that overlap our window and append their values so we can - // merge them. 
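The float, integer, unsigned, string, and boolean Read*ArrayBlock methods deleted here are structurally identical; only the element type, the block decoder, and the counters they bump differ, and they were maintained as near-copies in the original tree. Purely as an illustration of that shared skeleton, the prologue common to all five could be expressed once with Go type parameters; the blockArray interface and the decode callback below are assumptions of this sketch, not the engine's API.

    package main

    import "fmt"

    // blockArray abstracts the operations the read loop needs from
    // cursors.FloatArray, IntegerArray, UnsignedArray, StringArray, BooleanArray.
    type blockArray interface {
        Len() int
        Exclude(min, max int64)
        Include(min, max int64)
    }

    // readFirstBlock captures the common prologue of every Read*ArrayBlock:
    // decode the oldest block, then trim already-read and tombstoned ranges.
    func readFirstBlock[A blockArray](decode func(A) error, values A, readMin, readMax int64, tombstones [][2]int64) (A, error) {
        if err := decode(values); err != nil {
            return values, err
        }
        values.Exclude(readMin, readMax)
        for _, tr := range tombstones {
            values.Exclude(tr[0], tr[1])
        }
        return values, nil
    }

    // demoArray is a toy blockArray used only to exercise the generic helper.
    type demoArray struct{ ts []int64 }

    func (d *demoArray) Len() int { return len(d.ts) }
    func (d *demoArray) Exclude(min, max int64) {
        out := d.ts[:0]
        for _, t := range d.ts {
            if t < min || t > max {
                out = append(out, t)
            }
        }
        d.ts = out
    }
    func (d *demoArray) Include(min, max int64) {
        out := d.ts[:0]
        for _, t := range d.ts {
            if t >= min && t <= max {
                out = append(out, t)
            }
        }
        d.ts = out
    }

    func main() {
        a := &demoArray{}
        decode := func(dst *demoArray) error { dst.ts = []int64{0, 1, 2, 3}; return nil }
        got, err := readFirstBlock(decode, a, 3, 3, [][2]int64{{1, 1}})
        fmt.Println(got.Len(), err) // 2 <nil>
    }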
- for i := 1; i < len(c.current); i++ { - cur := c.current[i] - // Skip this block if it doesn't contain points we looking for or they have already been read - if !cur.entry.OverlapsTimeRange(minT, maxT) || cur.read() { - cur.markRead(minT, maxT) - continue - } - - v := &cursors.BooleanArray{} - err := cur.r.ReadBooleanArrayBlockAt(&cur.entry, v) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(booleanBlocksDecodedCounter).Add(1) - c.col.GetCounter(booleanBlocksSizeCounter).Add(int64(cur.entry.Size)) - } - - c.trbuf = cur.r.TombstoneRange(c.key, c.trbuf[:0]) - // Remove any tombstoned values - excludeTombstonesBooleanArray(c.trbuf, v) - - // Remove values we already read - v.Exclude(cur.readMin, cur.readMax) - - if v.Len() > 0 { - // Only use values in the overlapping window - v.Include(minT, maxT) - // Merge the remaining values with the existing - values.Merge(v) - } - cur.markRead(minT, maxT) - } - - } else { - // Blocks are ordered by generation, we may have values in the past in later blocks, if so, - // expand the window to include the max time range to ensure values are returned in descending - // order - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.MaxTime > maxT && !cur.read() { - maxT = cur.entry.MaxTime - } - } - - // Find first block that overlaps our window - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - if cur.entry.OverlapsTimeRange(minT, maxT) && !cur.read() { - // Shrink our window so it's the intersection of the first overlapping block and the - // first block. We do this to minimize the region that overlaps and needs to - // be merged. - if cur.entry.MinTime < minT { - minT = cur.entry.MinTime - } - values.Include(minT, maxT) - break - } - } - - // Search the remaining blocks that overlap our window and append their values so we can - // merge them. - for i := 1; i < len(c.current); i++ { - cur := c.current[i] - // Skip this block if it doesn't contain points we looking for or they have already been read - if !cur.entry.OverlapsTimeRange(minT, maxT) || cur.read() { - cur.markRead(minT, maxT) - continue - } - - v := &cursors.BooleanArray{} - err := cur.r.ReadBooleanArrayBlockAt(&cur.entry, v) - if err != nil { - return nil, err - } - if c.col != nil { - c.col.GetCounter(booleanBlocksDecodedCounter).Add(1) - c.col.GetCounter(booleanBlocksSizeCounter).Add(int64(cur.entry.Size)) - } - c.trbuf = cur.r.TombstoneRange(c.key, c.trbuf[:0]) - // Remove any tombstoned values - excludeTombstonesBooleanArray(c.trbuf, v) - - // Remove values we already read - v.Exclude(cur.readMin, cur.readMax) - - // If the block we decoded should have all of it's values included, mark it as read so we - // don't use it again. 
- if v.Len() > 0 { - v.Include(minT, maxT) - // Merge the remaining values with the existing - v.Merge(values) - *values = *v - } - cur.markRead(minT, maxT) - } - } - - first.markRead(minT, maxT) - - return values, err -} - -func excludeTombstonesBooleanArray(t []TimeRange, values *cursors.BooleanArray) { - for i := range t { - values.Exclude(t[i].Min, t[i].Max) - } -} diff --git a/tsdb/tsm1/file_store_array_test.go b/tsdb/tsm1/file_store_array_test.go deleted file mode 100644 index 6132f1c5f4..0000000000 --- a/tsdb/tsm1/file_store_array_test.go +++ /dev/null @@ -1,369 +0,0 @@ -package tsm1_test - -import ( - "context" - "os" - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/influxdata/influxdb/v2/tsdb/cursors" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" -) - -func TestFileStore_Array(t *testing.T) { - makeFile := func(d ...interface{}) keyValues { - t.Helper() - if len(d)&1 == 1 { - panic("input should be even") - } - - vals := make([]tsm1.Value, len(d)/2) - for i := 0; i < len(d); i += 2 { - vals[i/2] = tsm1.NewFloatValue(int64(d[i].(int)), d[i+1].(float64)) - } - return keyValues{key: "cpu", values: vals} - } - - // sel selects files and values from the keyValues slice - // and used to build the expected output. - type sel struct { - // f is the index of the tsm file - f int - // i is the index of the value to select from the file - i int - } - - // del represents a file delete in order to generate tombstones - type del struct { - // f is the index of the tsm file to perform a delete. - // Specifying -1 will perform a delete over the entire FileStore. - f int - min, max int64 - } - - type read []sel - - cases := []struct { - name string - data []keyValues - time int64 - asc bool - deletes []del - reads []read - }{ - { - name: "SeekToAsc_FromStart", - - data: []keyValues{ - makeFile(0, 1.0), - makeFile(1, 2.0), - makeFile(2, 3.0), - }, - time: 0, - asc: true, - reads: []read{ - []sel{{0, 0}}, - }, - }, - { - name: "SeekToAsc_BeforeStart", - - data: []keyValues{ - makeFile(1, 1.0), - makeFile(2, 2.0), - makeFile(3, 3.0), - }, - time: 0, - asc: true, - reads: []read{ - []sel{{0, 0}}, - }, - }, - { - // Tests that seeking and reading all blocks that contain overlapping points does - // not skip any blocks. - name: "SeekToAsc_BeforeStart_OverlapFloat", - - data: []keyValues{ - makeFile(0, 0.0, 1, 1.0), - makeFile(2, 2.0), - makeFile(3, 3.0), - makeFile(0, 4.0, 2, 7.0), - }, - time: 0, - asc: true, - reads: []read{ - []sel{{3, 0}, {0, 1}, {3, 1}}, - []sel{{2, 0}}, - }, - }, - { - // Tests that blocks with a lower min time in later files are not returned - // more than once causing unsorted results. 
- name: "SeekToAsc_OverlapMinFloat", - - data: []keyValues{ - makeFile(1, 1.0, 3, 3.0), - makeFile(2, 2.0, 4, 4.0), - makeFile(0, 0.0, 1, 1.1), - makeFile(2, 2.2), - }, - time: 0, - asc: true, - reads: []read{ - []sel{{2, 0}, {2, 1}, {3, 0}, {0, 1}}, - []sel{{1, 1}}, - []sel{}, - }, - }, - { - name: "SeekToAsc_Middle", - - data: []keyValues{ - makeFile(1, 1.0, 2, 2.0, 3, 3.0), - makeFile(4, 4.0), - }, - time: 3, - asc: true, - reads: []read{ - []sel{{0, 2}}, - []sel{{1, 0}}, - }, - }, - { - name: "SeekToAsc_End", - - data: []keyValues{ - makeFile(0, 1.0), - makeFile(1, 2.0), - makeFile(2, 3.0), - }, - time: 2, - asc: true, - reads: []read{ - []sel{{2, 0}}, - }, - }, - - // descending cursor tests - { - name: "SeekToDesc_FromStart", - - data: []keyValues{ - makeFile(0, 1.0), - makeFile(1, 2.0), - makeFile(2, 3.0), - }, - time: 0, - asc: false, - reads: []read{ - []sel{{0, 0}}, - }, - }, - { - name: "SeekToDesc_Duplicate", - - data: []keyValues{ - makeFile(0, 4.0), - makeFile(0, 1.0), - makeFile(2, 2.0), - makeFile(2, 3.0), - }, - time: 2, - asc: false, - reads: []read{ - []sel{{3, 0}}, - []sel{{1, 0}}, - }, - }, - { - name: "SeekToDesc_OverlapMaxFloat", - - data: []keyValues{ - makeFile(1, 1.0, 3, 3.0), - makeFile(2, 2.0, 4, 4.0), - makeFile(0, 0.0, 1, 1.1), - makeFile(2, 2.2), - }, - time: 5, - asc: false, - reads: []read{ - []sel{{3, 0}, {0, 1}, {1, 1}}, - []sel{{2, 0}, {2, 1}}, - }, - }, - { - name: "SeekToDesc_AfterEnd", - - data: []keyValues{ - makeFile(1, 1.0), - makeFile(2, 2.0), - makeFile(3, 3.0), - }, - time: 4, - asc: false, - reads: []read{ - []sel{{2, 0}}, - }, - }, - { - name: "SeekToDesc_AfterEnd_OverlapFloat", - - data: []keyValues{ - makeFile(8, 0.0, 9, 1.0), - makeFile(2, 2.0), - makeFile(3, 3.0), - makeFile(3, 4.0, 7, 7.0), - }, - time: 10, - asc: false, - reads: []read{ - []sel{{0, 0}, {0, 1}}, - []sel{{3, 0}, {3, 1}}, - []sel{{1, 0}}, - []sel{}, - }, - }, - { - name: "SeekToDesc_Middle", - - data: []keyValues{ - makeFile(1, 1.0), - makeFile(2, 2.0, 3, 3.0, 4, 4.0), - }, - time: 3, - asc: false, - reads: []read{ - []sel{{1, 0}, {1, 1}}, - }, - }, - { - name: "SeekToDesc_End", - - data: []keyValues{ - makeFile(0, 1.0), - makeFile(1, 2.0), - makeFile(2, 3.0), - }, - time: 2, - asc: false, - reads: []read{ - []sel{{2, 0}}, - }, - }, - - // tombstone tests - { - name: "TombstoneRange", - - data: []keyValues{ - makeFile(0, 1.0), - makeFile(1, 2.0), - makeFile(2, 3.0), - }, - time: 0, - asc: true, - deletes: []del{ - {-1, 1, 1}, - }, - reads: []read{ - []sel{{0, 0}}, - []sel{{2, 0}}, - []sel{}, - }, - }, - { - name: "TombstoneRange_PartialFirst", - - data: []keyValues{ - makeFile(0, 0.0, 1, 1.0), - makeFile(2, 2.0), - }, - time: 0, - asc: true, - deletes: []del{ - {0, 1, 3}, - }, - reads: []read{ - []sel{{0, 0}}, - []sel{{1, 0}}, - []sel{}, - }, - }, - { - name: "TombstoneRange_PartialFloat", - - data: []keyValues{ - makeFile(0, 0.0, 1, 1.0, 2, 2.0), - }, - time: 0, - asc: true, - deletes: []del{ - {-1, 1, 1}, - }, - reads: []read{ - []sel{{0, 0}, {0, 2}}, - []sel{}, - }, - }, - } - - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - files, err := newFiles(dir, tc.data...) - if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - for _, del := range tc.deletes { - if del.f > -1 { - // Delete part of the block in the first file. 
- r := MustOpenTSMReader(files[del.f]) - r.DeleteRange([][]byte{[]byte("cpu")}, del.min, del.max) - } - } - - fs.Replace(nil, files) - - for _, del := range tc.deletes { - if del.f == -1 { - if err := fs.DeleteRange([][]byte{[]byte("cpu")}, del.min, del.max); err != nil { - t.Fatalf("unexpected error delete range: %v", err) - } - } - } - - buf := cursors.NewFloatArrayLen(1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), tc.time, tc.asc) - - for i, read := range tc.reads { - // Search for an entry that exists in the second file - values, err := c.ReadFloatArrayBlock(buf) - if err != nil { - t.Fatalf("read %d failed: unexpected error reading values: %v", i, err) - } - - exp := &cursors.FloatArray{} - for _, s := range read { - vals := tc.data[s.f].values - exp.Timestamps = append(exp.Timestamps, vals[s.i].UnixNano()) - exp.Values = append(exp.Values, vals[s.i].Value().(float64)) - } - - if len(read) == 0 { - exp = cursors.NewFloatArrayLen(0) - } - - if !cmp.Equal(values, exp) { - t.Fatalf("read %d failed: unexpected values -got/+exp\n%s", i, cmp.Diff(values, exp)) - } - - c.Next() - } - }) - } -} diff --git a/tsdb/tsm1/file_store_key_iterator.go b/tsdb/tsm1/file_store_key_iterator.go deleted file mode 100644 index ade8ec1a08..0000000000 --- a/tsdb/tsm1/file_store_key_iterator.go +++ /dev/null @@ -1,115 +0,0 @@ -package tsm1 - -import ( - "bytes" - "container/heap" -) - -type keyIterator struct { - iter TSMIterator - key []byte - typ byte -} - -func newKeyIterator(f TSMFile, seek []byte) *keyIterator { - k := &keyIterator{iter: f.Iterator(seek)} - k.next() - return k -} - -func (k *keyIterator) next() bool { - if k.iter.Next() { - k.key, k.typ = k.iter.Key(), k.iter.Type() - return true - } - return false -} - -func (k *keyIterator) Err() error { return k.iter.Err() } - -type mergeKeyIterator struct { - itrs keyIterators - key []byte - typ byte - err error -} - -func newMergeKeyIterator(files []TSMFile, seek []byte) *mergeKeyIterator { - m := &mergeKeyIterator{} - itrs := make(keyIterators, 0, len(files)) - for _, f := range files { - if ki := newKeyIterator(f, seek); ki != nil { - itrs = append(itrs, ki) - } - } - m.itrs = itrs - heap.Init(&m.itrs) - - return m -} - -func (m *mergeKeyIterator) Next() bool { - if m.err != nil { - return false - } - - merging := len(m.itrs) > 1 - -RETRY: - if len(m.itrs) == 0 { - return false - } - - key, typ := m.itrs[0].key, m.itrs[0].typ - more := m.itrs[0].next() - - if !more { - if err := m.itrs[0].Err(); err != nil { - m.err = err - return false - } - } - - switch { - case len(m.itrs) > 1: - if !more { - // remove iterator from heap - heap.Pop(&m.itrs) - } else { - heap.Fix(&m.itrs, 0) - } - - case len(m.itrs) == 1: - if !more { - m.itrs = nil - } - } - - if merging && bytes.Equal(m.key, key) { - // same as previous key, keep iterating - goto RETRY - } - - m.key, m.typ = key, typ - return true -} - -func (m *mergeKeyIterator) Err() error { return m.err } - -func (m *mergeKeyIterator) Read() ([]byte, byte) { return m.key, m.typ } - -type keyIterators []*keyIterator - -func (k keyIterators) Len() int { return len(k) } -func (k keyIterators) Less(i, j int) bool { return bytes.Compare(k[i].key, k[j].key) == -1 } -func (k keyIterators) Swap(i, j int) { k[i], k[j] = k[j], k[i] } - -func (k *keyIterators) Push(x interface{}) { *k = append(*k, x.(*keyIterator)) } - -func (k *keyIterators) Pop() interface{} { - old := *k - n := len(old) - x := old[n-1] - *k = old[:n-1] - return x -} diff --git a/tsdb/tsm1/file_store_key_iterator_test.go 
b/tsdb/tsm1/file_store_key_iterator_test.go deleted file mode 100644 index 6561064218..0000000000 --- a/tsdb/tsm1/file_store_key_iterator_test.go +++ /dev/null @@ -1,160 +0,0 @@ -package tsm1 - -import ( - "sort" - "testing" - - "github.com/google/go-cmp/cmp" -) - -func TestNewMergeKeyIterator(t *testing.T) { - cases := []struct { - name string - seek string - files []TSMFile - - exp []string - }{ - { - name: "mixed", - files: newTSMFiles( - []string{"aaaa", "bbbb", "cccc", "dddd"}, - []string{"aaaa", "cccc", "dddd"}, - []string{"eeee", "ffff", "gggg"}, - []string{"aaaa"}, - []string{"dddd"}, - ), - exp: []string{"aaaa", "bbbb", "cccc", "dddd", "eeee", "ffff", "gggg"}, - }, - - { - name: "similar keys", - files: newTSMFiles( - []string{"a", "aaa"}, - []string{"aa", "aaaa"}, - ), - exp: []string{"a", "aa", "aaa", "aaaa"}, - }, - - { - name: "seek skips some files", - seek: "eeee", - files: newTSMFiles( - []string{"aaaa", "bbbb", "cccc", "dddd"}, - []string{"aaaa", "cccc", "dddd"}, - []string{"eeee", "ffff", "gggg"}, - []string{"aaaa"}, - []string{"dddd"}, - ), - exp: []string{"eeee", "ffff", "gggg"}, - }, - - { - name: "keys same across all files", - files: newTSMFiles( - []string{"aaaa", "bbbb", "cccc", "dddd"}, - []string{"aaaa", "bbbb", "cccc", "dddd"}, - []string{"aaaa", "bbbb", "cccc", "dddd"}, - ), - exp: []string{"aaaa", "bbbb", "cccc", "dddd"}, - }, - - { - name: "keys same across all files with extra", - files: newTSMFiles( - []string{"aaaa", "bbbb", "cccc", "dddd"}, - []string{"aaaa", "bbbb", "cccc", "dddd"}, - []string{"aaaa", "bbbb", "cccc", "dddd", "eeee"}, - ), - exp: []string{"aaaa", "bbbb", "cccc", "dddd", "eeee"}, - }, - - { - name: "seek skips all files", - seek: "eeee", - files: newTSMFiles( - []string{"aaaa", "bbbb", "cccc", "dddd"}, - []string{"aaaa", "bbbb", "cccc", "dddd"}, - []string{"aaaa", "bbbb", "cccc", "dddd"}, - ), - exp: nil, - }, - - { - name: "keys sequential across all files", - files: newTSMFiles( - []string{"a", "b", "c", "d"}, - []string{"e", "f", "g", "h"}, - []string{"i", "j", "k", "l"}, - ), - exp: []string{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"}, - }, - - { - name: "seek past one file", - seek: "e", - files: newTSMFiles( - []string{"a", "b", "c", "d"}, - []string{"e", "f", "g", "h"}, - []string{"i", "j", "k", "l"}, - ), - exp: []string{"e", "f", "g", "h", "i", "j", "k", "l"}, - }, - } - - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - ki := newMergeKeyIterator(tc.files, []byte(tc.seek)) - var act []string - for ki.Next() { - key, _ := ki.Read() - act = append(act, string(key)) - } - if !cmp.Equal(tc.exp, act) { - t.Error(cmp.Diff(tc.exp, act)) - } - }) - } - -} - -func newTSMFiles(keys ...[]string) []TSMFile { - var files []TSMFile - for _, k := range keys { - files = append(files, newMockTSMFile(k...)) - } - return files -} - -type mockTSMFile struct { - TSMFile - keys []string -} - -func newMockTSMFile(keys ...string) *mockTSMFile { - sort.Strings(keys) - return &mockTSMFile{keys: keys} -} - -func (m *mockTSMFile) Iterator(seek []byte) TSMIterator { - skey := string(seek) - n := sort.Search(len(m.keys), func(i int) bool { return m.keys[i] >= skey }) - return &mockTSMIterator{ - n: n - 1, - keys: m.keys, - } -} - -type mockTSMIterator struct { - TSMIndexIterator - n int - keys []string -} - -func (m *mockTSMIterator) Next() bool { - m.n++ - return m.n < len(m.keys) -} - -func (m *mockTSMIterator) Key() []byte { return []byte(m.keys[m.n]) } -func (m *mockTSMIterator) Type() byte { return 0 } diff --git 
a/tsdb/tsm1/file_store_observer.go b/tsdb/tsm1/file_store_observer.go deleted file mode 100644 index a39ee114a4..0000000000 --- a/tsdb/tsm1/file_store_observer.go +++ /dev/null @@ -1,6 +0,0 @@ -package tsm1 - -type noFileStoreObserver struct{} - -func (noFileStoreObserver) FileFinishing(path string) error { return nil } -func (noFileStoreObserver) FileUnlinking(path string) error { return nil } diff --git a/tsdb/tsm1/file_store_test.go b/tsdb/tsm1/file_store_test.go deleted file mode 100644 index 6652f92d0d..0000000000 --- a/tsdb/tsm1/file_store_test.go +++ /dev/null @@ -1,3085 +0,0 @@ -package tsm1_test - -import ( - "context" - "fmt" - "io/ioutil" - "math" - "os" - "path/filepath" - "reflect" - "strings" - "sync/atomic" - "testing" - "time" - - "github.com/influxdata/influxdb/v2/logger" - "github.com/influxdata/influxdb/v2/pkg/fs" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" -) - -func TestFileStore_Read(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, 1.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(1, 2.0)}}, - keyValues{"mem", []tsm1.Value{tsm1.NewValue(0, 1.0)}}, - } - - files, err := newFiles(dir, data...) - if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - // Search for an entry that exists in the second file - values, err := fs.Read([]byte("cpu"), 1) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := data[1] - if got, exp := len(values), len(exp.values); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp.values { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } -} - -func TestFileStore_SeekToAsc_FromStart(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, 1.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(1, 2.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, 3.0)}}, - } - - files, err := newFiles(dir, data...) - if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - buf := make([]tsm1.FloatValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 0, true) - // Search for an entry that exists in the second file - values, err := c.ReadFloatBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := data[0] - if got, exp := len(values), len(exp.values); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp.values { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } -} - -func TestFileStore_SeekToAsc_Duplicate(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, 1.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, 2.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, 3.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, 4.0)}}, - } - - files, err := newFiles(dir, data...) 
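Every removed TestFileStore_SeekTo* case repeats the same assertion block: a length check followed by a per-index Value() comparison. As a purely illustrative factoring (meant to live in a _test.go file; the valuer interface and the generic form are assumptions of this sketch, not the original helpers), that repeated block could be written once as:

    package tsm1_test

    import "testing"

    // valuer is a stand-in for the subset of tsm1.Value these assertions use.
    type valuer interface{ Value() interface{} }

    // assertValues factors out the got/exp comparison repeated in every removed
    // TestFileStore_SeekTo* function.
    func assertValues[V valuer](t *testing.T, got, exp []V) {
        t.Helper()
        if len(got) != len(exp) {
            t.Fatalf("value length mismatch: got %v, exp %v", len(got), len(exp))
        }
        for i := range exp {
            if got[i].Value() != exp[i].Value() {
                t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got[i].Value(), exp[i].Value())
            }
        }
    }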
- if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - buf := make([]tsm1.FloatValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 0, true) - // Search for an entry that exists in the second file - values, err := c.ReadFloatBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := []tsm1.Value{ - data[1].values[0], - } - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - // Check that calling Next will dedupe points - c.Next() - values, err = c.ReadFloatBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp = []tsm1.Value{ - data[3].values[0], - } - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadFloatBlock(&buf) - if err != nil { - t.Fatal(err) - } - - exp = nil - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } -} - -func TestFileStore_SeekToAsc_BeforeStart(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(1, 1.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, 2.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(3, 3.0)}}, - } - - files, err := newFiles(dir, data...) - if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - // Search for an entry that exists in the second file - buf := make([]tsm1.FloatValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 0, true) - values, err := c.ReadFloatBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := data[0] - if got, exp := len(values), len(exp.values); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp.values { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } -} - -// Tests that seeking and reading all blocks that contain overlapping points does -// not skip any blocks. -func TestFileStore_SeekToAsc_BeforeStart_OverlapFloat(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, 0.0), tsm1.NewValue(1, 1.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, 2.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(3, 3.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, 4.0), tsm1.NewValue(2, 7.0)}}, - } - - files, err := newFiles(dir, data...) 
- if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - // Search for an entry that exists in the second file - buf := make([]tsm1.FloatValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 0, true) - values, err := c.ReadFloatBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := []tsm1.Value{ - data[3].values[0], - data[0].values[1], - data[3].values[1], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadFloatBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp = []tsm1.Value{ - data[2].values[0], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } -} - -// Tests that seeking and reading all blocks that contain overlapping points does -// not skip any blocks. -func TestFileStore_SeekToAsc_BeforeStart_OverlapInteger(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, int64(0)), tsm1.NewValue(1, int64(1))}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, int64(2))}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(3, int64(3))}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, int64(4)), tsm1.NewValue(2, int64(7))}}, - } - - files, err := newFiles(dir, data...) - if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - // Search for an entry that exists in the second file - buf := make([]tsm1.IntegerValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 0, true) - values, err := c.ReadIntegerBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := []tsm1.Value{ - data[3].values[0], - data[0].values[1], - data[3].values[1], - } - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadIntegerBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp = []tsm1.Value{ - data[2].values[0], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } -} - -// Tests that seeking and reading all blocks that contain overlapping points does -// not skip any blocks. 
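The access pattern these deleted tests exercise is the same throughout: seek a KeyCursor, drain the current block, then advance with Next until nothing remains. A sketch of that loop as a test helper, using the pre-move import path that appears in this hunk (error handling kept minimal; Close/Next semantics as used by the removed tests):

    package tsm1_test

    import (
        "context"

        "github.com/influxdata/influxdb/v2/tsdb/tsm1"
    )

    // readAllFloats walks a key the way the removed tests do: seek a cursor,
    // read each block, and advance with Next until no blocks remain.
    func readAllFloats(fs *tsm1.FileStore, key []byte, seek int64, asc bool) ([]tsm1.FloatValue, error) {
        buf := make([]tsm1.FloatValue, 1000)
        c := fs.KeyCursor(context.Background(), key, seek, asc)
        defer c.Close()

        var out []tsm1.FloatValue
        for {
            values, err := c.ReadFloatBlock(&buf)
            if err != nil {
                return nil, err
            }
            if len(values) == 0 {
                return out, nil
            }
            out = append(out, values...)
            c.Next()
        }
    }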
-func TestFileStore_SeekToAsc_BeforeStart_OverlapUnsigned(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, uint64(0)), tsm1.NewValue(1, uint64(1))}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, uint64(2))}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(3, uint64(3))}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, uint64(4)), tsm1.NewValue(2, uint64(7))}}, - } - - files, err := newFiles(dir, data...) - if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - // Search for an entry that exists in the second file - buf := make([]tsm1.UnsignedValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 0, true) - values, err := c.ReadUnsignedBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := []tsm1.Value{ - data[3].values[0], - data[0].values[1], - data[3].values[1], - } - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadUnsignedBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp = []tsm1.Value{ - data[2].values[0], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } -} - -// Tests that seeking and reading all blocks that contain overlapping points does -// not skip any blocks. -func TestFileStore_SeekToAsc_BeforeStart_OverlapBoolean(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, true), tsm1.NewValue(1, false)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, true)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(3, true)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, false), tsm1.NewValue(2, true)}}, - } - - files, err := newFiles(dir, data...) 
- if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - // Search for an entry that exists in the second file - buf := make([]tsm1.BooleanValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 0, true) - values, err := c.ReadBooleanBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := []tsm1.Value{ - data[3].values[0], - data[0].values[1], - data[3].values[1], - } - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadBooleanBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp = []tsm1.Value{ - data[2].values[0], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } -} - -// Tests that seeking and reading all blocks that contain overlapping points does -// not skip any blocks. -func TestFileStore_SeekToAsc_BeforeStart_OverlapString(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, "zero"), tsm1.NewValue(1, "one")}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, "two")}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(3, "three")}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, "four"), tsm1.NewValue(2, "seven")}}, - } - - files, err := newFiles(dir, data...) 
- if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - // Search for an entry that exists in the second file - buf := make([]tsm1.StringValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 0, true) - values, err := c.ReadStringBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := []tsm1.Value{ - data[3].values[0], - data[0].values[1], - data[3].values[1], - } - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadStringBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp = []tsm1.Value{ - data[2].values[0], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } -} - -// Tests that blocks with a lower min time in later files are not returned -// more than once causing unsorted results -func TestFileStore_SeekToAsc_OverlapMinFloat(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(1, 1.0), tsm1.NewValue(3, 3.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, 2.0), tsm1.NewValue(4, 4.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, 0.0), tsm1.NewValue(1, 1.1)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, 2.2)}}, - } - - files, err := newFiles(dir, data...) 
- if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - buf := make([]tsm1.FloatValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 0, true) - // Search for an entry that exists in the second file - values, err := c.ReadFloatBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := []tsm1.Value{ - data[2].values[0], - data[2].values[1], - data[3].values[0], - data[0].values[1], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - // Check that calling Next will dedupe points - c.Next() - values, err = c.ReadFloatBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp = []tsm1.Value{ - data[1].values[1], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadFloatBlock(&buf) - if err != nil { - t.Fatal(err) - } - - exp = nil - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } -} - -// Tests that blocks with a lower min time in later files are not returned -// more than once causing unsorted results -func TestFileStore_SeekToAsc_OverlapMinInteger(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(1, int64(1)), tsm1.NewValue(3, int64(3))}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, int64(2)), tsm1.NewValue(4, int64(4))}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, int64(0)), tsm1.NewValue(1, int64(10))}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, int64(5))}}, - } - - files, err := newFiles(dir, data...) 
- if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - buf := make([]tsm1.IntegerValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 0, true) - // Search for an entry that exists in the second file - values, err := c.ReadIntegerBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := []tsm1.Value{ - data[2].values[0], - data[2].values[1], - data[3].values[0], - data[0].values[1], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - // Check that calling Next will dedupe points - c.Next() - values, err = c.ReadIntegerBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp = []tsm1.Value{ - data[1].values[1], - } - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadIntegerBlock(&buf) - if err != nil { - t.Fatal(err) - } - - exp = nil - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } -} - -// Tests that blocks with a lower min time in later files are not returned -// more than once causing unsorted results -func TestFileStore_SeekToAsc_OverlapMinUnsigned(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(1, uint64(1)), tsm1.NewValue(3, uint64(3))}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, uint64(2)), tsm1.NewValue(4, uint64(4))}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, uint64(0)), tsm1.NewValue(1, uint64(10))}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, uint64(5))}}, - } - - files, err := newFiles(dir, data...) 
- if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - buf := make([]tsm1.UnsignedValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 0, true) - // Search for an entry that exists in the second file - values, err := c.ReadUnsignedBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := []tsm1.Value{ - data[2].values[0], - data[2].values[1], - data[3].values[0], - data[0].values[1], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - // Check that calling Next will dedupe points - c.Next() - values, err = c.ReadUnsignedBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp = []tsm1.Value{ - data[1].values[1], - } - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadUnsignedBlock(&buf) - if err != nil { - t.Fatal(err) - } - - exp = nil - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } -} - -// Tests that blocks with a lower min time in later files are not returned -// more than once causing unsorted results -func TestFileStore_SeekToAsc_OverlapMinBoolean(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(1, true), tsm1.NewValue(3, true)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, true), tsm1.NewValue(4, true)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, true), tsm1.NewValue(1, false)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, false)}}, - } - - files, err := newFiles(dir, data...) 
- if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - buf := make([]tsm1.BooleanValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 0, true) - // Search for an entry that exists in the second file - values, err := c.ReadBooleanBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := []tsm1.Value{ - data[2].values[0], - data[2].values[1], - data[3].values[0], - data[0].values[1], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - // Check that calling Next will dedupe points - c.Next() - values, err = c.ReadBooleanBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp = []tsm1.Value{ - data[1].values[1], - } - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadBooleanBlock(&buf) - if err != nil { - t.Fatal(err) - } - - exp = nil - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } -} - -// Tests that blocks with a lower min time in later files are not returned -// more than once causing unsorted results -func TestFileStore_SeekToAsc_OverlapMinString(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(1, "1.0"), tsm1.NewValue(3, "3.0")}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, "2.0"), tsm1.NewValue(4, "4.0")}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, "0.0"), tsm1.NewValue(1, "1.1")}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, "2.2")}}, - } - - files, err := newFiles(dir, data...) 
- if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - buf := make([]tsm1.StringValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 0, true) - // Search for an entry that exists in the second file - values, err := c.ReadStringBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := []tsm1.Value{ - data[2].values[0], - data[2].values[1], - data[3].values[0], - data[0].values[1], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - // Check that calling Next will dedupe points - c.Next() - values, err = c.ReadStringBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp = []tsm1.Value{ - data[1].values[1], - } - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadStringBlock(&buf) - if err != nil { - t.Fatal(err) - } - - exp = nil - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } -} - -func TestFileStore_SeekToAsc_Middle(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(1, 1.0), - tsm1.NewValue(2, 2.0), - tsm1.NewValue(3, 3.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(4, 4.0)}}, - } - - files, err := newFiles(dir, data...) - if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - // Search for an entry that exists in the second file - buf := make([]tsm1.FloatValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 3, true) - values, err := c.ReadFloatBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := []tsm1.Value{data[0].values[2]} - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadFloatBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp = []tsm1.Value{data[1].values[0]} - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - -} - -func TestFileStore_SeekToAsc_End(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, 1.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(1, 2.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, 3.0)}}, - } - - files, err := newFiles(dir, data...) 
- if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - buf := make([]tsm1.FloatValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 2, true) - values, err := c.ReadFloatBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := data[2] - if got, exp := len(values), len(exp.values); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp.values { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } -} - -func TestFileStore_SeekToDesc_FromStart(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, 1.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(1, 2.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, 3.0)}}, - } - - files, err := newFiles(dir, data...) - if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - // Search for an entry that exists in the second file - buf := make([]tsm1.FloatValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 0, false) - values, err := c.ReadFloatBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - exp := data[0] - if got, exp := len(values), len(exp.values); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp.values { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } -} - -func TestFileStore_SeekToDesc_Duplicate(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, 4.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, 1.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, 2.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, 3.0)}}, - } - - files, err := newFiles(dir, data...) 
- if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - // Search for an entry that exists in the second file - buf := make([]tsm1.FloatValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 2, false) - values, err := c.ReadFloatBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - exp := []tsm1.Value{ - data[3].values[0], - } - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadFloatBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - exp = []tsm1.Value{ - data[1].values[0], - } - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } -} - -func TestFileStore_SeekToDesc_OverlapMaxFloat(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(1, 1.0), tsm1.NewValue(3, 3.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, 2.0), tsm1.NewValue(4, 4.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, 0.0), tsm1.NewValue(1, 1.1)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, 2.2)}}, - } - - files, err := newFiles(dir, data...) - if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - // Search for an entry that exists in the second file - buf := make([]tsm1.FloatValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 5, false) - values, err := c.ReadFloatBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := []tsm1.Value{ - data[3].values[0], - data[0].values[1], - data[1].values[1], - } - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadFloatBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - exp = []tsm1.Value{ - - data[2].values[0], - data[2].values[1], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } -} - -func TestFileStore_SeekToDesc_OverlapMaxInteger(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(1, int64(1)), tsm1.NewValue(3, int64(3))}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, int64(2)), tsm1.NewValue(4, int64(4))}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, int64(0)), tsm1.NewValue(1, int64(10))}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, int64(5))}}, - } - - files, err := newFiles(dir, data...) 
- if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - // Search for an entry that exists in the second file - buf := make([]tsm1.IntegerValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 5, false) - values, err := c.ReadIntegerBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := []tsm1.Value{ - data[3].values[0], - data[0].values[1], - data[1].values[1], - } - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadIntegerBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - exp = []tsm1.Value{ - data[2].values[0], - data[2].values[1], - } - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } -} -func TestFileStore_SeekToDesc_OverlapMaxUnsigned(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(1, uint64(1)), tsm1.NewValue(3, uint64(3))}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, uint64(2)), tsm1.NewValue(4, uint64(4))}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, uint64(0)), tsm1.NewValue(1, uint64(10))}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, uint64(5))}}, - } - - files, err := newFiles(dir, data...) 
- if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - // Search for an entry that exists in the second file - buf := make([]tsm1.UnsignedValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 5, false) - values, err := c.ReadUnsignedBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := []tsm1.Value{ - data[3].values[0], - data[0].values[1], - data[1].values[1], - } - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadUnsignedBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - exp = []tsm1.Value{ - data[2].values[0], - data[2].values[1], - } - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } -} - -func TestFileStore_SeekToDesc_OverlapMaxBoolean(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(1, true), tsm1.NewValue(3, true)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, true), tsm1.NewValue(4, true)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, true), tsm1.NewValue(1, false)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, false)}}, - } - - files, err := newFiles(dir, data...) 
- if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - // Search for an entry that exists in the second file - buf := make([]tsm1.BooleanValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 5, false) - values, err := c.ReadBooleanBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := []tsm1.Value{ - data[3].values[0], - data[0].values[1], - data[1].values[1], - } - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadBooleanBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - exp = []tsm1.Value{ - data[2].values[0], - data[2].values[1], - } - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } -} - -func TestFileStore_SeekToDesc_OverlapMaxString(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(1, "1.0"), tsm1.NewValue(3, "3.0")}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, "2.0"), tsm1.NewValue(4, "4.0")}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, "0.0"), tsm1.NewValue(1, "1.1")}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, "2.2")}}, - } - - files, err := newFiles(dir, data...) - if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - // Search for an entry that exists in the second file - buf := make([]tsm1.StringValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 5, false) - values, err := c.ReadStringBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := []tsm1.Value{ - data[3].values[0], - data[0].values[1], - data[1].values[1], - } - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadStringBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - exp = []tsm1.Value{ - data[2].values[0], - data[2].values[1], - } - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } -} - -func TestFileStore_SeekToDesc_AfterEnd(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(1, 1.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, 2.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(3, 3.0)}}, - } - - files, err := newFiles(dir, data...) 
- if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - buf := make([]tsm1.FloatValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 4, false) - values, err := c.ReadFloatBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := data[2] - if got, exp := len(values), len(exp.values); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp.values { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } -} - -func TestFileStore_SeekToDesc_AfterEnd_OverlapFloat(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 4 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(8, 0.0), tsm1.NewValue(9, 1.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, 2.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(3, 3.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(3, 4.0), tsm1.NewValue(7, 7.0)}}, - } - - files, err := newFiles(dir, data...) - if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - buf := make([]tsm1.FloatValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 10, false) - values, err := c.ReadFloatBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := []tsm1.Value{ - data[0].values[0], - data[0].values[1], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadFloatBlock(&buf) - - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp = []tsm1.Value{ - data[3].values[0], - data[3].values[1], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadFloatBlock(&buf) - - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp = []tsm1.Value{ - data[1].values[0], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadFloatBlock(&buf) - - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - if got, exp := len(values), 0; got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } -} - -func TestFileStore_SeekToDesc_AfterEnd_OverlapInteger(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(8, int64(0)), tsm1.NewValue(9, int64(1))}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, int64(2))}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(3, int64(3))}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(3, int64(4)), tsm1.NewValue(10, int64(7))}}, - } - - 
files, err := newFiles(dir, data...) - if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - buf := make([]tsm1.IntegerValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 11, false) - values, err := c.ReadIntegerBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := []tsm1.Value{ - data[3].values[0], - data[0].values[0], - data[0].values[1], - data[3].values[1], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadIntegerBlock(&buf) - - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp = []tsm1.Value{ - data[1].values[0], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadIntegerBlock(&buf) - - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - if got, exp := len(values), 0; got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } -} - -func TestFileStore_SeekToDesc_AfterEnd_OverlapUnsigned(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(8, uint64(0)), tsm1.NewValue(9, uint64(1))}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, uint64(2))}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(3, uint64(3))}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(3, uint64(4)), tsm1.NewValue(10, uint64(7))}}, - } - - files, err := newFiles(dir, data...) 
- if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - buf := make([]tsm1.UnsignedValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 11, false) - values, err := c.ReadUnsignedBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := []tsm1.Value{ - data[3].values[0], - data[0].values[0], - data[0].values[1], - data[3].values[1], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadUnsignedBlock(&buf) - - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp = []tsm1.Value{ - data[1].values[0], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadUnsignedBlock(&buf) - - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - if got, exp := len(values), 0; got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } -} - -func TestFileStore_SeekToDesc_AfterEnd_OverlapBoolean(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(8, true), tsm1.NewValue(9, true)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, true)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(3, false)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(3, true), tsm1.NewValue(7, false)}}, - } - - files, err := newFiles(dir, data...) 
- if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - buf := make([]tsm1.BooleanValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 11, false) - values, err := c.ReadBooleanBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := []tsm1.Value{ - data[0].values[0], - data[0].values[1], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadBooleanBlock(&buf) - - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp = []tsm1.Value{ - data[3].values[0], - data[3].values[1], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadBooleanBlock(&buf) - - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp = []tsm1.Value{ - data[1].values[0], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadBooleanBlock(&buf) - - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - if got, exp := len(values), 0; got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } -} - -func TestFileStore_SeekToDesc_AfterEnd_OverlapString(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(8, "eight"), tsm1.NewValue(9, "nine")}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, "two")}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(3, "three")}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(3, "four"), tsm1.NewValue(7, "seven")}}, - } - - files, err := newFiles(dir, data...) 
- if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - buf := make([]tsm1.StringValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 11, false) - values, err := c.ReadStringBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := []tsm1.Value{ - data[0].values[0], - data[0].values[1], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadStringBlock(&buf) - - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp = []tsm1.Value{ - data[3].values[0], - data[3].values[1], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadStringBlock(&buf) - - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp = []tsm1.Value{ - data[1].values[0], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadStringBlock(&buf) - - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - if got, exp := len(values), 0; got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } -} - -func TestFileStore_SeekToDesc_Middle(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(1, 1.0)}}, - keyValues{"cpu", []tsm1.Value{ - tsm1.NewValue(2, 2.0), - tsm1.NewValue(3, 3.0), - tsm1.NewValue(4, 4.0)}, - }, - } - - files, err := newFiles(dir, data...) 
- if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - // Search for an entry that exists in the second file - buf := make([]tsm1.FloatValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 3, false) - values, err := c.ReadFloatBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := []tsm1.Value{ - data[1].values[0], - data[1].values[1], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - c.Next() - values, err = c.ReadFloatBlock(&buf) - - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp = []tsm1.Value{ - data[0].values[0], - } - - if got, exp := len(values), len(exp); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } - - c.Next() - values, err = c.ReadFloatBlock(&buf) - - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - if got, exp := len(values), 0; got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } -} - -func TestFileStore_SeekToDesc_End(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, 1.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(1, 2.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, 3.0)}}, - } - - files, err := newFiles(dir, data...) - if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - buf := make([]tsm1.FloatValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 2, false) - values, err := c.ReadFloatBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := data[2] - if got, exp := len(values), len(exp.values); got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - for i, v := range exp.values { - if got, exp := values[i].Value(), v.Value(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", i, got, exp) - } - } -} - -func TestKeyCursor_TombstoneRange(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, 1.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(1, 2.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, 3.0)}}, - } - - files, err := newFiles(dir, data...) 
- if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - if err := fs.DeleteRange([][]byte{[]byte("cpu")}, 1, 1); err != nil { - t.Fatalf("unexpected error delete range: %v", err) - } - - buf := make([]tsm1.FloatValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 0, true) - expValues := []int{0, 2} - for _, v := range expValues { - values, err := c.ReadFloatBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - exp := data[v] - if got, exp := len(values), 1; got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - if got, exp := values[0].String(), exp.values[0].String(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", 0, got, exp) - } - c.Next() - } -} - -func TestKeyCursor_TombstoneRange_PartialFirst(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, 0.0), tsm1.NewValue(1, 1.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, 2.0)}}, - } - - files, err := newFiles(dir, data...) - if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - // Delete part of the block in the first file. - r := MustOpenTSMReader(files[0]) - r.DeleteRange([][]byte{[]byte("cpu")}, 1, 3) - - fs.Replace(nil, files) - - buf := make([]tsm1.FloatValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 0, true) - expValues := []tsm1.Value{tsm1.NewValue(0, 0.0), tsm1.NewValue(2, 2.0)} - - for _, exp := range expValues { - values, err := c.ReadFloatBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - if got, exp := len(values), 1; got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - if got, exp := values[0].String(), exp.String(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", 0, got, exp) - } - c.Next() - } -} - -func TestKeyCursor_TombstoneRange_PartialFloat(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{ - tsm1.NewValue(0, 1.0), - tsm1.NewValue(1, 2.0), - tsm1.NewValue(2, 3.0)}}, - } - - files, err := newFiles(dir, data...) 
- if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - if err := fs.DeleteRange([][]byte{[]byte("cpu")}, 1, 1); err != nil { - t.Fatalf("unexpected error delete range: %v", err) - } - - buf := make([]tsm1.FloatValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 0, true) - values, err := c.ReadFloatBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - expValues := []tsm1.Value{data[0].values[0], data[0].values[2]} - for i, v := range expValues { - exp := v - if got, exp := len(values), 2; got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - if got, exp := values[i].String(), exp.String(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", 0, got, exp) - } - } -} - -func TestKeyCursor_TombstoneRange_PartialInteger(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{ - tsm1.NewValue(0, int64(1)), - tsm1.NewValue(1, int64(2)), - tsm1.NewValue(2, int64(3))}}, - } - - files, err := newFiles(dir, data...) - if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - if err := fs.DeleteRange([][]byte{[]byte("cpu")}, 1, 1); err != nil { - t.Fatalf("unexpected error delete range: %v", err) - } - - buf := make([]tsm1.IntegerValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 0, true) - values, err := c.ReadIntegerBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - expValues := []tsm1.Value{data[0].values[0], data[0].values[2]} - for i, v := range expValues { - exp := v - if got, exp := len(values), 2; got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - if got, exp := values[i].String(), exp.String(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", 0, got, exp) - } - } -} - -func TestKeyCursor_TombstoneRange_PartialUnsigned(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{ - tsm1.NewValue(0, uint64(1)), - tsm1.NewValue(1, uint64(2)), - tsm1.NewValue(2, uint64(3))}}, - } - - files, err := newFiles(dir, data...) 
- if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - if err := fs.DeleteRange([][]byte{[]byte("cpu")}, 1, 1); err != nil { - t.Fatalf("unexpected error delete range: %v", err) - } - - buf := make([]tsm1.UnsignedValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 0, true) - values, err := c.ReadUnsignedBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - expValues := []tsm1.Value{data[0].values[0], data[0].values[2]} - for i, v := range expValues { - exp := v - if got, exp := len(values), 2; got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - if got, exp := values[i].String(), exp.String(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", 0, got, exp) - } - } -} - -func TestKeyCursor_TombstoneRange_PartialString(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{ - tsm1.NewValue(0, "1"), - tsm1.NewValue(1, "2"), - tsm1.NewValue(2, "3")}}, - } - - files, err := newFiles(dir, data...) - if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - if err := fs.DeleteRange([][]byte{[]byte("cpu")}, 1, 1); err != nil { - t.Fatalf("unexpected error delete range: %v", err) - } - - buf := make([]tsm1.StringValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 0, true) - values, err := c.ReadStringBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - expValues := []tsm1.Value{data[0].values[0], data[0].values[2]} - for i, v := range expValues { - exp := v - if got, exp := len(values), 2; got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - if got, exp := values[i].String(), exp.String(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", 0, got, exp) - } - } -} - -func TestKeyCursor_TombstoneRange_PartialBoolean(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{ - tsm1.NewValue(0, true), - tsm1.NewValue(1, false), - tsm1.NewValue(2, true)}}, - } - - files, err := newFiles(dir, data...) - if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - if err := fs.DeleteRange([][]byte{[]byte("cpu")}, 1, 1); err != nil { - t.Fatalf("unexpected error delete range: %v", err) - } - - buf := make([]tsm1.BooleanValue, 1000) - c := fs.KeyCursor(context.Background(), []byte("cpu"), 0, true) - values, err := c.ReadBooleanBlock(&buf) - if err != nil { - t.Fatalf("unexpected error reading values: %v", err) - } - - expValues := []tsm1.Value{data[0].values[0], data[0].values[2]} - for i, v := range expValues { - exp := v - if got, exp := len(values), 2; got != exp { - t.Fatalf("value length mismatch: got %v, exp %v", got, exp) - } - - if got, exp := values[i].String(), exp.String(); got != exp { - t.Fatalf("read value mismatch(%d): got %v, exp %v", 0, got, exp) - } - } -} - -func TestFileStore_Open(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - - // Create 3 TSM files... - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, 1.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(1, 2.0)}}, - keyValues{"mem", []tsm1.Value{tsm1.NewValue(0, 1.0)}}, - } - - _, err := newFileDir(dir, data...) 
- if err != nil { - fatal(t, "creating test files", err) - } - - fs := tsm1.NewFileStore(dir) - if err := fs.Open(context.Background()); err != nil { - fatal(t, "opening file store", err) - } - defer fs.Close() - - if got, exp := fs.Count(), 3; got != exp { - t.Fatalf("file count mismatch: got %v, exp %v", got, exp) - } - - if got, exp := fs.CurrentGeneration(), 4; got != exp { - t.Fatalf("current ID mismatch: got %v, exp %v", got, exp) - } -} - -func TestFileStore_Remove(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - - // Create 3 TSM files... - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, 1.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(1, 2.0)}}, - keyValues{"mem", []tsm1.Value{tsm1.NewValue(0, 1.0)}}, - } - - files, err := newFileDir(dir, data...) - if err != nil { - fatal(t, "creating test files", err) - } - - fs := tsm1.NewFileStore(dir) - if err := fs.Open(context.Background()); err != nil { - fatal(t, "opening file store", err) - } - defer fs.Close() - - if got, exp := fs.Count(), 3; got != exp { - t.Fatalf("file count mismatch: got %v, exp %v", got, exp) - } - - if got, exp := fs.CurrentGeneration(), 4; got != exp { - t.Fatalf("current ID mismatch: got %v, exp %v", got, exp) - } - - fs.Replace(files[2:3], nil) - - if got, exp := fs.Count(), 2; got != exp { - t.Fatalf("file count mismatch: got %v, exp %v", got, exp) - } - - if got, exp := fs.CurrentGeneration(), 4; got != exp { - t.Fatalf("current ID mismatch: got %v, exp %v", got, exp) - } -} - -func TestFileStore_Replace(t *testing.T) { - if testing.Short() { - t.Skip("skipping test in short mode") - } - - dir := MustTempDir() - defer os.RemoveAll(dir) - - // Create 3 TSM files... - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, 1.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(1, 2.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, 3.0)}}, - } - - files, err := newFileDir(dir, data...) 
- if err != nil { - fatal(t, "creating test files", err) - } - - // Replace assumes new files have a .tmp extension - replacement := fmt.Sprintf("%s.%s", files[2], tsm1.TmpTSMFileExtension) - fs.RenameFile(files[2], replacement) - - fs := tsm1.NewFileStore(dir) - if err := fs.Open(context.Background()); err != nil { - fatal(t, "opening file store", err) - } - defer fs.Close() - - if got, exp := fs.Count(), 2; got != exp { - t.Fatalf("file count mismatch: got %v, exp %v", got, exp) - } - - // Should record references to the two existing TSM files - cur := fs.KeyCursor(context.Background(), []byte("cpu"), 0, true) - - // Should move the existing files out of the way, but allow query to complete - if err := fs.Replace(files[:2], []string{replacement}); err != nil { - t.Fatalf("replace: %v", err) - } - - if got, exp := fs.Count(), 1; got != exp { - t.Fatalf("file count mismatch: got %v, exp %v", got, exp) - } - - // There should be two blocks (1 in each file) - cur.Next() - buf := make([]tsm1.FloatValue, 10) - values, err := cur.ReadFloatBlock(&buf) - if err != nil { - t.Fatal(err) - } - if got, exp := len(values), 1; got != exp { - t.Fatalf("value len mismatch: got %v, exp %v", got, exp) - } - - cur.Next() - values, err = cur.ReadFloatBlock(&buf) - if err != nil { - t.Fatal(err) - } - if got, exp := len(values), 1; got != exp { - t.Fatalf("value len mismatch: got %v, exp %v", got, exp) - } - - // No more blocks for this cursor - cur.Next() - values, err = cur.ReadFloatBlock(&buf) - if err != nil { - t.Fatal(err) - } - if got, exp := len(values), 0; got != exp { - t.Fatalf("value len mismatch: got %v, exp %v", got, exp) - } - - // Release the references (files should get evicted by purger shortly) - cur.Close() - - time.Sleep(time.Second) - // Make sure the two TSM files used by the cursor are gone - if _, err := os.Stat(files[0]); !os.IsNotExist(err) { - t.Fatalf("stat file: %v", err) - } - if _, err := os.Stat(files[1]); !os.IsNotExist(err) { - t.Fatalf("stat file: %v", err) - } - - // Make sure the new file exists - if _, err := os.Stat(files[2]); err != nil { - t.Fatalf("stat file: %v", err) - } -} - -func TestFileStore_Open_Deleted(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - - // Create 3 TSM files... - data := []keyValues{ - keyValues{"cpu,host=server2!~#!value", []tsm1.Value{tsm1.NewValue(0, 1.0)}}, - keyValues{"cpu,host=server1!~#!value", []tsm1.Value{tsm1.NewValue(1, 2.0)}}, - keyValues{"mem,host=server1!~#!value", []tsm1.Value{tsm1.NewValue(0, 1.0)}}, - } - - _, err := newFileDir(dir, data...)
- if err != nil { - fatal(t, "creating test files", err) - } - - fs := tsm1.NewFileStore(dir) - if err := fs.Open(context.Background()); err != nil { - fatal(t, "opening file store", err) - } - defer fs.Close() - - if got, exp := len(fs.Keys()), 3; got != exp { - t.Fatalf("file count mismatch: got %v, exp %v", got, exp) - } - - if err := fs.Delete([][]byte{[]byte("cpu,host=server2!~#!value")}); err != nil { - fatal(t, "deleting", err) - } - - fs2 := tsm1.NewFileStore(dir) - if err := fs2.Open(context.Background()); err != nil { - fatal(t, "opening file store", err) - } - defer fs2.Close() - - if got, exp := len(fs2.Keys()), 2; got != exp { - t.Fatalf("file count mismatch: got %v, exp %v", got, exp) - } -} - -func TestFileStore_Delete(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu,host=server2!~#!value", []tsm1.Value{tsm1.NewValue(0, 1.0)}}, - keyValues{"cpu,host=server1!~#!value", []tsm1.Value{tsm1.NewValue(1, 2.0)}}, - keyValues{"mem,host=server1!~#!value", []tsm1.Value{tsm1.NewValue(0, 1.0)}}, - } - - files, err := newFiles(dir, data...) - if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - keys := fs.Keys() - if got, exp := len(keys), 3; got != exp { - t.Fatalf("key length mismatch: got %v, exp %v", got, exp) - } - - if err := fs.Delete([][]byte{[]byte("cpu,host=server2!~#!value")}); err != nil { - fatal(t, "deleting", err) - } - - keys = fs.Keys() - if got, exp := len(keys), 2; got != exp { - t.Fatalf("key length mismatch: got %v, exp %v", got, exp) - } -} - -func TestFileStore_Apply(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu,host=server2#!~#value", []tsm1.Value{tsm1.NewValue(0, 1.0)}}, - keyValues{"cpu,host=server1#!~#value", []tsm1.Value{tsm1.NewValue(1, 2.0)}}, - keyValues{"mem,host=server1#!~#value", []tsm1.Value{tsm1.NewValue(0, 1.0)}}, - } - - files, err := newFiles(dir, data...) - if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - keys := fs.Keys() - if got, exp := len(keys), 3; got != exp { - t.Fatalf("key length mismatch: got %v, exp %v", got, exp) - } - - var n int64 - if err := fs.Apply(func(r tsm1.TSMFile) error { - atomic.AddInt64(&n, 1) - return nil - }); err != nil { - t.Fatalf("unexpected error deleting: %v", err) - } - - if got, exp := n, int64(3); got != exp { - t.Fatalf("apply mismatch: got %v, exp %v", got, exp) - } -} - -func TestFileStore_Stats(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - - // Create 3 TSM files... - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, 1.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(1, 2.0)}}, - keyValues{"mem", []tsm1.Value{tsm1.NewValue(0, 1.0)}}, - } - - files, err := newFileDir(dir, data...) - if err != nil { - fatal(t, "creating test files", err) - } - - filestore := tsm1.NewFileStore(dir) - if err := filestore.Open(context.Background()); err != nil { - fatal(t, "opening file store", err) - } - defer filestore.Close() - - stats := filestore.Stats() - if got, exp := len(stats), 3; got != exp { - t.Fatalf("file count mismatch: got %v, exp %v", got, exp) - } - - // Another call should result in the same stats being returned. 
- if got, exp := filestore.Stats(), stats; !reflect.DeepEqual(got, exp) { - t.Fatalf("got %v, exp %v", got, exp) - } - - // Removing one of the files should invalidate the cache. - filestore.Replace(files[0:1], nil) - if got, exp := len(filestore.Stats()), 2; got != exp { - t.Fatalf("file count mismatch: got %v, exp %v", got, exp) - } - - // Write a new TSM file that is not open - newFile := MustWriteTSM(dir, 4, map[string][]tsm1.Value{ - "mem": []tsm1.Value{tsm1.NewValue(0, 1.0)}, - }) - - replacement := fmt.Sprintf("%s.%s.%s", files[2], tsm1.TmpTSMFileExtension, tsm1.TSMFileExtension) // Assumes new files have a .tmp extension - if err := fs.RenameFile(newFile, replacement); err != nil { - t.Fatalf("rename: %v", err) - } - // Replace 3 w/ 1 - if err := filestore.Replace(files, []string{replacement}); err != nil { - t.Fatalf("replace: %v", err) - } - - var found bool - stats = filestore.Stats() - for _, stat := range stats { - if strings.HasSuffix(stat.Path, fmt.Sprintf("%s.%s.%s", tsm1.TSMFileExtension, tsm1.TmpTSMFileExtension, tsm1.TSMFileExtension)) { - found = true - } - } - - if !found { - t.Fatalf("Didn't find %s in stats: %v", "foo", stats) - } - - newFile = MustWriteTSM(dir, 5, map[string][]tsm1.Value{ - "mem": []tsm1.Value{tsm1.NewValue(0, 1.0)}, - }) - - // Adding some files should invalidate the cache. - filestore.Replace(nil, []string{newFile}) - if got, exp := len(filestore.Stats()), 2; got != exp { - t.Fatalf("file count mismatch: got %v, exp %v", got, exp) - } -} - -func TestFileStore_CreateSnapshot(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, 1.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(1, 2.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(2, 3.0)}}, - } - - files, err := newFiles(dir, data...)
- if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - fs.Replace(nil, files) - - // Create a tombstone - if err := fs.DeleteRange([][]byte{[]byte("cpu")}, 1, 1); err != nil { - t.Fatalf("unexpected error delete range: %v", err) - } - - _, s, e := fs.CreateSnapshot(context.Background()) - if e != nil { - t.Fatal(e) - } - t.Logf("temp file for hard links: %q", s) - - tfs, e := ioutil.ReadDir(s) - if e != nil { - t.Fatal(e) - } - if len(tfs) == 0 { - t.Fatal("no files found") - } - - for _, f := range fs.Files() { - p := filepath.Join(s, filepath.Base(f.Path())) - t.Logf("checking for existence of hard link %q", p) - if _, err := os.Stat(p); os.IsNotExist(err) { - t.Fatalf("unable to find file %q", p) - } - for _, tf := range f.TombstoneFiles() { - p := filepath.Join(s, filepath.Base(tf.Path)) - t.Logf("checking for existence of hard link %q", p) - if _, err := os.Stat(p); os.IsNotExist(err) { - t.Fatalf("unable to find file %q", p) - } - } - } -} - -type mockObserver struct { - fileFinishing func(path string) error - fileUnlinking func(path string) error -} - -func (m mockObserver) FileFinishing(path string) error { - return m.fileFinishing(path) -} - -func (m mockObserver) FileUnlinking(path string) error { - return m.fileUnlinking(path) -} - -func TestFileStore_Observer(t *testing.T) { - var finishes, unlinks []string - m := mockObserver{ - fileFinishing: func(path string) error { - finishes = append(finishes, path) - return nil - }, - fileUnlinking: func(path string) error { - unlinks = append(unlinks, path) - return nil - }, - } - - check := func(results []string, expect ...string) { - t.Helper() - if len(results) != len(expect) { - t.Fatalf("wrong number of results: %d results != %d expected", len(results), len(expect)) - } - for i, ex := range expect { - if got := filepath.Base(results[i]); got != ex { - t.Fatalf("unexpected result: got %q != expected %q", got, ex) - } - } - } - - dir := MustTempDir() - defer os.RemoveAll(dir) - fs := tsm1.NewFileStore(dir) - fs.WithObserver(m) - - // Setup 3 files - data := []keyValues{ - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, 1.0), tsm1.NewValue(1, 2.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(10, 2.0)}}, - keyValues{"cpu", []tsm1.Value{tsm1.NewValue(20, 3.0)}}, - } - - files, err := newFiles(dir, data...) - if err != nil { - t.Fatalf("unexpected error creating files: %v", err) - } - - if err := fs.Replace(nil, files); err != nil { - t.Fatalf("error replacing: %v", err) - } - - // Create a tombstone - if err := fs.DeleteRange([][]byte{[]byte("cpu")}, 10, 10); err != nil { - t.Fatalf("unexpected error delete range: %v", err) - } - - // Check that we observed finishes correctly - check(finishes, - "000000000000001-000000001.tsm", - "000000000000002-000000001.tsm", - "000000000000003-000000001.tsm", - "000000000000002-000000001.tombstone.tmp", - ) - check(unlinks) - unlinks, finishes = nil, nil - - // remove files including a tombstone - if err := fs.Replace(files[1:3], nil); err != nil { - t.Fatal("error replacing") - } - - // Check that we observed unlinks correctly - check(finishes) - check(unlinks, - "000000000000002-000000001.tsm", - "000000000000002-000000001.tombstone", - "000000000000003-000000001.tsm", - ) - unlinks, finishes = nil, nil - - // add a tombstone for the first file multiple times. 
- if err := fs.DeleteRange([][]byte{[]byte("cpu")}, 0, 0); err != nil { - t.Fatalf("unexpected error delete range: %v", err) - } - if err := fs.DeleteRange([][]byte{[]byte("cpu")}, 1, 1); err != nil { - t.Fatalf("unexpected error delete range: %v", err) - } - - check(finishes, - "000000000000001-000000001.tombstone.tmp", - "000000000000001-000000001.tombstone.tmp", - ) - check(unlinks) - unlinks, finishes = nil, nil -} - -func newFileDir(dir string, values ...keyValues) ([]string, error) { - var files []string - - id := 1 - for _, v := range values { - f := MustTempFile(dir) - w, err := tsm1.NewTSMWriter(f) - if err != nil { - return nil, err - } - - if err := w.Write([]byte(v.key), v.values); err != nil { - return nil, err - } - - if err := w.WriteIndex(); err != nil { - return nil, err - } - - if err := w.Close(); err != nil { - return nil, err - } - newName := filepath.Join(filepath.Dir(f.Name()), tsm1.DefaultFormatFileName(id, 1)+".tsm") - if err := fs.RenameFile(f.Name(), newName); err != nil { - return nil, err - } - id++ - - files = append(files, newName) - } - return files, nil - -} - -func newFiles(dir string, values ...keyValues) ([]string, error) { - var files []string - - id := 1 - for _, v := range values { - f := MustTempFile(dir) - w, err := tsm1.NewTSMWriter(f) - if err != nil { - return nil, err - } - - if err := w.Write([]byte(v.key), v.values); err != nil { - return nil, err - } - - if err := w.WriteIndex(); err != nil { - return nil, err - } - - if err := w.Close(); err != nil { - return nil, err - } - - newName := filepath.Join(filepath.Dir(f.Name()), tsm1.DefaultFormatFileName(id, 1)+".tsm") - if err := fs.RenameFile(f.Name(), newName); err != nil { - return nil, err - } - id++ - - files = append(files, newName) - } - return files, nil -} - -type keyValues struct { - key string - values []tsm1.Value -} - -func MustTempDir() string { - dir, err := ioutil.TempDir("", "tsm1-test") - if err != nil { - panic(fmt.Sprintf("failed to create temp dir: %v", err)) - } - return dir -} - -func MustTempFile(dir string) *os.File { - f, err := ioutil.TempFile(dir, "tsm1test") - if err != nil { - panic(fmt.Sprintf("failed to create temp file: %v", err)) - } - return f -} - -func fatal(t *testing.T, msg string, err error) { - t.Fatalf("unexpected error %v: %v", msg, err) -} - -var fsResult []tsm1.FileStat - -func BenchmarkFileStore_Stats(b *testing.B) { - dir := MustTempDir() - defer os.RemoveAll(dir) - - // Create some TSM files... - data := make([]keyValues, 0, 1000) - for i := 0; i < 1000; i++ { - data = append(data, keyValues{"cpu", []tsm1.Value{tsm1.NewValue(0, 1.0)}}) - } - - _, err := newFileDir(dir, data...) 
- if err != nil { - b.Fatalf("creating benchmark files %v", err) - } - - fs := tsm1.NewFileStore(dir) - if testing.Verbose() { - fs.WithLogger(logger.New(os.Stderr)) - } - - if err := fs.Open(context.Background()); err != nil { - b.Fatalf("opening file store %v", err) - } - defer fs.Close() - - b.ReportAllocs() - b.ResetTimer() - for i := 0; i < b.N; i++ { - fsResult = fs.Stats() - } -} - -func TestDefaultFormatFileName(t *testing.T) { - testCases := []struct { - generation int - sequence int - expectedFilename string - }{{ - generation: 0, - sequence: 0, - expectedFilename: "000000000000000-000000000", - }, { - generation: 12345, - sequence: 98765, - expectedFilename: "000000000012345-000098765", - }, { - generation: 123, - sequence: 123456789, - expectedFilename: "000000000000123-123456789", - }, { - generation: 123, - sequence: 999999999, - expectedFilename: "000000000000123-999999999", - }, { - generation: int(math.Pow(1000, 5)) - 1, - sequence: 123, - expectedFilename: "999999999999999-000000123", - }} - - for _, testCase := range testCases { - t.Run(fmt.Sprintf("%d,%d", testCase.generation, testCase.sequence), func(t *testing.T) { - gotFilename := tsm1.DefaultFormatFileName(testCase.generation, testCase.sequence) - if gotFilename != testCase.expectedFilename { - t.Errorf("input %d,%d expected '%s' got '%s'", - testCase.generation, testCase.sequence, testCase.expectedFilename, gotFilename) - } - }) - } -} - -func TestDefaultParseFileName(t *testing.T) { - testCases := []struct { - filename string - expectedGeneration int - expectedSequence int - expectError bool - }{{ - filename: "0-0.tsm", - expectedGeneration: 0, - expectedSequence: 0, - expectError: true, - }, { - filename: "00000000000000a-00000000a.tsm", - expectError: true, - }, { - filename: "000000000000000-000000000.tsm", - expectedGeneration: 0, - expectedSequence: 0, - expectError: false, - }, { - filename: "000000000000001-000000002.tsm", - expectedGeneration: 1, - expectedSequence: 2, - expectError: false, - }, { - filename: "000000000000123-999999999.tsm", - expectedGeneration: 123, - expectedSequence: 999999999, - expectError: false, - }, { - filename: "123-999999999.tsm", - expectedGeneration: 123, - expectedSequence: 999999999, - expectError: false, - }, { - filename: "999999999999999-000000123.tsm", - expectedGeneration: int(math.Pow(1000, 5)) - 1, - expectedSequence: 123, - expectError: false, - }} - - for _, testCase := range testCases { - t.Run(testCase.filename, func(t *testing.T) { - generation, sequence, err := tsm1.DefaultParseFileName(testCase.filename) - if err != nil { - if !testCase.expectError { - t.Errorf("did not expect error '%v'", err) - } - return - } - - if testCase.expectedGeneration != generation || testCase.expectedSequence != sequence { - t.Errorf("input '%s' expected %d,%d got %d,%d", - testCase.filename, - testCase.expectedGeneration, testCase.expectedSequence, - generation, sequence) - } - }) - } -} diff --git a/tsdb/tsm1/float.go b/tsdb/tsm1/float.go deleted file mode 100644 index ad8f43b7d5..0000000000 --- a/tsdb/tsm1/float.go +++ /dev/null @@ -1,280 +0,0 @@ -package tsm1 - -/* -This code is originally from: https://github.com/dgryski/go-tsz and has been modified to remove -the timestamp compression functionality. - -It implements the float compression as presented in: http://www.vldb.org/pvldb/vol8/p1816-teller.pdf. This implementation uses a sentinel value of NaN which means that float64 NaN cannot be stored using -this version.
-*/ - -import ( - "bytes" - "fmt" - "math" - "math/bits" - - bitstream "github.com/dgryski/go-bitstream" -) - -// Note: an uncompressed format is not yet implemented. -// floatCompressedGorilla is a compressed format using the gorilla paper encoding -const floatCompressedGorilla = 1 - -// uvnan is the constant returned from math.NaN(). -const uvnan = 0x7FF8000000000001 - -// FloatEncoder encodes multiple float64s into a byte slice. -type FloatEncoder struct { - val float64 - err error - - leading uint64 - trailing uint64 - - buf bytes.Buffer - bw *bitstream.BitWriter - - first bool - finished bool -} - -// NewFloatEncoder returns a new FloatEncoder. -func NewFloatEncoder() *FloatEncoder { - s := FloatEncoder{ - first: true, - leading: ^uint64(0), - } - - s.bw = bitstream.NewWriter(&s.buf) - s.buf.WriteByte(floatCompressedGorilla << 4) - - return &s -} - -// Reset sets the encoder back to its initial state. -func (s *FloatEncoder) Reset() { - s.val = 0 - s.err = nil - s.leading = ^uint64(0) - s.trailing = 0 - s.buf.Reset() - s.buf.WriteByte(floatCompressedGorilla << 4) - - s.bw.Resume(0x0, 8) - - s.finished = false - s.first = true -} - -// Bytes returns a copy of the underlying byte buffer used in the encoder. -func (s *FloatEncoder) Bytes() ([]byte, error) { - return s.buf.Bytes(), s.err -} - -// Flush indicates there are no more values to encode. -func (s *FloatEncoder) Flush() { - if !s.finished { - // write an end-of-stream record - s.finished = true - s.Write(math.NaN()) - s.bw.Flush(bitstream.Zero) - } -} - -// Write encodes v to the underlying buffer. -func (s *FloatEncoder) Write(v float64) { - // Only allow NaN as a sentinel value - if math.IsNaN(v) && !s.finished { - s.err = fmt.Errorf("unsupported value: NaN") - return - } - if s.first { - // first point - s.val = v - s.first = false - s.bw.WriteBits(math.Float64bits(v), 64) - return - } - - vDelta := math.Float64bits(v) ^ math.Float64bits(s.val) - - if vDelta == 0 { - s.bw.WriteBit(bitstream.Zero) - } else { - s.bw.WriteBit(bitstream.One) - - leading := uint64(bits.LeadingZeros64(vDelta)) - trailing := uint64(bits.TrailingZeros64(vDelta)) - - // Clamp number of leading zeros to avoid overflow when encoding - leading &= 0x1F - if leading >= 32 { - leading = 31 - } - - // TODO(dgryski): check if it's 'cheaper' to reset the leading/trailing bits instead - if s.leading != ^uint64(0) && leading >= s.leading && trailing >= s.trailing { - s.bw.WriteBit(bitstream.Zero) - s.bw.WriteBits(vDelta>>s.trailing, 64-int(s.leading)-int(s.trailing)) - } else { - s.leading, s.trailing = leading, trailing - - s.bw.WriteBit(bitstream.One) - s.bw.WriteBits(leading, 5) - - // Note that if leading == trailing == 0, then sigbits == 64. But that - // value doesn't actually fit into the 6 bits we have. - // Luckily, we never need to encode 0 significant bits, since that would - // put us in the other case (vdelta == 0). So instead we write out a 0 and - // adjust it back to 64 on unpacking. - sigbits := 64 - leading - trailing - s.bw.WriteBits(sigbits, 6) - s.bw.WriteBits(vDelta>>trailing, int(sigbits)) - } - } - - s.val = v -} - -// FloatDecoder decodes a byte slice into multiple float64 values. -type FloatDecoder struct { - val uint64 - - leading uint64 - trailing uint64 - - br BitReader - b []byte - - first bool - finished bool - - err error -} - -// SetBytes initializes the decoder with b. Must call before calling Next(). 
-func (it *FloatDecoder) SetBytes(b []byte) error { - var v uint64 - if len(b) == 0 { - v = uvnan - } else { - // first byte is the compression type. - // we currently just have gorilla compression. - it.br.Reset(b[1:]) - - var err error - v, err = it.br.ReadBits(64) - if err != nil { - return err - } - } - - // Reset all fields. - it.val = v - it.leading = 0 - it.trailing = 0 - it.b = b - it.first = true - it.finished = false - it.err = nil - - return nil -} - -// Next returns true if there are remaining values to read. -func (it *FloatDecoder) Next() bool { - if it.err != nil || it.finished { - return false - } - - if it.first { - it.first = false - - // mark as finished if there were no values. - if it.val == uvnan { // IsNaN - it.finished = true - return false - } - - return true - } - - // read compressed value - var bit bool - if it.br.CanReadBitFast() { - bit = it.br.ReadBitFast() - } else if v, err := it.br.ReadBit(); err != nil { - it.err = err - return false - } else { - bit = v - } - - if !bit { - // it.val = it.val - } else { - var bit bool - if it.br.CanReadBitFast() { - bit = it.br.ReadBitFast() - } else if v, err := it.br.ReadBit(); err != nil { - it.err = err - return false - } else { - bit = v - } - - if !bit { - // reuse leading/trailing zero bits - // it.leading, it.trailing = it.leading, it.trailing - } else { - bits, err := it.br.ReadBits(5) - if err != nil { - it.err = err - return false - } - it.leading = bits - - bits, err = it.br.ReadBits(6) - if err != nil { - it.err = err - return false - } - mbits := bits - // 0 significant bits here means we overflowed and we actually need 64; see comment in encoder - if mbits == 0 { - mbits = 64 - } - it.trailing = 64 - it.leading - mbits - } - - mbits := uint(64 - it.leading - it.trailing) - bits, err := it.br.ReadBits(mbits) - if err != nil { - it.err = err - return false - } - - vbits := it.val - vbits ^= (bits << it.trailing) - - if vbits == uvnan { // IsNaN - it.finished = true - return false - } - it.val = vbits - } - - return true -} - -// Values returns the current float64 value. -func (it *FloatDecoder) Values() float64 { - return math.Float64frombits(it.val) -} - -// Error returns the current decoding error. 
-func (it *FloatDecoder) Error() error { - return it.err -} diff --git a/tsdb/tsm1/float_test.go b/tsdb/tsm1/float_test.go deleted file mode 100644 index 7c9650020f..0000000000 --- a/tsdb/tsm1/float_test.go +++ /dev/null @@ -1,338 +0,0 @@ -package tsm1_test - -import ( - "fmt" - "math" - "reflect" - "testing" - "testing/quick" - - "github.com/google/go-cmp/cmp" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" -) - -func TestFloatEncoder_Simple(t *testing.T) { - // Example from the paper - s := tsm1.NewFloatEncoder() - - s.Write(12) - s.Write(12) - s.Write(24) - - // extra tests - - // floating point masking/shifting bug - s.Write(13) - s.Write(24) - - // delta-of-delta sizes - s.Write(24) - s.Write(24) - s.Write(24) - - s.Flush() - - b, err := s.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var it tsm1.FloatDecoder - if err := it.SetBytes(b); err != nil { - t.Fatalf("unexpected error creating float decoder: %v", err) - } - - want := []float64{ - 12, - 12, - 24, - - 13, - 24, - - 24, - 24, - 24, - } - - for _, w := range want { - if !it.Next() { - t.Fatalf("Next()=false, want true") - } - vv := it.Values() - if w != vv { - t.Errorf("Values()=(%v), want (%v)\n", vv, w) - } - } - - if it.Next() { - t.Fatalf("Next()=true, want false") - } - - if err := it.Error(); err != nil { - t.Errorf("it.Error()=%v, want nil", err) - } -} - -func TestFloatEncoder_SimilarFloats(t *testing.T) { - s := tsm1.NewFloatEncoder() - want := []float64{ - 6.00065e+06, - 6.000656e+06, - 6.000657e+06, - - 6.000659e+06, - 6.000661e+06, - } - - for _, v := range want { - s.Write(v) - } - - s.Flush() - - b, err := s.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var it tsm1.FloatDecoder - if err := it.SetBytes(b); err != nil { - t.Fatalf("unexpected error creating float decoder: %v", err) - } - - for _, w := range want { - if !it.Next() { - t.Fatalf("Next()=false, want true") - } - vv := it.Values() - if w != vv { - t.Errorf("Values()=(%v), want (%v)\n", vv, w) - } - } - - if it.Next() { - t.Fatalf("Next()=true, want false") - } - - if err := it.Error(); err != nil { - t.Errorf("it.Error()=%v, want nil", err) - } -} - -var twoHoursData = []float64{ - // 2h of data, rows of 10 values - 761, 727, 763, 706, 700, 679, 757, 708, 739, 707, - 699, 740, 729, 766, 730, 715, 705, 693, 765, 724, - 799, 761, 737, 766, 756, 719, 722, 801, 747, 731, - 742, 744, 791, 750, 759, 809, 751, 705, 770, 792, - 727, 762, 772, 721, 748, 753, 744, 716, 776, 659, - 789, 766, 758, 690, 795, 770, 758, 723, 767, 765, - 693, 706, 681, 727, 724, 780, 678, 696, 758, 740, - 735, 700, 742, 747, 752, 734, 743, 732, 746, 770, - 780, 710, 731, 712, 712, 741, 770, 770, 754, 718, - 670, 775, 749, 795, 756, 741, 787, 721, 745, 782, - 765, 780, 811, 790, 836, 743, 858, 739, 762, 770, - 752, 763, 795, 792, 746, 786, 785, 774, 786, 718, -} - -func TestFloatEncoder_Roundtrip(t *testing.T) { - s := tsm1.NewFloatEncoder() - for _, p := range twoHoursData { - s.Write(p) - } - s.Flush() - - b, err := s.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var it tsm1.FloatDecoder - if err := it.SetBytes(b); err != nil { - t.Fatalf("unexpected error creating float decoder: %v", err) - } - - for _, w := range twoHoursData { - if !it.Next() { - t.Fatalf("Next()=false, want true") - } - vv := it.Values() - // t.Logf("it.Values()=(%+v, %+v)\n", time.Unix(int64(tt), 0), vv) - if w != vv { - t.Errorf("Values()=(%v), want (%v)\n", vv, w) - } - } - - if it.Next() { - t.Fatalf("Next()=true, want false") - 
} - - if err := it.Error(); err != nil { - t.Errorf("it.Error()=%v, want nil", err) - } -} - -func TestFloatEncoder_Roundtrip_NaN(t *testing.T) { - s := tsm1.NewFloatEncoder() - s.Write(1.0) - s.Write(math.NaN()) - s.Write(2.0) - s.Flush() - - _, err := s.Bytes() - if err == nil { - t.Fatalf("expected error. got nil") - } -} - -func TestFloatEncoder_Empty(t *testing.T) { - s := tsm1.NewFloatEncoder() - s.Flush() - - b, err := s.Bytes() - if err != nil { - t.Fatal(err) - } - - var dec tsm1.FloatDecoder - if err := dec.SetBytes(b); err != nil { - t.Fatal(err) - } - - var got []float64 - for dec.Next() { - got = append(got, dec.Values()) - } - - if len(got) != 0 { - t.Fatalf("got len %d, expected 0", len(got)) - } -} - -func Test_FloatEncoder_Quick(t *testing.T) { - quick.Check(func(values []float64) bool { - - expected := values - if values == nil { - expected = []float64{} - } - - // Write values to encoder. - enc := tsm1.NewFloatEncoder() - for _, v := range values { - enc.Write(v) - } - enc.Flush() - - // Read values out of decoder. - got := make([]float64, 0, len(values)) - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var dec tsm1.FloatDecoder - if err := dec.SetBytes(b); err != nil { - t.Fatal(err) - } - for dec.Next() { - got = append(got, dec.Values()) - } - - // Verify that input and output values match. - if !reflect.DeepEqual(expected, got) { - t.Fatalf("mismatch:\n\nexp=%#v\n\ngot=%#v\n\n", expected, got) - } - - return true - }, nil) -} - -func TestFloatDecoder_Empty(t *testing.T) { - var dec tsm1.FloatDecoder - if err := dec.SetBytes([]byte{}); err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if dec.Next() { - t.Fatalf("exp next == false, got true") - } -} - -func BenchmarkFloatEncoder(b *testing.B) { - for i := 0; i < b.N; i++ { - s := tsm1.NewFloatEncoder() - for _, tt := range twoHoursData { - s.Write(tt) - } - s.Flush() - } -} - -func BenchmarkFloatDecoder(b *testing.B) { - s := tsm1.NewFloatEncoder() - for _, tt := range twoHoursData { - s.Write(tt) - } - s.Flush() - bytes, err := s.Bytes() - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - b.ResetTimer() - - for i := 0; i < b.N; i++ { - var it tsm1.FloatDecoder - if err := it.SetBytes(bytes); err != nil { - b.Fatalf("unexpected error creating float decoder: %v", err) - } - - for j := 0; j < len(twoHoursData); it.Next() { - j++ - } - } -} - -func BenchmarkFloatDecoder_DecodeAll(b *testing.B) { - benchmarks := []int{ - 1, - 55, - 550, - 1000, - } - for _, size := range benchmarks { - s := tsm1.NewFloatEncoder() - for c := 0; c < size; c++ { - s.Write(twoHoursData[c%len(twoHoursData)]) - } - s.Flush() - bytes, err := s.Bytes() - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - b.Run(fmt.Sprintf("%d", size), func(b *testing.B) { - b.SetBytes(int64(len(bytes))) - - dst := make([]float64, size) - for i := 0; i < b.N; i++ { - var it tsm1.FloatDecoder - if err := it.SetBytes(bytes); err != nil { - b.Fatalf("unexpected error creating float decoder: %v", err) - } - - i := 0 - for it.Next() { - dst[i] = it.Values() - i++ - } - - if len(dst) != size { - b.Fatalf("unexpected length -got/+exp\n%s", cmp.Diff(len(dst), size)) - } - } - }) - } -} diff --git a/tsdb/tsm1/int.go b/tsdb/tsm1/int.go deleted file mode 100644 index 2d2b6d5f33..0000000000 --- a/tsdb/tsm1/int.go +++ /dev/null @@ -1,324 +0,0 @@ -package tsm1 - -// Integer encoding uses two different strategies depending on the range of values in -// the uncompressed data. 
Encoded values are first encoding used zig zag encoding. -// This interleaves positive and negative integers across a range of positive integers. -// -// For example, [-2,-1,0,1] becomes [3,1,0,2]. See -// https://developers.google.com/protocol-buffers/docs/encoding?hl=en#signed-integers -// for more information. -// -// If all the zig zag encoded values are less than 1 << 60 - 1, they are compressed using -// simple8b encoding. If any value is larger than 1 << 60 - 1, the values are stored uncompressed. -// -// Each encoded byte slice contains a 1 byte header followed by multiple 8 byte packed integers -// or 8 byte uncompressed integers. The 4 high bits of the first byte indicate the encoding type -// for the remaining bytes. -// -// There are currently two encoding types that can be used with room for 16 total. These additional -// encoding slots are reserved for future use. One improvement to be made is to use a patched -// encoding such as PFOR if only a small number of values exceed the max compressed value range. This -// should improve compression ratios with very large integers near the ends of the int64 range. - -import ( - "encoding/binary" - "fmt" - - "github.com/jwilder/encoding/simple8b" -) - -const ( - // intUncompressed is an uncompressed format using 8 bytes per point - intUncompressed = 0 - // intCompressedSimple is a bit-packed format using simple8b encoding - intCompressedSimple = 1 - // intCompressedRLE is a run-length encoding format - intCompressedRLE = 2 -) - -// IntegerEncoder encodes int64s into byte slices. -type IntegerEncoder struct { - prev int64 - rle bool - values []uint64 -} - -// NewIntegerEncoder returns a new integer encoder with an initial buffer of values sized at sz. -func NewIntegerEncoder(sz int) IntegerEncoder { - return IntegerEncoder{ - rle: true, - values: make([]uint64, 0, sz), - } -} - -// Flush is no-op -func (e *IntegerEncoder) Flush() {} - -// Reset sets the encoder back to its initial state. -func (e *IntegerEncoder) Reset() { - e.prev = 0 - e.rle = true - e.values = e.values[:0] -} - -// Write encodes v to the underlying buffers. -func (e *IntegerEncoder) Write(v int64) { - // Delta-encode each value as it's written. This happens before - // ZigZagEncoding because the deltas could be negative. - delta := v - e.prev - e.prev = v - enc := ZigZagEncode(delta) - if len(e.values) > 1 { - e.rle = e.rle && e.values[len(e.values)-1] == enc - } - - e.values = append(e.values, enc) -} - -// Bytes returns a copy of the underlying buffer. -func (e *IntegerEncoder) Bytes() ([]byte, error) { - // Only run-length encode if it could reduce storage size. - if e.rle && len(e.values) > 2 { - return e.encodeRLE() - } - - for _, v := range e.values { - // Value is too large to encode using packed format - if v > simple8b.MaxValue { - return e.encodeUncompressed() - } - } - - return e.encodePacked() -} - -func (e *IntegerEncoder) encodeRLE() ([]byte, error) { - // Large varints can take up to 10 bytes. We're storing 3 + 1 - // type byte. - var b [31]byte - - // 4 high bits used for the encoding type - b[0] = byte(intCompressedRLE) << 4 - - i := 1 - // The first value - binary.BigEndian.PutUint64(b[i:], e.values[0]) - i += 8 - // The first delta - i += binary.PutUvarint(b[i:], e.values[1]) - // The number of times the delta is repeated - i += binary.PutUvarint(b[i:], uint64(len(e.values)-1)) - - return b[:i], nil -} - -func (e *IntegerEncoder) encodePacked() ([]byte, error) { - if len(e.values) == 0 { - return nil, nil - } - - // Encode all but the first value. 
Fist value is written unencoded - // using 8 bytes. - encoded, err := simple8b.EncodeAll(e.values[1:]) - if err != nil { - return nil, err - } - - b := make([]byte, 1+(len(encoded)+1)*8) - // 4 high bits of first byte store the encoding type for the block - b[0] = byte(intCompressedSimple) << 4 - - // Write the first value since it's not part of the encoded values - binary.BigEndian.PutUint64(b[1:9], e.values[0]) - - // Write the encoded values - for i, v := range encoded { - binary.BigEndian.PutUint64(b[9+i*8:9+i*8+8], v) - } - return b, nil -} - -func (e *IntegerEncoder) encodeUncompressed() ([]byte, error) { - if len(e.values) == 0 { - return nil, nil - } - - b := make([]byte, 1+len(e.values)*8) - // 4 high bits of first byte store the encoding type for the block - b[0] = byte(intUncompressed) << 4 - - for i, v := range e.values { - binary.BigEndian.PutUint64(b[1+i*8:1+i*8+8], v) - } - return b, nil -} - -// IntegerDecoder decodes a byte slice into int64s. -type IntegerDecoder struct { - // 240 is the maximum number of values that can be encoded into a single uint64 using simple8b - values [240]uint64 - bytes []byte - i int - n int - prev int64 - first bool - - // The first value for a run-length encoded byte slice - rleFirst uint64 - - // The delta value for a run-length encoded byte slice - rleDelta uint64 - encoding byte - err error -} - -// SetBytes sets the underlying byte slice of the decoder. -func (d *IntegerDecoder) SetBytes(b []byte) { - if len(b) > 0 { - d.encoding = b[0] >> 4 - d.bytes = b[1:] - } else { - d.encoding = 0 - d.bytes = nil - } - - d.i = 0 - d.n = 0 - d.prev = 0 - d.first = true - - d.rleFirst = 0 - d.rleDelta = 0 - d.err = nil -} - -// Next returns true if there are any values remaining to be decoded. -func (d *IntegerDecoder) Next() bool { - if d.i >= d.n && len(d.bytes) == 0 { - return false - } - - d.i++ - - if d.i >= d.n { - switch d.encoding { - case intUncompressed: - d.decodeUncompressed() - case intCompressedSimple: - d.decodePacked() - case intCompressedRLE: - d.decodeRLE() - default: - d.err = fmt.Errorf("unknown encoding %v", d.encoding) - } - } - return d.err == nil && d.i < d.n -} - -// Error returns the last error encountered by the decoder. -func (d *IntegerDecoder) Error() error { - return d.err -} - -// Read returns the next value from the decoder. -func (d *IntegerDecoder) Read() int64 { - switch d.encoding { - case intCompressedRLE: - return ZigZagDecode(d.rleFirst) + int64(d.i)*ZigZagDecode(d.rleDelta) - default: - v := ZigZagDecode(d.values[d.i]) - // v is the delta encoded value, we need to add the prior value to get the original - v = v + d.prev - d.prev = v - return v - } -} - -func (d *IntegerDecoder) decodeRLE() { - if len(d.bytes) == 0 { - return - } - - if len(d.bytes) < 8 { - d.err = fmt.Errorf("integerDecoder: not enough data to decode RLE starting value") - return - } - - var i, n int - - // Next 8 bytes is the starting value - first := binary.BigEndian.Uint64(d.bytes[i : i+8]) - i += 8 - - // Next 1-10 bytes is the delta value - value, n := binary.Uvarint(d.bytes[i:]) - if n <= 0 { - d.err = fmt.Errorf("integerDecoder: invalid RLE delta value") - return - } - i += n - - // Last 1-10 bytes is how many times the value repeats - count, n := binary.Uvarint(d.bytes[i:]) - if n <= 0 { - d.err = fmt.Errorf("integerDecoder: invalid RLE repeat value") - return - } - - // Store the first value and delta value so we do not need to allocate - // a large values slice. We can compute the value at position d.i on - // demand. 
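The RLE read path above never materialises the run: value i is reconstructed on demand from the stored first value and the repeated delta, both zig-zag encoded as the package comment describes. A small standalone sketch of that arithmetic (the lower-case helpers are illustrative, not the deleted exported functions):

package main

import "fmt"

// zigZagEncode maps signed integers onto unsigned ones so small magnitudes
// stay small: ..., -2, -1, 0, 1, 2, ... becomes 3, 1, 0, 2, 4, ...
func zigZagEncode(x int64) uint64 { return uint64(x<<1) ^ uint64(x>>63) }

// zigZagDecode reverses zigZagEncode.
func zigZagDecode(v uint64) int64 { return int64(v>>1) ^ -int64(v&1) }

// rleValue computes the i-th value of a run-length encoded block from the
// stored first value and repeated delta, the same shape of computation the
// decoder's Read performs for RLE blocks.
func rleValue(first, delta uint64, i int) int64 {
	return zigZagDecode(first) + int64(i)*zigZagDecode(delta)
}

func main() {
	// The run 100, 110, 120, ... is stored as first=zigzag(100), delta=zigzag(10).
	first, delta := zigZagEncode(100), zigZagEncode(10)
	fmt.Println(rleValue(first, delta, 0), rleValue(first, delta, 3)) // 100 130
}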
- d.rleFirst = first - d.rleDelta = value - d.n = int(count) + 1 - d.i = 0 - - // We've process all the bytes - d.bytes = nil -} - -func (d *IntegerDecoder) decodePacked() { - if len(d.bytes) == 0 { - return - } - - if len(d.bytes) < 8 { - d.err = fmt.Errorf("integerDecoder: not enough data to decode packed value") - return - } - - v := binary.BigEndian.Uint64(d.bytes[0:8]) - // The first value is always unencoded - if d.first { - d.first = false - d.n = 1 - d.values[0] = v - } else { - n, err := simple8b.Decode(&d.values, v) - if err != nil { - // Should never happen, only error that could be returned is if the the value to be decoded was not - // actually encoded by simple8b encoder. - d.err = fmt.Errorf("failed to decode value %v: %v", v, err) - } - - d.n = n - } - d.i = 0 - d.bytes = d.bytes[8:] -} - -func (d *IntegerDecoder) decodeUncompressed() { - if len(d.bytes) == 0 { - return - } - - if len(d.bytes) < 8 { - d.err = fmt.Errorf("integerDecoder: not enough data to decode uncompressed value") - return - } - - d.values[0] = binary.BigEndian.Uint64(d.bytes[0:8]) - d.i = 0 - d.n = 1 - d.bytes = d.bytes[8:] -} diff --git a/tsdb/tsm1/int_test.go b/tsdb/tsm1/int_test.go deleted file mode 100644 index d91a54e432..0000000000 --- a/tsdb/tsm1/int_test.go +++ /dev/null @@ -1,729 +0,0 @@ -package tsm1 - -import ( - "fmt" - "math" - "math/rand" - "reflect" - "testing" - "testing/quick" -) - -func Test_IntegerEncoder_NoValues(t *testing.T) { - enc := NewIntegerEncoder(0) - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if len(b) > 0 { - t.Fatalf("unexpected length: exp 0, got %v", len(b)) - } - - var dec IntegerDecoder - dec.SetBytes(b) - if dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } -} - -func Test_IntegerEncoder_One(t *testing.T) { - enc := NewIntegerEncoder(1) - v1 := int64(1) - - enc.Write(1) - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; intCompressedSimple != got { - t.Fatalf("encoding type mismatch: exp uncompressed, got %v", got) - } - - var dec IntegerDecoder - dec.SetBytes(b) - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if v1 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v1) - } -} - -func Test_IntegerEncoder_Two(t *testing.T) { - enc := NewIntegerEncoder(2) - var v1, v2 int64 = 1, 2 - - enc.Write(v1) - enc.Write(v2) - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; intCompressedSimple != got { - t.Fatalf("encoding type mismatch: exp uncompressed, got %v", got) - } - - var dec IntegerDecoder - dec.SetBytes(b) - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if v1 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v1) - } - - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if v2 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v2) - } -} - -func Test_IntegerEncoder_Negative(t *testing.T) { - enc := NewIntegerEncoder(3) - var v1, v2, v3 int64 = -2, 0, 1 - - enc.Write(v1) - enc.Write(v2) - enc.Write(v3) - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; intCompressedSimple != got { - t.Fatalf("encoding type mismatch: exp uncompressed, got %v", got) - } - - var dec IntegerDecoder - dec.SetBytes(b) - if !dec.Next() { - t.Fatalf("unexpected next 
value: got true, exp false") - } - - if v1 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v1) - } - - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if v2 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v2) - } - - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if v3 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v3) - } -} - -func Test_IntegerEncoder_Large_Range(t *testing.T) { - enc := NewIntegerEncoder(2) - var v1, v2 int64 = math.MinInt64, math.MaxInt64 - enc.Write(v1) - enc.Write(v2) - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; intUncompressed != got { - t.Fatalf("encoding type mismatch: exp uncompressed, got %v", got) - } - - var dec IntegerDecoder - dec.SetBytes(b) - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if v1 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v1) - } - - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if v2 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v2) - } -} - -func Test_IntegerEncoder_Uncompressed(t *testing.T) { - enc := NewIntegerEncoder(3) - var v1, v2, v3 int64 = 0, 1, 1 << 60 - - enc.Write(v1) - enc.Write(v2) - enc.Write(v3) - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("expected error: %v", err) - } - - // 1 byte header + 3 * 8 byte values - if exp := 25; len(b) != exp { - t.Fatalf("length mismatch: got %v, exp %v", len(b), exp) - } - - if got := b[0] >> 4; intUncompressed != got { - t.Fatalf("encoding type mismatch: exp uncompressed, got %v", got) - } - - var dec IntegerDecoder - dec.SetBytes(b) - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if v1 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v1) - } - - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if v2 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v2) - } - - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if v3 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), v3) - } -} - -func Test_IntegerEncoder_NegativeUncompressed(t *testing.T) { - values := []int64{ - -2352281900722994752, 1438442655375607923, -4110452567888190110, - -1221292455668011702, -1941700286034261841, -2836753127140407751, - 1432686216250034552, 3663244026151507025, -3068113732684750258, - -1949953187327444488, 3713374280993588804, 3226153669854871355, - -2093273755080502606, 1006087192578600616, -2272122301622271655, - 2533238229511593671, -4450454445568858273, 2647789901083530435, - 2761419461769776844, -1324397441074946198, -680758138988210958, - 94468846694902125, -2394093124890745254, -2682139311758778198, - } - enc := NewIntegerEncoder(256) - for _, v := range values { - enc.Write(v) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("expected error: %v", err) - } - - if got := b[0] >> 4; intUncompressed != got { - t.Fatalf("encoding type mismatch: exp uncompressed, got %v", got) - } - - var dec IntegerDecoder - dec.SetBytes(b) - - i := 0 - for dec.Next() { - if i > len(values) { - t.Fatalf("read too many values: got %v, exp %v", i, len(values)) - } - - if values[i] != dec.Read() { - t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), 
values[i]) - } - i += 1 - } - - if i != len(values) { - t.Fatalf("failed to read enough values: got %v, exp %v", i, len(values)) - } -} - -func Test_IntegerEncoder_AllNegative(t *testing.T) { - enc := NewIntegerEncoder(3) - values := []int64{ - -10, -5, -1, - } - - for _, v := range values { - enc.Write(v) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; intCompressedSimple != got { - t.Fatalf("encoding type mismatch: exp uncompressed, got %v", got) - } - - var dec IntegerDecoder - dec.SetBytes(b) - i := 0 - for dec.Next() { - if i > len(values) { - t.Fatalf("read too many values: got %v, exp %v", i, len(values)) - } - - if values[i] != dec.Read() { - t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), values[i]) - } - i += 1 - } - - if i != len(values) { - t.Fatalf("failed to read enough values: got %v, exp %v", i, len(values)) - } -} - -func Test_IntegerEncoder_CounterPacked(t *testing.T) { - enc := NewIntegerEncoder(16) - values := []int64{ - 1e15, 1e15 + 1, 1e15 + 2, 1e15 + 3, 1e15 + 4, 1e15 + 6, - } - - for _, v := range values { - enc.Write(v) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if b[0]>>4 != intCompressedSimple { - t.Fatalf("unexpected encoding format: expected simple, got %v", b[0]>>4) - } - - // Should use 1 header byte + 2, 8 byte words if delta-encoding is used based on - // values sizes. Without delta-encoding, we'd get 49 bytes. - if exp := 17; len(b) != exp { - t.Fatalf("encoded length mismatch: got %v, exp %v", len(b), exp) - } - - var dec IntegerDecoder - dec.SetBytes(b) - i := 0 - for dec.Next() { - if i > len(values) { - t.Fatalf("read too many values: got %v, exp %v", i, len(values)) - } - - if values[i] != dec.Read() { - t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), values[i]) - } - i += 1 - } - - if i != len(values) { - t.Fatalf("failed to read enough values: got %v, exp %v", i, len(values)) - } -} - -func Test_IntegerEncoder_CounterRLE(t *testing.T) { - enc := NewIntegerEncoder(16) - values := []int64{ - 1e15, 1e15 + 1, 1e15 + 2, 1e15 + 3, 1e15 + 4, 1e15 + 5, - } - - for _, v := range values { - enc.Write(v) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if b[0]>>4 != intCompressedRLE { - t.Fatalf("unexpected encoding format: expected RLE, got %v", b[0]>>4) - } - - // Should use 1 header byte, 8 byte first value, 1 var-byte for delta and 1 var-byte for - // count of deltas in this particular RLE. 
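The expected lengths asserted in these RLE tests follow directly from the layout restated in the comment: one header byte, an 8-byte first value, then a uvarint delta and a uvarint repeat count. A quick sketch of that arithmetic under those assumptions (helper names are illustrative):

package main

import (
	"encoding/binary"
	"fmt"
)

func zigZagEncode(x int64) uint64 { return uint64(x<<1) ^ uint64(x>>63) }

// rleEncodedSize estimates the encoded length of an RLE block: 1 header byte,
// an 8-byte first value, a uvarint zig-zag delta and a uvarint repeat count.
func rleEncodedSize(delta int64, count int) int {
	var tmp [binary.MaxVarintLen64]byte
	n := 1 + 8
	n += binary.PutUvarint(tmp[:], zigZagEncode(delta))
	n += binary.PutUvarint(tmp[:], uint64(count-1)) // deltas after the first value
	return n
}

func main() {
	fmt.Println(rleEncodedSize(1, 6))     // 11, matching the counter case above
	fmt.Println(rleEncodedSize(-2622, 3)) // 12, matching the descending case below
}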
- if exp := 11; len(b) != exp { - t.Fatalf("encoded length mismatch: got %v, exp %v", len(b), exp) - } - - var dec IntegerDecoder - dec.SetBytes(b) - i := 0 - for dec.Next() { - if i > len(values) { - t.Fatalf("read too many values: got %v, exp %v", i, len(values)) - } - - if values[i] != dec.Read() { - t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), values[i]) - } - i += 1 - } - - if i != len(values) { - t.Fatalf("failed to read enough values: got %v, exp %v", i, len(values)) - } -} - -func Test_IntegerEncoder_Descending(t *testing.T) { - enc := NewIntegerEncoder(16) - values := []int64{ - 7094, 4472, 1850, - } - - for _, v := range values { - enc.Write(v) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if b[0]>>4 != intCompressedRLE { - t.Fatalf("unexpected encoding format: expected simple, got %v", b[0]>>4) - } - - // Should use 1 header byte, 8 byte first value, 1 var-byte for delta and 1 var-byte for - // count of deltas in this particular RLE. - if exp := 12; len(b) != exp { - t.Fatalf("encoded length mismatch: got %v, exp %v", len(b), exp) - } - - var dec IntegerDecoder - dec.SetBytes(b) - i := 0 - for dec.Next() { - if i > len(values) { - t.Fatalf("read too many values: got %v, exp %v", i, len(values)) - } - - if values[i] != dec.Read() { - t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), values[i]) - } - i += 1 - } - - if i != len(values) { - t.Fatalf("failed to read enough values: got %v, exp %v", i, len(values)) - } -} - -func Test_IntegerEncoder_Flat(t *testing.T) { - enc := NewIntegerEncoder(16) - values := []int64{ - 1, 1, 1, 1, - } - - for _, v := range values { - enc.Write(v) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if b[0]>>4 != intCompressedRLE { - t.Fatalf("unexpected encoding format: expected simple, got %v", b[0]>>4) - } - - // Should use 1 header byte, 8 byte first value, 1 var-byte for delta and 1 var-byte for - // count of deltas in this particular RLE. 
- if exp := 11; len(b) != exp { - t.Fatalf("encoded length mismatch: got %v, exp %v", len(b), exp) - } - - var dec IntegerDecoder - dec.SetBytes(b) - i := 0 - for dec.Next() { - if i > len(values) { - t.Fatalf("read too many values: got %v, exp %v", i, len(values)) - } - - if values[i] != dec.Read() { - t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), values[i]) - } - i += 1 - } - - if i != len(values) { - t.Fatalf("failed to read enough values: got %v, exp %v", i, len(values)) - } -} - -func Test_IntegerEncoder_MinMax(t *testing.T) { - enc := NewIntegerEncoder(2) - values := []int64{ - math.MinInt64, math.MaxInt64, - } - - for _, v := range values { - enc.Write(v) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if b[0]>>4 != intUncompressed { - t.Fatalf("unexpected encoding format: expected simple, got %v", b[0]>>4) - } - - if exp := 17; len(b) != exp { - t.Fatalf("encoded length mismatch: got %v, exp %v", len(b), exp) - } - - var dec IntegerDecoder - dec.SetBytes(b) - i := 0 - for dec.Next() { - if i > len(values) { - t.Fatalf("read too many values: got %v, exp %v", i, len(values)) - } - - if values[i] != dec.Read() { - t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), values[i]) - } - i += 1 - } - - if i != len(values) { - t.Fatalf("failed to read enough values: got %v, exp %v", i, len(values)) - } -} - -func Test_IntegerEncoder_Quick(t *testing.T) { - quick.Check(func(values []int64) bool { - expected := values - if values == nil { - expected = []int64{} // is this really expected? - } - - // Write values to encoder. - enc := NewIntegerEncoder(1024) - for _, v := range values { - enc.Write(v) - } - - // Retrieve encoded bytes from encoder. - buf, err := enc.Bytes() - if err != nil { - t.Fatal(err) - } - - // Read values out of decoder. - got := make([]int64, 0, len(values)) - var dec IntegerDecoder - dec.SetBytes(buf) - for dec.Next() { - if err := dec.Error(); err != nil { - t.Fatal(err) - } - got = append(got, dec.Read()) - } - - // Verify that input and output values match. 
- if !reflect.DeepEqual(expected, got) { - t.Fatalf("mismatch:\n\nexp=%#v\n\ngot=%#v\n\n", expected, got) - } - - return true - }, nil) -} - -func Test_IntegerDecoder_Corrupt(t *testing.T) { - cases := []string{ - "", // Empty - "\x00abc", // Uncompressed: less than 8 bytes - "\x10abc", // Packed: less than 8 bytes - "\x20abc", // RLE: less than 8 bytes - "\x2012345678\x90", // RLE: valid starting value but invalid delta value - "\x2012345678\x01\x90", // RLE: valid starting, valid delta value, invalid repeat value - } - - for _, c := range cases { - var dec IntegerDecoder - dec.SetBytes([]byte(c)) - if dec.Next() { - t.Fatalf("exp next == false, got true") - } - } -} - -func BenchmarkIntegerEncoderRLE(b *testing.B) { - enc := NewIntegerEncoder(1024) - x := make([]int64, 1024) - for i := 0; i < len(x); i++ { - x[i] = int64(i) - enc.Write(x[i]) - } - - b.ResetTimer() - for i := 0; i < b.N; i++ { - enc.Bytes() - } -} - -func BenchmarkIntegerEncoderPackedSimple(b *testing.B) { - enc := NewIntegerEncoder(1024) - x := make([]int64, 1024) - for i := 0; i < len(x); i++ { - // Small amount of randomness prevents RLE from being used - x[i] = int64(i) + int64(rand.Intn(10)) - enc.Write(x[i]) - } - - b.ResetTimer() - for i := 0; i < b.N; i++ { - enc.Bytes() - enc.Reset() - for i := 0; i < len(x); i++ { - enc.Write(x[i]) - } - } -} - -func BenchmarkIntegerBatch_DecodeAllUncompressed(b *testing.B) { - benchmarks := []struct { - n int - }{ - {5}, - {55}, - {555}, - {1000}, - } - - values := []int64{ - -2352281900722994752, 1438442655375607923, -4110452567888190110, - -1221292455668011702, -1941700286034261841, -2836753127140407751, - 1432686216250034552, 3663244026151507025, -3068113732684750258, - -1949953187327444488, 3713374280993588804, 3226153669854871355, - -2093273755080502606, 1006087192578600616, -2272122301622271655, - 2533238229511593671, -4450454445568858273, 2647789901083530435, - 2761419461769776844, -1324397441074946198, -680758138988210958, - 94468846694902125, -2394093124890745254, -2682139311758778198, - } - - for _, bm := range benchmarks { - rand.Seed(int64(bm.n * 1e3)) - - enc := NewIntegerEncoder(bm.n) - for i := 0; i < bm.n; i++ { - enc.Write(values[rand.Int()%len(values)]) - } - bytes, _ := enc.Bytes() - - b.Run(fmt.Sprintf("%d", bm.n), func(b *testing.B) { - b.SetBytes(int64(len(bytes))) - b.ReportAllocs() - - dst := make([]int64, bm.n) - for i := 0; i < b.N; i++ { - var dec IntegerDecoder - dec.SetBytes(bytes) - var n int - for dec.Next() { - dst[n] = dec.Read() - n++ - } - } - }) - } -} - -func BenchmarkIntegerBatch_DecodeAllPackedSimple(b *testing.B) { - benchmarks := []struct { - n int - }{ - {5}, - {55}, - {555}, - {1000}, - } - for _, bm := range benchmarks { - rand.Seed(int64(bm.n * 1e3)) - - enc := NewIntegerEncoder(bm.n) - for i := 0; i < bm.n; i++ { - // Small amount of randomness prevents RLE from being used - enc.Write(int64(i) + int64(rand.Intn(10))) - } - bytes, _ := enc.Bytes() - - b.Run(fmt.Sprintf("%d", bm.n), func(b *testing.B) { - b.SetBytes(int64(len(bytes))) - b.ReportAllocs() - - dst := make([]int64, bm.n) - for i := 0; i < b.N; i++ { - var dec IntegerDecoder - dec.SetBytes(bytes) - var n int - for dec.Next() { - dst[n] = dec.Read() - n++ - } - } - }) - } -} - -func BenchmarkIntegerBatch_DecodeAllRLE(b *testing.B) { - benchmarks := []struct { - n int - delta int64 - }{ - {5, 1}, - {55, 1}, - {555, 1}, - {1000, 1}, - {1000, 0}, - } - for _, bm := range benchmarks { - enc := NewIntegerEncoder(bm.n) - acc := int64(0) - for i := 0; i < bm.n; i++ { - 
enc.Write(acc) - acc += bm.delta - } - bytes, _ := enc.Bytes() - - b.Run(fmt.Sprintf("%d_delta_%d", bm.n, bm.delta), func(b *testing.B) { - b.SetBytes(int64(len(bytes))) - b.ReportAllocs() - - dst := make([]int64, bm.n) - for i := 0; i < b.N; i++ { - var dec IntegerDecoder - dec.SetBytes(bytes) - var n int - for dec.Next() { - dst[n] = dec.Read() - n++ - } - } - }) - } -} diff --git a/tsdb/tsm1/keymerger.go b/tsdb/tsm1/keymerger.go deleted file mode 100644 index ecde6df289..0000000000 --- a/tsdb/tsm1/keymerger.go +++ /dev/null @@ -1,109 +0,0 @@ -package tsm1 - -import ( - "bytes" - "strings" - - "github.com/influxdata/influxdb/v2/models" -) - -// keyMerger is responsible for determining a merged set of tag keys -type keyMerger struct { - i int - tmp [][]byte - keys [2][][]byte -} - -func (km *keyMerger) Clear() { - km.i = 0 - km.keys[0] = km.keys[0][:0] - if km.tmp != nil { - tmp := km.tmp[:cap(km.tmp)] - for i := range tmp { - tmp[i] = nil - } - } -} - -func (km *keyMerger) Get() [][]byte { return km.keys[km.i&1] } - -func (km *keyMerger) String() string { - var s []string - for _, k := range km.Get() { - s = append(s, string(k)) - } - return strings.Join(s, ",") -} - -func (km *keyMerger) MergeTagKeys(tags models.Tags) { - if cap(km.tmp) < len(tags) { - km.tmp = make([][]byte, len(tags)) - } else { - km.tmp = km.tmp[:len(tags)] - } - - for i := range tags { - km.tmp[i] = tags[i].Key - } - - km.MergeKeys(km.tmp) -} - -func (km *keyMerger) MergeKeys(in [][]byte) { - keys := km.keys[km.i&1] - i, j := 0, 0 - for i < len(keys) && j < len(in) && bytes.Equal(keys[i], in[j]) { - i++ - j++ - } - - if j == len(in) { - // no new tags - return - } - - km.i = (km.i + 1) & 1 - l := len(keys) + len(in) - if cap(km.keys[km.i]) < l { - km.keys[km.i] = make([][]byte, l) - } else { - km.keys[km.i] = km.keys[km.i][:l] - } - - keya := km.keys[km.i] - - // back up the pointers - if i > 0 { - i-- - j-- - } - - k := i - copy(keya[:k], keys[:k]) - - for i < len(keys) && j < len(in) { - cmp := bytes.Compare(keys[i], in[j]) - if cmp < 0 { - keya[k] = keys[i] - i++ - } else if cmp > 0 { - keya[k] = in[j] - j++ - } else { - keya[k] = keys[i] - i++ - j++ - } - k++ - } - - if i < len(keys) { - k += copy(keya[k:], keys[i:]) - } - - if j < len(in) { - k += copy(keya[k:], in[j:]) - } - - km.keys[km.i] = keya[:k] -} diff --git a/tsdb/tsm1/keymerger_test.go b/tsdb/tsm1/keymerger_test.go deleted file mode 100644 index 623090c32d..0000000000 --- a/tsdb/tsm1/keymerger_test.go +++ /dev/null @@ -1,204 +0,0 @@ -package tsm1 - -import ( - "bytes" - "math/rand" - "strconv" - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/influxdata/influxdb/v2/models" -) - -func TestKeyMerger_MergeTagKeys(t *testing.T) { - tests := []struct { - name string - tags []models.Tags - exp string - }{ - { - name: "mixed", - tags: []models.Tags{ - models.ParseTags([]byte("foo,tag0=v0,tag1=v0,tag2=v0")), - models.ParseTags([]byte("foo,tag0=v0,tag1=v0,tag2=v1")), - models.ParseTags([]byte("foo,tag0=v0")), - models.ParseTags([]byte("foo,tag0=v0,tag3=v0")), - }, - exp: "tag0,tag1,tag2,tag3", - }, - { - name: "mixed 2", - tags: []models.Tags{ - models.ParseTags([]byte("foo,tag0=v0")), - models.ParseTags([]byte("foo,tag0=v0,tag3=v0")), - models.ParseTags([]byte("foo,tag0=v0,tag1=v0,tag2=v0")), - models.ParseTags([]byte("foo,tag0=v0,tag1=v0,tag2=v1")), - }, - exp: "tag0,tag1,tag2,tag3", - }, - { - name: "all different", - tags: []models.Tags{ - models.ParseTags([]byte("foo,tag0=v0")), - models.ParseTags([]byte("foo,tag1=v0")), - 
models.ParseTags([]byte("foo,tag2=v1")), - models.ParseTags([]byte("foo,tag3=v0")), - }, - exp: "tag0,tag1,tag2,tag3", - }, - { - name: "new tags,verify clear", - tags: []models.Tags{ - models.ParseTags([]byte("foo,tag9=v0")), - models.ParseTags([]byte("foo,tag8=v0")), - }, - exp: "tag8,tag9", - }, - } - - var km keyMerger - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - km.Clear() - for _, tags := range tt.tags { - km.MergeTagKeys(tags) - } - - if got := km.String(); !cmp.Equal(got, tt.exp) { - t.Errorf("unexpected keys -got/+exp\n%s", cmp.Diff(got, tt.exp)) - } - }) - } -} - -var commaB = []byte(",") - -func TestKeyMerger_MergeKeys(t *testing.T) { - - tests := []struct { - name string - keys [][][]byte - exp string - }{ - { - name: "mixed", - keys: [][][]byte{ - bytes.Split([]byte("tag0,tag1,tag2"), commaB), - bytes.Split([]byte("tag0,tag1,tag2"), commaB), - bytes.Split([]byte("tag0"), commaB), - bytes.Split([]byte("tag0,tag3"), commaB), - }, - exp: "tag0,tag1,tag2,tag3", - }, - { - name: "mixed 2", - keys: [][][]byte{ - bytes.Split([]byte("tag0"), commaB), - bytes.Split([]byte("tag0,tag3"), commaB), - bytes.Split([]byte("tag0,tag1,tag2"), commaB), - bytes.Split([]byte("tag0,tag1,tag2"), commaB), - }, - exp: "tag0,tag1,tag2,tag3", - }, - { - name: "all different", - keys: [][][]byte{ - bytes.Split([]byte("tag0"), commaB), - bytes.Split([]byte("tag3"), commaB), - bytes.Split([]byte("tag1"), commaB), - bytes.Split([]byte("tag2"), commaB), - }, - exp: "tag0,tag1,tag2,tag3", - }, - { - name: "new tags,verify clear", - keys: [][][]byte{ - bytes.Split([]byte("tag9"), commaB), - bytes.Split([]byte("tag8"), commaB), - }, - exp: "tag8,tag9", - }, - } - - var km keyMerger - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - km.Clear() - for _, keys := range tt.keys { - km.MergeKeys(keys) - } - - if got := km.String(); !cmp.Equal(got, tt.exp) { - t.Errorf("unexpected keys -got/+exp\n%s", cmp.Diff(got, tt.exp)) - } - }) - } -} - -func BenchmarkKeyMerger_MergeKeys(b *testing.B) { - keys := [][][]byte{ - bytes.Split([]byte("tag00,tag01,tag02"), commaB), - bytes.Split([]byte("tag00,tag01,tag02"), commaB), - bytes.Split([]byte("tag00,tag01,tag05,tag06,tag10,tag11,tag12,tag13,tag14,tag15"), commaB), - bytes.Split([]byte("tag00"), commaB), - bytes.Split([]byte("tag00,tag03"), commaB), - bytes.Split([]byte("tag01,tag03,tag13,tag14,tag15"), commaB), - bytes.Split([]byte("tag04,tag05"), commaB), - } - - rand.Seed(20040409) - - tests := []int{ - 10, - 1000, - 1000000, - } - - for _, n := range tests { - b.Run(strconv.Itoa(n), func(b *testing.B) { - b.ResetTimer() - - var km keyMerger - for i := 0; i < b.N; i++ { - for j := 0; j < n; j++ { - km.MergeKeys(keys[rand.Int()%len(keys)]) - } - km.Clear() - } - }) - } -} - -func BenchmarkKeyMerger_MergeTagKeys(b *testing.B) { - tags := []models.Tags{ - models.ParseTags([]byte("foo,tag00=v0,tag01=v0,tag02=v0")), - models.ParseTags([]byte("foo,tag00=v0,tag01=v0,tag02=v0")), - models.ParseTags([]byte("foo,tag00=v0,tag01=v0,tag05=v0,tag06=v0,tag10=v0,tag11=v0,tag12=v0,tag13=v0,tag14=v0,tag15=v0")), - models.ParseTags([]byte("foo,tag00=v0")), - models.ParseTags([]byte("foo,tag00=v0,tag03=v0")), - models.ParseTags([]byte("foo,tag01=v0,tag03=v0,tag13=v0,tag14=v0,tag15=v0")), - models.ParseTags([]byte("foo,tag04=v0,tag05=v0")), - } - - rand.Seed(20040409) - - tests := []int{ - 10, - 1000, - 1000000, - } - - for _, n := range tests { - b.Run(strconv.Itoa(n), func(b *testing.B) { - b.ResetTimer() - - var km keyMerger - for i := 0; i < b.N; 
i++ { - for j := 0; j < n; j++ { - km.MergeTagKeys(tags[rand.Int()%len(tags)]) - } - km.Clear() - } - }) - } -} diff --git a/tsdb/tsm1/metrics.go b/tsdb/tsm1/metrics.go deleted file mode 100644 index d69fb918e7..0000000000 --- a/tsdb/tsm1/metrics.go +++ /dev/null @@ -1,290 +0,0 @@ -package tsm1 - -import ( - "sort" - "sync" - - "github.com/prometheus/client_golang/prometheus" -) - -// The following package variables act as singletons, to be shared by all Engine -// instantiations. This allows multiple Engines to be instantiated within the -// same process. -var ( - bms *blockMetrics - mmu sync.RWMutex -) - -// PrometheusCollectors returns all prometheus metrics for the tsm1 package. -func PrometheusCollectors() []prometheus.Collector { - mmu.RLock() - defer mmu.RUnlock() - - var collectors []prometheus.Collector - if bms != nil { - collectors = append(collectors, bms.compactionMetrics.PrometheusCollectors()...) - collectors = append(collectors, bms.fileMetrics.PrometheusCollectors()...) - collectors = append(collectors, bms.cacheMetrics.PrometheusCollectors()...) - collectors = append(collectors, bms.readMetrics.PrometheusCollectors()...) - } - return collectors -} - -// namespace is the leading part of all published metrics for the Storage service. -const namespace = "storage" - -const compactionSubsystem = "compactions" // sub-system associated with metrics for compactions. -const fileStoreSubsystem = "tsm_files" // sub-system associated with metrics for TSM files. -const cacheSubsystem = "cache" // sub-system associated with metrics for the cache. -const readSubsystem = "reads" // sub-system associated with metrics for reads. - -// blockMetrics are a set of metrics concerned with tracking data about block storage. -type blockMetrics struct { - labels prometheus.Labels - *compactionMetrics - *fileMetrics - *cacheMetrics - *readMetrics -} - -// newBlockMetrics initialises the prometheus metrics for the block subsystem. -func newBlockMetrics(labels prometheus.Labels) *blockMetrics { - return &blockMetrics{ - labels: labels, - compactionMetrics: newCompactionMetrics(labels), - fileMetrics: newFileMetrics(labels), - cacheMetrics: newCacheMetrics(labels), - readMetrics: newReadMetrics(labels), - } -} - -// PrometheusCollectors satisfies the prom.PrometheusCollector interface. -func (m *blockMetrics) PrometheusCollectors() []prometheus.Collector { - var metrics []prometheus.Collector - metrics = append(metrics, m.compactionMetrics.PrometheusCollectors()...) - metrics = append(metrics, m.fileMetrics.PrometheusCollectors()...) - metrics = append(metrics, m.cacheMetrics.PrometheusCollectors()...) - metrics = append(metrics, m.readMetrics.PrometheusCollectors()...) - return metrics -} - -// compactionMetrics are a set of metrics concerned with tracking data about compactions. -type compactionMetrics struct { - CompactionsActive *prometheus.GaugeVec - CompactionDuration *prometheus.HistogramVec - CompactionQueue *prometheus.GaugeVec - - // The following metrics include a ``"status" = {ok, error}` label - Compactions *prometheus.CounterVec -} - -// newCompactionMetrics initialises the prometheus metrics for compactions. -func newCompactionMetrics(labels prometheus.Labels) *compactionMetrics { - names := []string{"level"} // All compaction metrics have a `level` label. - for k := range labels { - names = append(names, k) - } - sort.Strings(names) - - totalCompactionsNames := append(append([]string(nil), names...), []string{"reason", "status"}...) 
- sort.Strings(totalCompactionsNames) - - return &compactionMetrics{ - Compactions: prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: compactionSubsystem, - Name: "total", - Help: "Number of times cache snapshotted or TSM compaction attempted.", - }, totalCompactionsNames), - CompactionsActive: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: compactionSubsystem, - Name: "active", - Help: "Number of active compactions.", - }, names), - CompactionDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: namespace, - Subsystem: compactionSubsystem, - Name: "duration_seconds", - Help: "Time taken for a successful compaction or snapshot.", - // 30 buckets spaced exponentially between 5s and ~53 minutes. - Buckets: prometheus.ExponentialBuckets(5.0, 1.25, 30), - }, names), - CompactionQueue: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: compactionSubsystem, - Name: "queued", - Help: "Number of queued compactions.", - }, names), - } -} - -// PrometheusCollectors satisfies the prom.PrometheusCollector interface. -func (m *compactionMetrics) PrometheusCollectors() []prometheus.Collector { - return []prometheus.Collector{ - m.Compactions, - m.CompactionsActive, - m.CompactionDuration, - m.CompactionQueue, - } -} - -// fileMetrics are a set of metrics concerned with tracking data about compactions. -type fileMetrics struct { - DiskSize *prometheus.GaugeVec - Files *prometheus.GaugeVec -} - -// newFileMetrics initialises the prometheus metrics for tracking files on disk. -func newFileMetrics(labels prometheus.Labels) *fileMetrics { - var names []string - for k := range labels { - names = append(names, k) - } - names = append(names, "level") - sort.Strings(names) - - return &fileMetrics{ - DiskSize: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: fileStoreSubsystem, - Name: "disk_bytes", - Help: "Number of bytes TSM files using on disk.", - }, names), - Files: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: fileStoreSubsystem, - Name: "total", - Help: "Number of files.", - }, names), - } -} - -// PrometheusCollectors satisfies the prom.PrometheusCollector interface. -func (m *fileMetrics) PrometheusCollectors() []prometheus.Collector { - return []prometheus.Collector{ - m.DiskSize, - m.Files, - } -} - -// cacheMetrics are a set of metrics concerned with tracking data about the TSM Cache. -type cacheMetrics struct { - MemSize *prometheus.GaugeVec - DiskSize *prometheus.GaugeVec - SnapshotsActive *prometheus.GaugeVec - Age *prometheus.GaugeVec - SnapshottedBytes *prometheus.CounterVec - - // The following metrics include a ``"status" = {ok, error, dropped}` label - WrittenBytes *prometheus.CounterVec - Writes *prometheus.CounterVec -} - -// newCacheMetrics initialises the prometheus metrics for compactions. 
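These constructors all follow the same labelled-collector pattern: the static labels shared by every engine are merged with per-metric label names such as "level" and "status", sorted, and used to build a *Vec; each engine then supplies concrete values through With(). A hedged, self-contained sketch of that pattern (names here are illustrative, not the deleted API):

package main

import (
	"fmt"
	"sort"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	static := prometheus.Labels{"engine_id": "", "node_id": ""}

	// Label names: per-metric labels plus the keys of the static label set.
	names := []string{"level", "status"}
	for k := range static {
		names = append(names, k)
	}
	sort.Strings(names)

	compactions := prometheus.NewCounterVec(prometheus.CounterOpts{
		Namespace: "storage",
		Subsystem: "compactions",
		Name:      "total",
		Help:      "Number of compactions attempted.",
	}, names)

	reg := prometheus.NewRegistry()
	reg.MustRegister(compactions)

	// Each engine instance fills in concrete values for every label name.
	compactions.With(prometheus.Labels{
		"engine_id": "1", "node_id": "0", "level": "2", "status": "ok",
	}).Inc()

	mfs, err := reg.Gather()
	if err != nil {
		panic(err)
	}
	fmt.Println(len(mfs)) // 1 metric family
}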
-func newCacheMetrics(labels prometheus.Labels) *cacheMetrics { - var names []string - for k := range labels { - names = append(names, k) - } - sort.Strings(names) - - writeNames := append(append([]string(nil), names...), "status") - sort.Strings(writeNames) - - return &cacheMetrics{ - MemSize: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: cacheSubsystem, - Name: "inuse_bytes", - Help: "In-memory size of cache.", - }, names), - DiskSize: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: cacheSubsystem, - Name: "disk_bytes", - Help: "Number of bytes on disk used by snapshot data.", - }, names), - SnapshotsActive: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: cacheSubsystem, - Name: "snapshots_active", - Help: "Number of active concurrent snapshots (>1 when splitting the cache).", - }, names), - Age: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: cacheSubsystem, - Name: "age_seconds", - Help: "Age in seconds of the current cache (time since last snapshot or initialisation).", - }, names), - SnapshottedBytes: prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: cacheSubsystem, - Name: "snapshot_bytes", - Help: "Number of bytes snapshotted.", - }, names), - WrittenBytes: prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: cacheSubsystem, - Name: "written_bytes", - Help: "Number of bytes successfully written to the Cache.", - }, writeNames), - Writes: prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: cacheSubsystem, - Name: "writes_total", - Help: "Number of writes to the Cache.", - }, writeNames), - } -} - -// PrometheusCollectors satisfies the prom.PrometheusCollector interface. -func (m *cacheMetrics) PrometheusCollectors() []prometheus.Collector { - return []prometheus.Collector{ - m.MemSize, - m.DiskSize, - m.SnapshotsActive, - m.Age, - m.SnapshottedBytes, - m.WrittenBytes, - m.Writes, - } -} - -// readMetrics are a set of metrics concerned with tracking data engine reads. -type readMetrics struct { - Cursors *prometheus.CounterVec - Seeks *prometheus.CounterVec -} - -// newReadMetrics initialises the prometheus metrics for tracking reads. -func newReadMetrics(labels prometheus.Labels) *readMetrics { - var names []string - for k := range labels { - names = append(names, k) - } - sort.Strings(names) - - return &readMetrics{ - Cursors: prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: readSubsystem, - Name: "cursors", - Help: "Number of cursors created.", - }, names), - Seeks: prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: readSubsystem, - Name: "seeks", - Help: "Number of tsm locations seeked.", - }, names), - } -} - -// PrometheusCollectors satisfies the prom.PrometheusCollector interface. -func (m *readMetrics) PrometheusCollectors() []prometheus.Collector { - return []prometheus.Collector{ - m.Cursors, - m.Seeks, - } -} diff --git a/tsdb/tsm1/metrics_test.go b/tsdb/tsm1/metrics_test.go deleted file mode 100644 index 6fd6cf9d09..0000000000 --- a/tsdb/tsm1/metrics_test.go +++ /dev/null @@ -1,218 +0,0 @@ -package tsm1 - -import ( - "testing" - - "github.com/influxdata/influxdb/v2/kit/prom/promtest" - "github.com/prometheus/client_golang/prometheus" -) - -func TestMetrics_Filestore(t *testing.T) { - // metrics to be shared by multiple file stores. 
- metrics := newFileMetrics(prometheus.Labels{"engine_id": "", "node_id": ""}) - t2 := newFileTracker(metrics, prometheus.Labels{"engine_id": "1", "node_id": "0"}) - t3 := newFileTracker(metrics, prometheus.Labels{"engine_id": "2", "node_id": "0"}) - - reg := prometheus.NewRegistry() - reg.MustRegister(metrics.PrometheusCollectors()...) - - // Generate some measurements. - t2.AddBytes(200, 1) - t2.SetFileCount(map[int]uint64{1: 4, 4: 3, 5: 1}) - t3.SetBytes(map[int]uint64{1: 500, 4: 100, 5: 100}) - - // Test that all the correct metrics are present. - mfs, err := reg.Gather() - if err != nil { - t.Fatal(err) - } - - base := namespace + "_" + fileStoreSubsystem + "_" - m2Bytes := promtest.MustFindMetric(t, mfs, base+"disk_bytes", prometheus.Labels{"engine_id": "1", "node_id": "0", "level": "1"}) - m2Files1 := promtest.MustFindMetric(t, mfs, base+"total", prometheus.Labels{"engine_id": "1", "node_id": "0", "level": "1"}) - m2Files2 := promtest.MustFindMetric(t, mfs, base+"total", prometheus.Labels{"engine_id": "1", "node_id": "0", "level": "4+"}) - m3Bytes1 := promtest.MustFindMetric(t, mfs, base+"disk_bytes", prometheus.Labels{"engine_id": "2", "node_id": "0", "level": "1"}) - m3Bytes2 := promtest.MustFindMetric(t, mfs, base+"disk_bytes", prometheus.Labels{"engine_id": "2", "node_id": "0", "level": "4+"}) - - if m, got, exp := m2Bytes, m2Bytes.GetGauge().GetValue(), 200.0; got != exp { - t.Errorf("[%s] got %v, expected %v", m, got, exp) - } - - if m, got, exp := m2Files1, m2Files1.GetGauge().GetValue(), 4.0; got != exp { - t.Errorf("[%s] got %v, expected %v", m, got, exp) - } - - if m, got, exp := m2Files2, m2Files2.GetGauge().GetValue(), 4.0; got != exp { - t.Errorf("[%s] got %v, expected %v", m, got, exp) - } - - if m, got, exp := m3Bytes1, m3Bytes1.GetGauge().GetValue(), 500.0; got != exp { - t.Errorf("[%s] got %v, expected %v", m, got, exp) - } - - if m, got, exp := m3Bytes2, m3Bytes2.GetGauge().GetValue(), 200.0; got != exp { - t.Errorf("[%s] got %v, expected %v", m, got, exp) - } -} - -func TestMetrics_Cache(t *testing.T) { - // metrics to be shared by multiple file stores. - metrics := newCacheMetrics(prometheus.Labels{"engine_id": "", "node_id": ""}) - - t1 := newCacheTracker(metrics, prometheus.Labels{"engine_id": "0", "node_id": "0"}) - t2 := newCacheTracker(metrics, prometheus.Labels{"engine_id": "1", "node_id": "0"}) - - reg := prometheus.NewRegistry() - reg.MustRegister(metrics.PrometheusCollectors()...) - - base := namespace + "_" + cacheSubsystem + "_" - - // All the metric names - gauges := []string{ - base + "inuse_bytes", - base + "disk_bytes", - base + "age_seconds", - base + "snapshots_active", - } - - counters := []string{ - base + "snapshot_bytes", - base + "written_bytes", - base + "writes_total", - } - - // Generate some measurements. - for i, tracker := range []*cacheTracker{t1, t2} { - tracker.SetMemBytes(uint64(i + len(gauges[0]))) - tracker.SetDiskBytes(uint64(i + len(gauges[1]))) - tracker.metrics.Age.With(tracker.Labels()).Set(float64(i + len(gauges[2]))) - tracker.SetSnapshotsActive(uint64(i + len(gauges[3]))) - - tracker.AddSnapshottedBytes(uint64(i + len(counters[0]))) - tracker.AddWrittenBytesOK(uint64(i + len(counters[1]))) - - labels := tracker.Labels() - labels["status"] = "ok" - tracker.metrics.Writes.With(labels).Add(float64(i + len(counters[2]))) - } - - // Test that all the correct metrics are present. - mfs, err := reg.Gather() - if err != nil { - t.Fatal(err) - } - - // The label variants for the two caches. 
- labelVariants := []prometheus.Labels{ - prometheus.Labels{"engine_id": "0", "node_id": "0"}, - prometheus.Labels{"engine_id": "1", "node_id": "0"}, - } - - for i, labels := range labelVariants { - for _, name := range gauges { - exp := float64(i + len(name)) - metric := promtest.MustFindMetric(t, mfs, name, labels) - if got := metric.GetGauge().GetValue(); got != exp { - t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) - } - } - - for _, name := range counters { - exp := float64(i + len(name)) - - if name == counters[1] || name == counters[2] { - labels["status"] = "ok" - } - metric := promtest.MustFindMetric(t, mfs, name, labels) - if got := metric.GetCounter().GetValue(); got != exp { - t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) - } - } - } -} - -func TestMetrics_Compactions(t *testing.T) { - // metrics to be shared by multiple file stores. - metrics := newCompactionMetrics(prometheus.Labels{"engine_id": "", "node_id": ""}) - - t1 := newCompactionTracker(metrics, prometheus.Labels{"engine_id": "0", "node_id": "0"}) - t2 := newCompactionTracker(metrics, prometheus.Labels{"engine_id": "1", "node_id": "0"}) - - reg := prometheus.NewRegistry() - reg.MustRegister(metrics.PrometheusCollectors()...) - - base := namespace + "_" + compactionSubsystem + "_" - - // All the metric names - gauges := []string{ - base + "active", - base + "queued", - } - - counters := []string{base + "total"} - histograms := []string{base + "duration_seconds"} - - // Generate some measurements. - for i, tracker := range []*compactionTracker{t1, t2} { - labels := tracker.Labels(2) - tracker.metrics.CompactionsActive.With(labels).Add(float64(i + len(gauges[0]))) - tracker.SetQueue(2, uint64(i+len(gauges[1]))) - - labels = tracker.Labels(2) - labels["status"] = "ok" - labels["reason"] = CacheStatusAgeExceeded.String() - tracker.metrics.Compactions.With(labels).Add(float64(i + len(counters[0]))) - - labels = tracker.Labels(2) - tracker.metrics.CompactionDuration.With(labels).Observe(float64(i + len(histograms[0]))) - } - - // Test that all the correct metrics are present. - mfs, err := reg.Gather() - if err != nil { - t.Fatal(err) - } - - // The label variants for the two caches. 
- labelVariants := []prometheus.Labels{ - prometheus.Labels{"engine_id": "0", "node_id": "0"}, - prometheus.Labels{"engine_id": "1", "node_id": "0"}, - } - - for i, labels := range labelVariants { - labels["level"] = "2" - - for _, name := range gauges { - exp := float64(i + len(name)) - metric := promtest.MustFindMetric(t, mfs, name, labels) - if got := metric.GetGauge().GetValue(); got != exp { - t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) - } - } - - for _, name := range counters { - exp := float64(i + len(name)) - - // Make a copy since we need to add a label - l := make(prometheus.Labels, len(labels)) - for k, v := range labels { - l[k] = v - } - l["status"] = "ok" - l["reason"] = CacheStatusAgeExceeded.String() - - metric := promtest.MustFindMetric(t, mfs, name, l) - if got := metric.GetCounter().GetValue(); got != exp { - t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) - } - } - - for _, name := range histograms { - exp := float64(i + len(name)) - metric := promtest.MustFindMetric(t, mfs, name, labels) - if got := metric.GetHistogram().GetSampleSum(); got != exp { - t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp) - } - } - } -} diff --git a/tsdb/tsm1/mmap_unix.go b/tsdb/tsm1/mmap_unix.go deleted file mode 100644 index 707aa845d8..0000000000 --- a/tsdb/tsm1/mmap_unix.go +++ /dev/null @@ -1,43 +0,0 @@ -// +build !windows,!plan9 - -package tsm1 - -import ( - "os" - "syscall" - - "golang.org/x/sys/unix" -) - -func mmap(f *os.File, offset int64, length int) ([]byte, error) { - // anonymous mapping - if f == nil { - return unix.Mmap(-1, 0, length, syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_ANON|syscall.MAP_PRIVATE) - } - - mmap, err := unix.Mmap(int(f.Fd()), 0, length, syscall.PROT_READ, syscall.MAP_SHARED) - if err != nil { - return nil, err - } - - return mmap, nil -} - -func munmap(b []byte) (err error) { - return unix.Munmap(b) -} - -// madviseWillNeed gives the kernel the mmap madvise value MADV_WILLNEED, hinting -// that we plan on using the provided buffer in the near future. -func madviseWillNeed(b []byte) error { - return madvise(b, syscall.MADV_WILLNEED) -} - -func madviseDontNeed(b []byte) error { - return madvise(b, syscall.MADV_DONTNEED) -} - -// From: github.com/boltdb/bolt/bolt_unix.go -func madvise(b []byte, advice int) (err error) { - return unix.Madvise(b, advice) -} diff --git a/tsdb/tsm1/mmap_windows.go b/tsdb/tsm1/mmap_windows.go deleted file mode 100644 index 8a437e23f5..0000000000 --- a/tsdb/tsm1/mmap_windows.go +++ /dev/null @@ -1,133 +0,0 @@ -package tsm1 - -import ( - "errors" - "os" - "reflect" - "sync" - "syscall" - "unsafe" -) - -// mmap implementation for Windows -// Based on: https://github.com/edsrzf/mmap-go -// Based on: https://github.com/boltdb/bolt/bolt_windows.go -// Ref: https://groups.google.com/forum/#!topic/golang-nuts/g0nLwQI9www - -// We keep this map so that we can get back the original handle from the memory address. 
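
Putting the unix helpers above together, a typical read-only mapping with a will-need hint looks roughly like this. A sketch only, assuming it lives inside package tsm1; openMapped is a hypothetical helper and path is an assumed *.tsm file path:

	func openMapped(path string) ([]byte, error) {
		f, err := os.Open(path)
		if err != nil {
			return nil, err
		}
		defer f.Close() // the mapping remains valid after the descriptor is closed

		st, err := f.Stat()
		if err != nil {
			return nil, err
		}

		b, err := mmap(f, 0, int(st.Size())) // read-only, MAP_SHARED
		if err != nil {
			return nil, err
		}
		_ = madviseWillNeed(b) // hint that the pages will be read soon
		return b, nil          // caller calls munmap(b) when done
	}
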
-var handleLock sync.Mutex -var handleMap = map[uintptr]syscall.Handle{} -var fileMap = map[uintptr]*os.File{} - -func openSharedFile(f *os.File) (file *os.File, err error) { - - var access, createmode, sharemode uint32 - var sa *syscall.SecurityAttributes - - access = syscall.GENERIC_READ - sharemode = uint32(syscall.FILE_SHARE_READ | syscall.FILE_SHARE_WRITE | syscall.FILE_SHARE_DELETE) - createmode = syscall.OPEN_EXISTING - fileName := f.Name() - - pathp, err := syscall.UTF16PtrFromString(fileName) - if err != nil { - return nil, err - } - - h, e := syscall.CreateFile(pathp, access, sharemode, sa, createmode, syscall.FILE_ATTRIBUTE_NORMAL, 0) - - if e != nil { - return nil, e - } - //NewFile does not add finalizer, need to close this manually - return os.NewFile(uintptr(h), fileName), nil -} - -func mmap(f *os.File, offset int64, length int) (out []byte, err error) { - // TODO: Add support for anonymous mapping on windows - if f == nil { - return make([]byte, length), nil - } - - // Open a file mapping handle. - sizelo := uint32(length >> 32) - sizehi := uint32(length) & 0xffffffff - - sharedHandle, errno := openSharedFile(f) - if errno != nil { - return nil, os.NewSyscallError("CreateFile", errno) - } - - h, errno := syscall.CreateFileMapping(syscall.Handle(sharedHandle.Fd()), nil, syscall.PAGE_READONLY, sizelo, sizehi, nil) - if h == 0 { - return nil, os.NewSyscallError("CreateFileMapping", errno) - } - - // Create the memory map. - addr, errno := syscall.MapViewOfFile(h, syscall.FILE_MAP_READ, 0, 0, uintptr(length)) - if addr == 0 { - return nil, os.NewSyscallError("MapViewOfFile", errno) - } - - handleLock.Lock() - handleMap[addr] = h - fileMap[addr] = sharedHandle - handleLock.Unlock() - - // Convert to a byte array. - hdr := (*reflect.SliceHeader)(unsafe.Pointer(&out)) - hdr.Data = uintptr(unsafe.Pointer(addr)) - hdr.Len = length - hdr.Cap = length - - return -} - -// munmap Windows implementation -// Based on: https://github.com/edsrzf/mmap-go -// Based on: https://github.com/boltdb/bolt/bolt_windows.go -func munmap(b []byte) (err error) { - handleLock.Lock() - defer handleLock.Unlock() - - addr := (uintptr)(unsafe.Pointer(&b[0])) - if err := syscall.UnmapViewOfFile(addr); err != nil { - return os.NewSyscallError("UnmapViewOfFile", err) - } - - handle, ok := handleMap[addr] - if !ok { - // should be impossible; we would've seen the error above - return errors.New("unknown base address") - } - delete(handleMap, addr) - - e := syscall.CloseHandle(syscall.Handle(handle)) - if e != nil { - return os.NewSyscallError("CloseHandle", e) - } - - file, ok := fileMap[addr] - if !ok { - // should be impossible; we would've seen the error above - return errors.New("unknown base address") - } - delete(fileMap, addr) - - e = file.Close() - if e != nil { - return errors.New("close file" + e.Error()) - } - return nil -} - -// madviseWillNeed is unsupported on Windows. -func madviseWillNeed(b []byte) error { return nil } - -// madviseDontNeed is unsupported on Windows. -func madviseDontNeed(b []byte) error { return nil } - -func madvise(b []byte, advice int) error { - // Not implemented - return nil -} diff --git a/tsdb/tsm1/predicate.go b/tsdb/tsm1/predicate.go deleted file mode 100644 index de5c2db146..0000000000 --- a/tsdb/tsm1/predicate.go +++ /dev/null @@ -1,691 +0,0 @@ -package tsm1 - -import ( - "bytes" - "fmt" - "regexp" - - "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/storage/reads/datatypes" -) - -// Predicate is something that can match on a series key. 
-type Predicate interface { - Clone() influxdb.Predicate - Matches(key []byte) bool - Marshal() ([]byte, error) -} - -const ( // Enumeration of all predicate versions we support unmarshalling. - predicateVersionZero = '\x00' -) - -// UnmarshalPredicate takes stored predicate bytes from a Marshal call and returns a Predicate. -func UnmarshalPredicate(data []byte) (Predicate, error) { - if len(data) == 0 { - return nil, nil - } else if data[0] != predicateVersionZero { - return nil, fmt.Errorf("unknown tag byte: %x", data[0]) - } - - pred := new(datatypes.Predicate) - if err := pred.Unmarshal(data[1:]); err != nil { - return nil, err - } - return NewProtobufPredicate(pred) -} - -// -// Design -// - -// Predicates lazily evaluate with memoization so that we can walk a series key -// by the tags without parsing them into a structure and allocating. Each node -// in a predicate tree keeps a cache if it has enough information to have a -// definite value. The predicate state keeps track of all of the tag key/value -// pairs passed to it, and has a reset function to start over for a new series key. -// -// For example, imagine a query like -// -// ("tag1" == "val1" AND "tag2" == "val2") OR "tag3" == "val3" -// -// The state would have tag values set on it like -// -// state.Set("tag1", "val1") => NeedMore -// state.Set("tag2", "not-val2") => NeedMore -// state.Set("tag3", "val3") => True -// -// where after the first Set, the AND and OR clauses are both NeedMore, after -// the second Set, the AND clause is False and the OR clause is NeedMore, and -// after the last Set, the AND clause is still False, and the OR clause is True. -// -// Fast resetting is achieved by having each cache maintain a pointer to the state -// and both having a generation number. When the state resets, it bumps the generation -// number, and when the value is set in the cache, it is set with the current generation -// of the state. When querying the cache, it checks if the generation still matches. - -// -// Protobuf Implementation -// - -// NewProtobufPredicate returns a Predicate that matches based on the comparison structure -// described by the incoming protobuf. -func NewProtobufPredicate(pred *datatypes.Predicate) (Predicate, error) { - // Walk the predicate to collect the tag refs - locs := make(map[string]int) - walkPredicateNodes(pred.Root, func(node *datatypes.Node) { - if node.GetNodeType() == datatypes.NodeTypeTagRef { - switch value := node.GetValue().(type) { - case *datatypes.Node_TagRefValue: - // Only add to the matcher locations the first time we encounter - // the tag key reference. This prevents problems with redundant - // predicates like: - // - // foo = a AND foo = b - // foo = c AND foo = d - if _, ok := locs[value.TagRefValue]; !ok { - locs[value.TagRefValue] = len(locs) - } - } - } - }) - - // Construct the shared state and root predicate node. - state := newPredicateState(locs) - root, err := buildPredicateNode(state, pred.Root) - if err != nil { - return nil, err - } - - return &predicateMatcher{ - pred: pred, - state: state, - root: root, - }, nil -} - -// predicateMatcher implements Predicate for a protobuf. -type predicateMatcher struct { - pred *datatypes.Predicate - state *predicateState - root predicateNode -} - -// Clone returns a deep copy of p's state and root node. -// -// It is not safe to modify p.pred on the returned clone. 
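
A minimal standalone sketch of the generation-number memoization described in the design notes above (names are illustrative, not part of this package):

	type memoState struct{ gen uint64 }

	// Reset invalidates every cache pointing at this state in O(1).
	func (s *memoState) Reset() { s.gen++ }

	type memoCache struct {
		state *memoState
		gen   uint64
		value bool
	}

	// Cached reports the stored value and whether it is still valid.
	func (c *memoCache) Cached() (value, ok bool) { return c.value, c.gen == c.state.gen }

	// Store records a value that stays valid until the next Reset.
	func (c *memoCache) Store(v bool) { c.gen, c.value = c.state.gen, v }
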
-func (p *predicateMatcher) Clone() influxdb.Predicate { - state := p.state.Clone() - return &predicateMatcher{ - pred: p.pred, - state: state, - root: p.root.Clone(state), - } -} - -// Matches checks if the key matches the predicate by feeding individual tags into the -// state and returning as soon as the root node has a definite answer. -func (p *predicateMatcher) Matches(key []byte) bool { - p.state.Reset() - - // Extract the series from the composite key - key, _ = SeriesAndFieldFromCompositeKey(key) - - // Determine which popping algorithm to use. If there are no escape characters - // we can use the quicker method that only works in that case. - popTag := predicatePopTag - if bytes.IndexByte(key, '\\') != -1 { - popTag = predicatePopTagEscape - } - - // Feed tag pairs into the state and update until we have a definite response. - var tag, value []byte - for len(key) > 0 { - tag, value, key = popTag(key) - if tag == nil || !p.state.Set(tag, value) { - continue - } - resp := p.root.Update() - if resp == predicateResponse_true { - return true - } else if resp == predicateResponse_false { - return false - } - } - - // If it always needed more then it didn't match. For example, consider if - // the predicate matches `tag1=val1` but tag1 is not present in the key. - return false -} - -// Marshal returns a buffer representing the protobuf predicate. -func (p *predicateMatcher) Marshal() ([]byte, error) { - // Prefix it with the version byte so that we can change in the future if necessary - buf := make([]byte, 1+p.pred.Size()) - buf[0] = predicateVersionZero - _, err := p.pred.MarshalTo(buf[1:]) - return buf, err -} - -// walkPredicateNodes recursively calls the function for each node. -func walkPredicateNodes(node *datatypes.Node, fn func(node *datatypes.Node)) { - fn(node) - for _, ch := range node.Children { - walkPredicateNodes(ch, fn) - } -} - -// buildPredicateNode takes a protobuf node and converts it into a predicateNode. It is strict -// in what it accepts. 
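
For illustration, matching a single-comparison predicate against a composite series key might look like this (a sketch in package tsm1; the node shapes mirror the helpers used in predicate_test.go):

	pred, err := NewProtobufPredicate(&datatypes.Predicate{
		Root: &datatypes.Node{
			NodeType: datatypes.NodeTypeComparisonExpression,
			Value:    &datatypes.Node_Comparison_{Comparison: datatypes.ComparisonEqual},
			Children: []*datatypes.Node{
				{NodeType: datatypes.NodeTypeTagRef, Value: &datatypes.Node_TagRefValue{TagRefValue: "region"}},
				{NodeType: datatypes.NodeTypeLiteral, Value: &datatypes.Node_StringValue{StringValue: "west"}},
			},
		},
	})
	if err != nil {
		return err
	}
	matched := pred.Matches([]byte("bucketorg,region=west,host=a")) // true
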
-func buildPredicateNode(state *predicateState, node *datatypes.Node) (predicateNode, error) { - switch node.GetNodeType() { - case datatypes.NodeTypeComparisonExpression: - children := node.GetChildren() - if len(children) != 2 { - return nil, fmt.Errorf("invalid number of children for logical expression: %v", len(children)) - } - left, right := children[0], children[1] - - comp := &predicateNodeComparison{ - predicateCache: newPredicateCache(state), - comp: node.GetComparison(), - } - - // Fill in the left side of the comparison - switch left.GetNodeType() { - // Tag refs look up the location of the tag in the state - case datatypes.NodeTypeTagRef: - idx, ok := state.locs[left.GetTagRefValue()] - if !ok { - return nil, fmt.Errorf("invalid tag ref in comparison: %v", left.GetTagRefValue()) - } - comp.leftIndex = idx - - // Left literals are only allowed to be strings - case datatypes.NodeTypeLiteral: - lit, ok := left.GetValue().(*datatypes.Node_StringValue) - if !ok { - return nil, fmt.Errorf("invalid left literal in comparison: %v", left.GetValue()) - } - comp.leftLiteral = []byte(lit.StringValue) - - default: - return nil, fmt.Errorf("invalid left node in comparison: %v", left.GetNodeType()) - } - - // Fill in the right side of the comparison - switch right.GetNodeType() { - // Tag refs look up the location of the tag in the state - case datatypes.NodeTypeTagRef: - idx, ok := state.locs[right.GetTagRefValue()] - if !ok { - return nil, fmt.Errorf("invalid tag ref in comparison: %v", right.GetTagRefValue()) - } - comp.rightIndex = idx - - // Right literals are allowed to be regexes as well as strings - case datatypes.NodeTypeLiteral: - switch lit := right.GetValue().(type) { - case *datatypes.Node_StringValue: - comp.rightLiteral = []byte(lit.StringValue) - - case *datatypes.Node_RegexValue: - reg, err := regexp.Compile(lit.RegexValue) - if err != nil { - return nil, err - } - comp.rightReg = reg - - default: - return nil, fmt.Errorf("invalid right literal in comparison: %v", right.GetValue()) - } - - default: - return nil, fmt.Errorf("invalid right node in comparison: %v", right.GetNodeType()) - } - - // Ensure that a regex is set on the right if and only if the comparison is a regex - if comp.rightReg == nil { - if comp.comp == datatypes.ComparisonRegex || comp.comp == datatypes.ComparisonNotRegex { - return nil, fmt.Errorf("invalid comparison involving regex: %v", node) - } - } else { - if comp.comp != datatypes.ComparisonRegex && comp.comp != datatypes.ComparisonNotRegex { - return nil, fmt.Errorf("invalid comparison not against regex: %v", node) - } - } - - return comp, nil - - case datatypes.NodeTypeLogicalExpression: - children := node.GetChildren() - if len(children) != 2 { - return nil, fmt.Errorf("invalid number of children for logical expression: %v", len(children)) - } - - left, err := buildPredicateNode(state, children[0]) - if err != nil { - return nil, err - } - right, err := buildPredicateNode(state, children[1]) - if err != nil { - return nil, err - } - - switch node.GetLogical() { - case datatypes.LogicalAnd: - return &predicateNodeAnd{ - predicateCache: newPredicateCache(state), - left: left, - right: right, - }, nil - - case datatypes.LogicalOr: - return &predicateNodeOr{ - predicateCache: newPredicateCache(state), - left: left, - right: right, - }, nil - - default: - return nil, fmt.Errorf("unknown logical type: %v", node.GetLogical()) - } - - default: - return nil, fmt.Errorf("unsupported predicate type: %v", node.GetNodeType()) - } -} - -// -// Predicate Responses 
-// - -type predicateResponse uint8 - -const ( - predicateResponse_needMore predicateResponse = iota - predicateResponse_true - predicateResponse_false -) - -// -// Predicate State -// - -// predicateState keeps track of tag key=>value mappings with cheap methods -// to reset to a blank state. -type predicateState struct { - gen uint64 - locs map[string]int - values [][]byte -} - -// newPredicateState creates a predicateState given a map of keys to indexes into an -// an array. -func newPredicateState(locs map[string]int) *predicateState { - return &predicateState{ - gen: 1, // so that caches start out unfilled since they start at 0 - locs: locs, - values: make([][]byte, len(locs)), - } -} - -// Clone returns a deep copy of p. -func (p *predicateState) Clone() *predicateState { - q := &predicateState{ - gen: p.gen, - locs: make(map[string]int, len(p.locs)), - values: make([][]byte, len(p.values)), - } - - for k, v := range p.locs { - q.locs[k] = v - } - copy(q.values, p.values) - - return q -} - -// Reset clears any set values for the state. -func (p *predicateState) Reset() { - p.gen++ - - for i := range p.values { - p.values[i] = nil - } -} - -// Set sets the key to be the value and returns true if the key is part of the considered -// set of keys. -func (p *predicateState) Set(key, value []byte) bool { - i, ok := p.locs[string(key)] - if ok { - p.values[i] = value - } - return ok -} - -// -// Predicate Cache -// - -// predicateCache interacts with the predicateState to keep determined responses -// memoized until the state has been Reset to avoid recomputing nodes. -type predicateCache struct { - state *predicateState - gen uint64 - resp predicateResponse -} - -// newPredicateCache constructs a predicateCache for the provided state. -func newPredicateCache(state *predicateState) predicateCache { - return predicateCache{ - state: state, - gen: 0, - resp: predicateResponse_needMore, - } -} - -// Clone returns a deep copy of p. -func (p *predicateCache) Clone(state *predicateState) *predicateCache { - if state == nil { - state = p.state.Clone() - } - return &predicateCache{ - state: state, - gen: p.gen, - resp: p.resp, - } -} - -// Cached returns the cached response and a boolean indicating if it is valid. -func (p *predicateCache) Cached() (predicateResponse, bool) { - return p.resp, p.gen == p.state.gen -} - -// Store sets the cache to the provided response until the state is Reset. -func (p *predicateCache) Store(resp predicateResponse) { - p.gen = p.state.gen - p.resp = resp -} - -// -// Predicate Nodes -// - -// predicateNode is the interface that any parts of a predicate tree implement. -type predicateNode interface { - // Update informs the node that the state has been updated and asks it to return - // a response. - Update() predicateResponse - - // Clone returns a deep copy of the node. - Clone(state *predicateState) predicateNode -} - -// predicateNodeAnd combines two predicate nodes with an And. -type predicateNodeAnd struct { - predicateCache - left, right predicateNode -} - -// Clone returns a deep copy of p. -func (p *predicateNodeAnd) Clone(state *predicateState) predicateNode { - return &predicateNodeAnd{ - predicateCache: *p.predicateCache.Clone(state), - left: p.left.Clone(state), - right: p.right.Clone(state), - } -} - -// Update checks if both of the left and right nodes are true. If either is false -// then the node is definitely false. Otherwise, it needs more information. 
-func (p *predicateNodeAnd) Update() predicateResponse { - if resp, ok := p.Cached(); ok { - return resp - } - - left := p.left.Update() - if left == predicateResponse_false { - p.Store(predicateResponse_false) - return predicateResponse_false - } else if left == predicateResponse_needMore { - return predicateResponse_needMore - } - - right := p.right.Update() - if right == predicateResponse_false { - p.Store(predicateResponse_false) - return predicateResponse_false - } else if right == predicateResponse_needMore { - return predicateResponse_needMore - } - - return predicateResponse_true -} - -// predicateNodeOr combines two predicate nodes with an Or. -type predicateNodeOr struct { - predicateCache - left, right predicateNode -} - -// Clone returns a deep copy of p. -func (p *predicateNodeOr) Clone(state *predicateState) predicateNode { - return &predicateNodeOr{ - predicateCache: *p.predicateCache.Clone(state), - left: p.left.Clone(state), - right: p.right.Clone(state), - } -} - -// Update checks if either the left and right nodes are true. If both nodes -// are false, then the node is definitely fasle. Otherwise, it needs more information. -func (p *predicateNodeOr) Update() predicateResponse { - if resp, ok := p.Cached(); ok { - return resp - } - - left := p.left.Update() - if left == predicateResponse_true { - p.Store(predicateResponse_true) - return predicateResponse_true - } - - right := p.right.Update() - if right == predicateResponse_true { - p.Store(predicateResponse_true) - return predicateResponse_true - } - - if left == predicateResponse_false && right == predicateResponse_false { - p.Store(predicateResponse_false) - return predicateResponse_false - } - - return predicateResponse_needMore -} - -// predicateNodeComparison compares values of tags. -type predicateNodeComparison struct { - predicateCache - comp datatypes.Node_Comparison - rightReg *regexp.Regexp - leftLiteral []byte - rightLiteral []byte - leftIndex int - rightIndex int -} - -// Clone returns a deep copy of p. -func (p *predicateNodeComparison) Clone(state *predicateState) predicateNode { - q := &predicateNodeComparison{ - predicateCache: *p.predicateCache.Clone(state), - comp: p.comp, - rightReg: p.rightReg, - leftIndex: p.leftIndex, - rightIndex: p.rightIndex, - } - - if p.leftLiteral != nil { - q.leftLiteral = make([]byte, len(p.leftLiteral)) - copy(q.leftLiteral, p.leftLiteral) - } - if p.rightLiteral != nil { - q.rightLiteral = make([]byte, len(p.rightLiteral)) - copy(q.rightLiteral, p.rightLiteral) - } - return q -} - -// Update checks if both sides of the comparison are determined, and if so, evaluates -// the comparison to a determined truth value. -func (p *predicateNodeComparison) Update() predicateResponse { - if resp, ok := p.Cached(); ok { - return resp - } - - left := p.leftLiteral - if left == nil { - left = p.state.values[p.leftIndex] - if left == nil { - return predicateResponse_needMore - } - } - - right := p.rightLiteral - if right == nil && p.rightReg == nil { - right = p.state.values[p.rightIndex] - if right == nil { - return predicateResponse_needMore - } - } - - if predicateEval(p.comp, left, right, p.rightReg) { - p.Store(predicateResponse_true) - return predicateResponse_true - } else { - p.Store(predicateResponse_false) - return predicateResponse_false - } -} - -// predicateEval is a helper to do the appropriate comparison depending on which comparison -// enumeration value was passed. 
-func predicateEval(comp datatypes.Node_Comparison, left, right []byte, rightReg *regexp.Regexp) bool { - switch comp { - case datatypes.ComparisonEqual: - return string(left) == string(right) - case datatypes.ComparisonNotEqual: - return string(left) != string(right) - case datatypes.ComparisonStartsWith: - return bytes.HasPrefix(left, right) - case datatypes.ComparisonLess: - return string(left) < string(right) - case datatypes.ComparisonLessEqual: - return string(left) <= string(right) - case datatypes.ComparisonGreater: - return string(left) > string(right) - case datatypes.ComparisonGreaterEqual: - return string(left) >= string(right) - case datatypes.ComparisonRegex: - return rightReg.Match(left) - case datatypes.ComparisonNotRegex: - return !rightReg.Match(left) - } - return false -} - -// -// Popping Tags -// - -// The models package has some of this logic as well, but doesn't export ways to get -// at individual tags one at a time. In the common, no escape characters case, popping -// the first tag off of a series key takes around ~10ns. - -// predicatePopTag pops a tag=value pair from the front of series, returning the -// remainder in rest. it assumes there are no escaped characters in the series. -func predicatePopTag(series []byte) (tag, value []byte, rest []byte) { - // find the first ',' - i := bytes.IndexByte(series, ',') - if i >= 0 && i < len(series) { - series, rest = series[:i], series[i+1:] - } - - // find the first '=' - j := bytes.IndexByte(series, '=') - if j >= 0 && j < len(series) { - tag, value = series[:j], series[j+1:] - } - - return tag, value, rest -} - -// predicatePopTagEscape pops a tag=value pair from the front of series, returning the -// remainder in rest. it assumes there are possibly/likely escaped characters in the series. -func predicatePopTagEscape(series []byte) (tag, value []byte, rest []byte) { - // find the first unescaped ',' - for j := uint(0); j < uint(len(series)); { - i := bytes.IndexByte(series[j:], ',') - if i < 0 { - break // this is the last tag pair - } - - ui := uint(i) + j // make index relative to full series slice - if ui > 0 && series[ui-1] == '\\' { // the comma is escaped - j = ui + 1 - continue - } - - series, rest = series[:ui], series[ui+1:] - break - } - - // find the first unescaped '=' - for j := uint(0); j < uint(len(series)); { - i := bytes.IndexByte(series[j:], '=') - if i < 0 { - break // there is no tag value - } - ui := uint(i) + j // make index relative to full series slice - if ui > 0 && series[ui-1] == '\\' { // the equals is escaped - j = ui + 1 - continue - } - - tag, value = series[:ui], series[ui+1:] - break - } - - // sad time: it's possible this tag/value has escaped characters, so we have to - // find an unescape them. since the byte slice may refer to read-only memory, we - // can't do this in place, so we make copies. 
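
In the common no-escape case, walking a series key tag by tag is just a loop over predicatePopTag; a small sketch (the key is illustrative):

	rest := []byte("region=west,server=b,foo=bar")
	for len(rest) > 0 {
		var tag, value []byte
		tag, value, rest = predicatePopTag(rest)
		fmt.Printf("%s=%s\n", tag, value) // region=west, then server=b, then foo=bar
	}
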
- if bytes.IndexByte(tag, '\\') != -1 { - unescapedTag := make([]byte, 0, len(tag)) - for i, c := range tag { - if c == '\\' && i+1 < len(tag) { - if c := tag[i+1]; c == ',' || c == ' ' || c == '=' { - continue - } - } - unescapedTag = append(unescapedTag, c) - } - tag = unescapedTag - } - - if bytes.IndexByte(value, '\\') != -1 { - unescapedValue := make([]byte, 0, len(value)) - for i, c := range value { - if c == '\\' && i+1 < len(value) { - if c := value[i+1]; c == ',' || c == ' ' || c == '=' { - continue - } - } - unescapedValue = append(unescapedValue, c) - } - value = unescapedValue - } - - return tag, value, rest -} diff --git a/tsdb/tsm1/predicate_test.go b/tsdb/tsm1/predicate_test.go deleted file mode 100644 index ff9350911c..0000000000 --- a/tsdb/tsm1/predicate_test.go +++ /dev/null @@ -1,544 +0,0 @@ -package tsm1 - -import ( - "fmt" - "reflect" - "testing" - - "github.com/influxdata/influxdb/v2/storage/reads/datatypes" -) - -func TestPredicatePopTagEscape(t *testing.T) { - cases := []struct { - Key string - Tag string - Value string - Rest string - }{ - {Key: "", Tag: "", Value: "", Rest: ""}, - {Key: "invalid", Tag: "", Value: "", Rest: ""}, - {Key: "region=west,server=b,foo=bar", Tag: "region", Value: "west", Rest: "server=b,foo=bar"}, - {Key: "region=west", Tag: "region", Value: "west", Rest: ""}, - {Key: `re\=gion=west,server=a`, Tag: `re=gion`, Value: "west", Rest: "server=a"}, - {Key: `region=w\,est,server=a`, Tag: `region`, Value: "w,est", Rest: "server=a"}, - {Key: `hi\ yo\ =w\,est,server=a`, Tag: `hi yo `, Value: "w,est", Rest: "server=a"}, - {Key: `\ e\ \=o=world,server=a`, Tag: ` e =o`, Value: "world", Rest: "server=a"}, - } - - for _, c := range cases { - tag, value, rest := predicatePopTagEscape([]byte(c.Key)) - if string(tag) != c.Tag { - t.Fatalf("got returned tag %q expected %q", tag, c.Tag) - } else if string(value) != c.Value { - t.Fatalf("got returned value %q expected %q", value, c.Value) - } else if string(rest) != c.Rest { - t.Fatalf("got returned remainder %q expected %q", rest, c.Rest) - } - } -} - -func TestPredicate_Matches(t *testing.T) { - cases := []struct { - Name string - Predicate *datatypes.Predicate - Key string - Matches bool - }{ - { - Name: "Basic Matching", - Predicate: predicate( - comparisonNode(datatypes.ComparisonEqual, tagNode("tag3"), stringNode("val3"))), - Key: "bucketorg,tag3=val3", - Matches: true, - }, - - { - Name: "Basic Unmatching", - Predicate: predicate( - comparisonNode(datatypes.ComparisonEqual, tagNode("tag3"), stringNode("val3"))), - Key: "bucketorg,tag3=val2", - Matches: false, - }, - - { - Name: "Compound Logical Matching", - Predicate: predicate( - orNode( - andNode( - comparisonNode(datatypes.ComparisonEqual, tagNode("foo"), stringNode("bar")), - comparisonNode(datatypes.ComparisonEqual, tagNode("baz"), stringNode("no"))), - comparisonNode(datatypes.ComparisonEqual, tagNode("tag3"), stringNode("val3")))), - Key: "bucketorg,foo=bar,baz=bif,tag3=val3", - Matches: true, - }, - - { - Name: "Compound Logical Unmatching", - Predicate: predicate( - orNode( - andNode( - comparisonNode(datatypes.ComparisonEqual, tagNode("foo"), stringNode("bar")), - comparisonNode(datatypes.ComparisonEqual, tagNode("baz"), stringNode("no"))), - comparisonNode(datatypes.ComparisonEqual, tagNode("tag3"), stringNode("val3")))), - Key: "bucketorg,foo=bar,baz=bif,tag3=val2", - Matches: false, - }, - - { - Name: "Logical Or Short Circuit", - Predicate: predicate( - orNode( - comparisonNode(datatypes.ComparisonEqual, tagNode("foo"), 
stringNode("bar")), - comparisonNode(datatypes.ComparisonEqual, tagNode("baz"), stringNode("no")))), - Key: "bucketorg,baz=bif,foo=bar,tag3=val3", - Matches: true, - }, - - { - Name: "Logical And Short Circuit", - Predicate: predicate( - andNode( - comparisonNode(datatypes.ComparisonEqual, tagNode("foo"), stringNode("no")), - comparisonNode(datatypes.ComparisonEqual, tagNode("baz"), stringNode("bif")))), - Key: "bucketorg,baz=bif,foo=bar,tag3=val3", - Matches: false, - }, - - { - Name: "Logical And Matching", - Predicate: predicate( - andNode( - comparisonNode(datatypes.ComparisonEqual, tagNode("foo"), stringNode("bar")), - comparisonNode(datatypes.ComparisonEqual, tagNode("baz"), stringNode("bif")))), - Key: "bucketorg,baz=bif,foo=bar,tag3=val3", - Matches: true, - }, - - { - Name: "Logical And Matching Reduce (Simplify)", - Predicate: predicate( - andNode( - comparisonNode(datatypes.ComparisonEqual, tagNode("foo"), stringNode("bar")), - comparisonNode(datatypes.ComparisonNotEqual, tagNode("foo"), stringNode("bif")))), - Key: "bucketorg,baz=bif,foo=bar,tag3=val3", - Matches: true, - }, - - { - Name: "Regex Matching", - Predicate: predicate( - comparisonNode(datatypes.ComparisonRegex, tagNode("tag3"), regexNode("...3"))), - Key: "bucketorg,tag3=val3", - Matches: true, - }, - - { - Name: "NotRegex Matching", - Predicate: predicate( - comparisonNode(datatypes.ComparisonNotRegex, tagNode("tag3"), regexNode("...4"))), - Key: "bucketorg,tag3=val3", - Matches: true, - }, - - { - Name: "Regex Unmatching", - Predicate: predicate( - comparisonNode(datatypes.ComparisonRegex, tagNode("tag3"), regexNode("...4"))), - Key: "bucketorg,tag3=val3", - Matches: false, - }, - - { - Name: "NotRegex Unmatching", - Predicate: predicate( - comparisonNode(datatypes.ComparisonNotRegex, tagNode("tag3"), regexNode("...3"))), - Key: "bucketorg,tag3=val3", - Matches: false, - }, - - { - Name: "Basic Matching Reversed", - Predicate: predicate( - comparisonNode(datatypes.ComparisonEqual, stringNode("val3"), tagNode("tag3"))), - Key: "bucketorg,tag2=val2,tag3=val3", - Matches: true, - }, - - { - Name: "Tag Matching Tag", - Predicate: predicate( - comparisonNode(datatypes.ComparisonEqual, tagNode("tag4"), tagNode("tag3"))), - Key: "bucketorg,tag3=val3,tag4=val3", - Matches: true, - }, - - { - Name: "No Tag", - Predicate: predicate( - comparisonNode(datatypes.ComparisonEqual, tagNode("tag4"), stringNode("val4"))), - Key: "bucketorg,tag3=val3", - Matches: false, - }, - - { - Name: "Not Equal", - Predicate: predicate( - comparisonNode(datatypes.ComparisonNotEqual, tagNode("tag3"), stringNode("val4"))), - Key: "bucketorg,tag3=val3", - Matches: true, - }, - - { - Name: "Starts With", - Predicate: predicate( - comparisonNode(datatypes.ComparisonStartsWith, tagNode("tag3"), stringNode("va"))), - Key: "bucketorg,tag3=val3", - Matches: true, - }, - - { - Name: "Less", - Predicate: predicate( - comparisonNode(datatypes.ComparisonLess, tagNode("tag3"), stringNode("val4"))), - Key: "bucketorg,tag3=val3", - Matches: true, - }, - - { - Name: "Less Equal", - Predicate: predicate( - comparisonNode(datatypes.ComparisonLessEqual, tagNode("tag3"), stringNode("val4"))), - Key: "bucketorg,tag3=val3", - Matches: true, - }, - - { - Name: "Greater", - Predicate: predicate( - comparisonNode(datatypes.ComparisonGreater, tagNode("tag3"), stringNode("u"))), - Key: "bucketorg,tag3=val3", - Matches: true, - }, - - { - Name: "Greater Equal;", - Predicate: predicate( - comparisonNode(datatypes.ComparisonGreaterEqual, tagNode("tag3"), stringNode("u"))), - 
Key: "bucketorg,tag3=val3", - Matches: true, - }, - - { - Name: "Escaping Matching", - Predicate: predicate( - comparisonNode(datatypes.ComparisonEqual, tagNode("tag3"), stringNode("val3"))), - Key: `bucketorg,tag1=\,foo,tag2=\ bar,tag2\=more=val2\,\ \=hello,tag3=val3`, - Matches: true, - }, - } - - for _, test := range cases { - t.Run(test.Name, func(t *testing.T) { - pred, err := NewProtobufPredicate(test.Predicate) - if err != nil { - t.Fatal("compile failure:", err) - } - - if got, exp := pred.Matches([]byte(test.Key)), test.Matches; got != exp { - t.Fatal("match failure:", "got", got, "!=", "exp", exp) - } - - // Clone and try again. - pred = pred.Clone() - if got, exp := pred.Matches([]byte(test.Key)), test.Matches; got != exp { - t.Fatal("cloned match failure:", "got", got, "!=", "exp", exp) - } - }) - } -} - -func TestPredicate_Unmarshal(t *testing.T) { - protoPred := predicate( - orNode( - andNode( - comparisonNode(datatypes.ComparisonEqual, tagNode("foo"), stringNode("bar")), - comparisonNode(datatypes.ComparisonEqual, tagNode("baz"), stringNode("no"))), - comparisonNode(datatypes.ComparisonEqual, tagNode("tag3"), stringNode("val3")))) - - pred1, err := NewProtobufPredicate(protoPred) - if err != nil { - t.Fatal(err) - } - - predData, err := pred1.Marshal() - if err != nil { - t.Fatal(err) - } - - pred2, err := UnmarshalPredicate(predData) - if err != nil { - t.Fatal(err) - } - - if !reflect.DeepEqual(pred1, pred2) { - t.Fatal("mismatch on unmarshal") - } -} - -func TestPredicate_Unmarshal_InvalidTag(t *testing.T) { - _, err := UnmarshalPredicate([]byte("\xff")) - if err == nil { - t.Fatal("expected error") - } -} - -func TestPredicate_Unmarshal_InvalidProtobuf(t *testing.T) { - _, err := UnmarshalPredicate([]byte("\x00\xff")) - if err == nil { - t.Fatal("expected error") - } -} - -func TestPredicate_Unmarshal_Empty(t *testing.T) { - pred, err := UnmarshalPredicate(nil) - if err != nil { - t.Fatal(err) - } else if pred != nil { - t.Fatal("expected no predicate") - } -} - -func TestPredicate_Invalid_Protobuf(t *testing.T) { - cases := []struct { - Name string - Predicate *datatypes.Predicate - }{ - { - Name: "Invalid Comparison Num Children", - Predicate: predicate(&datatypes.Node{ - NodeType: datatypes.NodeTypeComparisonExpression, - Value: &datatypes.Node_Comparison_{Comparison: datatypes.ComparisonEqual}, - Children: []*datatypes.Node{{}, {}, {}}, - }), - }, - - { - Name: "Mismatching Left Tag Type", - Predicate: predicate( - comparisonNode(datatypes.ComparisonEqual, &datatypes.Node{ - NodeType: datatypes.NodeTypeTagRef, - Value: &datatypes.Node_IntegerValue{IntegerValue: 2}, - }, tagNode("tag"))), - }, - - { - Name: "Mismatching Left Literal Type", - Predicate: predicate( - comparisonNode(datatypes.ComparisonEqual, &datatypes.Node{ - NodeType: datatypes.NodeTypeLiteral, - Value: &datatypes.Node_IntegerValue{IntegerValue: 2}, - }, tagNode("tag"))), - }, - - { - Name: "Invalid Left Node Type", - Predicate: predicate( - comparisonNode(datatypes.ComparisonEqual, &datatypes.Node{ - NodeType: datatypes.NodeTypeComparisonExpression, - Value: &datatypes.Node_Comparison_{Comparison: datatypes.ComparisonEqual}, - }, tagNode("tag"))), - }, - - { - Name: "Mismatching Right Tag Type", - Predicate: predicate( - comparisonNode(datatypes.ComparisonEqual, tagNode("tag"), &datatypes.Node{ - NodeType: datatypes.NodeTypeTagRef, - Value: &datatypes.Node_IntegerValue{IntegerValue: 2}, - })), - }, - - { - Name: "Invalid Regex", - Predicate: predicate( - comparisonNode(datatypes.ComparisonRegex, 
tagNode("tag3"), regexNode("("))), - }, - - { - Name: "Mismatching Right Literal Type", - Predicate: predicate( - comparisonNode(datatypes.ComparisonEqual, tagNode("tag"), &datatypes.Node{ - NodeType: datatypes.NodeTypeLiteral, - Value: &datatypes.Node_IntegerValue{IntegerValue: 2}, - })), - }, - - { - Name: "Invalid Right Node Type", - Predicate: predicate( - comparisonNode(datatypes.ComparisonEqual, tagNode("tag"), &datatypes.Node{ - NodeType: datatypes.NodeTypeComparisonExpression, - Value: &datatypes.Node_Comparison_{Comparison: datatypes.ComparisonEqual}, - })), - }, - - { - Name: "Invalid Comparison Without Regex", - Predicate: predicate( - comparisonNode(datatypes.ComparisonRegex, tagNode("tag3"), stringNode("val3"))), - }, - - { - Name: "Invalid Comparison With Regex", - Predicate: predicate( - comparisonNode(datatypes.ComparisonEqual, tagNode("tag3"), regexNode("."))), - }, - - { - Name: "Invalid Logical Operation Children", - Predicate: predicate(&datatypes.Node{ - NodeType: datatypes.NodeTypeLogicalExpression, - Value: &datatypes.Node_Logical_{Logical: datatypes.LogicalAnd}, - Children: []*datatypes.Node{{}, {}, {}}, - }), - }, - - { - Name: "Invalid Left Logical Expression", - Predicate: predicate( - andNode( - tagNode("tag"), - comparisonNode(datatypes.ComparisonEqual, tagNode("tag3"), stringNode("val3")), - )), - }, - - { - Name: "Invalid Right Logical Expression", - Predicate: predicate( - andNode( - comparisonNode(datatypes.ComparisonEqual, tagNode("tag3"), stringNode("val3")), - tagNode("tag"), - )), - }, - - { - Name: "Invalid Logical Value", - Predicate: predicate(&datatypes.Node{ - NodeType: datatypes.NodeTypeLogicalExpression, - Value: &datatypes.Node_Logical_{Logical: 9999}, - Children: []*datatypes.Node{ - comparisonNode(datatypes.ComparisonEqual, tagNode("tag3"), stringNode("val3")), - comparisonNode(datatypes.ComparisonEqual, tagNode("tag3"), stringNode("val3")), - }, - }), - }, - - { - Name: "Invalid Root Node", - Predicate: predicate(tagNode("tag3")), - }, - } - - for _, test := range cases { - t.Run(test.Name, func(t *testing.T) { - _, err := NewProtobufPredicate(test.Predicate) - if err == nil { - t.Fatal("expected compile failure") - } - }) - } -} - -func BenchmarkPredicate(b *testing.B) { - run := func(b *testing.B, predicate *datatypes.Predicate) { - pred, err := NewProtobufPredicate(predicate) - if err != nil { - b.Fatal(err) - } - - series := []byte("bucketorg,") - for i := 0; i < 10; i++ { - series = append(series, fmt.Sprintf("tag%d=val%d,", i, i)...) 
- } - series = series[:len(series)-1] - - b.SetBytes(int64(len(series))) - b.ReportAllocs() - b.ResetTimer() - - for i := 0; i < b.N; i++ { - pred.Matches(series) - } - } - - b.Run("Basic", func(b *testing.B) { - run(b, predicate( - comparisonNode(datatypes.ComparisonEqual, tagNode("tag5"), stringNode("val5")), - )) - }) - - b.Run("Compound", func(b *testing.B) { - run(b, predicate( - orNode( - andNode( - comparisonNode(datatypes.ComparisonEqual, tagNode("tag0"), stringNode("val0")), - comparisonNode(datatypes.ComparisonEqual, tagNode("tag6"), stringNode("val5")), - ), - comparisonNode(datatypes.ComparisonEqual, tagNode("tag5"), stringNode("val5")), - ), - )) - }) -} - -// -// Helpers to create predicate protobufs -// - -func tagNode(s string) *datatypes.Node { - return &datatypes.Node{ - NodeType: datatypes.NodeTypeTagRef, - Value: &datatypes.Node_TagRefValue{TagRefValue: s}, - } -} - -func stringNode(s string) *datatypes.Node { - return &datatypes.Node{ - NodeType: datatypes.NodeTypeLiteral, - Value: &datatypes.Node_StringValue{StringValue: s}, - } -} - -func regexNode(s string) *datatypes.Node { - return &datatypes.Node{ - NodeType: datatypes.NodeTypeLiteral, - Value: &datatypes.Node_RegexValue{RegexValue: s}, - } -} - -func comparisonNode(comp datatypes.Node_Comparison, left, right *datatypes.Node) *datatypes.Node { - return &datatypes.Node{ - NodeType: datatypes.NodeTypeComparisonExpression, - Value: &datatypes.Node_Comparison_{Comparison: comp}, - Children: []*datatypes.Node{left, right}, - } -} - -func andNode(left, right *datatypes.Node) *datatypes.Node { - return &datatypes.Node{ - NodeType: datatypes.NodeTypeLogicalExpression, - Value: &datatypes.Node_Logical_{Logical: datatypes.LogicalAnd}, - Children: []*datatypes.Node{left, right}, - } -} - -func orNode(left, right *datatypes.Node) *datatypes.Node { - return &datatypes.Node{ - NodeType: datatypes.NodeTypeLogicalExpression, - Value: &datatypes.Node_Logical_{Logical: datatypes.LogicalOr}, - Children: []*datatypes.Node{left, right}, - } -} - -func predicate(root *datatypes.Node) *datatypes.Predicate { - return &datatypes.Predicate{Root: root} -} diff --git a/tsdb/tsm1/reader.gen.go b/tsdb/tsm1/reader.gen.go deleted file mode 100644 index 37987d5410..0000000000 --- a/tsdb/tsm1/reader.gen.go +++ /dev/null @@ -1,330 +0,0 @@ -// Generated by tmpl -// https://github.com/benbjohnson/tmpl -// -// DO NOT EDIT! -// Source: reader.gen.go.tmpl - -package tsm1 - -import ( - "github.com/influxdata/influxdb/v2/tsdb/cursors" -) - -// ReadFloatBlockAt returns the float values corresponding to the given index entry. -func (t *TSMReader) ReadFloatBlockAt(entry *IndexEntry, vals *[]FloatValue) ([]FloatValue, error) { - t.mu.RLock() - v, err := t.accessor.readFloatBlock(entry, vals) - t.mu.RUnlock() - return v, err -} - -// ReadFloatArrayBlockAt fills vals with the float values corresponding to the given index entry. -func (t *TSMReader) ReadFloatArrayBlockAt(entry *IndexEntry, vals *cursors.FloatArray) error { - t.mu.RLock() - err := t.accessor.readFloatArrayBlock(entry, vals) - t.mu.RUnlock() - return err -} - -// ReadIntegerBlockAt returns the integer values corresponding to the given index entry. -func (t *TSMReader) ReadIntegerBlockAt(entry *IndexEntry, vals *[]IntegerValue) ([]IntegerValue, error) { - t.mu.RLock() - v, err := t.accessor.readIntegerBlock(entry, vals) - t.mu.RUnlock() - return v, err -} - -// ReadIntegerArrayBlockAt fills vals with the integer values corresponding to the given index entry. 
-func (t *TSMReader) ReadIntegerArrayBlockAt(entry *IndexEntry, vals *cursors.IntegerArray) error { - t.mu.RLock() - err := t.accessor.readIntegerArrayBlock(entry, vals) - t.mu.RUnlock() - return err -} - -// ReadUnsignedBlockAt returns the unsigned values corresponding to the given index entry. -func (t *TSMReader) ReadUnsignedBlockAt(entry *IndexEntry, vals *[]UnsignedValue) ([]UnsignedValue, error) { - t.mu.RLock() - v, err := t.accessor.readUnsignedBlock(entry, vals) - t.mu.RUnlock() - return v, err -} - -// ReadUnsignedArrayBlockAt fills vals with the unsigned values corresponding to the given index entry. -func (t *TSMReader) ReadUnsignedArrayBlockAt(entry *IndexEntry, vals *cursors.UnsignedArray) error { - t.mu.RLock() - err := t.accessor.readUnsignedArrayBlock(entry, vals) - t.mu.RUnlock() - return err -} - -// ReadStringBlockAt returns the string values corresponding to the given index entry. -func (t *TSMReader) ReadStringBlockAt(entry *IndexEntry, vals *[]StringValue) ([]StringValue, error) { - t.mu.RLock() - v, err := t.accessor.readStringBlock(entry, vals) - t.mu.RUnlock() - return v, err -} - -// ReadStringArrayBlockAt fills vals with the string values corresponding to the given index entry. -func (t *TSMReader) ReadStringArrayBlockAt(entry *IndexEntry, vals *cursors.StringArray) error { - t.mu.RLock() - err := t.accessor.readStringArrayBlock(entry, vals) - t.mu.RUnlock() - return err -} - -// ReadBooleanBlockAt returns the boolean values corresponding to the given index entry. -func (t *TSMReader) ReadBooleanBlockAt(entry *IndexEntry, vals *[]BooleanValue) ([]BooleanValue, error) { - t.mu.RLock() - v, err := t.accessor.readBooleanBlock(entry, vals) - t.mu.RUnlock() - return v, err -} - -// ReadBooleanArrayBlockAt fills vals with the boolean values corresponding to the given index entry. -func (t *TSMReader) ReadBooleanArrayBlockAt(entry *IndexEntry, vals *cursors.BooleanArray) error { - t.mu.RLock() - err := t.accessor.readBooleanArrayBlock(entry, vals) - t.mu.RUnlock() - return err -} - -// blockAccessor abstracts a method of accessing blocks from a -// TSM file. 
-type blockAccessor interface { - init() (*indirectIndex, error) - read(key []byte, timestamp int64) ([]Value, error) - readAll(key []byte) ([]Value, error) - readBlock(entry *IndexEntry, values []Value) ([]Value, error) - readFloatBlock(entry *IndexEntry, values *[]FloatValue) ([]FloatValue, error) - readFloatArrayBlock(entry *IndexEntry, values *cursors.FloatArray) error - readIntegerBlock(entry *IndexEntry, values *[]IntegerValue) ([]IntegerValue, error) - readIntegerArrayBlock(entry *IndexEntry, values *cursors.IntegerArray) error - readUnsignedBlock(entry *IndexEntry, values *[]UnsignedValue) ([]UnsignedValue, error) - readUnsignedArrayBlock(entry *IndexEntry, values *cursors.UnsignedArray) error - readStringBlock(entry *IndexEntry, values *[]StringValue) ([]StringValue, error) - readStringArrayBlock(entry *IndexEntry, values *cursors.StringArray) error - readBooleanBlock(entry *IndexEntry, values *[]BooleanValue) ([]BooleanValue, error) - readBooleanArrayBlock(entry *IndexEntry, values *cursors.BooleanArray) error - readBytes(entry *IndexEntry, buf []byte) (uint32, []byte, error) - rename(path string) error - path() string - close() error - free() error -} - -func (m *mmapAccessor) readFloatBlock(entry *IndexEntry, values *[]FloatValue) ([]FloatValue, error) { - m.incAccess() - - m.mu.RLock() - if int64(len(m.b)) < entry.Offset+int64(entry.Size) { - m.mu.RUnlock() - return nil, ErrTSMClosed - } - - b := m.b[entry.Offset+4 : entry.Offset+int64(entry.Size)] - a, err := DecodeFloatBlock(b, values) - m.mu.RUnlock() - - if err != nil { - return nil, err - } else if err := m.wait(b); err != nil { - return nil, err - } - - return a, nil -} - -func (m *mmapAccessor) readFloatArrayBlock(entry *IndexEntry, values *cursors.FloatArray) error { - m.incAccess() - - m.mu.RLock() - if int64(len(m.b)) < entry.Offset+int64(entry.Size) { - m.mu.RUnlock() - return ErrTSMClosed - } - - b := m.b[entry.Offset+4 : entry.Offset+int64(entry.Size)] - err := DecodeFloatArrayBlock(b, values) - m.mu.RUnlock() - - if err != nil { - return err - } else if err := m.wait(b); err != nil { - return err - } - return nil -} - -func (m *mmapAccessor) readIntegerBlock(entry *IndexEntry, values *[]IntegerValue) ([]IntegerValue, error) { - m.incAccess() - - m.mu.RLock() - if int64(len(m.b)) < entry.Offset+int64(entry.Size) { - m.mu.RUnlock() - return nil, ErrTSMClosed - } - - b := m.b[entry.Offset+4 : entry.Offset+int64(entry.Size)] - a, err := DecodeIntegerBlock(b, values) - m.mu.RUnlock() - - if err != nil { - return nil, err - } else if err := m.wait(b); err != nil { - return nil, err - } - - return a, nil -} - -func (m *mmapAccessor) readIntegerArrayBlock(entry *IndexEntry, values *cursors.IntegerArray) error { - m.incAccess() - - m.mu.RLock() - if int64(len(m.b)) < entry.Offset+int64(entry.Size) { - m.mu.RUnlock() - return ErrTSMClosed - } - - b := m.b[entry.Offset+4 : entry.Offset+int64(entry.Size)] - err := DecodeIntegerArrayBlock(b, values) - m.mu.RUnlock() - - if err != nil { - return err - } else if err := m.wait(b); err != nil { - return err - } - return nil -} - -func (m *mmapAccessor) readUnsignedBlock(entry *IndexEntry, values *[]UnsignedValue) ([]UnsignedValue, error) { - m.incAccess() - - m.mu.RLock() - if int64(len(m.b)) < entry.Offset+int64(entry.Size) { - m.mu.RUnlock() - return nil, ErrTSMClosed - } - - b := m.b[entry.Offset+4 : entry.Offset+int64(entry.Size)] - a, err := DecodeUnsignedBlock(b, values) - m.mu.RUnlock() - - if err != nil { - return nil, err - } else if err := m.wait(b); err != nil { - 
return nil, err - } - - return a, nil -} - -func (m *mmapAccessor) readUnsignedArrayBlock(entry *IndexEntry, values *cursors.UnsignedArray) error { - m.incAccess() - - m.mu.RLock() - if int64(len(m.b)) < entry.Offset+int64(entry.Size) { - m.mu.RUnlock() - return ErrTSMClosed - } - - b := m.b[entry.Offset+4 : entry.Offset+int64(entry.Size)] - err := DecodeUnsignedArrayBlock(b, values) - m.mu.RUnlock() - - if err != nil { - return err - } else if err := m.wait(b); err != nil { - return err - } - return nil -} - -func (m *mmapAccessor) readStringBlock(entry *IndexEntry, values *[]StringValue) ([]StringValue, error) { - m.incAccess() - - m.mu.RLock() - if int64(len(m.b)) < entry.Offset+int64(entry.Size) { - m.mu.RUnlock() - return nil, ErrTSMClosed - } - - b := m.b[entry.Offset+4 : entry.Offset+int64(entry.Size)] - a, err := DecodeStringBlock(b, values) - m.mu.RUnlock() - - if err != nil { - return nil, err - } else if err := m.wait(b); err != nil { - return nil, err - } - - return a, nil -} - -func (m *mmapAccessor) readStringArrayBlock(entry *IndexEntry, values *cursors.StringArray) error { - m.incAccess() - - m.mu.RLock() - if int64(len(m.b)) < entry.Offset+int64(entry.Size) { - m.mu.RUnlock() - return ErrTSMClosed - } - - b := m.b[entry.Offset+4 : entry.Offset+int64(entry.Size)] - err := DecodeStringArrayBlock(b, values) - m.mu.RUnlock() - - if err != nil { - return err - } else if err := m.wait(b); err != nil { - return err - } - return nil -} - -func (m *mmapAccessor) readBooleanBlock(entry *IndexEntry, values *[]BooleanValue) ([]BooleanValue, error) { - m.incAccess() - - m.mu.RLock() - if int64(len(m.b)) < entry.Offset+int64(entry.Size) { - m.mu.RUnlock() - return nil, ErrTSMClosed - } - - b := m.b[entry.Offset+4 : entry.Offset+int64(entry.Size)] - a, err := DecodeBooleanBlock(b, values) - m.mu.RUnlock() - - if err != nil { - return nil, err - } else if err := m.wait(b); err != nil { - return nil, err - } - - return a, nil -} - -func (m *mmapAccessor) readBooleanArrayBlock(entry *IndexEntry, values *cursors.BooleanArray) error { - m.incAccess() - - m.mu.RLock() - if int64(len(m.b)) < entry.Offset+int64(entry.Size) { - m.mu.RUnlock() - return ErrTSMClosed - } - - b := m.b[entry.Offset+4 : entry.Offset+int64(entry.Size)] - err := DecodeBooleanArrayBlock(b, values) - m.mu.RUnlock() - - if err != nil { - return err - } else if err := m.wait(b); err != nil { - return err - } - return nil -} diff --git a/tsdb/tsm1/reader.gen.go.tmpl b/tsdb/tsm1/reader.gen.go.tmpl deleted file mode 100644 index fa7036bd76..0000000000 --- a/tsdb/tsm1/reader.gen.go.tmpl +++ /dev/null @@ -1,86 +0,0 @@ -package tsm1 - -import ( - "github.com/influxdata/influxdb/v2/tsdb/cursors" -) - -{{range .}} -// Read{{.Name}}BlockAt returns the {{.name}} values corresponding to the given index entry. -func (t *TSMReader) Read{{.Name}}BlockAt(entry *IndexEntry, vals *[]{{.Name}}Value) ([]{{.Name}}Value, error) { - t.mu.RLock() - v, err := t.accessor.read{{.Name}}Block(entry, vals) - t.mu.RUnlock() - return v, err -} - -// Read{{.Name}}ArrayBlockAt fills vals with the {{.name}} values corresponding to the given index entry. -func (t *TSMReader) Read{{.Name}}ArrayBlockAt(entry *IndexEntry, vals *cursors.{{.Name}}Array) error { - t.mu.RLock() - err := t.accessor.read{{.Name}}ArrayBlock(entry, vals) - t.mu.RUnlock() - return err -} -{{end}} - -// blockAccessor abstracts a method of accessing blocks from a -// TSM file. 
-type blockAccessor interface { - init() (*indirectIndex, error) - read(key []byte, timestamp int64) ([]Value, error) - readAll(key []byte) ([]Value, error) - readBlock(entry *IndexEntry, values []Value) ([]Value, error) -{{- range .}} - read{{.Name}}Block(entry *IndexEntry, values *[]{{.Name}}Value) ([]{{.Name}}Value, error) - read{{.Name}}ArrayBlock(entry *IndexEntry, values *cursors.{{.Name}}Array) error -{{- end}} - readBytes(entry *IndexEntry, buf []byte) (uint32, []byte, error) - rename(path string) error - path() string - close() error - free() error -} - -{{range .}} -func (m *mmapAccessor) read{{.Name}}Block(entry *IndexEntry, values *[]{{.Name}}Value) ([]{{.Name}}Value, error) { - m.incAccess() - - m.mu.RLock() - if int64(len(m.b)) < entry.Offset+int64(entry.Size) { - m.mu.RUnlock() - return nil, ErrTSMClosed - } - - b := m.b[entry.Offset+4:entry.Offset+int64(entry.Size)] - a, err := Decode{{.Name}}Block(b, values) - m.mu.RUnlock() - - if err != nil { - return nil, err - } else if err := m.wait(b); err != nil { - return nil, err - } - - return a, nil -} - -func (m *mmapAccessor) read{{.Name}}ArrayBlock(entry *IndexEntry, values *cursors.{{.Name}}Array) error { - m.incAccess() - - m.mu.RLock() - if int64(len(m.b)) < entry.Offset+int64(entry.Size) { - m.mu.RUnlock() - return ErrTSMClosed - } - - b := m.b[entry.Offset+4:entry.Offset+int64(entry.Size)] - err := Decode{{.Name}}ArrayBlock(b, values) - m.mu.RUnlock() - - if err != nil { - return err - } else if err := m.wait(b); err != nil { - return err - } - return nil -} -{{end}} diff --git a/tsdb/tsm1/reader.gen.go.tmpldata b/tsdb/tsm1/reader.gen.go.tmpldata deleted file mode 100644 index 236ba310ba..0000000000 --- a/tsdb/tsm1/reader.gen.go.tmpldata +++ /dev/null @@ -1,22 +0,0 @@ -[ - { - "Name":"Float", - "name":"float" - }, - { - "Name":"Integer", - "name":"integer" - }, - { - "Name":"Unsigned", - "name":"unsigned" - }, - { - "Name":"String", - "name":"string" - }, - { - "Name":"Boolean", - "name":"boolean" - } -] diff --git a/tsdb/tsm1/reader.go b/tsdb/tsm1/reader.go deleted file mode 100644 index 137b6c2ae5..0000000000 --- a/tsdb/tsm1/reader.go +++ /dev/null @@ -1,643 +0,0 @@ -package tsm1 - -import ( - "bufio" - "fmt" - "os" - "sync" - "sync/atomic" - - "github.com/influxdata/influxdb/v2/pkg/mincore" - "go.uber.org/zap" - "golang.org/x/time/rate" -) - -// ErrFileInUse is returned when attempting to remove or close a TSM file that is still being used. -var ErrFileInUse = fmt.Errorf("file still in use") - -// TSMReader is a reader for a TSM file. -type TSMReader struct { - // refs is the count of active references to this reader. - refs int64 - refsWG sync.WaitGroup - - logger *zap.Logger - madviseWillNeed bool // Hint to the kernel with MADV_WILLNEED. - mu sync.RWMutex - - // accessor provides access and decoding of blocks for the reader. - accessor blockAccessor - - // index is the index of all blocks. - index TSMIndex - - // tombstoner ensures tombstoned keys are not available by the index. - tombstoner *Tombstoner - - // size is the size of the file on disk. - size int64 - - // lastModified is the last time this file was modified on disk - lastModified int64 - - // deleteMu limits concurrent deletes - deleteMu sync.Mutex - - // limiter rate limits page faults by the underlying memory maps. - pageFaultLimiter *rate.Limiter -} - -type tsmReaderOption func(*TSMReader) - -// WithMadviseWillNeed is an option for specifying whether to provide a MADV_WILL need hint to the kernel. 
-var WithMadviseWillNeed = func(willNeed bool) tsmReaderOption { - return func(r *TSMReader) { - r.madviseWillNeed = willNeed - } -} - -var WithTSMReaderPageFaultLimiter = func(limiter *rate.Limiter) tsmReaderOption { - return func(r *TSMReader) { - r.pageFaultLimiter = limiter - } -} - -var WithTSMReaderLogger = func(logger *zap.Logger) tsmReaderOption { - return func(r *TSMReader) { - r.logger = logger - } -} - -// NewTSMReader returns a new TSMReader from the given file. -func NewTSMReader(f *os.File, options ...tsmReaderOption) (*TSMReader, error) { - t := &TSMReader{ - logger: zap.NewNop(), - } - for _, option := range options { - option(t) - } - - stat, err := f.Stat() - if err != nil { - return nil, err - } - t.size = stat.Size() - t.lastModified = stat.ModTime().UnixNano() - accessor := &mmapAccessor{ - logger: t.logger, - f: f, - mmapWillNeed: t.madviseWillNeed, - } - - index, err := accessor.init() - if err != nil { - return nil, err - } - - // Set a limiter if passed in through options. - if t.pageFaultLimiter != nil { - accessor.pageFaultLimiter = mincore.NewLimiter(t.pageFaultLimiter, accessor.b) - } - - t.accessor = accessor - t.index = index - t.tombstoner = NewTombstoner(t.Path(), index.MaybeContainsKey) - - if err := t.applyTombstones(); err != nil { - return nil, err - } - - return t, nil -} - -// WithObserver sets the observer for the TSM reader. -func (t *TSMReader) WithObserver(obs FileStoreObserver) { - if obs == nil { - obs = noFileStoreObserver{} - } - t.tombstoner.WithObserver(obs) -} - -func (t *TSMReader) applyTombstones() error { - var cur, prev Tombstone - batch := make([][]byte, 0, 4096) - - if err := t.tombstoner.Walk(func(ts Tombstone) error { - if ts.Prefix { - pred, err := UnmarshalPredicate(ts.Predicate) - if err != nil { - return err - } - t.index.DeletePrefix(ts.Key, ts.Min, ts.Max, pred, nil) - return nil - } - - cur = ts - if len(batch) > 0 { - if prev.Min != cur.Min || prev.Max != cur.Max { - t.index.DeleteRange(batch, prev.Min, prev.Max) - batch = batch[:0] - } - } - - // Copy the tombstone key and re-use the buffers to avoid allocations - n := len(batch) - batch = batch[:n+1] - if cap(batch[n]) < len(ts.Key) { - batch[n] = make([]byte, len(ts.Key)) - } else { - batch[n] = batch[n][:len(ts.Key)] - } - copy(batch[n], ts.Key) - - if len(batch) >= 4096 { - t.index.DeleteRange(batch, prev.Min, prev.Max) - batch = batch[:0] - } - - prev = ts - return nil - }); err != nil { - return fmt.Errorf("init: read tombstones: %v", err) - } - - if len(batch) > 0 { - t.index.DeleteRange(batch, cur.Min, cur.Max) - } - return nil -} - -func (t *TSMReader) Free() error { - t.mu.RLock() - defer t.mu.RUnlock() - return t.accessor.free() -} - -// Path returns the path of the file the TSMReader was initialized with. -func (t *TSMReader) Path() string { - t.mu.RLock() - p := t.accessor.path() - t.mu.RUnlock() - return p -} - -// ReadAt returns the values corresponding to the given index entry. -func (t *TSMReader) ReadAt(entry *IndexEntry, vals []Value) ([]Value, error) { - t.mu.RLock() - v, err := t.accessor.readBlock(entry, vals) - t.mu.RUnlock() - return v, err -} - -// Read returns the values corresponding to the block at the given key and timestamp. -func (t *TSMReader) Read(key []byte, timestamp int64) ([]Value, error) { - t.mu.RLock() - v, err := t.accessor.read(key, timestamp) - t.mu.RUnlock() - return v, err -} - -// ReadAll returns all values for a key in all blocks. 
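
Opening a reader with the functional options above might look like the following sketch (tsmPath and logger are assumed to exist; logger is a *zap.Logger):

	f, err := os.Open(tsmPath) // tsmPath is an assumed path to a *.tsm file
	if err != nil {
		return err
	}
	r, err := NewTSMReader(f,
		WithMadviseWillNeed(true),
		WithTSMReaderLogger(logger),
	)
	if err != nil {
		return err
	}
	defer r.Close()
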
-func (t *TSMReader) ReadAll(key []byte) ([]Value, error) { - t.mu.RLock() - v, err := t.accessor.readAll(key) - t.mu.RUnlock() - return v, err -} - -func (t *TSMReader) ReadBytes(e *IndexEntry, b []byte) (uint32, []byte, error) { - t.mu.RLock() - n, v, err := t.accessor.readBytes(e, b) - t.mu.RUnlock() - return n, v, err -} - -// Type returns the type of values stored at the given key. -func (t *TSMReader) Type(key []byte) (byte, error) { - return t.index.Type(key) -} - -// MeasurementStats returns the on-disk measurement stats for this file, if available. -func (t *TSMReader) MeasurementStats() (MeasurementStats, error) { - f, err := os.Open(StatsFilename(t.Path())) - if os.IsNotExist(err) { - return make(MeasurementStats), nil - } else if err != nil { - return nil, err - } - defer f.Close() - - stats := make(MeasurementStats) - if _, err := stats.ReadFrom(bufio.NewReader(f)); err != nil { - return nil, err - } - return stats, err -} - -// Close closes the TSMReader. -func (t *TSMReader) Close() error { - t.refsWG.Wait() - - t.mu.Lock() - defer t.mu.Unlock() - - if err := t.accessor.close(); err != nil { - return err - } - - return t.index.Close() -} - -// Ref records a usage of this TSMReader. If there are active references -// when the reader is closed or removed, the reader will remain open until -// there are no more references. -func (t *TSMReader) Ref() { - atomic.AddInt64(&t.refs, 1) - t.refsWG.Add(1) -} - -// Unref removes a usage record of this TSMReader. If the Reader was closed -// by another goroutine while there were active references, the file will -// be closed and remove -func (t *TSMReader) Unref() { - atomic.AddInt64(&t.refs, -1) - t.refsWG.Done() -} - -// InUse returns whether the TSMReader currently has any active references. -func (t *TSMReader) InUse() bool { - refs := atomic.LoadInt64(&t.refs) - return refs > 0 -} - -// Remove removes any underlying files stored on disk for this reader. -func (t *TSMReader) Remove() error { - t.mu.Lock() - defer t.mu.Unlock() - return t.remove() -} - -// Rename renames the underlying file to the new path. -func (t *TSMReader) Rename(path string) error { - t.mu.Lock() - defer t.mu.Unlock() - return t.accessor.rename(path) -} - -// Remove removes any underlying files stored on disk for this reader. -func (t *TSMReader) remove() error { - path := t.accessor.path() - - if t.InUse() { - return ErrFileInUse - } - - if path != "" { - if err := os.RemoveAll(path); err != nil { - return err - } else if err := os.RemoveAll(StatsFilename(path)); err != nil && !os.IsNotExist(err) { - return err - } - } - - if err := t.tombstoner.Delete(); err != nil { - return err - } - return nil -} - -// Contains returns whether the given key is present in the index. -func (t *TSMReader) Contains(key []byte) bool { - return t.index.Contains(key) -} - -// MaybeContainsValue returns true if key and time might exists in this file. This function -// could return true even though the actual point does not exist. For example, the key may -// exist in this file, but not have a point exactly at time t. -func (t *TSMReader) MaybeContainsValue(key []byte, ts int64) bool { - return t.index.MaybeContainsValue(key, ts) -} - -// Delete deletes blocks indicated by keys. 
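// Sketch (not from the original source) of the reference-counting contract of
// the Ref/Unref pair above: a caller takes a reference for the duration of its
// work so that Close and Remove wait until the file is no longer in use. The
// wrapper name is hypothetical.
func withReaderRefExample(r *TSMReader, fn func(*TSMReader) error) error {
	r.Ref()
	defer r.Unref()
	return fn(r)
}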
-func (t *TSMReader) Delete(keys [][]byte) error { - if !t.index.Delete(keys) { - return nil - } - if err := t.tombstoner.Add(keys); err != nil { - return err - } - if err := t.tombstoner.Flush(); err != nil { - return err - } - return nil -} - -// DeleteRange removes the given points for keys between minTime and maxTime. The series -// keys passed in must be sorted. -func (t *TSMReader) DeleteRange(keys [][]byte, minTime, maxTime int64) error { - if !t.index.DeleteRange(keys, minTime, maxTime) { - return nil - } - if err := t.tombstoner.AddRange(keys, minTime, maxTime); err != nil { - return err - } - if err := t.tombstoner.Flush(); err != nil { - return err - } - return nil -} - -// DeletePrefix removes the given points for keys beginning with prefix. It calls dead with -// any keys that became dead as a result of this call. -func (t *TSMReader) DeletePrefix(prefix []byte, minTime, maxTime int64, - pred Predicate, dead func([]byte)) error { - - // Marshal the predicate if passed for adding to the tombstone. - var predData []byte - if pred != nil { - var err error - predData, err = pred.Marshal() - if err != nil { - return err - } - } - - if !t.index.DeletePrefix(prefix, minTime, maxTime, pred, dead) { - return nil - } - if err := t.tombstoner.AddPrefixRange(prefix, minTime, maxTime, predData); err != nil { - return err - } - if err := t.tombstoner.Flush(); err != nil { - return err - } - return nil -} - -// Iterator returns an iterator over the keys starting at the provided key. You must -// call Next before calling any of the accessors. -func (t *TSMReader) Iterator(key []byte) TSMIterator { - return t.index.Iterator(key) -} - -// OverlapsTimeRange returns true if the time range of the file intersect min and max. -func (t *TSMReader) OverlapsTimeRange(min, max int64) bool { - return t.index.OverlapsTimeRange(min, max) -} - -// OverlapsKeyRange returns true if the key range of the file intersect min and max. -func (t *TSMReader) OverlapsKeyRange(min, max []byte) bool { - return t.index.OverlapsKeyRange(min, max) -} - -// OverlapsKeyPrefixRange returns true if the key range of the file -// intersects min and max, evaluating up to the length of min and max -// of the key range. -func (t *TSMReader) OverlapsKeyPrefixRange(min, max []byte) bool { - return t.index.OverlapsKeyPrefixRange(min, max) -} - -// TimeRange returns the min and max time across all keys in the file. -func (t *TSMReader) TimeRange() (int64, int64) { - return t.index.TimeRange() -} - -// KeyRange returns the min and max key across all keys in the file. -func (t *TSMReader) KeyRange() ([]byte, []byte) { - return t.index.KeyRange() -} - -// KeyCount returns the count of unique keys in the TSMReader. -func (t *TSMReader) KeyCount() int { - return t.index.KeyCount() -} - -// ReadEntries reads the index entries for key into entries. -func (t *TSMReader) ReadEntries(key []byte, entries []IndexEntry) ([]IndexEntry, error) { - return t.index.ReadEntries(key, entries) -} - -// IndexSize returns the size of the index in bytes. -func (t *TSMReader) IndexSize() uint32 { - return t.index.Size() -} - -// Size returns the size of the underlying file in bytes. -func (t *TSMReader) Size() uint32 { - t.mu.RLock() - size := t.size - t.mu.RUnlock() - return uint32(size) -} - -// LastModified returns the last time the underlying file was modified. 
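// Sketch (not from the original source): walking every series key in a file
// through the iterator returned by Iterator(nil). Next must be called before
// any accessor, as noted above. Assumes r is an open *TSMReader; the function
// name is hypothetical.
func walkKeysExample(r *TSMReader) error {
	iter := r.Iterator(nil)
	for iter.Next() {
		fmt.Printf("key: %q\n", iter.Key())
	}
	return iter.Err()
}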
-func (t *TSMReader) LastModified() int64 { - t.mu.RLock() - lm := t.lastModified - for _, ts := range t.tombstoner.TombstoneFiles() { - if ts.LastModified > lm { - lm = ts.LastModified - } - } - t.mu.RUnlock() - return lm -} - -// HasTombstones return true if there are any tombstone entries recorded. -func (t *TSMReader) HasTombstones() bool { - t.mu.RLock() - b := t.tombstoner.HasTombstones() - t.mu.RUnlock() - return b -} - -// TombstoneFiles returns any tombstone files associated with this TSM file. -func (t *TSMReader) TombstoneFiles() []FileStat { - t.mu.RLock() - fs := t.tombstoner.TombstoneFiles() - t.mu.RUnlock() - return fs -} - -// TombstoneRange returns ranges of time that are deleted for the given key. -func (t *TSMReader) TombstoneRange(key []byte, buf []TimeRange) []TimeRange { - t.mu.RLock() - tr := t.index.TombstoneRange(key, buf) - t.mu.RUnlock() - return tr -} - -// Stats returns the FileStat for the TSMReader's underlying file. -func (t *TSMReader) Stats() FileStat { - minTime, maxTime := t.index.TimeRange() - minKey, maxKey := t.index.KeyRange() - return FileStat{ - Path: t.Path(), - Size: t.Size(), - CreatedAt: t.lastModified, // tsm file only - LastModified: t.LastModified(), // tsm file & tombstones - MinTime: minTime, - MaxTime: maxTime, - MinKey: minKey, - MaxKey: maxKey, - HasTombstone: t.tombstoner.HasTombstones(), - } -} - -// BlockIterator returns a BlockIterator for the underlying TSM file. -func (t *TSMReader) BlockIterator() *BlockIterator { - t.mu.RLock() - iter := t.index.Iterator(nil) - t.mu.RUnlock() - - return &BlockIterator{ - r: t, - iter: iter, - } -} - -// TimeRangeIterator returns an iterator over the keys, starting at the provided -// key. Calling the HasData accessor will return true if data exists for the -// interval [min, max] for the current key. -// Next must be called before calling any of the accessors. -func (t *TSMReader) TimeRangeIterator(key []byte, min, max int64) *TimeRangeIterator { - t.mu.RLock() - iter := t.index.Iterator(key) - t.mu.RUnlock() - - return &TimeRangeIterator{ - timeRangeBlockReader: timeRangeBlockReader{ - r: t, - iter: iter, - tr: TimeRange{ - Min: min, - Max: max, - }, - }, - } -} - -// TimeRangeMaxTimeIterator returns an iterator over the keys, starting at the provided -// key. Calling the HasData and MaxTime accessors will be restricted to the -// interval [min, max] for the current key and MaxTime ≤ max. -// Next must be called before calling any of the accessors. -func (t *TSMReader) TimeRangeMaxTimeIterator(key []byte, min, max int64) *TimeRangeMaxTimeIterator { - t.mu.RLock() - iter := t.index.Iterator(key) - t.mu.RUnlock() - - return &TimeRangeMaxTimeIterator{ - timeRangeBlockReader: timeRangeBlockReader{ - r: t, - iter: iter, - tr: TimeRange{ - Min: min, - Max: max, - }, - }, - } -} - -type BatchDeleter interface { - DeleteRange(keys [][]byte, min, max int64) error - Commit() error - Rollback() error -} - -type batchDelete struct { - r *TSMReader -} - -func (b *batchDelete) DeleteRange(keys [][]byte, minTime, maxTime int64) error { - if len(keys) == 0 { - return nil - } - - // If the keys can't exist in this TSM file, skip it. - minKey, maxKey := keys[0], keys[len(keys)-1] - if !b.r.index.OverlapsKeyRange(minKey, maxKey) { - return nil - } - - // If the timerange can't exist in this TSM file, skip it. 
- if !b.r.index.OverlapsTimeRange(minTime, maxTime) { - return nil - } - - if err := b.r.tombstoner.AddRange(keys, minTime, maxTime); err != nil { - return err - } - - return nil -} - -func (b *batchDelete) Commit() error { - defer b.r.deleteMu.Unlock() - if err := b.r.tombstoner.Flush(); err != nil { - return err - } - - return b.r.applyTombstones() -} - -func (b *batchDelete) Rollback() error { - defer b.r.deleteMu.Unlock() - return b.r.tombstoner.Rollback() -} - -// BatchDelete returns a BatchDeleter. Only a single goroutine may run a BatchDelete at a time. -// Callers must either Commit or Rollback the operation. -func (r *TSMReader) BatchDelete() BatchDeleter { - r.deleteMu.Lock() - return &batchDelete{r: r} -} - -type BatchDeleters []BatchDeleter - -func (a BatchDeleters) DeleteRange(keys [][]byte, min, max int64) error { - errC := make(chan error, len(a)) - for _, b := range a { - go func(b BatchDeleter) { errC <- b.DeleteRange(keys, min, max) }(b) - } - - var err error - for i := 0; i < len(a); i++ { - dErr := <-errC - if dErr != nil { - err = dErr - } - } - return err -} - -func (a BatchDeleters) Commit() error { - errC := make(chan error, len(a)) - for _, b := range a { - go func(b BatchDeleter) { errC <- b.Commit() }(b) - } - - var err error - for i := 0; i < len(a); i++ { - dErr := <-errC - if dErr != nil { - err = dErr - } - } - return err -} - -func (a BatchDeleters) Rollback() error { - errC := make(chan error, len(a)) - for _, b := range a { - go func(b BatchDeleter) { errC <- b.Rollback() }(b) - } - - var err error - for i := 0; i < len(a); i++ { - dErr := <-errC - if dErr != nil { - err = dErr - } - } - return err -} diff --git a/tsdb/tsm1/reader_block_iterator.go b/tsdb/tsm1/reader_block_iterator.go deleted file mode 100644 index 7ff188e96f..0000000000 --- a/tsdb/tsm1/reader_block_iterator.go +++ /dev/null @@ -1,55 +0,0 @@ -package tsm1 - -// BlockIterator allows iterating over each block in a TSM file in order. It provides -// raw access to the block bytes without decoding them. -type BlockIterator struct { - r *TSMReader - iter *TSMIndexIterator - entries []IndexEntry -} - -// PeekNext returns the next key to be iterated or an empty string. -func (b *BlockIterator) PeekNext() []byte { - if len(b.entries) > 1 { - return b.iter.Key() - } - return b.iter.Peek() -} - -// Next returns true if there are more blocks to iterate through. -func (b *BlockIterator) Next() bool { - if b.iter.Err() != nil { - return false - } - - if len(b.entries) > 0 { - b.entries = b.entries[1:] - if len(b.entries) > 0 { - return true - } - } - - if !b.iter.Next() { - return false - } - b.entries = b.iter.Entries() - - return len(b.entries) > 0 -} - -// Read reads information about the next block to be iterated. -func (b *BlockIterator) Read() (key []byte, minTime int64, maxTime int64, typ byte, checksum uint32, buf []byte, err error) { - if err := b.iter.Err(); err != nil { - return nil, 0, 0, 0, 0, nil, err - } - checksum, buf, err = b.r.ReadBytes(&b.entries[0], nil) - if err != nil { - return nil, 0, 0, 0, 0, nil, err - } - return b.iter.Key(), b.entries[0].MinTime, b.entries[0].MaxTime, b.iter.Type(), checksum, buf, err -} - -// Err returns any errors encounter during iteration. 
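// Sketch (not from the original source) of the BatchDelete life cycle shown
// above: a single batch per reader at a time, always finished with Commit or
// Rollback so the delete mutex is released. The wrapper name is hypothetical.
func batchDeleteExample(r *TSMReader, keys [][]byte, min, max int64) error {
	b := r.BatchDelete()
	if err := b.DeleteRange(keys, min, max); err != nil {
		b.Rollback()
		return err
	}
	return b.Commit()
}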
-func (b *BlockIterator) Err() error { - return b.iter.Err() -} diff --git a/tsdb/tsm1/reader_block_iterator_test.go b/tsdb/tsm1/reader_block_iterator_test.go deleted file mode 100644 index 48893c4a7d..0000000000 --- a/tsdb/tsm1/reader_block_iterator_test.go +++ /dev/null @@ -1,280 +0,0 @@ -package tsm1 - -import ( - "os" - "sort" - "testing" -) - -func TestBlockIterator_Single(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - - w, err := NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - values := []Value{NewValue(0, int64(1))} - if err := w.Write([]byte("cpu"), values); err != nil { - t.Fatalf("unexpected error writing: %v", err) - - } - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - fd, err := os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error opening: %v", err) - } - - r, err := NewTSMReader(fd) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - - var count int - iter := r.BlockIterator() - for iter.Next() { - key, minTime, maxTime, typ, _, buf, err := iter.Read() - if err != nil { - t.Fatalf("unexpected error creating iterator: %v", err) - } - - if got, exp := string(key), "cpu"; got != exp { - t.Fatalf("key mismatch: got %v, exp %v", got, exp) - } - - if got, exp := minTime, int64(0); got != exp { - t.Fatalf("min time mismatch: got %v, exp %v", got, exp) - } - - if got, exp := maxTime, int64(0); got != exp { - t.Fatalf("max time mismatch: got %v, exp %v", got, exp) - } - - if got, exp := typ, BlockInteger; got != exp { - t.Fatalf("block type mismatch: got %v, exp %v", got, exp) - } - - if len(buf) == 0 { - t.Fatalf("buf length = 0") - } - - count++ - } - - if got, exp := count, len(values); got != exp { - t.Fatalf("value count mismatch: got %v, exp %v", got, exp) - } -} - -func TestBlockIterator_Tombstone(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - - w, err := NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - values := []Value{NewValue(0, int64(1))} - if err := w.Write([]byte("cpu"), values); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - - if err := w.Write([]byte("mem"), values); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - fd, err := os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error opening: %v", err) - } - - r, err := NewTSMReader(fd) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - - iter := r.BlockIterator() - for iter.Next() { - // Trigger a delete during iteration. 
This should cause an error condition for - // the BlockIterator - r.Delete([][]byte{[]byte("cpu")}) - } - - if iter.Err() == nil { - t.Fatalf("expected error: got nil") - } -} - -func TestBlockIterator_MultipleBlocks(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - - w, err := NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - values1 := []Value{NewValue(0, int64(1))} - if err := w.Write([]byte("cpu"), values1); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - - values2 := []Value{NewValue(1, int64(2))} - if err := w.Write([]byte("cpu"), values2); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - fd, err := os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error opening: %v", err) - } - - r, err := NewTSMReader(fd) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - - var count int - expData := []Values{values1, values2} - iter := r.BlockIterator() - var i int - for iter.Next() { - key, minTime, maxTime, typ, _, buf, err := iter.Read() - - if err != nil { - t.Fatalf("unexpected error creating iterator: %v", err) - } - - if got, exp := string(key), "cpu"; got != exp { - t.Fatalf("key mismatch: got %v, exp %v", got, exp) - } - - if got, exp := minTime, expData[i][0].UnixNano(); got != exp { - t.Fatalf("min time mismatch: got %v, exp %v", got, exp) - } - - if got, exp := maxTime, expData[i][0].UnixNano(); got != exp { - t.Fatalf("max time mismatch: got %v, exp %v", got, exp) - } - - if got, exp := typ, BlockInteger; got != exp { - t.Fatalf("block type mismatch: got %v, exp %v", got, exp) - } - - if len(buf) == 0 { - t.Fatalf("buf length = 0") - } - - count++ - i++ - } - - if got, exp := count, 2; got != exp { - t.Fatalf("value count mismatch: got %v, exp %v", got, exp) - } -} - -func TestBlockIterator_Sorted(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - - w, err := NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - values := map[string][]Value{ - "mem": []Value{NewValue(0, int64(1))}, - "cycles": []Value{NewValue(0, ^uint64(0))}, - "cpu": []Value{NewValue(1, float64(2))}, - "disk": []Value{NewValue(1, true)}, - "load": []Value{NewValue(1, "string")}, - } - - keys := make([]string, 0, len(values)) - for k := range values { - keys = append(keys, k) - } - sort.Strings(keys) - - for _, k := range keys { - if err := w.Write([]byte(k), values[k]); err != nil { - t.Fatalf("unexpected error writing: %v", err) - - } - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - fd, err := os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error opening: %v", err) - } - - r, err := NewTSMReader(fd) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - - var count int - iter := r.BlockIterator() - var lastKey string - for iter.Next() { - key, _, _, _, _, buf, err := iter.Read() - - if string(key) < lastKey { - t.Fatalf("keys not sorted: got %v, last %v", key, lastKey) - } - - lastKey = string(key) - - if err != nil { - t.Fatalf("unexpected error creating iterator: %v", err) - } - - if len(buf) == 0 { - 
t.Fatalf("buf length = 0") - } - - count++ - } - - if got, exp := count, len(values); got != exp { - t.Fatalf("value count mismatch: got %v, exp %v", got, exp) - } -} diff --git a/tsdb/tsm1/reader_fault_buffer.go b/tsdb/tsm1/reader_fault_buffer.go deleted file mode 100644 index fa484b92a0..0000000000 --- a/tsdb/tsm1/reader_fault_buffer.go +++ /dev/null @@ -1,47 +0,0 @@ -package tsm1 - -import ( - "math/rand" - "runtime" - "sync/atomic" -) - -// fault buffer is a by-default disabled helper to keep track of estimates of page faults -// during accesses. use the constants below to turn it on or off and benchmarks will report -// their estimates. - -const ( - faultBufferEnabled = false - faultBufferSampleStacks = false -) - -type faultBuffer struct { - faults uint64 - page uint64 - b []byte - samples [][]uintptr -} - -func (m *faultBuffer) len() uint32 { return uint32(len(m.b)) } - -func (m *faultBuffer) access(start, length uint32) []byte { - if faultBufferEnabled { - current, page := int64(atomic.LoadUint64(&m.page)), int64(start)/4096 - if page != current && page != current+1 { // assume kernel precaches next page - atomic.AddUint64(&m.faults, 1) - if faultBufferSampleStacks && rand.Intn(1000) == 0 { - var stack [256]uintptr - n := runtime.Callers(0, stack[:]) - m.samples = append(m.samples, stack[:n:n]) - } - } - atomic.StoreUint64(&m.page, uint64(page)) - } - - end := m.len() - if length > 0 { - end = start + length - } - - return m.b[start:end] -} diff --git a/tsdb/tsm1/reader_index.go b/tsdb/tsm1/reader_index.go deleted file mode 100644 index 4969d118b8..0000000000 --- a/tsdb/tsm1/reader_index.go +++ /dev/null @@ -1,910 +0,0 @@ -package tsm1 - -import ( - "bytes" - "encoding/binary" - "errors" - "fmt" - "math" - "sort" - "sync" - - "go.uber.org/zap" -) - -// TSMIndex represent the index section of a TSM file. The index records all -// blocks, their locations, sizes, min and max times. -type TSMIndex interface { - // Delete removes the given keys from the index. Returns true if there were any changes. - Delete(keys [][]byte) bool - - // DeleteRange removes the given keys with data between minTime and maxTime from the index. - // Returns true if there were any changes. - DeleteRange(keys [][]byte, minTime, maxTime int64) bool - - // DeletePrefix removes keys that begin with the given prefix with data between minTime and - // maxTime from the index. Returns true if there were any changes. It calls dead with any - // keys that became dead as a result of this call. - DeletePrefix(prefix []byte, minTime, maxTime int64, pred Predicate, dead func([]byte)) bool - - // MaybeContainsKey returns true if the given key may exist in the index. This is faster than - // Contains but, may return false positives. - MaybeContainsKey(key []byte) bool - - // Contains return true if the given key exists in the index. - Contains(key []byte) bool - - // MaybeContainsValue returns true if key and time might exist in this file. This function - // could return true even though the actual point does not exists. For example, the key may - // exist in this file, but not have a point exactly at time t. - MaybeContainsValue(key []byte, timestamp int64) bool - - // ReadEntries reads the index entries for key into entries. - ReadEntries(key []byte, entries []IndexEntry) ([]IndexEntry, error) - - // Entry returns the index entry for the specified key and timestamp. If no entry - // matches the key and timestamp, nil is returned. 
- Entry(key []byte, timestamp int64) *IndexEntry - - // KeyCount returns the count of unique keys in the index. - KeyCount() int - - // Iterator returns an iterator over the keys starting at the provided key. You must - // call Next before calling any of the accessors. - Iterator([]byte) *TSMIndexIterator - - // OverlapsTimeRange returns true if the time range of the file intersect min and max. - OverlapsTimeRange(min, max int64) bool - - // OverlapsKeyRange returns true if the min and max keys of the file overlap the arguments min and max. - OverlapsKeyRange(min, max []byte) bool - - // OverlapsKeyPrefixRange returns true if the key range of the file - // intersects min and max, evaluating up to the length of min and max - // of the key range. - OverlapsKeyPrefixRange(min, max []byte) bool - - // Size returns the size of the current index in bytes. - Size() uint32 - - // TimeRange returns the min and max time across all keys in the file. - TimeRange() (int64, int64) - - // TombstoneRange returns ranges of time that are deleted for the given key. - TombstoneRange(key []byte, buf []TimeRange) []TimeRange - - // KeyRange returns the min and max keys in the file. - KeyRange() ([]byte, []byte) - - // Type returns the block type of the values stored for the key. Returns one of - // BlockFloat64, BlockInt64, BlockBool, BlockString. If key does not exist, - // an error is returned. - Type(key []byte) (byte, error) - - // UnmarshalBinary populates an index from an encoded byte slice - // representation of an index. - UnmarshalBinary(b []byte) error - - // Close closes the index and releases any resources. - Close() error -} - -// indirectIndex is a TSMIndex that uses a raw byte slice representation of an index. This -// implementation can be used for indexes that may be MMAPed into memory. -type indirectIndex struct { - mu sync.RWMutex - logger *zap.Logger - - // indirectIndex works a follows. Assuming we have an index structure in memory as - // the diagram below: - // - // ┌────────────────────────────────────────────────────────────────────┐ - // │ Index │ - // ├─┬──────────────────────┬──┬───────────────────────┬───┬────────────┘ - // │0│ │62│ │145│ - // ├─┴───────┬─────────┬────┼──┴──────┬─────────┬──────┼───┴─────┬──────┐ - // │Key 1 Len│ Key │... │Key 2 Len│ Key 2 │ ... │ Key 3 │ ... │ - // │ 2 bytes │ N bytes │ │ 2 bytes │ N bytes │ │ 2 bytes │ │ - // └─────────┴─────────┴────┴─────────┴─────────┴──────┴─────────┴──────┘ - - // We would build an `offsets` slices where each element pointers to the byte location - // for the first key in the index slice. - - // ┌────────────────────────────────────────────────────────────────────┐ - // │ Offsets │ - // ├────┬────┬────┬─────────────────────────────────────────────────────┘ - // │ 0 │ 62 │145 │ - // └────┴────┴────┘ - - // Using this offset slice we can find `Key 2` by doing a binary search - // over the offsets slice. Instead of comparing the value in the offsets - // (e.g. `62`), we use that as an index into the underlying index to - // retrieve the key at position `62` and perform our comparisons with that. - - // When we have identified the correct position in the index for a given - // key, we could perform another binary search or a linear scan. This - // should be fast as well since each index entry is 28 bytes and all - // contiguous in memory. The current implementation uses a linear scan since the - // number of block entries is expected to be < 100 per key. - - // b is the underlying index byte slice. 
This could be a copy on the heap or an MMAP - // slice reference - b faultBuffer - - // ro contains the positions in b for each key as well as the first bytes of each key - // to avoid disk seeks. - ro readerOffsets - - // minKey, maxKey are the minium and maximum (lexicographically sorted) contained in the - // file - minKey, maxKey []byte - - // minTime, maxTime are the minimum and maximum times contained in the file across all - // series. - minTime, maxTime int64 - - // tombstones contains only the tombstoned keys with subset of time values deleted. An - // entry would exist here if a subset of the points for a key were deleted and the file - // had not be re-compacted to remove the points on disk. - tombstones map[uint32][]TimeRange - - // prefixTombstones contains the tombestoned keys with a subset of the values deleted that - // all share the same prefix. - prefixTombstones *prefixTree -} - -// NewIndirectIndex returns a new indirect index. -func NewIndirectIndex() *indirectIndex { - return &indirectIndex{ - tombstones: make(map[uint32][]TimeRange), - prefixTombstones: newPrefixTree(), - } -} - -// MaybeContainsKey returns true of key may exist in this index. -func (d *indirectIndex) MaybeContainsKey(key []byte) bool { - return bytes.Compare(key, d.minKey) >= 0 && bytes.Compare(key, d.maxKey) <= 0 -} - -// ReadEntries returns all index entries for a key. -func (d *indirectIndex) ReadEntries(key []byte, entries []IndexEntry) ([]IndexEntry, error) { - d.mu.RLock() - defer d.mu.RUnlock() - - iter := d.ro.Iterator() - exact, _ := iter.Seek(key, &d.b) - if !exact { - return nil, nil - } - - entries, err := readEntries(d.b.access(iter.EntryOffset(&d.b), 0), entries) - if err != nil { - return nil, err - } - - return entries, nil -} - -// Entry returns the index entry for the specified key and timestamp. If no entry -// matches the key an timestamp, nil is returned. -func (d *indirectIndex) Entry(key []byte, timestamp int64) *IndexEntry { - entries, err := d.ReadEntries(key, nil) - if err != nil { - d.logger.Error("Error reading tsm index key", zap.String("key", fmt.Sprintf("%q", key))) - return nil - } - for _, entry := range entries { - if entry.Contains(timestamp) { - return &entry - } - } - return nil -} - -// KeyCount returns the count of unique keys in the index. -func (d *indirectIndex) KeyCount() int { - d.mu.RLock() - n := len(d.ro.offsets) - d.mu.RUnlock() - return n -} - -// Iterator returns an iterator over the keys starting at the provided key. You must -// call Next before calling any of the accessors. -func (d *indirectIndex) Iterator(key []byte) *TSMIndexIterator { - d.mu.RLock() - iter := d.ro.Iterator() - _, ok := iter.Seek(key, &d.b) - ti := &TSMIndexIterator{ - d: d, - n: int(len(d.ro.offsets)), - b: &d.b, - iter: &iter, - first: true, - ok: ok, - } - d.mu.RUnlock() - - return ti -} - -// Delete removes the given keys from the index. -func (d *indirectIndex) Delete(keys [][]byte) bool { - if len(keys) == 0 { - return false - } - - d.mu.RLock() - iter := d.ro.Iterator() - for _, key := range keys { - if !iter.Next() || !bytes.Equal(iter.Key(&d.b), key) { - if exact, _ := iter.Seek(key, &d.b); !exact { - continue - } - } - - delete(d.tombstones, iter.Offset()) - iter.Delete() - } - d.mu.RUnlock() - - if !iter.HasDeletes() { - return false - } - - d.mu.Lock() - iter.Done() - d.mu.Unlock() - - return true -} - -// insertTimeRange adds a time range described by the minTime and maxTime into ts. 
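// Sketch (not from the original source) of the two-level lookup the index
// supports: MaybeContainsKey is only a min/max key bounds check and may return
// false positives, while Contains performs the binary search over the offsets
// slice. Assumes idx is a loaded *indirectIndex; the helper name is
// hypothetical.
func hasSeriesExample(idx *indirectIndex, key []byte) bool {
	if !idx.MaybeContainsKey(key) {
		return false // outside [minKey, maxKey], definitely absent
	}
	return idx.Contains(key)
}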
-func insertTimeRange(ts []TimeRange, minTime, maxTime int64) []TimeRange { - n := sort.Search(len(ts), func(i int) bool { - if ts[i].Min == minTime { - return ts[i].Max >= maxTime - } - return ts[i].Min > minTime - }) - - ts = append(ts, TimeRange{}) - copy(ts[n+1:], ts[n:]) - ts[n] = TimeRange{Min: minTime, Max: maxTime} - return ts -} - -// pendingTombstone is a type that describes a pending insertion of a tombstone. -type pendingTombstone struct { - Key int - Index int - Offset uint32 - EntryOffset uint32 - Tombstones int -} - -// coversEntries checks if all of the stored tombstones including one for minTime and maxTime cover -// all of the index entries. It mutates the entries slice to do the work, so be sure to make a copy -// if you must. -func (d *indirectIndex) coversEntries(offset uint32, key []byte, buf []TimeRange, - entries []IndexEntry, minTime, maxTime int64) ([]TimeRange, bool) { - - // grab the tombstones from the prefixes. these come out unsorted, so we sort - // them and place them in the merger section named unsorted. - buf = d.prefixTombstones.Search(key, buf[:0]) - if len(buf) > 1 { - sort.Slice(buf, func(i, j int) bool { return buf[i].Less(buf[j]) }) - } - - // create the merger with the other tombstone entries: the ones for the specific - // key and the one we have proposed to add. - merger := timeRangeMerger{ - fromMap: d.tombstones[offset], - fromPrefix: buf, - single: TimeRange{Min: minTime, Max: maxTime}, - used: false, - } - - return buf, timeRangesCoverEntries(merger, entries) -} - -// DeleteRange removes the given keys with data between minTime and maxTime from the index. -func (d *indirectIndex) DeleteRange(keys [][]byte, minTime, maxTime int64) bool { - // If we're deleting everything, we won't need to worry about partial deletes. - if minTime <= d.minTime && maxTime >= d.maxTime { - return d.Delete(keys) - } - - // Is the range passed in outside of the time range for the file? - if minTime > d.maxTime || maxTime < d.minTime { - return false - } - - // General outline: - // Under the read lock, determine the set of actions we need to - // take and on what keys to take them. Then, under the write - // lock, perform those actions. We keep track of some state - // during the read lock to make double checking under the - // write lock cheap. - - d.mu.RLock() - iter := d.ro.Iterator() - var ( - ok bool - trbuf []TimeRange - entries []IndexEntry - pending []pendingTombstone - err error - ) - - for i, key := range keys { - if !iter.Next() || !bytes.Equal(iter.Key(&d.b), key) { - if exact, _ := iter.Seek(key, &d.b); !exact { - continue - } - } - - entryOffset := iter.EntryOffset(&d.b) - entries, err = readEntriesTimes(d.b.access(entryOffset, 0), entries) - if err != nil { - // If we have an error reading the entries for a key, we should just pretend - // the whole key is deleted. Maybe a better idea is to report this up somehow - // but that's for another time. - iter.Delete() - continue - } - - // Is the time range passed outside of the time range we have stored for this key? - min, max := entries[0].MinTime, entries[len(entries)-1].MaxTime - if minTime > max || maxTime < min { - continue - } - - // Does the range passed in cover every value for the key? - if minTime <= min && maxTime >= max { - iter.Delete() - continue - } - - // Does adding the minTime and maxTime cover the entries? 
- offset := iter.Offset() - trbuf, ok = d.coversEntries(offset, key, trbuf, entries, minTime, maxTime) - if ok { - iter.Delete() - continue - } - - // Save that we should add a tombstone for this key, and how many tombstones - // already existed to avoid double checks. - pending = append(pending, pendingTombstone{ - Key: i, - Index: iter.Index(), - Offset: offset, - EntryOffset: entryOffset, - Tombstones: len(d.tombstones[offset]) + d.prefixTombstones.Count(key), - }) - } - - d.mu.RUnlock() - - if len(pending) == 0 && !iter.HasDeletes() { - return false - } - - d.mu.Lock() - defer d.mu.Unlock() - - for _, p := range pending { - key := keys[p.Key] - - // Check the existing tombstones. If the length did not change, then we know - // that we don't need to double check coverage, since we only ever increase the - // number of tombstones for a key. - if trs := d.tombstones[p.Offset]; p.Tombstones == len(trs)+d.prefixTombstones.Count(key) { - d.tombstones[p.Offset] = insertTimeRange(trs, minTime, maxTime) - continue - } - - // Since the length changed, we have to do the expensive overlap check again. - // We re-read the entries again under the write lock because this should be - // rare and only during concurrent deletes to the same key. We could make - // a copy of the entries before getting here, but that penalizes the common - // no-concurrent case. - entries, err = readEntriesTimes(d.b.access(p.EntryOffset, 0), entries) - if err != nil { - // If we have an error reading the entries for a key, we should just pretend - // the whole key is deleted. Maybe a better idea is to report this up somehow - // but that's for another time. - delete(d.tombstones, p.Offset) - iter.SetIndex(p.Index) - if iter.Offset() == p.Offset { - iter.Delete() - } - continue - } - - trbuf, ok = d.coversEntries(p.Offset, key, trbuf, entries, minTime, maxTime) - if ok { - delete(d.tombstones, p.Offset) - iter.SetIndex(p.Index) - if iter.Offset() == p.Offset { - iter.Delete() - } - continue - } - - // Append the TimeRange into the tombstones. - trs := d.tombstones[p.Offset] - d.tombstones[p.Offset] = insertTimeRange(trs, minTime, maxTime) - } - - iter.Done() - return true -} - -// DeletePrefix removes keys that begin with the given prefix with data between minTime and -// maxTime from the index. Returns true if there were any changes. It calls dead with any -// keys that became dead as a result of this call. -func (d *indirectIndex) DeletePrefix(prefix []byte, minTime, maxTime int64, - pred Predicate, dead func([]byte)) bool { - - if dead == nil { - dead = func([]byte) {} - } - - // If we're deleting everything, we won't need to worry about partial deletes. - partial := !(minTime <= d.minTime && maxTime >= d.maxTime) - - // Is the range passed in outside of the time range for the file? - if minTime > d.maxTime || maxTime < d.minTime { - return false - } - - d.mu.RLock() - var ( - ok bool - trbuf []TimeRange - entries []IndexEntry - pending []pendingTombstone - keys [][]byte - err error - mustTrack bool - ) - - // seek to the earliest key with the prefix, and start iterating. we can't call - // next until after we've checked the key, so keep a "first" flag. - first := true - iter := d.ro.Iterator() - for { - if first { - if _, ok := iter.Seek(prefix, &d.b); !ok { - break - } - } else if !iter.Next() { - break - } - - first = false - key := iter.Key(&d.b) - if !bytes.HasPrefix(key, prefix) { - break - } - - // If we have a predicate, skip the key if it doesn't match. 
- if pred != nil && !pred.Matches(key) { - continue - } - - // if we're not doing a partial delete, we don't need to read the entries and - // can just delete the key and move on. - if !partial { - dead(key) - iter.Delete() - continue - } - - entryOffset := iter.EntryOffset(&d.b) - entries, err = readEntriesTimes(d.b.access(entryOffset, 0), entries) - if err != nil { - // If we have an error reading the entries for a key, we should just pretend - // the whole key is deleted. Maybe a better idea is to report this up somehow - // but that's for another time. - dead(key) - iter.Delete() - continue - } - - // Is the time range passed outside the range we have stored for the key? - min, max := entries[0].MinTime, entries[len(entries)-1].MaxTime - if minTime > max || maxTime < min { - continue - } - - // Does the range passed cover every value for the key? - if minTime <= min && maxTime >= max { - dead(key) - iter.Delete() - continue - } - - // Does adding the minTime and maxTime cover the entries? - offset := iter.Offset() - trbuf, ok = d.coversEntries(offset, iter.Key(&d.b), trbuf, entries, minTime, maxTime) - if ok { - dead(key) - iter.Delete() - continue - } - - // Otherwise, we have to track it in the prefix tombstones list. - mustTrack = true - - // If we have a predicate, we must keep track of a pending tombstone entry for the key. - if pred != nil { - pending = append(pending, pendingTombstone{ - Key: len(keys), - Index: iter.Index(), - Offset: offset, - EntryOffset: entryOffset, - Tombstones: len(d.tombstones[offset]) + d.prefixTombstones.Count(key), - }) - keys = append(keys, key) - } - } - d.mu.RUnlock() - - // Check and abort if nothing needs to be done. - if !mustTrack && len(pending) == 0 && !iter.HasDeletes() { - return false - } - - d.mu.Lock() - defer d.mu.Unlock() - - if pred == nil { - // If we don't have a predicate, we can add a single prefix tombstone entry. - if mustTrack { - d.prefixTombstones.Append(prefix, TimeRange{Min: minTime, Max: maxTime}) - } - - // Clean up any fully deleted keys. - if iter.HasDeletes() { - iter.Done() - } - return true - } - - // Otherwise, we must walk the pending deletes individually. - for _, p := range pending { - key := keys[p.Key] - - // Check the existing tombstones. If the length did not change, then we know - // that we don't need to double check coverage, since we only ever increase the - // number of tombstones for a key. - if trs := d.tombstones[p.Offset]; p.Tombstones == len(trs)+d.prefixTombstones.Count(key) { - d.tombstones[p.Offset] = insertTimeRange(trs, minTime, maxTime) - continue - } - - // Since the length changed, we have to do the expensive overlap check again. - // We re-read the entries again under the write lock because this should be - // rare and only during concurrent deletes to the same key. We could make - // a copy of the entries before getting here, but that penalizes the common - // no-concurrent case. - entries, err = readEntriesTimes(d.b.access(p.EntryOffset, 0), entries) - if err != nil { - // If we have an error reading the entries for a key, we should just pretend - // the whole key is deleted. Maybe a better idea is to report this up somehow - // but that's for another time. - delete(d.tombstones, p.Offset) - iter.SetIndex(p.Index) - if iter.Offset() == p.Offset { - dead(key) - iter.Delete() - } - continue - } - - // If it does cover, remove the key entirely. 
- trbuf, ok = d.coversEntries(p.Offset, key, trbuf, entries, minTime, maxTime) - if ok { - delete(d.tombstones, p.Offset) - iter.SetIndex(p.Index) - if iter.Offset() == p.Offset { - dead(key) - iter.Delete() - } - continue - } - - // Append the TimeRange into the tombstones. - trs := d.tombstones[p.Offset] - d.tombstones[p.Offset] = insertTimeRange(trs, minTime, maxTime) - } - - // Clean up any fully deleted keys. - if iter.HasDeletes() { - iter.Done() - } - return true -} - -// TombstoneRange returns ranges of time that are deleted for the given key. -func (d *indirectIndex) TombstoneRange(key []byte, buf []TimeRange) []TimeRange { - d.mu.RLock() - rs := d.prefixTombstones.Search(key, buf[:0]) - iter := d.ro.Iterator() - exact, _ := iter.Seek(key, &d.b) - if exact { - rs = append(rs, d.tombstones[iter.Offset()]...) - } - d.mu.RUnlock() - return rs -} - -// Contains return true if the given key exists in the index. -func (d *indirectIndex) Contains(key []byte) bool { - d.mu.RLock() - iter := d.ro.Iterator() - exact, _ := iter.Seek(key, &d.b) - d.mu.RUnlock() - return exact -} - -// MaybeContainsValue returns true if key and time might exist in this file. -func (d *indirectIndex) MaybeContainsValue(key []byte, timestamp int64) bool { - d.mu.RLock() - defer d.mu.RUnlock() - - iter := d.ro.Iterator() - exact, _ := iter.Seek(key, &d.b) - if !exact { - return false - } - - for _, t := range d.tombstones[iter.Offset()] { - if t.Min <= timestamp && timestamp <= t.Max { - return false - } - } - - if d.prefixTombstones.checkOverlap(key, timestamp) { - return false - } - - entries, err := d.ReadEntries(key, nil) - if err != nil { - d.logger.Error("Error reading tsm index key", zap.String("key", fmt.Sprintf("%q", key))) - return false - } - - for _, entry := range entries { - if entry.Contains(timestamp) { - return true - } - } - - return false -} - -// Type returns the block type of the values stored for the key. -func (d *indirectIndex) Type(key []byte) (byte, error) { - d.mu.RLock() - defer d.mu.RUnlock() - - iter := d.ro.Iterator() - exact, _ := iter.Seek(key, &d.b) - if !exact { - return 0, errors.New("key does not exist") - } - - return d.b.access(iter.EntryOffset(&d.b), 1)[0], nil -} - -// OverlapsTimeRange returns true if the time range of the file intersect min and max. -func (d *indirectIndex) OverlapsTimeRange(min, max int64) bool { - return d.minTime <= max && d.maxTime >= min -} - -// OverlapsKeyRange returns true if the min and max keys of the file overlap the arguments min and max. -func (d *indirectIndex) OverlapsKeyRange(min, max []byte) bool { - return bytes.Compare(d.minKey, max) <= 0 && bytes.Compare(d.maxKey, min) >= 0 -} - -// OverlapsKeyPrefixRange returns true if the key range of the file -// intersects min and max, evaluating up to the length of min and max -// of the key range. -func (d *indirectIndex) OverlapsKeyPrefixRange(min, max []byte) bool { - minKey, maxKey := d.minKey, d.maxKey - if len(maxKey) > len(min) { - maxKey = maxKey[:len(min)] - } - if len(minKey) > len(max) { - minKey = minKey[:len(max)] - } - return bytes.Compare(minKey, max) <= 0 && bytes.Compare(maxKey, min) >= 0 -} - -// KeyRange returns the min and max keys in the index. -func (d *indirectIndex) KeyRange() ([]byte, []byte) { - return d.minKey, d.maxKey -} - -// TimeRange returns the min and max time across all keys in the index. -func (d *indirectIndex) TimeRange() (int64, int64) { - return d.minTime, d.maxTime -} - -// MarshalBinary returns a byte slice encoded version of the index. 
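// Sketch (not from the original source): combining the overlap predicates
// above so a caller can skip files that cannot contain the keys or times
// affected by a delete. minKey and maxKey must be the sorted bounds of the
// keys being deleted; the helper name is hypothetical.
func shouldVisitFileExample(idx TSMIndex, minKey, maxKey []byte, minTime, maxTime int64) bool {
	return idx.OverlapsKeyRange(minKey, maxKey) && idx.OverlapsTimeRange(minTime, maxTime)
}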
-func (d *indirectIndex) MarshalBinary() ([]byte, error) { - d.mu.RLock() - defer d.mu.RUnlock() - - return d.b.b, nil -} - -// UnmarshalBinary populates an index from an encoded byte slice -// representation of an index. -func (d *indirectIndex) UnmarshalBinary(b []byte) error { - d.mu.Lock() - defer d.mu.Unlock() - - // Keep a reference to the actual index bytes - d.b = faultBuffer{b: b} - if len(b) == 0 { - return nil - } - - // make sure a uint32 is sufficient to store any offset into the index. - if uint64(len(b)) != uint64(uint32(len(b))) { - return fmt.Errorf("indirectIndex: too large to open") - } - - var minTime, maxTime int64 = math.MaxInt64, math.MinInt64 - - // To create our "indirect" index, we need to find the location of all the keys in - // the raw byte slice. The keys are listed once each (in sorted order). Following - // each key is a time ordered list of index entry blocks for that key. The loop below - // basically skips across the slice keeping track of the counter when we are at a key - // field. - var i uint32 - var ro readerOffsets - - iMax := uint32(len(b)) - if iMax > math.MaxInt32 { - return fmt.Errorf("indirectIndex: too large to store offsets") - } - - for i < iMax { - offset := i // save for when we add to the data structure - - // Skip to the start of the values - // key length value (2) + type (1) + length of key - if i+2 >= iMax { - return fmt.Errorf("indirectIndex: not enough data for key length value") - } - keyLength := uint32(binary.BigEndian.Uint16(b[i : i+2])) - i += 2 - - if i+keyLength+indexTypeSize >= iMax { - return fmt.Errorf("indirectIndex: not enough data for key and type") - } - ro.AddKey(offset, b[i:i+keyLength]) - i += keyLength + indexTypeSize - - // count of index entries - if i+indexCountSize >= iMax { - return fmt.Errorf("indirectIndex: not enough data for index entries count") - } - count := uint32(binary.BigEndian.Uint16(b[i : i+indexCountSize])) - if count == 0 { - return fmt.Errorf("indirectIndex: key exits with no entries") - } - i += indexCountSize - - // Find the min time for the block - if i+8 >= iMax { - return fmt.Errorf("indirectIndex: not enough data for min time") - } - minT := int64(binary.BigEndian.Uint64(b[i : i+8])) - if minT < minTime { - minTime = minT - } - - i += (count - 1) * indexEntrySize - - // Find the max time for the block - if i+16 >= iMax { - return fmt.Errorf("indirectIndex: not enough data for max time") - } - maxT := int64(binary.BigEndian.Uint64(b[i+8 : i+16])) - if maxT > maxTime { - maxTime = maxT - } - - i += indexEntrySize - } - - ro.Done() - - firstOfs := ro.offsets[0] - key := readKey(b[firstOfs:]) - d.minKey = key - - lastOfs := ro.offsets[len(ro.offsets)-1] - key = readKey(b[lastOfs:]) - d.maxKey = key - - d.minTime = minTime - d.maxTime = maxTime - d.ro = ro - - return nil -} - -// Size returns the size of the current index in bytes. 
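// Sketch (not from the original source) of the per-key layout that
// UnmarshalBinary above walks:
//
//	[2B key length][key][1B block type][2B entry count][count * 28B entries]
//
// with each 28-byte entry starting with its 8-byte min time followed by its
// 8-byte max time. This hypothetical helper mirrors that arithmetic for one
// key section beginning at b; bounds checks are omitted for brevity.
func keySectionTimesExample(b []byte) (minTime, maxTime int64) {
	keyLen := uint32(binary.BigEndian.Uint16(b[0:2]))
	i := 2 + keyLen + indexTypeSize // skip key length, key, and type byte
	count := uint32(binary.BigEndian.Uint16(b[i : i+indexCountSize]))
	i += indexCountSize
	minTime = int64(binary.BigEndian.Uint64(b[i : i+8]))
	last := i + (count-1)*indexEntrySize
	maxTime = int64(binary.BigEndian.Uint64(b[last+8 : last+16]))
	return minTime, maxTime
}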
-func (d *indirectIndex) Size() uint32 { - d.mu.RLock() - defer d.mu.RUnlock() - - return d.b.len() -} - -func (d *indirectIndex) Close() error { - return nil -} - -func readKey(b []byte) (key []byte) { - size := binary.BigEndian.Uint16(b[:2]) - return b[2 : 2+size] -} - -func readEntries(b []byte, entries []IndexEntry) ([]IndexEntry, error) { - if len(b) < indexTypeSize+indexCountSize { - return entries[:0], errors.New("readEntries: data too short for headers") - } - - count := int(binary.BigEndian.Uint16(b[indexTypeSize : indexTypeSize+indexCountSize])) - if cap(entries) < count { - entries = make([]IndexEntry, count) - } else { - entries = entries[:count] - } - b = b[indexTypeSize+indexCountSize:] - - for i := range entries { - if err := entries[i].UnmarshalBinary(b); err != nil { - return entries[:0], err - } - b = b[indexEntrySize:] - } - - return entries, nil -} - -// readEntriesTimes is a helper function to read entries at the provided buffer but -// only reading in the min and max times. -func readEntriesTimes(b []byte, entries []IndexEntry) ([]IndexEntry, error) { - if len(b) < indexTypeSize+indexCountSize { - return entries[:0], errors.New("readEntries: data too short for headers") - } - - count := int(binary.BigEndian.Uint16(b[indexTypeSize : indexTypeSize+indexCountSize])) - if cap(entries) < count { - entries = make([]IndexEntry, count) - } else { - entries = entries[:count] - } - b = b[indexTypeSize+indexCountSize:] - - for i := range entries { - if len(b) < indexEntrySize { - return entries[:0], errors.New("readEntries: stream too short for entry") - } - entries[i].MinTime = int64(binary.BigEndian.Uint64(b[0:8])) - entries[i].MaxTime = int64(binary.BigEndian.Uint64(b[8:16])) - b = b[indexEntrySize:] - } - - return entries, nil -} diff --git a/tsdb/tsm1/reader_index_iterator.go b/tsdb/tsm1/reader_index_iterator.go deleted file mode 100644 index 7314089175..0000000000 --- a/tsdb/tsm1/reader_index_iterator.go +++ /dev/null @@ -1,149 +0,0 @@ -package tsm1 - -import ( - "errors" -) - -var errKeyCountChanged = errors.New("TSMIndexIterator: key count changed during iteration") - -// TSMIndexIterator allows one to iterate over the TSM index. -type TSMIndexIterator struct { - b *faultBuffer - n int - d *indirectIndex - iter *readerOffsetsIterator - - // if true, don't need to advance iter on the call to Next - first bool - peeked bool - - ok bool - err error - - offset uint32 - eoffset uint32 - - // lazily loaded from offset and eoffset - key []byte - typ byte - entries []IndexEntry -} - -// Next advances the iterator and reports if it is still valid. -func (t *TSMIndexIterator) Next() bool { - t.d.mu.RLock() - if n := len(t.d.ro.offsets); t.n != n { - t.err, t.ok = errKeyCountChanged, false - } - if !t.ok || t.err != nil { - t.d.mu.RUnlock() - return false - } - if !t.peeked && !t.first { - t.ok = t.iter.Next() - } - if !t.ok { - t.d.mu.RUnlock() - return false - } - - t.peeked = false - t.first = false - - t.offset = t.iter.Offset() - t.eoffset = t.iter.EntryOffset(t.b) - t.d.mu.RUnlock() - - // reset lazy loaded state - t.key = nil - t.typ = 0 - t.entries = t.entries[:0] - return true -} - -// Seek points the iterator at the smallest key greater than or equal to the -// given key, returning true if it was an exact match. It returns false for -// ok if the key does not exist. 
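// Sketch (not from the original source): seeking the index iterator to the
// first key at or after a prefix and scanning forward while the prefix still
// matches. Iterator(prefix) performs the seek; Next must still be called
// before the accessors. Assumes idx is a loaded *indirectIndex and that the
// bytes package is imported; the helper name is hypothetical.
func scanPrefixExample(idx *indirectIndex, prefix []byte) error {
	iter := idx.Iterator(prefix)
	for iter.Next() {
		key := iter.Key()
		if !bytes.HasPrefix(key, prefix) {
			break
		}
		_ = iter.Entries() // inspect the block entries for this key
	}
	return iter.Err()
}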
-func (t *TSMIndexIterator) Seek(key []byte) (exact, ok bool) { - t.d.mu.RLock() - if n := len(t.d.ro.offsets); t.n != n { - t.err, t.ok = errKeyCountChanged, false - } - if t.err != nil { - t.d.mu.RUnlock() - return false, false - } - - t.peeked = false - t.first = false - - exact, t.ok = t.iter.Seek(key, t.b) - if !t.ok { - t.d.mu.RUnlock() - return false, false - } - - t.offset = t.iter.Offset() - t.eoffset = t.iter.EntryOffset(t.b) - t.d.mu.RUnlock() - - // reset lazy loaded state - t.key = nil - t.typ = 0 - t.entries = t.entries[:0] - return exact, true -} - -// Peek reports the next key or nil if there is not one or an error happened. -func (t *TSMIndexIterator) Peek() []byte { - if !t.ok || t.err != nil { - return nil - } - if !t.peeked { - t.ok = t.iter.Next() - t.peeked = true - } - - if !t.ok { - return nil - } - - return t.iter.Key(t.b) -} - -// Key reports the current key. -func (t *TSMIndexIterator) Key() []byte { - if t.key == nil { - buf := t.b.access(t.offset, 0) - t.key = readKey(buf) - t.typ = buf[2+len(t.key)] - } - return t.key -} - -// Type reports the current type. -func (t *TSMIndexIterator) Type() byte { - if t.key == nil { - buf := t.b.access(t.offset, 0) - t.key = readKey(buf) - t.typ = buf[2+len(t.key)] - } - return t.typ -} - -// Entries reports the current list of entries. -func (t *TSMIndexIterator) Entries() []IndexEntry { - if len(t.entries) == 0 { - buf := t.b.access(t.eoffset, 0) - t.entries, t.err = readEntries(buf, t.entries) - } - if t.err != nil { - return nil - } - return t.entries -} - -// Err reports if an error stopped the iteration. -func (t *TSMIndexIterator) Err() error { - return t.err -} diff --git a/tsdb/tsm1/reader_index_iterator_test.go b/tsdb/tsm1/reader_index_iterator_test.go deleted file mode 100644 index 993a7c5636..0000000000 --- a/tsdb/tsm1/reader_index_iterator_test.go +++ /dev/null @@ -1,118 +0,0 @@ -package tsm1 - -import ( - "reflect" - "testing" - - "github.com/google/go-cmp/cmp" -) - -func TestIndirectIndexIterator(t *testing.T) { - checkEqual := func(t *testing.T, got, exp interface{}) { - t.Helper() - if !reflect.DeepEqual(got, exp) { - t.Fatalf("expected: %v but got: %v\n%v", exp, got, cmp.Diff(got, exp)) - } - } - - index := NewIndexWriter() - index.Add([]byte("cpu1"), BlockInteger, 0, 10, 10, 20) - index.Add([]byte("cpu1"), BlockInteger, 10, 20, 10, 20) - index.Add([]byte("cpu2"), BlockInteger, 0, 10, 10, 20) - index.Add([]byte("cpu2"), BlockInteger, 10, 20, 10, 20) - index.Add([]byte("mem"), BlockInteger, 0, 10, 10, 20) - ind := loadIndex(t, index) - - // check that the iterator walks the whole index - iter := ind.Iterator(nil) - checkEqual(t, iter.Next(), true) - checkEqual(t, iter.Peek(), []byte("cpu2")) - checkEqual(t, iter.Key(), []byte("cpu1")) - checkEqual(t, iter.Type(), BlockInteger) - checkEqual(t, iter.Entries(), []IndexEntry{ - {0, 10, 10, 20}, - {10, 20, 10, 20}, - }) - checkEqual(t, iter.Next(), true) - checkEqual(t, iter.Peek(), []byte("mem")) - checkEqual(t, iter.Key(), []byte("cpu2")) - checkEqual(t, iter.Type(), BlockInteger) - checkEqual(t, iter.Entries(), []IndexEntry{ - {0, 10, 10, 20}, - {10, 20, 10, 20}, - }) - checkEqual(t, iter.Next(), true) - checkEqual(t, iter.Peek(), []byte(nil)) - checkEqual(t, iter.Key(), []byte("mem")) - checkEqual(t, iter.Type(), BlockInteger) - checkEqual(t, iter.Entries(), []IndexEntry{ - {0, 10, 10, 20}, - }) - checkEqual(t, iter.Next(), false) - checkEqual(t, iter.Err(), error(nil)) - - // check can seek and iterate index - iter = ind.Iterator(nil) - exact, ok := 
iter.Seek([]byte("cpu2")) - checkEqual(t, exact, true) - checkEqual(t, ok, true) - checkEqual(t, iter.Key(), []byte("cpu2")) - checkEqual(t, iter.Type(), BlockInteger) - checkEqual(t, iter.Entries(), []IndexEntry{ - {0, 10, 10, 20}, - {10, 20, 10, 20}, - }) - checkEqual(t, iter.Next(), true) - checkEqual(t, iter.Key(), []byte("mem")) - checkEqual(t, iter.Next(), false) - exact, ok = iter.Seek([]byte("cpu1")) - checkEqual(t, exact, true) - checkEqual(t, ok, true) - checkEqual(t, iter.Key(), []byte("cpu1")) - exact, ok = iter.Seek([]byte("cpu3")) - checkEqual(t, exact, false) - checkEqual(t, ok, true) - checkEqual(t, iter.Key(), []byte("mem")) - exact, ok = iter.Seek([]byte("cpu0")) - checkEqual(t, exact, false) - checkEqual(t, ok, true) - checkEqual(t, iter.Key(), []byte("cpu1")) - exact, ok = iter.Seek([]byte("zzz")) - checkEqual(t, exact, false) - checkEqual(t, ok, false) - checkEqual(t, iter.Next(), false) - checkEqual(t, iter.Err(), error(nil)) - - // delete the cpu2 key and make sure it's skipped - ind.Delete([][]byte{[]byte("cpu2")}) - iter = ind.Iterator(nil) - checkEqual(t, iter.Next(), true) - checkEqual(t, iter.Peek(), []byte("mem")) - checkEqual(t, iter.Key(), []byte("cpu1")) - checkEqual(t, iter.Type(), BlockInteger) - checkEqual(t, iter.Entries(), []IndexEntry{ - {0, 10, 10, 20}, - {10, 20, 10, 20}, - }) - checkEqual(t, iter.Next(), true) - checkEqual(t, iter.Peek(), []byte(nil)) - checkEqual(t, iter.Key(), []byte("mem")) - checkEqual(t, iter.Type(), BlockInteger) - checkEqual(t, iter.Entries(), []IndexEntry{ - {0, 10, 10, 20}, - }) - checkEqual(t, iter.Next(), false) - checkEqual(t, iter.Err(), error(nil)) - - // check that seek works - iter = ind.Iterator([]byte("d")) - checkEqual(t, iter.Next(), true) - checkEqual(t, iter.Peek(), []byte(nil)) - checkEqual(t, iter.Key(), []byte("mem")) - checkEqual(t, iter.Type(), BlockInteger) - checkEqual(t, iter.Entries(), []IndexEntry{ - {0, 10, 10, 20}, - }) - checkEqual(t, iter.Next(), false) - checkEqual(t, iter.Err(), error(nil)) -} diff --git a/tsdb/tsm1/reader_index_test.go b/tsdb/tsm1/reader_index_test.go deleted file mode 100644 index 76b0cfc130..0000000000 --- a/tsdb/tsm1/reader_index_test.go +++ /dev/null @@ -1,643 +0,0 @@ -package tsm1 - -import ( - "bytes" - "fmt" - "math" - "math/rand" - "reflect" - "sync" - "sync/atomic" - "testing" -) - -func loadIndex(tb testing.TB, w IndexWriter) *indirectIndex { - tb.Helper() - - b, err := w.MarshalBinary() - fatalIfErr(tb, "marshaling index", err) - - indir := NewIndirectIndex() - fatalIfErr(tb, "unmarshaling index", indir.UnmarshalBinary(b)) - - return indir -} - -func TestIndirectIndex_Entries_NonExistent(t *testing.T) { - index := NewIndexWriter() - index.Add([]byte("cpu"), BlockFloat64, 0, 1, 10, 100) - index.Add([]byte("cpu"), BlockFloat64, 2, 3, 20, 200) - ind := loadIndex(t, index) - - // mem has not been added to the index so we should get no entries back - // for both - exp := index.Entries([]byte("mem")) - entries, err := ind.ReadEntries([]byte("mem"), nil) - if err != nil { - t.Fatal(err) - } - - if got, exp := len(entries), len(exp); got != exp && exp != 0 { - t.Fatalf("entries length mismatch: got %v, exp %v", got, exp) - } -} - -func TestIndirectIndex_Type(t *testing.T) { - index := NewIndexWriter() - index.Add([]byte("cpu"), BlockInteger, 0, 1, 10, 20) - ind := loadIndex(t, index) - - typ, err := ind.Type([]byte("cpu")) - if err != nil { - fatal(t, "reading type", err) - } - - if got, exp := typ, BlockInteger; got != exp { - t.Fatalf("type mismatch: got %v, exp %v", got, 
exp) - } -} - -func TestIndirectIndex_Delete(t *testing.T) { - check := func(t *testing.T, got, exp bool) { - t.Helper() - if exp != got { - t.Fatalf("expected: %v but got: %v", exp, got) - } - } - - index := NewIndexWriter() - index.Add([]byte("cpu1"), BlockInteger, 0, 10, 10, 20) - index.Add([]byte("cpu1"), BlockInteger, 10, 20, 10, 20) - index.Add([]byte("cpu2"), BlockInteger, 0, 10, 10, 20) - index.Add([]byte("cpu2"), BlockInteger, 10, 20, 10, 20) - index.Add([]byte("mem"), BlockInteger, 0, 10, 10, 20) - ind := loadIndex(t, index) - - ind.Delete([][]byte{[]byte("cpu1")}) - - check(t, ind.Contains([]byte("mem")), true) - check(t, ind.Contains([]byte("cpu1")), false) - check(t, ind.Contains([]byte("cpu2")), true) - - ind.Delete([][]byte{[]byte("cpu1"), []byte("cpu2")}) - - check(t, ind.Contains([]byte("mem")), true) - check(t, ind.Contains([]byte("cpu1")), false) - check(t, ind.Contains([]byte("cpu2")), false) - - ind.Delete([][]byte{[]byte("mem")}) - - check(t, ind.Contains([]byte("mem")), false) - check(t, ind.Contains([]byte("cpu1")), false) - check(t, ind.Contains([]byte("cpu2")), false) -} - -func TestIndirectIndex_DeleteRange(t *testing.T) { - check := func(t *testing.T, got, exp bool) { - t.Helper() - if exp != got { - t.Fatalf("expected: %v but got: %v", exp, got) - } - } - - index := NewIndexWriter() - index.Add([]byte("cpu1"), BlockInteger, 0, 10, 10, 20) - index.Add([]byte("cpu1"), BlockInteger, 10, 20, 10, 20) - index.Add([]byte("cpu2"), BlockInteger, 0, 10, 10, 20) - index.Add([]byte("cpu2"), BlockInteger, 10, 20, 10, 20) - index.Add([]byte("mem"), BlockInteger, 0, 10, 10, 20) - ind := loadIndex(t, index) - - ind.DeleteRange([][]byte{[]byte("cpu1")}, 5, 15) - - check(t, ind.Contains([]byte("mem")), true) - check(t, ind.Contains([]byte("cpu1")), true) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 4), true) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 5), false) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 10), false) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 15), false) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 16), true) - check(t, ind.Contains([]byte("cpu2")), true) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 4), true) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 5), true) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 10), true) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 15), true) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 16), true) - - ind.DeleteRange([][]byte{[]byte("cpu1"), []byte("cpu2")}, 0, 5) - - check(t, ind.Contains([]byte("mem")), true) - check(t, ind.Contains([]byte("cpu1")), true) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 4), false) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 5), false) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 10), false) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 15), false) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 16), true) - check(t, ind.Contains([]byte("cpu2")), true) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 4), false) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 5), false) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 10), true) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 15), true) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 16), true) - - ind.DeleteRange([][]byte{[]byte("cpu1"), []byte("cpu2")}, 15, 20) - - check(t, ind.Contains([]byte("mem")), true) - check(t, ind.Contains([]byte("cpu1")), false) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 4), false) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 5), 
false) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 10), false) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 15), false) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 16), false) - check(t, ind.Contains([]byte("cpu2")), true) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 4), false) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 5), false) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 10), true) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 15), false) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 16), false) -} - -func TestIndirectIndex_DeletePrefix(t *testing.T) { - check := func(t *testing.T, got, exp bool) { - t.Helper() - if exp != got { - t.Fatalf("expected: %v but got: %v", exp, got) - } - } - - index := NewIndexWriter() - index.Add([]byte("cpu1"), BlockInteger, 0, 10, 10, 20) - index.Add([]byte("cpu1"), BlockInteger, 10, 20, 10, 20) - index.Add([]byte("cpu2"), BlockInteger, 0, 10, 10, 20) - index.Add([]byte("cpu2"), BlockInteger, 10, 20, 10, 20) - index.Add([]byte("mem"), BlockInteger, 0, 10, 10, 20) - ind := loadIndex(t, index) - - ind.DeletePrefix([]byte("c"), 5, 15, nil, nil) - - check(t, ind.Contains([]byte("mem")), true) - check(t, ind.Contains([]byte("cpu1")), true) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 4), true) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 5), false) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 10), false) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 15), false) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 16), true) - check(t, ind.Contains([]byte("cpu2")), true) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 4), true) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 5), false) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 10), false) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 15), false) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 16), true) - - ind.DeletePrefix([]byte("cp"), 0, 5, nil, nil) - - check(t, ind.Contains([]byte("mem")), true) - check(t, ind.Contains([]byte("cpu1")), true) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 4), false) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 5), false) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 10), false) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 15), false) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 16), true) - check(t, ind.Contains([]byte("cpu2")), true) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 4), false) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 5), false) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 10), false) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 15), false) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 16), true) - - ind.DeletePrefix([]byte("cpu"), 15, 20, nil, nil) - - check(t, ind.Contains([]byte("mem")), true) - check(t, ind.Contains([]byte("cpu1")), false) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 4), false) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 5), false) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 10), false) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 15), false) - check(t, ind.MaybeContainsValue([]byte("cpu1"), 16), false) - check(t, ind.Contains([]byte("cpu2")), false) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 4), false) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 5), false) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 10), false) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 15), false) - check(t, ind.MaybeContainsValue([]byte("cpu2"), 16), false) -} - -func 
TestIndirectIndex_DeletePrefix_NoMatch(t *testing.T) { - check := func(t *testing.T, got, exp bool) { - t.Helper() - if exp != got { - t.Fatalf("expected: %v but got: %v", exp, got) - } - } - - index := NewIndexWriter() - index.Add([]byte("cpu"), BlockInteger, 0, 10, 10, 20) - ind := loadIndex(t, index) - - ind.DeletePrefix([]byte("b"), 5, 5, nil, nil) - ind.DeletePrefix([]byte("d"), 5, 5, nil, nil) - - check(t, ind.Contains([]byte("cpu")), true) - check(t, ind.MaybeContainsValue([]byte("cpu"), 5), true) -} - -func TestIndirectIndex_DeletePrefix_Dead(t *testing.T) { - check := func(t *testing.T, got, exp interface{}) { - t.Helper() - if !reflect.DeepEqual(exp, got) { - t.Fatalf("expected: %q but got: %q", exp, got) - } - } - - var keys [][]byte - dead := func(key []byte) { keys = append(keys, append([]byte(nil), key...)) } - - b := func(keys ...string) (out [][]byte) { - for _, key := range keys { - out = append(out, []byte(key)) - } - return out - } - - index := NewIndexWriter() - index.Add([]byte("cpu"), BlockInteger, 0, 10, 10, 20) - index.Add([]byte("dpu"), BlockInteger, 0, 10, 10, 20) - ind := loadIndex(t, index) - - ind.DeletePrefix([]byte("b"), 5, 5, nil, dead) - check(t, keys, b()) - - ind.DeletePrefix([]byte("c"), 0, 9, nil, dead) - check(t, keys, b()) - - ind.DeletePrefix([]byte("c"), 9, 10, nil, dead) - check(t, keys, b("cpu")) - - ind.DeletePrefix([]byte("d"), -50, 50, nil, dead) - check(t, keys, b("cpu", "dpu")) -} - -func TestIndirectIndex_DeletePrefix_Dead_Fuzz(t *testing.T) { - key := bytes.Repeat([]byte("X"), 32) - check := func(t *testing.T, got, exp interface{}) { - t.Helper() - if !reflect.DeepEqual(exp, got) { - t.Fatalf("expected: %v but got: %v", exp, got) - } - } - - for i := 0; i < 5000; i++ { - // Create an index with the key in it - writer := NewIndexWriter() - writer.Add(key, BlockInteger, 0, 10, 10, 20) - ind := loadIndex(t, writer) - - // Keep track if dead is ever called. - happened := uint64(0) - dead := func([]byte) { atomic.AddUint64(&happened, 1) } - - // Build up a random set of operations to delete the key. - ops := make([]func(), 9) - for j := range ops { - n := int64(j) - if rand.Intn(2) == 0 { - kn := key[:rand.Intn(len(key))] - ops[j] = func() { ind.DeletePrefix(kn, n, n+1, nil, dead) } - } else { - ops[j] = func() { ind.DeleteRange([][]byte{key}, n, n+1) } - } - } - - // Since we will run the ops concurrently, this shuffle is unnecessary - // but it might provide more coverage of random orderings than the - // scheduler randomness alone. - rand.Shuffle(len(ops), func(i, j int) { ops[i], ops[j] = ops[j], ops[i] }) - - // Run the operations concurrently. The key should never be dead. - var wg sync.WaitGroup - for _, op := range ops { - op := op - wg.Add(1) - go func() { op(); wg.Done() }() - } - wg.Wait() - check(t, happened, uint64(0)) - - // Run the last delete operation. It should kill the key. - ind.DeletePrefix(key, 9, 10, nil, dead) - check(t, happened, uint64(1)) - } -} - -// -// indirectIndex benchmarks -// - -const ( - indexKeyCount = 500000 - indexBlockCount = 100 -) - -type indexCacheInfo struct { - index *indirectIndex - offsets []uint32 - prefixes []prefixEntry - allKeys [][]byte - bytes []byte -} - -func (i *indexCacheInfo) reset() { - i.index.ro.offsets = append([]uint32(nil), i.offsets...) - i.index.ro.prefixes = append([]prefixEntry(nil), i.prefixes...) 
- i.index.tombstones = make(map[uint32][]TimeRange) - i.index.prefixTombstones = newPrefixTree() - resetFaults(i.index) -} - -var ( - indexCache = map[string]*indexCacheInfo{} - indexSizes = map[string][2]int{ - "large": {500000, 100}, - "med": {1000, 1000}, - "small": {5000, 2}, - } -) - -func getFaults(indirect *indirectIndex) int64 { - return int64(atomic.LoadUint64(&indirect.b.faults)) -} - -func resetFaults(indirect *indirectIndex) { - if indirect != nil { - indirect.b = faultBuffer{b: indirect.b.b} - } -} - -func getIndex(tb testing.TB, name string) (*indirectIndex, *indexCacheInfo) { - info, ok := indexCache[name] - if ok { - info.reset() - return info.index, info - } - info = new(indexCacheInfo) - - sizes, ok := indexSizes[name] - if !ok { - sizes = [2]int{indexKeyCount, indexBlockCount} - } - keys, blocks := sizes[0], sizes[1] - - writer := NewIndexWriter() - - // add a ballast key that starts at -1 so that we don't trigger optimizations - // when deleting [0, MaxInt] - writer.Add([]byte("ballast"), BlockFloat64, -1, 1, 0, 100) - - for i := 0; i < keys; i++ { - key := []byte(fmt.Sprintf("cpu-%08d", i)) - info.allKeys = append(info.allKeys, key) - for j := 0; j < blocks; j++ { - writer.Add(key, BlockFloat64, 0, 100, 10, 100) - } - } - - var err error - info.bytes, err = writer.MarshalBinary() - if err != nil { - tb.Fatalf("unexpected error marshaling index: %v", err) - } - - info.index = NewIndirectIndex() - if err = info.index.UnmarshalBinary(info.bytes); err != nil { - tb.Fatalf("unexpected error unmarshaling index: %v", err) - } - info.offsets = append([]uint32(nil), info.index.ro.offsets...) - info.prefixes = append([]prefixEntry(nil), info.index.ro.prefixes...) - - indexCache[name] = info - return info.index, info -} - -func BenchmarkIndirectIndex_UnmarshalBinary(b *testing.B) { - indirect, info := getIndex(b, "large") - b.ReportAllocs() - b.ResetTimer() - - for i := 0; i < b.N; i++ { - if err := indirect.UnmarshalBinary(info.bytes); err != nil { - b.Fatalf("unexpected error unmarshaling index: %v", err) - } - } -} - -func BenchmarkIndirectIndex_Entries(b *testing.B) { - indirect, _ := getIndex(b, "med") - b.ReportAllocs() - b.ResetTimer() - - for i := 0; i < b.N; i++ { - resetFaults(indirect) - indirect.ReadEntries([]byte("cpu-00000001"), nil) - } - - if faultBufferEnabled { - b.SetBytes(getFaults(indirect) * 4096) - b.Log("recorded faults:", getFaults(indirect)) - } -} - -func BenchmarkIndirectIndex_ReadEntries(b *testing.B) { - var entries []IndexEntry - indirect, _ := getIndex(b, "med") - b.ReportAllocs() - b.ResetTimer() - - for i := 0; i < b.N; i++ { - resetFaults(indirect) - entries, _ = indirect.ReadEntries([]byte("cpu-00000001"), entries) - } - - if faultBufferEnabled { - b.SetBytes(getFaults(indirect) * 4096) - b.Log("recorded faults:", getFaults(indirect)) - } -} - -func BenchmarkBlockIterator_Next(b *testing.B) { - indirect, _ := getIndex(b, "med") - r := TSMReader{index: indirect} - b.ResetTimer() - - for i := 0; i < b.N; i++ { - resetFaults(indirect) - bi := r.BlockIterator() - for bi.Next() { - } - } - - if faultBufferEnabled { - b.SetBytes(getFaults(indirect) * 4096) - b.Log("recorded faults:", getFaults(indirect)) - } -} - -func BenchmarkIndirectIndex_DeleteRangeLast(b *testing.B) { - indirect, _ := getIndex(b, "large") - keys := [][]byte{[]byte("cpu-00999999")} - - b.ReportAllocs() - b.ResetTimer() - - for i := 0; i < b.N; i++ { - resetFaults(indirect) - indirect.DeleteRange(keys, 10, 50) - } - - if faultBufferEnabled { - b.SetBytes(getFaults(indirect) * 
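The deleted benchmarks above surface page-fault counts through the standard benchmark machinery by converting faults into bytes with b.SetBytes. A minimal sketch of that reporting pattern, with a plain atomic counter standing in for the faultBuffer's fault count (the names and the workload below are illustrative, not part of the package):

```go
package example

import (
	"sync/atomic"
	"testing"
)

// pageFaults stands in for the faultBuffer's fault counter used by the
// deleted benchmarks.
var pageFaults uint64

func workThatFaults() {
	// Pretend three 4 KiB pages had to be faulted in.
	atomic.AddUint64(&pageFaults, 3)
}

func BenchmarkFaultAccounting(b *testing.B) {
	const pageSize = 4096
	b.ReportAllocs()

	for i := 0; i < b.N; i++ {
		// Reset per iteration, as resetFaults does in the deleted code.
		atomic.StoreUint64(&pageFaults, 0)
		workThatFaults()
	}

	// Converting faults to bytes makes `go test -bench` print an MB/s column,
	// which is how the deleted benchmarks exposed fault volume.
	b.SetBytes(int64(atomic.LoadUint64(&pageFaults)) * pageSize)
}
```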
4096) - b.Log("recorded faults:", getFaults(indirect)) - } -} - -func BenchmarkIndirectIndex_DeleteRangeFull(b *testing.B) { - run := func(b *testing.B, name string) { - indirect, _ := getIndex(b, name) - b.ReportAllocs() - b.ResetTimer() - - for i := 0; i < b.N; i++ { - b.StopTimer() - var info *indexCacheInfo - indirect, info = getIndex(b, name) - b.StartTimer() - - for i := 0; i < len(info.allKeys); i += 4096 { - n := i + 4096 - if n > len(info.allKeys) { - n = len(info.allKeys) - } - indirect.DeleteRange(info.allKeys[i:n], 10, 50) - } - } - - if faultBufferEnabled { - b.SetBytes(getFaults(indirect) * 4096) - b.Log("recorded faults:", getFaults(indirect)) - } - } - - b.Run("Large", func(b *testing.B) { run(b, "large") }) - b.Run("Small", func(b *testing.B) { run(b, "small") }) -} - -func BenchmarkIndirectIndex_DeleteRangeFull_Covered(b *testing.B) { - run := func(b *testing.B, name string) { - indirect, _ := getIndex(b, name) - b.ReportAllocs() - b.ResetTimer() - - for i := 0; i < b.N; i++ { - b.StopTimer() - var info *indexCacheInfo - indirect, info = getIndex(b, name) - b.StartTimer() - - for i := 0; i < len(info.allKeys); i += 4096 { - n := i + 4096 - if n > len(info.allKeys) { - n = len(info.allKeys) - } - indirect.DeleteRange(info.allKeys[i:n], 0, math.MaxInt64) - } - } - - if faultBufferEnabled { - b.SetBytes(getFaults(indirect) * 4096) - b.Log("recorded faults:", getFaults(indirect)) - } - } - - b.Run("Large", func(b *testing.B) { run(b, "large") }) - b.Run("Small", func(b *testing.B) { run(b, "small") }) -} - -func BenchmarkIndirectIndex_Delete(b *testing.B) { - run := func(b *testing.B, name string) { - indirect, _ := getIndex(b, name) - b.ReportAllocs() - b.ResetTimer() - - for i := 0; i < b.N; i++ { - b.StopTimer() - var info *indexCacheInfo - indirect, info = getIndex(b, name) - b.StartTimer() - - for i := 0; i < len(info.allKeys); i += 4096 { - n := i + 4096 - if n > len(info.allKeys) { - n = len(info.allKeys) - } - indirect.Delete(info.allKeys[i:n]) - } - } - - if faultBufferEnabled { - b.SetBytes(getFaults(indirect) * 4096) - b.Log("recorded faults:", getFaults(indirect)) - } - } - - b.Run("Large", func(b *testing.B) { run(b, "large") }) - b.Run("Small", func(b *testing.B) { run(b, "small") }) -} - -func BenchmarkIndirectIndex_DeletePrefixFull(b *testing.B) { - prefix := []byte("cpu-") - run := func(b *testing.B, name string) { - indirect, _ := getIndex(b, name) - b.ReportAllocs() - b.ResetTimer() - - for i := 0; i < b.N; i++ { - b.StopTimer() - indirect, _ = getIndex(b, name) - b.StartTimer() - - indirect.DeletePrefix(prefix, 10, 50, nil, nil) - } - - if faultBufferEnabled { - b.SetBytes(getFaults(indirect) * 4096) - b.Log("recorded faults:", getFaults(indirect)) - } - } - - b.Run("Large", func(b *testing.B) { run(b, "large") }) - b.Run("Small", func(b *testing.B) { run(b, "small") }) -} - -func BenchmarkIndirectIndex_DeletePrefixFull_Covered(b *testing.B) { - prefix := []byte("cpu-") - run := func(b *testing.B, name string) { - indirect, _ := getIndex(b, name) - b.ReportAllocs() - b.ResetTimer() - - for i := 0; i < b.N; i++ { - b.StopTimer() - indirect, _ = getIndex(b, name) - b.StartTimer() - - indirect.DeletePrefix(prefix, 0, math.MaxInt64, nil, nil) - } - - if faultBufferEnabled { - b.SetBytes(getFaults(indirect) * 4096) - b.Log("recorded faults:", getFaults(indirect)) - } - } - - b.Run("Large", func(b *testing.B) { run(b, "large") }) - b.Run("Small", func(b *testing.B) { run(b, "small") }) -} diff --git a/tsdb/tsm1/reader_mmap.go b/tsdb/tsm1/reader_mmap.go deleted 
file mode 100644 index cbb936d8d6..0000000000 --- a/tsdb/tsm1/reader_mmap.go +++ /dev/null @@ -1,273 +0,0 @@ -package tsm1 - -import ( - "context" - "encoding/binary" - "fmt" - "os" - "sync" - "sync/atomic" - - "github.com/influxdata/influxdb/v2/pkg/fs" - "github.com/influxdata/influxdb/v2/pkg/mincore" - "go.uber.org/zap" -) - -// mmapAccess is mmap based block accessor. It access blocks through an -// MMAP file interface. -type mmapAccessor struct { - accessCount uint64 // Counter incremented everytime the mmapAccessor is accessed - freeCount uint64 // Counter to determine whether the accessor can free its resources - - logger *zap.Logger - mmapWillNeed bool // If true then mmap advise value MADV_WILLNEED will be provided the kernel for b. - - mu sync.RWMutex - b []byte - f *os.File - _path string // If the underlying file is renamed then this gets updated - - pageFaultLimiter *mincore.Limiter // limits page fault accesses - - index *indirectIndex -} - -func (m *mmapAccessor) init() (*indirectIndex, error) { - m.mu.Lock() - defer m.mu.Unlock() - - // Set the path explicitly. - m._path = m.f.Name() - - if err := verifyVersion(m.f); err != nil { - return nil, err - } - - var err error - - if _, err := m.f.Seek(0, 0); err != nil { - return nil, err - } - - stat, err := m.f.Stat() - if err != nil { - return nil, err - } - - m.b, err = mmap(m.f, 0, int(stat.Size())) - if err != nil { - return nil, err - } - if len(m.b) < 8 { - return nil, fmt.Errorf("mmapAccessor: byte slice too small for indirectIndex") - } - - // Hint to the kernel that we will be reading the file. It would be better to hint - // that we will be reading the index section, but that's not been - // implemented as yet. - if m.mmapWillNeed { - if err := madviseWillNeed(m.b); err != nil { - return nil, err - } - } - - indexOfsPos := len(m.b) - 8 - indexStart := binary.BigEndian.Uint64(m.b[indexOfsPos : indexOfsPos+8]) - if indexStart >= uint64(indexOfsPos) { - return nil, fmt.Errorf("mmapAccessor: invalid indexStart") - } - - m.index = NewIndirectIndex() - if err := m.index.UnmarshalBinary(m.b[indexStart:indexOfsPos]); err != nil { - return nil, err - } - m.index.logger = m.logger - - // Allow resources to be freed immediately if requested - m.incAccess() - atomic.StoreUint64(&m.freeCount, 1) - - return m.index, nil -} - -func (m *mmapAccessor) free() error { - accessCount := atomic.LoadUint64(&m.accessCount) - freeCount := atomic.LoadUint64(&m.freeCount) - - // Already freed everything. - if freeCount == 0 && accessCount == 0 { - return nil - } - - // Were there accesses after the last time we tried to free? - // If so, don't free anything and record the access count that we - // see now for the next check. - if accessCount != freeCount { - atomic.StoreUint64(&m.freeCount, accessCount) - return nil - } - - // Reset both counters to zero to indicate that we have freed everything. 
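For context on the init path being removed here: a TSM file stores the offset of its index in the final 8 bytes, and the deleted mmapAccessor.init slices the index out of the mapped region using that footer. A small, self-contained sketch of that footer handling (function and package names are illustrative):

```go
package example

import (
	"encoding/binary"
	"fmt"
)

// indexSection returns the index portion of a memory-mapped TSM file body:
// the last 8 bytes hold a big-endian offset to where the index starts, and
// the index runs from that offset up to the footer itself.
func indexSection(b []byte) ([]byte, error) {
	if len(b) < 8 {
		return nil, fmt.Errorf("mapped region too small for an index footer")
	}
	footer := len(b) - 8
	indexStart := binary.BigEndian.Uint64(b[footer:])
	if indexStart >= uint64(footer) {
		return nil, fmt.Errorf("invalid index start %d", indexStart)
	}
	return b[indexStart:footer], nil
}
```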
- atomic.StoreUint64(&m.accessCount, 0) - atomic.StoreUint64(&m.freeCount, 0) - - m.mu.RLock() - defer m.mu.RUnlock() - - return madviseDontNeed(m.b) -} - -func (m *mmapAccessor) incAccess() { - atomic.AddUint64(&m.accessCount, 1) -} - -func (m *mmapAccessor) rename(path string) error { - m.incAccess() - - m.mu.Lock() - defer m.mu.Unlock() - - if err := fs.RenameFileWithReplacement(m._path, path); err != nil { - return err - } - m._path = path - return nil -} - -func (m *mmapAccessor) read(key []byte, timestamp int64) ([]Value, error) { - entry := m.index.Entry(key, timestamp) - if entry == nil { - return nil, nil - } - - return m.readBlock(entry, nil) -} - -func (m *mmapAccessor) readBlock(entry *IndexEntry, values []Value) ([]Value, error) { - m.incAccess() - - m.mu.RLock() - defer m.mu.RUnlock() - - if int64(len(m.b)) < entry.Offset+int64(entry.Size) { - return nil, ErrTSMClosed - } - //TODO: Validate checksum - var err error - b := m.b[entry.Offset+4 : entry.Offset+int64(entry.Size)] - values, err = DecodeBlock(b, values) - if err != nil { - return nil, err - } - - // Rate limit page faults. - if err := m.wait(b); err != nil { - return nil, err - } - - return values, nil -} - -func (m *mmapAccessor) readBytes(entry *IndexEntry, b []byte) (uint32, []byte, error) { - m.incAccess() - - m.mu.RLock() - if int64(len(m.b)) < entry.Offset+int64(entry.Size) { - m.mu.RUnlock() - return 0, nil, ErrTSMClosed - } - - // return the bytes after the 4 byte checksum - crc, block := binary.BigEndian.Uint32(m.b[entry.Offset:entry.Offset+4]), m.b[entry.Offset+4:entry.Offset+int64(entry.Size)] - m.mu.RUnlock() - - // Rate limit page faults. - if err := m.wait(m.b[entry.Offset : entry.Offset+4]); err != nil { - return 0, nil, err - } else if err := m.wait(block); err != nil { - return 0, nil, err - } - - return crc, block, nil -} - -// readAll returns all values for a key in all blocks. -func (m *mmapAccessor) readAll(key []byte) ([]Value, error) { - m.incAccess() - - blocks, err := m.index.ReadEntries(key, nil) - if len(blocks) == 0 || err != nil { - return nil, err - } - - tombstones := m.index.TombstoneRange(key, nil) - - m.mu.RLock() - defer m.mu.RUnlock() - - var temp []Value - var values []Value - for _, block := range blocks { - var skip bool - for _, t := range tombstones { - // Should we skip this block because it contains points that have been deleted - if t.Min <= block.MinTime && t.Max >= block.MaxTime { - skip = true - break - } - } - - if skip { - continue - } - //TODO: Validate checksum - temp = temp[:0] - // The +4 is the 4 byte checksum length - temp, err = DecodeBlock(m.b[block.Offset+4:block.Offset+int64(block.Size)], temp) - if err != nil { - return nil, err - } else if err := m.wait(m.b[block.Offset+4 : block.Offset+int64(block.Size)]); err != nil { - return nil, err - } - - // Filter out any values that were deleted - for _, t := range tombstones { - temp = Values(temp).Exclude(t.Min, t.Max) - } - - values = append(values, temp...) - } - - return values, nil -} - -func (m *mmapAccessor) path() string { - m.mu.RLock() - defer m.mu.RUnlock() - return m._path -} - -func (m *mmapAccessor) close() error { - m.mu.Lock() - defer m.mu.Unlock() - - if m.b == nil { - return nil - } - - err := munmap(m.b) - if err != nil { - return err - } - - m.b = nil - return m.f.Close() -} - -// wait rate limits page faults to the underlying data. Skipped if limiter is not set. 
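The free/incAccess pair being removed above implements an "only release when idle" policy: madvise(DONTNEED) is issued only if no access happened since the previous free attempt. A stripped-down sketch of that counter handshake, with hypothetical names and a caller-supplied release func in place of the madvise call:

```go
package example

import "sync/atomic"

// idleTracker mimics the accessCount/freeCount handshake of the deleted
// mmapAccessor: resources are released only when a full free pass goes by
// with no new accesses.
type idleTracker struct {
	accessCount uint64 // bumped on every read
	freeCount   uint64 // snapshot of accessCount at the last free attempt
}

func (t *idleTracker) access() { atomic.AddUint64(&t.accessCount, 1) }

// tryFree reports whether release was actually invoked.
func (t *idleTracker) tryFree(release func()) bool {
	access := atomic.LoadUint64(&t.accessCount)
	free := atomic.LoadUint64(&t.freeCount)

	if access == 0 && free == 0 {
		return false // already released earlier, nothing to do
	}
	if access != free {
		// There were accesses since the last attempt: remember the new level
		// and defer releasing until the data has gone quiet.
		atomic.StoreUint64(&t.freeCount, access)
		return false
	}

	// Quiet since the last attempt: zero both counters and release (the real
	// code calls madviseDontNeed on the mapped bytes here).
	atomic.StoreUint64(&t.accessCount, 0)
	atomic.StoreUint64(&t.freeCount, 0)
	release()
	return true
}
```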
-func (m *mmapAccessor) wait(b []byte) error { - if m.pageFaultLimiter == nil { - return nil - } - return m.pageFaultLimiter.WaitRange(context.Background(), b) -} diff --git a/tsdb/tsm1/reader_offsets.go b/tsdb/tsm1/reader_offsets.go deleted file mode 100644 index fc3488984b..0000000000 --- a/tsdb/tsm1/reader_offsets.go +++ /dev/null @@ -1,255 +0,0 @@ -package tsm1 - -import ( - "bytes" - "encoding/binary" - "sync/atomic" - "unsafe" -) - -// readerOffsets keeps track of offsets of keys for an indirectIndex. -type readerOffsets struct { - offsets []uint32 - prefixes []prefixEntry - entry prefixEntry -} - -// prefixEntry keeps a prefix along with a prefix sum of the total number of -// keys with the given prefix. -type prefixEntry struct { - pre prefix - total int // partial sums -} - -// prefix is a byte prefix of a key that sorts the same way the key does. -type prefix [8]byte - -const prefixSize = len(prefix{}) - -// comparePrefix is like bytes.Compare but for a prefix. -func comparePrefix(a, b prefix) int { - au, bu := binary.BigEndian.Uint64(a[:8]), binary.BigEndian.Uint64(b[:8]) - if au == bu { - return 0 - } else if au < bu { - return -1 - } - return 1 -} - -// keyPrefix returns a prefix that can be used with compare -// to sort the same way the bytes would. -func keyPrefix(key []byte) (pre prefix) { - if len(key) >= prefixSize { - return *(*prefix)(unsafe.Pointer(&key[0])) - } - copy(pre[:], key) - return pre -} - -// searchPrefix returns the index of the prefixEntry for the nth offset. -func (r *readerOffsets) searchPrefix(n int) int { - i, j := 0, len(r.prefixes) - for i < j { - h := int(uint(i+j) >> 1) - if n >= r.prefixes[h].total { - i = h + 1 - } else { - j = h - } - } - return i -} - -// AddKey tracks the key in the readerOffsets at the given offset. -func (r *readerOffsets) AddKey(offset uint32, key []byte) { - r.offsets = append(r.offsets, offset) - pre := keyPrefix(key) - if r.entry.pre != pre && r.entry.total != 0 { - r.prefixes = append(r.prefixes, r.entry) - } - r.entry.pre = pre - r.entry.total++ -} - -// done signals that we are done adding keys. -func (r *readerOffsets) Done() { - r.prefixes = append(r.prefixes, r.entry) -} - -// Iterator returns an iterator that can walk and seek around the keys cheaply. -func (r *readerOffsets) Iterator() readerOffsetsIterator { - return readerOffsetsIterator{r: r, first: true} -} - -// -// iterator stuff -// - -// readerOffsetsIterator iterates over the keys in readerOffsets. -type readerOffsetsIterator struct { - r *readerOffsets - first bool // is this the first call to next? - del bool // has delete been called? - i int // index into offsets - pi int // index into prefixes - ks rOIKeyState // current key state -} - -// rOIKeyState keeps track of cached information for the current key. -type rOIKeyState struct { - length uint16 - key []byte -} - -// Index returns the current pointed at index. -func (ri *readerOffsetsIterator) Index() int { return ri.i } - -// setIndex sets the reader to the given index and clears any cached state. -func (ri *readerOffsetsIterator) setIndex(i, pi int) { - ri.i, ri.pi, ri.ks = i, pi, rOIKeyState{} -} - -// Length returns the length of the current pointed at key. -func (ri *readerOffsetsIterator) Length(b *faultBuffer) uint16 { - if ri.ks.length == 0 { - buf := b.access(ri.Offset(), 2) - ri.ks.length = uint16(buf[0])<<8 | uint16(buf[1]) - } - return ri.ks.length -} - -// Key returns the current pointed at key. 
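The readerOffsets machinery above keys most comparisons off an 8-byte, order-preserving prefix so that binary searches rarely touch (and fault in) full key bytes. A minimal sketch of that two-level comparison, using a plain copy instead of the unsafe cast the deleted code uses for long keys:

```go
package example

import (
	"bytes"
	"encoding/binary"
)

// prefix holds the first 8 bytes of a key, zero-padded; comparing prefixes as
// big-endian uint64s orders them the same way the raw bytes would.
type prefix [8]byte

func keyPrefix(key []byte) (p prefix) {
	copy(p[:], key)
	return p
}

func comparePrefix(a, b prefix) int {
	au, bu := binary.BigEndian.Uint64(a[:]), binary.BigEndian.Uint64(b[:])
	switch {
	case au < bu:
		return -1
	case au > bu:
		return 1
	default:
		return 0
	}
}

// compareKey does the two-level comparison the deleted iterator uses: the
// cheap prefix check usually decides, and only equal prefixes fall back to
// reading (and possibly faulting in) the full key bytes.
func compareKey(aKey, bKey []byte) int {
	if c := comparePrefix(keyPrefix(aKey), keyPrefix(bKey)); c != 0 {
		return c
	}
	return bytes.Compare(aKey, bKey)
}
```

Because the prefix preserves byte order, the uint64 comparison can only be inconclusive when the prefixes are equal, which is exactly when the full-key fallback runs.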
-func (ri *readerOffsetsIterator) Key(b *faultBuffer) []byte { - if ri.ks.key == nil { - ri.ks.key = b.access(ri.KeyOffset(), uint32(ri.Length(b))) - } - return ri.ks.key -} - -// KeyOffset returns the offset of the current pointed at the key. -func (ri *readerOffsetsIterator) KeyOffset() uint32 { - return ri.Offset() + 2 -} - -// EntryOffset returns the offset of the current pointed at entries (including type byte). -func (ri *readerOffsetsIterator) EntryOffset(b *faultBuffer) uint32 { - return ri.Offset() + 2 + uint32(ri.Length(b)) -} - -// Prefix returns the current pointed at prefix. -func (ri *readerOffsetsIterator) Prefix() prefix { - return ri.r.prefixes[ri.pi].pre -} - -// Offset returns the current pointed at offset. -func (ri *readerOffsetsIterator) Offset() uint32 { - return atomic.LoadUint32(&ri.r.offsets[ri.i]) &^ (1 << 31) -} - -// Next advances the iterator and returns true if it points at a value. -func (ri *readerOffsetsIterator) Next() bool { - if ri.i >= len(ri.r.offsets) { - return false - } else if ri.first { - ri.first = false - return true - } - - ri.i++ - ri.ks = rOIKeyState{} - - for ri.pi < len(ri.r.prefixes) && ri.i >= ri.r.prefixes[ri.pi].total { - ri.pi++ - } - - return ri.i < len(ri.r.offsets) -} - -// Done should be called to finalize up any deletes. Must be called under a write lock. -func (ri *readerOffsetsIterator) Done() { - if !ri.del { - return - } - ri.del = false - - j, psub, pi := 0, 0, 0 - for i, v := range ri.r.offsets { - for pi < len(ri.r.prefixes) && i >= ri.r.prefixes[pi].total { - ri.r.prefixes[pi].total -= psub - pi++ - } - - if v&(1<<31) > 0 { - psub++ - continue - } - - if i != j { - ri.r.offsets[j] = ri.r.offsets[i] - } - j++ - } - - ri.r.offsets = ri.r.offsets[:j] -} - -// Delete flags the entry to be deleted on the next call to Done. Is safe for -// concurrent use under a read lock, but Done must be called under a write lock. -func (ri *readerOffsetsIterator) Delete() { - ri.del = true - if offset := ri.Offset(); offset&(1<<31) == 0 { - atomic.StoreUint32(&ri.r.offsets[ri.i], offset|(1<<31)) - } -} - -// HasDeletes returns true if the iterator has any Delete calls. -func (ri *readerOffsetsIterator) HasDeletes() bool { return ri.del } - -// Seek points the iterator at the smallest key greater than or equal to the -// given key, returning true if it was an exact match. It returns false for -// ok if the key does not exist. -func (ri *readerOffsetsIterator) Seek(key []byte, b *faultBuffer) (exact, ok bool) { - ri.first = false - - pre, i, j, pi := keyPrefix(key), 0, len(ri.r.offsets), 0 - - for i < j { - h := int(uint(i+j) >> 1) - pi = ri.r.searchPrefix(h) - ri.setIndex(h, pi) - - switch ri.Compare(key, pre, b) { - case -1: - i = h + 1 - case 1: - j = h - default: - return true, true - } - } - - ri.setIndex(i, pi) - if ri.i >= len(ri.r.offsets) { - return false, false - } - - for ri.pi < len(ri.r.prefixes) && ri.i >= ri.r.prefixes[ri.pi].total { - ri.pi++ - } - - return bytes.Equal(ri.Key(b), key), true -} - -// SetIndex sets the iterator to point at the nth element. -func (ri *readerOffsetsIterator) SetIndex(n int) { - ri.setIndex(n, ri.r.searchPrefix(n)) -} - -// Compare is like bytes.Compare with the pointed at key, but reduces the amount of faults. 
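Delete and Done above split deletion into two phases: readers mark an offset by setting its top bit (offsets fit in 31 bits), and a later pass under the write lock compacts the slice. A reduced sketch of that mark-then-compact scheme (the prefix partial-sum fixup the real Done also performs is omitted):

```go
package example

import "sync/atomic"

const deletedBit = 1 << 31

// offsetSet sketches the delete scheme of readerOffsetsIterator: offsets fit
// in 31 bits, so the top bit can flag "deleted" without moving anything.
type offsetSet struct {
	offsets []uint32
}

// markDeleted flags the i-th offset; like the original Delete, it is safe to
// call from many readers at once because it only ever sets the same bit.
func (s *offsetSet) markDeleted(i int) {
	if off := atomic.LoadUint32(&s.offsets[i]); off&deletedBit == 0 {
		atomic.StoreUint32(&s.offsets[i], off|deletedBit)
	}
}

// compact drops every flagged offset in place; it must run with exclusive
// access, mirroring Done being called under the write lock.
func (s *offsetSet) compact() {
	j := 0
	for _, v := range s.offsets {
		if v&deletedBit != 0 {
			continue
		}
		s.offsets[j] = v
		j++
	}
	s.offsets = s.offsets[:j]
}
```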
-func (ri *readerOffsetsIterator) Compare(key []byte, pre prefix, b *faultBuffer) int { - if cmp := comparePrefix(ri.Prefix(), pre); cmp != 0 { - return cmp - } - return bytes.Compare(ri.Key(b), key) -} diff --git a/tsdb/tsm1/reader_offsets_test.go b/tsdb/tsm1/reader_offsets_test.go deleted file mode 100644 index 7712fd87e2..0000000000 --- a/tsdb/tsm1/reader_offsets_test.go +++ /dev/null @@ -1,161 +0,0 @@ -package tsm1 - -import ( - "fmt" - "math/rand" - "testing" -) - -func TestReaderOffsets(t *testing.T) { - const numKeys = 100 - - check := func(t *testing.T, what string, got, exp interface{}, extra ...interface{}) { - t.Helper() - if got != exp { - args := []interface{}{"incorrect", what, "got:", got, "exp:", exp} - args = append(args, extra...) - t.Fatal(args...) - } - } - - makeKey := func(i int) string { return fmt.Sprintf("%09d", i) } - - makeRO := func() (readerOffsets, *faultBuffer) { - var buf []byte - var ro readerOffsets - for i := 0; i < numKeys; i++ { - ro.AddKey(addKey(&buf, makeKey(i))) - } - ro.Done() - - return ro, &faultBuffer{b: buf} - } - - t.Run("Create_SingleKey", func(t *testing.T) { - var buf []byte - var ro readerOffsets - ro.AddKey(addKey(&buf, makeKey(0))) - ro.Done() - - check(t, "offsets", len(ro.offsets), 1) - check(t, "prefixes", len(ro.prefixes), 1) - }) - - t.Run("Create", func(t *testing.T) { - ro, _ := makeRO() - - check(t, "offsets", len(ro.offsets), numKeys) - check(t, "prefixes", len(ro.prefixes), numKeys/10) - }) - - t.Run("Iterate", func(t *testing.T) { - ro, fb := makeRO() - - iter := ro.Iterator() - for i := 0; iter.Next(); i++ { - check(t, "key", string(iter.Key(fb)), makeKey(i)) - } - }) - - t.Run("Seek", func(t *testing.T) { - ro, fb := makeRO() - exact, ok := false, false - - iter := ro.Iterator() - for i := 0; i < numKeys-1; i++ { - exact, ok = iter.Seek([]byte(makeKey(i)), fb) - check(t, "exact", exact, true) - check(t, "ok", ok, true) - check(t, "key", string(iter.Key(fb)), makeKey(i)) - - exact, ok = iter.Seek([]byte(makeKey(i)+"0"), fb) - check(t, "exact", exact, false) - check(t, "ok", ok, true) - check(t, "key", string(iter.Key(fb)), makeKey(i+1)) - } - - exact, ok = iter.Seek([]byte(makeKey(numKeys-1)), fb) - check(t, "exact", exact, true) - check(t, "ok", ok, true) - check(t, "key", string(iter.Key(fb)), makeKey(numKeys-1)) - - exact, ok = iter.Seek([]byte(makeKey(numKeys-1)+"0"), fb) - check(t, "exact", exact, false) - check(t, "ok", ok, false) - - exact, ok = iter.Seek([]byte("1"), fb) - check(t, "exact", exact, false) - check(t, "ok", ok, false) - - exact, ok = iter.Seek(nil, fb) - check(t, "exact", exact, false) - check(t, "ok", ok, true) - check(t, "key", string(iter.Key(fb)), makeKey(0)) - }) - - t.Run("Delete", func(t *testing.T) { - ro, fb := makeRO() - - iter := ro.Iterator() - for i := 0; iter.Next(); i++ { - if i%2 == 0 { - continue - } - iter.Delete() - } - iter.Done() - - iter = ro.Iterator() - for i := 0; iter.Next(); i++ { - check(t, "key", string(iter.Key(fb)), makeKey(2*i)) - } - }) - - t.Run("Fuzz", func(t *testing.T) { - for i := 0; i < 100; i++ { - ro, fb := makeRO() - deleted := make(map[string]struct{}) - iter := ro.Iterator() - - for i := 0; i < numKeys; i++ { - // delete a random key. if we seek past, delete the first key. 
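The prefix entries above carry partial sums of key counts, and searchPrefix binary-searches those sums to locate which prefix owns the n-th offset. The same search can be expressed with sort.Search; a short sketch with the type pared down to the one field the search needs:

```go
package example

import "sort"

// prefixEntry keeps only the running total from the deleted type: the number
// of keys added up to and including this prefix.
type prefixEntry struct {
	total int // partial sum
}

// searchPrefix finds which prefix entry owns the n-th key: the first entry
// whose partial sum exceeds n. This is the same binary search the deleted
// code writes out by hand.
func searchPrefix(prefixes []prefixEntry, n int) int {
	return sort.Search(len(prefixes), func(i int) bool { return n < prefixes[i].total })
}
```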
- _, ok := iter.Seek([]byte(makeKey(rand.Intn(numKeys))), fb) - if !ok { - iter.Seek(nil, fb) - } - key := string(iter.Key(fb)) - _, ok = deleted[key] - check(t, "key deleted", ok, false, "for key", key) - deleted[key] = struct{}{} - iter.Delete() - iter.Done() - - // seek to every key that isn't deleted. - for i := 0; i < numKeys; i++ { - key := makeKey(i) - if _, ok := deleted[key]; ok { - continue - } - - exact, ok := iter.Seek([]byte(key), fb) - check(t, "exact", exact, true, "for key", key) - check(t, "ok", ok, true, "for key", key) - check(t, "key", string(iter.Key(fb)), key) - } - } - - check(t, "amount deleted", len(deleted), numKeys) - iter = ro.Iterator() - check(t, "next", iter.Next(), false) - } - }) -} - -func addKey(buf *[]byte, key string) (uint32, []byte) { - offset := len(*buf) - *buf = append(*buf, byte(len(key)>>8), byte(len(key))) - *buf = append(*buf, key...) - *buf = append(*buf, 0) - *buf = append(*buf, make([]byte, indexEntrySize)...) - return uint32(offset), []byte(key) -} diff --git a/tsdb/tsm1/reader_prefix_tree.go b/tsdb/tsm1/reader_prefix_tree.go deleted file mode 100644 index 55dcd0d97e..0000000000 --- a/tsdb/tsm1/reader_prefix_tree.go +++ /dev/null @@ -1,116 +0,0 @@ -package tsm1 - -type prefixTreeKey [8]byte - -const prefixTreeKeySize = len(prefixTreeKey{}) - -// prefixTree is a type that keeps track of a slice of time ranges for prefixes and allows -// querying for all of the time ranges for prefixes that match a provided key. It chunks -// added prefixes by 8 bytes and then by 1 byte because typical prefixes will be 8 or 16 -// bytes. This allows for effectively O(1) searches, but degrades to O(len(key)) in the -// worst case when there is a matching prefix for every byte of the key. Appending a prefix -// is similar. -type prefixTree struct { - values []TimeRange - short map[byte]*prefixTree - long map[prefixTreeKey]*prefixTree -} - -func newPrefixTree() *prefixTree { - return &prefixTree{ - short: make(map[byte]*prefixTree), - long: make(map[prefixTreeKey]*prefixTree), - } -} - -func (p *prefixTree) Append(prefix []byte, values ...TimeRange) { - if len(prefix) >= prefixTreeKeySize { - var lookup prefixTreeKey - copy(lookup[:], prefix) - - ch, ok := p.long[lookup] - if !ok { - ch = newPrefixTree() - p.long[lookup] = ch - } - ch.Append(prefix[prefixTreeKeySize:], values...) - - } else if len(prefix) > 0 { - ch, ok := p.short[prefix[0]] - if !ok { - ch = newPrefixTree() - p.short[prefix[0]] = ch - } - ch.Append(prefix[1:], values...) - - } else { - p.values = append(p.values, values...) - } -} - -func (p *prefixTree) Search(key []byte, buf []TimeRange) []TimeRange { - buf = append(buf, p.values...) 
- - if len(key) > 0 { - if ch, ok := p.short[key[0]]; ok { - buf = ch.Search(key[1:], buf) - } - } - - if len(key) >= prefixTreeKeySize { - var lookup prefixTreeKey - copy(lookup[:], key) - - if ch, ok := p.long[lookup]; ok { - buf = ch.Search(key[prefixTreeKeySize:], buf) - } - } - - return buf -} - -func (p *prefixTree) Count(key []byte) int { - count := len(p.values) - - if len(key) > 0 { - if ch, ok := p.short[key[0]]; ok { - count += ch.Count(key[1:]) - } - } - - if len(key) >= prefixTreeKeySize { - var lookup prefixTreeKey - copy(lookup[:], key) - - if ch, ok := p.long[lookup]; ok { - count += ch.Count(key[prefixTreeKeySize:]) - } - } - - return count -} - -func (p *prefixTree) checkOverlap(key []byte, ts int64) bool { - for _, t := range p.values { - if t.Min <= ts && ts <= t.Max { - return true - } - } - - if len(key) > 0 { - if ch, ok := p.short[key[0]]; ok && ch.checkOverlap(key[1:], ts) { - return true - } - } - - if len(key) >= prefixTreeKeySize { - var lookup prefixTreeKey - copy(lookup[:], key) - - if ch, ok := p.long[lookup]; ok && ch.checkOverlap(key[prefixTreeKeySize:], ts) { - return true - } - } - - return false -} diff --git a/tsdb/tsm1/reader_prefix_tree_test.go b/tsdb/tsm1/reader_prefix_tree_test.go deleted file mode 100644 index 392523639b..0000000000 --- a/tsdb/tsm1/reader_prefix_tree_test.go +++ /dev/null @@ -1,120 +0,0 @@ -package tsm1 - -import ( - "bytes" - "fmt" - "reflect" - "testing" - - "github.com/google/go-cmp/cmp" -) - -func TestPrefixTree(t *testing.T) { - t.Run("Basic", func(t *testing.T) { - ranges := func(ns ...int64) (out []TimeRange) { - for _, n := range ns { - out = append(out, TimeRange{n, n}) - } - return out - } - - check := func(t *testing.T, tree *prefixTree, key string, exp []TimeRange) { - t.Helper() - got := tree.Search([]byte(key), nil) - if !reflect.DeepEqual(got, exp) { - t.Fatalf("bad search: %q:\n%v", key, cmp.Diff(got, exp)) - } - } - - tree := newPrefixTree() - tree.Append([]byte("abcdefghABCDEFGH"), ranges(1)...) - tree.Append([]byte("abcdefgh01234567"), ranges(2)...) - tree.Append([]byte("abcd"), ranges(3)...) - tree.Append([]byte("0123"), ranges(4)...) - tree.Append([]byte("abcdefghABCDEFGH-m1"), ranges(5)...) - tree.Append([]byte("abcdefghABCDEFGH-m1"), ranges(6)...) - tree.Append([]byte("abcdefgh01234567-m1"), ranges(7)...) - tree.Append([]byte("abcdefgh01234567-m1"), ranges(8)...) - tree.Append([]byte("abcdefgh"), ranges(9, 10)...) - - check(t, tree, "abcd", ranges(3)) - check(t, tree, "abcdefgh", ranges(3, 9, 10)) - check(t, tree, "abcdefghABCDEFGH", ranges(3, 9, 10, 1)) - check(t, tree, "abcdefghABCDEFGH-m1", ranges(3, 9, 10, 1, 5, 6)) - check(t, tree, "abcdefgh01234567-m1", ranges(3, 9, 10, 2, 7, 8)) - }) -} - -// Typical results on a 2018 MPB. Pay special attention to the -// 8 and 16 results as they are the most likely. 
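To make the 8-byte chunking described in the prefixTree comment concrete, here is how the structure would be exercised from inside the package (prefixTree is unexported, so this mirrors what the deleted test file does; the byte strings are made up, real callers pass org+bucket-prefixed series keys):

```go
package tsm1

// Illustrative only: this relies on the pre-move tsm1 package's unexported
// prefixTree and TimeRange types.
func examplePrefixTree() []TimeRange {
	tree := newPrefixTree()

	// An 8-byte prefix occupies a single "long" chunk; a 16-byte prefix uses two.
	tree.Append([]byte("orgbuckA"), TimeRange{Min: 0, Max: 50})
	tree.Append([]byte("orgbuckAmeasure1"), TimeRange{Min: 100, Max: 200})

	// Search walks the key in the same 8-byte chunks and collects the ranges
	// of every stored prefix that matches, so both tombstones come back here.
	return tree.Search([]byte("orgbuckAmeasure1,tag0=val0"), nil)
}
```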
-// -// BenchmarkPrefixTree/Append/0-8 300000000 5.93 ns/op -// BenchmarkPrefixTree/Append/4-8 20000000 93.7 ns/op -// BenchmarkPrefixTree/Append/8-8 100000000 12.9 ns/op -// BenchmarkPrefixTree/Append/12-8 20000000 100.0 ns/op -// BenchmarkPrefixTree/Append/16-8 100000000 20.4 ns/op -// BenchmarkPrefixTree/Append/20-8 20000000 111.0 ns/op -// BenchmarkPrefixTree/Append/24-8 50000000 28.5 ns/op -// BenchmarkPrefixTree/Append/28-8 20000000 118.0 ns/op -// BenchmarkPrefixTree/Append/32-8 50000000 35.8 ns/op -// BenchmarkPrefixTree/Search/Best/0-8 300000000 5.76 ns/op -// BenchmarkPrefixTree/Search/Best/4-8 20000000 102.0 ns/op -// BenchmarkPrefixTree/Search/Best/8-8 100000000 18.5 ns/op -// BenchmarkPrefixTree/Search/Best/12-8 20000000 116.0 ns/op -// BenchmarkPrefixTree/Search/Best/16-8 50000000 31.9 ns/op -// BenchmarkPrefixTree/Search/Best/20-8 10000000 131.0 ns/op -// BenchmarkPrefixTree/Search/Best/24-8 30000000 45.3 ns/op -// BenchmarkPrefixTree/Search/Best/28-8 10000000 142.0 ns/op -// BenchmarkPrefixTree/Search/Best/32-8 20000000 58.0 ns/op -// BenchmarkPrefixTree/Search/Worst/0-8 300000000 5.79 ns/op -// BenchmarkPrefixTree/Search/Worst/4-8 20000000 79.2 ns/op -// BenchmarkPrefixTree/Search/Worst/8-8 10000000 199.0 ns/op -// BenchmarkPrefixTree/Search/Worst/12-8 5000000 301.0 ns/op -// BenchmarkPrefixTree/Search/Worst/16-8 3000000 422.0 ns/op -// BenchmarkPrefixTree/Search/Worst/20-8 3000000 560.0 ns/op -// BenchmarkPrefixTree/Search/Worst/24-8 2000000 683.0 ns/op -// BenchmarkPrefixTree/Search/Worst/28-8 2000000 772.0 ns/op -// BenchmarkPrefixTree/Search/Worst/32-8 2000000 875.0 ns/op -func BenchmarkPrefixTree(b *testing.B) { - b.Run("Append", func(b *testing.B) { - run := func(b *testing.B, prefix []byte) { - tree := newPrefixTree() - - for i := 0; i < b.N; i++ { - tree.Append(prefix) - } - } - - for i := 0; i <= 32; i += 4 { - b.Run(fmt.Sprint(i), func(b *testing.B) { run(b, bytes.Repeat([]byte("0"), i)) }) - } - }) - - b.Run("Search", func(b *testing.B) { - run := func(b *testing.B, worst bool) { - run := func(b *testing.B, key []byte) { - tree := newPrefixTree() - if worst { - for i := range key { - tree.Append(key[:i]) - } - } else { - tree.Append(key) - } - b.ReportAllocs() - b.ResetTimer() - - for i := 0; i < b.N; i++ { - tree.Search(key, nil) - } - } - - for i := 0; i <= 32; i += 4 { - b.Run(fmt.Sprint(i), func(b *testing.B) { run(b, bytes.Repeat([]byte("0"), i)) }) - } - } - - b.Run("Best", func(b *testing.B) { run(b, false) }) - b.Run("Worst", func(b *testing.B) { run(b, true) }) - }) -} diff --git a/tsdb/tsm1/reader_range_iterator.go b/tsdb/tsm1/reader_range_iterator.go deleted file mode 100644 index e708a97e61..0000000000 --- a/tsdb/tsm1/reader_range_iterator.go +++ /dev/null @@ -1,213 +0,0 @@ -package tsm1 - -import ( - "github.com/influxdata/influxdb/v2/tsdb/cursors" -) - -// TimeRangeIterator will iterate over the keys of a TSM file, starting at -// the provided key. It is used to determine if each key has data which exists -// within a specified time interval. -type TimeRangeIterator struct { - timeRangeBlockReader -} - -// Next advances the iterator and reports if it is still valid. -func (b *TimeRangeIterator) Next() bool { - if b.Err() != nil { - return false - } - - return b.iter.Next() -} - -// Seek points the iterator at the smallest key greater than or equal to the -// given key, returning true if it was an exact match. It returns false for -// ok if the key does not exist. 
-func (b *TimeRangeIterator) Seek(key []byte) (exact, ok bool) { - if b.Err() != nil { - return false, false - } - - return b.iter.Seek(key) -} - -// HasData reports true if the current key has data for the time range. -func (b *TimeRangeIterator) HasData() bool { - if b.Err() != nil { - return false - } - - e, ts := b.getEntriesAndTombstones() - if len(e) == 0 { - return false - } - - if len(ts) == 0 { - // no tombstones, fast path will avoid decoding blocks - // if queried time interval intersects with one of the entries - if intersectsEntry(e, b.tr) { - return true - } - } - - for i := range e { - if !b.readBlock(&e[i]) { - return false - } - - // remove tombstoned timestamps - for i := range ts { - b.a.Exclude(ts[i].Min, ts[i].Max) - } - - if b.a.Contains(b.tr.Min, b.tr.Max) { - return true - } - } - - return false -} - -// The timeRangeBlockReader provides common behavior -// for enumerating keys over a given time range and -// accumulating statistics. -type timeRangeBlockReader struct { - r *TSMReader - iter *TSMIndexIterator - tr TimeRange - err error - stats cursors.CursorStats - - // temporary storage - trbuf []TimeRange - buf []byte - a cursors.TimestampArray -} - -func (b *timeRangeBlockReader) Err() error { - if b.err != nil { - return b.err - } - return b.iter.Err() -} - -// Key reports the current key. -func (b *timeRangeBlockReader) Key() []byte { - return b.iter.Key() -} - -// Type reports the current block type. -func (b *timeRangeBlockReader) Type() byte { - return b.iter.Type() -} - -func (b *timeRangeBlockReader) getEntriesAndTombstones() ([]IndexEntry, []TimeRange) { - if b.err != nil { - return nil, nil - } - - e := excludeEntries(b.iter.Entries(), b.tr) - if len(e) == 0 { - return nil, nil - } - - b.trbuf = b.r.TombstoneRange(b.iter.Key(), b.trbuf[:0]) - var ts []TimeRange - if len(b.trbuf) > 0 { - ts = excludeTimeRanges(b.trbuf, b.tr) - } - - return e, ts -} - -// readBlock reads the block identified by IndexEntry e and accumulates -// statistics. readBlock returns true on success. -func (b *timeRangeBlockReader) readBlock(e *IndexEntry) bool { - _, b.buf, b.err = b.r.ReadBytes(e, b.buf) - if b.err != nil { - return false - } - - b.err = DecodeTimestampArrayBlock(b.buf, &b.a) - if b.err != nil { - return false - } - - b.stats.ScannedBytes += b.a.Len() * 8 // sizeof Timestamp (int64) - b.stats.ScannedValues += b.a.Len() - return true -} - -// Stats returns statistics accumulated by the iterator for any block reads. -func (b *timeRangeBlockReader) Stats() cursors.CursorStats { - return b.stats -} - -/* -intersectsEntry determines whether the range [min, max] -intersects one or both boundaries of IndexEntry. - - +------------------+ - | IndexEntry | -+---------+------------------+---------+ -| RANGE | | RANGE | -+-+-------+-+ +----+----+----+ - | RANGE | | RANGE | - +----+----+-----------+---------+ - | RANGE | - +--------------------------+ -*/ - -// intersectsEntry determines if tr overlaps one or both boundaries -// of at least one element of e. If that is the case, -// and the block has no tombstones, the block timestamps do not -// need to be decoded. -func intersectsEntry(e []IndexEntry, tr TimeRange) bool { - for i := range e { - min, max := e[i].MinTime, e[i].MaxTime - if tr.Overlaps(min, max) && !tr.Within(min, max) { - return true - } - } - return false -} - -// excludeEntries returns a slice which excludes leading and trailing -// elements of e that are outside the time range specified by tr. 
-func excludeEntries(e []IndexEntry, tr TimeRange) []IndexEntry { - for i := range e { - if e[i].OverlapsTimeRange(tr.Min, tr.Max) { - e = e[i:] - break - } - } - - for i := range e { - if !e[i].OverlapsTimeRange(tr.Min, tr.Max) { - e = e[:i] - break - } - } - - return e -} - -// excludeTimeRanges returns a slice which excludes leading and trailing -// elements of e that are outside the time range specified by tr. -func excludeTimeRanges(e []TimeRange, tr TimeRange) []TimeRange { - for i := range e { - if e[i].Overlaps(tr.Min, tr.Max) { - e = e[i:] - break - } - } - - for i := range e { - if !e[i].Overlaps(tr.Min, tr.Max) { - e = e[:i] - break - } - } - - return e -} diff --git a/tsdb/tsm1/reader_range_iterator_test.go b/tsdb/tsm1/reader_range_iterator_test.go deleted file mode 100644 index d5aebdf6e7..0000000000 --- a/tsdb/tsm1/reader_range_iterator_test.go +++ /dev/null @@ -1,698 +0,0 @@ -package tsm1 - -import ( - "fmt" - "os" - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/cursors" -) - -func TestTimeRangeIterator(t *testing.T) { - tsm := mustWriteTSM( - bucket{ - org: 0x50, - bucket: 0x60, - w: writes( - mw("cpu", - kw("tag0=val0", - vals(tvi(1000, 1), tvi(1010, 2), tvi(1020, 3)), - vals(tvi(2000, 1), tvi(2010, 2), tvi(2020, 3)), - ), - kw("tag0=val1", - vals(tvi(2000, 1), tvi(2010, 2), tvi(2020, 3)), - vals(tvi(3000, 1), tvi(3010, 2), tvi(3020, 3)), - ), - ), - ), - }, - - bucket{ - org: 0x51, - bucket: 0x61, - w: writes( - mw("mem", - kw("tag0=val0", - vals(tvi(1000, 1), tvi(1010, 2), tvi(1020, 3)), - vals(tvi(2000, 1), tvi(2010, 2), tvi(2020, 3)), - ), - kw("tag0=val1", - vals(tvi(1000, 1), tvi(1010, 2), tvi(1020, 3)), - vals(tvi(2000, 1)), - ), - kw("tag0=val2", - vals(tvi(2000, 1), tvi(2010, 2), tvi(2020, 3)), - vals(tvi(3000, 1), tvi(3010, 2), tvi(3020, 3)), - ), - ), - ), - }, - ) - defer tsm.RemoveAll() - - orgBucket := func(org, bucket uint) []byte { - n := tsdb.EncodeName(influxdb.ID(org), influxdb.ID(bucket)) - return n[:] - } - - type args struct { - min int64 - max int64 - } - - type res struct { - k string - hasData bool - } - - EXP := func(r ...interface{}) (rr []res) { - for i := 0; i+1 < len(r); i += 2 { - rr = append(rr, res{k: r[i].(string), hasData: r[i+1].(bool)}) - } - return - } - - type test struct { - name string - args args - exp []res - expStats cursors.CursorStats - } - - type bucketTest struct { - org, bucket uint - m string - tests []test - } - - r := tsm.TSMReader() - - runTests := func(name string, tests []bucketTest) { - t.Run(name, func(t *testing.T) { - for _, bt := range tests { - key := orgBucket(bt.org, bt.bucket) - t.Run(fmt.Sprintf("0x%x-0x%x", bt.org, bt.bucket), func(t *testing.T) { - for _, tt := range bt.tests { - t.Run(tt.name, func(t *testing.T) { - iter := r.TimeRangeIterator(key, tt.args.min, tt.args.max) - count := 0 - for i, exp := range tt.exp { - if !iter.Next() { - t.Errorf("Next(%d): expected true", i) - } - - expKey := makeKey(influxdb.ID(bt.org), influxdb.ID(bt.bucket), bt.m, exp.k) - if got := iter.Key(); !cmp.Equal(got, expKey) { - t.Errorf("Key(%d): -got/+exp\n%v", i, cmp.Diff(got, expKey)) - } - - if got := iter.HasData(); got != exp.hasData { - t.Errorf("HasData(%d): -got/+exp\n%v", i, cmp.Diff(got, exp.hasData)) - } - count++ - } - if count != len(tt.exp) { - t.Errorf("count: -got/+exp\n%v", cmp.Diff(count, len(tt.exp))) - } - - if got := iter.Stats(); 
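The intersectsEntry fast path above relies on index entries recording real point timestamps: if the query range covers a block's MinTime or MaxTime boundary, data is guaranteed and the block is never decoded; only ranges strictly inside a block force a decode. A self-contained illustration with stand-in types (the Overlaps/Within definitions here are reconstructions chosen to match that behavior, not the package's originals):

```go
package main

import "fmt"

// Stand-ins for the tsm1 types, reduced to what the fast path needs.
type TimeRange struct{ Min, Max int64 }

func (t TimeRange) Overlaps(min, max int64) bool { return t.Min <= max && t.Max >= min }
func (t TimeRange) Within(min, max int64) bool   { return t.Min > min && t.Max < max }

type IndexEntry struct{ MinTime, MaxTime int64 }

// hasDataFastPath mirrors intersectsEntry: MinTime and MaxTime are timestamps
// of real points, so a query range covering either boundary proves the key
// has data without decoding the block.
func hasDataFastPath(e []IndexEntry, tr TimeRange) bool {
	for i := range e {
		if tr.Overlaps(e[i].MinTime, e[i].MaxTime) && !tr.Within(e[i].MinTime, e[i].MaxTime) {
			return true
		}
	}
	return false
}

func main() {
	blocks := []IndexEntry{{MinTime: 1000, MaxTime: 1020}, {MinTime: 2000, MaxTime: 2020}}

	// Covers block boundaries: data is guaranteed and nothing is decoded.
	fmt.Println(hasDataFastPath(blocks, TimeRange{900, 10000})) // true

	// Strictly inside a block: only decoding the block (and applying any
	// tombstones) can answer, so the fast path declines.
	fmt.Println(hasDataFastPath(blocks, TimeRange{2001, 2011})) // false
}
```

This lines up with the deleted TestTimeRangeIterator expectations: the covering [900, 10000] queries report ScannedValues 0, while the strictly-inside [2001, 2011] query has to decode blocks and accumulates scan statistics.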
!cmp.Equal(got, tt.expStats) { - t.Errorf("Stats: -got/+exp\n%v", cmp.Diff(got, tt.expStats)) - } - }) - - } - }) - } - }) - } - - runTests("before delete", []bucketTest{ - { - org: 0x50, - bucket: 0x60, - m: "cpu", - tests: []test{ - { - name: "cover file", - args: args{ - min: 900, - max: 10000, - }, - exp: EXP("tag0=val0", true, "tag0=val1", true), - expStats: cursors.CursorStats{ScannedValues: 0, ScannedBytes: 0}, - }, - { - name: "within block", - args: args{ - min: 2001, - max: 2011, - }, - exp: EXP("tag0=val0", true, "tag0=val1", true), - expStats: cursors.CursorStats{ScannedValues: 6, ScannedBytes: 48}, - }, - { - name: "to_2999", - args: args{ - min: 0, - max: 2999, - }, - exp: EXP("tag0=val0", true, "tag0=val1", true), - expStats: cursors.CursorStats{ScannedValues: 0, ScannedBytes: 0}, - }, - { - name: "intersects block", - args: args{ - min: 1500, - max: 2500, - }, - exp: EXP("tag0=val0", true, "tag0=val1", true), - expStats: cursors.CursorStats{ScannedValues: 0, ScannedBytes: 0}, - }, - }, - }, - - { - org: 0x51, - bucket: 0x61, - m: "mem", - tests: []test{ - { - name: "cover file", - args: args{ - min: 900, - max: 10000, - }, - exp: EXP("tag0=val0", true, "tag0=val1", true, "tag0=val2", true), - expStats: cursors.CursorStats{ScannedValues: 0, ScannedBytes: 0}, - }, - { - name: "within block", - args: args{ - min: 2001, - max: 2011, - }, - exp: EXP("tag0=val0", true, "tag0=val1", false, "tag0=val2", true), - expStats: cursors.CursorStats{ScannedValues: 6, ScannedBytes: 48}, - }, - { - name: "1000_2999", - args: args{ - min: 1000, - max: 2500, - }, - exp: EXP("tag0=val0", true, "tag0=val1", true, "tag0=val2", true), - expStats: cursors.CursorStats{ScannedValues: 0, ScannedBytes: 0}, - }, - }, - }, - }) - - tsm.MustDeletePrefix(orgBucket(0x50, 0x60), 0, 2999) - tsm.MustDelete(makeKey(0x51, 0x61, "mem", "tag0=val0")) - tsm.MustDeleteRange(2000, 2999, - makeKey(0x51, 0x61, "mem", "tag0=val1"), - makeKey(0x51, 0x61, "mem", "tag0=val2"), - ) - - runTests("after delete", []bucketTest{ - { - org: 0x50, - bucket: 0x60, - m: "cpu", - tests: []test{ - { - name: "cover file", - args: args{ - min: 900, - max: 10000, - }, - exp: EXP("tag0=val1", true), - expStats: cursors.CursorStats{ScannedValues: 6, ScannedBytes: 48}, - }, - { - name: "within block", - args: args{ - min: 2001, - max: 2011, - }, - exp: nil, - expStats: cursors.CursorStats{ScannedValues: 0, ScannedBytes: 0}, - }, - { - name: "to_2999", - args: args{ - min: 0, - max: 2999, - }, - exp: EXP("tag0=val1", false), - expStats: cursors.CursorStats{ScannedValues: 3, ScannedBytes: 24}, - }, - { - name: "intersects block", - args: args{ - min: 1500, - max: 2500, - }, - exp: EXP("tag0=val1", false), - expStats: cursors.CursorStats{ScannedValues: 3, ScannedBytes: 24}, - }, - { - name: "beyond all tombstones", - args: args{ - min: 3000, - max: 4000, - }, - exp: EXP("tag0=val1", true), - expStats: cursors.CursorStats{ScannedValues: 0, ScannedBytes: 0}, - }, - }, - }, - - { - org: 0x51, - bucket: 0x61, - m: "mem", - tests: []test{ - { - name: "cover file", - args: args{ - min: 900, - max: 10000, - }, - exp: EXP("tag0=val1", true, "tag0=val2", true), - expStats: cursors.CursorStats{ScannedValues: 9, ScannedBytes: 72}, - }, - { - name: "within block", - args: args{ - min: 2001, - max: 2011, - }, - exp: EXP("tag0=val1", false, "tag0=val2", false), - expStats: cursors.CursorStats{ScannedValues: 3, ScannedBytes: 24}, - }, - { - name: "1000_2500", - args: args{ - min: 1000, - max: 2500, - }, - exp: EXP("tag0=val1", true, "tag0=val2", false), - 
expStats: cursors.CursorStats{ScannedValues: 6, ScannedBytes: 48}, - }, - }, - }, - }) -} - -func TestExcludeEntries(t *testing.T) { - entries := func(ts ...int64) (e []IndexEntry) { - for i := 0; i+1 < len(ts); i += 2 { - e = append(e, IndexEntry{MinTime: ts[i], MaxTime: ts[i+1]}) - } - return - } - - eq := func(a, b []IndexEntry) bool { - if len(a) == 0 && len(b) == 0 { - return true - } - return cmp.Equal(a, b) - } - - type args struct { - e []IndexEntry - min int64 - max int64 - } - tests := []struct { - name string - args args - exp []IndexEntry - }{ - { - args: args{ - e: entries(0, 10, 12, 15, 19, 21), - min: 11, - max: 13, - }, - exp: entries(12, 15), - }, - { - args: args{ - e: entries(0, 10, 12, 15, 19, 21), - min: 10, - max: 13, - }, - exp: entries(0, 10, 12, 15), - }, - { - args: args{ - e: entries(0, 10, 12, 15, 19, 21), - min: 12, - max: 30, - }, - exp: entries(12, 15, 19, 21), - }, - { - args: args{ - e: entries(0, 10, 12, 15, 19, 21), - min: 13, - max: 20, - }, - exp: entries(12, 15, 19, 21), - }, - { - args: args{ - e: entries(0, 10, 12, 15, 19, 21), - min: 0, - max: 100, - }, - exp: entries(0, 10, 12, 15, 19, 21), - }, - { - args: args{ - e: entries(0, 10, 13, 15, 19, 21), - min: 11, - max: 12, - }, - exp: entries(), - }, - { - args: args{ - e: entries(12, 15, 19, 21), - min: 0, - max: 9, - }, - exp: entries(), - }, - { - args: args{ - e: entries(12, 15, 19, 21), - min: 22, - max: 30, - }, - exp: entries(), - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if got := excludeEntries(tt.args.e, TimeRange{tt.args.min, tt.args.max}); !cmp.Equal(got, tt.exp, cmp.Comparer(eq)) { - t.Errorf("excludeEntries() -got/+exp\n%v", cmp.Diff(got, tt.exp)) - } - }) - } -} - -func TestExcludeTimeRanges(t *testing.T) { - entries := func(ts ...int64) (e []TimeRange) { - for i := 0; i+1 < len(ts); i += 2 { - e = append(e, TimeRange{Min: ts[i], Max: ts[i+1]}) - } - return - } - - eq := func(a, b []TimeRange) bool { - if len(a) == 0 && len(b) == 0 { - return true - } - return cmp.Equal(a, b) - } - - type args struct { - e []TimeRange - min int64 - max int64 - } - tests := []struct { - name string - args args - exp []TimeRange - }{ - { - args: args{ - e: entries(0, 10, 12, 15, 19, 21), - min: 11, - max: 13, - }, - exp: entries(12, 15), - }, - { - args: args{ - e: entries(0, 10, 12, 15, 19, 21), - min: 10, - max: 13, - }, - exp: entries(0, 10, 12, 15), - }, - { - args: args{ - e: entries(0, 10, 12, 15, 19, 21), - min: 12, - max: 30, - }, - exp: entries(12, 15, 19, 21), - }, - { - args: args{ - e: entries(0, 10, 12, 15, 19, 21), - min: 0, - max: 100, - }, - exp: entries(0, 10, 12, 15, 19, 21), - }, - { - args: args{ - e: entries(0, 10, 13, 15, 19, 21), - min: 11, - max: 12, - }, - exp: entries(), - }, - { - args: args{ - e: entries(12, 15, 19, 21), - min: 0, - max: 9, - }, - exp: entries(), - }, - { - args: args{ - e: entries(12, 15, 19, 21), - min: 22, - max: 30, - }, - exp: entries(), - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if got := excludeTimeRanges(tt.args.e, TimeRange{tt.args.min, tt.args.max}); !cmp.Equal(got, tt.exp, cmp.Comparer(eq)) { - t.Errorf("excludeEntries() -got/+exp\n%v", cmp.Diff(got, tt.exp)) - } - }) - } -} - -func TestIntersectsEntries(t *testing.T) { - entries := func(ts ...int64) (e []IndexEntry) { - for i := 0; i+1 < len(ts); i += 2 { - e = append(e, IndexEntry{MinTime: ts[i], MaxTime: ts[i+1]}) - } - return - } - - type args struct { - e []IndexEntry - tr TimeRange - } - tests := []struct { - name string - 
args args - exp bool - }{ - { - name: "", - args: args{ - e: entries(5, 10, 13, 15, 19, 21, 22, 27), - tr: TimeRange{6, 9}, - }, - exp: false, - }, - { - args: args{ - e: entries(5, 10, 13, 15, 19, 21, 22, 27), - tr: TimeRange{11, 12}, - }, - exp: false, - }, - { - args: args{ - e: entries(5, 10, 13, 15, 19, 21, 22, 27), - tr: TimeRange{2, 4}, - }, - exp: false, - }, - { - args: args{ - e: entries(5, 10, 13, 15, 19, 21, 22, 27), - tr: TimeRange{28, 40}, - }, - exp: false, - }, - - { - args: args{ - e: entries(5, 10, 13, 15, 19, 21, 22, 27), - tr: TimeRange{3, 11}, - }, - exp: true, - }, - { - args: args{ - e: entries(5, 10, 13, 15, 19, 21, 22, 27), - tr: TimeRange{5, 27}, - }, - exp: true, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if got := intersectsEntry(tt.args.e, tt.args.tr); got != tt.exp { - t.Errorf("excludeEntries() -got/+exp\n%v", cmp.Diff(got, tt.exp)) - } - }) - } -} - -type bucket struct { - org, bucket influxdb.ID - w []measurementWrite -} - -func writes(w ...measurementWrite) []measurementWrite { - return w -} - -type measurementWrite struct { - m string - w []keyWrite -} - -func mw(m string, w ...keyWrite) measurementWrite { - return measurementWrite{m, w} -} - -type keyWrite struct { - k string - w []Values -} - -func kw(k string, w ...Values) keyWrite { return keyWrite{k, w} } -func vals(tv ...Value) Values { return tv } -func tvi(ts int64, v int64) Value { return NewIntegerValue(ts, v) } - -type tsmState struct { - dir string - file string - r *TSMReader -} - -const fieldName = "v" - -func makeKey(org, bucket influxdb.ID, m string, k string) []byte { - name := tsdb.EncodeName(org, bucket) - line := string(m) + "," + k - tags := make(models.Tags, 1) - tags[0] = models.NewTag(models.MeasurementTagKeyBytes, []byte(m)) - tags = append(tags, models.ParseTags([]byte(line))...) 
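// What follows appends the reserved field-key tag carrying the field name and then joins
// the encoded series key with the field via SeriesFieldKeyBytes, yielding the full
// "<series key>#!~#<field>" form under which values are written to the TSM file.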
- tags = append(tags, models.NewTag(models.FieldKeyTagKeyBytes, []byte(fieldName))) - return SeriesFieldKeyBytes(string(models.MakeKey(name[:], tags)), fieldName) -} - -func mustWriteTSM(writes ...bucket) (s *tsmState) { - dir := mustTempDir() - defer func() { - if s == nil { - _ = os.RemoveAll(dir) - } - }() - - f := mustTempFile(dir) - - w, err := NewTSMWriter(f) - if err != nil { - panic(fmt.Sprintf("unexpected error creating writer: %v", err)) - } - - for _, ob := range writes { - for _, mw := range ob.w { - for _, kw := range mw.w { - key := makeKey(ob.org, ob.bucket, mw.m, kw.k) - for _, vw := range kw.w { - if err := w.Write(key, vw); err != nil { - panic(fmt.Sprintf("Write failed: %v", err)) - } - } - } - } - } - - if err := w.WriteIndex(); err != nil { - panic(fmt.Sprintf("WriteIndex: %v", err)) - } - - if err := w.Close(); err != nil { - panic(fmt.Sprintf("Close: %v", err)) - } - - fd, err := os.Open(f.Name()) - if err != nil { - panic(fmt.Sprintf("os.Open: %v", err)) - } - - r, err := NewTSMReader(fd) - if err != nil { - panic(fmt.Sprintf("NewTSMReader: %v", err)) - } - - return &tsmState{ - dir: dir, - file: f.Name(), - r: r, - } -} - -func (s *tsmState) TSMReader() *TSMReader { - return s.r -} - -func (s *tsmState) RemoveAll() { - _ = os.RemoveAll(s.dir) -} - -func (s *tsmState) MustDeletePrefix(key []byte, min, max int64) { - err := s.r.DeletePrefix(key, min, max, nil, nil) - if err != nil { - panic(fmt.Sprintf("DeletePrefix: %v", err)) - } -} - -func (s *tsmState) MustDelete(keys ...[]byte) { - err := s.r.Delete(keys) - if err != nil { - panic(fmt.Sprintf("Delete: %v", err)) - } -} - -func (s *tsmState) MustDeleteRange(min, max int64, keys ...[]byte) { - err := s.r.DeleteRange(keys, min, max) - if err != nil { - panic(fmt.Sprintf("DeleteRange: %v", err)) - } -} diff --git a/tsdb/tsm1/reader_range_maxtime_iterator.go b/tsdb/tsm1/reader_range_maxtime_iterator.go deleted file mode 100644 index 178c6156b3..0000000000 --- a/tsdb/tsm1/reader_range_maxtime_iterator.go +++ /dev/null @@ -1,141 +0,0 @@ -package tsm1 - -import ( - "github.com/influxdata/influxdb/v2/models" -) - -const ( - // InvalidMinNanoTime is an invalid nano timestamp that has an ordinal - // value lower than models.MinNanoTime, the minimum valid timestamp - // that can be represented. - InvalidMinNanoTime = models.MinNanoTime - 1 -) - -// TimeRangeMaxTimeIterator will iterate over the keys of a TSM file, starting at -// the provided key. It is used to determine if each key has data which exists -// within a specified time interval. -type TimeRangeMaxTimeIterator struct { - timeRangeBlockReader - - // cached values - maxTime int64 - hasData bool - isLoaded bool -} - -// Next advances the iterator and reports if it is still valid. -func (b *TimeRangeMaxTimeIterator) Next() bool { - if b.Err() != nil { - return false - } - - b.clearIsLoaded() - - return b.iter.Next() -} - -// Seek points the iterator at the smallest key greater than or equal to the -// given key, returning true if it was an exact match. It returns false for -// ok if the key does not exist. -func (b *TimeRangeMaxTimeIterator) Seek(key []byte) (exact, ok bool) { - if b.Err() != nil { - return false, false - } - - b.clearIsLoaded() - - return b.iter.Seek(key) -} - -// HasData reports true if the current key has data for the time range. 
-func (b *TimeRangeMaxTimeIterator) HasData() bool { - if b.Err() != nil { - return false - } - - b.load() - - return b.hasData -} - -// MaxTime returns the maximum timestamp for the current key within the -// requested time range. If an error occurred or there is no data, -// InvalidMinTimeStamp will be returned, which is less than models.MinTimeStamp. -// This property can be leveraged when enumerating keys to find the maximum timestamp, -// as this value will always be lower than any valid timestamp returned. -// -// NOTE: If MaxTime is equal to the upper bounds of the queried time range, it -// means data was found equal to or beyond the requested time range and -// does not mean that data exists at that specific timestamp. -func (b *TimeRangeMaxTimeIterator) MaxTime() int64 { - if b.Err() != nil { - return InvalidMinNanoTime - } - - b.load() - - return b.maxTime -} - -func (b *TimeRangeMaxTimeIterator) clearIsLoaded() { b.isLoaded = false } - -// setMaxTime sets maxTime = min(b.tr.Max, max) and -// returns true if maxTime == b.tr.Max, indicating -// the iterator has reached the upper bound. -func (b *TimeRangeMaxTimeIterator) setMaxTime(max int64) bool { - if max > b.tr.Max { - b.maxTime = b.tr.Max - return true - } - b.maxTime = max - return false -} - -func (b *TimeRangeMaxTimeIterator) load() { - if b.isLoaded { - return - } - - b.isLoaded = true - b.hasData = false - b.maxTime = InvalidMinNanoTime - - e, ts := b.getEntriesAndTombstones() - if len(e) == 0 { - return - } - - if len(ts) == 0 { - // no tombstones, fast path will avoid decoding blocks - // if queried time interval intersects with one of the entries - if intersectsEntry(e, b.tr) { - b.hasData = true - b.setMaxTime(e[len(e)-1].MaxTime) - return - } - } - - for i := range e { - if !b.readBlock(&e[i]) { - goto ERROR - } - - // remove tombstoned timestamps - for i := range ts { - b.a.Exclude(ts[i].Min, ts[i].Max) - } - - if b.a.Contains(b.tr.Min, b.tr.Max) { - b.hasData = true - if b.setMaxTime(b.a.MaxTime()) { - return - } - } - } - - return -ERROR: - // ERROR ensures cached state is set to invalid values - b.hasData = false - b.maxTime = InvalidMinNanoTime -} diff --git a/tsdb/tsm1/reader_range_maxtime_iterator_test.go b/tsdb/tsm1/reader_range_maxtime_iterator_test.go deleted file mode 100644 index 7d6e9c73f5..0000000000 --- a/tsdb/tsm1/reader_range_maxtime_iterator_test.go +++ /dev/null @@ -1,313 +0,0 @@ -package tsm1 - -import ( - "fmt" - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/cursors" -) - -func TestTimeRangeMaxTimeIterator(t *testing.T) { - tsm := mustWriteTSM( - bucket{ - org: 0x50, - bucket: 0x60, - w: writes( - mw("cpu", - kw("tag0=val0", - vals(tvi(1000, 1), tvi(1010, 2), tvi(1020, 3)), - vals(tvi(2000, 1), tvi(2010, 2), tvi(2020, 3)), - ), - kw("tag0=val1", - vals(tvi(2000, 1), tvi(2010, 2), tvi(2020, 3)), - vals(tvi(3000, 1), tvi(3010, 2), tvi(3020, 3)), - ), - ), - ), - }, - - bucket{ - org: 0x51, - bucket: 0x61, - w: writes( - mw("mem", - kw("tag0=val0", - vals(tvi(1000, 1), tvi(1010, 2), tvi(1020, 3)), - vals(tvi(2000, 1), tvi(2010, 2), tvi(2020, 3)), - ), - kw("tag0=val1", - vals(tvi(1000, 1), tvi(1010, 2), tvi(1020, 3)), - vals(tvi(2000, 1)), - ), - kw("tag0=val2", - vals(tvi(2000, 1), tvi(2010, 2), tvi(2020, 3)), - vals(tvi(3000, 1), tvi(3010, 2), tvi(3020, 3)), - ), - ), - ), - }, - ) - defer tsm.RemoveAll() - - orgBucket := func(org, bucket uint) []byte { - n := 
tsdb.EncodeName(influxdb.ID(org), influxdb.ID(bucket)) - return n[:] - } - - type args struct { - min int64 - max int64 - } - - type res struct { - k string - hasData bool - maxTime int64 - } - - EXP := func(r ...interface{}) (rr []res) { - for i := 0; i+2 < len(r); i += 3 { - rr = append(rr, res{k: r[i].(string), hasData: r[i+1].(bool), maxTime: int64(r[i+2].(int))}) - } - return - } - - type test struct { - name string - args args - exp []res - expStats cursors.CursorStats - } - - type bucketTest struct { - org, bucket uint - m string - tests []test - } - - r := tsm.TSMReader() - - runTests := func(name string, tests []bucketTest) { - t.Run(name, func(t *testing.T) { - for _, bt := range tests { - key := orgBucket(bt.org, bt.bucket) - t.Run(fmt.Sprintf("0x%x-0x%x", bt.org, bt.bucket), func(t *testing.T) { - for _, tt := range bt.tests { - t.Run(tt.name, func(t *testing.T) { - iter := r.TimeRangeMaxTimeIterator(key, tt.args.min, tt.args.max) - count := 0 - for i, exp := range tt.exp { - if !iter.Next() { - t.Errorf("Next(%d): expected true", i) - } - - expKey := makeKey(influxdb.ID(bt.org), influxdb.ID(bt.bucket), bt.m, exp.k) - if got := iter.Key(); !cmp.Equal(got, expKey) { - t.Errorf("Key(%d): -got/+exp\n%v", i, cmp.Diff(got, expKey)) - } - - if got := iter.HasData(); got != exp.hasData { - t.Errorf("HasData(%d): -got/+exp\n%v", i, cmp.Diff(got, exp.hasData)) - } - - if got := iter.MaxTime(); got != exp.maxTime { - t.Errorf("MaxTime(%d): -got/+exp\n%v", i, cmp.Diff(got, exp.maxTime)) - } - count++ - } - if count != len(tt.exp) { - t.Errorf("count: -got/+exp\n%v", cmp.Diff(count, len(tt.exp))) - } - - if got := iter.Stats(); !cmp.Equal(got, tt.expStats) { - t.Errorf("Stats: -got/+exp\n%v", cmp.Diff(got, tt.expStats)) - } - }) - - } - }) - } - }) - } - - runTests("before delete", []bucketTest{ - { - org: 0x50, - bucket: 0x60, - m: "cpu", - tests: []test{ - { - name: "cover file", - args: args{ - min: 900, - max: 10000, - }, - exp: EXP("tag0=val0", true, 2020, "tag0=val1", true, 3020), - expStats: cursors.CursorStats{ScannedValues: 0, ScannedBytes: 0}, - }, - { - name: "within block", - args: args{ - min: 2001, - max: 2011, - }, - exp: EXP("tag0=val0", true, 2011, "tag0=val1", true, 2011), - expStats: cursors.CursorStats{ScannedValues: 6, ScannedBytes: 48}, - }, - { - name: "to_2999", - args: args{ - min: 0, - max: 2999, - }, - exp: EXP("tag0=val0", true, 2020, "tag0=val1", true, 2020), - expStats: cursors.CursorStats{ScannedValues: 0, ScannedBytes: 0}, - }, - { - name: "intersects block", - args: args{ - min: 1500, - max: 2500, - }, - exp: EXP("tag0=val0", true, 2020, "tag0=val1", true, 2020), - expStats: cursors.CursorStats{ScannedValues: 0, ScannedBytes: 0}, - }, - }, - }, - - { - org: 0x51, - bucket: 0x61, - m: "mem", - tests: []test{ - { - name: "cover file", - args: args{ - min: 900, - max: 10000, - }, - exp: EXP("tag0=val0", true, 2020, "tag0=val1", true, 2000, "tag0=val2", true, 3020), - expStats: cursors.CursorStats{ScannedValues: 0, ScannedBytes: 0}, - }, - { - name: "within block", - args: args{ - min: 2001, - max: 2011, - }, - exp: EXP("tag0=val0", true, 2011, "tag0=val1", false, int(InvalidMinNanoTime), "tag0=val2", true, 2011), - expStats: cursors.CursorStats{ScannedValues: 6, ScannedBytes: 48}, - }, - { - name: "1000_2999", - args: args{ - min: 1000, - max: 2500, - }, - exp: EXP("tag0=val0", true, 2020, "tag0=val1", true, 2000, "tag0=val2", true, 2020), - expStats: cursors.CursorStats{ScannedValues: 0, ScannedBytes: 0}, - }, - }, - }, - }) - - 
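// The three deletes below exercise each tombstone path of the reader: DeletePrefix
// tombstones every series under org 0x50 / bucket 0x60 for timestamps 0-2999, Delete
// removes the mem,tag0=val0 series entirely, and DeleteRange tombstones 2000-2999 for
// the two remaining mem series. With tombstones present the iterator can no longer
// answer from the index alone for partially covered entries, which is why the
// "after delete" expectations report non-zero ScannedValues/ScannedBytes.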
tsm.MustDeletePrefix(orgBucket(0x50, 0x60), 0, 2999) - tsm.MustDelete(makeKey(0x51, 0x61, "mem", "tag0=val0")) - tsm.MustDeleteRange(2000, 2999, - makeKey(0x51, 0x61, "mem", "tag0=val1"), - makeKey(0x51, 0x61, "mem", "tag0=val2"), - ) - - runTests("after delete", []bucketTest{ - { - org: 0x50, - bucket: 0x60, - m: "cpu", - tests: []test{ - { - name: "cover file", - args: args{ - min: 900, - max: 10000, - }, - exp: EXP("tag0=val1", true, 3020), - expStats: cursors.CursorStats{ScannedValues: 6, ScannedBytes: 48}, - }, - { - name: "within block", - args: args{ - min: 2001, - max: 2011, - }, - exp: nil, - expStats: cursors.CursorStats{ScannedValues: 0, ScannedBytes: 0}, - }, - { - name: "to_2999", - args: args{ - min: 0, - max: 2999, - }, - exp: EXP("tag0=val1", false, int(InvalidMinNanoTime)), - expStats: cursors.CursorStats{ScannedValues: 3, ScannedBytes: 24}, - }, - { - name: "intersects block", - args: args{ - min: 1500, - max: 2500, - }, - exp: EXP("tag0=val1", false, int(InvalidMinNanoTime)), - expStats: cursors.CursorStats{ScannedValues: 3, ScannedBytes: 24}, - }, - { - name: "beyond all tombstones", - args: args{ - min: 3000, - max: 4000, - }, - exp: EXP("tag0=val1", true, 3020), - expStats: cursors.CursorStats{ScannedValues: 0, ScannedBytes: 0}, - }, - }, - }, - - { - org: 0x51, - bucket: 0x61, - m: "mem", - tests: []test{ - { - name: "cover file", - args: args{ - min: 900, - max: 10000, - }, - exp: EXP("tag0=val1", true, 1020, "tag0=val2", true, 3020), - expStats: cursors.CursorStats{ScannedValues: 10, ScannedBytes: 80}, - }, - { - name: "within block", - args: args{ - min: 2001, - max: 2011, - }, - exp: EXP("tag0=val1", false, int(InvalidMinNanoTime), "tag0=val2", false, int(InvalidMinNanoTime)), - expStats: cursors.CursorStats{ScannedValues: 3, ScannedBytes: 24}, - }, - { - name: "1000_2500", - args: args{ - min: 1000, - max: 2500, - }, - exp: EXP("tag0=val1", true, 1020, "tag0=val2", false, int(InvalidMinNanoTime)), - expStats: cursors.CursorStats{ScannedValues: 7, ScannedBytes: 56}, - }, - }, - }, - }) -} diff --git a/tsdb/tsm1/reader_test.go b/tsdb/tsm1/reader_test.go deleted file mode 100644 index 33e32557ad..0000000000 --- a/tsdb/tsm1/reader_test.go +++ /dev/null @@ -1,1620 +0,0 @@ -package tsm1 - -import ( - "io/ioutil" - "math" - "os" - "path/filepath" - "sort" - "testing" -) - -func fatal(t testing.TB, msg string, err error) { - t.Helper() - t.Fatalf("unexpected error %v: %v", msg, err) -} - -func fatalIfErr(t testing.TB, msg string, err error) { - t.Helper() - if err != nil { - fatal(t, msg, err) - } -} - -func TestTSMReader_Type(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - - w, err := NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - values := []Value{NewValue(0, int64(1))} - if err := w.Write([]byte("cpu"), values); err != nil { - t.Fatalf("unexpected error writing: %v", err) - - } - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - f, err = os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error opening: %v", err) - } - r, err := NewTSMReader(f) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - - typ, err := r.Type([]byte("cpu")) - if err != nil { - fatal(t, "reading type", err) - } - - if got, exp := typ, BlockInteger; got != exp { - t.Fatalf("type mismatch: got %v, exp %v", got, exp) - } -} - -func 
TestIndexWriter_MaxBlocks(t *testing.T) { - index := NewIndexWriter() - for i := 0; i < 1<<16; i++ { - index.Add([]byte("cpu"), BlockFloat64, 0, 1, 10, 20) - } - - if _, err := index.MarshalBinary(); err == nil { - t.Fatalf("expected max block count error. got nil") - } -} - -func TestTSMReader_MMAP_ReadAll(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - defer f.Close() - - w, err := NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - var data = map[string][]Value{ - "float": []Value{NewValue(1, 1.0)}, - "int": []Value{NewValue(1, int64(1))}, - "uint": []Value{NewValue(1, ^uint64(0))}, - "bool": []Value{NewValue(1, true)}, - "string": []Value{NewValue(1, "foo")}, - } - - keys := make([]string, 0, len(data)) - for k := range data { - keys = append(keys, k) - } - sort.Strings(keys) - - for _, k := range keys { - if err := w.Write([]byte(k), data[k]); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error writing index: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - f, err = os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - - r, err := NewTSMReader(f) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - defer r.Close() - - var count int - for k, vals := range data { - readValues, err := r.ReadAll([]byte(k)) - if err != nil { - t.Fatalf("unexpected error readin: %v", err) - } - - if exp := len(vals); exp != len(readValues) { - t.Fatalf("read values length mismatch: got %v, exp %v", len(readValues), exp) - } - - for i, v := range vals { - if v.Value() != readValues[i].Value() { - t.Fatalf("read value mismatch(%d): got %v, exp %d", i, readValues[i].Value(), v.Value()) - } - } - count++ - } - - if got, exp := count, len(data); got != exp { - t.Fatalf("read values count mismatch: got %v, exp %v", got, exp) - } -} - -func TestTSMReader_MMAP_Read(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - defer f.Close() - - w, err := NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - var data = map[string][]Value{ - "float": []Value{ - NewValue(1, 1.0)}, - "int": []Value{ - NewValue(1, int64(1))}, - "uint": []Value{ - NewValue(1, ^uint64(0))}, - "bool": []Value{ - NewValue(1, true)}, - "string": []Value{ - NewValue(1, "foo")}, - } - - keys := make([]string, 0, len(data)) - for k := range data { - keys = append(keys, k) - } - sort.Strings(keys) - - for _, k := range keys { - if err := w.Write([]byte(k), data[k]); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error writing index: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - f, err = os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - - r, err := NewTSMReader(f) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - defer r.Close() - - var count int - for k, vals := range data { - readValues, err := r.Read([]byte(k), vals[0].UnixNano()) - if err != nil { - t.Fatalf("unexpected error readin: %v", err) - } - - if exp := len(vals); exp != len(readValues) { - t.Fatalf("read values length mismatch: got %v, exp %v", len(readValues), exp) - } - - for i, v := range vals 
{ - if v.Value() != readValues[i].Value() { - t.Fatalf("read value mismatch(%d): got %v, exp %d", i, readValues[i].Value(), v.Value()) - } - } - count++ - } - - if got, exp := count, len(data); got != exp { - t.Fatalf("read values count mismatch: got %v, exp %v", got, exp) - } -} - -func TestTSMReader_MMAP_Keys(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - defer f.Close() - - w, err := NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - var data = map[string][]Value{ - "float": []Value{ - NewValue(1, 1.0)}, - "int": []Value{ - NewValue(1, int64(1))}, - "uint": []Value{ - NewValue(1, ^uint64(0))}, - "bool": []Value{ - NewValue(1, true)}, - "string": []Value{ - NewValue(1, "foo")}, - } - - keys := make([]string, 0, len(data)) - for k := range data { - keys = append(keys, k) - } - sort.Strings(keys) - - for _, k := range keys { - if err := w.Write([]byte(k), data[k]); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error writing index: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - f, err = os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - - r, err := NewTSMReader(f) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - defer r.Close() - - var count int - for k, vals := range data { - readValues, err := r.Read([]byte(k), vals[0].UnixNano()) - if err != nil { - t.Fatalf("unexpected error readin: %v", err) - } - - if exp := len(vals); exp != len(readValues) { - t.Fatalf("read values length mismatch: got %v, exp %v", len(readValues), exp) - } - - for i, v := range vals { - if v.Value() != readValues[i].Value() { - t.Fatalf("read value mismatch(%d): got %v, exp %d", i, readValues[i].Value(), v.Value()) - } - } - count++ - } - - if got, exp := count, len(data); got != exp { - t.Fatalf("read values count mismatch: got %v, exp %v", got, exp) - } -} - -func TestTSMReader_MMAP_Tombstone(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - defer f.Close() - - w, err := NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - values := []Value{NewValue(0, 1.0)} - if err := w.Write([]byte("cpu"), values); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - - if err := w.Write([]byte("mem"), values); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error writing index: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - f, err = os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - - r, err := NewTSMReader(f) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - - if err := r.Delete([][]byte{[]byte("mem")}); err != nil { - t.Fatalf("unexpected error deleting: %v", err) - } - - r, err = NewTSMReader(f) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - defer r.Close() - - if got, exp := r.KeyCount(), 1; got != exp { - t.Fatalf("key length mismatch: got %v, exp %v", got, exp) - } -} - -func TestTSMReader_MMAP_TombstoneRange(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - defer f.Close() - - w, err := NewTSMWriter(f) - if err != nil { - 
t.Fatalf("unexpected error creating writer: %v", err) - } - - expValues := []Value{ - NewValue(1, 1.0), - NewValue(2, 2.0), - NewValue(3, 3.0), - } - if err := w.Write([]byte("cpu"), expValues); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error writing index: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - f, err = os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - - r, err := NewTSMReader(f) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - - if err := r.DeleteRange([][]byte{[]byte("cpu")}, 2, math.MaxInt64); err != nil { - t.Fatalf("unexpected error deleting: %v", err) - } - defer r.Close() - - if got, exp := r.MaybeContainsValue([]byte("cpu"), 1), true; got != exp { - t.Fatalf("MaybeContainsValue mismatch: got %v, exp %v", got, exp) - } - - if got, exp := r.MaybeContainsValue([]byte("cpu"), 3), false; got != exp { - t.Fatalf("MaybeContainsValue mismatch: got %v, exp %v", got, exp) - } - - values, err := r.ReadAll([]byte("cpu")) - if err != nil { - t.Fatalf("unexpected error reading all: %v", err) - } - - if got, exp := len(values), 1; got != exp { - t.Fatalf("values length mismatch: got %v, exp %v", got, exp) - } - - if got, exp := values[0].String(), expValues[0].String(); got != exp { - t.Fatalf("value mismatch: got %v, exp %v", got, exp) - } -} - -func TestTSMReader_MMAP_TombstoneOutsideTimeRange(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - defer f.Close() - - w, err := NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - expValues := []Value{ - NewValue(1, 1.0), - NewValue(2, 2.0), - NewValue(3, 3.0), - } - if err := w.Write([]byte("cpu"), expValues); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error writing index: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - f, err = os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - - r, err := NewTSMReader(f) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - - if err := r.DeleteRange([][]byte{[]byte("cpu")}, 0, 0); err != nil { - t.Fatalf("unexpected error deleting: %v", err) - } - defer r.Close() - - if got, exp := r.MaybeContainsValue([]byte("cpu"), 1), true; got != exp { - t.Fatalf("MaybeContainsValue mismatch: got %v, exp %v", got, exp) - } - - if got, exp := r.MaybeContainsValue([]byte("cpu"), 2), true; got != exp { - t.Fatalf("MaybeContainsValue mismatch: got %v, exp %v", got, exp) - } - - if got, exp := r.MaybeContainsValue([]byte("cpu"), 3), true; got != exp { - t.Fatalf("MaybeContainsValue mismatch: got %v, exp %v", got, exp) - } - - if got, exp := r.HasTombstones(), false; got != exp { - t.Fatalf("HasTombstones mismatch: got %v, exp %v", got, exp) - } - - if got, exp := len(r.TombstoneFiles()), 0; got != exp { - t.Fatalf("TombstoneFiles len mismatch: got %v, exp %v", got, exp) - } -} - -func TestTSMReader_MMAP_TombstoneOutsideKeyRange(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - defer f.Close() - - w, err := NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - expValues := []Value{ - NewValue(1, 1.0), - NewValue(2, 2.0), - 
NewValue(3, 3.0), - } - if err := w.Write([]byte("cpu"), expValues); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error writing index: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - f, err = os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - - r, err := NewTSMReader(f) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - - if err := r.DeleteRange([][]byte{[]byte("mem")}, 0, 3); err != nil { - t.Fatalf("unexpected error deleting: %v", err) - } - defer r.Close() - - if got, exp := r.MaybeContainsValue([]byte("cpu"), 1), true; got != exp { - t.Fatalf("MaybeContainsValue mismatch: got %v, exp %v", got, exp) - } - - if got, exp := r.MaybeContainsValue([]byte("cpu"), 2), true; got != exp { - t.Fatalf("MaybeContainsValue mismatch: got %v, exp %v", got, exp) - } - - if got, exp := r.MaybeContainsValue([]byte("cpu"), 3), true; got != exp { - t.Fatalf("MaybeContainsValue mismatch: got %v, exp %v", got, exp) - } - - if got, exp := r.HasTombstones(), false; got != exp { - t.Fatalf("HasTombstones mismatch: got %v, exp %v", got, exp) - } - - if got, exp := len(r.TombstoneFiles()), 0; got != exp { - t.Fatalf("TombstoneFiles len mismatch: got %v, exp %v", got, exp) - - } -} - -func TestTSMReader_MMAP_TombstoneOverlapKeyRange(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - defer f.Close() - - w, err := NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - expValues := []Value{ - NewValue(1, 1.0), - NewValue(2, 2.0), - NewValue(3, 3.0), - } - if err := w.Write([]byte("cpu,app=foo,host=server-0#!~#value"), expValues); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - - if err := w.Write([]byte("cpu,app=foo,host=server-73379#!~#value"), expValues); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error writing index: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - f, err = os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - - r, err := NewTSMReader(f) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - - if err := r.DeleteRange([][]byte{ - []byte("cpu,app=foo,host=server-0#!~#value"), - []byte("cpu,app=foo,host=server-73379#!~#value"), - []byte("cpu,app=foo,host=server-99999#!~#value")}, - math.MinInt64, math.MaxInt64); err != nil { - t.Fatalf("unexpected error deleting: %v", err) - } - defer r.Close() - - if got, exp := r.Contains([]byte("cpu,app=foo,host=server-0#!~#value")), false; got != exp { - t.Fatalf("MaybeContainsValue mismatch: got %v, exp %v", got, exp) - } - - if got, exp := r.Contains([]byte("cpu,app=foo,host=server-73379#!~#value")), false; got != exp { - t.Fatalf("MaybeContainsValue mismatch: got %v, exp %v", got, exp) - } - - if got, exp := r.HasTombstones(), true; got != exp { - t.Fatalf("HasTombstones mismatch: got %v, exp %v", got, exp) - } - - if got, exp := len(r.TombstoneFiles()), 1; got != exp { - t.Fatalf("TombstoneFiles len mismatch: got %v, exp %v", got, exp) - } -} - -func TestTSMReader_MMAP_TombstoneFullRange(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - defer f.Close() - - w, err := NewTSMWriter(f) - if err != nil { - 
t.Fatalf("unexpected error creating writer: %v", err) - } - - expValues := []Value{ - NewValue(1, 1.0), - NewValue(2, 2.0), - NewValue(3, 3.0), - } - if err := w.Write([]byte("cpu"), expValues); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error writing index: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - f, err = os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - - r, err := NewTSMReader(f) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - - if err := r.DeleteRange([][]byte{[]byte("cpu")}, math.MinInt64, math.MaxInt64); err != nil { - t.Fatalf("unexpected error deleting: %v", err) - } - defer r.Close() - - values, err := r.ReadAll([]byte("cpu")) - if err != nil { - t.Fatalf("unexpected error reading all: %v", err) - } - - if got, exp := len(values), 0; got != exp { - t.Fatalf("values length mismatch: got %v, exp %v", got, exp) - } -} - -func TestTSMReader_MMAP_TombstoneFullRangeMultiple(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - defer f.Close() - - w, err := NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - expValues := []Value{ - NewValue(1, 1.0), - NewValue(2, 2.0), - NewValue(3, 3.0), - } - if err := w.Write([]byte("cpu"), expValues); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - - expValues1 := []Value{ - NewValue(3, 1.0), - NewValue(4, 2.0), - NewValue(5, 3.0), - } - - if err := w.Write([]byte("mem"), expValues1); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error writing index: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - f, err = os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - - r, err := NewTSMReader(f) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - defer r.Close() - - if err := r.DeleteRange([][]byte{[]byte("mem"), []byte("cpu")}, 0, 3); err != nil { - t.Fatalf("unexpected error deleting: %v", err) - } - - // Make sure everything is deleted - values, err := r.ReadAll([]byte("cpu")) - if err != nil { - t.Fatalf("unexpected error reading all: %v", err) - } - - if got, exp := len(values), 0; got != exp { - t.Fatalf("values length mismatch: got %v, exp %v", got, exp) - } - - values, err = r.ReadAll([]byte("mem")) - if err != nil { - t.Fatalf("unexpected error reading all: %v", err) - } - - if got, exp := len(values), 2; got != exp { - t.Fatalf("values length mismatch: got %v, exp %v", got, exp) - } -} - -func TestTSMReader_MMAP_TombstoneMultipleRanges(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - defer f.Close() - - w, err := NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - expValues := []Value{ - NewValue(1, 1.0), - NewValue(2, 2.0), - NewValue(3, 3.0), - NewValue(4, 4.0), - NewValue(5, 5.0), - } - if err := w.Write([]byte("cpu"), expValues); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error writing index: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - f, err = os.Open(f.Name()) - if err != nil 
{ - t.Fatalf("unexpected error open file: %v", err) - } - - r, err := NewTSMReader(f) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - defer r.Close() - - if err := r.DeleteRange([][]byte{[]byte("cpu")}, 2, 2); err != nil { - t.Fatalf("unexpected error deleting: %v", err) - } - - if err := r.DeleteRange([][]byte{[]byte("cpu")}, 4, 4); err != nil { - t.Fatalf("unexpected error deleting: %v", err) - } - - values, err := r.ReadAll([]byte("cpu")) - if err != nil { - t.Fatalf("unexpected error reading all: %v", err) - } - - if got, exp := len(values), 3; got != exp { - t.Fatalf("values length mismatch: got %v, exp %v", got, exp) - } -} - -func TestTSMReader_MMAP_TombstoneMultipleRangesFull(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - defer f.Close() - - w, err := NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - expValues := []Value{ - NewValue(1, 1.0), - NewValue(2, 2.0), - } - if err := w.Write([]byte("cpu"), expValues); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error writing index: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - f, err = os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - - r, err := NewTSMReader(f) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - defer r.Close() - - if err := r.DeleteRange([][]byte{[]byte("cpu")}, 1, 1); err != nil { - t.Fatalf("unexpected error deleting: %v", err) - } - - if err := r.DeleteRange([][]byte{[]byte("cpu")}, 2, 2); err != nil { - t.Fatalf("unexpected error deleting: %v", err) - } - - if got, exp := r.KeyCount(), 0; got != exp { - t.Fatalf("key count mismatch: got %v, exp %v", got, exp) - } - - values, err := r.ReadAll([]byte("cpu")) - if err != nil { - t.Fatalf("unexpected error reading all: %v", err) - } - - if got, exp := len(values), 0; got != exp { - t.Fatalf("values length mismatch: got %v, exp %v", got, exp) - } -} - -func TestTSMReader_MMAP_TombstoneMultipleRangesNoOverlap(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - defer f.Close() - - w, err := NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - expValues := []Value{ - NewValue(1, 1.0), - NewValue(2, 2.0), - NewValue(3, 2.0), - } - if err := w.Write([]byte("cpu"), expValues); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error writing index: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - f, err = os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - - r, err := NewTSMReader(f) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - defer r.Close() - - if err := r.DeleteRange([][]byte{[]byte("cpu")}, 1, 1); err != nil { - t.Fatalf("unexpected error deleting: %v", err) - } - - if err := r.DeleteRange([][]byte{[]byte("cpu")}, 3, 3); err != nil { - t.Fatalf("unexpected error deleting: %v", err) - } - - if got, exp := r.KeyCount(), 1; got != exp { - t.Fatalf("key count mismatch: got %v, exp %v", got, exp) - } - - values, err := r.ReadAll([]byte("cpu")) - if err != nil { - t.Fatalf("unexpected error reading all: %v", err) - } - - if got, exp := len(values), 1; 
got != exp { - t.Fatalf("values length mismatch: got %v, exp %v", got, exp) - } -} - -func TestTSMReader_MMAP_TombstoneOutsideRange(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - defer f.Close() - - w, err := NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - cpuValues := []Value{ - NewValue(1, 1.0), - NewValue(2, 2.0), - NewValue(3, 3.0), - } - if err := w.Write([]byte("cpu"), cpuValues); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - - memValues := []Value{ - NewValue(1, 1.0), - NewValue(2, 2.0), - NewValue(30, 3.0), - } - if err := w.Write([]byte("mem"), memValues); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error writing index: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - f, err = os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - - r, err := NewTSMReader(f) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - - if err := r.DeleteRange([][]byte{[]byte("cpu"), []byte("mem")}, 5, math.MaxInt64); err != nil { - t.Fatalf("unexpected error deleting: %v", err) - } - defer r.Close() - - if got, exp := r.KeyCount(), 2; got != exp { - t.Fatalf("key count mismatch: got %v, exp %v", got, exp) - } - - if got, exp := len(r.TombstoneRange([]byte("cpu"), nil)), 0; got != exp { - t.Fatalf("tombstone range mismatch: got %v, exp %v", got, exp) - } - - values, err := r.ReadAll([]byte("cpu")) - if err != nil { - t.Fatalf("unexpected error reading all: %v", err) - } - - if got, exp := len(values), len(cpuValues); got != exp { - t.Fatalf("values length mismatch: got %v, exp %v", got, exp) - } - - if got, exp := len(r.TombstoneRange([]byte("mem"), nil)), 1; got != exp { - t.Fatalf("tombstone range mismatch: got %v, exp %v", got, exp) - } - - values, err = r.ReadAll([]byte("mem")) - if err != nil { - t.Fatalf("unexpected error reading all: %v", err) - } - - if got, exp := len(values), len(memValues[:2]); got != exp { - t.Fatalf("values length mismatch: got %v, exp %v", got, exp) - } - -} - -func TestTSMReader_MMAP_Stats(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - defer f.Close() - - w, err := NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - values1 := []Value{NewValue(0, 1.0)} - if err := w.Write([]byte("cpu"), values1); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - - values2 := []Value{NewValue(1, 1.0)} - if err := w.Write([]byte("mem"), values2); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error writing index: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - f, err = os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - - r, err := NewTSMReader(f) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - defer r.Close() - - stats := r.Stats() - if got, exp := string(stats.MinKey), "cpu"; got != exp { - t.Fatalf("min key mismatch: got %v, exp %v", got, exp) - } - - if got, exp := string(stats.MaxKey), "mem"; got != exp { - t.Fatalf("max key mismatch: got %v, exp %v", got, exp) - } - - if got, exp := stats.MinTime, values1[0].UnixNano(); got != exp { - t.Fatalf("min 
time mismatch: got %v, exp %v", got, exp) - } - - if got, exp := stats.MaxTime, values2[0].UnixNano(); got != exp { - t.Fatalf("max time mismatch: got %v, exp %v", got, exp) - } - - if got, exp := r.KeyCount(), 2; got != exp { - t.Fatalf("key length mismatch: got %v, exp %v", got, exp) - } -} - -// Ensure that we return an error if we try to open a non-tsm file -func TestTSMReader_VerifiesFileType(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - defer f.Close() - - // write some garbage - f.Write([]byte{0x23, 0xac, 0x99, 0x22, 0x77, 0x23, 0xac, 0x99, 0x22, 0x77, 0x23, 0xac, 0x99, 0x22, 0x77, 0x23, 0xac, 0x99, 0x22, 0x77}) - - _, err := NewTSMReader(f) - if err == nil { - t.Fatal("expected error trying to open non-tsm file") - } -} - -func TestIndirectIndex_Entries(t *testing.T) { - index := NewIndexWriter() - index.Add([]byte("cpu"), BlockFloat64, 0, 1, 10, 100) - index.Add([]byte("cpu"), BlockFloat64, 2, 3, 20, 200) - exp := index.Entries([]byte("cpu")) - - index.Add([]byte("mem"), BlockFloat64, 0, 1, 10, 100) - - b, err := index.MarshalBinary() - if err != nil { - t.Fatalf("unexpected error marshaling index: %v", err) - } - - indirect := NewIndirectIndex() - if err := indirect.UnmarshalBinary(b); err != nil { - t.Fatalf("unexpected error unmarshaling index: %v", err) - } - - entries, err := indirect.ReadEntries([]byte("cpu"), nil) - if err != nil { - t.Fatal(err) - } - - if got, exp := len(entries), len(exp); got != exp { - t.Fatalf("entries length mismatch: got %v, exp %v", got, exp) - } - - for i, exp := range exp { - got := entries[i] - if exp.MinTime != got.MinTime { - t.Fatalf("minTime mismatch: got %v, exp %v", got.MinTime, exp.MinTime) - } - - if exp.MaxTime != got.MaxTime { - t.Fatalf("minTime mismatch: got %v, exp %v", got.MaxTime, exp.MaxTime) - } - - if exp.Size != got.Size { - t.Fatalf("size mismatch: got %v, exp %v", got.Size, exp.Size) - } - if exp.Offset != got.Offset { - t.Fatalf("size mismatch: got %v, exp %v", got.Offset, exp.Offset) - } - } -} - -func TestDirectIndex_KeyCount(t *testing.T) { - index := NewIndexWriter() - index.Add([]byte("cpu"), BlockFloat64, 0, 1, 10, 20) - index.Add([]byte("cpu"), BlockFloat64, 1, 2, 20, 30) - index.Add([]byte("mem"), BlockFloat64, 0, 1, 10, 20) - - // 2 distinct keys - if got, exp := index.KeyCount(), 2; got != exp { - t.Fatalf("length mismatch: got %v, exp %v", got, exp) - } -} - -func TestTSMReader_UnmarshalBinary_BlockCountOverflow(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - defer f.Close() - - w, err := NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - for i := 0; i < 3280; i++ { - w.Write([]byte("cpu"), []Value{NewValue(int64(i), float64(i))}) - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - f, err = os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - - r, err := NewTSMReader(f) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - defer r.Close() -} - -func TestCompacted_NotFull(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - - w, err := NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - values := []Value{NewValue(0, 1.0)} - if err := w.Write([]byte("cpu"), values); err != nil { - 
t.Fatalf("unexpected error writing: %v", err) - - } - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error writing index: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - fd, err := os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - - r, err := NewTSMReader(fd) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - - iter := r.BlockIterator() - if !iter.Next() { - t.Fatalf("expected next, got false") - } - - _, _, _, _, _, block, err := iter.Read() - if err != nil { - t.Fatalf("unexpected error reading block: %v", err) - } - - if got, exp := BlockCount(block), 1; got != exp { - t.Fatalf("block count mismatch: got %v, exp %v", got, exp) - } -} - -func TestTSMReader_File_ReadAll(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - defer f.Close() - - w, err := NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - var data = map[string][]Value{ - "float": []Value{ - NewValue(1, 1.0)}, - "int": []Value{ - NewValue(1, int64(1))}, - "uint": []Value{ - NewValue(1, ^uint64(0))}, - "bool": []Value{ - NewValue(1, true)}, - "string": []Value{ - NewValue(1, "foo")}, - } - - keys := make([]string, 0, len(data)) - for k := range data { - keys = append(keys, k) - } - sort.Strings(keys) - - for _, k := range keys { - if err := w.Write([]byte(k), data[k]); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error writing index: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - f, err = os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - - r, err := NewTSMReader(f) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - defer r.Close() - - var count int - for k, vals := range data { - readValues, err := r.ReadAll([]byte(k)) - if err != nil { - t.Fatalf("unexpected error reading: %v", err) - } - - if exp := len(vals); exp != len(readValues) { - t.Fatalf("read values length mismatch: exp %v, got %v", exp, len(readValues)) - } - - for i, v := range vals { - if exp, got := v.Value(), readValues[i].Value(); exp != got { - t.Fatalf("read value mismatch(%d): exp %v, got %d", i, v.Value(), readValues[i].Value()) - } - } - count++ - } - - if exp, got := len(data), count; exp != got { - t.Fatalf("read values count mismatch: exp %v, got %v", exp, got) - } -} - -func TestTSMReader_FuzzCrashes(t *testing.T) { - cases := []string{ - "", - "\x16\xd1\x16\xd1\x01\x10\x14X\xfb\x03\xac~\x80\xf0\x00\x00\x00I^K" + - "_\xf0\x00\x00\x00D424259389w\xf0\x00\x00\x00" + - "o\x93\bO\x10?\xf0\x00\x00\x00\x00\b\x00\xc2_\xff\xd8\x0fX^" + - "/\xbf\xe8\x00\x00\x00\x00\x00\x01\x00\bctr#!~#n\x00" + - "\x00\x01\x14X\xfb\xb0\x03\xac~\x80\x14X\xfb\xb1\x00\xd4ܥ\x00\x00" + - "\x00\x00\x00\x00\x00\x05\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00E", - "\x16\xd1\x16\xd1\x01\x80'Z\\\x00\v)\x00\x00\x00\x00;\x9a\xca\x00" + - "\x01\x05\x10?\xf0\x00\x00\x00\x00\x00\x00\xc2_\xff\xd6\x1d\xd4&\xed\v" + - "\xc5\xf7\xfb\xc0\x00\x00\x00\x00\x00 \x00\x06a#!~#v\x00\x00" + - "\x01\x00\x00\x00\x00;\x9a\xca\x00\x00\x00\x00\x01*\x05\xf2\x00\x00\x00\x00" + - "\x00\x00\x00\x00\x00\x00\x00\x00\x002", - "\x16\xd1\x16\xd1\x01\x80\xf0\x00\x00\x00I^K_\xf0\x00\x00\x00D7" + - "\nw\xf0\x00\x00\x00o\x93\bO\x10?\xf0\x00\x00\x00\x00\x00\x00\xc2" + - 
"_\xff\x14X\xfb\xb0\x03\xac~\x80\x14X\xfb\xb1\x00\xd4ܥ\x00\x00" + - "\x00\x00\x00\x00\x00\x05\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00E", - "\x16\xd1\x16\xd1\x01000000000000000" + - "00000000000000000000" + - "0000000000\x00\x000\x00\x0100000" + - "000\x00\x00\x00\x00\x00\x00\x002", - "\x16\xd1\x16\xd1\x01", - "\x16\xd1\x16\xd1\x01\x00\x00o\x93\bO\x10?\xf0\x00\x00\x00\x00X^" + - "/\xbf\xe8\x00\x00\x00\x00\x00\x01\x00\bctr#!~#n\x00" + - "\x00\x01\x14X\xfb\xb0\x03\xac~\x80\x14X\xfb\xb1\x00\xd4ܥ\x00\x00" + - "\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00E", - } - - for _, c := range cases { - func() { - dir := mustTempDir() - defer os.RemoveAll(dir) - - filename := filepath.Join(dir, "x.tsm") - if err := ioutil.WriteFile(filename, []byte(c), 0600); err != nil { - t.Fatalf("exp no error, got %s", err) - } - defer os.RemoveAll(dir) - - f, err := os.Open(filename) - if err != nil { - t.Fatalf("exp no error, got %s", err) - } - defer f.Close() - - r, err := NewTSMReader(f) - if err != nil { - return - } - defer r.Close() - - iter := r.BlockIterator() - for iter.Next() { - key, _, _, _, _, _, err := iter.Read() - if err != nil { - return - } - - _, _ = r.Type(key) - - if _, err = r.ReadAll(key); err != nil { - return - } - } - }() - } -} - -func TestTSMReader_File_Read(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - defer f.Close() - - w, err := NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - var data = map[string][]Value{ - "float": []Value{ - NewValue(1, 1.0)}, - "int": []Value{ - NewValue(1, int64(1))}, - "uint": []Value{ - NewValue(1, ^uint64(0))}, - "bool": []Value{ - NewValue(1, true)}, - "string": []Value{ - NewValue(1, "foo")}, - } - - keys := make([]string, 0, len(data)) - for k := range data { - keys = append(keys, k) - } - sort.Strings(keys) - - for _, k := range keys { - if err := w.Write([]byte(k), data[k]); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error writing index: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - f, err = os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - - r, err := NewTSMReader(f) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - defer r.Close() - - var count int - for k, vals := range data { - readValues, err := r.Read([]byte(k), vals[0].UnixNano()) - if err != nil { - t.Fatalf("unexpected error readin: %v", err) - } - - if exp, got := len(vals), len(readValues); exp != got { - t.Fatalf("read values length mismatch: exp %v, got %v", exp, len(readValues)) - } - - for i, v := range vals { - if v.Value() != readValues[i].Value() { - t.Fatalf("read value mismatch(%d): exp %v, got %d", i, v.Value(), readValues[i].Value()) - } - } - count++ - } - - if exp, got := count, len(data); exp != got { - t.Fatalf("read values count mismatch: exp %v, got %v", exp, got) - } -} - -func TestTSMReader_References(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - defer f.Close() - - w, err := NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - var data = map[string][]Value{ - "float": []Value{ - NewValue(1, 1.0)}, - "int": []Value{ - NewValue(1, int64(1))}, - "uint": []Value{ - NewValue(1, ^uint64(0))}, - "bool": []Value{ - NewValue(1, true)}, - "string": []Value{ - 
NewValue(1, "foo")}, - } - - keys := make([]string, 0, len(data)) - for k := range data { - keys = append(keys, k) - } - sort.Strings(keys) - - for _, k := range keys { - if err := w.Write([]byte(k), data[k]); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error writing index: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - f, err = os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - - r, err := NewTSMReader(f) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - defer r.Close() - - r.Ref() - - if err := r.Remove(); err != ErrFileInUse { - t.Fatalf("expected error removing reader: %v", err) - } - - var count int - for k, vals := range data { - readValues, err := r.Read([]byte(k), vals[0].UnixNano()) - if err != nil { - t.Fatalf("unexpected error readin: %v", err) - } - - if exp, got := len(vals), len(readValues); exp != got { - t.Fatalf("read values length mismatch: exp %v, got %v", exp, len(readValues)) - } - - for i, v := range vals { - if v.Value() != readValues[i].Value() { - t.Fatalf("read value mismatch(%d): exp %v, got %d", i, v.Value(), readValues[i].Value()) - } - } - count++ - } - - if exp, got := count, len(data); exp != got { - t.Fatalf("read values count mismatch: exp %v, got %v", exp, got) - } - r.Unref() - - if err := r.Close(); err != nil { - t.Fatalf("unexpected error closing reader: %v", err) - } - - if err := r.Remove(); err != nil { - t.Fatalf("unexpected error removing reader: %v", err) - } -} - -func TestTSMReader_DeletePrefix(t *testing.T) { - dir := mustTempDir() - defer os.RemoveAll(dir) - f := mustTempFile(dir) - - // create data in a tsm file - w, err := NewTSMWriter(f) - fatalIfErr(t, "creating writer", err) - - err = w.Write([]byte("cpu"), []Value{ - NewValue(0, int64(1)), - NewValue(5, int64(2)), - NewValue(10, int64(3)), - NewValue(15, int64(4)), - }) - fatalIfErr(t, "writing", err) - - err = w.WriteIndex() - fatalIfErr(t, "writing index", err) - - err = w.Close() - fatalIfErr(t, "closing", err) - - // open the tsm file and delete the prefix - f, err = os.Open(f.Name()) - fatalIfErr(t, "opening", err) - - r, err := NewTSMReader(f) - fatalIfErr(t, "creating reader", err) - - err = r.DeletePrefix([]byte("c"), 0, 5, nil, nil) - fatalIfErr(t, "deleting prefix", err) - - values, err := r.ReadAll([]byte("cpu")) - fatalIfErr(t, "reading values", err) - if got, exp := len(values), 2; got != exp { - t.Fatalf("wrong number of values: %d but wanted: %d", got, exp) - } - if got, exp := values[0], NewValue(10, int64(3)); got != exp { - t.Fatalf("wrong value: %q but wanted %q", got, exp) - } - if got, exp := values[1], NewValue(15, int64(4)); got != exp { - t.Fatalf("wrong value: %q but wanted %q", got, exp) - } - - err = r.Close() - fatalIfErr(t, "closing reader", err) - - // open the tsm file and check that the deletes still happened - f, err = os.Open(f.Name()) - fatalIfErr(t, "opening", err) - - r, err = NewTSMReader(f) - fatalIfErr(t, "creating reader", err) - - values, err = r.ReadAll([]byte("cpu")) - fatalIfErr(t, "reading values", err) - if got, exp := len(values), 2; got != exp { - t.Fatalf("wrong number of values: %d but wanted: %d", got, exp) - } - if got, exp := values[0], NewValue(10, int64(3)); got != exp { - t.Fatalf("wrong value: %q but wanted %q", got, exp) - } - if got, exp := values[1], NewValue(15, int64(4)); got != exp { - t.Fatalf("wrong 
value: %q but wanted %q", got, exp) - } -} diff --git a/tsdb/tsm1/reader_time_range.go b/tsdb/tsm1/reader_time_range.go deleted file mode 100644 index f6c95beada..0000000000 --- a/tsdb/tsm1/reader_time_range.go +++ /dev/null @@ -1,95 +0,0 @@ -package tsm1 - -// TimeRange holds a min and max timestamp. -type TimeRange struct { - Min, Max int64 -} - -func (t TimeRange) Overlaps(min, max int64) bool { - return t.Min <= max && t.Max >= min -} - -// Within returns true if min < t.Min and t.Max < max and therefore the interval [t.Min, t.Max] is -// contained within [min, max] -func (t TimeRange) Within(min, max int64) bool { - return min < t.Min && t.Max < max -} - -func (t TimeRange) Less(o TimeRange) bool { - return t.Min < o.Min || (t.Min == o.Min && t.Max < o.Max) -} - -// timeRangesCoverEntries returns true if the time ranges fully cover the entries. -func timeRangesCoverEntries(merger timeRangeMerger, entries []IndexEntry) (covers bool) { - if len(entries) == 0 { - return true - } - - mustCover := entries[0].MinTime - ts, ok := merger.Pop() - - for len(entries) > 0 && ok { - switch { - // If the tombstone does not include mustCover, we - // know we do not fully cover every entry. - case ts.Min > mustCover: - return false - - // Otherwise, if the tombstone covers the rest of - // the entry, consume it and bump mustCover to the - // start of the next entry. - case ts.Max >= entries[0].MaxTime: - entries = entries[1:] - if len(entries) > 0 { - mustCover = entries[0].MinTime - } - - // Otherwise, we're still inside of an entry, and - // so the tombstone must adjoin the current tombstone. - default: - if ts.Max >= mustCover { - mustCover = ts.Max + 1 - } - ts, ok = merger.Pop() - } - } - - return len(entries) == 0 -} - -// timeRangeMerger is a special purpose data structure to merge three sources of -// TimeRanges so that we can check if they cover a slice of index entries. -type timeRangeMerger struct { - fromMap []TimeRange - fromPrefix []TimeRange - single TimeRange - used bool // if single has been used -} - -// Pop returns the next TimeRange in sorted order and a boolean indicating that -// there was a TimeRange to read. 
-func (t *timeRangeMerger) Pop() (out TimeRange, ok bool) { - var where *[]TimeRange - var what []TimeRange - - if len(t.fromMap) > 0 { - where, what = &t.fromMap, t.fromMap[1:] - out, ok = t.fromMap[0], true - } - - if len(t.fromPrefix) > 0 && (!ok || t.fromPrefix[0].Less(out)) { - where, what = &t.fromPrefix, t.fromPrefix[1:] - out, ok = t.fromPrefix[0], true - } - - if !t.used && (!ok || t.single.Less(out)) { - t.used = true - return t.single, true - } - - if ok { - *where = what - } - - return out, ok -} diff --git a/tsdb/tsm1/reader_time_range_test.go b/tsdb/tsm1/reader_time_range_test.go deleted file mode 100644 index e84563e5d0..0000000000 --- a/tsdb/tsm1/reader_time_range_test.go +++ /dev/null @@ -1,100 +0,0 @@ -package tsm1 - -import ( - "reflect" - "sort" - "testing" - - "github.com/google/go-cmp/cmp" -) - -func TestTimeRangeMerger(t *testing.T) { - ranges := func(ns ...int64) (out []TimeRange) { - for _, n := range ns { - out = append(out, TimeRange{n, n}) - } - return out - } - - check := func(t *testing.T, exp []TimeRange, merger timeRangeMerger) { - t.Helper() - - var got []TimeRange - for { - tr, ok := merger.Pop() - if !ok { - break - } - got = append(got, tr) - } - - if !reflect.DeepEqual(got, exp) { - t.Fatalf("bad merge:\n%v", cmp.Diff(got, exp)) - } - } - - check(t, ranges(0, 1, 2, 3, 4, 5, 6), timeRangeMerger{ - fromMap: ranges(0, 2, 6), - fromPrefix: ranges(1, 3, 5), - single: TimeRange{4, 4}, - }) - - check(t, ranges(0, 1, 2), timeRangeMerger{ - fromMap: ranges(0, 1, 2), - used: true, - }) - - check(t, ranges(0, 1, 2), timeRangeMerger{ - fromPrefix: ranges(0, 1, 2), - used: true, - }) - - check(t, ranges(0), timeRangeMerger{ - single: TimeRange{0, 0}, - }) - - check(t, ranges(0, 0, 0), timeRangeMerger{ - fromMap: ranges(0), - fromPrefix: ranges(0), - single: TimeRange{0, 0}, - }) -} - -func TestTimeRangeCoverEntries(t *testing.T) { - ranges := func(ns ...int64) (out []TimeRange) { - for i := 0; i+1 < len(ns); i += 2 { - out = append(out, TimeRange{ns[i], ns[i+1]}) - } - return out - } - - entries := func(ns ...int64) (out []IndexEntry) { - for i := 0; i+1 < len(ns); i += 2 { - out = append(out, IndexEntry{MinTime: ns[i], MaxTime: ns[i+1]}) - } - return out - } - - check := func(t *testing.T, ranges []TimeRange, entries []IndexEntry, covers bool) { - t.Helper() - sort.Slice(ranges, func(i, j int) bool { return ranges[i].Less(ranges[j]) }) - got := timeRangesCoverEntries(timeRangeMerger{fromMap: ranges, used: true}, entries) - if got != covers { - t.Fatalf("bad covers:\nranges: %v\nentries: %v\ncovers: %v\ngot: %v", - ranges, entries, covers, got) - } - } - - check(t, ranges(0, 0, 1, 1, 2, 2), entries(0, 0, 1, 1, 2, 2), true) - check(t, ranges(0, 0, 1, 1, 2, 2), entries(0, 0, 2, 2), true) - check(t, ranges(0, 0, 1, 1, 2, 2), entries(3, 3), false) - check(t, ranges(0, 0, 1, 1, 2, 2), entries(-1, -1), false) - check(t, ranges(0, 10), entries(1, 1, 2, 2), true) - check(t, ranges(0, 1, 1, 2), entries(0, 0, 1, 1, 2, 2), true) - check(t, ranges(0, 10), entries(0, 0, 2, 2), true) - check(t, ranges(0, 1, 1, 2), entries(0, 0, 2, 2), true) - check(t, ranges(0, 1, 4, 5), entries(0, 0, 5, 5), true) - check(t, ranges(), entries(), true) - check(t, ranges(), entries(0, 0), false) - check(t, ranges(0, 0), entries(), true) -} diff --git a/tsdb/tsm1/report.go b/tsdb/tsm1/report.go deleted file mode 100644 index 825b00db02..0000000000 --- a/tsdb/tsm1/report.go +++ /dev/null @@ -1,341 +0,0 @@ -package tsm1 - -import ( - "bytes" - "errors" - "fmt" - "io" - "io/ioutil" - "math" - "os" - 
"path/filepath" - "sort" - "strconv" - "strings" - "text/tabwriter" - "time" - - "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/pkg/hll" - "github.com/influxdata/influxdb/v2/tsdb" -) - -// Report runs a report over tsm data -type Report struct { - Stderr io.Writer - Stdout io.Writer - - Dir string - OrgID, BucketID *influxdb.ID // Calculate only results for the provided org or bucket id. - Pattern string // Providing "01.tsm" for example would filter for level 1 files. - Detailed bool // Detailed will segment cardinality by tag keys. - Exact bool // Exact determines if estimation or exact methods are used to determine cardinality. -} - -// ReportSummary provides a summary of the cardinalities in the processed fileset. -type ReportSummary struct { - Min, Max int64 - Total uint64 //The exact or estimated unique set of series keys across all files. - Organizations map[string]uint64 // The exact or estimated unique set of series keys segmented by org. - Buckets map[string]uint64 // The exact or estimated unique set of series keys segmented by bucket. - - // These are calculated when the detailed flag is in use. - Measurements map[string]uint64 // The exact or estimated unique set of series keys segmented by the measurement tag. - FieldKeys map[string]uint64 // The exact or estimated unique set of series keys segmented by the field tag. - TagKeys map[string]uint64 // The exact or estimated unique set of series keys segmented by tag keys. -} - -func newReportSummary() *ReportSummary { - return &ReportSummary{ - Organizations: map[string]uint64{}, - Buckets: map[string]uint64{}, - Measurements: map[string]uint64{}, - FieldKeys: map[string]uint64{}, - TagKeys: map[string]uint64{}, - } -} - -// Run executes the Report. -// -// Calling Run with print set to true emits data about each file to the report's -// Stdout fd as it is generated. -func (r *Report) Run(print bool) (*ReportSummary, error) { - if r.Stderr == nil { - r.Stderr = os.Stderr - } - if r.Stdout == nil { - r.Stdout = os.Stdout - } - - if !print { - r.Stderr, r.Stdout = ioutil.Discard, ioutil.Discard - } - - newCounterFn := newHLLCounter - estTitle := " (est)" - if r.Exact { - estTitle = "" - newCounterFn = newExactCounter - } - - fi, err := os.Stat(r.Dir) - if err != nil { - return nil, err - } else if !fi.IsDir() { - return nil, errors.New("data directory not valid") - } - - totalSeries := newCounterFn() // The exact or estimated unique set of series keys across all files. - orgCardinalities := map[string]counter{} // The exact or estimated unique set of series keys segmented by org. - bucketCardinalities := map[string]counter{} // The exact or estimated unique set of series keys segmented by bucket. - - // These are calculated when the detailed flag is in use. - mCardinalities := map[string]counter{} // The exact or estimated unique set of series keys segmented by the measurement tag. - fCardinalities := map[string]counter{} // The exact or estimated unique set of series keys segmented by the field tag. - tCardinalities := map[string]counter{} // The exact or estimated unique set of series keys segmented by tag keys. 
- - start := time.Now() - - tw := tabwriter.NewWriter(r.Stdout, 8, 2, 1, ' ', 0) - fmt.Fprintln(tw, strings.Join([]string{"File", "Series", "New" + estTitle, "Min Time", "Max Time", "Load Time"}, "\t")) - - minTime, maxTime := int64(math.MaxInt64), int64(math.MinInt64) - - files, err := filepath.Glob(filepath.Join(r.Dir, "*.tsm")) - if err != nil { - panic(err) // Only error would be a bad pattern; not runtime related. - } - var processedFiles int - - var tagBuf models.Tags // Buffer that can be re-used when parsing keys. - for _, path := range files { - if r.Pattern != "" && !strings.Contains(path, r.Pattern) { - continue - } - - file, err := os.OpenFile(path, os.O_RDONLY, 0600) - if err != nil { - fmt.Fprintf(r.Stderr, "error: %s: %v. Exiting.\n", path, err) - return nil, err - } - - loadStart := time.Now() - reader, err := NewTSMReader(file) - if err != nil { - fmt.Fprintf(r.Stderr, "error: %s: %v. Skipping file.\n", file.Name(), err) - continue - } - loadTime := time.Since(loadStart) - processedFiles++ - - // Tracks the current total, so it's possible to know how many new series this file adds. - currentTotalCount := totalSeries.Count() - - seriesCount := reader.KeyCount() - itr := reader.Iterator(nil) - if itr == nil { - return nil, errors.New("invalid TSM file, no index iterator") - } - - for itr.Next() { - key := itr.Key() - - var a [16]byte // TODO(edd) if this shows up we can use a different API to DecodeName. - copy(a[:], key[:16]) - org, bucket := tsdb.DecodeName(a) - if r.OrgID != nil && *r.OrgID != org { // If filtering on single org or bucket then skip if no match - // org does not match. - continue - } else if r.BucketID != nil && *r.BucketID != bucket { - // bucket does not match. - continue - } - - totalSeries.Add(key) // Update total cardinality. - - // Update org cardinality - orgCount := orgCardinalities[org.String()] - if orgCount == nil { - orgCount = newCounterFn() - orgCardinalities[org.String()] = orgCount - } - orgCount.Add(key) - - // Update bucket cardinality. - bucketCount := bucketCardinalities[bucket.String()] - if bucketCount == nil { - bucketCount = newCounterFn() - bucketCardinalities[bucket.String()] = bucketCount - } - bucketCount.Add(key) - - // Update tag cardinalities. - if r.Detailed { - sep := bytes.Index(key, KeyFieldSeparatorBytes) - seriesKey := key[:sep] // Snip the tsm1 field key off. - _, tagBuf = models.ParseKeyBytesWithTags(seriesKey, tagBuf) - - for _, t := range tagBuf { - tk := string(t.Key) - switch tk { - case models.MeasurementTagKey: - mname := string(t.Value) - // Total series cardinality segmented by measurement name. - mCount := mCardinalities[mname] // measurement name. - if mCount == nil { - mCount = newCounterFn() - mCardinalities[mname] = mCount - } - mCount.Add(key) // full series keys associated with measurement name. - case models.FieldKeyTagKey: - mname := tagBuf.GetString(models.MeasurementTagKey) - fCount := fCardinalities[mname] - if fCount == nil { - fCount = newCounterFn() - fCardinalities[mname] = fCount - } - fCount.Add(t.Value) // field keys associated with measurement name. - default: - tagCount := tCardinalities[tk] - if tagCount == nil { - tagCount = newCounterFn() - tCardinalities[tk] = tagCount - } - tagCount.Add(t.Value) - } - } - } - } - - minT, maxT := reader.TimeRange() - if minT < minTime { - minTime = minT - } - if maxT > maxTime { - maxTime = maxT - } - - if err := reader.Close(); err != nil { - return nil, fmt.Errorf("error: %s: %v. 
Exiting", path, err) - } - - fmt.Fprintln(tw, strings.Join([]string{ - filepath.Base(file.Name()), - strconv.FormatInt(int64(seriesCount), 10), - strconv.FormatInt(int64(totalSeries.Count()-currentTotalCount), 10), - time.Unix(0, minT).UTC().Format(time.RFC3339Nano), - time.Unix(0, maxT).UTC().Format(time.RFC3339Nano), - loadTime.String(), - }, "\t")) - if r.Detailed { - if err := tw.Flush(); err != nil { - return nil, err - } - } - } - - if err := tw.Flush(); err != nil { - return nil, err - } - - summary := newReportSummary() - summary.Min = minTime - summary.Max = maxTime - summary.Total = totalSeries.Count() - - println() - - println("Summary:") - fmt.Printf(" Files: %d (%d skipped)\n", processedFiles, len(files)-processedFiles) - fmt.Printf(" Series Cardinality%s: %d\n", estTitle, totalSeries.Count()) - fmt.Printf(" Time Range: %s - %s\n", - time.Unix(0, minTime).UTC().Format(time.RFC3339Nano), - time.Unix(0, maxTime).UTC().Format(time.RFC3339Nano), - ) - fmt.Printf(" Duration: %s \n", time.Unix(0, maxTime).Sub(time.Unix(0, minTime))) - println() - - fmt.Printf("Statistics\n") - fmt.Printf(" Organizations (%d):\n", len(orgCardinalities)) - for _, org := range sortKeys(orgCardinalities) { - cardinality := orgCardinalities[org].Count() - summary.Organizations[org] = cardinality - fmt.Printf(" - %s: %d%s (%d%%)\n", org, cardinality, estTitle, int(float64(cardinality)/float64(totalSeries.Count())*100)) - } - fmt.Printf(" Total%s: %d\n", estTitle, totalSeries.Count()) - - fmt.Printf(" \n Buckets (%d):\n", len(bucketCardinalities)) - for _, bucket := range sortKeys(bucketCardinalities) { - cardinality := bucketCardinalities[bucket].Count() - summary.Buckets[bucket] = cardinality - fmt.Printf(" - %s: %d%s (%d%%)\n", bucket, cardinality, estTitle, int(float64(cardinality)/float64(totalSeries.Count())*100)) - } - fmt.Printf(" Total%s: %d\n", estTitle, totalSeries.Count()) - - if r.Detailed { - fmt.Printf("\n Series By Measurements (%d):\n", len(mCardinalities)) - for _, mname := range sortKeys(mCardinalities) { - cardinality := mCardinalities[mname].Count() - summary.Measurements[mname] = cardinality - fmt.Printf(" - %v: %d%s (%d%%)\n", mname, cardinality, estTitle, int((float64(cardinality)/float64(totalSeries.Count()))*100)) - } - - fmt.Printf("\n Fields By Measurements (%d):\n", len(fCardinalities)) - for _, mname := range sortKeys(fCardinalities) { - cardinality := fCardinalities[mname].Count() - summary.FieldKeys[mname] = cardinality - fmt.Printf(" - %v: %d%s\n", mname, cardinality, estTitle) - } - - fmt.Printf("\n Tag Values By Tag Keys (%d):\n", len(tCardinalities)) - for _, tkey := range sortKeys(tCardinalities) { - cardinality := tCardinalities[tkey].Count() - summary.TagKeys[tkey] = cardinality - fmt.Printf(" - %v: %d%s\n", tkey, cardinality, estTitle) - } - } - - fmt.Printf("\nCompleted in %s\n", time.Since(start)) - return summary, nil -} - -// sortKeys is a quick helper to return the sorted set of a map's keys -func sortKeys(vals map[string]counter) (keys []string) { - for k := range vals { - keys = append(keys, k) - } - sort.Strings(keys) - - return keys -} - -// counter abstracts a a method of counting keys. -type counter interface { - Add(key []byte) - Count() uint64 -} - -// newHLLCounter returns an approximate counter using HyperLogLogs for cardinality estimation. -func newHLLCounter() counter { - return hll.NewDefaultPlus() -} - -// exactCounter returns an exact count for keys using counting all distinct items in a set. 
-type exactCounter struct { - m map[string]struct{} -} - -func (c *exactCounter) Add(key []byte) { - c.m[string(key)] = struct{}{} -} - -func (c *exactCounter) Count() uint64 { - return uint64(len(c.m)) -} - -func newExactCounter() counter { - return &exactCounter{ - m: make(map[string]struct{}), - } -} diff --git a/tsdb/tsm1/ring.go b/tsdb/tsm1/ring.go deleted file mode 100644 index 541076dfac..0000000000 --- a/tsdb/tsm1/ring.go +++ /dev/null @@ -1,297 +0,0 @@ -package tsm1 - -import ( - "sync" - "sync/atomic" - - "github.com/cespare/xxhash" - "github.com/influxdata/influxdb/v2/pkg/bytesutil" -) - -// numPartitions is the number of partitions we used in the ring's continuum. It -// basically defines the maximum number of partitions you can have in the ring. -// If a smaller number of partitions are chosen when creating a ring, then -// they're evenly spread across this many partitions in the ring. -const numPartitions = 16 - -// ring is a structure that maps series keys to entries. -// -// ring is implemented as a crude hash ring, in so much that you can have -// variable numbers of members in the ring, and the appropriate member for a -// given series key can always consistently be found. Unlike a true hash ring -// though, this ring is not resizeable—there must be at most 256 members in the -// ring, and the number of members must always be a power of 2. -// -// ring works as follows: Each member of the ring contains a single store, which -// contains a map of series keys to entries. A ring always has 16 partitions, -// and a member takes up one or more of these partitions (depending on how many -// members are specified to be in the ring) -// -// To determine the partition that a series key should be added to, the series -// key is hashed and the least significant 4 bits are used as an index to the ring. -// -type ring struct { - // Number of keys within the ring. This is used to provide a hint for - // allocating the return values in keys(). It will not be perfectly accurate - // since it doesn't consider adding duplicate keys, or trying to remove non- - // existent keys. - keysHint int64 - - // The unique set of partitions in the ring. - // len(partitions) <= len(continuum) - partitions [numPartitions]*partition -} - -// newring returns a new ring initialised with numPartitions partitions. -func newRing() *ring { - r := new(ring) - for i := 0; i < len(r.partitions); i++ { - r.partitions[i] = &partition{store: make(map[string]*entry)} - } - return r -} - -// reset resets the ring so it can be reused. Before removing references to entries -// within each partition it gathers sizing information to provide hints when -// reallocating entries in partition maps. -// -// reset is not safe for use by multiple goroutines. -func (r *ring) reset() { - for _, partition := range r.partitions { - partition.reset() - } - r.keysHint = 0 -} - -// getPartition retrieves the hash ring partition associated with the provided key. -func (r *ring) getPartition(key []byte) *partition { - return r.partitions[int(xxhash.Sum64(key)%numPartitions)] -} - -// entry returns the entry for the given key. -// entry is safe for use by multiple goroutines. -func (r *ring) entry(key []byte) *entry { - return r.getPartition(key).entry(key) -} - -// write writes values to the entry in the ring's partition associated with key. -// If no entry exists for the key then one will be created. -// write is safe for use by multiple goroutines. 
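The ring maps a series key to one of its 16 partitions by hashing the key and taking the low 4 bits (Sum64 modulo 16). A dependency-free sketch of getPartition; the deleted code hashes with github.com/cespare/xxhash, and FNV-1a stands in here only to keep the example on the standard library:

package main

import (
	"fmt"
	"hash/fnv"
)

const numPartitions = 16 // a power of two, so the modulo keeps only the low 4 bits

// partitionFor mimics ring.getPartition: hash the series key and map it onto
// one of 16 fixed partitions.
func partitionFor(key []byte) int {
	h := fnv.New64a()
	h.Write(key)
	return int(h.Sum64() % numPartitions)
}

func main() {
	for _, k := range []string{"cpu,host=a", "cpu,host=b", "mem,host=a"} {
		fmt.Printf("%-12s -> partition %d\n", k, partitionFor([]byte(k)))
	}
}

Because a key always hashes to the same partition, readers and writers only contend on one of sixteen RWMutex-protected maps rather than on a single global map.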
-func (r *ring) write(key []byte, values Values) (bool, error) { - return r.getPartition(key).write(key, values) -} - -// add adds an entry to the ring. -func (r *ring) add(key []byte, entry *entry) { - r.getPartition(key).add(key, entry) - atomic.AddInt64(&r.keysHint, 1) -} - -// remove deletes the entry for the given key. -// remove is safe for use by multiple goroutines. -func (r *ring) remove(key []byte) { - r.getPartition(key).remove(key) - if r.keysHint > 0 { - atomic.AddInt64(&r.keysHint, -1) - } -} - -// keys returns all the keys from all partitions in the hash ring. The returned -// keys will be in order if sorted is true. -func (r *ring) keys(sorted bool) [][]byte { - keys := make([][]byte, 0, atomic.LoadInt64(&r.keysHint)) - for _, p := range r.partitions { - keys = append(keys, p.keys()...) - } - - if sorted { - bytesutil.Sort(keys) - } - return keys -} - -func (r *ring) count() int { - var n int - for _, p := range r.partitions { - n += p.count() - } - return n -} - -// apply applies the provided function to every entry in the ring under a read -// lock using a separate goroutine for each partition. The provided function -// will be called with each key and the corresponding entry. The first error -// encountered will be returned, if any. apply is safe for use by multiple -// goroutines. -func (r *ring) apply(f func([]byte, *entry) error) error { - - var ( - wg sync.WaitGroup - res = make(chan error, len(r.partitions)) - ) - - for _, p := range r.partitions { - wg.Add(1) - - go func(p *partition) { - defer wg.Done() - - p.mu.RLock() - for k, e := range p.store { - if err := f([]byte(k), e); err != nil { - res <- err - p.mu.RUnlock() - return - } - } - p.mu.RUnlock() - }(p) - } - - go func() { - wg.Wait() - close(res) - }() - - // Collect results. - for err := range res { - if err != nil { - return err - } - } - return nil -} - -// applySerial is similar to apply, but invokes f on each partition in the same -// goroutine. -// apply is safe for use by multiple goroutines. -func (r *ring) applySerial(f func(string, *entry) error) error { - for _, p := range r.partitions { - p.mu.RLock() - for k, e := range p.store { - if e.count() == 0 { - continue - } - if err := f(k, e); err != nil { - p.mu.RUnlock() - return err - } - } - p.mu.RUnlock() - } - return nil -} - -func (r *ring) split(n int) []*ring { - var keys int - storers := make([]*ring, n) - for i := 0; i < n; i++ { - storers[i] = newRing() - } - - for i, p := range r.partitions { - storers[i%n].partitions[i] = p - keys += len(p.store) - } - return storers -} - -// partition provides safe access to a map of series keys to entries. -type partition struct { - mu sync.RWMutex - store map[string]*entry -} - -// entry returns the partition's entry for the provided key. -// It's safe for use by multiple goroutines. -func (p *partition) entry(key []byte) *entry { - p.mu.RLock() - e := p.store[string(key)] - p.mu.RUnlock() - return e -} - -// write writes the values to the entry in the partition, creating the entry -// if it does not exist. -// write is safe for use by multiple goroutines. -func (p *partition) write(key []byte, values Values) (bool, error) { - p.mu.RLock() - e := p.store[string(key)] - p.mu.RUnlock() - if e != nil { - // Hot path. - return false, e.add(values) - } - - p.mu.Lock() - defer p.mu.Unlock() - - // Check again. - if e = p.store[string(key)]; e != nil { - return false, e.add(values) - } - - // Create a new entry using a preallocated size if we have a hint available. 
- e, err := newEntryValues(values) - if err != nil { - return false, err - } - - p.store[string(key)] = e - return true, nil -} - -// add adds a new entry for key to the partition. -func (p *partition) add(key []byte, entry *entry) { - p.mu.Lock() - p.store[string(key)] = entry - p.mu.Unlock() -} - -// remove deletes the entry associated with the provided key. -// remove is safe for use by multiple goroutines. -func (p *partition) remove(key []byte) { - p.mu.Lock() - delete(p.store, string(key)) - p.mu.Unlock() -} - -// keys returns an unsorted slice of the keys in the partition. -func (p *partition) keys() [][]byte { - p.mu.RLock() - keys := make([][]byte, 0, len(p.store)) - for k, v := range p.store { - if v.count() == 0 { - continue - } - keys = append(keys, []byte(k)) - } - p.mu.RUnlock() - return keys -} - -// reset resets the partition by reinitialising the store. reset returns hints -// about sizes that the entries within the store could be reallocated with. -func (p *partition) reset() { - p.mu.RLock() - sz := len(p.store) - p.mu.RUnlock() - - newStore := make(map[string]*entry, sz) - p.mu.Lock() - p.store = newStore - p.mu.Unlock() -} - -func (p *partition) count() int { - var n int - p.mu.RLock() - for _, v := range p.store { - if v.count() > 0 { - n++ - } - } - p.mu.RUnlock() - return n - -} diff --git a/tsdb/tsm1/ring_test.go b/tsdb/tsm1/ring_test.go deleted file mode 100644 index 7d57fdce20..0000000000 --- a/tsdb/tsm1/ring_test.go +++ /dev/null @@ -1,82 +0,0 @@ -package tsm1 - -import ( - "fmt" - "runtime" - "sync" - "testing" -) - -var strSliceRes [][]byte - -func benchmarkRingkeys(b *testing.B, r *ring, keys int) { - // Add some keys - for i := 0; i < keys; i++ { - r.add([]byte(fmt.Sprintf("cpu,host=server-%d value=1", i)), new(entry)) - } - - b.ReportAllocs() - b.ResetTimer() - for i := 0; i < b.N; i++ { - strSliceRes = r.keys(false) - } -} - -func BenchmarkRing_keys_100(b *testing.B) { benchmarkRingkeys(b, newRing(), 100) } -func BenchmarkRing_keys_1000(b *testing.B) { benchmarkRingkeys(b, newRing(), 1000) } -func BenchmarkRing_keys_10000(b *testing.B) { benchmarkRingkeys(b, newRing(), 10000) } -func BenchmarkRing_keys_100000(b *testing.B) { benchmarkRingkeys(b, newRing(), 100000) } - -func benchmarkRingGetPartition(b *testing.B, r *ring, keys int) { - vals := make([][]byte, keys) - - // Add some keys - for i := 0; i < keys; i++ { - vals[i] = []byte(fmt.Sprintf("cpu,host=server-%d field1=value1,field2=value2,field4=value4,field5=value5,field6=value6,field7=value7,field8=value1,field9=value2,field10=value4,field11=value5,field12=value6,field13=value7", i)) - r.add(vals[i], new(entry)) - } - - b.ReportAllocs() - b.ResetTimer() - for i := 0; i < b.N; i++ { - r.getPartition(vals[i%keys]) - } -} - -func BenchmarkRing_getPartition_100(b *testing.B) { benchmarkRingGetPartition(b, newRing(), 100) } -func BenchmarkRing_getPartition_1000(b *testing.B) { benchmarkRingGetPartition(b, newRing(), 1000) } - -func benchmarkRingWrite(b *testing.B, r *ring, n int) { - b.ReportAllocs() - for i := 0; i < b.N; i++ { - var wg sync.WaitGroup - for i := 0; i < runtime.GOMAXPROCS(0); i++ { - errC := make(chan error) - wg.Add(1) - go func() { - defer wg.Done() - for j := 0; j < n; j++ { - if _, err := r.write([]byte(fmt.Sprintf("cpu,host=server-%d value=1", j)), Values{}); err != nil { - errC <- err - } - } - }() - - go func() { - wg.Wait() - close(errC) - }() - - for err := range errC { - if err != nil { - b.Error(err) - } - } - } - } -} - -func BenchmarkRing_write_1_100(b *testing.B) { 
benchmarkRingWrite(b, newRing(), 100) } -func BenchmarkRing_write_1_1000(b *testing.B) { benchmarkRingWrite(b, newRing(), 1000) } -func BenchmarkRing_write_1_10000(b *testing.B) { benchmarkRingWrite(b, newRing(), 10000) } -func BenchmarkRing_write_1_100000(b *testing.B) { benchmarkRingWrite(b, newRing(), 100000) } diff --git a/tsdb/tsm1/scheduler.go b/tsdb/tsm1/scheduler.go deleted file mode 100644 index 141077a8dd..0000000000 --- a/tsdb/tsm1/scheduler.go +++ /dev/null @@ -1,80 +0,0 @@ -package tsm1 - -var defaultWeights = [4]float64{0.4, 0.3, 0.2, 0.1} - -type scheduler struct { - maxConcurrency int - compactionTracker *compactionTracker - - // queues is the depth of work pending for each compaction level - queues [4]int - weights [4]float64 -} - -func newScheduler(maxConcurrency int) *scheduler { - return &scheduler{ - maxConcurrency: maxConcurrency, - weights: defaultWeights, - compactionTracker: newCompactionTracker(newCompactionMetrics(nil), nil), - } -} - -// setCompactionTracker sets the metrics on the scheduler. It must be called before next. -func (s *scheduler) setCompactionTracker(tracker *compactionTracker) { - s.compactionTracker = tracker -} - -func (s *scheduler) setDepth(level, depth int) { - level = level - 1 - if level < 0 || level > len(s.queues) { - return - } - - s.queues[level] = depth -} - -func (s *scheduler) next() (int, bool) { - level1Running := int(s.compactionTracker.Active(1)) - level2Running := int(s.compactionTracker.Active(2)) - level3Running := int(s.compactionTracker.Active(3)) - level4Running := int(s.compactionTracker.ActiveFull() + s.compactionTracker.ActiveOptimise()) - - if level1Running+level2Running+level3Running+level4Running >= s.maxConcurrency { - return 0, false - } - - var ( - level int - runnable bool - ) - - loLimit, _ := s.limits() - - end := len(s.queues) - if level3Running+level4Running >= loLimit && s.maxConcurrency-(level1Running+level2Running) == 0 { - end = 2 - } - - var weight float64 - for i := 0; i < end; i++ { - if float64(s.queues[i])*s.weights[i] > weight { - level, runnable = i+1, true - weight = float64(s.queues[i]) * s.weights[i] - } - } - return level, runnable -} - -func (s *scheduler) limits() (int, int) { - hiLimit := s.maxConcurrency * 4 / 5 - loLimit := (s.maxConcurrency / 5) + 1 - if hiLimit == 0 { - hiLimit = 1 - } - - if loLimit == 0 { - loLimit = 1 - } - - return loLimit, hiLimit -} diff --git a/tsdb/tsm1/scheduler_test.go b/tsdb/tsm1/scheduler_test.go deleted file mode 100644 index 97871def85..0000000000 --- a/tsdb/tsm1/scheduler_test.go +++ /dev/null @@ -1,69 +0,0 @@ -package tsm1 - -import "testing" - -func TestScheduler_Runnable_Empty(t *testing.T) { - s := newScheduler(1) - - for i := 1; i < 5; i++ { - s.setDepth(i, 1) - level, runnable := s.next() - if exp, got := true, runnable; exp != got { - t.Fatalf("runnable(%d) mismatch: exp %v, got %v ", i, exp, got) - } - - if exp, got := i, level; exp != got { - t.Fatalf("runnable(%d) mismatch: exp %v, got %v ", i, exp, got) - } - s.setDepth(i, 0) - } -} - -func TestScheduler_Runnable_MaxConcurrency(t *testing.T) { - s := newScheduler(1) - - // level 1 - s.compactionTracker.active[1] = 1 - for i := 0; i <= 4; i++ { - _, runnable := s.next() - if exp, got := false, runnable; exp != got { - t.Fatalf("runnable mismatch: exp %v, got %v ", exp, got) - } - } - - // level 2 - s.compactionTracker.active[2] = 1 - for i := 0; i <= 4; i++ { - _, runnable := s.next() - if exp, got := false, runnable; exp != got { - t.Fatalf("runnable mismatch: exp %v, got %v ", exp, got) - } - } 
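The scheduler picks the next compaction level by scoring each queue as its depth times a per-level weight and taking the maximum, using the default weights {0.4, 0.3, 0.2, 0.1}. A sketch of that selection with the concurrency and limit checks stripped out:

package main

import "fmt"

var weights = [4]float64{0.4, 0.3, 0.2, 0.1} // defaultWeights from the deleted scheduler

// pick returns the 1-based compaction level with the highest depth*weight
// score, mirroring scheduler.next without the concurrency gating.
func pick(queues [4]int) (level int, ok bool) {
	var best float64
	for i, depth := range queues {
		if score := float64(depth) * weights[i]; score > best {
			level, ok, best = i+1, true, score
		}
	}
	return level, ok
}

func main() {
	level, ok := pick([4]int{1, 2, 0, 3})
	// level 2 wins: 2*0.3 = 0.6 beats 1*0.4 = 0.4 and 3*0.1 = 0.3
	fmt.Println(level, ok)
}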
- - // level 3 - s.compactionTracker.active[3] = 1 - for i := 0; i <= 4; i++ { - _, runnable := s.next() - if exp, got := false, runnable; exp != got { - t.Fatalf("runnable mismatch: exp %v, got %v ", exp, got) - } - } - - // optimize - s.compactionTracker.active[4] = 1 - for i := 0; i <= 4; i++ { - _, runnable := s.next() - if exp, got := false, runnable; exp != got { - t.Fatalf("runnable mismatch: exp %v, got %v ", exp, got) - } - } - - // full - s.compactionTracker.active[5] = 1 - for i := 0; i <= 4; i++ { - _, runnable := s.next() - if exp, got := false, runnable; exp != got { - t.Fatalf("runnable mismatch: exp %v, got %v ", exp, got) - } - } -} diff --git a/tsdb/tsm1/stats.go b/tsdb/tsm1/stats.go deleted file mode 100644 index 9ae8dc0d9d..0000000000 --- a/tsdb/tsm1/stats.go +++ /dev/null @@ -1,221 +0,0 @@ -package tsm1 - -import ( - "bytes" - "encoding/binary" - "fmt" - "hash/crc32" - "io" - "sort" - "strings" - - "github.com/influxdata/influxdb/v2/pkg/binaryutil" -) - -const ( - // MeasurementMagicNumber is written as the first 4 bytes of a data file to - // identify the file as a tsm1 stats file. - MeasurementStatsMagicNumber string = "TSS1" - - // MeasurementStatsVersion indicates the version of the TSS1 file format. - MeasurementStatsVersion byte = 1 -) - -// MeasurementStats represents a set of measurement sizes. -type MeasurementStats map[string]int - -// NewStats returns a new instance of Stats. -func NewMeasurementStats() MeasurementStats { - return make(MeasurementStats) -} - -// MeasurementNames returns a list of sorted measurement names. -func (s MeasurementStats) MeasurementNames() []string { - a := make([]string, 0, len(s)) - for name := range s { - a = append(a, name) - } - sort.Strings(a) - return a -} - -// Add adds the values of all measurements in other to s. -func (s MeasurementStats) Add(other MeasurementStats) { - for name, v := range other { - s[name] += v - } -} - -// Sub subtracts the values of all measurements in other from s. -func (s MeasurementStats) Sub(other MeasurementStats) { - for name, v := range other { - s[name] -= v - } -} - -// ReadFrom reads stats from r in a binary format. Reader must also be an io.ByteReader. -func (s MeasurementStats) ReadFrom(r io.Reader) (n int64, err error) { - br, ok := r.(io.ByteReader) - if !ok { - return 0, fmt.Errorf("tsm1.MeasurementStats.ReadFrom: ByteReader required") - } - - // Read & verify magic. - magic := make([]byte, 4) - nn, err := io.ReadFull(r, magic) - if n += int64(nn); err != nil { - return n, fmt.Errorf("tsm1.MeasurementStats.ReadFrom: cannot read stats magic: %s", err) - } else if string(magic) != MeasurementStatsMagicNumber { - return n, fmt.Errorf("tsm1.MeasurementStats.ReadFrom: invalid tsm1 stats file") - } - - // Read & verify version. - version := make([]byte, 1) - nn, err = io.ReadFull(r, version) - if n += int64(nn); err != nil { - return n, fmt.Errorf("tsm1.MeasurementStats.ReadFrom: cannot read stats version: %s", err) - } else if version[0] != MeasurementStatsVersion { - return n, fmt.Errorf("tsm1.MeasurementStats.ReadFrom: incompatible tsm1 stats version: %d", version[0]) - } - - // Read checksum. - checksum := make([]byte, 4) - nn, err = io.ReadFull(r, checksum) - if n += int64(nn); err != nil { - return n, fmt.Errorf("tsm1.MeasurementStats.ReadFrom: cannot read checksum: %s", err) - } - - // Read measurement count. 
- measurementN, err := binary.ReadVarint(br) - if err != nil { - return n, fmt.Errorf("tsm1.MeasurementStats.ReadFrom: cannot read stats measurement count: %s", err) - } - n += int64(binaryutil.VarintSize(measurementN)) - - // Read measurements. - for i := int64(0); i < measurementN; i++ { - nn64, err := s.readMeasurementFrom(r) - if n += nn64; err != nil { - return n, err - } - } - - // Expect end-of-file. - buf := make([]byte, 1) - if _, err := r.Read(buf); err != io.EOF { - return n, fmt.Errorf("tsm1.MeasurementStats.ReadFrom: file too large, expected EOF") - } - - return n, nil -} - -// readMeasurementFrom reads a measurement stat from r in a binary format. -func (s MeasurementStats) readMeasurementFrom(r io.Reader) (n int64, err error) { - br, ok := r.(io.ByteReader) - if !ok { - return 0, fmt.Errorf("tsm1.MeasurementStats.readMeasurementFrom: ByteReader required") - } - - // Read measurement name length. - nameLen, err := binary.ReadVarint(br) - if err != nil { - return n, fmt.Errorf("tsm1.MeasurementStats.readMeasurementFrom: cannot read stats measurement name length: %s", err) - } - n += int64(binaryutil.VarintSize(nameLen)) - - // Read measurement name. Use large capacity so it can usually be stack allocated. - // Go allocates unescaped variables smaller than 64KB on the stack. - name := make([]byte, nameLen) - nn, err := io.ReadFull(r, name) - if n += int64(nn); err != nil { - return n, fmt.Errorf("tsm1.MeasurementStats.readMeasurementFrom: cannot read stats measurement name: %s", err) - } - - // Read size. - sz, err := binary.ReadVarint(br) - if err != nil { - return n, fmt.Errorf("tsm1.MeasurementStats.readMeasurementFrom: cannot read stats measurement size: %s", err) - } - n += int64(binaryutil.VarintSize(sz)) - - // Insert into map. - s[string(name)] = int(sz) - - return n, nil -} - -// WriteTo writes stats to w in a binary format. -func (s MeasurementStats) WriteTo(w io.Writer) (n int64, err error) { - // Write magic & version. - nn, err := io.WriteString(w, MeasurementStatsMagicNumber) - if n += int64(nn); err != nil { - return n, err - } - nn, err = w.Write([]byte{MeasurementStatsVersion}) - if n += int64(nn); err != nil { - return n, err - } - - // Write measurement count. - var buf bytes.Buffer - b := make([]byte, binary.MaxVarintLen64) - if _, err = buf.Write(b[:binary.PutVarint(b, int64(len(s)))]); err != nil { - return n, err - } - - // Write all measurements in sorted order. - for _, name := range s.MeasurementNames() { - if _, err := s.writeMeasurementTo(&buf, name, s[name]); err != nil { - return n, err - } - } - data := buf.Bytes() - - // Compute & write checksum. - if err := binary.Write(w, binary.BigEndian, crc32.ChecksumIEEE(data)); err != nil { - return n, err - } - n += 4 - - // Write buffer. - nn, err = w.Write(data) - if n += int64(nn); err != nil { - return n, err - } - - return n, err -} - -func (s MeasurementStats) writeMeasurementTo(w io.Writer, name string, sz int) (n int64, err error) { - // Write measurement name length. - buf := make([]byte, binary.MaxVarintLen64) - nn, err := w.Write(buf[:binary.PutVarint(buf, int64(len(name)))]) - if n += int64(nn); err != nil { - return n, err - } - - // Write measurement name. - nn, err = io.WriteString(w, name) - if n += int64(nn); err != nil { - return n, err - } - - // Write size. - nn, err = w.Write(buf[:binary.PutVarint(buf, int64(sz))]) - if n += int64(nn); err != nil { - return n, err - } - - return n, err -} - -// StatsFilename returns the path to the stats file for a given TSM file path. 
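MeasurementStats.WriteTo lays the file out as the magic string "TSS1", a version byte, a CRC32 of the body, and then the body itself: a varint measurement count followed by varint(len(name)), the name bytes, and a varint size per measurement, in sorted order. A simplified encode-only sketch of that layout, with error handling omitted:

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"hash/crc32"
	"sort"
)

// writeStats emits a simplified TSS1 payload: magic, version, CRC32 of the
// body, then the body (varint count, then per-measurement name and size).
func writeStats(w *bytes.Buffer, stats map[string]int) {
	names := make([]string, 0, len(stats))
	for name := range stats {
		names = append(names, name)
	}
	sort.Strings(names)

	var body bytes.Buffer
	tmp := make([]byte, binary.MaxVarintLen64)
	body.Write(tmp[:binary.PutVarint(tmp, int64(len(stats)))])
	for _, name := range names {
		body.Write(tmp[:binary.PutVarint(tmp, int64(len(name)))])
		body.WriteString(name)
		body.Write(tmp[:binary.PutVarint(tmp, int64(stats[name]))])
	}

	w.WriteString("TSS1")
	w.WriteByte(1) // MeasurementStatsVersion
	binary.Write(w, binary.BigEndian, crc32.ChecksumIEEE(body.Bytes()))
	w.Write(body.Bytes())
}

func main() {
	var buf bytes.Buffer
	writeStats(&buf, map[string]int{"cpu": 100, "mem": 2000})
	fmt.Printf("% x\n", buf.Bytes())
}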
-func StatsFilename(tsmPath string) string { - if strings.HasSuffix(tsmPath, "."+TmpTSMFileExtension) { - tsmPath = strings.TrimSuffix(tsmPath, "."+TmpTSMFileExtension) - } - if strings.HasSuffix(tsmPath, "."+TSMFileExtension) { - tsmPath = strings.TrimSuffix(tsmPath, "."+TSMFileExtension) - } - return tsmPath + "." + TSSFileExtension -} diff --git a/tsdb/tsm1/stats_test.go b/tsdb/tsm1/stats_test.go deleted file mode 100644 index 2505a20c41..0000000000 --- a/tsdb/tsm1/stats_test.go +++ /dev/null @@ -1,42 +0,0 @@ -package tsm1_test - -import ( - "bytes" - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" -) - -func TestMeasurementStats_WriteTo(t *testing.T) { - t.Run("Empty", func(t *testing.T) { - stats, other := tsm1.NewMeasurementStats(), tsm1.NewMeasurementStats() - var buf bytes.Buffer - if wn, err := stats.WriteTo(&buf); err != nil { - t.Fatal(err) - } else if rn, err := other.ReadFrom(&buf); err != nil { - t.Fatal(err) - } else if wn != rn { - t.Fatalf("byte count mismatch: w=%d r=%d", wn, rn) - } else if diff := cmp.Diff(stats, other); diff != "" { - t.Fatal(diff) - } - }) - - t.Run("WithData", func(t *testing.T) { - stats, other := tsm1.NewMeasurementStats(), tsm1.NewMeasurementStats() - stats["cpu"] = 100 - stats["mem"] = 2000 - - var buf bytes.Buffer - if wn, err := stats.WriteTo(&buf); err != nil { - t.Fatal(err) - } else if rn, err := other.ReadFrom(&buf); err != nil { - t.Fatal(err) - } else if wn != rn { - t.Fatalf("byte count mismatch: w=%d r=%d", wn, rn) - } else if diff := cmp.Diff(stats, other); diff != "" { - t.Fatal(diff) - } - }) -} diff --git a/tsdb/tsm1/string.go b/tsdb/tsm1/string.go deleted file mode 100644 index 7400b40a40..0000000000 --- a/tsdb/tsm1/string.go +++ /dev/null @@ -1,129 +0,0 @@ -package tsm1 - -// String encoding uses snappy compression to compress each string. Each string is -// appended to byte slice prefixed with a variable byte length followed by the string -// bytes. The bytes are compressed using snappy compressor and a 1 byte header is used -// to indicate the type of encoding. - -import ( - "encoding/binary" - "fmt" - - "github.com/golang/snappy" -) - -// Note: an uncompressed format is not yet implemented. - -// stringCompressedSnappy is a compressed encoding using Snappy compression -const stringCompressedSnappy = 1 - -// StringEncoder encodes multiple strings into a byte slice. -type StringEncoder struct { - // The encoded bytes - bytes []byte -} - -// NewStringEncoder returns a new StringEncoder with an initial buffer ready to hold sz bytes. -func NewStringEncoder(sz int) StringEncoder { - return StringEncoder{ - bytes: make([]byte, 0, sz), - } -} - -// Flush is no-op -func (e *StringEncoder) Flush() {} - -// Reset sets the encoder back to its initial state. -func (e *StringEncoder) Reset() { - e.bytes = e.bytes[:0] -} - -// Write encodes s to the underlying buffer. -func (e *StringEncoder) Write(s string) { - b := make([]byte, 10) - // Append the length of the string using variable byte encoding - i := binary.PutUvarint(b, uint64(len(s))) - e.bytes = append(e.bytes, b[:i]...) - - // Append the string bytes - e.bytes = append(e.bytes, s...) -} - -// Bytes returns a copy of the underlying buffer. 
-func (e *StringEncoder) Bytes() ([]byte, error) { - // Compress the currently appended bytes using snappy and prefix with - // a 1 byte header for future extension - data := snappy.Encode(nil, e.bytes) - return append([]byte{stringCompressedSnappy << 4}, data...), nil -} - -// StringDecoder decodes a byte slice into strings. -type StringDecoder struct { - b []byte - l int - i int - err error -} - -// SetBytes initializes the decoder with bytes to read from. -// This must be called before calling any other method. -func (e *StringDecoder) SetBytes(b []byte) error { - // First byte stores the encoding type, only have snappy format - // currently so ignore for now. - var data []byte - if len(b) > 0 { - var err error - data, err = snappy.Decode(nil, b[1:]) - if err != nil { - return fmt.Errorf("failed to decode string block: %v", err.Error()) - } - } - - e.b = data - e.l = 0 - e.i = 0 - e.err = nil - - return nil -} - -// Next returns true if there are any values remaining to be decoded. -func (e *StringDecoder) Next() bool { - if e.err != nil { - return false - } - - e.i += e.l - return e.i < len(e.b) -} - -// Read returns the next value from the decoder. -func (e *StringDecoder) Read() string { - // Read the length of the string - length, n := binary.Uvarint(e.b[e.i:]) - if n <= 0 { - e.err = fmt.Errorf("stringDecoder: invalid encoded string length") - return "" - } - - // The length of this string plus the length of the variable byte encoded length - e.l = int(length) + n - - lower := e.i + n - upper := lower + int(length) - if upper < lower { - e.err = fmt.Errorf("stringDecoder: length overflow") - return "" - } - if upper > len(e.b) { - e.err = fmt.Errorf("stringDecoder: not enough data to represent encoded string") - return "" - } - - return string(e.b[lower:upper]) -} - -// Error returns the last error encountered by the decoder. 
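StringEncoder prefixes every string with a varint length, snappy-compresses the whole buffer, and prepends a one-byte header whose high nibble carries the encoding type; StringDecoder reverses those steps. A minimal round-trip sketch of that scheme using github.com/golang/snappy, which the deleted file also imports; bounds checks are kept to the bare minimum here:

package main

import (
	"encoding/binary"
	"fmt"

	"github.com/golang/snappy"
)

const stringCompressedSnappy = 1 // encoding type stored in the high 4 bits of the header

func encodeStrings(values []string) []byte {
	var raw []byte
	tmp := make([]byte, binary.MaxVarintLen64)
	for _, s := range values {
		raw = append(raw, tmp[:binary.PutUvarint(tmp, uint64(len(s)))]...)
		raw = append(raw, s...)
	}
	return append([]byte{stringCompressedSnappy << 4}, snappy.Encode(nil, raw)...)
}

func decodeStrings(b []byte) ([]string, error) {
	raw, err := snappy.Decode(nil, b[1:]) // skip the 1-byte header
	if err != nil {
		return nil, err
	}
	var out []string
	for i := 0; i < len(raw); {
		length, n := binary.Uvarint(raw[i:])
		if n <= 0 {
			return nil, fmt.Errorf("invalid string length")
		}
		i += n
		out = append(out, string(raw[i:i+int(length)]))
		i += int(length)
	}
	return out, nil
}

func main() {
	b := encodeStrings([]string{"v1", "value 2"})
	got, err := decodeStrings(b)
	fmt.Println(got, err)
}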
-func (e *StringDecoder) Error() error { - return e.err -} diff --git a/tsdb/tsm1/string_test.go b/tsdb/tsm1/string_test.go deleted file mode 100644 index 72765d322e..0000000000 --- a/tsdb/tsm1/string_test.go +++ /dev/null @@ -1,229 +0,0 @@ -package tsm1 - -import ( - "fmt" - "math/rand" - "reflect" - "testing" - "testing/quick" - - "github.com/google/go-cmp/cmp" - "github.com/influxdata/influxdb/v2/internal/testutil" -) - -func Test_StringEncoder_NoValues(t *testing.T) { - enc := NewStringEncoder(1024) - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var dec StringDecoder - if err := dec.SetBytes(b); err != nil { - t.Fatalf("unexpected error creating string decoder: %v", err) - } - if dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } -} - -func Test_StringEncoder_Single(t *testing.T) { - enc := NewStringEncoder(1024) - v1 := "v1" - enc.Write(v1) - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var dec StringDecoder - if dec.SetBytes(b); err != nil { - t.Fatalf("unexpected error creating string decoder: %v", err) - } - if !dec.Next() { - t.Fatalf("unexpected next value: got false, exp true") - } - - if v1 != dec.Read() { - t.Fatalf("unexpected value: got %v, exp %v", dec.Read(), v1) - } -} - -func Test_StringEncoder_Multi_Compressed(t *testing.T) { - enc := NewStringEncoder(1024) - - values := make([]string, 10) - for i := range values { - values[i] = fmt.Sprintf("value %d", i) - enc.Write(values[i]) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if b[0]>>4 != stringCompressedSnappy { - t.Fatalf("unexpected encoding: got %v, exp %v", b[0], stringCompressedSnappy) - } - - if exp := 51; len(b) != exp { - t.Fatalf("unexpected length: got %v, exp %v", len(b), exp) - } - - var dec StringDecoder - if err := dec.SetBytes(b); err != nil { - t.Fatalf("unexpected erorr creating string decoder: %v", err) - } - - for i, v := range values { - if !dec.Next() { - t.Fatalf("unexpected next value: got false, exp true") - } - if v != dec.Read() { - t.Fatalf("unexpected value at pos %d: got %v, exp %v", i, dec.Read(), v) - } - } - - if dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } -} - -func Test_StringEncoder_Quick(t *testing.T) { - quick.Check(func(values []string) bool { - expected := values - if values == nil { - expected = []string{} - } - // Write values to encoder. - enc := NewStringEncoder(1024) - for _, v := range values { - enc.Write(v) - } - - // Retrieve encoded bytes from encoder. - buf, err := enc.Bytes() - if err != nil { - t.Fatal(err) - } - - // Read values out of decoder. - got := make([]string, 0, len(values)) - var dec StringDecoder - if err := dec.SetBytes(buf); err != nil { - t.Fatal(err) - } - for dec.Next() { - if err := dec.Error(); err != nil { - t.Fatal(err) - } - got = append(got, dec.Read()) - } - - // Verify that input and output values match. 
- if !reflect.DeepEqual(expected, got) { - t.Fatalf("mismatch:\n\nexp=%#v\n\ngot=%#v\n\n", expected, got) - } - - return true - }, nil) -} - -func Test_StringDecoder_Empty(t *testing.T) { - var dec StringDecoder - if err := dec.SetBytes([]byte{}); err != nil { - t.Fatal(err) - } - - if dec.Next() { - t.Fatalf("exp Next() == false, got true") - } -} - -func Test_StringDecoder_CorruptRead(t *testing.T) { - cases := []string{ - "\x10\x03\b\x03Hi", // Higher length than actual data - "\x10\x1dp\x9c\x90\x90\x90\x90\x90\x90\x90\x90\x90length overflow----", - } - - for _, c := range cases { - var dec StringDecoder - if err := dec.SetBytes([]byte(c)); err != nil { - t.Fatal(err) - } - - if !dec.Next() { - t.Fatalf("exp Next() to return true, got false") - } - - _ = dec.Read() - if dec.Error() == nil { - t.Fatalf("exp an err, got nil: %q", c) - } - } -} - -func Test_StringDecoder_CorruptSetBytes(t *testing.T) { - cases := []string{ - "0t\x00\x01\x000\x00\x01\x000\x00\x01\x000\x00\x01\x000\x00\x01" + - "\x000\x00\x01\x000\x00\x01\x000\x00\x00\x00\xff:\x01\x00\x01\x00\x01" + - "\x00\x01\x00\x01\x00\x01\x00\x010\x010\x000\x010\x010\x010\x01" + - "0\x010\x010\x010\x010\x010\x010\x010\x010\x010\x010", // Upper slice bounds overflows negative - } - - for _, c := range cases { - var dec StringDecoder - if err := dec.SetBytes([]byte(c)); err == nil { - t.Fatalf("exp an err, got nil: %q", c) - } - } -} - -func BenchmarkStringDecoder_DecodeAll(b *testing.B) { - benchmarks := []struct { - n int - w int - }{ - {1, 10}, - {55, 10}, - {550, 10}, - {1000, 10}, - } - for _, bm := range benchmarks { - rand.Seed(int64(bm.n * 1e3)) - - s := NewStringEncoder(bm.n) - for c := 0; c < bm.n; c++ { - s.Write(testutil.MakeSentence(bm.w)) - } - s.Flush() - bytes, err := s.Bytes() - if err != nil { - b.Fatalf("unexpected error: %v", err) - } - - b.Run(fmt.Sprintf("%d", bm.n), func(b *testing.B) { - b.SetBytes(int64(len(bytes))) - b.ReportAllocs() - - dst := make([]string, bm.n) - for i := 0; i < b.N; i++ { - var it StringDecoder - if err := it.SetBytes(bytes); err != nil { - b.Fatalf("unexpected error creating float decoder: %v", err) - } - - i := 0 - for it.Next() { - dst[i] = it.Read() - i++ - } - - if len(dst) != bm.n { - b.Fatalf("unexpected length -got/+exp\n%s", cmp.Diff(len(dst), bm.n)) - } - } - }) - } -} diff --git a/tsdb/tsm1/timestamp.go b/tsdb/tsm1/timestamp.go deleted file mode 100644 index 9f3d9a6bbe..0000000000 --- a/tsdb/tsm1/timestamp.go +++ /dev/null @@ -1,414 +0,0 @@ -package tsm1 - -// Timestamp encoding is adaptive and based on structure of the timestamps that are encoded. It -// uses a combination of delta encoding, scaling and compression using simple8b, run length encoding -// as well as falling back to no compression if needed. -// -// Timestamp values to be encoded should be sorted before encoding. When encoded, the values are -// first delta-encoded. The first value is the starting timestamp, subsequent values are the difference -// from the prior value. -// -// Timestamp resolution can also be in the nanosecond. Many timestamps are monotonically increasing -// and fall on even boundaries of time such as every 10s. When the timestamps have this structure, -// they are scaled by the largest common divisor that is also a factor of 10. This has the effect -// of converting very large integer deltas into very small one that can be reversed by multiplying them -// by the scaling factor. 
-// -// Using these adjusted values, if all the deltas are the same, the time range is stored using run -// length encoding. If run length encoding is not possible and all values are less than 1 << 60 - 1 -// (~36.5 yrs in nanosecond resolution), then the timestamps are encoded using simple8b encoding. If -// any value exceeds the maximum values, the deltas are stored uncompressed using 8b each. -// -// Each compressed byte slice has a 1 byte header indicating the compression type. The 4 high bits -// indicate the encoding type. The 4 low bits are used by the encoding type. -// -// For run-length encoding, the 4 low bits store the log10 of the scaling factor. The next 8 bytes are -// the starting timestamp, next 1-10 bytes is the delta value using variable-length encoding, finally the -// next 1-10 bytes is the count of values. -// -// For simple8b encoding, the 4 low bits store the log10 of the scaling factor. The next 8 bytes is the -// first delta value stored uncompressed, the remaining bytes are 64bit words containing compressed delta -// values. -// -// For uncompressed encoding, the delta values are stored using 8 bytes each. - -import ( - "encoding/binary" - "fmt" - "math" - - "github.com/jwilder/encoding/simple8b" -) - -const ( - // timeUncompressed is a an uncompressed format using 8 bytes per timestamp - timeUncompressed = 0 - // timeCompressedPackedSimple is a bit-packed format using simple8b encoding - timeCompressedPackedSimple = 1 - // timeCompressedRLE is a run-length encoding format - timeCompressedRLE = 2 -) - -// TimeEncoder encodes time.Time to byte slices. -type TimeEncoder interface { - Write(t int64) - Bytes() ([]byte, error) - Reset() -} - -type encoder struct { - ts []uint64 - bytes []byte - enc *simple8b.Encoder -} - -// NewTimeEncoder returns a TimeEncoder with an initial buffer ready to hold sz bytes. -func NewTimeEncoder(sz int) TimeEncoder { - return &encoder{ - ts: make([]uint64, 0, sz), - enc: simple8b.NewEncoder(), - } -} - -// Reset sets the encoder back to its initial state. -func (e *encoder) Reset() { - e.ts = e.ts[:0] - e.bytes = e.bytes[:0] - e.enc.Reset() -} - -// Write adds a timestamp to the compressed stream. -func (e *encoder) Write(t int64) { - e.ts = append(e.ts, uint64(t)) -} - -func (e *encoder) reduce() (max, divisor uint64, rle bool, deltas []uint64) { - // Compute the deltas in place to avoid allocating another slice - deltas = e.ts - // Starting values for a max and divisor - max, divisor = 0, 1e12 - - // Indicates whether the the deltas can be run-length encoded - rle = true - - // Iterate in reverse so we can apply deltas in place - for i := len(deltas) - 1; i > 0; i-- { - - // First differential encode the values - deltas[i] = deltas[i] - deltas[i-1] - - // We also need to keep track of the max value and largest common divisor - v := deltas[i] - - if v > max { - max = v - } - - // If our value is divisible by 10, break. Otherwise, try the next smallest divisor. - for divisor > 1 && v%divisor != 0 { - divisor /= 10 - } - - // Skip the first value || see if prev = curr. The deltas can be RLE if the are all equal. - rle = i == len(deltas)-1 || rle && (deltas[i+1] == deltas[i]) - } - return -} - -// Bytes returns the encoded bytes of all written times. -func (e *encoder) Bytes() ([]byte, error) { - if len(e.ts) == 0 { - return e.bytes[:0], nil - } - - // Maximum and largest common divisor. rle is true if dts (the delta timestamps), - // are all the same. 
- max, div, rle, dts := e.reduce() - - // The deltas are all the same, so we can run-length encode them - if rle && len(e.ts) > 1 { - return e.encodeRLE(e.ts[0], e.ts[1], div, len(e.ts)) - } - - // We can't compress this time-range, the deltas exceed 1 << 60 - if max > simple8b.MaxValue { - return e.encodeRaw() - } - - return e.encodePacked(div, dts) -} - -func (e *encoder) encodePacked(div uint64, dts []uint64) ([]byte, error) { - // Only apply the divisor if it's greater than 1 since division is expensive. - if div > 1 { - for _, v := range dts[1:] { - if err := e.enc.Write(v / div); err != nil { - return nil, err - } - } - } else { - for _, v := range dts[1:] { - if err := e.enc.Write(v); err != nil { - return nil, err - } - } - } - - // The compressed deltas - deltas, err := e.enc.Bytes() - if err != nil { - return nil, err - } - - sz := 8 + 1 + len(deltas) - if cap(e.bytes) < sz { - e.bytes = make([]byte, sz) - } - b := e.bytes[:sz] - - // 4 high bits used for the encoding type - b[0] = byte(timeCompressedPackedSimple) << 4 - // 4 low bits are the log10 divisor - b[0] |= byte(math.Log10(float64(div))) - - // The first delta value - binary.BigEndian.PutUint64(b[1:9], uint64(dts[0])) - - copy(b[9:], deltas) - return b[:9+len(deltas)], nil -} - -func (e *encoder) encodeRaw() ([]byte, error) { - sz := 1 + len(e.ts)*8 - if cap(e.bytes) < sz { - e.bytes = make([]byte, sz) - } - b := e.bytes[:sz] - b[0] = byte(timeUncompressed) << 4 - for i, v := range e.ts { - binary.BigEndian.PutUint64(b[1+i*8:1+i*8+8], uint64(v)) - } - return b, nil -} - -func (e *encoder) encodeRLE(first, delta, div uint64, n int) ([]byte, error) { - // Large varints can take up to 10 bytes, we're encoding 3 + 1 byte type - sz := 31 - if cap(e.bytes) < sz { - e.bytes = make([]byte, sz) - } - b := e.bytes[:sz] - // 4 high bits used for the encoding type - b[0] = byte(timeCompressedRLE) << 4 - // 4 low bits are the log10 divisor - b[0] |= byte(math.Log10(float64(div))) - - i := 1 - // The first timestamp - binary.BigEndian.PutUint64(b[i:], uint64(first)) - i += 8 - // The first delta - i += binary.PutUvarint(b[i:], uint64(delta/div)) - // The number of times the delta is repeated - i += binary.PutUvarint(b[i:], uint64(n)) - - return b[:i], nil -} - -// TimeDecoder decodes a byte slice into timestamps. -type TimeDecoder struct { - v int64 - i, n int - ts []uint64 - dec simple8b.Decoder - err error - - // The delta value for a run-length encoded byte slice - rleDelta int64 - - encoding byte -} - -// Init initializes the decoder with bytes to read from. -func (d *TimeDecoder) Init(b []byte) { - d.v = 0 - d.i = 0 - d.ts = d.ts[:0] - d.err = nil - if len(b) > 0 { - // Encoding type is stored in the 4 high bits of the first byte - d.encoding = b[0] >> 4 - } - d.decode(b) -} - -// Next returns true if there are any timestamps remaining to be decoded. -func (d *TimeDecoder) Next() bool { - if d.err != nil { - return false - } - - if d.encoding == timeCompressedRLE { - if d.i >= d.n { - return false - } - d.i++ - d.v += d.rleDelta - return d.i < d.n - } - - if d.i >= len(d.ts) { - return false - } - d.v = int64(d.ts[d.i]) - d.i++ - return true -} - -// Read returns the next timestamp from the decoder. -func (d *TimeDecoder) Read() int64 { - return d.v -} - -// Error returns the last error encountered by the decoder. 
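The encoder's reduce step is the heart of the format: timestamps are delta-encoded in place, the maximum delta decides between simple8b and raw encoding, the largest power-of-ten common divisor scales the deltas down, and identical deltas trigger the RLE path. A self-contained sketch of that reduction; the simple8b packing itself is left to the jwilder/encoding library the deleted file uses:

package main

import "fmt"

// reduce mirrors encoder.reduce: delta-encode in place, track the max delta,
// the largest power-of-ten common divisor, and whether every delta is equal.
func reduce(ts []uint64) (max, divisor uint64, rle bool) {
	max, divisor, rle = 0, 1e12, true
	for i := len(ts) - 1; i > 0; i-- {
		ts[i] -= ts[i-1] // differential encoding, applied back to front
		if ts[i] > max {
			max = ts[i]
		}
		for divisor > 1 && ts[i]%divisor != 0 {
			divisor /= 10
		}
		rle = i == len(ts)-1 || rle && ts[i+1] == ts[i]
	}
	return max, divisor, rle
}

func main() {
	// Timestamps every 10s in nanoseconds: the deltas are identical, so the
	// RLE path applies, and the divisor collapses each delta to 1 when scaled.
	ts := []uint64{0, 1e10, 2e10, 3e10, 4e10}
	max, div, rle := reduce(ts)
	fmt.Println(ts)            // [0 10000000000 10000000000 10000000000 10000000000]
	fmt.Println(max, div, rle) // 10000000000 10000000000 true
}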
-func (d *TimeDecoder) Error() error { - return d.err -} - -func (d *TimeDecoder) decode(b []byte) { - if len(b) == 0 { - return - } - - switch d.encoding { - case timeUncompressed: - d.decodeRaw(b[1:]) - case timeCompressedRLE: - d.decodeRLE(b) - case timeCompressedPackedSimple: - d.decodePacked(b) - default: - d.err = fmt.Errorf("unknown encoding: %v", d.encoding) - } -} - -func (d *TimeDecoder) decodePacked(b []byte) { - if len(b) < 9 { - d.err = fmt.Errorf("timeDecoder: not enough data to decode packed timestamps") - return - } - div := uint64(math.Pow10(int(b[0] & 0xF))) - first := uint64(binary.BigEndian.Uint64(b[1:9])) - - d.dec.SetBytes(b[9:]) - - d.i = 0 - deltas := d.ts[:0] - deltas = append(deltas, first) - - for d.dec.Next() { - deltas = append(deltas, d.dec.Read()) - } - - // Compute the prefix sum and scale the deltas back up - last := deltas[0] - if div > 1 { - for i := 1; i < len(deltas); i++ { - dgap := deltas[i] * div - deltas[i] = last + dgap - last = deltas[i] - } - } else { - for i := 1; i < len(deltas); i++ { - deltas[i] += last - last = deltas[i] - } - } - - d.i = 0 - d.ts = deltas -} - -func (d *TimeDecoder) decodeRLE(b []byte) { - if len(b) < 9 { - d.err = fmt.Errorf("timeDecoder: not enough data for initial RLE timestamp") - return - } - - var i, n int - - // Lower 4 bits hold the 10 based exponent so we can scale the values back up - mod := int64(math.Pow10(int(b[i] & 0xF))) - i++ - - // Next 8 bytes is the starting timestamp - first := binary.BigEndian.Uint64(b[i : i+8]) - i += 8 - - // Next 1-10 bytes is our (scaled down by factor of 10) run length values - value, n := binary.Uvarint(b[i:]) - if n <= 0 { - d.err = fmt.Errorf("timeDecoder: invalid run length in decodeRLE") - return - } - - // Scale the value back up - value *= uint64(mod) - i += n - - // Last 1-10 bytes is how many times the value repeats - count, n := binary.Uvarint(b[i:]) - if n <= 0 { - d.err = fmt.Errorf("timeDecoder: invalid repeat value in decodeRLE") - return - } - - d.v = int64(first - value) - d.rleDelta = int64(value) - - d.i = -1 - d.n = int(count) -} - -func (d *TimeDecoder) decodeRaw(b []byte) { - d.i = 0 - d.ts = make([]uint64, len(b)/8) - for i := range d.ts { - d.ts[i] = binary.BigEndian.Uint64(b[i*8 : i*8+8]) - - delta := d.ts[i] - // Compute the prefix sum and scale the deltas back up - if i > 0 { - d.ts[i] = d.ts[i-1] + delta - } - } -} - -func CountTimestamps(b []byte) int { - if len(b) == 0 { - return 0 - } - - // Encoding type is stored in the 4 high bits of the first byte - encoding := b[0] >> 4 - switch encoding { - case timeUncompressed: - // Uncompressed timestamps are just 8 bytes each - return len(b[1:]) / 8 - case timeCompressedRLE: - // First 9 bytes are the starting timestamp and scaling factor, skip over them - i := 9 - // Next 1-10 bytes is our (scaled down by factor of 10) run length values - _, n := binary.Uvarint(b[9:]) - i += n - // Last 1-10 bytes is how many times the value repeats - count, _ := binary.Uvarint(b[i:]) - return int(count) - case timeCompressedPackedSimple: - // First 9 bytes are the starting timestamp and scaling factor, skip over them - count, _ := simple8b.CountBytes(b[9:]) - return count + 1 // +1 is for the first uncompressed timestamp, starting timestamep in b[1:9] - default: - return 0 - } -} diff --git a/tsdb/tsm1/timestamp_test.go b/tsdb/tsm1/timestamp_test.go deleted file mode 100644 index d065eabff9..0000000000 --- a/tsdb/tsm1/timestamp_test.go +++ /dev/null @@ -1,726 +0,0 @@ -package tsm1 - -import ( - "fmt" - "math/rand" - 
"reflect" - "testing" - "testing/quick" - "time" -) - -func Test_TimeEncoder(t *testing.T) { - enc := NewTimeEncoder(1) - - x := []int64{} - now := time.Unix(0, 0) - x = append(x, now.UnixNano()) - enc.Write(now.UnixNano()) - for i := 1; i < 4; i++ { - x = append(x, now.Add(time.Duration(i)*time.Second).UnixNano()) - enc.Write(x[i]) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; got != timeCompressedRLE { - t.Fatalf("Wrong encoding used: expected rle, got %v", got) - } - - var dec TimeDecoder - dec.Init(b) - for i, v := range x { - if !dec.Next() { - t.Fatalf("Next == false, expected true") - } - - if v != dec.Read() { - t.Fatalf("Item %d mismatch, got %v, exp %v", i, dec.Read(), v) - } - } -} - -func Test_TimeEncoder_NoValues(t *testing.T) { - enc := NewTimeEncoder(0) - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var dec TimeDecoder - dec.Init(b) - if dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } -} - -func Test_TimeEncoder_One(t *testing.T) { - enc := NewTimeEncoder(1) - var tm int64 - - enc.Write(tm) - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; got != timeCompressedPackedSimple { - t.Fatalf("Wrong encoding used: expected uncompressed, got %v", got) - } - - var dec TimeDecoder - dec.Init(b) - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if tm != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), tm) - } -} - -func Test_TimeEncoder_Two(t *testing.T) { - enc := NewTimeEncoder(2) - t1 := int64(0) - t2 := int64(1) - enc.Write(t1) - enc.Write(t2) - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; got != timeCompressedRLE { - t.Fatalf("Wrong encoding used: expected rle, got %v", got) - } - - var dec TimeDecoder - dec.Init(b) - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if t1 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), t1) - } - - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if t2 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), t2) - } -} - -func Test_TimeEncoder_Three(t *testing.T) { - enc := NewTimeEncoder(3) - t1 := int64(0) - t2 := int64(1) - t3 := int64(3) - - enc.Write(t1) - enc.Write(t2) - enc.Write(t3) - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; got != timeCompressedPackedSimple { - t.Fatalf("Wrong encoding used: expected rle, got %v", got) - } - - var dec TimeDecoder - dec.Init(b) - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if t1 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), t1) - } - - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if t2 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), t2) - } - - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if t3 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), t3) - } -} - -func Test_TimeEncoder_Large_Range(t *testing.T) { - enc := NewTimeEncoder(2) - t1 := int64(1442369134000000000) - t2 := int64(1442369135000000000) - enc.Write(t1) - enc.Write(t2) - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: 
%v", err) - } - - if got := b[0] >> 4; got != timeCompressedRLE { - t.Fatalf("Wrong encoding used: expected rle, got %v", got) - } - - var dec TimeDecoder - dec.Init(b) - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if t1 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), t1) - } - - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if t2 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), t2) - } -} - -func Test_TimeEncoder_Uncompressed(t *testing.T) { - enc := NewTimeEncoder(3) - t1 := time.Unix(0, 0).UnixNano() - t2 := time.Unix(1, 0).UnixNano() - - // about 36.5yrs in NS resolution is max range for compressed format - // This should cause the encoding to fallback to raw points - t3 := time.Unix(2, (2 << 59)).UnixNano() - enc.Write(t1) - enc.Write(t2) - enc.Write(t3) - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("expected error: %v", err) - } - - if exp := 25; len(b) != exp { - t.Fatalf("length mismatch: got %v, exp %v", len(b), exp) - } - - if got := b[0] >> 4; got != timeUncompressed { - t.Fatalf("Wrong encoding used: expected uncompressed, got %v", got) - } - - var dec TimeDecoder - dec.Init(b) - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if t1 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), t1) - } - - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if t2 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), t2) - } - - if !dec.Next() { - t.Fatalf("unexpected next value: got true, exp false") - } - - if t3 != dec.Read() { - t.Fatalf("read value mismatch: got %v, exp %v", dec.Read(), t3) - } -} - -func Test_TimeEncoder_RLE(t *testing.T) { - enc := NewTimeEncoder(512) - var ts []int64 - for i := 0; i < 500; i++ { - ts = append(ts, int64(i)) - } - - for _, v := range ts { - enc.Write(v) - } - - b, err := enc.Bytes() - if exp := 12; len(b) != exp { - t.Fatalf("length mismatch: got %v, exp %v", len(b), exp) - } - - if got := b[0] >> 4; got != timeCompressedRLE { - t.Fatalf("Wrong encoding used: expected uncompressed, got %v", got) - } - - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var dec TimeDecoder - dec.Init(b) - for i, v := range ts { - if !dec.Next() { - t.Fatalf("Next == false, expected true") - } - - if v != dec.Read() { - t.Fatalf("Item %d mismatch, got %v, exp %v", i, dec.Read(), v) - } - } - - if dec.Next() { - t.Fatalf("unexpected extra values") - } -} - -func Test_TimeEncoder_Reverse(t *testing.T) { - enc := NewTimeEncoder(3) - ts := []int64{ - int64(3), - int64(2), - int64(0), - } - - for _, v := range ts { - enc.Write(v) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; got != timeUncompressed { - t.Fatalf("Wrong encoding used: expected uncompressed, got %v", got) - } - - var dec TimeDecoder - dec.Init(b) - i := 0 - for dec.Next() { - if ts[i] != dec.Read() { - t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), ts[i]) - } - i++ - } -} - -func Test_TimeEncoder_220SecondDelta(t *testing.T) { - enc := NewTimeEncoder(256) - var ts []int64 - now := time.Now() - for i := 0; i < 220; i++ { - ts = append(ts, now.Add(time.Duration(i*60)*time.Second).UnixNano()) - } - - for _, v := range ts { - enc.Write(v) - } - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - // Using RLE, should get 12 
bytes - if exp := 12; len(b) != exp { - t.Fatalf("unexpected length: got %v, exp %v", len(b), exp) - } - - if got := b[0] >> 4; got != timeCompressedRLE { - t.Fatalf("Wrong encoding used: expected uncompressed, got %v", got) - } - - var dec TimeDecoder - dec.Init(b) - i := 0 - for dec.Next() { - if ts[i] != dec.Read() { - t.Fatalf("read value %d mismatch: got %v, exp %v", i, dec.Read(), ts[i]) - } - i++ - } - - if i != len(ts) { - t.Fatalf("Read too few values: exp %d, got %d", len(ts), i) - } - - if dec.Next() { - t.Fatalf("expecte Next() = false, got true") - } -} - -func Test_TimeEncoder_Quick(t *testing.T) { - quick.Check(func(values []int64) bool { - // Write values to encoder. - enc := NewTimeEncoder(1024) - exp := make([]int64, len(values)) - for i, v := range values { - exp[i] = int64(v) - enc.Write(exp[i]) - } - - // Retrieve encoded bytes from encoder. - buf, err := enc.Bytes() - if err != nil { - t.Fatal(err) - } - - // Read values out of decoder. - got := make([]int64, 0, len(values)) - var dec TimeDecoder - dec.Init(buf) - for dec.Next() { - if err := dec.Error(); err != nil { - t.Fatal(err) - } - got = append(got, dec.Read()) - } - - // Verify that input and output values match. - if !reflect.DeepEqual(exp, got) { - t.Fatalf("mismatch:\n\nexp=%+v\n\ngot=%+v\n\n", exp, got) - } - - return true - }, nil) -} - -func Test_TimeEncoder_RLESeconds(t *testing.T) { - enc := NewTimeEncoder(6) - ts := make([]int64, 6) - - ts[0] = int64(1444448158000000000) - ts[1] = int64(1444448168000000000) - ts[2] = int64(1444448178000000000) - ts[3] = int64(1444448188000000000) - ts[4] = int64(1444448198000000000) - ts[5] = int64(1444448208000000000) - - for _, v := range ts { - enc.Write(v) - } - - b, err := enc.Bytes() - if got := b[0] >> 4; got != timeCompressedRLE { - t.Fatalf("Wrong encoding used: expected rle, got %v", got) - } - - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - var dec TimeDecoder - dec.Init(b) - for i, v := range ts { - if !dec.Next() { - t.Fatalf("Next == false, expected true") - } - - if v != dec.Read() { - t.Fatalf("Item %d mismatch, got %v, exp %v", i, dec.Read(), v) - } - } - - if dec.Next() { - t.Fatalf("unexpected extra values") - } -} - -func TestTimeEncoder_Count_Uncompressed(t *testing.T) { - enc := NewTimeEncoder(2) - t1 := time.Unix(0, 0).UnixNano() - t2 := time.Unix(1, 0).UnixNano() - - // about 36.5yrs in NS resolution is max range for compressed format - // This should cause the encoding to fallback to raw points - t3 := time.Unix(2, (2 << 59)).UnixNano() - enc.Write(t1) - enc.Write(t2) - enc.Write(t3) - - b, err := enc.Bytes() - if got := b[0] >> 4; got != timeUncompressed { - t.Fatalf("Wrong encoding used: expected rle, got %v", got) - } - - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got, exp := CountTimestamps(b), 3; got != exp { - t.Fatalf("count mismatch: got %v, exp %v", got, exp) - } -} - -func TestTimeEncoder_Count_RLE(t *testing.T) { - enc := NewTimeEncoder(5) - ts := make([]int64, 6) - - ts[0] = int64(1444448158000000000) - ts[1] = int64(1444448168000000000) - ts[2] = int64(1444448178000000000) - ts[3] = int64(1444448188000000000) - ts[4] = int64(1444448198000000000) - ts[5] = int64(1444448208000000000) - - for _, v := range ts { - enc.Write(v) - } - - b, err := enc.Bytes() - if got := b[0] >> 4; got != timeCompressedRLE { - t.Fatalf("Wrong encoding used: expected rle, got %v", got) - } - - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got, exp := CountTimestamps(b), len(ts); got != exp { - 
t.Fatalf("count mismatch: got %v, exp %v", got, exp) - } -} - -func TestTimeEncoder_Count_Simple8(t *testing.T) { - enc := NewTimeEncoder(3) - t1 := int64(0) - t2 := int64(1) - t3 := int64(3) - - enc.Write(t1) - enc.Write(t2) - enc.Write(t3) - - b, err := enc.Bytes() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got := b[0] >> 4; got != timeCompressedPackedSimple { - t.Fatalf("Wrong encoding used: expected rle, got %v", got) - } - - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if got, exp := CountTimestamps(b), 3; got != exp { - t.Fatalf("count mismatch: got %v, exp %v", got, exp) - } -} - -func TestTimeDecoder_Corrupt(t *testing.T) { - cases := []string{ - "", // Empty - "\x10\x14", // Packed: not enough data - "\x20\x00", // RLE: not enough data for starting timestamp - "\x2012345678\x90", // RLE: initial timestamp but invalid uvarint encoding - "\x2012345678\x7f", // RLE: timestamp, RLE but invalid repeat - "\x00123", // Raw: data length not multiple of 8 - } - - for _, c := range cases { - var dec TimeDecoder - dec.Init([]byte(c)) - if dec.Next() { - t.Fatalf("exp next == false, got true") - } - } -} - -func BenchmarkTimeEncoder(b *testing.B) { - enc := NewTimeEncoder(1024) - x := make([]int64, 1024) - for i := 0; i < len(x); i++ { - x[i] = time.Now().UnixNano() - enc.Write(x[i]) - } - - b.ResetTimer() - for i := 0; i < b.N; i++ { - enc.Bytes() - enc.Reset() - for i := 0; i < len(x); i++ { - enc.Write(x[i]) - } - } -} - -func BenchmarkTimeDecoder_Packed(b *testing.B) { - x := make([]int64, 1024) - enc := NewTimeEncoder(1024) - for i := 0; i < len(x); i++ { - x[i] = time.Now().UnixNano() - enc.Write(x[i]) - } - bytes, _ := enc.Bytes() - - b.ResetTimer() - - var dec TimeDecoder - for i := 0; i < b.N; i++ { - dec.Init(bytes) - for dec.Next() { - } - } -} - -func BenchmarkTimeDecoder_RLE(b *testing.B) { - x := make([]int64, 1024) - enc := NewTimeEncoder(1024) - for i := 0; i < len(x); i++ { - x[i] = int64(i * 10) - enc.Write(x[i]) - } - bytes, _ := enc.Bytes() - - b.ResetTimer() - - b.StopTimer() - var dec TimeDecoder - b.StartTimer() - - for i := 0; i < b.N; i++ { - dec.Init(bytes) - for dec.Next() { - } - } -} - -func BenchmarkTimeBatch_DecodeAllUncompressed(b *testing.B) { - benchmarks := []int{ - 5, - 55, - 555, - 1000, - } - - values := []int64{ - -2352281900722994752, 1438442655375607923, -4110452567888190110, - -1221292455668011702, -1941700286034261841, -2836753127140407751, - 1432686216250034552, 3663244026151507025, -3068113732684750258, - -1949953187327444488, 3713374280993588804, 3226153669854871355, - -2093273755080502606, 1006087192578600616, -2272122301622271655, - 2533238229511593671, -4450454445568858273, 2647789901083530435, - 2761419461769776844, -1324397441074946198, -680758138988210958, - 94468846694902125, -2394093124890745254, -2682139311758778198, - } - - for _, size := range benchmarks { - rand.Seed(int64(size * 1e3)) - - enc := NewTimeEncoder(size) - for i := 0; i < size; i++ { - enc.Write(values[rand.Int()%len(values)]) - } - bytes, _ := enc.Bytes() - - b.Run(fmt.Sprintf("%d", size), func(b *testing.B) { - b.SetBytes(int64(len(bytes))) - b.ReportAllocs() - - dst := make([]int64, size) - for i := 0; i < b.N; i++ { - var dec TimeDecoder - dec.Init(bytes) - var n int - for dec.Next() { - dst[n] = dec.Read() - n++ - } - } - }) - } -} - -func BenchmarkTimeBatch_DecodeAllPackedSimple(b *testing.B) { - benchmarks := []struct { - n int - }{ - {5}, - {55}, - {555}, - {1000}, - } - for _, bm := range benchmarks { - 
rand.Seed(int64(bm.n * 1e3)) - - enc := NewTimeEncoder(bm.n) - for i := 0; i < bm.n; i++ { - // Small amount of randomness prevents RLE from being used - enc.Write(int64(i*1000) + int64(rand.Intn(10))) - } - bytes, _ := enc.Bytes() - - b.Run(fmt.Sprintf("%d", bm.n), func(b *testing.B) { - b.SetBytes(int64(len(bytes))) - b.ReportAllocs() - - dst := make([]int64, bm.n) - for i := 0; i < b.N; i++ { - var dec TimeDecoder - dec.Init(bytes) - var n int - for dec.Next() { - dst[n] = dec.Read() - n++ - } - } - }) - } -} - -func BenchmarkTimeBatch_DecodeAllRLE(b *testing.B) { - benchmarks := []struct { - n int - delta int64 - }{ - {5, 10}, - {55, 10}, - {555, 10}, - {1000, 10}, - } - for _, bm := range benchmarks { - enc := NewTimeEncoder(bm.n) - acc := int64(0) - for i := 0; i < bm.n; i++ { - enc.Write(acc) - acc += bm.delta - } - bytes, _ := enc.Bytes() - - b.Run(fmt.Sprintf("%d_delta_%d", bm.n, bm.delta), func(b *testing.B) { - b.SetBytes(int64(len(bytes))) - b.ReportAllocs() - - dst := make([]int64, bm.n) - for i := 0; i < b.N; i++ { - var dec TimeDecoder - dec.Init(bytes) - var n int - for dec.Next() { - dst[n] = dec.Read() - n++ - } - } - }) - } -} diff --git a/tsdb/tsm1/tombstone.go b/tsdb/tsm1/tombstone.go deleted file mode 100644 index b18d1f855c..0000000000 --- a/tsdb/tsm1/tombstone.go +++ /dev/null @@ -1,635 +0,0 @@ -package tsm1 - -/* -Tombstone file format: - -╔═══════════════════════════════════════════Tombstone File════════════════════════════════════════════╗ -║ ┌─────────────┐┌──────────────────────────────────────────────────────────────────────────────────┐ ║ -║ │ ││ │ ║ -║ │ ││ │ ║ -║ │ ││ │ ║ -║ │ ││ │ ║ -║ │ ││ │ ║ -║ │ Header ││ │ ║ -║ │ 4 bytes ││ Tombstone Entries │ ║ -║ │ ││ │ ║ -║ │ ││ │ ║ -║ │ ││ │ ║ -║ │ ││ │ ║ -║ │ ││ │ ║ -║ │ ││ │ ║ -║ └─────────────┘└──────────────────────────────────────────────────────────────────────────────────┘ ║ -╚═════════════════════════════════════════════════════════════════════════════════════════════════════╝ - -╔═══════════════════════════════════════════Tombstone Entry═══════════════════════════════════════════╗ -║ ┌──────┐┌───────────────┐┌────────────┐┌────────────────────────┐┌───────────────┐┌───────────────┐ ║ -║ │ ││ ││ ││ ││ ││ │ ║ -║ │ ││ ││ ││ ││ ││ │ ║ -║ │ ││ ││ ││ ││ ││ │ ║ -║ │ ││ ││ ││ ││ ││ │ ║ -║ │ ││ ││ ││ ││ ││ │ ║ -║ │Prefix││ Reserved ││ Key Length ││ Key ││ Min Time ││ Max Time │ ║ -║ │ Bit ││ 7 bits ││ 24 bits ││ N bytes ││ 8 bytes ││ 8 bytes │ ║ -║ │ ││ ││ ││ ││ ││ │ ║ -║ │ ││ ││ ││ ││ ││ │ ║ -║ │ ││ ││ ││ ││ ││ │ ║ -║ │ ││ ││ ││ ││ ││ │ ║ -║ │ ││ ││ ││ ││ ││ │ ║ -║ │ ││ ││ ││ ││ ││ │ ║ -║ └──────┘└───────────────┘└────────────┘└────────────────────────┘└───────────────┘└───────────────┘ ║ -╚═════════════════════════════════════════════════════════════════════════════════════════════════════╝ - -NOTE: v1, v2 and v3 tombstone supports have been dropped from 2.x. Only v4 is now -supported. -*/ - -import ( - "bufio" - "compress/gzip" - "encoding/binary" - "errors" - "fmt" - "io" - "math" - "os" - "path/filepath" - "strings" - "sync" - - "github.com/influxdata/influxdb/v2/pkg/fs" -) - -const ( - headerSize = 4 - v4header = 0x1504 -) - -var errIncompatibleV4Version = errors.New("incompatible v4 version") - -// Tombstoner records tombstones when entries are deleted. -type Tombstoner struct { - mu sync.RWMutex - - // Path is the location of the file to record tombstone. This should be the - // full path to a TSM file. 
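The entry layout in the diagram above is what writeTombstoneV4 (further down) streams through gzip after the 4-byte 0x1504 header. A rough, hypothetical illustration of serializing one entry, separate from the package's own writer: bit 31 of the length word marks a prefix tombstone, bit 30 marks that a predicate follows, and the low 24 bits carry the key length.

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// appendTombstoneEntry sketches the v4 entry layout: 32-bit length word,
// key bytes, 8-byte min time, 8-byte max time, then an optional 8-byte
// predicate length followed by the predicate bytes.
func appendTombstoneEntry(dst *bytes.Buffer, key []byte, min, max int64, prefix bool, pred []byte) error {
	if len(key) > 0x00ffffff { // only 24 bits are available for the key length
		return fmt.Errorf("key too long: %d", len(key))
	}
	l := uint32(len(key))
	if prefix {
		l |= 1 << 31
	}
	if len(pred) > 0 {
		l |= 1 << 30
	}
	var b [8]byte
	binary.BigEndian.PutUint32(b[:4], l)
	dst.Write(b[:4])
	dst.Write(key)
	binary.BigEndian.PutUint64(b[:], uint64(min))
	dst.Write(b[:])
	binary.BigEndian.PutUint64(b[:], uint64(max))
	dst.Write(b[:])
	if len(pred) > 0 {
		binary.BigEndian.PutUint64(b[:], uint64(len(pred)))
		dst.Write(b[:])
		dst.Write(pred)
	}
	return nil
}

func main() {
	var buf bytes.Buffer
	_ = appendTombstoneEntry(&buf, []byte("m0,tag0=value0"), 0, 100, false, nil)
	fmt.Println(buf.Len()) // 4 + 14 + 8 + 8 = 34 bytes
}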
- Path string - - FilterFn func(k []byte) bool - - // cache of the stats for this tombstone - fileStats []FileStat - // indicates that the stats may be out of sync with what is on disk and they - // should be refreshed. - statsLoaded bool - - // Tombstones that have been written but not flushed to disk yet. - tombstones []Tombstone - - // These are references used for pending writes that have not been committed. If - // these are nil, then no pending writes are in progress. - gz *gzip.Writer - bw *bufio.Writer - pendingFile *os.File - tmp [8]byte - lastAppliedOffset int64 - - // Optional observer for when tombstone files are written. - obs FileStoreObserver -} - -// NewTombstoner constructs a Tombstoner for the given path. FilterFn can be nil. -func NewTombstoner(path string, filterFn func(k []byte) bool) *Tombstoner { - return &Tombstoner{ - Path: path, - FilterFn: filterFn, - obs: noFileStoreObserver{}, - } -} - -// Tombstone represents an individual deletion. -type Tombstone struct { - // Key is the tombstoned series key. - Key []byte - - // Prefix indicates if this tombstone entry is a prefix key, meaning all - // keys with a prefix matching Key should be removed for the [Min, Max] range. - Prefix bool - - // Min and Max are the min and max unix nanosecond time ranges of Key that are deleted. - Min, Max int64 - - // Predicate stores the marshaled form of some predicate for matching keys. - Predicate []byte -} - -func (t Tombstone) String() string { - prefix := "Key" - if t.Prefix { - prefix = "Prefix" - } - return fmt.Sprintf("%s: %q, [%d, %d] pred:%v", prefix, t.Key, t.Min, t.Max, len(t.Predicate) > 0) -} - -// WithObserver sets a FileStoreObserver for when the tombstone file is written. -func (t *Tombstoner) WithObserver(obs FileStoreObserver) { - if obs == nil { - obs = noFileStoreObserver{} - } - t.obs = obs -} - -// AddPrefixRange adds a prefix-based tombstone key with an explicit range. -func (t *Tombstoner) AddPrefixRange(key []byte, min, max int64, predicate []byte) error { - t.mu.Lock() - defer t.mu.Unlock() - - // If this TSMFile has not been written (mainly in tests), don't write a - // tombstone because the keys will not be written when it's actually saved. - if t.Path == "" { - return nil - } - - t.statsLoaded = false - - if err := t.prepareLatest(); err != nil { - return err - } - - return t.writeTombstoneV4(t.gz, Tombstone{ - Key: key, - Min: min, - Max: max, - Prefix: true, - Predicate: predicate, - }) -} - -// Add adds the all keys, across all timestamps, to the tombstone. -func (t *Tombstoner) Add(keys [][]byte) error { - return t.AddRange(keys, math.MinInt64, math.MaxInt64) -} - -// AddRange adds all keys to the tombstone specifying only the data between min and max to be removed. -func (t *Tombstoner) AddRange(keys [][]byte, min, max int64) error { - for t.FilterFn != nil && len(keys) > 0 && !t.FilterFn(keys[0]) { - keys = keys[1:] - } - - if len(keys) == 0 { - return nil - } - - t.mu.Lock() - defer t.mu.Unlock() - - // If this TSMFile has not been written (mainly in tests), don't write a - // tombstone because the keys will not be written when it's actually saved. 
- if t.Path == "" { - return nil - } - - t.statsLoaded = false - - if err := t.prepareLatest(); err != nil { - return err - } - - for _, k := range keys { - if t.FilterFn != nil && !t.FilterFn(k) { - continue - } - - if err := t.writeTombstoneV4(t.gz, Tombstone{ - Key: k, - Min: min, - Max: max, - Prefix: false, - }); err != nil { - return err - } - } - return nil -} - -func (t *Tombstoner) Flush() error { - t.mu.Lock() - defer t.mu.Unlock() - - if err := t.commit(); err != nil { - // Reset our temp references and clean up. - _ = t.rollback() - return err - } - return nil -} - -func (t *Tombstoner) Rollback() error { - t.mu.Lock() - defer t.mu.Unlock() - return t.rollback() -} - -// Delete removes all the tombstone files from disk. -func (t *Tombstoner) Delete() error { - t.mu.Lock() - defer t.mu.Unlock() - if err := os.RemoveAll(t.tombstonePath()); err != nil { - return err - } - t.statsLoaded = false - t.lastAppliedOffset = 0 - - return nil -} - -// HasTombstones return true if there are any tombstone entries recorded. -func (t *Tombstoner) HasTombstones() bool { - files := t.TombstoneFiles() - t.mu.RLock() - n := len(t.tombstones) - t.mu.RUnlock() - - return len(files) > 0 && files[0].Size > 0 || n > 0 -} - -// TombstoneFiles returns any tombstone files associated with Tombstoner's TSM file. -func (t *Tombstoner) TombstoneFiles() []FileStat { - t.mu.RLock() - if t.statsLoaded { - stats := t.fileStats - t.mu.RUnlock() - return stats - } - t.mu.RUnlock() - - stat, err := os.Stat(t.tombstonePath()) - if os.IsNotExist(err) || err != nil { - t.mu.Lock() - // The file doesn't exist so record that we tried to load it so - // we don't continue to keep trying. This is the common case. - t.statsLoaded = os.IsNotExist(err) - t.fileStats = t.fileStats[:0] - t.mu.Unlock() - return nil - } - - t.mu.Lock() - t.fileStats = append(t.fileStats[:0], FileStat{ - Path: t.tombstonePath(), - CreatedAt: stat.ModTime().UnixNano(), - LastModified: stat.ModTime().UnixNano(), - Size: uint32(stat.Size()), - }) - t.statsLoaded = true - stats := t.fileStats - t.mu.Unlock() - - return stats -} - -// Walk calls fn for every Tombstone under the Tombstoner. -func (t *Tombstoner) Walk(fn func(t Tombstone) error) error { - t.mu.Lock() - defer t.mu.Unlock() - - f, err := os.Open(t.tombstonePath()) - if os.IsNotExist(err) { - return nil - } else if err != nil { - return err - } - defer f.Close() - - var b [4]byte - if _, err := f.Read(b[:]); err != nil { - return errors.New("unable to read header") - } - - if _, err := f.Seek(0, io.SeekStart); err != nil { - return err - } - - header := binary.BigEndian.Uint32(b[:]) - if header == v4header { - return t.readTombstoneV4(f, fn) - } - return errors.New("invalid tombstone file") -} - -func (t *Tombstoner) prepareLatest() error { - if t.pendingFile != nil { // There is already a pending tombstone file open. - return nil - } - - tmpPath := fmt.Sprintf("%s.%s", t.tombstonePath(), CompactionTempExtension) - tmp, err := os.OpenFile(tmpPath, os.O_CREATE|os.O_RDWR|os.O_EXCL, 0666) - if err != nil { - return err - } - - removeTmp := func() { - tmp.Close() - os.Remove(tmp.Name()) - } - - // Copy the existing v4 file if it exists - f, err := os.Open(t.tombstonePath()) - if err != nil && !os.IsNotExist(err) { - // An unexpected error should be returned - removeTmp() - return err - } else if err == nil { - // No error so load the tombstone file. 
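Taken together, the write path stages entries into a pending gzip stream (prepareLatest), Flush commits them with a rename and directory fsync, and Walk replays every persisted entry. A minimal usage sketch follows; the file name is a placeholder and the import path is the pre-move one used by the deleted tests.

package main

import (
	"fmt"

	"github.com/influxdata/influxdb/v2/tsdb/tsm1"
)

func main() {
	// Tombstoner derives "<name>.tombstone" from the TSM file path it is given.
	ts := tsm1.NewTombstoner("000000001.tsm1", nil)

	// Stage deletions: whole keys over a time range, or a key prefix with an optional predicate.
	if err := ts.AddRange([][]byte{[]byte("foo")}, 0, 100); err != nil {
		panic(err)
	}
	if err := ts.AddPrefixRange([]byte("m0,"), 0, 100, nil); err != nil {
		panic(err)
	}

	// Commit the pending gzip stream (Rollback would discard it instead).
	if err := ts.Flush(); err != nil {
		panic(err)
	}

	// Replay every persisted entry.
	_ = ts.Walk(func(t tsm1.Tombstone) error {
		fmt.Println(t)
		return nil
	})
}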
- defer f.Close() - var b [4]byte - if n, err := f.Read(b[:]); n == 4 && err == nil { - header := binary.BigEndian.Uint32(b[:]) - // There is an existing tombstone on disk and it's not a v4. - // We can't support it. - if header != v4header { - removeTmp() - return errIncompatibleV4Version - } - - // Seek back to the beginning we copy the header - if _, err := f.Seek(0, io.SeekStart); err != nil { - removeTmp() - return err - } - - // Copy the whole file - if _, err := io.Copy(tmp, f); err != nil { - f.Close() - removeTmp() - return err - } - } - } - - // Else, the error was that the file does not exist. Create a new one. - var b [8]byte - bw := bufio.NewWriterSize(tmp, 64*1024) - - // Write the header only if the file is new - if os.IsNotExist(err) { - binary.BigEndian.PutUint32(b[:4], v4header) - if _, err := bw.Write(b[:4]); err != nil { - removeTmp() - return err - } - } - - // Write the tombstones - gz := gzip.NewWriter(bw) - - t.pendingFile = tmp - t.gz = gz - t.bw = bw - - return nil -} - -func (t *Tombstoner) commit() error { - // No pending writes - if t.pendingFile == nil { - return nil - } - - if err := t.gz.Close(); err != nil { - return err - } - - if err := t.bw.Flush(); err != nil { - return err - } - - // fsync the file to flush the write - if err := t.pendingFile.Sync(); err != nil { - return err - } - - tmpFilename := t.pendingFile.Name() - t.pendingFile.Close() - - if err := t.obs.FileFinishing(tmpFilename); err != nil { - return err - } - - if err := fs.RenameFileWithReplacement(tmpFilename, t.tombstonePath()); err != nil { - return err - } - - if err := fs.SyncDir(filepath.Dir(t.tombstonePath())); err != nil { - return err - } - - t.pendingFile = nil - t.bw = nil - t.gz = nil - - return nil -} - -func (t *Tombstoner) rollback() error { - if t.pendingFile == nil { - return nil - } - - tmpFilename := t.pendingFile.Name() - t.pendingFile.Close() - t.gz = nil - t.bw = nil - t.pendingFile = nil - return os.Remove(tmpFilename) -} - -// readTombstoneV4 reads the fourth version of tombstone files that are capable -// of storing multiple v3 files appended together. -func (t *Tombstoner) readTombstoneV4(f *os.File, fn func(t Tombstone) error) error { - // Skip header, already checked earlier - if t.lastAppliedOffset != 0 { - if _, err := f.Seek(t.lastAppliedOffset, io.SeekStart); err != nil { - return err - } - } else { - if _, err := f.Seek(headerSize, io.SeekStart); err != nil { - return err - } - } - - const kmask = int64(0xff000000) // Mask for non key-length bits - - br := bufio.NewReaderSize(f, 64*1024) - gr, err := gzip.NewReader(br) - if err == io.EOF { - return nil - } else if err != nil { - return err - } - defer gr.Close() - - var ( // save these buffers across loop iterations to avoid allocations - keyBuf []byte - predBuf []byte - ) - - for { - gr.Multistream(false) - if err := func() error { - for { - var buf [8]byte - - if _, err = io.ReadFull(gr, buf[:4]); err == io.EOF || err == io.ErrUnexpectedEOF { - return nil - } else if err != nil { - return err - } - - keyLen := int64(binary.BigEndian.Uint32(buf[:4])) - prefix := keyLen>>31&1 == 1 // Prefix is set according to whether the highest bit is set. - hasPred := keyLen>>30&1 == 1 - - // Remove 8 MSB to get correct length. 
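Because prepareLatest appends a brand-new gzip stream on every write cycle, a tombstone file ends up as a sequence of concatenated gzip members, and readTombstoneV4 iterates them by turning off multistream mode and resetting the reader after each member (the Reset call appears just below). A standalone, standard-library-only sketch of that read pattern:

package main

import (
	"bytes"
	"compress/gzip"
	"fmt"
	"io"
)

func main() {
	// Two independently compressed members in one buffer, the shape a
	// tombstone file has after an append.
	var buf bytes.Buffer
	for _, s := range []string{"first member", "second member"} {
		gw := gzip.NewWriter(&buf)
		gw.Write([]byte(s))
		gw.Close()
	}

	gr, err := gzip.NewReader(&buf)
	if err != nil {
		panic(err)
	}
	for {
		// Stop at the member boundary instead of transparently continuing.
		gr.Multistream(false)
		data, err := io.ReadAll(gr)
		if err != nil {
			panic(err)
		}
		fmt.Printf("%s\n", data)
		// Advance to the next member; io.EOF means the file is exhausted.
		if err := gr.Reset(&buf); err == io.EOF {
			break
		} else if err != nil {
			panic(err)
		}
	}
}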
- keyLen &^= kmask - - if int64(len(keyBuf)) < keyLen { - keyBuf = make([]byte, keyLen) - } - // cap slice protects against invalid usages of append in callback - key := keyBuf[:keyLen:keyLen] - - if _, err := io.ReadFull(gr, key); err != nil { - return err - } - - if _, err := io.ReadFull(gr, buf[:8]); err != nil { - return err - } - min := int64(binary.BigEndian.Uint64(buf[:8])) - - if _, err := io.ReadFull(gr, buf[:8]); err != nil { - return err - } - max := int64(binary.BigEndian.Uint64(buf[:8])) - - var predicate []byte - if hasPred { - if _, err := io.ReadFull(gr, buf[:8]); err != nil { - return err - } - predLen := binary.BigEndian.Uint64(buf[:8]) - - if uint64(len(predBuf)) < predLen { - predBuf = make([]byte, predLen) - } - // cap slice protects against invalid usages of append in callback - predicate = predBuf[:predLen:predLen] - - if _, err := io.ReadFull(gr, predicate); err != nil { - return err - } - } - - if err := fn(Tombstone{ - Key: key, - Min: min, - Max: max, - Prefix: prefix, - Predicate: predicate, - }); err != nil { - return err - } - } - }(); err != nil { - return err - } - - for _, t := range t.tombstones { - if err := fn(t); err != nil { - return err - } - } - - err = gr.Reset(br) - if err == io.EOF { - break - } - } - - // Save the position of tombstone file so we don't re-apply the same set again if there are - // more deletes. - pos, err := f.Seek(0, io.SeekCurrent) - if err != nil { - return err - } - t.lastAppliedOffset = pos - return nil -} - -func (t *Tombstoner) tombstonePath() string { - if strings.HasSuffix(t.Path, "tombstone") { - return t.Path - } - - // Filename is 0000001.tsm1 - filename := filepath.Base(t.Path) - - // Strip off the tsm1 - ext := filepath.Ext(filename) - if ext != "" { - filename = strings.TrimSuffix(filename, ext) - } - - // Append the "tombstone" suffix to create a 0000001.tombstone file - return filepath.Join(filepath.Dir(t.Path), filename+".tombstone") -} - -func (t *Tombstoner) writeTombstoneV4(dst io.Writer, ts Tombstone) error { - maxKeyLen := 0x00ffffff // 24 bit key length. Top 8 bits for other information. - - // Maximum key length. Leaves 8 spare bits. - if len(ts.Key) > maxKeyLen { - return fmt.Errorf("key has length %d, maximum allowed key length %d", len(ts.Key), maxKeyLen) - } - - l := uint32(len(ts.Key)) - if ts.Prefix { - // A mask to set the prefix bit on a tombstone. 
- l |= 1 << 31 - } - if len(ts.Predicate) > 0 { - // A mask to set the predicate bit on a tombstone - l |= 1 << 30 - } - - binary.BigEndian.PutUint32(t.tmp[:4], l) - if _, err := dst.Write(t.tmp[:4]); err != nil { - return err - } - if _, err := dst.Write([]byte(ts.Key)); err != nil { - return err - } - - binary.BigEndian.PutUint64(t.tmp[:], uint64(ts.Min)) - if _, err := dst.Write(t.tmp[:]); err != nil { - return err - } - - binary.BigEndian.PutUint64(t.tmp[:], uint64(ts.Max)) - if _, err := dst.Write(t.tmp[:]); err != nil { - return err - } - - if len(ts.Predicate) > 0 { - binary.BigEndian.PutUint64(t.tmp[:], uint64(len(ts.Predicate))) - if _, err := dst.Write(t.tmp[:]); err != nil { - return err - } - - if _, err := dst.Write(ts.Predicate); err != nil { - return err - } - } - - return nil -} diff --git a/tsdb/tsm1/tombstone_test.go b/tsdb/tsm1/tombstone_test.go deleted file mode 100644 index 4b675b37fc..0000000000 --- a/tsdb/tsm1/tombstone_test.go +++ /dev/null @@ -1,499 +0,0 @@ -package tsm1_test - -import ( - "bytes" - "encoding/hex" - "fmt" - "io" - "os" - "reflect" - "testing" - "time" - - "github.com/google/go-cmp/cmp" - "github.com/influxdata/influxdb/v2/pkg/fs" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" -) - -func TestTombstoner_Add(t *testing.T) { - dir := MustTempDir() - defer func() { os.RemoveAll(dir) }() - - f := MustTempFile(dir) - ts := tsm1.NewTombstoner(f.Name(), nil) - - entries := mustReadAll(ts) - if got, exp := len(entries), 0; got != exp { - t.Fatalf("length mismatch: got %v, exp %v", got, exp) - } - - stats := ts.TombstoneFiles() - if got, exp := len(stats), 0; got != exp { - t.Fatalf("stat length mismatch: got %v, exp %v", got, exp) - } - - ts.Add([][]byte{[]byte("foo")}) - - if err := ts.Flush(); err != nil { - t.Fatalf("unexpected error flushing tombstone: %v", err) - } - - entries = mustReadAll(ts) - stats = ts.TombstoneFiles() - if got, exp := len(stats), 1; got != exp { - t.Fatalf("stat length mismatch: got %v, exp %v", got, exp) - } - - if stats[0].Size == 0 { - t.Fatalf("got size %v, exp > 0", stats[0].Size) - } - - if stats[0].LastModified == 0 { - t.Fatalf("got lastModified %v, exp > 0", stats[0].LastModified) - } - - if stats[0].Path == "" { - t.Fatalf("got path %v, exp != ''", stats[0].Path) - } - - if got, exp := len(entries), 1; got != exp { - t.Fatalf("length mismatch: got %v, exp %v", got, exp) - } - - if got, exp := string(entries[0].Key), "foo"; got != exp { - t.Fatalf("value mismatch: got %v, exp %v", got, exp) - } - - // Use a new Tombstoner to verify values are persisted - ts = tsm1.NewTombstoner(f.Name(), nil) - entries = mustReadAll(ts) - if got, exp := len(entries), 1; got != exp { - t.Fatalf("length mismatch: got %v, exp %v", got, exp) - } - - if got, exp := string(entries[0].Key), "foo"; got != exp { - t.Fatalf("value mismatch: got %v, exp %v", got, exp) - } - - if got, exp := entries[0].Prefix, false; got != exp { - t.Fatalf("value mismatch: got %v, exp %v", got, exp) - } -} - -func TestTombstoner_AddPrefixRange(t *testing.T) { - dir := MustTempDir() - defer func() { os.RemoveAll(dir) }() - - f := MustTempFile(dir) - ts := tsm1.NewTombstoner(f.Name(), nil) - - entries := mustReadAll(ts) - if got, exp := len(entries), 0; got != exp { - t.Fatalf("length mismatch: got %v, exp %v", got, exp) - } - - stats := ts.TombstoneFiles() - if got, exp := len(stats), 0; got != exp { - t.Fatalf("stat length mismatch: got %v, exp %v", got, exp) - } - - if err := ts.AddPrefixRange([]byte("some-prefix"), 20, 30, []byte("some-predicate")); err != nil { 
- t.Fatal(err) - } - - if err := ts.Flush(); err != nil { - t.Fatalf("unexpected error flushing tombstone: %v", err) - } - - exp := tsm1.Tombstone{ - Key: []byte("some-prefix"), - Min: 20, - Max: 30, - Prefix: true, - Predicate: []byte("some-predicate"), - } - - entries = mustReadAll(ts) - if got, exp := len(entries), 1; got != exp { - t.Fatalf("length mismatch: got %v, exp %v", got, exp) - } - - if got := entries[0]; !reflect.DeepEqual(got, exp) { - t.Fatalf("unexpected tombstone entry. Got %s, expected %s", got, exp) - } -} - -func TestTombstoner_Add_LargeKey(t *testing.T) { - dir := MustTempDir() - defer func() { os.RemoveAll(dir) }() - - f := MustTempFile(dir) - ts := tsm1.NewTombstoner(f.Name(), nil) - - entries := mustReadAll(ts) - if got, exp := len(entries), 0; got != exp { - t.Fatalf("length mismatch: got %v, exp %v", got, exp) - } - - stats := ts.TombstoneFiles() - if got, exp := len(stats), 0; got != exp { - t.Fatalf("stat length mismatch: got %v, exp %v", got, exp) - } - - key := bytes.Repeat([]byte{'a'}, 4096) - ts.Add([][]byte{key}) - - if err := ts.Flush(); err != nil { - t.Fatalf("unexpected error flushing tombstone: %v", err) - } - - entries = mustReadAll(ts) - stats = ts.TombstoneFiles() - if got, exp := len(stats), 1; got != exp { - t.Fatalf("stat length mismatch: got %v, exp %v", got, exp) - } - - if stats[0].Size == 0 { - t.Fatalf("got size %v, exp > 0", stats[0].Size) - } - - if stats[0].LastModified == 0 { - t.Fatalf("got lastModified %v, exp > 0", stats[0].LastModified) - } - - if stats[0].Path == "" { - t.Fatalf("got path %v, exp != ''", stats[0].Path) - } - - if got, exp := len(entries), 1; got != exp { - t.Fatalf("length mismatch: got %v, exp %v", got, exp) - } - - if got, exp := string(entries[0].Key), string(key); got != exp { - t.Fatalf("value mismatch: got %v, exp %v", got, exp) - } - - // Use a new Tombstoner to verify values are persisted - ts = tsm1.NewTombstoner(f.Name(), nil) - entries = mustReadAll(ts) - if got, exp := len(entries), 1; got != exp { - t.Fatalf("length mismatch: got %v, exp %v", got, exp) - } - - if got, exp := string(entries[0].Key), string(key); got != exp { - t.Fatalf("value mismatch: got %v, exp %v", got, exp) - } -} - -func TestTombstoner_Add_KeyTooBig(t *testing.T) { - dir := MustTempDir() - defer func() { os.RemoveAll(dir) }() - - f := MustTempFile(dir) - ts := tsm1.NewTombstoner(f.Name(), nil) - - entries := mustReadAll(ts) - if got, exp := len(entries), 0; got != exp { - t.Fatalf("length mismatch: got %v, exp %v", got, exp) - } - - stats := ts.TombstoneFiles() - if got, exp := len(stats), 0; got != exp { - t.Fatalf("stat length mismatch: got %v, exp %v", got, exp) - } - - key := bytes.Repeat([]byte{'a'}, 0x00ffffff) // This is OK. 
- if err := ts.Add([][]byte{key}); err != nil { - t.Fatal(err) - } - - if err := ts.Flush(); err != nil { - t.Fatalf("unexpected error flushing tombstone: %v", err) - } - - key = append(key, 'a') // This is not - if err := ts.Add([][]byte{key}); err == nil { - t.Fatalf("got no error, expected key length error") - } - - if err := ts.Flush(); err != nil { - t.Fatalf("unexpected error flushing tombstone: %v", err) - } -} - -func TestTombstoner_Add_Multiple(t *testing.T) { - dir := MustTempDir() - defer func() { os.RemoveAll(dir) }() - - f := MustTempFile(dir) - ts := tsm1.NewTombstoner(f.Name(), nil) - - entries := mustReadAll(ts) - if got, exp := len(entries), 0; got != exp { - t.Fatalf("length mismatch: got %v, exp %v", got, exp) - } - - stats := ts.TombstoneFiles() - if got, exp := len(stats), 0; got != exp { - t.Fatalf("stat length mismatch: got %v, exp %v", got, exp) - } - - ts.Add([][]byte{[]byte("foo")}) - - if err := ts.Flush(); err != nil { - t.Fatalf("unexpected error flushing tombstone: %v", err) - } - - ts.Add([][]byte{[]byte("bar")}) - - if err := ts.Flush(); err != nil { - t.Fatalf("unexpected error flushing tombstone: %v", err) - } - - entries = mustReadAll(ts) - stats = ts.TombstoneFiles() - if got, exp := len(stats), 1; got != exp { - t.Fatalf("stat length mismatch: got %v, exp %v", got, exp) - } - - if stats[0].Size == 0 { - t.Fatalf("got size %v, exp > 0", stats[0].Size) - } - - if stats[0].LastModified == 0 { - t.Fatalf("got lastModified %v, exp > 0", stats[0].LastModified) - } - - if stats[0].Path == "" { - t.Fatalf("got path %v, exp != ''", stats[0].Path) - } - - if got, exp := len(entries), 2; got != exp { - t.Fatalf("length mismatch: got %v, exp %v", got, exp) - } - - if got, exp := string(entries[0].Key), "foo"; got != exp { - t.Fatalf("value mismatch: got %v, exp %v", got, exp) - } - - if got, exp := string(entries[1].Key), "bar"; got != exp { - t.Fatalf("value mismatch: got %v, exp %v", got, exp) - } - - // Use a new Tombstoner to verify values are persisted - ts = tsm1.NewTombstoner(f.Name(), nil) - entries = mustReadAll(ts) - if got, exp := len(entries), 2; got != exp { - t.Fatalf("length mismatch: got %v, exp %v", got, exp) - } - - if got, exp := string(entries[0].Key), "foo"; got != exp { - t.Fatalf("value mismatch: got %v, exp %v", got, exp) - } - - if got, exp := entries[0].Prefix, false; got != exp { - t.Fatalf("value mismatch: got %v, exp %v", got, exp) - } - - if got, exp := string(entries[1].Key), "bar"; got != exp { - t.Fatalf("value mismatch: got %v, exp %v", got, exp) - } - - if got, exp := entries[1].Prefix, false; got != exp { - t.Fatalf("value mismatch: got %v, exp %v", got, exp) - } -} - -func TestTombstoner_Add_Empty(t *testing.T) { - dir := MustTempDir() - defer func() { os.RemoveAll(dir) }() - - f := MustTempFile(dir) - ts := tsm1.NewTombstoner(f.Name(), nil) - - entries := mustReadAll(ts) - if got, exp := len(entries), 0; got != exp { - t.Fatalf("length mismatch: got %v, exp %v", got, exp) - } - - ts.Add([][]byte{}) - - if err := ts.Flush(); err != nil { - t.Fatalf("unexpected error flushing tombstone: %v", err) - } - - // Use a new Tombstoner to verify values are persisted - ts = tsm1.NewTombstoner(f.Name(), nil) - entries = mustReadAll(ts) - if got, exp := len(entries), 0; got != exp { - t.Fatalf("length mismatch: got %v, exp %v", got, exp) - } - - stats := ts.TombstoneFiles() - if got, exp := len(stats), 0; got != exp { - t.Fatalf("stat length mismatch: got %v, exp %v", got, exp) - } - -} - -func TestTombstoner_Delete(t *testing.T) { - dir := 
MustTempDir() - defer func() { os.RemoveAll(dir) }() - - f := MustTempFile(dir) - ts := tsm1.NewTombstoner(f.Name(), nil) - - ts.Add([][]byte{[]byte("foo")}) - - if err := ts.Flush(); err != nil { - t.Fatalf("unexpected error flushing: %v", err) - } - - // Use a new Tombstoner to verify values are persisted - ts = tsm1.NewTombstoner(f.Name(), nil) - entries := mustReadAll(ts) - if got, exp := len(entries), 1; got != exp { - t.Fatalf("length mismatch: got %v, exp %v", got, exp) - } - - if got, exp := string(entries[0].Key), "foo"; got != exp { - t.Fatalf("value mismatch: got %s, exp %s", got, exp) - } - - if err := ts.Delete(); err != nil { - fatal(t, "delete tombstone", err) - } - - stats := ts.TombstoneFiles() - if got, exp := len(stats), 0; got != exp { - t.Fatalf("stat length mismatch: got %v, exp %v", got, exp) - } - - ts = tsm1.NewTombstoner(f.Name(), nil) - entries = mustReadAll(ts) - if got, exp := len(entries), 0; got != exp { - t.Fatalf("length mismatch: got %v, exp %v", got, exp) - } -} - -func TestTombstoner_Existing(t *testing.T) { - dir := MustTempDir() - defer func() { os.RemoveAll(dir) }() - - expMin := time.Date(2018, time.December, 12, 0, 0, 0, 0, time.UTC).UnixNano() - expMax := time.Date(2018, time.December, 13, 0, 0, 0, 0, time.UTC).UnixNano() - - expKeys := make([]string, 100) - for i := 0; i < len(expKeys); i++ { - expKeys[i] = fmt.Sprintf("m0,tag0=value%d", i) - } - - // base-16 encoded v4 tombstone file of above setup. - v4Raw := `000015041f8b08000000000000ff84d0ab5103401400c0d30850e90291dc` + - `ff092a41453098303140739108da4273b999f5ab36a5f4f8717cfe3cbf1f` + - `5fbecf97afb7e3e17af93ddd523a5c6faf3f0f29dd891345a6281495a251` + - `748a41312962239efe8fed5217b25b5dc8ae7521bbd785ec6217b29b5dc8` + - `ae7621bbdb85ec7217e2ddecddecddecddecddecddecddecddecddecddec` + - `dde2dde2dde2dde2dde2dde2dde2dde2dde2dde2ddeaddeaddeaddeaddea` + - `ddeaddeaddeaddeaddeadde6dde6dde6dde6dde6dde6dde6dde6dde6dde6` + - `ddeeddeeddeeddeeddeeddeeddeeddeeddeeddeedde1dde1dde1dde1dde1` + - `dde1dde1dde1dde1dde1dde9dde9dde9dde9dde9dde9dde9dde9dde9dde9` + - `ddf06e7837bc1bde0def8677c3bbe1ddf06edcedfe050000ffff34593d01` + - `a20d0000` - v4Decoded, err := hex.DecodeString(v4Raw) - if err != nil { - panic(err) - } - - f := MustTempFile(dir) - if _, err := io.Copy(f, bytes.NewReader(v4Decoded)); err != nil { - panic(err) - } - if err := f.Close(); err != nil { - panic(err) - } - - name := f.Name() + ".tombstone" - if err := fs.RenameFile(f.Name(), name); err != nil { - panic(err) - } - - t.Run("read", func(t *testing.T) { - ts := tsm1.NewTombstoner(name, nil) - var gotKeys []string - if err := ts.Walk(func(tombstone tsm1.Tombstone) error { - gotKeys = append(gotKeys, string(tombstone.Key)) - if got, exp := tombstone.Min, expMin; got != exp { - t.Fatalf("got max time %d, expected %d", got, exp) - } else if got, exp := tombstone.Max, expMax; got != exp { - t.Fatalf("got max time %d, expected %d", got, exp) - } else if got, exp := tombstone.Prefix, false; got != exp { - t.Fatalf("got prefix key, expected non-prefix key") - } - return nil - }); err != nil { - t.Fatal(err) - } - - if !reflect.DeepEqual(gotKeys, expKeys) { - t.Fatalf("tombstone entries differ:\n%s\n", cmp.Diff(gotKeys, expKeys, nil)) - } - }) - - t.Run("add_prefix", func(t *testing.T) { - ts := tsm1.NewTombstoner(name, nil) - if err := ts.AddPrefixRange([]byte("new-prefix"), 10, 20, []byte("new-predicate")); err != nil { - t.Fatal(err) - } - - if err := ts.Flush(); err != nil { - t.Fatal(err) - } - - var got tsm1.Tombstone - if err := 
ts.Walk(func(tombstone tsm1.Tombstone) error { - got = tombstone - return nil - }); err != nil { - t.Fatal(err) - } - - exp := tsm1.Tombstone{ - Key: []byte("new-prefix"), - Min: 10, - Max: 20, - Prefix: true, - Predicate: []byte("new-predicate"), - } - - if !reflect.DeepEqual(got, exp) { - t.Fatalf("unexpected tombstone entry. Got %s, expected %s", got, exp) - } - }) -} - -func mustReadAll(t *tsm1.Tombstoner) []tsm1.Tombstone { - var tombstones []tsm1.Tombstone - if err := t.Walk(func(t tsm1.Tombstone) error { - b := make([]byte, len(t.Key)) - copy(b, t.Key) - - var p []byte - if t.Predicate != nil { - p = make([]byte, len(t.Predicate)) - copy(p, t.Predicate) - } - - tombstones = append(tombstones, tsm1.Tombstone{ - Min: t.Min, - Max: t.Max, - Key: b, - Prefix: t.Prefix, - Predicate: p, - }) - return nil - }); err != nil { - panic(err) - } - return tombstones -} diff --git a/tsdb/tsm1/value.go b/tsdb/tsm1/value.go deleted file mode 100644 index fb0ba9e6cc..0000000000 --- a/tsdb/tsm1/value.go +++ /dev/null @@ -1,144 +0,0 @@ -package tsm1 - -import ( - "fmt" - "time" - - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/value" -) - -type ( - Value = value.Value - IntegerValue = value.IntegerValue - UnsignedValue = value.UnsignedValue - FloatValue = value.FloatValue - BooleanValue = value.BooleanValue - StringValue = value.StringValue -) - -// NewValue returns a new Value with the underlying type dependent on value. -func NewValue(t int64, v interface{}) Value { return value.NewValue(t, v) } - -// NewRawIntegerValue returns a new integer value. -func NewRawIntegerValue(t int64, v int64) IntegerValue { return value.NewRawIntegerValue(t, v) } - -// NewRawUnsignedValue returns a new unsigned integer value. -func NewRawUnsignedValue(t int64, v uint64) UnsignedValue { return value.NewRawUnsignedValue(t, v) } - -// NewRawFloatValue returns a new float value. -func NewRawFloatValue(t int64, v float64) FloatValue { return value.NewRawFloatValue(t, v) } - -// NewRawBooleanValue returns a new boolean value. -func NewRawBooleanValue(t int64, v bool) BooleanValue { return value.NewRawBooleanValue(t, v) } - -// NewRawStringValue returns a new string value. -func NewRawStringValue(t int64, v string) StringValue { return value.NewRawStringValue(t, v) } - -// NewIntegerValue returns a new integer value. -func NewIntegerValue(t int64, v int64) Value { return value.NewIntegerValue(t, v) } - -// NewUnsignedValue returns a new unsigned integer value. -func NewUnsignedValue(t int64, v uint64) Value { return value.NewUnsignedValue(t, v) } - -// NewFloatValue returns a new float value. -func NewFloatValue(t int64, v float64) Value { return value.NewFloatValue(t, v) } - -// NewBooleanValue returns a new boolean value. -func NewBooleanValue(t int64, v bool) Value { return value.NewBooleanValue(t, v) } - -// NewStringValue returns a new string value. -func NewStringValue(t int64, v string) Value { return value.NewStringValue(t, v) } - -// CollectionToValues takes in a series collection and returns it as a map of series key to -// values. It returns an error if any of the points could not be converted. -func CollectionToValues(collection *tsdb.SeriesCollection) (map[string][]Value, error) { - values := make(map[string][]Value, collection.Length()) - var ( - keyBuf []byte - baseLen int - ) - - j := 0 - for citer := collection.Iterator(); citer.Next(); { - keyBuf = append(keyBuf[:0], citer.Key()...) 
- keyBuf = append(keyBuf, keyFieldSeparator...) - baseLen = len(keyBuf) - - p := citer.Point() - iter := p.FieldIterator() - t := p.Time().UnixNano() - - for iter.Next() { - keyBuf = append(keyBuf[:baseLen], iter.FieldKey()...) - - var v Value - switch iter.Type() { - case models.Float: - fv, err := iter.FloatValue() - if err != nil { - return nil, err - } - v = NewFloatValue(t, fv) - case models.Integer: - iv, err := iter.IntegerValue() - if err != nil { - return nil, err - } - v = NewIntegerValue(t, iv) - case models.Unsigned: - iv, err := iter.UnsignedValue() - if err != nil { - return nil, err - } - v = NewUnsignedValue(t, iv) - case models.String: - v = NewStringValue(t, iter.StringValue()) - case models.Boolean: - bv, err := iter.BooleanValue() - if err != nil { - return nil, err - } - v = NewBooleanValue(t, bv) - default: - return nil, fmt.Errorf("unknown field type for %s: %s", - string(iter.FieldKey()), p.String()) - } - - vs, ok := values[string(keyBuf)] - if ok && len(vs) > 0 && valueType(vs[0]) != valueType(v) { - if collection.Reason == "" { - collection.Reason = fmt.Sprintf( - "conflicting field type: %s has field type %T but expected %T", - citer.Key(), v.Value(), vs[0].Value()) - } - collection.Dropped++ - collection.DroppedKeys = append(collection.DroppedKeys, citer.Key()) - continue - } - - values[string(keyBuf)] = append(vs, v) - collection.Copy(j, citer.Index()) - j++ - } - } - - collection.Truncate(j) - return values, nil -} - -// ValuesToPoints takes in a map of values and returns a slice of models.Point. -func ValuesToPoints(values map[string][]Value) []models.Point { - points := make([]models.Point, 0, len(values)) - for composite, vals := range values { - series, field := SeriesAndFieldFromCompositeKey([]byte(composite)) - strField := string(field) - for _, val := range vals { - t := time.Unix(0, val.UnixNano()) - fields := models.Fields{strField: val.Value()} - points = append(points, models.NewPointFromSeries(series, fields, t)) - } - } - return points -} diff --git a/tsdb/tsm1/verify_tsm.go b/tsdb/tsm1/verify_tsm.go deleted file mode 100644 index d1e0e2f122..0000000000 --- a/tsdb/tsm1/verify_tsm.go +++ /dev/null @@ -1,103 +0,0 @@ -package tsm1 - -import ( - "bytes" - "fmt" - "hash/crc32" - "io" - "os" - - "github.com/influxdata/influxdb/v2" - "github.com/influxdata/influxdb/v2/tsdb" - "github.com/influxdata/influxdb/v2/tsdb/cursors" -) - -type VerifyTSM struct { - Stdout io.Writer - Paths []string - OrgID influxdb.ID - BucketID influxdb.ID -} - -func (v *VerifyTSM) Run() error { - for _, path := range v.Paths { - if err := v.processFile(path); err != nil { - fmt.Fprintf(v.Stdout, "Error processing file %q: %v", path, err) - } - } - return nil -} - -func (v *VerifyTSM) processFile(path string) error { - fmt.Println("processing file: " + path) - - file, err := os.OpenFile(path, os.O_RDONLY, 0600) - if err != nil { - return fmt.Errorf("OpenFile: %v", err) - } - - reader, err := NewTSMReader(file) - if err != nil { - return fmt.Errorf("failed to create TSM reader for %q: %v", path, err) - } - defer reader.Close() - - var start []byte - if v.OrgID.Valid() { - if v.BucketID.Valid() { - v := tsdb.EncodeName(v.OrgID, v.BucketID) - start = v[:] - } else { - v := tsdb.EncodeOrgName(v.OrgID) - start = v[:] - } - } - - var ts cursors.TimestampArray - count := 0 - totalErrors := 0 - iter := reader.Iterator(start) - for iter.Next() { - key := iter.Key() - if len(start) > 0 && (len(key) < len(start) || !bytes.Equal(key[:len(start)], start)) { - break - } - - entries := 
iter.Entries() - for i := range entries { - entry := &entries[i] - - checksum, buf, err := reader.ReadBytes(entry, nil) - if err != nil { - fmt.Fprintf(v.Stdout, "could not read block %d due to error: %q\n", count, err) - count++ - continue - } - - if expected := crc32.ChecksumIEEE(buf); checksum != expected { - totalErrors++ - fmt.Fprintf(v.Stdout, "unexpected checksum %d, expected %d for key %v, block %d\n", checksum, expected, key, count) - } - - if err = DecodeTimestampArrayBlock(buf, &ts); err != nil { - totalErrors++ - fmt.Fprintf(v.Stdout, "unable to decode timestamps for block %d: %q\n", count, err) - } - - if got, exp := entry.MinTime, ts.MinTime(); got != exp { - totalErrors++ - fmt.Fprintf(v.Stdout, "unexpected min time %d, expected %d for block %d: %q\n", got, exp, count, err) - } - if got, exp := entry.MaxTime, ts.MaxTime(); got != exp { - totalErrors++ - fmt.Fprintf(v.Stdout, "unexpected max time %d, expected %d for block %d: %q\n", got, exp, count, err) - } - - count++ - } - } - - fmt.Fprintf(v.Stdout, "Completed checking %d block(s)\n", count) - - return nil -} diff --git a/tsdb/tsm1/writer.go b/tsdb/tsm1/writer.go deleted file mode 100644 index f7810bc236..0000000000 --- a/tsdb/tsm1/writer.go +++ /dev/null @@ -1,919 +0,0 @@ -package tsm1 - -/* -A TSM file is composed for four sections: header, blocks, index and the footer. - -┌────────┬────────────────────────────────────┬─────────────┬──────────────┐ -│ Header │ Blocks │ Index │ Footer │ -│5 bytes │ N bytes │ N bytes │ 4 bytes │ -└────────┴────────────────────────────────────┴─────────────┴──────────────┘ - -Header is composed of a magic number to identify the file type and a version -number. - -┌───────────────────┐ -│ Header │ -├─────────┬─────────┤ -│ Magic │ Version │ -│ 4 bytes │ 1 byte │ -└─────────┴─────────┘ - -Blocks are sequences of pairs of CRC32 and data. The block data is opaque to the -file. The CRC32 is used for block level error detection. The length of the blocks -is stored in the index. - -┌───────────────────────────────────────────────────────────┐ -│ Blocks │ -├───────────────────┬───────────────────┬───────────────────┤ -│ Block 1 │ Block 2 │ Block N │ -├─────────┬─────────┼─────────┬─────────┼─────────┬─────────┤ -│ CRC │ Data │ CRC │ Data │ CRC │ Data │ -│ 4 bytes │ N bytes │ 4 bytes │ N bytes │ 4 bytes │ N bytes │ -└─────────┴─────────┴─────────┴─────────┴─────────┴─────────┘ - -Following the blocks is the index for the blocks in the file. The index is -composed of a sequence of index entries ordered lexicographically by key and -then by time. Each index entry starts with a key length and key followed by a -count of the number of blocks in the file. Each block entry is composed of -the min and max time for the block, the offset into the file where the block -is located and the the size of the block. - -The index structure can provide efficient access to all blocks as well as the -ability to determine the cost associated with acessing a given key. Given a key -and timestamp, we can determine whether a file contains the block for that -timestamp as well as where that block resides and how much data to read to -retrieve the block. If we know we need to read all or multiple blocks in a -file, we can use the size to determine how much to read in a given IO. 
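Each per-key block entry described above is a fixed 28 bytes (indexEntrySize below): 8-byte min time, 8-byte max time, 8-byte offset and 4-byte size, all big-endian. A small round-trip through the IndexEntry helpers defined further down; the import path is the pre-move one used by the deleted tests.

package main

import (
	"fmt"

	"github.com/influxdata/influxdb/v2/tsdb/tsm1"
)

func main() {
	e := tsm1.IndexEntry{MinTime: 1000, MaxTime: 2000, Offset: 5, Size: 100}

	// AppendTo lays the entry out as 8+8+8+4 = 28 big-endian bytes.
	buf := e.AppendTo(nil)
	fmt.Println(len(buf)) // 28

	// UnmarshalBinary reverses it.
	var got tsm1.IndexEntry
	if err := got.UnmarshalBinary(buf); err != nil {
		panic(err)
	}
	fmt.Println(got == e) // true
}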
- -┌────────────────────────────────────────────────────────────────────────────┐ -│ Index │ -├─────────┬─────────┬──────┬───────┬─────────┬─────────┬────────┬────────┬───┤ -│ Key Len │ Key │ Type │ Count │Min Time │Max Time │ Offset │ Size │...│ -│ 2 bytes │ N bytes │1 byte│2 bytes│ 8 bytes │ 8 bytes │8 bytes │4 bytes │ │ -└─────────┴─────────┴──────┴───────┴─────────┴─────────┴────────┴────────┴───┘ - -The last section is the footer that stores the offset of the start of the index. - -┌─────────┐ -│ Footer │ -├─────────┤ -│Index Ofs│ -│ 8 bytes │ -└─────────┘ -*/ - -import ( - "bufio" - "bytes" - "encoding/binary" - "fmt" - "hash/crc32" - "io" - "os" - "sort" - "strings" - "time" - - "github.com/influxdata/influxdb/v2/models" - "github.com/influxdata/influxdb/v2/pkg/fs" -) - -const ( - // MagicNumber is written as the first 4 bytes of a data file to - // identify the file as a tsm1 formatted file - MagicNumber uint32 = 0x16D116D1 - - // Version indicates the version of the TSM file format. - Version byte = 1 - - // Size in bytes of an index entry - indexEntrySize = 28 - - // Size in bytes used to store the count of index entries for a key - indexCountSize = 2 - - // Size in bytes used to store the type of block encoded - indexTypeSize = 1 - - // Max number of blocks for a given key that can exist in a single file - maxIndexEntries = (1 << (indexCountSize * 8)) - 1 - - // max length of a key in an index entry (measurement + tags) - maxKeyLength = (1 << (2 * 8)) - 1 - - // The threshold amount data written before we periodically fsync a TSM file. This helps avoid - // long pauses due to very large fsyncs at the end of writing a TSM file. - fsyncEvery = 25 * 1024 * 1024 -) - -var ( - //ErrNoValues is returned when TSMWriter.WriteIndex is called and there are no values to write. - ErrNoValues = fmt.Errorf("no values written") - - // ErrTSMClosed is returned when performing an operation against a closed TSM file. - ErrTSMClosed = fmt.Errorf("tsm file closed") - - // ErrMaxKeyLengthExceeded is returned when attempting to write a key that is too long. - ErrMaxKeyLengthExceeded = fmt.Errorf("max key length exceeded") - - // ErrMaxBlocksExceeded is returned when attempting to write a block past the allowed number. - ErrMaxBlocksExceeded = fmt.Errorf("max blocks exceeded") -) - -// TSMWriter writes TSM formatted key and values. -type TSMWriter interface { - // Write writes a new block for key containing and values. Writes append - // blocks in the order that the Write function is called. The caller is - // responsible for ensuring keys and blocks are sorted appropriately. - // Values are encoded as a full block. The caller is responsible for - // ensuring a fixed number of values are encoded in each block as well as - // ensuring the Values are sorted. The first and last timestamp values are - // used as the minimum and maximum values for the index entry. - Write(key []byte, values Values) error - - // WriteBlock writes a new block for key containing the bytes in block. WriteBlock appends - // blocks in the order that the WriteBlock function is called. The caller is - // responsible for ensuring keys and blocks are sorted appropriately, and that the - // block and index information is correct for the block. The minTime and maxTime - // timestamp values are used as the minimum and maximum values for the index entry. - WriteBlock(key []byte, minTime, maxTime int64, block []byte) error - - // WriteIndex finishes the TSM write streams and writes the index. 
- WriteIndex() error - - // Flushes flushes all pending changes to the underlying file resources. - Flush() error - - // Close closes any underlying file resources. - Close() error - - // Size returns the current size in bytes of the file. - Size() uint32 - - // Stats returns the statistics generated by the writer. - MeasurementStats() MeasurementStats - - Remove() error -} - -// IndexWriter writes a TSMIndex. -type IndexWriter interface { - // Add records a new block entry for a key in the index. - Add(key []byte, blockType byte, minTime, maxTime int64, offset int64, size uint32) - - // Entries returns all index entries for a key. - Entries(key []byte) []IndexEntry - - // KeyCount returns the count of unique keys in the index. - KeyCount() int - - // Size returns the size of a the current index in bytes. - Size() uint32 - - // MarshalBinary returns a byte slice encoded version of the index. - MarshalBinary() ([]byte, error) - - // WriteTo writes the index contents to a writer. - WriteTo(w io.Writer) (int64, error) - - Close() error - - Remove() error -} - -// IndexEntry is the index information for a given block in a TSM file. -type IndexEntry struct { - // The min and max time of all points stored in the block. - MinTime, MaxTime int64 - - // The absolute position in the file where this block is located. - Offset int64 - - // The size in bytes of the block in the file. - Size uint32 -} - -// UnmarshalBinary decodes an IndexEntry from a byte slice. -func (e *IndexEntry) UnmarshalBinary(b []byte) error { - if len(b) < indexEntrySize { - return fmt.Errorf("unmarshalBinary: short buf: %v < %v", len(b), indexEntrySize) - } - e.MinTime = int64(binary.BigEndian.Uint64(b[:8])) - e.MaxTime = int64(binary.BigEndian.Uint64(b[8:16])) - e.Offset = int64(binary.BigEndian.Uint64(b[16:24])) - e.Size = binary.BigEndian.Uint32(b[24:28]) - return nil -} - -// AppendTo writes a binary-encoded version of IndexEntry to b, allocating -// and returning a new slice, if necessary. -func (e *IndexEntry) AppendTo(b []byte) []byte { - if len(b) < indexEntrySize { - if cap(b) < indexEntrySize { - b = make([]byte, indexEntrySize) - } else { - b = b[:indexEntrySize] - } - } - - binary.BigEndian.PutUint64(b[:8], uint64(e.MinTime)) - binary.BigEndian.PutUint64(b[8:16], uint64(e.MaxTime)) - binary.BigEndian.PutUint64(b[16:24], uint64(e.Offset)) - binary.BigEndian.PutUint32(b[24:28], uint32(e.Size)) - - return b -} - -// Contains returns true if this IndexEntry may contain values for the given time. -// The min and max times are inclusive. -func (e *IndexEntry) Contains(t int64) bool { - return e.MinTime <= t && e.MaxTime >= t -} - -// OverlapsTimeRange returns true if the given time ranges are completely within the entry's time bounds. -func (e *IndexEntry) OverlapsTimeRange(min, max int64) bool { - return e.MinTime <= max && e.MaxTime >= min -} - -// String returns a string representation of the entry. -func (e *IndexEntry) String() string { - return fmt.Sprintf("min=%s max=%s ofs=%d siz=%d", - time.Unix(0, e.MinTime).UTC(), time.Unix(0, e.MaxTime).UTC(), e.Offset, e.Size) -} - -// NewIndexWriter returns a new IndexWriter. -func NewIndexWriter() IndexWriter { - buf := bytes.NewBuffer(make([]byte, 0, 1024*1024)) - return &directIndex{buf: buf, w: bufio.NewWriter(buf)} -} - -// NewIndexWriter returns a new IndexWriter. 
-func NewDiskIndexWriter(f *os.File) IndexWriter { - return &directIndex{fd: f, w: bufio.NewWriterSize(f, 1024*1024)} -} - -type syncer interface { - Name() string - Sync() error -} - -// directIndex is a simple in-memory index implementation for a TSM file. The full index -// must fit in memory. -type directIndex struct { - keyCount int - size uint32 - - // The bytes written count of when we last fsync'd - lastSync uint32 - fd *os.File - buf *bytes.Buffer - - f syncer - - w *bufio.Writer - - key []byte - indexEntries *indexEntries -} - -type indexEntries struct { - Type byte - entries []IndexEntry -} - -func (a *indexEntries) Len() int { return len(a.entries) } -func (a *indexEntries) Swap(i, j int) { a.entries[i], a.entries[j] = a.entries[j], a.entries[i] } -func (a *indexEntries) Less(i, j int) bool { - return a.entries[i].MinTime < a.entries[j].MinTime -} - -func (a *indexEntries) MarshalBinary() ([]byte, error) { - buf := make([]byte, len(a.entries)*indexEntrySize) - - for i, entry := range a.entries { - entry.AppendTo(buf[indexEntrySize*i:]) - } - - return buf, nil -} - -func (a *indexEntries) WriteTo(w io.Writer) (total int64, err error) { - var buf [indexEntrySize]byte - var n int - - for _, entry := range a.entries { - entry.AppendTo(buf[:]) - n, err = w.Write(buf[:]) - total += int64(n) - if err != nil { - return total, err - } - } - - return total, nil -} - -func (d *directIndex) Add(key []byte, blockType byte, minTime, maxTime int64, offset int64, size uint32) { - // Is this the first block being added? - if len(d.key) == 0 { - // size of the key stored in the index - d.size += uint32(2 + len(key)) - // size of the count of entries stored in the index - d.size += indexCountSize - - d.key = key - if d.indexEntries == nil { - d.indexEntries = &indexEntries{} - } - d.indexEntries.Type = blockType - d.indexEntries.entries = append(d.indexEntries.entries, IndexEntry{ - MinTime: minTime, - MaxTime: maxTime, - Offset: offset, - Size: size, - }) - - // size of the encoded index entry - d.size += indexEntrySize - d.keyCount++ - return - } - - // See if were still adding to the same series key. - cmp := bytes.Compare(d.key, key) - if cmp == 0 { - // The last block is still this key - d.indexEntries.entries = append(d.indexEntries.entries, IndexEntry{ - MinTime: minTime, - MaxTime: maxTime, - Offset: offset, - Size: size, - }) - - // size of the encoded index entry - d.size += indexEntrySize - - } else if cmp < 0 { - d.flush(d.w) - // We have a new key that is greater than the last one so we need to add - // a new index block section. - - // size of the key stored in the index - d.size += uint32(2 + len(key)) - // size of the count of entries stored in the index - d.size += indexCountSize - - d.key = key - d.indexEntries.Type = blockType - d.indexEntries.entries = append(d.indexEntries.entries, IndexEntry{ - MinTime: minTime, - MaxTime: maxTime, - Offset: offset, - Size: size, - }) - - // size of the encoded index entry - d.size += indexEntrySize - d.keyCount++ - } else { - // Keys can't be added out of order. 
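directIndex.Add groups consecutive entries under the current key and only starts a new index section when a strictly greater key arrives; out-of-order keys hit the panic shown below. A hedged usage sketch against the exported IndexWriter, assuming the pre-removal import path and that block type 0 corresponds to float blocks in this package:

    package main

    import (
        "fmt"

        "github.com/influxdata/influxdb/v2/tsdb/tsm1"
    )

    func main() {
        idx := tsm1.NewIndexWriter()
        defer idx.Close()

        // Blocks for a key are added together; keys must arrive in ascending
        // byte order or Add panics with "keys must be added in sorted order".
        idx.Add([]byte("cpu"), 0, 0, 10, 5, 100)   // block type 0 assumed to be the float block type
        idx.Add([]byte("cpu"), 0, 11, 20, 109, 100)
        idx.Add([]byte("mem"), 0, 0, 10, 213, 100)

        fmt.Println(idx.KeyCount()) // 2 unique keys
        fmt.Println(idx.Size())     // encoded size of the index so far, in bytes
    }
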
- panic(fmt.Sprintf("keys must be added in sorted order: %s < %s", string(key), string(d.key))) - } -} - -func (d *directIndex) entries(key []byte) []IndexEntry { - if len(d.key) == 0 { - return nil - } - - if bytes.Equal(d.key, key) { - return d.indexEntries.entries - } - - return nil -} - -func (d *directIndex) Entries(key []byte) []IndexEntry { - return d.entries(key) -} - -func (d *directIndex) Entry(key []byte, t int64) *IndexEntry { - entries := d.entries(key) - for _, entry := range entries { - if entry.Contains(t) { - return &entry - } - } - return nil -} - -func (d *directIndex) KeyCount() int { - return d.keyCount -} - -// copyBuffer is the actual implementation of Copy and CopyBuffer. -// if buf is nil, one is allocated. This is copied from the Go stdlib -// in order to remove the fast path WriteTo calls which circumvent any -// IO throttling as well as to add periodic fsyncs to avoid long stalls. -func copyBuffer(f syncer, dst io.Writer, src io.Reader, buf []byte) (written int64, err error) { - if buf == nil { - buf = make([]byte, 32*1024) - } - var lastSync int64 - for { - nr, er := src.Read(buf) - if nr > 0 { - nw, ew := dst.Write(buf[0:nr]) - if nw > 0 { - written += int64(nw) - } - - if f != nil && written-lastSync > fsyncEvery { - if err := f.Sync(); err != nil { - return 0, err - } - lastSync = written - } - if ew != nil { - err = ew - break - } - if nr != nw { - err = io.ErrShortWrite - break - } - } - if er != nil { - if er != io.EOF { - err = er - } - break - } - } - return written, err -} - -func (d *directIndex) WriteTo(w io.Writer) (int64, error) { - if _, err := d.flush(d.w); err != nil { - return 0, err - } - - if err := d.w.Flush(); err != nil { - return 0, err - } - - if d.fd == nil { - return copyBuffer(d.f, w, d.buf, nil) - } - - if _, err := d.fd.Seek(0, io.SeekStart); err != nil { - return 0, err - } - - return io.Copy(w, bufio.NewReaderSize(d.fd, 1024*1024)) -} - -func (d *directIndex) flush(w io.Writer) (int64, error) { - var ( - n int - err error - buf [5]byte - N int64 - ) - - if len(d.key) == 0 { - return 0, nil - } - // For each key, individual entries are sorted by time - key := d.key - entries := d.indexEntries - - if entries.Len() > maxIndexEntries { - return N, fmt.Errorf("key '%s' exceeds max index entries: %d > %d", key, entries.Len(), maxIndexEntries) - } - - if !sort.IsSorted(entries) { - sort.Sort(entries) - } - - binary.BigEndian.PutUint16(buf[0:2], uint16(len(key))) - buf[2] = entries.Type - binary.BigEndian.PutUint16(buf[3:5], uint16(entries.Len())) - - // Append the key length and key - if n, err = w.Write(buf[0:2]); err != nil { - return int64(n) + N, fmt.Errorf("write: writer key length error: %v", err) - } - N += int64(n) - - if n, err = w.Write(key); err != nil { - return int64(n) + N, fmt.Errorf("write: writer key error: %v", err) - } - N += int64(n) - - // Append the block type and count - if n, err = w.Write(buf[2:5]); err != nil { - return int64(n) + N, fmt.Errorf("write: writer block type and count error: %v", err) - } - N += int64(n) - - // Append each index entry for all blocks for this key - var n64 int64 - if n64, err = entries.WriteTo(w); err != nil { - return n64 + N, fmt.Errorf("write: writer entries error: %v", err) - } - N += n64 - - d.key = nil - d.indexEntries.Type = 0 - d.indexEntries.entries = d.indexEntries.entries[:0] - - // If this is a disk based index and we've written more than the fsync threshold, - // fsync the data to avoid long pauses later on. 
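flush serialises each key's section of the index as a 2-byte key length, the key bytes, a 1-byte block type, a 2-byte entry count, and then count 28-byte entries. A stdlib-only sketch of walking one such section (decodeIndexKey is a hypothetical helper, not part of this package):

    package main

    import (
        "encoding/binary"
        "fmt"
    )

    const indexEntrySize = 28 // 8 min time + 8 max time + 8 offset + 4 size

    // decodeIndexKey reads one key's section of a TSM index and returns the key,
    // the block type, and the bytes remaining after that section.
    func decodeIndexKey(b []byte) (key []byte, typ byte, rest []byte, err error) {
        if len(b) < 2 {
            return nil, 0, nil, fmt.Errorf("short index section")
        }
        keyLen := int(binary.BigEndian.Uint16(b[0:2]))
        b = b[2:]
        if len(b) < keyLen+3 {
            return nil, 0, nil, fmt.Errorf("short index section")
        }
        key, b = b[:keyLen], b[keyLen:]
        typ = b[0]
        count := int(binary.BigEndian.Uint16(b[1:3]))
        b = b[3:]
        if len(b) < count*indexEntrySize {
            return nil, 0, nil, fmt.Errorf("short index entries")
        }
        return key, typ, b[count*indexEntrySize:], nil
    }

    func main() {
        // Hand-built section: key "cpu", block type 0, one zeroed 28-byte entry.
        section := []byte{0x00, 0x03, 'c', 'p', 'u', 0x00, 0x00, 0x01}
        section = append(section, make([]byte, indexEntrySize)...)

        key, typ, rest, err := decodeIndexKey(section)
        fmt.Println(string(key), typ, len(rest), err) // cpu 0 0 <nil>
    }
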
- if d.fd != nil && d.size-d.lastSync > fsyncEvery { - if err := d.fd.Sync(); err != nil { - return N, err - } - d.lastSync = d.size - } - - return N, nil - -} - -func (d *directIndex) MarshalBinary() ([]byte, error) { - var b bytes.Buffer - if _, err := d.WriteTo(&b); err != nil { - return nil, err - } - return b.Bytes(), nil -} - -func (d *directIndex) Size() uint32 { - return d.size -} - -func (d *directIndex) Close() error { - // Flush anything remaining in the index - if err := d.w.Flush(); err != nil { - return err - } - - if d.fd == nil { - return nil - } - - if err := d.fd.Close(); err != nil { - return err - } - return os.Remove(d.fd.Name()) -} - -// Remove removes the index from any tempory storage -func (d *directIndex) Remove() error { - if d.fd == nil { - return nil - } - - // Close the file handle to prevent leaking. We ignore the error because - // we just want to cleanup and remove the file. - _ = d.fd.Close() - - return os.Remove(d.fd.Name()) -} - -// tsmWriter writes keys and values in the TSM format -type tsmWriter struct { - wrapped io.Writer - w *bufio.Writer - index IndexWriter - n int64 - - // The bytes written count of when we last fsync'd - lastSync int64 - - stats MeasurementStats -} - -// NewTSMWriter returns a new TSMWriter writing to w. -func NewTSMWriter(w io.Writer) (TSMWriter, error) { - index := NewIndexWriter() - return &tsmWriter{ - wrapped: w, - w: bufio.NewWriterSize(w, 1024*1024), - index: index, - stats: NewMeasurementStats(), - }, nil -} - -// NewTSMWriterWithDiskBuffer returns a new TSMWriter writing to w and will use a disk -// based buffer for the TSM index if possible. -func NewTSMWriterWithDiskBuffer(w io.Writer) (TSMWriter, error) { - var index IndexWriter - // Make sure is a File so we can write the temp index alongside it. - if fw, ok := w.(syncer); ok { - f, err := os.OpenFile(strings.TrimSuffix(fw.Name(), ".tsm.tmp")+".idx.tmp", os.O_CREATE|os.O_RDWR|os.O_EXCL, 0666) - if err != nil { - return nil, err - } - index = NewDiskIndexWriter(f) - } else { - // w is not a file, just use an inmem index - index = NewIndexWriter() - } - - return &tsmWriter{ - wrapped: w, - w: bufio.NewWriterSize(w, 1024*1024), - index: index, - stats: NewMeasurementStats(), - }, nil -} - -// MeasurementStats returns the measurement statistics generated by the writer. -func (t *tsmWriter) MeasurementStats() MeasurementStats { return t.stats } - -func (t *tsmWriter) writeHeader() error { - var buf [5]byte - binary.BigEndian.PutUint32(buf[0:4], MagicNumber) - buf[4] = Version - - n, err := t.w.Write(buf[:]) - if err != nil { - return err - } - t.n = int64(n) - return nil -} - -// Write writes a new block containing key and values. -func (t *tsmWriter) Write(key []byte, values Values) error { - if len(key) > maxKeyLength { - return ErrMaxKeyLengthExceeded - } - - // Nothing to write - if len(values) == 0 { - return nil - } - - // Write header only after we have some data to write. 
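Putting the pieces together, a caller creates the writer, writes blocks for each key in sorted order, and finishes with WriteIndex and Close, exactly as the tests further down in this diff do. A hedged end-to-end sketch, again assuming the pre-removal import path:

    package main

    import (
        "bytes"
        "fmt"

        "github.com/influxdata/influxdb/v2/tsdb/tsm1"
    )

    func main() {
        var buf bytes.Buffer
        w, err := tsm1.NewTSMWriter(&buf)
        if err != nil {
            panic(err)
        }

        // Keys must be written in sorted byte order; each Write encodes one block.
        values := []tsm1.Value{tsm1.NewValue(0, 1.0), tsm1.NewValue(1, 2.0)}
        if err := w.Write([]byte("cpu,host=A#!~#value"), values); err != nil {
            panic(err)
        }
        if err := w.Write([]byte("mem,host=A#!~#value"), []tsm1.Value{tsm1.NewValue(0, 3.5)}); err != nil {
            panic(err)
        }

        // WriteIndex appends the index section and the 8-byte footer.
        if err := w.WriteIndex(); err != nil {
            panic(err)
        }
        if err := w.Close(); err != nil {
            panic(err)
        }
        fmt.Println("TSM bytes written:", buf.Len())
    }
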
- if t.n == 0 { - if err := t.writeHeader(); err != nil { - return err - } - } - - block, err := values.Encode(nil) - if err != nil { - return err - } - - blockType, err := BlockType(block) - if err != nil { - return err - } - - var checksum [crc32.Size]byte - binary.BigEndian.PutUint32(checksum[:], crc32.ChecksumIEEE(block)) - - _, err = t.w.Write(checksum[:]) - if err != nil { - return err - } - - n, err := t.w.Write(block) - if err != nil { - return err - } - n += len(checksum) - - // Record this block in index - t.index.Add(key, blockType, values[0].UnixNano(), values[len(values)-1].UnixNano(), t.n, uint32(n)) - - // Add block size to measurement stats. - name := models.ParseName(key) - t.stats[string(name)] += n - - // Increment file position pointer - t.n += int64(n) - - if len(t.index.Entries(key)) >= maxIndexEntries { - return ErrMaxBlocksExceeded - } - - return nil -} - -// WriteBlock writes block for the given key and time range to the TSM file. If the write -// exceeds max entries for a given key, ErrMaxBlocksExceeded is returned. This indicates -// that the index is now full for this key and no future writes to this key will succeed. -func (t *tsmWriter) WriteBlock(key []byte, minTime, maxTime int64, block []byte) error { - if len(key) > maxKeyLength { - return ErrMaxKeyLengthExceeded - } - - // Nothing to write - if len(block) == 0 { - return nil - } - - blockType, err := BlockType(block) - if err != nil { - return err - } - - // Write header only after we have some data to write. - if t.n == 0 { - if err := t.writeHeader(); err != nil { - return err - } - } - - var checksum [crc32.Size]byte - binary.BigEndian.PutUint32(checksum[:], crc32.ChecksumIEEE(block)) - - _, err = t.w.Write(checksum[:]) - if err != nil { - return err - } - - n, err := t.w.Write(block) - if err != nil { - return err - } - n += len(checksum) - - // Record this block in index - t.index.Add(key, blockType, minTime, maxTime, t.n, uint32(n)) - - // Add block size to measurement stats. - name := models.ParseName(key) - t.stats[string(name)] += n - - // Increment file position pointer (checksum + block len) - t.n += int64(n) - - // fsync the file periodically to avoid long pauses with very big files. - if t.n-t.lastSync > fsyncEvery { - if err := t.sync(); err != nil { - return err - } - t.lastSync = t.n - } - - if len(t.index.Entries(key)) >= maxIndexEntries { - return ErrMaxBlocksExceeded - } - - return nil -} - -// WriteIndex writes the index section of the file. If there are no index entries to write, -// this returns ErrNoValues. -func (t *tsmWriter) WriteIndex() error { - indexPos := t.n - - if t.index.KeyCount() == 0 { - return ErrNoValues - } - - // Set the destination file on the index so we can periodically - // fsync while writing the index. - if f, ok := t.wrapped.(syncer); ok { - t.index.(*directIndex).f = f - } - - // Write the index - if _, err := t.index.WriteTo(t.w); err != nil { - return err - } - - var buf [8]byte - binary.BigEndian.PutUint64(buf[:], uint64(indexPos)) - - // Write the index index position - _, err := t.w.Write(buf[:]) - return err -} - -func (t *tsmWriter) Flush() error { - if err := t.w.Flush(); err != nil { - return err - } - - return t.sync() -} - -func (t *tsmWriter) sync() error { - // sync is a minimal interface to make sure we can sync the wrapped - // value. we use a minimal interface to be as robust as possible for - // syncing these files. 
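Each block is framed on disk as a 4-byte big-endian CRC-32 (IEEE) of the encoded block followed by the block bytes, and the checksum bytes are counted in the index entry's size. A small stdlib sketch of that framing and the matching verification a reader would perform (frameBlock and verifyBlock are hypothetical helpers):

    package main

    import (
        "encoding/binary"
        "fmt"
        "hash/crc32"
    )

    // frameBlock prefixes an encoded block with its CRC-32 checksum, mirroring
    // how the writer lays blocks out on disk.
    func frameBlock(block []byte) []byte {
        out := make([]byte, 4+len(block))
        binary.BigEndian.PutUint32(out[:4], crc32.ChecksumIEEE(block))
        copy(out[4:], block)
        return out
    }

    // verifyBlock recomputes the checksum over the block body and compares it
    // with the stored prefix before the block is decoded.
    func verifyBlock(framed []byte) error {
        if len(framed) < 4 {
            return fmt.Errorf("framed block too short")
        }
        want := binary.BigEndian.Uint32(framed[:4])
        if got := crc32.ChecksumIEEE(framed[4:]); got != want {
            return fmt.Errorf("checksum mismatch: got %08x, want %08x", got, want)
        }
        return nil
    }

    func main() {
        framed := frameBlock([]byte("encoded block bytes"))
        fmt.Println(verifyBlock(framed)) // <nil>
    }
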
- type sync interface { - Sync() error - } - - if f, ok := t.wrapped.(sync); ok { - if err := f.Sync(); err != nil { - return err - } - } - return nil -} - -func (t *tsmWriter) writeStatsFile() error { - fw, ok := t.wrapped.(syncer) - if !ok { - return nil - } - - f, err := fs.CreateFile(StatsFilename(fw.Name())) - if err != nil { - return err - } - defer f.Close() - - if _, err := t.stats.WriteTo(f); err != nil { - return err - } else if err := f.Sync(); err != nil { - return err - } - return f.Close() -} - -func (t *tsmWriter) Close() error { - if err := t.Flush(); err != nil { - return err - } - - if err := t.index.Close(); err != nil { - return err - } - - // Write stats to disk, if writer is a file. - if err := t.writeStatsFile(); err != nil { - return err - } - - if c, ok := t.wrapped.(io.Closer); ok { - return c.Close() - } - return nil -} - -// Remove removes any temporary storage used by the writer. -func (t *tsmWriter) Remove() error { - if err := t.index.Remove(); err != nil { - return err - } - - // nameCloser is the most permissive interface we can close the wrapped - // value with. - type nameCloser interface { - io.Closer - Name() string - } - - if f, ok := t.wrapped.(nameCloser); ok { - // Close the file handle to prevent leaking. We ignore the error because - // we just want to cleanup and remove the file. - _ = f.Close() - - if err := os.Remove(f.Name()); err != nil { - return err - } else if err := os.Remove(StatsFilename(f.Name())); err != nil && !os.IsNotExist(err) { - return err - } - } - return nil -} - -func (t *tsmWriter) Size() uint32 { - return uint32(t.n) + t.index.Size() -} - -// verifyVersion verifies that the reader's bytes are a TSM byte -// stream of the correct version (1) -func verifyVersion(r io.ReadSeeker) error { - _, err := r.Seek(0, 0) - if err != nil { - return fmt.Errorf("init: failed to seek: %v", err) - } - var b [4]byte - _, err = io.ReadFull(r, b[:]) - if err != nil { - return fmt.Errorf("init: error reading magic number of file: %v", err) - } - if binary.BigEndian.Uint32(b[:]) != MagicNumber { - return fmt.Errorf("can only read from tsm file") - } - _, err = io.ReadFull(r, b[:1]) - if err != nil { - return fmt.Errorf("init: error reading version: %v", err) - } - if b[0] != Version { - return fmt.Errorf("init: file is version %b. 
expected %b", b[0], Version) - } - - return nil -} diff --git a/tsdb/tsm1/writer_test.go b/tsdb/tsm1/writer_test.go deleted file mode 100644 index cd0d50dea4..0000000000 --- a/tsdb/tsm1/writer_test.go +++ /dev/null @@ -1,647 +0,0 @@ -package tsm1_test - -import ( - "bufio" - "bytes" - "encoding/binary" - "io" - "io/ioutil" - "os" - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/influxdata/influxdb/v2/tsdb/tsm1" -) - -func TestTSMWriter_Write_Empty(t *testing.T) { - var b bytes.Buffer - w, err := tsm1.NewTSMWriter(&b) - if err != nil { - t.Fatalf("unexpected error created writer: %v", err) - } - - if err := w.WriteIndex(); err != tsm1.ErrNoValues { - t.Fatalf("unexpected error closing: %v", err) - } - - if got, exp := len(b.Bytes()), 0; got < exp { - t.Fatalf("file size mismatch: got %v, exp %v", got, exp) - } -} - -func TestTSMWriter_Write_NoValues(t *testing.T) { - var b bytes.Buffer - w, err := tsm1.NewTSMWriter(&b) - if err != nil { - t.Fatalf("unexpected error created writer: %v", err) - } - - if err := w.Write([]byte("foo"), []tsm1.Value{}); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - - if err := w.WriteIndex(); err != tsm1.ErrNoValues { - t.Fatalf("unexpected error closing: %v", err) - } - - if got, exp := len(b.Bytes()), 0; got < exp { - t.Fatalf("file size mismatch: got %v, exp %v", got, exp) - } -} - -func TestTSMWriter_Write_Single(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - f := MustTempFile(dir) - - w, err := tsm1.NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - values := []tsm1.Value{tsm1.NewValue(0, 1.0)} - if err := w.Write([]byte("cpu"), values); err != nil { - t.Fatalf("unexpected error writing: %v", err) - - } - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error writing index: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - fd, err := os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - - b, err := ioutil.ReadAll(fd) - if err != nil { - t.Fatalf("unexpected error reading: %v", err) - } - - if got, exp := len(b), 5; got < exp { - t.Fatalf("file size mismatch: got %v, exp %v", got, exp) - } - if got := binary.BigEndian.Uint32(b[0:4]); got != tsm1.MagicNumber { - t.Fatalf("magic number mismatch: got %v, exp %v", got, tsm1.MagicNumber) - } - - if _, err := fd.Seek(0, io.SeekStart); err != nil { - t.Fatalf("unexpected error seeking: %v", err) - } - - r, err := tsm1.NewTSMReader(fd) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - defer r.Close() - - readValues, err := r.ReadAll([]byte("cpu")) - if err != nil { - t.Fatalf("unexpected error readin: %v", err) - } - - if len(readValues) != len(values) { - t.Fatalf("read values length mismatch: got %v, exp %v", len(readValues), len(values)) - } - - for i, v := range values { - if v.Value() != readValues[i].Value() { - t.Fatalf("read value mismatch(%d): got %v, exp %d", i, readValues[i].Value(), v.Value()) - } - } -} - -func TestTSMWriter_Write_Multiple(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - f := MustTempFile(dir) - - w, err := tsm1.NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - var data = []struct { - key string - values []tsm1.Value - }{ - {"cpu", []tsm1.Value{tsm1.NewValue(0, 1.0)}}, - {"mem", []tsm1.Value{tsm1.NewValue(1, 2.0)}}, - } - - for _, d := range data { - if err := w.Write([]byte(d.key), 
d.values); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - fd, err := os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - - r, err := tsm1.NewTSMReader(fd) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - defer r.Close() - - for _, d := range data { - readValues, err := r.ReadAll([]byte(d.key)) - if err != nil { - t.Fatalf("unexpected error readin: %v", err) - } - - if exp := len(d.values); exp != len(readValues) { - t.Fatalf("read values length mismatch: got %v, exp %v", len(readValues), exp) - } - - for i, v := range d.values { - if v.Value() != readValues[i].Value() { - t.Fatalf("read value mismatch(%d): got %v, exp %d", i, readValues[i].Value(), v.Value()) - } - } - } -} - -func TestTSMWriter_Write_MultipleKeyValues(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - f := MustTempFile(dir) - - w, err := tsm1.NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - var data = []struct { - key string - values []tsm1.Value - }{ - {"cpu", []tsm1.Value{ - tsm1.NewValue(0, 1.0), - tsm1.NewValue(1, 2.0)}, - }, - {"mem", []tsm1.Value{ - tsm1.NewValue(0, 1.5), - tsm1.NewValue(1, 2.5)}, - }, - } - - for _, d := range data { - if err := w.Write([]byte(d.key), d.values); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - fd, err := os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - - r, err := tsm1.NewTSMReader(fd) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - defer r.Close() - - for _, d := range data { - readValues, err := r.ReadAll([]byte(d.key)) - if err != nil { - t.Fatalf("unexpected error readin: %v", err) - } - - if exp := len(d.values); exp != len(readValues) { - t.Fatalf("read values length mismatch: got %v, exp %v", len(readValues), exp) - } - - for i, v := range d.values { - if v.Value() != readValues[i].Value() { - t.Fatalf("read value mismatch(%d): got %v, exp %d", i, readValues[i].Value(), v.Value()) - } - } - } -} - -// Tests that writing keys in reverse is able to read them back. 
-func TestTSMWriter_Write_SameKey(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - f := MustTempFile(dir) - - w, err := tsm1.NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - var data = []struct { - key string - values []tsm1.Value - }{ - {"cpu", []tsm1.Value{ - tsm1.NewValue(0, 1.0), - tsm1.NewValue(1, 2.0)}, - }, - {"cpu", []tsm1.Value{ - tsm1.NewValue(2, 3.0), - tsm1.NewValue(3, 4.0)}, - }, - } - - for _, d := range data { - if err := w.Write([]byte(d.key), d.values); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - fd, err := os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - - r, err := tsm1.NewTSMReader(fd) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - defer r.Close() - - values := append(data[0].values, data[1].values...) - - readValues, err := r.ReadAll([]byte("cpu")) - if err != nil { - t.Fatalf("unexpected error readin: %v", err) - } - - if exp := len(values); exp != len(readValues) { - t.Fatalf("read values length mismatch: got %v, exp %v", len(readValues), exp) - } - - for i, v := range values { - if v.Value() != readValues[i].Value() { - t.Fatalf("read value mismatch(%d): got %v, exp %d", i, readValues[i].Value(), v.Value()) - } - } -} - -// Tests that calling Read returns all the values for block matching the key -// and timestamp -func TestTSMWriter_Read_Multiple(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - f := MustTempFile(dir) - - w, err := tsm1.NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - var data = []struct { - key string - values []tsm1.Value - }{ - {"cpu", []tsm1.Value{ - tsm1.NewValue(0, 1.0), - tsm1.NewValue(1, 2.0)}, - }, - {"cpu", []tsm1.Value{ - tsm1.NewValue(2, 3.0), - tsm1.NewValue(3, 4.0)}, - }, - } - - for _, d := range data { - if err := w.Write([]byte(d.key), d.values); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - fd, err := os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - - r, err := tsm1.NewTSMReader(fd) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - defer r.Close() - - for _, values := range data { - // Try the first timestamp - readValues, err := r.Read([]byte("cpu"), values.values[0].UnixNano()) - if err != nil { - t.Fatalf("unexpected error readin: %v", err) - } - - if exp := len(values.values); exp != len(readValues) { - t.Fatalf("read values length mismatch: got %v, exp %v", len(readValues), exp) - } - - for i, v := range values.values { - if v.Value() != readValues[i].Value() { - t.Fatalf("read value mismatch(%d): got %v, exp %d", i, readValues[i].Value(), v.Value()) - } - } - - // Try the last timestamp too - readValues, err = r.Read([]byte("cpu"), values.values[1].UnixNano()) - if err != nil { - t.Fatalf("unexpected error readin: %v", err) - } - - if exp := len(values.values); exp != len(readValues) { - t.Fatalf("read values length mismatch: got %v, exp %v", len(readValues), exp) - } - - for i, v := range values.values { - if v.Value() != 
readValues[i].Value() { - t.Fatalf("read value mismatch(%d): got %v, exp %d", i, readValues[i].Value(), v.Value()) - } - } - } -} - -func TestTSMWriter_WriteBlock_Empty(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - f := MustTempFile(dir) - - w, err := tsm1.NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - if err := w.WriteBlock([]byte("cpu"), 0, 0, nil); err != nil { - t.Fatalf("unexpected error writing block: %v", err) - } - - if err := w.WriteIndex(); err != tsm1.ErrNoValues { - t.Fatalf("unexpected error closing: %v", err) - } - - fd, err := os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - defer fd.Close() - - b, err := ioutil.ReadAll(fd) - if err != nil { - t.Fatalf("unexpected error read all: %v", err) - } - - if got, exp := len(b), 0; got < exp { - t.Fatalf("file size mismatch: got %v, exp %v", got, exp) - } -} - -func TestTSMWriter_WriteBlock_Multiple(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - f := MustTempFile(dir) - - w, err := tsm1.NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - var data = []struct { - key string - values []tsm1.Value - }{ - {"cpu", []tsm1.Value{tsm1.NewValue(0, 1.0)}}, - {"mem", []tsm1.Value{tsm1.NewValue(1, 2.0)}}, - } - - for _, d := range data { - if err := w.Write([]byte(d.key), d.values); err != nil { - t.Fatalf("unexpected error writing: %v", err) - } - } - - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - fd, err := os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - defer fd.Close() - - b, err := ioutil.ReadAll(fd) - if err != nil { - t.Fatalf("unexpected error read all: %v", err) - } - - if got, exp := len(b), 5; got < exp { - t.Fatalf("file size mismatch: got %v, exp %v", got, exp) - } - if got := binary.BigEndian.Uint32(b[0:4]); got != tsm1.MagicNumber { - t.Fatalf("magic number mismatch: got %v, exp %v", got, tsm1.MagicNumber) - } - - if _, err := fd.Seek(0, io.SeekStart); err != nil { - t.Fatalf("error seeking: %v", err) - } - - // Create reader for that file - r, err := tsm1.NewTSMReader(fd) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - - f = MustTempFile(dir) - w, err = tsm1.NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - iter := r.BlockIterator() - for iter.Next() { - key, minTime, maxTime, _, _, b, err := iter.Read() - if err != nil { - t.Fatalf("unexpected error reading block: %v", err) - } - if err := w.WriteBlock([]byte(key), minTime, maxTime, b); err != nil { - t.Fatalf("unexpected error writing block: %v", err) - } - } - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - fd, err = os.Open(f.Name()) - if err != nil { - t.Fatalf("unexpected error open file: %v", err) - } - - // Now create a reader to verify the written blocks matches the originally - // written file using Write - r, err = tsm1.NewTSMReader(fd) - if err != nil { - t.Fatalf("unexpected error created reader: %v", err) - } - defer r.Close() - - for _, d := range data { - readValues, err := r.ReadAll([]byte(d.key)) - if err != nil { - t.Fatalf("unexpected error readin: %v", err) - } - - if exp := len(d.values); 
exp != len(readValues) { - t.Fatalf("read values length mismatch: got %v, exp %v", len(readValues), exp) - } - - for i, v := range d.values { - if v.Value() != readValues[i].Value() { - t.Fatalf("read value mismatch(%d): got %v, exp %d", i, readValues[i].Value(), v.Value()) - } - } - } -} - -func TestTSMWriter_WriteBlock_MaxKey(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - f := MustTempFile(dir) - - w, err := tsm1.NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - key := bytes.Repeat([]byte("a"), 100000) - if err := w.WriteBlock(key, 0, 0, nil); err != tsm1.ErrMaxKeyLengthExceeded { - t.Fatalf("expected max key length error writing key: %v", err) - } -} - -func TestTSMWriter_Write_MaxKey(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - f := MustTempFile(dir) - defer f.Close() - - w, err := tsm1.NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error created writer: %v", err) - } - - key := bytes.Repeat([]byte("a"), 100000) - if err := w.Write(key, []tsm1.Value{tsm1.NewValue(0, 1.0)}); err != tsm1.ErrMaxKeyLengthExceeded { - t.Fatalf("expected max key length error writing key: %v", err) - } -} - -// Ensures that a writer will properly compute stats for multiple measurements. -func TestTSMWriter_Write_MultipleMeasurements(t *testing.T) { - dir := MustTempDir() - defer os.RemoveAll(dir) - - // Write file with multiple measurements. - f1 := MustWriteTSM(dir, 1, map[string][]tsm1.Value{ - "cpu,host=A#!~#value": {tsm1.NewValue(1, 1.1), tsm1.NewValue(2, 1.2)}, - "cpu,host=B#!~#value": {tsm1.NewValue(1, 1.1)}, - "mem,host=A#!~#value": {tsm1.NewValue(1, 1.1), tsm1.NewValue(2, 1.2)}, - "disk,host=A#!~#value": {tsm1.NewValue(1, 1.1)}, - }) - - stats := tsm1.NewMeasurementStats() - if f, err := os.Open(tsm1.StatsFilename(f1)); err != nil { - t.Fatal(err) - } else if _, err := stats.ReadFrom(bufio.NewReader(f)); err != nil { - t.Fatal(err) - } else if err := f.Close(); err != nil { - t.Fatal(err) - } else if diff := cmp.Diff(stats, tsm1.MeasurementStats{ - "cpu": 78, - "mem": 44, - "disk": 34, - }); diff != "" { - t.Fatal(diff) - } -} - -type fakeSyncer bool - -func (f *fakeSyncer) Sync() error { - *f = true - return nil -} - -func TestTSMWriter_Sync(t *testing.T) { - f := &struct { - io.Writer - fakeSyncer - }{ - Writer: ioutil.Discard, - } - - w, err := tsm1.NewTSMWriter(f) - if err != nil { - t.Fatalf("unexpected error creating writer: %v", err) - } - - values := []tsm1.Value{tsm1.NewValue(0, 1.0)} - if err := w.Write([]byte("cpu"), values); err != nil { - t.Fatalf("unexpected error writing: %v", err) - - } - if err := w.WriteIndex(); err != nil { - t.Fatalf("unexpected error writing index: %v", err) - } - - if err := w.Close(); err != nil { - t.Fatalf("unexpected error closing: %v", err) - } - - if !f.fakeSyncer { - t.Fatal("failed to sync") - } -} diff --git a/tsdb/value/value.go b/tsdb/value/value.go deleted file mode 100644 index 4070e2751c..0000000000 --- a/tsdb/value/value.go +++ /dev/null @@ -1,238 +0,0 @@ -package value - -import ( - "fmt" - "time" - - "github.com/influxdata/influxdb/v2/query" -) - -// Value represents a TSM-encoded value. -type Value interface { - // UnixNano returns the timestamp of the value in nanoseconds since unix epoch. - UnixNano() int64 - - // Value returns the underlying value. - Value() interface{} - - // Size returns the number of bytes necessary to represent the value and its timestamp. 
- Size() int - - // String returns the string representation of the value and its timestamp. - String() string - - // internalOnly is unexported to ensure implementations of Value - // can only originate in this package. - internalOnly() -} - -// NewValue returns a new Value with the underlying type dependent on value. -func NewValue(t int64, value interface{}) Value { - switch v := value.(type) { - case int64: - return IntegerValue{unixnano: t, value: v} - case uint64: - return UnsignedValue{unixnano: t, value: v} - case float64: - return FloatValue{unixnano: t, value: v} - case bool: - return BooleanValue{unixnano: t, value: v} - case string: - return StringValue{unixnano: t, value: v} - } - return EmptyValue{} -} - -// NewRawIntegerValue returns a new integer value. -func NewRawIntegerValue(t int64, v int64) IntegerValue { return IntegerValue{unixnano: t, value: v} } - -// NewRawUnsignedValue returns a new unsigned integer value. -func NewRawUnsignedValue(t int64, v uint64) UnsignedValue { - return UnsignedValue{unixnano: t, value: v} -} - -// NewRawFloatValue returns a new float value. -func NewRawFloatValue(t int64, v float64) FloatValue { return FloatValue{unixnano: t, value: v} } - -// NewRawBooleanValue returns a new boolean value. -func NewRawBooleanValue(t int64, v bool) BooleanValue { return BooleanValue{unixnano: t, value: v} } - -// NewRawStringValue returns a new string value. -func NewRawStringValue(t int64, v string) StringValue { return StringValue{unixnano: t, value: v} } - -// NewIntegerValue returns a new integer value. -func NewIntegerValue(t int64, v int64) Value { return NewRawIntegerValue(t, v) } - -// NewUnsignedValue returns a new unsigned integer value. -func NewUnsignedValue(t int64, v uint64) Value { return NewRawUnsignedValue(t, v) } - -// NewFloatValue returns a new float value. -func NewFloatValue(t int64, v float64) Value { return NewRawFloatValue(t, v) } - -// NewBooleanValue returns a new boolean value. -func NewBooleanValue(t int64, v bool) Value { return NewRawBooleanValue(t, v) } - -// NewStringValue returns a new string value. -func NewStringValue(t int64, v string) Value { return NewRawStringValue(t, v) } - -// EmptyValue is used when there is no appropriate other value. -type EmptyValue struct{} - -// UnixNano returns query.ZeroTime. -func (e EmptyValue) UnixNano() int64 { return query.ZeroTime } - -// Value returns nil. -func (e EmptyValue) Value() interface{} { return nil } - -// Size returns 0. -func (e EmptyValue) Size() int { return 0 } - -// String returns the empty string. -func (e EmptyValue) String() string { return "" } - -func (EmptyValue) internalOnly() {} -func (StringValue) internalOnly() {} -func (IntegerValue) internalOnly() {} -func (UnsignedValue) internalOnly() {} -func (BooleanValue) internalOnly() {} -func (FloatValue) internalOnly() {} - -// IntegerValue represents an int64 value. -type IntegerValue struct { - unixnano int64 - value int64 -} - -// Value returns the underlying int64 value. -func (v IntegerValue) Value() interface{} { - return v.value -} - -// UnixNano returns the timestamp of the value. -func (v IntegerValue) UnixNano() int64 { - return v.unixnano -} - -// Size returns the number of bytes necessary to represent the value and its timestamp. -func (v IntegerValue) Size() int { - return 16 -} - -// String returns the string representation of the value and its timestamp. 
-func (v IntegerValue) String() string { - return fmt.Sprintf("%v %v", time.Unix(0, v.unixnano), v.Value()) -} - -func (v IntegerValue) RawValue() int64 { return v.value } - -// UnsignedValue represents an int64 value. -type UnsignedValue struct { - unixnano int64 - value uint64 -} - -// Value returns the underlying int64 value. -func (v UnsignedValue) Value() interface{} { - return v.value -} - -// UnixNano returns the timestamp of the value. -func (v UnsignedValue) UnixNano() int64 { - return v.unixnano -} - -// Size returns the number of bytes necessary to represent the value and its timestamp. -func (v UnsignedValue) Size() int { - return 16 -} - -// String returns the string representation of the value and its timestamp. -func (v UnsignedValue) String() string { - return fmt.Sprintf("%v %v", time.Unix(0, v.unixnano), v.Value()) -} - -func (v UnsignedValue) RawValue() uint64 { return v.value } - -// FloatValue represents a float64 value. -type FloatValue struct { - unixnano int64 - value float64 -} - -// UnixNano returns the timestamp of the value. -func (v FloatValue) UnixNano() int64 { - return v.unixnano -} - -// Value returns the underlying float64 value. -func (v FloatValue) Value() interface{} { - return v.value -} - -// Size returns the number of bytes necessary to represent the value and its timestamp. -func (v FloatValue) Size() int { - return 16 -} - -// String returns the string representation of the value and its timestamp. -func (v FloatValue) String() string { - return fmt.Sprintf("%v %v", time.Unix(0, v.unixnano), v.value) -} - -func (v FloatValue) RawValue() float64 { return v.value } - -// BooleanValue represents a boolean value. -type BooleanValue struct { - unixnano int64 - value bool -} - -// Size returns the number of bytes necessary to represent the value and its timestamp. -func (v BooleanValue) Size() int { - return 9 -} - -// UnixNano returns the timestamp of the value in nanoseconds since unix epoch. -func (v BooleanValue) UnixNano() int64 { - return v.unixnano -} - -// Value returns the underlying boolean value. -func (v BooleanValue) Value() interface{} { - return v.value -} - -// String returns the string representation of the value and its timestamp. -func (v BooleanValue) String() string { - return fmt.Sprintf("%v %v", time.Unix(0, v.unixnano), v.Value()) -} - -func (v BooleanValue) RawValue() bool { return v.value } - -// StringValue represents a string value. -type StringValue struct { - unixnano int64 - value string -} - -// Value returns the underlying string value. -func (v StringValue) Value() interface{} { - return v.value -} - -// UnixNano returns the timestamp of the value. -func (v StringValue) UnixNano() int64 { - return v.unixnano -} - -// Size returns the number of bytes necessary to represent the value and its timestamp. -func (v StringValue) Size() int { - return 8 + len(v.value) -} - -// String returns the string representation of the value and its timestamp. 
-func (v StringValue) String() string { - return fmt.Sprintf("%v %v", time.Unix(0, v.unixnano), v.Value()) -} - -func (v StringValue) RawValue() string { return v.value } diff --git a/v1/storage/reads/group_resultset.go b/v1/storage/reads/group_resultset.go index ed3d788454..7ad4f97671 100644 --- a/v1/storage/reads/group_resultset.go +++ b/v1/storage/reads/group_resultset.go @@ -278,7 +278,7 @@ func (c *groupNoneCursor) Err() error { return nil } func (c *groupNoneCursor) Tags() models.Tags { return c.row.Tags } func (c *groupNoneCursor) Keys() [][]byte { return c.keys } func (c *groupNoneCursor) PartitionKeyVals() [][]byte { return nil } -func (c *groupNoneCursor) Close() { c.cur.Close() } +func (c *groupNoneCursor) Close() { c.cur.Close() } func (c *groupNoneCursor) Stats() cursors.CursorStats { return c.row.Query.Stats() } func (c *groupNoneCursor) Next() bool { diff --git a/tsdb/cursors/fieldtype_string.go b/v1/tsdb/cursors/fieldtype_string.go similarity index 100% rename from tsdb/cursors/fieldtype_string.go rename to v1/tsdb/cursors/fieldtype_string.go diff --git a/v1/tsdb/cursors/gen.go b/v1/tsdb/cursors/gen.go index ee7a8876a6..40bcfb2a3c 100644 --- a/v1/tsdb/cursors/gen.go +++ b/v1/tsdb/cursors/gen.go @@ -1 +1,4 @@ package cursors + +//go:generate env GO111MODULE=on go run github.com/benbjohnson/tmpl -data=@arrayvalues.gen.go.tmpldata arrayvalues.gen.go.tmpl +//go:generate stringer -type FieldType diff --git a/tsdb/cursors/schema.go b/v1/tsdb/cursors/schema.go similarity index 100% rename from tsdb/cursors/schema.go rename to v1/tsdb/cursors/schema.go diff --git a/tsdb/cursors/schema_test.go b/v1/tsdb/cursors/schema_test.go similarity index 99% rename from tsdb/cursors/schema_test.go rename to v1/tsdb/cursors/schema_test.go index a7ac5f4d62..36273faafd 100644 --- a/tsdb/cursors/schema_test.go +++ b/v1/tsdb/cursors/schema_test.go @@ -6,7 +6,7 @@ import ( "testing" "github.com/influxdata/influxdb/v2/pkg/testing/assert" - "github.com/influxdata/influxdb/v2/tsdb/cursors" + "github.com/influxdata/influxdb/v2/v1/tsdb/cursors" ) // Verifies FieldType precedence behavior is equivalent to influxql.DataType#LessThan