refactor: WIP removing tsdb

pull/19446/head
Edd Robinson 2020-04-24 07:58:24 -07:00 committed by Stuart Carnie
parent 05007a7df6
commit 2b175291be
No known key found for this signature in database
GPG Key ID: 848D9C9718D78B4F
263 changed files with 611 additions and 79242 deletions

View File

@ -8,7 +8,7 @@ import (
"github.com/influxdata/influxdb/v2"
"github.com/influxdata/influxdb/v2/internal/fs"
"github.com/influxdata/influxdb/v2/tsdb/tsm1"
"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
"github.com/spf13/cobra"
)

View File

@ -12,10 +12,9 @@ import (
"github.com/influxdata/influxdb/v2/pkg/fs"
"github.com/influxdata/influxdb/v2/storage/wal"
"github.com/influxdata/influxdb/v2/toml"
"github.com/influxdata/influxdb/v2/tsdb"
"github.com/influxdata/influxdb/v2/tsdb/seriesfile"
"github.com/influxdata/influxdb/v2/tsdb/tsi1"
"github.com/influxdata/influxdb/v2/tsdb/tsm1"
"github.com/influxdata/influxdb/v2/v1/tsdb"
"github.com/influxdata/influxdb/v2/v1/tsdb/index/tsi1"
"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
"go.uber.org/zap"
)

View File

@ -9,7 +9,7 @@ import (
"sort"
"sync"
"github.com/influxdata/influxdb/v2/tsdb"
"github.com/influxdata/influxdb/v2/v1/tsdb"
"go.uber.org/zap"
)
@ -125,7 +125,7 @@ func (v Verify) VerifyPartition(partitionPath string) (valid bool, err error) {
return false, err
}
segments := make([]*SeriesSegment, 0, len(segmentInfos))
segments := make([]*tsdb.SeriesSegment, 0, len(segmentInfos))
ids := make(map[uint64]IDData)
// check every segment
@ -137,7 +137,7 @@ func (v Verify) VerifyPartition(partitionPath string) (valid bool, err error) {
}
segmentPath := filepath.Join(partitionPath, segmentInfo.Name())
segmentID, err := ParseSeriesSegmentFilename(segmentInfo.Name())
segmentID, err := tsdb.ParseSeriesSegmentFilename(segmentInfo.Name())
if err != nil {
continue
}
@ -150,7 +150,7 @@ func (v Verify) VerifyPartition(partitionPath string) (valid bool, err error) {
// open the segment for verifying the index. we want it to be open outside
// the for loop as well, so the defer is ok.
segment := NewSeriesSegment(segmentID, segmentPath)
segment := tsdb.NewSeriesSegment(segmentID, segmentPath)
if err := segment.Open(); err != nil {
return false, err
}
@ -186,11 +186,11 @@ func (v Verify) VerifySegment(segmentPath string, ids map[uint64]IDData) (valid
v.Logger.Info("Verifying segment")
// Open up the segment and grab its data.
segmentID, err := ParseSeriesSegmentFilename(segmentName)
segmentID, err := tsdb.ParseSeriesSegmentFilename(segmentName)
if err != nil {
return false, err
}
segment := NewSeriesSegment(segmentID, segmentPath)
segment := tsdb.NewSeriesSegment(segmentID, segmentPath)
if err := segment.Open(); err != nil {
v.Logger.Error("Error opening segment", zap.Error(err))
return false, nil
@ -207,7 +207,7 @@ func (v Verify) VerifySegment(segmentPath string, ids map[uint64]IDData) (valid
}()
// Skip the header: it has already been verified by the Open call.
if err := buf.advance(SeriesSegmentHeaderSize); err != nil {
if err := buf.advance(tsdb.SeriesSegmentHeaderSize); err != nil {
v.Logger.Error("Unable to advance buffer",
zap.Int64("offset", buf.offset),
zap.Error(err))
@ -224,39 +224,39 @@ entries:
return false, nil
}
flag, id, key, sz := ReadSeriesEntry(buf.data)
flag, id, key, sz := tsdb.ReadSeriesEntry(buf.data)
// Check that the flag is valid and that ids are monotonically increasing.
hasKey := true
switch flag {
case SeriesEntryInsertFlag:
if !firstID && prevID > id.RawID() {
case tsdb.SeriesEntryInsertFlag:
if !firstID && prevID > id {
v.Logger.Error("ID is not monotonically increasing",
zap.Uint64("prev_id", prevID),
zap.Uint64("id", id.RawID()),
zap.Uint64("id", id),
zap.Int64("offset", buf.offset))
return false, nil
}
firstID = false
prevID = id.RawID()
prevID = id
if ids != nil {
keyCopy := make([]byte, len(key))
copy(keyCopy, key)
ids[id.RawID()] = IDData{
Offset: JoinSeriesOffset(segment.ID(), uint32(buf.offset)),
ids[id] = IDData{
Offset: tsdb.JoinSeriesOffset(segment.ID(), uint32(buf.offset)),
Key: keyCopy,
}
}
case SeriesEntryTombstoneFlag:
case tsdb.SeriesEntryTombstoneFlag:
hasKey = false
if ids != nil {
data := ids[id.RawID()]
data := ids[id]
data.Deleted = true
ids[id.RawID()] = data
ids[id] = data
}
case 0: // if zero, there are no more entries
@ -288,7 +288,7 @@ entries:
zap.String("recovered", fmt.Sprint(rec)))
}
}()
ParseSeriesKey(key)
tsdb.ParseSeriesKey(key)
parsed = true
}()
if !parsed {
@ -311,7 +311,7 @@ entries:
// VerifyIndex performs verification on an index in a series file. The error is only returned
// if there was some fatal problem with operating, not if there was a problem with the partition.
// The ids map must be built from verifying the passed in segments.
func (v Verify) VerifyIndex(indexPath string, segments []*SeriesSegment,
func (v Verify) VerifyIndex(indexPath string, segments []*tsdb.SeriesSegment,
ids map[uint64]IDData) (valid bool, err error) {
v.Logger.Info("Verifying index")
@ -322,7 +322,7 @@ func (v Verify) VerifyIndex(indexPath string, segments []*SeriesSegment,
}
}()
index := NewSeriesIndex(indexPath)
index := tsdb.NewSeriesIndex(indexPath)
if err := index.Open(); err != nil {
v.Logger.Error("Error opening index", zap.Error(err))
return false, nil
@ -353,7 +353,7 @@ func (v Verify) VerifyIndex(indexPath string, segments []*SeriesSegment,
IDData := ids[id]
if gotDeleted := index.IsDeleted(tsdb.NewSeriesID(id)); gotDeleted != IDData.Deleted {
if gotDeleted := index.IsDeleted(id); gotDeleted != IDData.Deleted {
v.Logger.Error("Index inconsistency",
zap.Uint64("id", id),
zap.Bool("got_deleted", gotDeleted),
@ -367,7 +367,7 @@ func (v Verify) VerifyIndex(indexPath string, segments []*SeriesSegment,
}
// otherwise, check both that the offset is right and that we get the right id for the key
if gotOffset := index.FindOffsetByID(tsdb.NewSeriesID(id)); gotOffset != IDData.Offset {
if gotOffset := index.FindOffsetByID(id); gotOffset != IDData.Offset {
v.Logger.Error("Index inconsistency",
zap.Uint64("id", id),
zap.Int64("got_offset", gotOffset),
@ -375,10 +375,10 @@ func (v Verify) VerifyIndex(indexPath string, segments []*SeriesSegment,
return false, nil
}
if gotID := index.FindIDBySeriesKey(segments, IDData.Key); gotID != tsdb.NewSeriesIDTyped(id) {
if gotID := index.FindIDBySeriesKey(segments, IDData.Key); gotID != id {
v.Logger.Error("Index inconsistency",
zap.Uint64("id", id),
zap.Uint64("got_id", gotID.RawID()),
zap.Uint64("got_id", gotID),
zap.Uint64("expected_id", id))
return false, nil
}
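
The mechanical theme of this hunk: the typed SeriesID wrappers are gone, and the v1 index operates on raw uint64 ids. A minimal sketch of the new calling convention, assuming an opened index and the segments slice from above:

func verifyOne(index *tsdb.SeriesIndex, segments []*tsdb.SeriesSegment, id uint64, key []byte) bool {
	// v1 methods take and return plain uint64 ids; the old
	// tsdb.NewSeriesID / tsdb.NewSeriesIDTyped wrappers are not needed.
	if index.IsDeleted(id) {
		return false
	}
	// Treat a zero offset as "not found" (an assumption for this sketch).
	if index.FindOffsetByID(id) == 0 {
		return false
	}
	return index.FindIDBySeriesKey(segments, key) == id
}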

View File

@ -1,7 +1,6 @@
package seriesfile_test
import (
"context"
"fmt"
"io"
"io/ioutil"
@ -10,9 +9,9 @@ import (
"testing"
"time"
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/tsdb"
"github.com/influxdata/influxdb/v2/tsdb/seriesfile"
"github.com/influxdata/influxdb/v2/cmd/influx_inspect/verify/seriesfile"
"github.com/influxdata/influxdb/v2/v1/models"
"github.com/influxdata/influxdb/v2/v1/tsdb"
"go.uber.org/zap"
)
@ -79,8 +78,8 @@ func NewTest(t *testing.T) *Test {
// create a series file in the directory
err = func() error {
seriesFile := seriesfile.NewSeriesFile(dir)
if err := seriesFile.Open(context.Background()); err != nil {
seriesFile := tsdb.NewSeriesFile(dir)
if err := seriesFile.Open(); err != nil {
return err
}
defer seriesFile.Close()
@ -88,7 +87,7 @@ func NewTest(t *testing.T) *Test {
const (
compactionThreshold = 100
numSeries = 2 * seriesfile.SeriesFilePartitionN * compactionThreshold
numSeries = 2 * tsdb.SeriesFilePartitionN * compactionThreshold
)
for _, partition := range seriesFile.Partitions() {
@ -103,17 +102,13 @@ func NewTest(t *testing.T) *Test {
tagsSlice = append(tagsSlice, nil)
}
keys := seriesfile.GenerateSeriesKeys(names, tagsSlice)
//keyPartitionIDs := seriesFile.SeriesKeysPartitionIDs(keys)
ids := make([]uint64, len(keys))
//ids, err := seriesFile.CreateSeriesListIfNotExists(names, tagsSlice)
ids, err := seriesFile.CreateSeriesListIfNotExists(names, tagsSlice)
if err != nil {
return err
}
// delete one series
if err := seriesFile.DeleteSeriesIDs([]tsdb.SeriesID{tsdb.NewSeriesID(ids[0])}); err != nil {
if err := seriesFile.DeleteSeriesID(ids[0]); err != nil {
return err
}
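
Condensed, the v1 series-file lifecycle this test now exercises looks like the sketch below (directory setup, the models import, and error details elided):

func writeAndDeleteOne(dir string, names [][]byte, tagsSlice []models.Tags) error {
	sfile := tsdb.NewSeriesFile(dir)
	if err := sfile.Open(); err != nil { // v1 Open takes no context.Context
		return err
	}
	defer sfile.Close()

	ids, err := sfile.CreateSeriesListIfNotExists(names, tagsSlice)
	if err != nil {
		return err
	}
	// v1 deletes a single raw uint64 id, not a []tsdb.SeriesID slice.
	return sfile.DeleteSeriesID(ids[0])
}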

View File

@ -0,0 +1,142 @@
// Package tombstone verifies integrity of tombstones.
package tombstone
import (
"errors"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"time"
"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
)
// Command represents the program execution for "influx_inspect verify-tombstone".
type Command struct {
Stderr io.Writer
Stdout io.Writer
}
// NewCommand returns a new instance of Command.
func NewCommand() *Command {
return &Command{
Stderr: os.Stderr,
Stdout: os.Stdout,
}
}
// Run executes the command.
func (cmd *Command) Run(args ...string) error {
runner := verifier{w: cmd.Stdout}
fs := flag.NewFlagSet("verify-tombstone", flag.ExitOnError)
fs.StringVar(&runner.path, "path", os.Getenv("HOME")+"/.influxdb", "path to find tombstone files")
v := fs.Bool("v", false, "verbose: emit periodic progress")
vv := fs.Bool("vv", false, "very verbose: emit every tombstone entry key and time range")
vvv := fs.Bool("vvv", false, "very very verbose: emit every tombstone entry key and RFC3339Nano time range")
fs.SetOutput(cmd.Stdout)
if err := fs.Parse(args); err != nil {
return err
}
if *v {
runner.verbosity = verbose
}
if *vv {
runner.verbosity = veryVerbose
}
if *vvv {
runner.verbosity = veryVeryVerbose
}
return runner.Run()
}
const (
quiet = iota
verbose
veryVerbose
veryVeryVerbose
)
type verifier struct {
path string
verbosity int
w io.Writer
files []string
f string
}
func (v *verifier) loadFiles() error {
return filepath.Walk(v.path, func(path string, f os.FileInfo, err error) error {
if err != nil {
return err
}
if filepath.Ext(path) == "."+tsm1.TombstoneFileExtension {
v.files = append(v.files, path)
}
return nil
})
}
func (v *verifier) Next() bool {
if len(v.files) == 0 {
return false
}
v.f, v.files = v.files[0], v.files[1:]
return true
}
func (v *verifier) Run() error {
if err := v.loadFiles(); err != nil {
return err
}
var failed bool
start := time.Now()
for v.Next() {
if v.verbosity > quiet {
fmt.Fprintf(v.w, "Verifying: %q\n", v.f)
}
tombstoner := tsm1.NewTombstoner(v.f, nil)
if !tombstoner.HasTombstones() {
fmt.Fprintf(v.w, "%s has no tombstone entries", v.f)
continue
}
var totalEntries int64
err := tombstoner.Walk(func(t tsm1.Tombstone) error {
totalEntries++
if v.verbosity > quiet && totalEntries%(10*1e6) == 0 {
fmt.Fprintf(v.w, "Verified %d tombstone entries\n", totalEntries)
} else if v.verbosity > verbose {
var min interface{} = t.Min
var max interface{} = t.Max
if v.verbosity > veryVerbose {
min = time.Unix(0, t.Min)
max = time.Unix(0, t.Max)
}
fmt.Printf("key: %q, min: %v, max: %v\n", t.Key, min, max)
}
return nil
})
if err != nil {
fmt.Fprintf(v.w, "%q failed to walk tombstone entries: %v. Last okay entry: %d\n", v.f, err, totalEntries)
failed = true
continue
}
fmt.Fprintf(v.w, "Completed verification for %q in %v.\nVerified %d entries\n\n", v.f, time.Since(start), totalEntries)
}
if failed {
return errors.New("failed tombstone verification")
}
return nil
}
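
A hypothetical invocation of the new command; the import path is assumed by analogy with the verify/seriesfile package above and does not appear in this diff:

package main

import (
	"log"

	"github.com/influxdata/influxdb/v2/cmd/influx_inspect/verify/tombstone" // assumed path
)

func main() {
	cmd := tombstone.NewCommand()
	// -vv emits every tombstone entry's key and raw int64 time range;
	// per the flag help, -vvv decodes the range as RFC3339Nano instead.
	if err := cmd.Run("-path", "/var/lib/influxdb", "-vv"); err != nil {
		log.Fatal(err)
	}
}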

View File

@ -0,0 +1,232 @@
// Package tsm verifies integrity of TSM files.
package tsm
import (
"flag"
"fmt"
"hash/crc32"
"io"
"os"
"path/filepath"
"text/tabwriter"
"time"
"unicode/utf8"
"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
"github.com/pkg/errors"
)
// Command represents the program execution for "influx_inspect verify".
type Command struct {
Stderr io.Writer
Stdout io.Writer
}
// NewCommand returns a new instance of Command.
func NewCommand() *Command {
return &Command{
Stderr: os.Stderr,
Stdout: os.Stdout,
}
}
// Run executes the command.
func (cmd *Command) Run(args ...string) error {
var path string
fs := flag.NewFlagSet("verify", flag.ExitOnError)
fs.StringVar(&path, "dir", os.Getenv("HOME")+"/.influxdb", "Root storage path. [$HOME/.influxdb]")
var checkUTF8 bool
fs.BoolVar(&checkUTF8, "check-utf8", false, "Verify series keys are valid UTF-8")
fs.SetOutput(cmd.Stdout)
fs.Usage = cmd.printUsage
if err := fs.Parse(args); err != nil {
return err
}
dataPath := filepath.Join(path, "data")
tw := tabwriter.NewWriter(cmd.Stdout, 16, 8, 0, '\t', 0)
var runner verifier
if checkUTF8 {
runner = &verifyUTF8{}
} else {
runner = &verifyChecksums{}
}
err := runner.Run(tw, dataPath)
tw.Flush()
return err
}
// printUsage prints the usage message to STDERR.
func (cmd *Command) printUsage() {
usage := fmt.Sprintf(`Verifies the integrity of TSM files.
Usage: influx_inspect verify [flags]
-dir <path>
The root storage path.
Must be changed if you are using a non-default storage directory.
Defaults to "%[1]s/.influxdb".
-check-utf8
Verify series keys are valid UTF-8.
This check skips verification of block checksums.
`, os.Getenv("HOME"))
fmt.Fprint(cmd.Stdout, usage)
}
type verifyTSM struct {
files []string
f string
start time.Time
err error
}
func (v *verifyTSM) loadFiles(dataPath string) error {
err := filepath.Walk(dataPath, func(path string, f os.FileInfo, err error) error {
if err != nil {
return err
}
if filepath.Ext(path) == "."+tsm1.TSMFileExtension {
v.files = append(v.files, path)
}
return nil
})
if err != nil {
return errors.Wrap(err, "could not load storage files (use -dir for custom storage root)")
}
return nil
}
func (v *verifyTSM) Next() bool {
if len(v.files) == 0 {
return false
}
v.f, v.files = v.files[0], v.files[1:]
return true
}
func (v *verifyTSM) TSMReader() (string, *tsm1.TSMReader) {
file, err := os.OpenFile(v.f, os.O_RDONLY, 0600)
if err != nil {
v.err = err
return "", nil
}
reader, err := tsm1.NewTSMReader(file)
if err != nil {
file.Close()
v.err = err
return "", nil
}
return v.f, reader
}
func (v *verifyTSM) Start() {
v.start = time.Now()
}
func (v *verifyTSM) Elapsed() time.Duration {
return time.Since(v.start)
}
type verifyChecksums struct {
verifyTSM
totalErrors int
total int
}
func (v *verifyChecksums) Run(w io.Writer, dataPath string) error {
if err := v.loadFiles(dataPath); err != nil {
return err
}
v.Start()
for v.Next() {
f, reader := v.TSMReader()
if reader == nil {
break
}
blockItr := reader.BlockIterator()
fileErrors := 0
count := 0
for blockItr.Next() {
v.total++
key, _, _, _, checksum, buf, err := blockItr.Read()
if err != nil {
v.totalErrors++
fileErrors++
fmt.Fprintf(w, "%s: could not get checksum for key %v block %d due to error: %q\n", f, key, count, err)
} else if expected := crc32.ChecksumIEEE(buf); checksum != expected {
v.totalErrors++
fileErrors++
fmt.Fprintf(w, "%s: got %d but expected %d for key %v, block %d\n", f, checksum, expected, key, count)
}
count++
}
if fileErrors == 0 {
fmt.Fprintf(w, "%s: healthy\n", f)
}
reader.Close()
}
fmt.Fprintf(w, "Broken Blocks: %d / %d, in %vs\n", v.totalErrors, v.total, v.Elapsed().Seconds())
return v.err
}
type verifyUTF8 struct {
verifyTSM
totalErrors int
total int
}
func (v *verifyUTF8) Run(w io.Writer, dataPath string) error {
if err := v.loadFiles(dataPath); err != nil {
return err
}
v.Start()
for v.Next() {
f, reader := v.TSMReader()
if reader == nil {
break
}
n := reader.KeyCount()
fileErrors := 0
v.total += n
for i := 0; i < n; i++ {
key, _ := reader.KeyAt(i)
if !utf8.Valid(key) {
v.totalErrors++
fileErrors++
fmt.Fprintf(w, "%s: key #%d is not valid UTF-8\n", f, i)
}
}
if fileErrors == 0 {
fmt.Fprintf(w, "%s: healthy\n", f)
}
}
fmt.Fprintf(w, "Invalid Keys: %d / %d, in %vs\n", v.totalErrors, v.total, v.Elapsed().Seconds())
if v.totalErrors > 0 && v.err == nil {
v.err = errors.New("check-utf8: failed")
}
return v.err
}
type verifier interface {
Run(w io.Writer, dataPath string) error
}
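
Similarly, a hypothetical invocation of the TSM verifier (import path assumed):

package main

import (
	"log"

	"github.com/influxdata/influxdb/v2/cmd/influx_inspect/verify/tsm" // assumed path
)

func main() {
	cmd := tsm.NewCommand()
	// Default mode checks block CRC32 checksums; -check-utf8 instead
	// checks that every series key is valid UTF-8.
	if err := cmd.Run("-dir", "/var/lib/influxdb", "-check-utf8"); err != nil {
		log.Fatal(err)
	}
}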

View File

@ -0,0 +1,3 @@
package tsm_test
// TODO: write some tests
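
One possible shape for a first test, assuming imports of os, path/filepath, and testing plus the tsm package above; an empty data directory should verify cleanly:

func TestVerify_NoTSMFiles(t *testing.T) {
	dir := t.TempDir()
	// Run walks <dir>/data, so the directory must exist even when empty.
	if err := os.MkdirAll(filepath.Join(dir, "data"), 0755); err != nil {
		t.Fatal(err)
	}
	cmd := tsm.NewCommand()
	if err := cmd.Run("-dir", dir); err != nil {
		t.Fatalf("verify on empty dir: %v", err)
	}
}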

View File

@ -14,10 +14,9 @@ import (
"github.com/influxdata/influxdb/v2/pkg/data/gen"
"github.com/influxdata/influxdb/v2/pkg/limiter"
"github.com/influxdata/influxdb/v2/storage"
"github.com/influxdata/influxdb/v2/tsdb"
"github.com/influxdata/influxdb/v2/tsdb/seriesfile"
"github.com/influxdata/influxdb/v2/tsdb/tsi1"
"github.com/influxdata/influxdb/v2/tsdb/tsm1"
"github.com/influxdata/influxdb/v2/v1/tsdb"
"github.com/influxdata/influxdb/v2/v1/tsdb/index/tsi1"
"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
)
type Generator struct {

View File

@ -6,7 +6,7 @@ import (
"path/filepath"
"github.com/influxdata/influxdb/v2/pkg/data/gen"
"github.com/influxdata/influxdb/v2/tsdb/tsm1"
"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
)
const (

View File

@ -13,9 +13,9 @@ import (
"github.com/influxdata/influxdb/v2/cmd/influx_inspect/buildtsi"
"github.com/influxdata/influxdb/v2/logger"
"github.com/influxdata/influxdb/v2/storage"
"github.com/influxdata/influxdb/v2/tsdb/seriesfile"
"github.com/influxdata/influxdb/v2/tsdb/tsi1"
"github.com/influxdata/influxdb/v2/tsdb/tsm1"
"github.com/influxdata/influxdb/v2/v1/tsdb"
"github.com/influxdata/influxdb/v2/v1/tsdb/index/tsi1"
"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
"github.com/spf13/cobra"
)

View File

@ -12,7 +12,6 @@ import (
"github.com/influxdata/influxdb/v2/internal/fs"
"github.com/influxdata/influxdb/v2/storage"
"github.com/influxdata/influxdb/v2/tsdb/seriesfile"
"github.com/spf13/cobra"
"golang.org/x/sync/errgroup"
)

View File

@ -8,7 +8,7 @@ import (
"regexp"
"github.com/influxdata/influxdb/v2/internal/fs"
"github.com/influxdata/influxdb/v2/tsdb/tsi1"
"github.com/influxdata/influxdb/v2/v1/tsdb/index/tsi1"
"github.com/spf13/cobra"
"go.uber.org/zap"
)

View File

@ -3,7 +3,7 @@ package inspect
import (
"os"
"github.com/influxdata/influxdb/v2/tsdb/tsm1"
"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
"github.com/spf13/cobra"
)

View File

@ -7,8 +7,8 @@ import (
"path/filepath"
"github.com/influxdata/influxdb/v2/internal/fs"
"github.com/influxdata/influxdb/v2/tsdb/seriesfile"
"github.com/influxdata/influxdb/v2/tsdb/tsi1"
"github.com/influxdata/influxdb/v2/v1/tsdb"
"github.com/influxdata/influxdb/v2/v1/tsdb/index/tsi1"
"github.com/spf13/cobra"
)
@ -32,7 +32,7 @@ SQL format for easier inspection and debugging.`,
cmd.RunE = func(cmd *cobra.Command, args []string) error {
// Initialize series file.
sfile := seriesfile.NewSeriesFile(seriesFilePath)
sfile := tsdb.NewSeriesFile(seriesFilePath)
if err := sfile.Open(context.Background()); err != nil {
return err
}

View File

@ -6,7 +6,7 @@ import (
"os"
"github.com/influxdata/influxdb/v2"
"github.com/influxdata/influxdb/v2/tsdb/tsi1"
"github.com/influxdata/influxdb/v2/v1/tsdb/index/tsi1"
"github.com/spf13/cobra"
)

View File

@ -5,7 +5,7 @@ import (
"github.com/influxdata/influxdb/v2"
"github.com/influxdata/influxdb/v2/internal/fs"
"github.com/influxdata/influxdb/v2/kit/errors"
"github.com/influxdata/influxdb/v2/tsdb/tsm1"
"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
"github.com/spf13/cobra"
"os"
"path/filepath"

View File

@ -4,8 +4,8 @@ import (
"os"
"runtime"
"github.com/influxdata/influxdb/v2/cmd/influx_inspect/verify/seriesfile"
"github.com/influxdata/influxdb/v2/logger"
"github.com/influxdata/influxdb/v2/tsdb/seriesfile"
"github.com/spf13/cobra"
"go.uber.org/zap/zapcore"
)

View File

@ -6,7 +6,7 @@ import (
"path/filepath"
"github.com/influxdata/influxdb/v2/kit/cli"
"github.com/influxdata/influxdb/v2/tsdb/tsm1"
"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
"github.com/spf13/cobra"
)

View File

@ -13,7 +13,7 @@ import (
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/storage"
"github.com/influxdata/influxdb/v2/storage/reads"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
"github.com/influxdata/influxql"
"github.com/prometheus/client_golang/prometheus"
"go.uber.org/zap"

View File

@ -62,10 +62,10 @@ import (
"github.com/influxdata/influxdb/v2/task/backend/scheduler"
"github.com/influxdata/influxdb/v2/telemetry"
"github.com/influxdata/influxdb/v2/tenant"
_ "github.com/influxdata/influxdb/v2/tsdb/tsi1" // needed for tsi1
_ "github.com/influxdata/influxdb/v2/tsdb/tsm1" // needed for tsm1
storage2 "github.com/influxdata/influxdb/v2/v1/services/storage"
"github.com/influxdata/influxdb/v2/v1/storage/reads"
_ "github.com/influxdata/influxdb/v2/v1/tsdb/index/tsi1" // needed for tsi1
_ "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1" // needed for tsm1
"github.com/influxdata/influxdb/v2/vault"
pzap "github.com/influxdata/influxdb/v2/zap"
"github.com/opentracing/opentracing-go"

View File

@ -13,7 +13,7 @@ import (
"github.com/influxdata/influxdb/v2/cmd/influxd/launcher"
"github.com/influxdata/influxdb/v2/http"
"github.com/influxdata/influxdb/v2/toml"
"github.com/influxdata/influxdb/v2/tsdb/tsm1"
"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
)
func TestStorage_WriteAndQuery(t *testing.T) {

View File

@ -13,8 +13,8 @@ import (
"github.com/influxdata/influxdb/v2/cmd/influxd/launcher"
"github.com/influxdata/influxdb/v2/cmd/influxd/restore"
_ "github.com/influxdata/influxdb/v2/query/builtin"
_ "github.com/influxdata/influxdb/v2/tsdb/tsi1"
_ "github.com/influxdata/influxdb/v2/tsdb/tsm1"
_ "github.com/influxdata/influxdb/v2/v1/tsdb/index/tsi1"
_ "github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
"github.com/spf13/cobra"
)

View File

@ -6,7 +6,7 @@ import (
"github.com/influxdata/influxdb/v2/nats"
"github.com/influxdata/influxdb/v2/storage"
"github.com/influxdata/influxdb/v2/tsdb"
"github.com/influxdata/influxdb/v2/v1/tsdb"
"go.uber.org/zap"
)

View File

@ -2,7 +2,7 @@ package cli
import (
"github.com/influxdata/influxdb/v2"
"github.com/influxdata/influxdb/v2/tsdb"
"github.com/influxdata/influxdb/v2/v1/tsdb"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
)

View File

@ -4,7 +4,7 @@ import (
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/pkg/data/gen"
"github.com/influxdata/influxdb/v2/storage/reads"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
)
type GeneratorResultSet struct {

View File

@ -9,7 +9,7 @@ import (
"github.com/influxdata/influxdb/v2/mock"
"github.com/influxdata/influxdb/v2/pkg/data/gen"
"github.com/influxdata/influxdb/v2/storage/reads"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
)
func mustNewSpecFromToml(tb testing.TB, toml string) *gen.Spec {

View File

@ -19,7 +19,7 @@ import (
"github.com/google/go-cmp/cmp"
"github.com/influxdata/influxdb/v2"
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/tsdb"
"github.com/influxdata/influxdb/v2/v1/tsdb"
"github.com/stretchr/testify/assert"
)

View File

@ -7,8 +7,8 @@
package gen
import (
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/tsdb/tsm1"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
)
type FloatValues interface {

View File

@ -1,8 +1,8 @@
package gen
import (
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/tsdb/tsm1"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
)
{{range .}}

View File

@ -10,7 +10,7 @@ import (
"github.com/google/go-cmp/cmp"
"github.com/influxdata/influxdb/v2"
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/tsdb"
"github.com/influxdata/influxdb/v2/v1/tsdb"
)
var (

View File

@ -13,7 +13,7 @@ import (
"github.com/BurntSushi/toml"
"github.com/influxdata/influxdb/v2"
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/tsdb"
"github.com/influxdata/influxdb/v2/v1/tsdb"
"github.com/pkg/errors"
)

View File

@ -8,7 +8,7 @@ package gen
import (
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
)
type FloatValuesSequence interface {

View File

@ -2,7 +2,7 @@ package gen
import (
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
)
{{range .}}

View File

@ -3,7 +3,7 @@ package predicate
import (
"github.com/influxdata/influxdb/v2"
"github.com/influxdata/influxdb/v2/storage/reads/datatypes"
"github.com/influxdata/influxdb/v2/tsdb/tsm1"
"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
)
// Node is a predicate node.

View File

@ -10,7 +10,7 @@ import (
"github.com/influxdata/flux/plan"
"github.com/influxdata/influxdb/v2"
"github.com/influxdata/influxdb/v2/storage/reads/datatypes"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
)
// StorageReader is an interface for reading tables from the storage subsystem.

View File

@ -10,7 +10,7 @@ package compat
import (
"github.com/influxdata/influxdb/v2/storage"
"github.com/influxdata/influxdb/v2/toml"
"github.com/influxdata/influxdb/v2/tsdb/tsm1"
"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
)
// Config matches the old toml layout from the influxdb repo, so that we can read

View File

@ -5,9 +5,8 @@ import (
"time"
"github.com/influxdata/influxdb/v2/toml"
"github.com/influxdata/influxdb/v2/tsdb/seriesfile"
"github.com/influxdata/influxdb/v2/tsdb/tsi1"
"github.com/influxdata/influxdb/v2/tsdb/tsm1"
"github.com/influxdata/influxdb/v2/v1/tsdb/index/tsi1"
"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
)
// Default configuration values.

View File

@ -14,11 +14,11 @@ import (
"github.com/influxdata/influxdb/v2"
"github.com/influxdata/influxdb/v2/kit/tracing"
"github.com/influxdata/influxdb/v2/logger"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/coordinator"
"github.com/influxdata/influxdb/v2/v1/models"
"github.com/influxdata/influxdb/v2/v1/services/meta"
"github.com/influxdata/influxdb/v2/v1/tsdb"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
"github.com/influxdata/influxql"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"

View File

@ -4,7 +4,7 @@ import (
"context"
"github.com/influxdata/influxdb/v2"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
"github.com/influxdata/influxql"
)

View File

@ -4,7 +4,7 @@ import (
"context"
"github.com/influxdata/influxdb/v2"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
"github.com/influxdata/influxql"
)

View File

@ -4,7 +4,7 @@ import (
"context"
"github.com/influxdata/influxdb/v2"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
"github.com/influxdata/influxql"
)

View File

@ -15,8 +15,8 @@ import (
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/storage"
"github.com/influxdata/influxdb/v2/storage/reads/datatypes"
"github.com/influxdata/influxdb/v2/tsdb"
"github.com/influxdata/influxdb/v2/tsdb/tsm1"
"github.com/influxdata/influxdb/v2/v1/tsdb"
"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
"github.com/prometheus/client_golang/prometheus"
)

View File

@ -16,7 +16,7 @@ import (
"github.com/influxdata/influxdb/v2/query"
storage "github.com/influxdata/influxdb/v2/storage/reads"
"github.com/influxdata/influxdb/v2/storage/reads/datatypes"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
)
// GroupCursorError is returned when two different cursor types

View File

@ -18,8 +18,7 @@ import (
"github.com/influxdata/influxdb/v2"
"github.com/influxdata/influxdb/v2/models"
storage "github.com/influxdata/influxdb/v2/storage/reads"
"github.com/influxdata/influxdb/v2/storage/reads/datatypes"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
)
//
@ -308,7 +307,7 @@ func (t *floatWindowTable) advance() bool {
} else {
cr.cols[startColIdx] = start
cr.cols[stopColIdx] = stop
cr.cols[valueColIdxWithoutTime] = values
cr.cols[windowedValueColIdx] = values
}
t.appendTags(cr)
return true
@ -711,93 +710,26 @@ func (t *floatGroupTable) Do(f func(flux.ColReader) error) error {
}
func (t *floatGroupTable) advance() bool {
if t.cur == nil {
// For group aggregates, we will try to get all the series and all table buffers within those series
// all at once and merge them into one row when this advance() function is first called.
// At the end of this process, t.advanceCursor() already returns false and t.cur becomes nil.
// But we still need to return true to indicate that there is data to be returned.
// The second time when we call this advance(), t.cur is already nil, so we directly return false.
RETRY:
a := t.cur.Next()
l := a.Len()
if l == 0 {
if t.advanceCursor() {
goto RETRY
}
return false
}
var arr *cursors.FloatArray
var len int
for {
arr = t.cur.Next()
len = arr.Len()
if len > 0 {
break
}
if !t.advanceCursor() {
return false
}
}
// handle the group without aggregate case
if t.gc.Aggregate() == nil {
// Retrieve the buffer for the data to avoid allocating
// additional slices. If the buffer is still being used
// because the references were retained, then we will
// allocate a new buffer.
colReader := t.allocateBuffer(len)
colReader.cols[timeColIdx] = arrow.NewInt(arr.Timestamps, t.alloc)
colReader.cols[valueColIdx] = t.toArrowBuffer(arr.Values)
t.appendTags(colReader)
t.appendBounds(colReader)
return true
}
// handle the group with aggregate case
var value float64
// For group count, sum, min, and max, the timestamp here is always math.MaxInt64.
// their final result does not contain _time, so this timestamp value can be anything
// and it won't matter.
// For group first, we need to assign the initial value to math.MaxInt64 so
// we can find the row with the smallest timestamp.
// Do not worry about data with math.MaxInt64 as its real timestamp.
// In OSS we require a |> range() call in the query and a math.MaxInt64 timestamp
// cannot make it through.
var timestamp int64 = math.MaxInt64
if t.gc.Aggregate().Type == datatypes.AggregateTypeLast {
timestamp = math.MinInt64
}
for {
// note that for the group aggregate case, len here should always be 1
for i := 0; i < len; i++ {
switch t.gc.Aggregate().Type {
case datatypes.AggregateTypeCount:
panic("unsupported for aggregate count: Float")
case datatypes.AggregateTypeSum:
value += arr.Values[i]
case datatypes.AggregateTypeFirst:
if arr.Timestamps[i] < timestamp {
timestamp = arr.Timestamps[i]
value = arr.Values[i]
}
case datatypes.AggregateTypeLast:
if arr.Timestamps[i] > timestamp {
timestamp = arr.Timestamps[i]
value = arr.Values[i]
}
}
}
arr = t.cur.Next()
len = arr.Len()
if len > 0 {
continue
}
if !t.advanceCursor() {
break
}
}
colReader := t.allocateBuffer(1)
if IsSelector(t.gc.Aggregate()) {
colReader.cols[timeColIdx] = arrow.NewInt([]int64{timestamp}, t.alloc)
colReader.cols[valueColIdx] = t.toArrowBuffer([]float64{value})
} else {
colReader.cols[valueColIdxWithoutTime] = t.toArrowBuffer([]float64{value})
}
t.appendTags(colReader)
t.appendBounds(colReader)
// Retrieve the buffer for the data to avoid allocating
// additional slices. If the buffer is still being used
// because the references were retained, then we will
// allocate a new buffer.
cr := t.allocateBuffer(l)
cr.cols[timeColIdx] = arrow.NewInt(a.Timestamps, t.alloc)
cr.cols[valueColIdx] = t.toArrowBuffer(a.Values)
t.appendTags(cr)
t.appendBounds(cr)
return true
}
@ -1129,7 +1061,7 @@ func (t *integerWindowTable) advance() bool {
} else {
cr.cols[startColIdx] = start
cr.cols[stopColIdx] = stop
cr.cols[valueColIdxWithoutTime] = values
cr.cols[windowedValueColIdx] = values
}
t.appendTags(cr)
return true
@ -1532,93 +1464,26 @@ func (t *integerGroupTable) Do(f func(flux.ColReader) error) error {
}
func (t *integerGroupTable) advance() bool {
if t.cur == nil {
// For group aggregates, we will try to get all the series and all table buffers within those series
// all at once and merge them into one row when this advance() function is first called.
// At the end of this process, t.advanceCursor() already returns false and t.cur becomes nil.
// But we still need to return true to indicate that there is data to be returned.
// The second time when we call this advance(), t.cur is already nil, so we directly return false.
RETRY:
a := t.cur.Next()
l := a.Len()
if l == 0 {
if t.advanceCursor() {
goto RETRY
}
return false
}
var arr *cursors.IntegerArray
var len int
for {
arr = t.cur.Next()
len = arr.Len()
if len > 0 {
break
}
if !t.advanceCursor() {
return false
}
}
// handle the group without aggregate case
if t.gc.Aggregate() == nil {
// Retrieve the buffer for the data to avoid allocating
// additional slices. If the buffer is still being used
// because the references were retained, then we will
// allocate a new buffer.
colReader := t.allocateBuffer(len)
colReader.cols[timeColIdx] = arrow.NewInt(arr.Timestamps, t.alloc)
colReader.cols[valueColIdx] = t.toArrowBuffer(arr.Values)
t.appendTags(colReader)
t.appendBounds(colReader)
return true
}
// handle the group with aggregate case
var value int64
// For group count, sum, min, and max, the timestamp here is always math.MaxInt64.
// their final result does not contain _time, so this timestamp value can be anything
// and it won't matter.
// For group first, we need to assign the initial value to math.MaxInt64 so
// we can find the row with the smallest timestamp.
// Do not worry about data with math.MaxInt64 as its real timestamp.
// In OSS we require a |> range() call in the query and a math.MaxInt64 timestamp
// cannot make it through.
var timestamp int64 = math.MaxInt64
if t.gc.Aggregate().Type == datatypes.AggregateTypeLast {
timestamp = math.MinInt64
}
for {
// note that for the group aggregate case, len here should always be 1
for i := 0; i < len; i++ {
switch t.gc.Aggregate().Type {
case datatypes.AggregateTypeCount:
fallthrough
case datatypes.AggregateTypeSum:
value += arr.Values[i]
case datatypes.AggregateTypeFirst:
if arr.Timestamps[i] < timestamp {
timestamp = arr.Timestamps[i]
value = arr.Values[i]
}
case datatypes.AggregateTypeLast:
if arr.Timestamps[i] > timestamp {
timestamp = arr.Timestamps[i]
value = arr.Values[i]
}
}
}
arr = t.cur.Next()
len = arr.Len()
if len > 0 {
continue
}
if !t.advanceCursor() {
break
}
}
colReader := t.allocateBuffer(1)
if IsSelector(t.gc.Aggregate()) {
colReader.cols[timeColIdx] = arrow.NewInt([]int64{timestamp}, t.alloc)
colReader.cols[valueColIdx] = t.toArrowBuffer([]int64{value})
} else {
colReader.cols[valueColIdxWithoutTime] = t.toArrowBuffer([]int64{value})
}
t.appendTags(colReader)
t.appendBounds(colReader)
// Retrieve the buffer for the data to avoid allocating
// additional slices. If the buffer is still being used
// because the references were retained, then we will
// allocate a new buffer.
cr := t.allocateBuffer(l)
cr.cols[timeColIdx] = arrow.NewInt(a.Timestamps, t.alloc)
cr.cols[valueColIdx] = t.toArrowBuffer(a.Values)
t.appendTags(cr)
t.appendBounds(cr)
return true
}
@ -1948,7 +1813,7 @@ func (t *unsignedWindowTable) advance() bool {
} else {
cr.cols[startColIdx] = start
cr.cols[stopColIdx] = stop
cr.cols[valueColIdxWithoutTime] = values
cr.cols[windowedValueColIdx] = values
}
t.appendTags(cr)
return true
@ -2351,93 +2216,26 @@ func (t *unsignedGroupTable) Do(f func(flux.ColReader) error) error {
}
func (t *unsignedGroupTable) advance() bool {
if t.cur == nil {
// For group aggregates, we will try to get all the series and all table buffers within those series
// all at once and merge them into one row when this advance() function is first called.
// At the end of this process, t.advanceCursor() already returns false and t.cur becomes nil.
// But we still need to return true to indicate that there is data to be returned.
// The second time when we call this advance(), t.cur is already nil, so we directly return false.
RETRY:
a := t.cur.Next()
l := a.Len()
if l == 0 {
if t.advanceCursor() {
goto RETRY
}
return false
}
var arr *cursors.UnsignedArray
var len int
for {
arr = t.cur.Next()
len = arr.Len()
if len > 0 {
break
}
if !t.advanceCursor() {
return false
}
}
// handle the group without aggregate case
if t.gc.Aggregate() == nil {
// Retrieve the buffer for the data to avoid allocating
// additional slices. If the buffer is still being used
// because the references were retained, then we will
// allocate a new buffer.
colReader := t.allocateBuffer(len)
colReader.cols[timeColIdx] = arrow.NewInt(arr.Timestamps, t.alloc)
colReader.cols[valueColIdx] = t.toArrowBuffer(arr.Values)
t.appendTags(colReader)
t.appendBounds(colReader)
return true
}
// handle the group with aggregate case
var value uint64
// For group count, sum, min, and max, the timestamp here is always math.MaxInt64.
// their final result does not contain _time, so this timestamp value can be anything
// and it won't matter.
// For group first, we need to assign the initial value to math.MaxInt64 so
// we can find the row with the smallest timestamp.
// Do not worry about data with math.MaxInt64 as its real timestamp.
// In OSS we require a |> range() call in the query and a math.MaxInt64 timestamp
// cannot make it through.
var timestamp int64 = math.MaxInt64
if t.gc.Aggregate().Type == datatypes.AggregateTypeLast {
timestamp = math.MinInt64
}
for {
// note that for the group aggregate case, len here should always be 1
for i := 0; i < len; i++ {
switch t.gc.Aggregate().Type {
case datatypes.AggregateTypeCount:
panic("unsupported for aggregate count: Unsigned")
case datatypes.AggregateTypeSum:
value += arr.Values[i]
case datatypes.AggregateTypeFirst:
if arr.Timestamps[i] < timestamp {
timestamp = arr.Timestamps[i]
value = arr.Values[i]
}
case datatypes.AggregateTypeLast:
if arr.Timestamps[i] > timestamp {
timestamp = arr.Timestamps[i]
value = arr.Values[i]
}
}
}
arr = t.cur.Next()
len = arr.Len()
if len > 0 {
continue
}
if !t.advanceCursor() {
break
}
}
colReader := t.allocateBuffer(1)
if IsSelector(t.gc.Aggregate()) {
colReader.cols[timeColIdx] = arrow.NewInt([]int64{timestamp}, t.alloc)
colReader.cols[valueColIdx] = t.toArrowBuffer([]uint64{value})
} else {
colReader.cols[valueColIdxWithoutTime] = t.toArrowBuffer([]uint64{value})
}
t.appendTags(colReader)
t.appendBounds(colReader)
// Retrieve the buffer for the data to avoid allocating
// additional slices. If the buffer is still being used
// because the references were retained, then we will
// allocate a new buffer.
cr := t.allocateBuffer(l)
cr.cols[timeColIdx] = arrow.NewInt(a.Timestamps, t.alloc)
cr.cols[valueColIdx] = t.toArrowBuffer(a.Values)
t.appendTags(cr)
t.appendBounds(cr)
return true
}
@ -2767,7 +2565,7 @@ func (t *stringWindowTable) advance() bool {
} else {
cr.cols[startColIdx] = start
cr.cols[stopColIdx] = stop
cr.cols[valueColIdxWithoutTime] = values
cr.cols[windowedValueColIdx] = values
}
t.appendTags(cr)
return true
@ -3170,93 +2968,26 @@ func (t *stringGroupTable) Do(f func(flux.ColReader) error) error {
}
func (t *stringGroupTable) advance() bool {
if t.cur == nil {
// For group aggregates, we will try to get all the series and all table buffers within those series
// all at once and merge them into one row when this advance() function is first called.
// At the end of this process, t.advanceCursor() already returns false and t.cur becomes nil.
// But we still need to return true to indicate that there is data to be returned.
// The second time when we call this advance(), t.cur is already nil, so we directly return false.
RETRY:
a := t.cur.Next()
l := a.Len()
if l == 0 {
if t.advanceCursor() {
goto RETRY
}
return false
}
var arr *cursors.StringArray
var len int
for {
arr = t.cur.Next()
len = arr.Len()
if len > 0 {
break
}
if !t.advanceCursor() {
return false
}
}
// handle the group without aggregate case
if t.gc.Aggregate() == nil {
// Retrieve the buffer for the data to avoid allocating
// additional slices. If the buffer is still being used
// because the references were retained, then we will
// allocate a new buffer.
colReader := t.allocateBuffer(len)
colReader.cols[timeColIdx] = arrow.NewInt(arr.Timestamps, t.alloc)
colReader.cols[valueColIdx] = t.toArrowBuffer(arr.Values)
t.appendTags(colReader)
t.appendBounds(colReader)
return true
}
// handle the group with aggregate case
var value string
// For group count, sum, min, and max, the timestamp here is always math.MaxInt64.
// their final result does not contain _time, so this timestamp value can be anything
// and it won't matter.
// For group first, we need to assign the initial value to math.MaxInt64 so
// we can find the row with the smallest timestamp.
// Do not worry about data with math.MaxInt64 as its real timestamp.
// In OSS we require a |> range() call in the query and a math.MaxInt64 timestamp
// cannot make it through.
var timestamp int64 = math.MaxInt64
if t.gc.Aggregate().Type == datatypes.AggregateTypeLast {
timestamp = math.MinInt64
}
for {
// note that for the group aggregate case, len here should always be 1
for i := 0; i < len; i++ {
switch t.gc.Aggregate().Type {
case datatypes.AggregateTypeCount:
panic("unsupported for aggregate count: String")
case datatypes.AggregateTypeSum:
panic("unsupported for aggregate sum: String")
case datatypes.AggregateTypeFirst:
if arr.Timestamps[i] < timestamp {
timestamp = arr.Timestamps[i]
value = arr.Values[i]
}
case datatypes.AggregateTypeLast:
if arr.Timestamps[i] > timestamp {
timestamp = arr.Timestamps[i]
value = arr.Values[i]
}
}
}
arr = t.cur.Next()
len = arr.Len()
if len > 0 {
continue
}
if !t.advanceCursor() {
break
}
}
colReader := t.allocateBuffer(1)
if IsSelector(t.gc.Aggregate()) {
colReader.cols[timeColIdx] = arrow.NewInt([]int64{timestamp}, t.alloc)
colReader.cols[valueColIdx] = t.toArrowBuffer([]string{value})
} else {
colReader.cols[valueColIdxWithoutTime] = t.toArrowBuffer([]string{value})
}
t.appendTags(colReader)
t.appendBounds(colReader)
// Retrieve the buffer for the data to avoid allocating
// additional slices. If the buffer is still being used
// because the references were retained, then we will
// allocate a new buffer.
cr := t.allocateBuffer(l)
cr.cols[timeColIdx] = arrow.NewInt(a.Timestamps, t.alloc)
cr.cols[valueColIdx] = t.toArrowBuffer(a.Values)
t.appendTags(cr)
t.appendBounds(cr)
return true
}
@ -3586,7 +3317,7 @@ func (t *booleanWindowTable) advance() bool {
} else {
cr.cols[startColIdx] = start
cr.cols[stopColIdx] = stop
cr.cols[valueColIdxWithoutTime] = values
cr.cols[windowedValueColIdx] = values
}
t.appendTags(cr)
return true
@ -3989,93 +3720,26 @@ func (t *booleanGroupTable) Do(f func(flux.ColReader) error) error {
}
func (t *booleanGroupTable) advance() bool {
if t.cur == nil {
// For group aggregates, we will try to get all the series and all table buffers within those series
// all at once and merge them into one row when this advance() function is first called.
// At the end of this process, t.advanceCursor() already returns false and t.cur becomes nil.
// But we still need to return true to indicate that there is data to be returned.
// The second time when we call this advance(), t.cur is already nil, so we directly return false.
RETRY:
a := t.cur.Next()
l := a.Len()
if l == 0 {
if t.advanceCursor() {
goto RETRY
}
return false
}
var arr *cursors.BooleanArray
var len int
for {
arr = t.cur.Next()
len = arr.Len()
if len > 0 {
break
}
if !t.advanceCursor() {
return false
}
}
// handle the group without aggregate case
if t.gc.Aggregate() == nil {
// Retrieve the buffer for the data to avoid allocating
// additional slices. If the buffer is still being used
// because the references were retained, then we will
// allocate a new buffer.
colReader := t.allocateBuffer(len)
colReader.cols[timeColIdx] = arrow.NewInt(arr.Timestamps, t.alloc)
colReader.cols[valueColIdx] = t.toArrowBuffer(arr.Values)
t.appendTags(colReader)
t.appendBounds(colReader)
return true
}
// handle the group with aggregate case
var value bool
// For group count, sum, min, and max, the timestamp here is always math.MaxInt64.
// their final result does not contain _time, so this timestamp value can be anything
// and it won't matter.
// For group first, we need to assign the initial value to math.MaxInt64 so
// we can find the row with the smallest timestamp.
// Do not worry about data with math.MaxInt64 as its real timestamp.
// In OSS we require a |> range() call in the query and a math.MaxInt64 timestamp
// cannot make it through.
var timestamp int64 = math.MaxInt64
if t.gc.Aggregate().Type == datatypes.AggregateTypeLast {
timestamp = math.MinInt64
}
for {
// note that for the group aggregate case, len here should always be 1
for i := 0; i < len; i++ {
switch t.gc.Aggregate().Type {
case datatypes.AggregateTypeCount:
panic("unsupported for aggregate count: Boolean")
case datatypes.AggregateTypeSum:
panic("unsupported for aggregate sum: Boolean")
case datatypes.AggregateTypeFirst:
if arr.Timestamps[i] < timestamp {
timestamp = arr.Timestamps[i]
value = arr.Values[i]
}
case datatypes.AggregateTypeLast:
if arr.Timestamps[i] > timestamp {
timestamp = arr.Timestamps[i]
value = arr.Values[i]
}
}
}
arr = t.cur.Next()
len = arr.Len()
if len > 0 {
continue
}
if !t.advanceCursor() {
break
}
}
colReader := t.allocateBuffer(1)
if IsSelector(t.gc.Aggregate()) {
colReader.cols[timeColIdx] = arrow.NewInt([]int64{timestamp}, t.alloc)
colReader.cols[valueColIdx] = t.toArrowBuffer([]bool{value})
} else {
colReader.cols[valueColIdxWithoutTime] = t.toArrowBuffer([]bool{value})
}
t.appendTags(colReader)
t.appendBounds(colReader)
// Retrieve the buffer for the data to avoid allocating
// additional slices. If the buffer is still being used
// because the references were retained, then we will
// allocate a new buffer.
cr := t.allocateBuffer(l)
cr.cols[timeColIdx] = arrow.NewInt(a.Timestamps, t.alloc)
cr.cols[valueColIdx] = t.toArrowBuffer(a.Values)
t.appendTags(cr)
t.appendBounds(cr)
return true
}

View File

@ -12,8 +12,7 @@ import (
"github.com/influxdata/influxdb/v2"
"github.com/influxdata/influxdb/v2/models"
storage "github.com/influxdata/influxdb/v2/storage/reads"
"github.com/influxdata/influxdb/v2/storage/reads/datatypes"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
)
{{range .}}
//
@ -304,7 +303,7 @@ func (t *{{.name}}WindowTable) advance() bool {
} else {
cr.cols[startColIdx] = start
cr.cols[stopColIdx] = stop
cr.cols[valueColIdxWithoutTime] = values
cr.cols[windowedValueColIdx] = values
}
t.appendTags(cr)
return true
@ -707,93 +706,26 @@ func (t *{{.name}}GroupTable) Do(f func(flux.ColReader) error) error {
}
func (t *{{.name}}GroupTable) advance() bool {
if t.cur == nil {
// For group aggregates, we will try to get all the series and all table buffers within those series
// all at once and merge them into one row when this advance() function is first called.
// At the end of this process, t.advanceCursor() already returns false and t.cur becomes nil.
// But we still need to return true to indicate that there is data to be returned.
// The second time when we call this advance(), t.cur is already nil, so we directly return false.
RETRY:
a := t.cur.Next()
l := a.Len()
if l == 0 {
if t.advanceCursor() {
goto RETRY
}
return false
}
var arr *cursors.{{.Name}}Array
var len int
for {
arr = t.cur.Next()
len = arr.Len()
if len > 0 {
break
}
if !t.advanceCursor() {
return false
}
}
// handle the group without aggregate case
if t.gc.Aggregate() == nil {
// Retrieve the buffer for the data to avoid allocating
// additional slices. If the buffer is still being used
// because the references were retained, then we will
// allocate a new buffer.
colReader := t.allocateBuffer(len)
colReader.cols[timeColIdx] = arrow.NewInt(arr.Timestamps, t.alloc)
colReader.cols[valueColIdx] = t.toArrowBuffer(arr.Values)
t.appendTags(colReader)
t.appendBounds(colReader)
return true
}
// handle the group with aggregate case
var value {{.Type}}
// For group count, sum, min, and max, the timestamp here is always math.MaxInt64.
// their final result does not contain _time, so this timestamp value can be anything
// and it won't matter.
// For group first, we need to assign the initial value to math.MaxInt64 so
// we can find the row with the smallest timestamp.
// Do not worry about data with math.MaxInt64 as its real timestamp.
// In OSS we require a |> range() call in the query and a math.MaxInt64 timestamp
// cannot make it through.
var timestamp int64 = math.MaxInt64
if t.gc.Aggregate().Type == datatypes.AggregateTypeLast {
timestamp = math.MinInt64
}
for {
// note that for the group aggregate case, len here should always be 1
for i := 0; i < len; i++ {
switch t.gc.Aggregate().Type {
case datatypes.AggregateTypeCount:
{{if eq .Name "Integer"}}fallthrough{{else}}panic("unsupported for aggregate count: {{.Name}}"){{end}}
case datatypes.AggregateTypeSum:
{{if or (eq .Name "String") (eq .Name "Boolean")}}panic("unsupported for aggregate sum: {{.Name}}"){{else}}value += arr.Values[i]{{end}}
case datatypes.AggregateTypeFirst:
if arr.Timestamps[i] < timestamp {
timestamp = arr.Timestamps[i]
value = arr.Values[i]
}
case datatypes.AggregateTypeLast:
if arr.Timestamps[i] > timestamp {
timestamp = arr.Timestamps[i]
value = arr.Values[i]
}
}
}
arr = t.cur.Next()
len = arr.Len()
if len > 0 {
continue
}
if !t.advanceCursor() {
break
}
}
colReader := t.allocateBuffer(1)
if IsSelector(t.gc.Aggregate()) {
colReader.cols[timeColIdx] = arrow.NewInt([]int64{timestamp}, t.alloc)
colReader.cols[valueColIdx] = t.toArrowBuffer([]{{.Type}}{value})
} else {
colReader.cols[valueColIdxWithoutTime] = t.toArrowBuffer([]{{.Type}}{value})
}
t.appendTags(colReader)
t.appendBounds(colReader)
// Retrieve the buffer for the data to avoid allocating
// additional slices. If the buffer is still being used
// because the references were retained, then we will
// allocate a new buffer.
cr := t.allocateBuffer(l)
cr.cols[timeColIdx] = arrow.NewInt(a.Timestamps, t.alloc)
cr.cols[valueColIdx] = t.toArrowBuffer(a.Values)
t.appendTags(cr)
t.appendBounds(cr)
return true
}

View File

@ -6,7 +6,6 @@ import (
"time"
"github.com/influxdata/influxdb/v2"
"github.com/influxdata/influxdb/v2/tsdb"
"github.com/influxdata/influxdb/v2/v1/models"
)
@ -52,20 +51,16 @@ func (w *LoggingPointsWriter) WritePoints(ctx context.Context, orgID influxdb.ID
}
// Log error to bucket.
name := tsdb.EncodeName(orgID, bkts[0].ID)
pt, e := models.NewPoint(
string(name[:]),
models.NewTags(map[string]string{
models.MeasurementTagKey: "write_errors",
models.FieldKeyTagKey: "error"},
),
"write_errors",
nil,
models.Fields{"error": err.Error()},
time.Now(),
)
if e != nil {
return e
}
if e := w.Underlying.WritePoints(ctx, orgID, bucketID, []models.Point{pt}); e != nil {
if e := w.Underlying.WritePoints(ctx, orgID, bkts[0].ID, []models.Point{pt}); e != nil {
return e
}

View File

@ -13,7 +13,7 @@ import (
"github.com/influxdata/influxdb/v2/mock"
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/storage"
"github.com/influxdata/influxdb/v2/tsdb"
"github.com/influxdata/influxdb/v2/v1/tsdb"
)
func TestLoggingPointsWriter(t *testing.T) {

View File

@ -8,7 +8,7 @@ import (
"github.com/influxdata/influxdb/v2/kit/tracing"
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/storage/reads/datatypes"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
)
type windowAggregateResultSet struct {

View File

@ -8,7 +8,7 @@ import (
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/storage/reads"
"github.com/influxdata/influxdb/v2/storage/reads/datatypes"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
)
func TestNewWindowAggregateResultSet_Tags(t *testing.T) {

View File

@ -11,7 +11,7 @@ import (
"fmt"
"math"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
)
const (

View File

@ -5,7 +5,7 @@ import (
"fmt"
"math"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
)
const (

View File

@ -5,7 +5,7 @@ import (
"fmt"
"github.com/influxdata/influxdb/v2/storage/reads/datatypes"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
)
type singleValue struct {

View File

@ -6,7 +6,7 @@ import (
"time"
"github.com/google/go-cmp/cmp"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
)
func TestIntegerFilterArrayCursor(t *testing.T) {

View File

@ -9,7 +9,7 @@ import (
"github.com/influxdata/influxdb/v2/kit/tracing"
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/storage/reads/datatypes"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
)
type groupResultSet struct {

View File

@ -5,7 +5,7 @@ import (
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/storage/reads/datatypes"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
)
type resultSet struct {

View File

@ -6,7 +6,7 @@ import (
"strconv"
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
)
// ResultSetToLineProtocol transforms rs to line protocol and writes the

View File

@ -10,7 +10,7 @@ import (
"github.com/influxdata/influxdb/v2/query"
"github.com/influxdata/influxdb/v2/storage"
"github.com/influxdata/influxdb/v2/storage/reads/datatypes"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
"github.com/influxdata/influxql"
)

View File

@ -7,7 +7,7 @@ import (
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/query"
"github.com/influxdata/influxdb/v2/storage/reads/datatypes"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
)
type ResultSet interface {

View File

@ -8,7 +8,7 @@ import (
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/storage/reads"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
)
func cursorToString(wr io.Writer, cur cursors.Cursor) {

View File

@ -5,7 +5,7 @@ import (
"github.com/influxdata/influxdb/v2"
"github.com/influxdata/influxdb/v2/storage"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
"github.com/influxdata/influxql"
)

View File

@ -9,7 +9,7 @@ import (
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/storage/reads"
"github.com/influxdata/influxdb/v2/storage/reads/datatypes"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
"github.com/influxdata/influxql"
)

View File

@ -9,7 +9,7 @@ import (
"github.com/influxdata/influxdb/v2"
"github.com/influxdata/influxdb/v2/kit/tracing"
"github.com/influxdata/influxdb/v2/logger"
"github.com/influxdata/influxdb/v2/tsdb/tsm1"
"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
"github.com/prometheus/client_golang/prometheus"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"

View File

@ -18,8 +18,8 @@ import (
"github.com/influxdata/influxdb/v2/kit/prom/promtest"
"github.com/influxdata/influxdb/v2/logger"
"github.com/influxdata/influxdb/v2/toml"
"github.com/influxdata/influxdb/v2/tsdb"
"github.com/influxdata/influxdb/v2/tsdb/tsm1"
"github.com/influxdata/influxdb/v2/v1/tsdb"
"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
"github.com/prometheus/client_golang/prometheus"
"go.uber.org/zap"
)

View File

@ -7,9 +7,8 @@ import (
"github.com/influxdata/influxdb/v2"
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/pkg/lifecycle"
"github.com/influxdata/influxdb/v2/tsdb"
"github.com/influxdata/influxdb/v2/tsdb/seriesfile"
"github.com/influxdata/influxdb/v2/tsdb/tsi1"
"github.com/influxdata/influxdb/v2/v1/tsdb"
"github.com/influxdata/influxdb/v2/v1/tsdb/index/tsi1"
"github.com/influxdata/influxql"
)

View File

@ -5,8 +5,7 @@ import (
"github.com/google/go-cmp/cmp"
"github.com/influxdata/influxdb/v2"
"github.com/influxdata/influxdb/v2/tsdb"
"github.com/influxdata/influxdb/v2/tsdb/seriesfile"
"github.com/influxdata/influxdb/v2/v1/tsdb"
)
func Test_NewSeriesCursor_UnexpectedOrg(t *testing.T) {

View File

@ -10,8 +10,8 @@ import (
"text/tabwriter"
"github.com/influxdata/influxdb/v2/storage/reads/datatypes"
"github.com/influxdata/influxdb/v2/tsdb"
"github.com/influxdata/influxdb/v2/tsdb/value"
"github.com/influxdata/influxdb/v2/v1/tsdb"
"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
)
// Command represents the program execution for "influxd inspect dump-wal".

View File

@ -12,8 +12,8 @@ import (
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/influxdata/influxdb/v2"
"github.com/influxdata/influxdb/v2/tsdb"
"github.com/influxdata/influxdb/v2/tsdb/value"
"github.com/influxdata/influxdb/v2/v1/tsdb"
"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
)
func TestWalDump_RunWriteEntries(t *testing.T) {

View File

@ -2,14 +2,15 @@ package wal
import (
"context"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/influxdata/influxdb/v2/kit/errors"
"github.com/influxdata/influxdb/v2/tsdb/value"
"io/ioutil"
"math/rand"
"os"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/influxdata/influxdb/v2/kit/errors"
"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
)
type Test struct {

View File

@ -25,7 +25,7 @@ import (
"github.com/influxdata/influxdb/v2/kit/tracing"
"github.com/influxdata/influxdb/v2/pkg/limiter"
"github.com/influxdata/influxdb/v2/pkg/pool"
"github.com/influxdata/influxdb/v2/tsdb/value"
"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1"
)
const (

View File

@ -12,7 +12,7 @@ import (
"github.com/golang/snappy"
"github.com/influxdata/influxdb/v2"
"github.com/influxdata/influxdb/v2/tsdb/value"
"github.com/influxdata/influxdb/v2/v1/tsdb/engine/tsm1/value"
)
func TestWALWriter_WriteMulti_Single(t *testing.T) {

View File

@ -9,7 +9,7 @@ import (
"github.com/influxdata/influxdb/v2"
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/storage"
"github.com/influxdata/influxdb/v2/tsdb"
"github.com/influxdata/influxdb/v2/v1/tsdb"
"go.uber.org/zap"
)

File diff suppressed because it is too large

View File

@ -1,259 +0,0 @@
package cursors
{{range .}}
{{- $typename := print .Name "Array" }}
{{- $hasType := or (and .Type true) false }}
type {{ $typename }} struct {
Timestamps []int64
{{- if $hasType }}
Values []{{.Type}}
{{- end }}
}
func New{{$typename}}Len(sz int) *{{$typename}} {
return &{{$typename}}{
Timestamps: make([]int64, sz),
{{- if $hasType }}
Values: make([]{{.Type}}, sz),
{{- end }}
}
}
func (a *{{ $typename }}) MinTime() int64 {
return a.Timestamps[0]
}
func (a *{{ $typename }}) MaxTime() int64 {
return a.Timestamps[len(a.Timestamps)-1]
}
func (a *{{ $typename}}) Len() int {
return len(a.Timestamps)
}
// search performs a binary search for UnixNano() v in a
// and returns the position, i, where v would be inserted.
// An additional check of a.Timestamps[i] == v is necessary
// to determine if the value v exists.
func (a *{{ $typename }}) search(v int64) int {
// Define: f(x) → a.Timestamps[x] < v
// Define: f(-1) == true, f(n) == false
// Invariant: f(lo-1) == true, f(hi) == false
lo := 0
hi := a.Len()
for lo < hi {
mid := int(uint(lo+hi) >> 1)
if a.Timestamps[mid] < v {
lo = mid + 1 // preserves f(lo-1) == true
} else {
hi = mid // preserves f(hi) == false
}
}
// lo == hi
return lo
}
// FindRange returns the positions where min and max would be
// inserted into the array. If a[0].UnixNano() > max or
// a[len-1].UnixNano() < min then FindRange returns (-1, -1)
// indicating the array is entirely outside the range [min, max]. The values must
// be deduplicated and sorted before calling FindRange or the results
// are undefined.
func (a *{{ $typename }}) FindRange(min, max int64) (int, int) {
if a.Len() == 0 || min > max {
return -1, -1
}
minVal := a.MinTime()
maxVal := a.MaxTime()
if maxVal < min || minVal > max {
return -1, -1
}
return a.search(min), a.search(max)
}
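To make the search/FindRange contract above concrete, here is a minimal sketch against the generated IntegerArray; the import path is an assumption based on this commit's post-refactor layout, and the values mirror the TestIntegerArray_FindRange cases later in this diff.

package main

import (
	"fmt"

	// Assumed post-refactor location of the generated cursors package.
	"github.com/influxdata/influxdb/v2/v1/tsdb/cursors"
)

func main() {
	// Precondition: timestamps are sorted and deduplicated.
	a := cursors.NewIntegerArrayLen(7)
	copy(a.Timestamps, []int64{10, 11, 13, 15, 17, 20, 21})

	// 12 would be inserted at index 2; 20 is found at index 5.
	l, r := a.FindRange(12, 20)
	fmt.Println(l, r) // 2 5

	// A window entirely outside the array yields (-1, -1).
	fmt.Println(a.FindRange(22, 40)) // -1 -1
}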
{{- if $hasType }}
// Exclude removes the subset of values in [min, max]. The values must
// be deduplicated and sorted before calling Exclude or the results are undefined.
func (a *{{ $typename }}) Exclude(min, max int64) {
rmin, rmax := a.FindRange(min, max)
if rmin == -1 && rmax == -1 {
return
}
// a.Timestamps[rmin] ≥ min
// a.Timestamps[rmax] ≥ max
if rmax < a.Len() {
if a.Timestamps[rmax] == max {
rmax++
}
rest := a.Len() - rmax
if rest > 0 {
ts := a.Timestamps[:rmin+rest]
copy(ts[rmin:], a.Timestamps[rmax:])
a.Timestamps = ts
vs := a.Values[:rmin+rest]
copy(vs[rmin:], a.Values[rmax:])
a.Values = vs
return
}
}
a.Timestamps = a.Timestamps[:rmin]
a.Values = a.Values[:rmin]
}
// Include returns the subset values between min and max inclusive. The values must
// be deduplicated and sorted before calling Include or the results are undefined.
func (a *{{ $typename }}) Include(min, max int64) {
rmin, rmax := a.FindRange(min, max)
if rmin == -1 && rmax == -1 {
a.Timestamps = a.Timestamps[:0]
a.Values = a.Values[:0]
return
}
// a.Timestamps[rmin] ≥ min
// a.Timestamps[rmax] ≥ max
if rmax < a.Len() && a.Timestamps[rmax] == max {
rmax++
}
if rmin > -1 {
ts := a.Timestamps[:rmax-rmin]
copy(ts, a.Timestamps[rmin:rmax])
a.Timestamps = ts
vs := a.Values[:rmax-rmin]
copy(vs, a.Values[rmin:rmax])
a.Values = vs
} else {
a.Timestamps = a.Timestamps[:rmax]
a.Values = a.Values[:rmax]
}
}
// Merge overlays b to top of a. If two values conflict with
// the same timestamp, b is used. Both a and b must be sorted
// in ascending order.
func (a *{{ $typename }}) Merge(b *{{ $typename }}) {
if a.Len() == 0 {
*a = *b
return
}
if b.Len() == 0 {
return
}
// Normally, both a and b should not contain duplicates. Due to a bug in older versions, it's
// possible stored blocks might contain duplicate values. Remove them if they exists before
// merging.
// a = a.Deduplicate()
// b = b.Deduplicate()
if a.MaxTime() < b.MinTime() {
a.Timestamps = append(a.Timestamps, b.Timestamps...)
a.Values = append(a.Values, b.Values...)
return
}
if b.MaxTime() < a.MinTime() {
var tmp {{$typename}}
tmp.Timestamps = append(b.Timestamps, a.Timestamps...)
tmp.Values = append(b.Values, a.Values...)
*a = tmp
return
}
out := New{{$typename}}Len(a.Len()+b.Len())
i, j, k := 0, 0, 0
for i < len(a.Timestamps) && j < len(b.Timestamps) {
if a.Timestamps[i] < b.Timestamps[j] {
out.Timestamps[k] = a.Timestamps[i]
out.Values[k] = a.Values[i]
i++
} else if a.Timestamps[i] == b.Timestamps[j] {
out.Timestamps[k] = b.Timestamps[j]
out.Values[k] = b.Values[j]
i++
j++
} else {
out.Timestamps[k] = b.Timestamps[j]
out.Values[k] = b.Values[j]
j++
}
k++
}
if i < len(a.Timestamps) {
n := copy(out.Timestamps[k:], a.Timestamps[i:])
copy(out.Values[k:], a.Values[i:])
k += n
} else if j < len(b.Timestamps) {
n := copy(out.Timestamps[k:], b.Timestamps[j:])
copy(out.Values[k:], b.Values[j:])
k += n
}
a.Timestamps = out.Timestamps[:k]
a.Values = out.Values[:k]
}
{{ else }}
// Exclude removes the subset of timestamps in [min, max]. The timestamps must
// be deduplicated and sorted before calling Exclude or the results are undefined.
func (a *{{ $typename }}) Exclude(min, max int64) {
rmin, rmax := a.FindRange(min, max)
if rmin == -1 && rmax == -1 {
return
}
// a.Timestamps[rmin] ≥ min
// a.Timestamps[rmax] ≥ max
if rmax < a.Len() {
if a.Timestamps[rmax] == max {
rmax++
}
rest := a.Len() - rmax
if rest > 0 {
ts := a.Timestamps[:rmin+rest]
copy(ts[rmin:], a.Timestamps[rmax:])
a.Timestamps = ts
return
}
}
a.Timestamps = a.Timestamps[:rmin]
}
// Contains returns true if values exist between min and max inclusive. The
// values must be sorted before calling Contains or the results are undefined.
func (a *{{ $typename }}) Contains(min, max int64) bool {
rmin, rmax := a.FindRange(min, max)
if rmin == -1 && rmax == -1 {
return false
}
// a.Timestamps[rmin] ≥ min
// a.Timestamps[rmax] ≥ max
if a.Timestamps[rmin] == min {
return true
}
if rmax < a.Len() && a.Timestamps[rmax] == max {
return true
}
return rmax-rmin > 0
}
{{ end }}
{{ end }}
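The Merge semantics above (both inputs sorted ascending, b winning on equal timestamps) can be exercised with a short sketch; it mirrors the "b replaces a interleaved" cases in the merge tests below, and again assumes the post-refactor cursors import path.

package main

import (
	"fmt"

	"github.com/influxdata/influxdb/v2/v1/tsdb/cursors" // assumed path
)

func main() {
	a := cursors.NewFloatArrayLen(3)
	copy(a.Timestamps, []int64{0, 1, 2})
	copy(a.Values, []float64{0.0, 1.0, 2.0})

	b := cursors.NewFloatArrayLen(2)
	copy(b.Timestamps, []int64{0, 2})
	copy(b.Values, []float64{0.1, 2.1})

	// b overlays a: the conflicting timestamps 0 and 2 take b's values.
	a.Merge(b)
	fmt.Println(a.Timestamps) // [0 1 2]
	fmt.Println(a.Values)     // [0.1 1 2.1]
}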

View File

@ -1,26 +0,0 @@
[
{
"Name":"Float",
"Type":"float64"
},
{
"Name":"Integer",
"Type":"int64"
},
{
"Name":"Unsigned",
"Type":"uint64"
},
{
"Name":"String",
"Type":"string"
},
{
"Name":"Boolean",
"Type":"bool"
},
{
"Name":"Timestamp",
"Type": null
}
]

View File

@ -1,254 +0,0 @@
package cursors
import (
"fmt"
"testing"
"github.com/google/go-cmp/cmp"
)
func makeIntegerArray(count int, min, max int64) *IntegerArray {
vals := NewIntegerArrayLen(count)
ts := min
inc := (max - min) / int64(count)
for i := 0; i < count; i++ {
vals.Timestamps[i] = ts
ts += inc
}
return vals
}
func makeIntegerArrayFromSlice(t []int64) *IntegerArray {
iv := NewIntegerArrayLen(len(t))
copy(iv.Timestamps, t)
return iv
}
func TestIntegerArray_FindRangeNoValues(t *testing.T) {
var vals IntegerArray
l, r := vals.FindRange(0, 100)
if exp := -1; l != exp {
t.Errorf("invalid l; exp=%d, got=%d", exp, l)
}
if exp := -1; r != exp {
t.Errorf("invalid r; exp=%d, got=%d", exp, r)
}
}
func TestIntegerArray_FindRange(t *testing.T) {
vals := makeIntegerArrayFromSlice([]int64{10, 11, 13, 15, 17, 20, 21})
cases := []struct {
min, max int64
l, r int
}{
{12, 20, 2, 5},
{22, 40, -1, -1},
{1, 9, -1, -1},
{1, 10, 0, 0},
{1, 11, 0, 1},
{15, 15, 3, 3},
}
for _, tc := range cases {
t.Run(fmt.Sprintf("%d→%d", tc.min, tc.max), func(t *testing.T) {
l, r := vals.FindRange(tc.min, tc.max)
if l != tc.l {
t.Errorf("left: got %d, exp %d", l, tc.l)
}
if r != tc.r {
t.Errorf("right: got %d, exp %d", r, tc.r)
}
})
}
}
func TestIntegerArray_Exclude(t *testing.T) {
cases := []struct {
n string
min, max int64
exp []int64
}{
{"excl bad range", 18, 11, []int64{10, 12, 14, 16, 18}},
{"excl none-lo", 0, 9, []int64{10, 12, 14, 16, 18}},
{"excl none-hi", 19, 30, []int64{10, 12, 14, 16, 18}},
{"excl first", 0, 10, []int64{12, 14, 16, 18}},
{"excl last", 18, 20, []int64{10, 12, 14, 16}},
{"excl all but first and last", 12, 16, []int64{10, 18}},
{"excl none in middle", 13, 13, []int64{10, 12, 14, 16, 18}},
{"excl middle", 14, 14, []int64{10, 12, 16, 18}},
{"excl suffix", 14, 18, []int64{10, 12}},
}
for _, tc := range cases {
t.Run(fmt.Sprintf("%s[%d,%d]", tc.n, tc.min, tc.max), func(t *testing.T) {
vals := makeIntegerArray(5, 10, 20)
vals.Exclude(tc.min, tc.max)
got := vals.Timestamps
if !cmp.Equal(got, tc.exp) {
t.Errorf("unexpected values -got/+exp\n%s", cmp.Diff(got, tc.exp))
}
})
}
}
func TestIntegerArray_Include(t *testing.T) {
cases := []struct {
n string
min, max int64
exp []int64
}{
{"incl none-lo", 0, 9, []int64{}},
{"incl none-hi", 19, 30, []int64{}},
{"incl first", 0, 10, []int64{10}},
{"incl last", 18, 20, []int64{18}},
{"incl all but first and last", 12, 16, []int64{12, 14, 16}},
{"incl none in middle", 13, 13, []int64{}},
{"incl middle", 14, 14, []int64{14}},
}
for _, tc := range cases {
t.Run(fmt.Sprintf("%s[%d,%d]", tc.n, tc.min, tc.max), func(t *testing.T) {
vals := makeIntegerArray(5, 10, 20)
vals.Include(tc.min, tc.max)
got := vals.Timestamps
if !cmp.Equal(got, tc.exp) {
t.Errorf("unexpected values -got/+exp\n%s", cmp.Diff(got, tc.exp))
}
})
}
}
func makeTimestampArray(count int, min, max int64) *TimestampArray {
vals := NewTimestampArrayLen(count)
ts := min
inc := (max - min) / int64(count)
for i := 0; i < count; i++ {
vals.Timestamps[i] = ts
ts += inc
}
return vals
}
func TestTimestampArray_Contains(t *testing.T) {
cases := []struct {
n string
min, max int64
exp bool
}{
{"no/lo", 0, 9, false},
{"no/hi", 19, 30, false},
{"no/middle", 13, 13, false},
{"yes/first", 0, 10, true},
{"yes/first-eq", 10, 10, true},
{"yes/last", 18, 20, true},
{"yes/last-eq", 18, 18, true},
{"yes/all but first and last", 12, 16, true},
{"yes/middle-eq", 14, 14, true},
{"yes/middle-overlap", 13, 15, true},
{"yes/covers", 8, 22, true},
}
for _, tc := range cases {
t.Run(fmt.Sprintf("%s[%d,%d]", tc.n, tc.min, tc.max), func(t *testing.T) {
vals := makeTimestampArray(5, 10, 20)
if got := vals.Contains(tc.min, tc.max); got != tc.exp {
t.Errorf("Contains -got/+exp\n%s", cmp.Diff(got, tc.exp))
}
})
}
}
func benchExclude(b *testing.B, vals *IntegerArray, min, max int64) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
vals.Exclude(min, max)
}
}
func BenchmarkIntegerArray_ExcludeNone_1000(b *testing.B) {
benchExclude(b, makeIntegerArray(1000, 1000, 2000), 0, 500)
}
func BenchmarkIntegerArray_ExcludeMiddleHalf_1000(b *testing.B) {
benchExclude(b, makeIntegerArray(1000, 1000, 2000), 1250, 1750)
}
func BenchmarkIntegerArray_ExcludeFirst_1000(b *testing.B) {
benchExclude(b, makeIntegerArray(1000, 1000, 2000), 0, 1000)
}
func BenchmarkIntegerArray_ExcludeLast_1000(b *testing.B) {
benchExclude(b, makeIntegerArray(1000, 1000, 2000), 1999, 2000)
}
func BenchmarkIntegerArray_ExcludeNone_10000(b *testing.B) {
benchExclude(b, makeIntegerArray(10000, 10000, 20000), 0, 5000)
}
func BenchmarkIntegerArray_ExcludeMiddleHalf_10000(b *testing.B) {
benchExclude(b, makeIntegerArray(10000, 10000, 20000), 12500, 17500)
}
func BenchmarkIntegerArray_ExcludeFirst_10000(b *testing.B) {
benchExclude(b, makeIntegerArray(10000, 10000, 20000), 0, 10000)
}
func BenchmarkIntegerArray_ExcludeLast_10000(b *testing.B) {
benchExclude(b, makeIntegerArray(10000, 10000, 20000), 19999, 20000)
}
func benchInclude(b *testing.B, vals *IntegerArray, min, max int64) {
src := *vals
tmp := NewIntegerArrayLen(vals.Len())
copy(tmp.Timestamps, vals.Timestamps)
copy(tmp.Values, vals.Values)
b.ResetTimer()
for i := 0; i < b.N; i++ {
vals.Include(min, max)
*vals = src
copy(vals.Timestamps, tmp.Timestamps)
copy(vals.Values, tmp.Values)
}
}
func BenchmarkIntegerArray_IncludeNone_1000(b *testing.B) {
benchInclude(b, makeIntegerArray(1000, 1000, 2000), 0, 500)
}
func BenchmarkIntegerArray_IncludeMiddleHalf_1000(b *testing.B) {
benchInclude(b, makeIntegerArray(1000, 1000, 2000), 1250, 1750)
}
func BenchmarkIntegerArray_IncludeFirst_1000(b *testing.B) {
benchInclude(b, makeIntegerArray(1000, 1000, 2000), 0, 1000)
}
func BenchmarkIntegerArray_IncludeLast_1000(b *testing.B) {
benchInclude(b, makeIntegerArray(1000, 1000, 2000), 1999, 2000)
}
func BenchmarkIntegerArray_IncludeNone_10000(b *testing.B) {
benchInclude(b, makeIntegerArray(10000, 10000, 20000), 0, 5000)
}
func BenchmarkIntegerArray_IncludeMiddleHalf_10000(b *testing.B) {
benchInclude(b, makeIntegerArray(10000, 10000, 20000), 12500, 17500)
}
func BenchmarkIntegerArray_IncludeFirst_10000(b *testing.B) {
benchInclude(b, makeIntegerArray(10000, 10000, 20000), 0, 10000)
}
func BenchmarkIntegerArray_IncludeLast_10000(b *testing.B) {
benchInclude(b, makeIntegerArray(10000, 10000, 20000), 19999, 20000)
}

View File

@ -1,41 +0,0 @@
package cursors
import "sort"
func (a *FloatArray) Size() int {
// size of timestamps + values
return len(a.Timestamps)*8 + len(a.Values)*8
}
func (a *IntegerArray) Size() int {
// size of timestamps + values
return len(a.Timestamps)*8 + len(a.Values)*8
}
func (a *UnsignedArray) Size() int {
// size of timestamps + values
return len(a.Timestamps)*8 + len(a.Values)*8
}
func (a *StringArray) Size() int {
sz := len(a.Timestamps) * 8
for _, s := range a.Values {
sz += len(s)
}
return sz
}
func (a *BooleanArray) Size() int {
// size of timestamps + values
return len(a.Timestamps)*8 + len(a.Values)
}
var _ sort.Interface = (*TimestampArray)(nil)
func (a *TimestampArray) Less(i, j int) bool {
return a.Timestamps[i] < a.Timestamps[j]
}
func (a *TimestampArray) Swap(i, j int) {
a.Timestamps[i], a.Timestamps[j] = a.Timestamps[j], a.Timestamps[i]
}

View File

@ -1,459 +0,0 @@
package cursors_test
import (
"strconv"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/influxdata/influxdb/v2/tsdb/cursors"
)
func makeBooleanArray(v ...interface{}) *cursors.BooleanArray {
if len(v)&1 == 1 {
panic("invalid array length")
}
a := cursors.NewBooleanArrayLen(len(v) / 2)
for i := 0; i < len(v); i += 2 {
a.Timestamps[i/2] = int64(v[i].(int))
a.Values[i/2] = v[i+1].(bool)
}
return a
}
func makeFloatArray(v ...interface{}) *cursors.FloatArray {
if len(v)&1 == 1 {
panic("invalid array length")
}
a := cursors.NewFloatArrayLen(len(v) / 2)
for i := 0; i < len(v); i += 2 {
a.Timestamps[i/2] = int64(v[i].(int))
a.Values[i/2] = v[i+1].(float64)
}
return a
}
func makeIntegerArray(v ...interface{}) *cursors.IntegerArray {
if len(v)&1 == 1 {
panic("invalid array length")
}
a := cursors.NewIntegerArrayLen(len(v) / 2)
for i := 0; i < len(v); i += 2 {
a.Timestamps[i/2] = int64(v[i].(int))
a.Values[i/2] = int64(v[i+1].(int))
}
return a
}
func makeUnsignedArray(v ...interface{}) *cursors.UnsignedArray {
if len(v)&1 == 1 {
panic("invalid array length")
}
a := cursors.NewUnsignedArrayLen(len(v) / 2)
for i := 0; i < len(v); i += 2 {
a.Timestamps[i/2] = int64(v[i].(int))
a.Values[i/2] = uint64(v[i+1].(int))
}
return a
}
func makeStringArray(v ...interface{}) *cursors.StringArray {
if len(v)&1 == 1 {
panic("invalid array length")
}
a := cursors.NewStringArrayLen(len(v) / 2)
for i := 0; i < len(v); i += 2 {
a.Timestamps[i/2] = int64(v[i].(int))
a.Values[i/2] = strconv.Itoa(v[i+1].(int))
}
return a
}
func TestBooleanArray_Merge(t *testing.T) {
tests := []struct {
name string
a, b, exp *cursors.BooleanArray
}{
{
name: "empty a",
a: makeBooleanArray(),
b: makeBooleanArray(1, true, 2, true),
exp: makeBooleanArray(1, true, 2, true),
},
{
name: "empty b",
a: makeBooleanArray(1, true, 2, true),
b: makeBooleanArray(),
exp: makeBooleanArray(1, true, 2, true),
},
{
name: "b replaces a",
a: makeBooleanArray(1, true),
b: makeBooleanArray(
0, false,
1, false, // overwrites a
2, false,
3, false,
4, false,
),
exp: makeBooleanArray(0, false, 1, false, 2, false, 3, false, 4, false),
},
{
name: "b replaces partial a",
a: makeBooleanArray(1, true, 2, true, 3, true, 4, true),
b: makeBooleanArray(
1, false, // overwrites a
2, false, // overwrites a
),
exp: makeBooleanArray(
1, false, // overwrites a
2, false, // overwrites a
3, true,
4, true,
),
},
{
name: "b replaces all a",
a: makeBooleanArray(1, true, 2, true, 3, true, 4, true),
b: makeBooleanArray(1, false, 2, false, 3, false, 4, false),
exp: makeBooleanArray(1, false, 2, false, 3, false, 4, false),
},
{
name: "b replaces a interleaved",
a: makeBooleanArray(0, true, 1, true, 2, true, 3, true, 4, true),
b: makeBooleanArray(0, false, 2, false, 4, false),
exp: makeBooleanArray(0, false, 1, true, 2, false, 3, true, 4, false),
},
{
name: "b merges a interleaved",
a: makeBooleanArray(0, true, 2, true, 4, true),
b: makeBooleanArray(1, false, 3, false, 5, false),
exp: makeBooleanArray(0, true, 1, false, 2, true, 3, false, 4, true, 5, false),
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
test.a.Merge(test.b)
if !cmp.Equal(test.a, test.exp) {
t.Fatalf("unexpected values -got/+exp\n%s", cmp.Diff(test.a, test.exp))
}
})
}
}
func TestFloatArray_Merge(t *testing.T) {
tests := []struct {
name string
a, b, exp *cursors.FloatArray
}{
{
name: "empty a",
a: makeFloatArray(),
b: makeFloatArray(1, 1.1, 2, 2.1),
exp: makeFloatArray(1, 1.1, 2, 2.1),
},
{
name: "empty b",
a: makeFloatArray(1, 1.0, 2, 2.0),
b: makeFloatArray(),
exp: makeFloatArray(1, 1.0, 2, 2.0),
},
{
name: "b replaces a",
a: makeFloatArray(1, 1.0),
b: makeFloatArray(
0, 0.1,
1, 1.1, // overwrites a
2, 2.1,
3, 3.1,
4, 4.1,
),
exp: makeFloatArray(0, 0.1, 1, 1.1, 2, 2.1, 3, 3.1, 4, 4.1),
},
{
name: "b replaces partial a",
a: makeFloatArray(1, 1.0, 2, 2.0, 3, 3.0, 4, 4.0),
b: makeFloatArray(
1, 1.1, // overwrites a
2, 2.1, // overwrites a
),
exp: makeFloatArray(
1, 1.1, // overwrites a
2, 2.1, // overwrites a
3, 3.0,
4, 4.0,
),
},
{
name: "b replaces all a",
a: makeFloatArray(1, 1.0, 2, 2.0, 3, 3.0, 4, 4.0),
b: makeFloatArray(1, 1.1, 2, 2.1, 3, 3.1, 4, 4.1),
exp: makeFloatArray(1, 1.1, 2, 2.1, 3, 3.1, 4, 4.1),
},
{
name: "b replaces a interleaved",
a: makeFloatArray(0, 0.0, 1, 1.0, 2, 2.0, 3, 3.0, 4, 4.0),
b: makeFloatArray(0, 0.1, 2, 2.1, 4, 4.1),
exp: makeFloatArray(0, 0.1, 1, 1.0, 2, 2.1, 3, 3.0, 4, 4.1),
},
{
name: "b merges a interleaved",
a: makeFloatArray(0, 0.0, 2, 2.0, 4, 4.0),
b: makeFloatArray(1, 1.1, 3, 3.1, 5, 5.1),
exp: makeFloatArray(0, 0.0, 1, 1.1, 2, 2.0, 3, 3.1, 4, 4.0, 5, 5.1),
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
test.a.Merge(test.b)
if !cmp.Equal(test.a, test.exp) {
t.Fatalf("unexpected values -got/+exp\n%s", cmp.Diff(test.a, test.exp))
}
})
}
}
func TestIntegerArray_Merge(t *testing.T) {
tests := []struct {
name string
a, b, exp *cursors.IntegerArray
}{
{
name: "empty a",
a: makeIntegerArray(),
b: makeIntegerArray(1, 11, 2, 21),
exp: makeIntegerArray(1, 11, 2, 21),
},
{
name: "empty b",
a: makeIntegerArray(1, 10, 2, 20),
b: makeIntegerArray(),
exp: makeIntegerArray(1, 10, 2, 20),
},
{
name: "b replaces a",
a: makeIntegerArray(1, 10),
b: makeIntegerArray(
0, 1,
1, 11, // overwrites a
2, 21,
3, 31,
4, 41,
),
exp: makeIntegerArray(0, 1, 1, 11, 2, 21, 3, 31, 4, 41),
},
{
name: "b replaces partial a",
a: makeIntegerArray(1, 10, 2, 20, 3, 30, 4, 40),
b: makeIntegerArray(
1, 11, // overwrites a
2, 21, // overwrites a
),
exp: makeIntegerArray(
1, 11, // overwrites a
2, 21, // overwrites a
3, 30,
4, 40,
),
},
{
name: "b replaces all a",
a: makeIntegerArray(1, 10, 2, 20, 3, 30, 4, 40),
b: makeIntegerArray(1, 11, 2, 21, 3, 31, 4, 41),
exp: makeIntegerArray(1, 11, 2, 21, 3, 31, 4, 41),
},
{
name: "b replaces a interleaved",
a: makeIntegerArray(0, 0, 1, 10, 2, 20, 3, 30, 4, 40),
b: makeIntegerArray(0, 1, 2, 21, 4, 41),
exp: makeIntegerArray(0, 1, 1, 10, 2, 21, 3, 30, 4, 41),
},
{
name: "b merges a interleaved",
a: makeIntegerArray(0, 00, 2, 20, 4, 40),
b: makeIntegerArray(1, 11, 3, 31, 5, 51),
exp: makeIntegerArray(0, 00, 1, 11, 2, 20, 3, 31, 4, 40, 5, 51),
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
test.a.Merge(test.b)
if !cmp.Equal(test.a, test.exp) {
t.Fatalf("unexpected values -got/+exp\n%s", cmp.Diff(test.a, test.exp))
}
})
}
}
func TestUnsignedArray_Merge(t *testing.T) {
tests := []struct {
name string
a, b, exp *cursors.UnsignedArray
}{
{
name: "empty a",
a: makeUnsignedArray(),
b: makeUnsignedArray(1, 11, 2, 21),
exp: makeUnsignedArray(1, 11, 2, 21),
},
{
name: "empty b",
a: makeUnsignedArray(1, 10, 2, 20),
b: makeUnsignedArray(),
exp: makeUnsignedArray(1, 10, 2, 20),
},
{
name: "b replaces a",
a: makeUnsignedArray(1, 10),
b: makeUnsignedArray(
0, 1,
1, 11, // overwrites a
2, 21,
3, 31,
4, 41,
),
exp: makeUnsignedArray(0, 1, 1, 11, 2, 21, 3, 31, 4, 41),
},
{
name: "b replaces partial a",
a: makeUnsignedArray(1, 10, 2, 20, 3, 30, 4, 40),
b: makeUnsignedArray(
1, 11, // overwrites a
2, 21, // overwrites a
),
exp: makeUnsignedArray(
1, 11, // overwrites a
2, 21, // overwrites a
3, 30,
4, 40,
),
},
{
name: "b replaces all a",
a: makeUnsignedArray(1, 10, 2, 20, 3, 30, 4, 40),
b: makeUnsignedArray(1, 11, 2, 21, 3, 31, 4, 41),
exp: makeUnsignedArray(1, 11, 2, 21, 3, 31, 4, 41),
},
{
name: "b replaces a interleaved",
a: makeUnsignedArray(0, 0, 1, 10, 2, 20, 3, 30, 4, 40),
b: makeUnsignedArray(0, 1, 2, 21, 4, 41),
exp: makeUnsignedArray(0, 1, 1, 10, 2, 21, 3, 30, 4, 41),
},
{
name: "b merges a interleaved",
a: makeUnsignedArray(0, 00, 2, 20, 4, 40),
b: makeUnsignedArray(1, 11, 3, 31, 5, 51),
exp: makeUnsignedArray(0, 00, 1, 11, 2, 20, 3, 31, 4, 40, 5, 51),
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
test.a.Merge(test.b)
if !cmp.Equal(test.a, test.exp) {
t.Fatalf("unexpected values -got/+exp\n%s", cmp.Diff(test.a, test.exp))
}
})
}
}
func TestStringArray_Merge(t *testing.T) {
tests := []struct {
name string
a, b, exp *cursors.StringArray
}{
{
name: "empty a",
a: makeStringArray(),
b: makeStringArray(1, 11, 2, 21),
exp: makeStringArray(1, 11, 2, 21),
},
{
name: "empty b",
a: makeStringArray(1, 10, 2, 20),
b: makeStringArray(),
exp: makeStringArray(1, 10, 2, 20),
},
{
name: "b replaces a",
a: makeStringArray(1, 10),
b: makeStringArray(
0, 1,
1, 11, // overwrites a
2, 21,
3, 31,
4, 41,
),
exp: makeStringArray(0, 1, 1, 11, 2, 21, 3, 31, 4, 41),
},
{
name: "b replaces partial a",
a: makeStringArray(1, 10, 2, 20, 3, 30, 4, 40),
b: makeStringArray(
1, 11, // overwrites a
2, 21, // overwrites a
),
exp: makeStringArray(
1, 11, // overwrites a
2, 21, // overwrites a
3, 30,
4, 40,
),
},
{
name: "b replaces all a",
a: makeStringArray(1, 10, 2, 20, 3, 30, 4, 40),
b: makeStringArray(1, 11, 2, 21, 3, 31, 4, 41),
exp: makeStringArray(1, 11, 2, 21, 3, 31, 4, 41),
},
{
name: "b replaces a interleaved",
a: makeStringArray(0, 0, 1, 10, 2, 20, 3, 30, 4, 40),
b: makeStringArray(0, 1, 2, 21, 4, 41),
exp: makeStringArray(0, 1, 1, 10, 2, 21, 3, 30, 4, 41),
},
{
name: "b merges a interleaved",
a: makeStringArray(0, 00, 2, 20, 4, 40),
b: makeStringArray(1, 11, 3, 31, 5, 51),
exp: makeStringArray(0, 00, 1, 11, 2, 20, 3, 31, 4, 40, 5, 51),
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
test.a.Merge(test.b)
if !cmp.Equal(test.a, test.exp) {
t.Fatalf("unexpected values -got/+exp\n%s", cmp.Diff(test.a, test.exp))
}
})
}
}

View File

@ -1,68 +0,0 @@
package cursors
import (
"context"
"github.com/influxdata/influxdb/v2/models"
)
const DefaultMaxPointsPerBlock = 1000
type Cursor interface {
Close()
Err() error
Stats() CursorStats
}
type IntegerArrayCursor interface {
Cursor
Next() *IntegerArray
}
type FloatArrayCursor interface {
Cursor
Next() *FloatArray
}
type UnsignedArrayCursor interface {
Cursor
Next() *UnsignedArray
}
type StringArrayCursor interface {
Cursor
Next() *StringArray
}
type BooleanArrayCursor interface {
Cursor
Next() *BooleanArray
}
type CursorRequest struct {
Name []byte
Tags models.Tags
Field string
Ascending bool
StartTime int64
EndTime int64
}
type CursorIterator interface {
Next(ctx context.Context, r *CursorRequest) (Cursor, error)
Stats() CursorStats
}
type CursorIterators []CursorIterator
// CursorStats represents stats collected by a cursor.
type CursorStats struct {
ScannedValues int // number of values scanned
ScannedBytes int // number of uncompressed bytes scanned
}
// Add adds other to s and updates s.
func (s *CursorStats) Add(other CursorStats) {
s.ScannedValues += other.ScannedValues
s.ScannedBytes += other.ScannedBytes
}
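Add is intended for accumulating stats across many cursors. A minimal in-package sketch (the totalStats helper is hypothetical, for illustration only):

// totalStats sums the stats reported by a batch of cursors.
func totalStats(curs []Cursor) CursorStats {
	var total CursorStats
	for _, cur := range curs {
		total.Add(cur.Stats())
	}
	return total
}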

View File

@ -1,4 +0,0 @@
package cursors
//go:generate env GO111MODULE=on go run github.com/benbjohnson/tmpl -data=@arrayvalues.gen.go.tmpldata arrayvalues.gen.go.tmpl
//go:generate stringer -type FieldType

View File

@ -1,81 +0,0 @@
package cursors
// StringIterator describes the behavior for enumerating a sequence of
// string values.
type StringIterator interface {
// Next advances the StringIterator to the next value. It returns false
// when there are no more values.
Next() bool
// Value returns the current value.
Value() string
Stats() CursorStats
}
// EmptyStringIterator is an implementation of StringIterator that returns
// no values.
var EmptyStringIterator StringIterator = &stringIterator{}
type stringIterator struct{}
func (*stringIterator) Next() bool { return false }
func (*stringIterator) Value() string { return "" }
func (*stringIterator) Stats() CursorStats { return CursorStats{} }
type StringSliceIterator struct {
s []string
v string
i int
stats CursorStats
}
func NewStringSliceIterator(s []string) *StringSliceIterator {
return &StringSliceIterator{s: s, i: 0}
}
func NewStringSliceIteratorWithStats(s []string, stats CursorStats) *StringSliceIterator {
return &StringSliceIterator{s: s, i: 0, stats: stats}
}
func (s *StringSliceIterator) Next() bool {
if s.i < len(s.s) {
s.v = s.s[s.i]
s.i++
return true
}
s.v = ""
return false
}
func (s *StringSliceIterator) Value() string {
return s.v
}
func (s *StringSliceIterator) Stats() CursorStats {
return s.stats
}
func (s *StringSliceIterator) toSlice() []string {
if s.i < len(s.s) {
return s.s[s.i:]
}
return nil
}
// StringIteratorToSlice reads the remainder of i into a slice and
// returns the result.
func StringIteratorToSlice(i StringIterator) []string {
if i == nil {
return nil
}
if si, ok := i.(*StringSliceIterator); ok {
return si.toSlice()
}
var a []string
for i.Next() {
a = append(a, i.Value())
}
return a
}
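A short in-package usage sketch (needs "fmt"): the *StringSliceIterator fast path in StringIteratorToSlice returns only the unconsumed remainder, without replaying values already read.

func ExampleStringSliceIterator() {
	it := NewStringSliceIterator([]string{"m0", "m1", "m2"})
	it.Next()
	fmt.Println(it.Value())                // m0
	fmt.Println(StringIteratorToSlice(it)) // [m1 m2]
}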

View File

@ -1,19 +0,0 @@
package tsdb
import (
"fmt"
)
// PartialWriteError indicates a write request could only write a portion of the
// requested values.
type PartialWriteError struct {
Reason string
Dropped int
// A sorted slice of series keys that were dropped.
DroppedKeys [][]byte
}
func (e PartialWriteError) Error() string {
return fmt.Sprintf("partial write: %s dropped=%d", e.Reason, e.Dropped)
}

View File

@ -1,106 +0,0 @@
package tsdb
import (
"encoding/binary"
"github.com/influxdata/influxdb/v2"
"github.com/influxdata/influxdb/v2/models"
)
// DecodeName converts tsdb internal serialization back to organization and bucket IDs.
func DecodeName(name [16]byte) (org, bucket influxdb.ID) {
org = influxdb.ID(binary.BigEndian.Uint64(name[0:8]))
bucket = influxdb.ID(binary.BigEndian.Uint64(name[8:16]))
return
}
// DecodeNameSlice converts tsdb internal serialization back to organization and bucket IDs.
func DecodeNameSlice(name []byte) (org, bucket influxdb.ID) {
return influxdb.ID(binary.BigEndian.Uint64(name[0:8])), influxdb.ID(binary.BigEndian.Uint64(name[8:16]))
}
// EncodeName converts org/bucket pairs to the tsdb internal serialization
func EncodeName(org, bucket influxdb.ID) [16]byte {
var nameBytes [16]byte
binary.BigEndian.PutUint64(nameBytes[0:8], uint64(org))
binary.BigEndian.PutUint64(nameBytes[8:16], uint64(bucket))
return nameBytes
}
// EncodeNameSlice converts org/bucket pairs to the tsdb internal serialization but returns a byte slice.
func EncodeNameSlice(org, bucket influxdb.ID) []byte {
buf := EncodeName(org, bucket)
return buf[:]
}
// EncodeOrgName converts org to the tsdb internal serialization that may be used
// as a prefix when searching for keys matching a specific organization.
func EncodeOrgName(org influxdb.ID) [8]byte {
var orgBytes [8]byte
binary.BigEndian.PutUint64(orgBytes[0:8], uint64(org))
return orgBytes
}
// EncodeNameString converts org/bucket pairs to the tsdb internal serialization
func EncodeNameString(org, bucket influxdb.ID) string {
name := EncodeName(org, bucket)
return string(name[:])
}
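A round-trip sketch for the name encoding (the v1/tsdb import path is assumed from this commit's layout); the byte values match the TestNames table later in this diff.

package main

import (
	"fmt"

	"github.com/influxdata/influxdb/v2"
	"github.com/influxdata/influxdb/v2/v1/tsdb" // assumed path
)

func main() {
	org, bucket := influxdb.ID(12345678), influxdb.ID(87654321)

	// 16 bytes: big-endian org in [0:8], big-endian bucket in [8:16].
	name := tsdb.EncodeName(org, bucket)
	fmt.Println(name[:]) // [0 0 0 0 0 188 97 78 0 0 0 0 5 57 127 177]

	o, b := tsdb.DecodeName(name)
	fmt.Println(o == org, b == bucket) // true true
}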
// ExplodePoints creates a list of points that each contain exactly one field. It also
// moves the original measurement into a tag and replaces the measurement name with the
// encoded org/bucket pair.
func ExplodePoints(org, bucket influxdb.ID, points []models.Point) ([]models.Point, error) {
out := make([]models.Point, 0, len(points))
// TODO(jeff): We should add a RawEncode() method or something to the influxdb.ID type
// or we should use hex encoded measurement names. Either way, we shouldn't be doing a
// decode of the encode here, and we don't want to depend on details of how the ID type
// is represented.
ob := EncodeName(org, bucket)
name := string(ob[:])
tags := make(models.Tags, 1)
for _, pt := range points {
tags = tags[:1] // reset buffer for next point.
tags[0] = models.NewTag(models.MeasurementTagKeyBytes, pt.Name())
pt.ForEachTag(func(k, v []byte) bool {
tags = append(tags, models.NewTag(k, v))
return true
})
t := pt.Time()
itr := pt.FieldIterator()
tags = append(tags, models.Tag{}) // Make room for field key and value.
for itr.Next() {
tags[len(tags)-1] = models.NewTag(models.FieldKeyTagKeyBytes, itr.FieldKey())
var err error
field := make(models.Fields, 1)
switch itr.Type() {
case models.Float:
field[string(itr.FieldKey())], err = itr.FloatValue()
case models.Integer:
field[string(itr.FieldKey())], err = itr.IntegerValue()
case models.Boolean:
field[string(itr.FieldKey())], err = itr.BooleanValue()
case models.String:
field[string(itr.FieldKey())] = itr.StringValue()
case models.Unsigned:
field[string(itr.FieldKey())], err = itr.UnsignedValue()
}
if err != nil {
return nil, err
}
pt, err := models.NewPoint(name, tags, field, t)
if err != nil {
return nil, err
}
out = append(out, pt)
}
}
return out, nil
}
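To illustrate ExplodePoints: a single point with two fields becomes two single-field points whose measurement is the encoded org/bucket name, with the original measurement and the field key carried in reserved tags. A sketch, with import paths assumed as above:

package main

import (
	"fmt"

	"github.com/influxdata/influxdb/v2"
	"github.com/influxdata/influxdb/v2/models"
	"github.com/influxdata/influxdb/v2/v1/tsdb" // assumed path
)

func main() {
	pts, err := models.ParsePointsString("cpu,host=a user=1.5,system=0.5 1000")
	if err != nil {
		panic(err)
	}

	out, err := tsdb.ExplodePoints(influxdb.ID(1), influxdb.ID(2), pts)
	if err != nil {
		panic(err)
	}

	// Two fields on the single input point yield two single-field points.
	// Each output point's measurement is the 16-byte encoded org/bucket
	// name; the original measurement and field key live in reserved tags.
	fmt.Println(len(out)) // 2
}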

View File

@ -1,42 +0,0 @@
package tsdb_test
import (
"fmt"
"testing"
"github.com/influxdata/influxdb/v2"
"github.com/influxdata/influxdb/v2/tsdb"
)
func TestNames(t *testing.T) {
goodExamples := []struct {
Org uint64
Bucket uint64
Name [16]byte
}{
{Org: 12345678, Bucket: 87654321, Name: [16]byte{0, 0, 0, 0, 0, 188, 97, 78, 0, 0, 0, 0, 5, 57, 127, 177}},
{Org: 1234567891011, Bucket: 87654321, Name: [16]byte{0, 0, 1, 31, 113, 251, 8, 67, 0, 0, 0, 0, 5, 57, 127, 177}},
{Org: 12345678, Bucket: 8765432100000, Name: [16]byte{0, 0, 0, 0, 0, 188, 97, 78, 0, 0, 7, 248, 220, 119, 116, 160}},
{Org: 123456789929, Bucket: 8765432100000, Name: [16]byte{0, 0, 0, 28, 190, 153, 29, 169, 0, 0, 7, 248, 220, 119, 116, 160}},
}
for _, example := range goodExamples {
t.Run(fmt.Sprintf("%d%d", example.Org, example.Bucket), func(t *testing.T) {
name := tsdb.EncodeName(influxdb.ID(example.Org), influxdb.ID(example.Bucket))
if got, exp := name, example.Name; got != exp {
t.Errorf("got name %q, expected %q", got, exp)
}
org, bucket := tsdb.DecodeName(name)
if gotOrg, expOrg := org, example.Org; gotOrg != influxdb.ID(expOrg) {
t.Errorf("got organization ID %q, expected %q", gotOrg, expOrg)
}
if gotBucket, expBucket := bucket, example.Bucket; gotBucket != influxdb.ID(expBucket) {
t.Errorf("got organization ID %q, expected %q", gotBucket, expBucket)
}
})
}
}

View File

@ -1,14 +0,0 @@
//go:generate sh -c "curl -L https://github.com/influxdata/testdata/raw/2020.07.20.1/tsdbtestdata.tar.gz | tar xz"
package tsdb_test
import (
"fmt"
"os"
)
func init() {
if _, err := os.Stat("./testdata"); err != nil {
fmt.Println("Run go generate to download testdata directory.")
os.Exit(1)
}
}

View File

@ -1,59 +0,0 @@
package tsdb
import (
"github.com/influxdata/influxdb/v2/models"
)
// MakeTagsKey converts a tag set to bytes for use as a lookup key.
func MakeTagsKey(keys []string, tags models.Tags) []byte {
// precondition: keys is sorted
// precondition: models.Tags is sorted
// Empty maps marshal to empty bytes.
if len(keys) == 0 || len(tags) == 0 {
return nil
}
sel := make([]int, 0, len(keys))
sz := 0
i, j := 0, 0
for i < len(keys) && j < len(tags) {
if keys[i] < string(tags[j].Key) {
i++
} else if keys[i] > string(tags[j].Key) {
j++
} else {
sel = append(sel, j)
sz += len(keys[i]) + len(tags[j].Value)
i++
j++
}
}
if len(sel) == 0 {
// no tags matched the requested keys
return nil
}
sz += (len(sel) * 2) - 1 // selected tags, add separators
// Generate marshaled bytes.
b := make([]byte, sz)
buf := b
for _, k := range sel {
copy(buf, tags[k].Key)
buf[len(tags[k].Key)] = '|'
buf = buf[len(tags[k].Key)+1:]
}
for i, k := range sel {
copy(buf, tags[k].Value)
if i < len(sel)-1 {
buf[len(tags[k].Value)] = '|'
buf = buf[len(tags[k].Value)+1:]
}
}
return b
}
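A sketch of the resulting key layout, taken directly from the MakeTagsKey test table below (selected keys first, then their values, all '|'-separated; v1/tsdb path assumed):

package main

import (
	"fmt"

	"github.com/influxdata/influxdb/v2/models"
	"github.com/influxdata/influxdb/v2/v1/tsdb" // assumed path
)

func main() {
	keys := []string{"baz", "foo"} // precondition: sorted
	tags := models.NewTags(map[string]string{"foo": "bar", "baz": "battttt"})

	fmt.Printf("%s\n", tsdb.MakeTagsKey(keys, tags)) // baz|foo|battttt|bar
}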

View File

@ -1,89 +0,0 @@
package tsdb_test
import (
"bytes"
"fmt"
"testing"
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/tsdb"
)
// Ensure tags can be marshaled into a byte slice.
func TestMakeTagsKey(t *testing.T) {
for i, tt := range []struct {
keys []string
tags models.Tags
result []byte
}{
{
keys: nil,
tags: nil,
result: nil,
},
{
keys: []string{"foo"},
tags: models.NewTags(map[string]string{"foo": "bar"}),
result: []byte(`foo|bar`),
},
{
keys: []string{"foo"},
tags: models.NewTags(map[string]string{"baz": "battttt"}),
result: []byte(``),
},
{
keys: []string{"baz", "foo"},
tags: models.NewTags(map[string]string{"baz": "battttt"}),
result: []byte(`baz|battttt`),
},
{
keys: []string{"baz", "foo", "zzz"},
tags: models.NewTags(map[string]string{"foo": "bar"}),
result: []byte(`foo|bar`),
},
{
keys: []string{"baz", "foo"},
tags: models.NewTags(map[string]string{"foo": "bar", "baz": "battttt"}),
result: []byte(`baz|foo|battttt|bar`),
},
{
keys: []string{"baz"},
tags: models.NewTags(map[string]string{"baz": "battttt", "foo": "bar"}),
result: []byte(`baz|battttt`),
},
} {
result := tsdb.MakeTagsKey(tt.keys, tt.tags)
if !bytes.Equal(result, tt.result) {
t.Fatalf("%d. unexpected result: exp=%s, got=%s", i, tt.result, result)
}
}
}
func BenchmarkMakeTagsKey_KeyN1(b *testing.B) { benchmarkMakeTagsKey(b, 1) }
func BenchmarkMakeTagsKey_KeyN3(b *testing.B) { benchmarkMakeTagsKey(b, 3) }
func BenchmarkMakeTagsKey_KeyN5(b *testing.B) { benchmarkMakeTagsKey(b, 5) }
func BenchmarkMakeTagsKey_KeyN10(b *testing.B) { benchmarkMakeTagsKey(b, 10) }
func makeTagsAndKeys(keyN int) ([]string, models.Tags) {
const keySize, valueSize = 8, 15
// Generate tag map.
keys := make([]string, keyN)
tags := make(map[string]string)
for i := 0; i < keyN; i++ {
keys[i] = fmt.Sprintf("%0*d", keySize, i)
tags[keys[i]] = fmt.Sprintf("%0*d", valueSize, i)
}
return keys, models.NewTags(tags)
}
func benchmarkMakeTagsKey(b *testing.B, keyN int) {
keys, tags := makeTagsAndKeys(keyN)
// Unmarshal map into byte slice.
b.ReportAllocs()
for i := 0; i < b.N; i++ {
tsdb.MakeTagsKey(keys, tags)
}
}

View File

@ -1,333 +0,0 @@
package tsdb
import (
"sync"
"sync/atomic"
"unsafe"
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/pkg/bytesutil"
)
// SeriesCollection is a struct of arrays representation of a collection of series that allows
// for efficient filtering.
type SeriesCollection struct {
Points []models.Point
Keys [][]byte
SeriesKeys [][]byte
Names [][]byte
Tags []models.Tags
Types []models.FieldType
SeriesIDs []SeriesID
// Keeps track of invalid entries.
Dropped uint64
DroppedKeys [][]byte
Reason string
// Used by the concurrent iterators to stage drops. Inefficient, but should be
// very infrequently used.
state *seriesCollectionState
}
// seriesCollectionState keeps track of concurrent iterator state.
type seriesCollectionState struct {
mu sync.Mutex
reason string
index map[int]struct{}
}
// NewSeriesCollection builds a SeriesCollection from a slice of points. It does some filtering
// of invalid points.
func NewSeriesCollection(points []models.Point) *SeriesCollection {
out := &SeriesCollection{
Points: append([]models.Point(nil), points...),
Keys: make([][]byte, 0, len(points)),
Names: make([][]byte, 0, len(points)),
Tags: make([]models.Tags, 0, len(points)),
Types: make([]models.FieldType, 0, len(points)),
}
for _, pt := range points {
out.Keys = append(out.Keys, pt.Key())
out.Names = append(out.Names, pt.Name())
out.Tags = append(out.Tags, pt.Tags())
fi := pt.FieldIterator()
fi.Next()
out.Types = append(out.Types, fi.Type())
}
return out
}
// Duplicate returns a copy of the SeriesCollection. The slices are shared. Appending to any of
// them may or may not be reflected.
func (s SeriesCollection) Duplicate() *SeriesCollection { return &s }
// Length returns the length of the first non-nil slice in the collection, or 0 if there is no
// non-nil slice.
func (s *SeriesCollection) Length() int {
switch {
case s.Points != nil:
return len(s.Points)
case s.Keys != nil:
return len(s.Keys)
case s.SeriesKeys != nil:
return len(s.SeriesKeys)
case s.Names != nil:
return len(s.Names)
case s.Tags != nil:
return len(s.Tags)
case s.Types != nil:
return len(s.Types)
case s.SeriesIDs != nil:
return len(s.SeriesIDs)
default:
return 0
}
}
// Copy will copy the element at src into dst in all slices that can: x[dst] = x[src].
func (s *SeriesCollection) Copy(dst, src int) {
if dst == src {
return
}
udst, usrc := uint(dst), uint(src)
if n := uint(len(s.Points)); udst < n && usrc < n {
s.Points[udst] = s.Points[usrc]
}
if n := uint(len(s.Keys)); udst < n && usrc < n {
s.Keys[udst] = s.Keys[usrc]
}
if n := uint(len(s.SeriesKeys)); udst < n && usrc < n {
s.SeriesKeys[udst] = s.SeriesKeys[usrc]
}
if n := uint(len(s.Names)); udst < n && usrc < n {
s.Names[udst] = s.Names[usrc]
}
if n := uint(len(s.Tags)); udst < n && usrc < n {
s.Tags[udst] = s.Tags[usrc]
}
if n := uint(len(s.Types)); udst < n && usrc < n {
s.Types[udst] = s.Types[usrc]
}
if n := uint(len(s.SeriesIDs)); udst < n && usrc < n {
s.SeriesIDs[udst] = s.SeriesIDs[usrc]
}
}
// Swap will swap the elements at i and j in all slices that can: x[i], x[j] = x[j], x[i].
func (s *SeriesCollection) Swap(i, j int) {
if i == j {
return
}
ui, uj := uint(i), uint(j)
if n := uint(len(s.Points)); ui < n && uj < n {
s.Points[ui], s.Points[uj] = s.Points[uj], s.Points[ui]
}
if n := uint(len(s.Keys)); ui < n && uj < n {
s.Keys[ui], s.Keys[uj] = s.Keys[uj], s.Keys[ui]
}
if n := uint(len(s.SeriesKeys)); ui < n && uj < n {
s.SeriesKeys[ui], s.SeriesKeys[uj] = s.SeriesKeys[uj], s.SeriesKeys[ui]
}
if n := uint(len(s.Names)); ui < n && uj < n {
s.Names[ui], s.Names[uj] = s.Names[uj], s.Names[ui]
}
if n := uint(len(s.Tags)); ui < n && uj < n {
s.Tags[ui], s.Tags[uj] = s.Tags[uj], s.Tags[ui]
}
if n := uint(len(s.Types)); ui < n && uj < n {
s.Types[ui], s.Types[uj] = s.Types[uj], s.Types[ui]
}
if n := uint(len(s.SeriesIDs)); ui < n && uj < n {
s.SeriesIDs[ui], s.SeriesIDs[uj] = s.SeriesIDs[uj], s.SeriesIDs[ui]
}
}
// Truncate will truncate all of the slices that can down to length: x = x[:length].
func (s *SeriesCollection) Truncate(length int) {
ulength := uint(length)
if ulength < uint(len(s.Points)) {
s.Points = s.Points[:ulength]
}
if ulength < uint(len(s.Keys)) {
s.Keys = s.Keys[:ulength]
}
if ulength < uint(len(s.SeriesKeys)) {
s.SeriesKeys = s.SeriesKeys[:ulength]
}
if ulength < uint(len(s.Names)) {
s.Names = s.Names[:ulength]
}
if ulength < uint(len(s.Tags)) {
s.Tags = s.Tags[:ulength]
}
if ulength < uint(len(s.Types)) {
s.Types = s.Types[:ulength]
}
if ulength < uint(len(s.SeriesIDs)) {
s.SeriesIDs = s.SeriesIDs[:ulength]
}
}
// Advance will advance all of the slices that can length elements: x = x[length:].
func (s *SeriesCollection) Advance(length int) {
ulength := uint(length)
if ulength < uint(len(s.Points)) {
s.Points = s.Points[ulength:]
}
if ulength < uint(len(s.Keys)) {
s.Keys = s.Keys[ulength:]
}
if ulength < uint(len(s.SeriesKeys)) {
s.SeriesKeys = s.SeriesKeys[ulength:]
}
if ulength < uint(len(s.Names)) {
s.Names = s.Names[ulength:]
}
if ulength < uint(len(s.Tags)) {
s.Tags = s.Tags[ulength:]
}
if ulength < uint(len(s.Types)) {
s.Types = s.Types[ulength:]
}
if ulength < uint(len(s.SeriesIDs)) {
s.SeriesIDs = s.SeriesIDs[ulength:]
}
}
// InvalidateAll causes all of the entries to become invalid.
func (s *SeriesCollection) InvalidateAll(reason string) {
if s.Reason == "" {
s.Reason = reason
}
s.Dropped += uint64(len(s.Keys))
s.DroppedKeys = append(s.DroppedKeys, s.Keys...)
s.Truncate(0)
}
// ApplyConcurrentDrops will remove all of the dropped values during concurrent iteration. It should
// not be called concurrently with any calls to Invalid.
func (s *SeriesCollection) ApplyConcurrentDrops() {
state := s.getState(false)
if state == nil {
return
}
length, j := s.Length(), 0
for i := 0; i < length; i++ {
if _, ok := state.index[i]; ok {
s.Dropped++
if i < len(s.Keys) {
s.DroppedKeys = append(s.DroppedKeys, s.Keys[i])
}
continue
}
s.Copy(j, i)
j++
}
s.Truncate(j)
if s.Reason == "" {
s.Reason = state.reason
}
// clear concurrent state
atomic.StorePointer((*unsafe.Pointer)(unsafe.Pointer(&s.state)), nil)
}
// getState returns the SeriesCollection's concurrent state. If alloc is true and there
// is no state, it will attempt to allocate one and set it. It is safe to call concurrently, but
// not with ApplyConcurrentDrops.
func (s *SeriesCollection) getState(alloc bool) *seriesCollectionState {
addr := (*unsafe.Pointer)(unsafe.Pointer(&s.state))
// fast path: load pointer and it already exists. always return the result if we can't alloc.
if ptr := atomic.LoadPointer(addr); ptr != nil || !alloc {
return (*seriesCollectionState)(ptr)
}
// nothing there. make a new state and try to swap it in.
atomic.CompareAndSwapPointer(addr, nil, unsafe.Pointer(new(seriesCollectionState)))
// reload the pointer. this way we always end up with the winner of the race.
return (*seriesCollectionState)(atomic.LoadPointer(addr))
}
// invalidIndex stages the index as invalid with the reason. It will be removed when
// ApplyConcurrentDrops is called.
func (s *SeriesCollection) invalidIndex(index int, reason string) {
state := s.getState(true)
state.mu.Lock()
if state.index == nil {
state.index = make(map[int]struct{})
}
state.index[index] = struct{}{}
if state.reason == "" {
state.reason = reason
}
state.mu.Unlock()
}
// PartialWriteError returns a PartialWriteError if any entries have been marked as invalid. It
// returns an error to avoid `return collection.PartialWriteError()` always being non-nil.
func (s *SeriesCollection) PartialWriteError() error {
if s.Dropped == 0 {
return nil
}
droppedKeys := bytesutil.SortDedup(s.DroppedKeys)
return PartialWriteError{
Reason: s.Reason,
Dropped: len(droppedKeys),
DroppedKeys: droppedKeys,
}
}
// Iterator returns a new iterator over the entries in the collection. Multiple iterators
// can exist at the same time. Marking entries as invalid/skipped is more expensive, but thread
// safe. You must call ApplyConcurrentDrops after all of the iterators are finished.
func (s *SeriesCollection) Iterator() SeriesCollectionIterator {
return SeriesCollectionIterator{
s: s,
length: s.Length(),
index: -1,
}
}
// SeriesCollectionIterator is an iterator over the collection of series.
type SeriesCollectionIterator struct {
s *SeriesCollection
length int
index int
}
// Next advances the iterator and returns false if it's done.
func (i *SeriesCollectionIterator) Next() bool {
i.index++
return i.index < i.length
}
// Helpers that return the current state of the iterator.
func (i SeriesCollectionIterator) Index() int { return i.index }
func (i SeriesCollectionIterator) Length() int { return i.length }
func (i SeriesCollectionIterator) Point() models.Point { return i.s.Points[i.index] }
func (i SeriesCollectionIterator) Key() []byte { return i.s.Keys[i.index] }
func (i SeriesCollectionIterator) SeriesKey() []byte { return i.s.SeriesKeys[i.index] }
func (i SeriesCollectionIterator) Name() []byte { return i.s.Names[i.index] }
func (i SeriesCollectionIterator) Tags() models.Tags { return i.s.Tags[i.index] }
func (i SeriesCollectionIterator) Type() models.FieldType { return i.s.Types[i.index] }
func (i SeriesCollectionIterator) SeriesID() SeriesID { return i.s.SeriesIDs[i.index] }
// Invalid flags the current entry as invalid, including it in the set of dropped keys and
// recording a reason. Only the first reason is kept. This is safe for concurrent callers,
// but ApplyConcurrentDrops must be called after all iterators are finished.
func (i *SeriesCollectionIterator) Invalid(reason string) {
i.s.invalidIndex(i.index, reason)
}
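The intended stage-then-apply workflow for concurrent validation, modeled on the Invalid test below (in-package sketch; the exceedsLimit predicate is hypothetical):

func validate(collection *SeriesCollection, exceedsLimit func(key []byte) bool) error {
	// Safe to run from multiple goroutines, one iterator each.
	for iter := collection.Iterator(); iter.Next(); {
		if exceedsLimit(iter.Key()) {
			iter.Invalid("series cardinality limit reached")
		}
	}
	// Must be called once, after every iterator has finished.
	collection.ApplyConcurrentDrops()
	return collection.PartialWriteError() // nil if nothing was dropped
}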

View File

@ -1,149 +0,0 @@
package tsdb
import (
"reflect"
"testing"
"time"
"github.com/influxdata/influxdb/v2/models"
)
func TestSeriesCollection(t *testing.T) {
// some helper functions. short names because local scope and frequently used.
var (
equal = reflect.DeepEqual
b = func(s string) []byte { return []byte(s) }
bs = func(s ...string) [][]byte {
out := make([][]byte, len(s))
for i := range s {
out[i] = b(s[i])
}
return out
}
assertEqual = func(t *testing.T, name string, got, wanted interface{}) {
t.Helper()
if !equal(got, wanted) {
t.Fatalf("bad %s: got: %v but wanted: %v", name, got, wanted)
}
}
)
t.Run("New", func(t *testing.T) {
points := []models.Point{
models.MustNewPoint("a", models.Tags{}, models.Fields{"f": 1.0}, time.Now()),
models.MustNewPoint("b", models.Tags{}, models.Fields{"b": true}, time.Now()),
models.MustNewPoint("c", models.Tags{}, models.Fields{"i": int64(1)}, time.Now()),
}
collection := NewSeriesCollection(points)
assertEqual(t, "length", collection.Length(), 3)
for iter := collection.Iterator(); iter.Next(); {
ipt, spt := iter.Point(), points[iter.Index()]
fi := spt.FieldIterator()
fi.Next()
assertEqual(t, "point", ipt, spt)
assertEqual(t, "key", iter.Key(), spt.Key())
assertEqual(t, "name", iter.Name(), spt.Name())
assertEqual(t, "tags", iter.Tags(), spt.Tags())
assertEqual(t, "type", iter.Type(), fi.Type())
}
})
t.Run("Copy", func(t *testing.T) {
collection := &SeriesCollection{
Keys: bs("ka", "kb", "kc"),
Names: bs("na", "nb", "nc"),
}
collection.Copy(0, 2)
assertEqual(t, "keys", collection.Keys, bs("kc", "kb", "kc"))
assertEqual(t, "names", collection.Names, bs("nc", "nb", "nc"))
collection.Copy(0, 4) // out of bounds
assertEqual(t, "keys", collection.Keys, bs("kc", "kb", "kc"))
assertEqual(t, "names", collection.Names, bs("nc", "nb", "nc"))
})
t.Run("Swap", func(t *testing.T) {
collection := &SeriesCollection{
Keys: bs("ka", "kb", "kc"),
Names: bs("na", "nb", "nc"),
}
collection.Swap(0, 2)
assertEqual(t, "keys", collection.Keys, bs("kc", "kb", "ka"))
assertEqual(t, "names", collection.Names, bs("nc", "nb", "na"))
collection.Swap(0, 4) // out of bounds
assertEqual(t, "keys", collection.Keys, bs("kc", "kb", "ka"))
assertEqual(t, "names", collection.Names, bs("nc", "nb", "na"))
})
t.Run("Truncate", func(t *testing.T) {
collection := &SeriesCollection{
Keys: bs("ka", "kb", "kc"),
Names: bs("na", "nb", "nc"),
}
collection.Truncate(1)
assertEqual(t, "keys", collection.Keys, bs("ka"))
assertEqual(t, "names", collection.Names, bs("na"))
collection.Truncate(0)
assertEqual(t, "keys", collection.Keys, bs())
assertEqual(t, "names", collection.Names, bs())
})
t.Run("Advance", func(t *testing.T) {
collection := &SeriesCollection{
Keys: bs("ka", "kb", "kc"),
Names: bs("na", "nb", "nc"),
}
collection.Advance(1)
assertEqual(t, "keys", collection.Keys, bs("kb", "kc"))
assertEqual(t, "names", collection.Names, bs("nb", "nc"))
collection.Advance(1)
assertEqual(t, "keys", collection.Keys, bs("kc"))
assertEqual(t, "names", collection.Names, bs("nc"))
})
t.Run("InvalidateAll", func(t *testing.T) {
collection := &SeriesCollection{Keys: bs("ka", "kb", "kc")}
collection.InvalidateAll("test reason")
assertEqual(t, "length", collection.Length(), 0)
assertEqual(t, "error", collection.PartialWriteError(), PartialWriteError{
Reason: "test reason",
Dropped: 3,
DroppedKeys: bs("ka", "kb", "kc"),
})
})
t.Run("Invalid", func(t *testing.T) {
collection := &SeriesCollection{Keys: bs("ka", "kb", "kc")}
// invalidate half the entries
for iter := collection.Iterator(); iter.Next(); {
if iter.Index()%2 == 0 {
iter.Invalid("test reason")
}
}
// nothing happens yet: all values are staged
assertEqual(t, "length", collection.Length(), 3)
// apply all of the invalid calls
collection.ApplyConcurrentDrops()
assertEqual(t, "length", collection.Length(), 1)
assertEqual(t, "error", collection.PartialWriteError(), PartialWriteError{
Reason: "test reason",
Dropped: 2,
DroppedKeys: bs("ka", "kc"),
})
})
}

View File

@ -1,78 +0,0 @@
package tsdb
import (
"unsafe"
"github.com/influxdata/influxdb/v2/models"
)
const (
// constants describing bit layout of id and type info
seriesIDTypeFlag = 1 << 63 // a flag marking that the id contains type info
seriesIDValueMask = 0xFFFFFFFF // series ids numerically are 32 bits
seriesIDTypeShift = 32 // we put the type right after the value info
seriesIDTypeMask = 0xFF << seriesIDTypeShift // a mask for the type byte
seriesIDSize = 8
)
// SeriesID is the type of a series id. It is logically a uint64, but encoded as a struct so
// that we gain more type checking when changing operations on it. The field is exported only
// so that tests that use reflection based comparisons still work; no one should use the field
// directly.
type SeriesID struct{ ID uint64 }
// NewSeriesID constructs a series id from the raw value. It discards any type information.
func NewSeriesID(id uint64) SeriesID { return SeriesID{ID: id & seriesIDValueMask} }
// IsZero returns if the SeriesID is zero.
func (s SeriesID) IsZero() bool { return s.ID == 0 }
// RawID returns the raw id for the SeriesID.
func (s SeriesID) RawID() uint64 { return s.ID }
// WithType constructs a SeriesIDTyped with the given type.
func (s SeriesID) WithType(typ models.FieldType) SeriesIDTyped {
return NewSeriesIDTyped(s.ID | seriesIDTypeFlag | (uint64(typ&0xFF) << seriesIDTypeShift))
}
// Greater returns if the SeriesID is greater than the passed in value.
func (s SeriesID) Greater(o SeriesID) bool { return s.ID > o.ID }
// Less returns if the SeriesID is less than the passed in value.
func (s SeriesID) Less(o SeriesID) bool { return s.ID < o.ID }
// SeriesIDTyped represents a series id with a type. It is logically a uint64, but encoded as
// a struct so that we gain more type checking when changing operations on it. The field is
// exported only so that tests that use reflection based comparisons still work; no one should
// use the field directly.
type SeriesIDTyped struct{ ID uint64 }
// NewSeriesIDTyped constructs a typed series id from the raw values.
func NewSeriesIDTyped(id uint64) SeriesIDTyped { return SeriesIDTyped{ID: id} }
// IsZero returns if the SeriesIDTyped is zero. It ignores any type information.
func (s SeriesIDTyped) IsZero() bool { return s.ID&seriesIDValueMask == 0 }
// RawID returns the raw id for the SeriesIDTyped.
func (s SeriesIDTyped) RawID() uint64 { return s.ID }
// SeriesID constructs a SeriesID, discarding any type information.
func (s SeriesIDTyped) SeriesID() SeriesID { return NewSeriesID(s.ID) }
// HasType returns if the id actually contains a type.
func (s SeriesIDTyped) HasType() bool { return s.ID&seriesIDTypeFlag > 0 }
// Type returns the associated type.
func (s SeriesIDTyped) Type() models.FieldType {
return models.FieldType((s.ID & seriesIDTypeMask) >> seriesIDTypeShift)
}
type (
// some static assertions that the SeriesIDSize matches the structs we defined.
// if the values are not the same, at least one will be negative causing a compilation failure
_ [seriesIDSize - unsafe.Sizeof(SeriesID{})]byte
_ [unsafe.Sizeof(SeriesID{}) - seriesIDSize]byte
_ [seriesIDSize - unsafe.Sizeof(SeriesIDTyped{})]byte
_ [unsafe.Sizeof(SeriesIDTyped{}) - seriesIDSize]byte
)
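A round trip through the bit layout above, mirroring TestSeriesID below (v1/tsdb import path assumed):

package main

import (
	"fmt"

	"github.com/influxdata/influxdb/v2/models"
	"github.com/influxdata/influxdb/v2/v1/tsdb" // assumed path
)

func main() {
	id := tsdb.NewSeriesID(42)

	// The type byte sits above the 32-bit value, guarded by the high flag bit.
	typed := id.WithType(models.Float)
	fmt.Println(typed.HasType())              // true
	fmt.Println(typed.Type() == models.Float) // true

	// Dropping the type recovers the original id.
	fmt.Println(typed.SeriesID() == id) // true
}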

View File

@ -1,31 +0,0 @@
package tsdb
import (
"math/rand"
"testing"
"github.com/influxdata/influxdb/v2/models"
)
func TestSeriesID(t *testing.T) {
types := []models.FieldType{
models.Integer,
models.Float,
models.Boolean,
models.String,
models.Unsigned,
}
for i := 0; i < 1000000; i++ {
id := NewSeriesID(uint64(rand.Int31()))
for _, typ := range types {
typed := id.WithType(typ)
if got := typed.Type(); got != typ {
t.Fatalf("wanted: %v got: %v", typ, got)
}
if got := typed.SeriesID(); id != got {
t.Fatalf("wanted: %016x got: %016x", id, got)
}
}
}
}

View File

@ -1,699 +0,0 @@
package tsdb
import (
"bytes"
"github.com/influxdata/influxql"
)
// SeriesIDElem represents a single series and optional expression.
type SeriesIDElem struct {
SeriesID SeriesID
Expr influxql.Expr
}
// SeriesIDIterator represents an iterator over a list of series ids.
type SeriesIDIterator interface {
Next() (SeriesIDElem, error)
Close() error
}
// SeriesIDSetIterator represents an iterator that can produce a SeriesIDSet.
type SeriesIDSetIterator interface {
SeriesIDIterator
SeriesIDSet() *SeriesIDSet
}
type seriesIDSetIterator struct {
ss *SeriesIDSet
itr SeriesIDSetIterable
}
func NewSeriesIDSetIterator(ss *SeriesIDSet) SeriesIDSetIterator {
if ss == nil || ss.bitmap == nil {
return nil
}
return &seriesIDSetIterator{ss: ss, itr: ss.Iterator()}
}
func (itr *seriesIDSetIterator) Next() (SeriesIDElem, error) {
if !itr.itr.HasNext() {
return SeriesIDElem{}, nil
}
return SeriesIDElem{SeriesID: NewSeriesID(uint64(itr.itr.Next()))}, nil
}
func (itr *seriesIDSetIterator) Close() error { return nil }
func (itr *seriesIDSetIterator) SeriesIDSet() *SeriesIDSet { return itr.ss }
// NewSeriesIDSetIterators returns a slice of SeriesIDSetIterator if all itrs
// can be type asserted to SeriesIDSetIterator. Otherwise it returns nil.
func NewSeriesIDSetIterators(itrs []SeriesIDIterator) []SeriesIDSetIterator {
if len(itrs) == 0 {
return nil
}
a := make([]SeriesIDSetIterator, len(itrs))
for i := range itrs {
if itr, ok := itrs[i].(SeriesIDSetIterator); ok {
a[i] = itr
} else {
return nil
}
}
return a
}
// NewSeriesIDSliceIterator returns a SeriesIDIterator that iterates over a slice.
func NewSeriesIDSliceIterator(ids []SeriesID) *SeriesIDSliceIterator {
return &SeriesIDSliceIterator{ids: ids}
}
// SeriesIDSliceIterator iterates over a slice of series ids.
type SeriesIDSliceIterator struct {
ids []SeriesID
}
// Next returns the next series id in the slice.
func (itr *SeriesIDSliceIterator) Next() (SeriesIDElem, error) {
if len(itr.ids) == 0 {
return SeriesIDElem{}, nil
}
id := itr.ids[0]
itr.ids = itr.ids[1:]
return SeriesIDElem{SeriesID: id}, nil
}
func (itr *SeriesIDSliceIterator) Close() error { return nil }
// SeriesIDSet returns a set of all remaining ids.
func (itr *SeriesIDSliceIterator) SeriesIDSet() *SeriesIDSet {
s := NewSeriesIDSet()
for _, id := range itr.ids {
s.AddNoLock(id)
}
return s
}
type SeriesIDIterators []SeriesIDIterator
func (a SeriesIDIterators) Close() (err error) {
for i := range a {
if e := a[i].Close(); e != nil && err == nil {
err = e
}
}
return err
}
// SeriesIDExprIterator is an iterator that attaches an associated expression.
type SeriesIDExprIterator struct {
itr SeriesIDIterator
expr influxql.Expr
}
// NewSeriesIDExprIterator returns a new instance of SeriesIDExprIterator.
func NewSeriesIDExprIterator(itr SeriesIDIterator, expr influxql.Expr) SeriesIDIterator {
if itr == nil {
return nil
}
return &SeriesIDExprIterator{
itr: itr,
expr: expr,
}
}
func (itr *SeriesIDExprIterator) Close() error {
return itr.itr.Close()
}
// Next returns the next element in the iterator.
func (itr *SeriesIDExprIterator) Next() (SeriesIDElem, error) {
elem, err := itr.itr.Next()
if err != nil {
return SeriesIDElem{}, err
} else if elem.SeriesID.IsZero() {
return SeriesIDElem{}, nil
}
elem.Expr = itr.expr
return elem, nil
}
// MergeSeriesIDIterators returns an iterator that merges a set of iterators.
// Iterators that are first in the list take precedence and a deletion by those
// early iterators will invalidate elements by later iterators.
func MergeSeriesIDIterators(itrs ...SeriesIDIterator) SeriesIDIterator {
if n := len(itrs); n == 0 {
return nil
} else if n == 1 {
return itrs[0]
}
// Merge as series id sets, if available.
if a := NewSeriesIDSetIterators(itrs); a != nil {
sets := make([]*SeriesIDSet, len(a))
for i := range a {
sets[i] = a[i].SeriesIDSet()
}
ss := NewSeriesIDSet()
ss.Merge(sets...)
SeriesIDIterators(itrs).Close()
return NewSeriesIDSetIterator(ss)
}
return &seriesIDMergeIterator{
buf: make([]SeriesIDElem, len(itrs)),
itrs: itrs,
}
}
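// Illustrative sketch (not part of the original file): merging two slice
// iterators yields each distinct id once, in ascending order.
//
//	itr := MergeSeriesIDIterators(
//		NewSeriesIDSliceIterator([]SeriesID{NewSeriesID(1), NewSeriesID(3)}),
//		NewSeriesIDSliceIterator([]SeriesID{NewSeriesID(2), NewSeriesID(3)}),
//	)
//	// Next returns ids 1, 2, 3, then a zero SeriesIDElem.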
// seriesIDMergeIterator is an iterator that merges multiple iterators together.
type seriesIDMergeIterator struct {
buf []SeriesIDElem
itrs []SeriesIDIterator
}
func (itr *seriesIDMergeIterator) Close() (err error) {
return SeriesIDIterators(itr.itrs).Close()
}
// Next returns the element with the next lowest series id across the iterators.
func (itr *seriesIDMergeIterator) Next() (SeriesIDElem, error) {
// Find next lowest id amongst the buffers.
var elem SeriesIDElem
for i := range itr.buf {
buf := &itr.buf[i]
// Fill buffer.
if buf.SeriesID.IsZero() {
elem, err := itr.itrs[i].Next()
if err != nil {
return SeriesIDElem{}, err
} else if elem.SeriesID.IsZero() {
continue
}
itr.buf[i] = elem
}
if elem.SeriesID.IsZero() || buf.SeriesID.Less(elem.SeriesID) {
elem = *buf
}
}
// Return EOF if no elements remaining.
if elem.SeriesID.IsZero() {
return SeriesIDElem{}, nil
}
// Clear matching buffers.
for i := range itr.buf {
if itr.buf[i].SeriesID == elem.SeriesID {
itr.buf[i].SeriesID = SeriesID{}
}
}
return elem, nil
}
// IntersectSeriesIDIterators returns an iterator that only returns series which
// occur in both iterators. If both series have associated expressions then
// they are combined together.
func IntersectSeriesIDIterators(itr0, itr1 SeriesIDIterator) SeriesIDIterator {
if itr0 == nil || itr1 == nil {
if itr0 != nil {
itr0.Close()
}
if itr1 != nil {
itr1.Close()
}
return nil
}
// Create series id set, if available.
if a := NewSeriesIDSetIterators([]SeriesIDIterator{itr0, itr1}); a != nil {
itr0.Close()
itr1.Close()
return NewSeriesIDSetIterator(a[0].SeriesIDSet().And(a[1].SeriesIDSet()))
}
return &seriesIDIntersectIterator{itrs: [2]SeriesIDIterator{itr0, itr1}}
}
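// Illustrative sketch (not part of the original file): intersecting two
// iterators keeps only the ids present in both.
//
//	itr := IntersectSeriesIDIterators(
//		NewSeriesIDSliceIterator([]SeriesID{NewSeriesID(1), NewSeriesID(2)}),
//		NewSeriesIDSliceIterator([]SeriesID{NewSeriesID(2), NewSeriesID(3)}),
//	)
//	// Next returns id 2 only. On the non-set-backed path, elements that both
//	// carry expressions have those expressions ANDed together.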
// seriesIDIntersectIterator is an iterator that intersects two iterators.
type seriesIDIntersectIterator struct {
buf [2]SeriesIDElem
itrs [2]SeriesIDIterator
}
func (itr *seriesIDIntersectIterator) Close() (err error) {
if e := itr.itrs[0].Close(); e != nil && err == nil {
err = e
}
if e := itr.itrs[1].Close(); e != nil && err == nil {
err = e
}
return err
}
// Next returns the next element which occurs in both iterators.
func (itr *seriesIDIntersectIterator) Next() (_ SeriesIDElem, err error) {
for {
// Fill buffers.
if itr.buf[0].SeriesID.IsZero() {
if itr.buf[0], err = itr.itrs[0].Next(); err != nil {
return SeriesIDElem{}, err
}
}
if itr.buf[1].SeriesID.IsZero() {
if itr.buf[1], err = itr.itrs[1].Next(); err != nil {
return SeriesIDElem{}, err
}
}
// Exit if either buffer is still empty.
if itr.buf[0].SeriesID.IsZero() || itr.buf[1].SeriesID.IsZero() {
return SeriesIDElem{}, nil
}
// Skip if both series are not equal.
if a, b := itr.buf[0].SeriesID, itr.buf[1].SeriesID; a.Less(b) {
itr.buf[0].SeriesID = SeriesID{}
continue
} else if a.Greater(b) {
itr.buf[1].SeriesID = SeriesID{}
continue
}
// Merge series together if equal.
elem := itr.buf[0]
// Attach expression.
expr0 := itr.buf[0].Expr
expr1 := itr.buf[1].Expr
if expr0 == nil {
elem.Expr = expr1
} else if expr1 == nil {
elem.Expr = expr0
} else {
elem.Expr = influxql.Reduce(&influxql.BinaryExpr{
Op: influxql.AND,
LHS: expr0,
RHS: expr1,
}, nil)
}
itr.buf[0].SeriesID, itr.buf[1].SeriesID = SeriesID{}, SeriesID{}
return elem, nil
}
}
// UnionSeriesIDIterators returns an iterator that returns series from both
// iterators. If both series have associated expressions then they are
// combined together.
func UnionSeriesIDIterators(itr0, itr1 SeriesIDIterator) SeriesIDIterator {
// Return other iterator if either one is nil.
if itr0 == nil {
return itr1
} else if itr1 == nil {
return itr0
}
// Create series id set, if available.
if a := NewSeriesIDSetIterators([]SeriesIDIterator{itr0, itr1}); a != nil {
itr0.Close()
itr1.Close()
ss := NewSeriesIDSet()
ss.Merge(a[0].SeriesIDSet(), a[1].SeriesIDSet())
return NewSeriesIDSetIterator(ss)
}
return &seriesIDUnionIterator{itrs: [2]SeriesIDIterator{itr0, itr1}}
}
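// Illustrative sketch (not part of the original file): the union returns ids
// present in either iterator, each id once, in ascending order.
//
//	itr := UnionSeriesIDIterators(
//		NewSeriesIDSliceIterator([]SeriesID{NewSeriesID(1), NewSeriesID(2)}),
//		NewSeriesIDSliceIterator([]SeriesID{NewSeriesID(2), NewSeriesID(3)}),
//	)
//	// Next returns ids 1, 2, 3. On the non-set-backed path, expressions on
//	// matching elements are ORed together.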
// seriesIDUnionIterator is an iterator that unions two iterators together.
type seriesIDUnionIterator struct {
buf [2]SeriesIDElem
itrs [2]SeriesIDIterator
}
func (itr *seriesIDUnionIterator) Close() (err error) {
if e := itr.itrs[0].Close(); e != nil && err == nil {
err = e
}
if e := itr.itrs[1].Close(); e != nil && err == nil {
err = e
}
return err
}
// Next returns the next element which occurs in either iterator.
func (itr *seriesIDUnionIterator) Next() (_ SeriesIDElem, err error) {
// Fill buffers.
if itr.buf[0].SeriesID.IsZero() {
if itr.buf[0], err = itr.itrs[0].Next(); err != nil {
return SeriesIDElem{}, err
}
}
if itr.buf[1].SeriesID.IsZero() {
if itr.buf[1], err = itr.itrs[1].Next(); err != nil {
return SeriesIDElem{}, err
}
}
// Return non-zero or lesser series.
if a, b := itr.buf[0].SeriesID, itr.buf[1].SeriesID; a.IsZero() && b.IsZero() {
return SeriesIDElem{}, nil
} else if b.IsZero() || (!a.IsZero() && a.Less(b)) {
elem := itr.buf[0]
itr.buf[0].SeriesID = SeriesID{}
return elem, nil
} else if a.IsZero() || (!b.IsZero() && a.Greater(b)) {
elem := itr.buf[1]
itr.buf[1].SeriesID = SeriesID{}
return elem, nil
}
// Series are equal; take the first element.
elem := itr.buf[0]
// Attach expression.
expr0 := itr.buf[0].Expr
expr1 := itr.buf[1].Expr
if expr0 != nil && expr1 != nil {
elem.Expr = influxql.Reduce(&influxql.BinaryExpr{
Op: influxql.OR,
LHS: expr0,
RHS: expr1,
}, nil)
} else {
elem.Expr = nil
}
itr.buf[0].SeriesID, itr.buf[1].SeriesID = SeriesID{}, SeriesID{}
return elem, nil
}
// DifferenceSeriesIDIterators returns an iterator that only returns series which
// occur in the first iterator but not in the second iterator.
func DifferenceSeriesIDIterators(itr0, itr1 SeriesIDIterator) SeriesIDIterator {
if itr0 == nil && itr1 == nil {
return nil
} else if itr1 == nil {
return itr0
} else if itr0 == nil {
itr1.Close()
return nil
}
// Create series id set, if available.
if a := NewSeriesIDSetIterators([]SeriesIDIterator{itr0, itr1}); a != nil {
itr0.Close()
itr1.Close()
return NewSeriesIDSetIterator(NewSeriesIDSetNegate(a[0].SeriesIDSet(), a[1].SeriesIDSet()))
}
return &seriesIDDifferenceIterator{itrs: [2]SeriesIDIterator{itr0, itr1}}
}
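// Illustrative sketch (not part of the original file): the difference keeps
// ids from the first iterator that do not appear in the second.
//
//	itr := DifferenceSeriesIDIterators(
//		NewSeriesIDSliceIterator([]SeriesID{NewSeriesID(1), NewSeriesID(2)}),
//		NewSeriesIDSliceIterator([]SeriesID{NewSeriesID(2)}),
//	)
//	// Next returns id 1 only.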
// seriesIDDifferenceIterator is an iterator that computes the difference of two iterators.
type seriesIDDifferenceIterator struct {
buf [2]SeriesIDElem
itrs [2]SeriesIDIterator
}
func (itr *seriesIDDifferenceIterator) Close() (err error) {
if e := itr.itrs[0].Close(); e != nil && err == nil {
err = e
}
if e := itr.itrs[1].Close(); e != nil && err == nil {
err = e
}
return err
}
// Next returns the next element which occurs only in the first iterator.
func (itr *seriesIDDifferenceIterator) Next() (_ SeriesIDElem, err error) {
for {
// Fill buffers.
if itr.buf[0].SeriesID.IsZero() {
if itr.buf[0], err = itr.itrs[0].Next(); err != nil {
return SeriesIDElem{}, err
}
}
if itr.buf[1].SeriesID.IsZero() {
if itr.buf[1], err = itr.itrs[1].Next(); err != nil {
return SeriesIDElem{}, err
}
}
// Exit if first buffer is still empty.
if itr.buf[0].SeriesID.IsZero() {
return SeriesIDElem{}, nil
} else if itr.buf[1].SeriesID.IsZero() {
elem := itr.buf[0]
itr.buf[0].SeriesID = SeriesID{}
return elem, nil
}
// Return first series if it's less.
// If second series is less then skip it.
// If both series are equal then skip both.
if a, b := itr.buf[0].SeriesID, itr.buf[1].SeriesID; a.Less(b) {
elem := itr.buf[0]
itr.buf[0].SeriesID = SeriesID{}
return elem, nil
} else if a.Greater(b) {
itr.buf[1].SeriesID = SeriesID{}
continue
} else {
itr.buf[0].SeriesID, itr.buf[1].SeriesID = SeriesID{}, SeriesID{}
continue
}
}
}
// MeasurementIterator represents an iterator over a list of measurements.
type MeasurementIterator interface {
Close() error
Next() ([]byte, error)
}
// MergeMeasurementIterators returns an iterator that merges a set of iterators.
// Iterators that are first in the list take precedence and a deletion by those
// early iterators will invalidate elements produced by later iterators.
func MergeMeasurementIterators(itrs ...MeasurementIterator) MeasurementIterator {
if len(itrs) == 0 {
return nil
} else if len(itrs) == 1 {
return itrs[0]
}
return &measurementMergeIterator{
buf: make([][]byte, len(itrs)),
itrs: itrs,
}
}
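// Illustrative sketch (not part of the original file): given two measurement
// iterators yielding {"cpu", "mem"} and {"cpu", "disk"} respectively, the
// merged iterator yields "cpu", "disk", "mem": each name once, in byte order.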
type measurementMergeIterator struct {
buf [][]byte
itrs []MeasurementIterator
}
func (itr *measurementMergeIterator) Close() (err error) {
for i := range itr.itrs {
if e := itr.itrs[i].Close(); e != nil && err == nil {
err = e
}
}
return err
}
// Next returns the element with the next lowest name across the iterators.
//
// If multiple iterators contain the same name then the first is returned
// and the remaining ones are skipped.
func (itr *measurementMergeIterator) Next() (_ []byte, err error) {
// Find next lowest name amongst the buffers.
var name []byte
for i, buf := range itr.buf {
// Fill buffer if empty.
if buf == nil {
if buf, err = itr.itrs[i].Next(); err != nil {
return nil, err
} else if buf != nil {
itr.buf[i] = buf
} else {
continue
}
}
// Find next lowest name.
if name == nil || bytes.Compare(itr.buf[i], name) == -1 {
name = itr.buf[i]
}
}
// Return nil if no elements remaining.
if name == nil {
return nil, nil
}
// Merge all elements together and clear buffers.
for i, buf := range itr.buf {
if buf == nil || !bytes.Equal(buf, name) {
continue
}
itr.buf[i] = nil
}
return name, nil
}
// TagKeyIterator represents an iterator over a list of tag keys.
type TagKeyIterator interface {
Close() error
Next() ([]byte, error)
}
// MergeTagKeyIterators returns an iterator that merges a set of iterators.
func MergeTagKeyIterators(itrs ...TagKeyIterator) TagKeyIterator {
if len(itrs) == 0 {
return nil
} else if len(itrs) == 1 {
return itrs[0]
}
return &tagKeyMergeIterator{
buf: make([][]byte, len(itrs)),
itrs: itrs,
}
}
type tagKeyMergeIterator struct {
buf [][]byte
itrs []TagKeyIterator
}
func (itr *tagKeyMergeIterator) Close() (err error) {
for i := range itr.itrs {
if e := itr.itrs[i].Close(); e != nil && err == nil {
err = e
}
}
return err
}
// Next returns the element with the next lowest key across the iterators.
//
// If multiple iterators contain the same key then the first is returned
// and the remaining ones are skipped.
func (itr *tagKeyMergeIterator) Next() (_ []byte, err error) {
// Find next lowest key amongst the buffers.
var key []byte
for i, buf := range itr.buf {
// Fill buffer.
if buf == nil {
if buf, err = itr.itrs[i].Next(); err != nil {
return nil, err
} else if buf != nil {
itr.buf[i] = buf
} else {
continue
}
}
// Find next lowest key.
if key == nil || bytes.Compare(buf, key) == -1 {
key = buf
}
}
// Return nil if no elements remaining.
if key == nil {
return nil, nil
}
// Merge elements and clear buffers.
for i, buf := range itr.buf {
if buf == nil || !bytes.Equal(buf, key) {
continue
}
itr.buf[i] = nil
}
return key, nil
}
// TagValueIterator represents an iterator over a list of tag values.
type TagValueIterator interface {
Close() error
Next() ([]byte, error)
}
// MergeTagValueIterators returns an iterator that merges a set of iterators.
func MergeTagValueIterators(itrs ...TagValueIterator) TagValueIterator {
if len(itrs) == 0 {
return nil
} else if len(itrs) == 1 {
return itrs[0]
}
return &tagValueMergeIterator{
buf: make([][]byte, len(itrs)),
itrs: itrs,
}
}
type tagValueMergeIterator struct {
buf [][]byte
itrs []TagValueIterator
}
func (itr *tagValueMergeIterator) Close() (err error) {
for i := range itr.itrs {
if e := itr.itrs[i].Close(); e != nil && err == nil {
err = e
}
}
return err
}
// Next returns the element with the next lowest value across the iterators.
//
// If multiple iterators contain the same value then the first is returned
// and the remaining ones are skipped.
func (itr *tagValueMergeIterator) Next() (_ []byte, err error) {
// Find next lowest value amongst the buffers.
var value []byte
for i, buf := range itr.buf {
// Fill buffer.
if buf == nil {
if buf, err = itr.itrs[i].Next(); err != nil {
return nil, err
} else if buf != nil {
itr.buf[i] = buf
} else {
continue
}
}
// Find next lowest value.
if value == nil || bytes.Compare(buf, value) == -1 {
value = buf
}
}
// Return nil if no elements remaining.
if value == nil {
return nil, nil
}
// Merge elements and clear buffers.
for i, buf := range itr.buf {
if buf == nil || !bytes.Equal(buf, value) {
continue
}
itr.buf[i] = nil
}
return value, nil
}

View File

@@ -1,357 +0,0 @@
package tsdb_test
import (
"compress/gzip"
"context"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"reflect"
"sync"
"testing"
"github.com/influxdata/influxdb/v2/logger"
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/query"
"github.com/influxdata/influxdb/v2/tsdb"
"github.com/influxdata/influxdb/v2/tsdb/seriesfile"
"github.com/influxdata/influxdb/v2/tsdb/tsi1"
"github.com/influxdata/influxql"
)
func toSeriesIDs(ids []uint64) []tsdb.SeriesID {
sids := make([]tsdb.SeriesID, 0, len(ids))
for _, id := range ids {
sids = append(sids, tsdb.NewSeriesID(id))
}
return sids
}
// Ensure iterator can merge multiple iterators together.
func TestMergeSeriesIDIterators(t *testing.T) {
itr := tsdb.MergeSeriesIDIterators(
tsdb.NewSeriesIDSliceIterator(toSeriesIDs([]uint64{1, 2, 3})),
tsdb.NewSeriesIDSliceIterator(nil),
tsdb.NewSeriesIDSliceIterator(toSeriesIDs([]uint64{1, 2, 3, 4})),
)
if e, err := itr.Next(); err != nil {
t.Fatal(err)
} else if !reflect.DeepEqual(e, tsdb.SeriesIDElem{SeriesID: tsdb.NewSeriesID(1)}) {
t.Fatalf("unexpected elem(0): %#v", e)
}
if e, err := itr.Next(); err != nil {
t.Fatal(err)
} else if !reflect.DeepEqual(e, tsdb.SeriesIDElem{SeriesID: tsdb.NewSeriesID(2)}) {
t.Fatalf("unexpected elem(1): %#v", e)
}
if e, err := itr.Next(); err != nil {
t.Fatal(err)
} else if !reflect.DeepEqual(e, tsdb.SeriesIDElem{SeriesID: tsdb.NewSeriesID(3)}) {
t.Fatalf("unexpected elem(2): %#v", e)
}
if e, err := itr.Next(); err != nil {
t.Fatal(err)
} else if !reflect.DeepEqual(e, tsdb.SeriesIDElem{SeriesID: tsdb.NewSeriesID(4)}) {
t.Fatalf("unexpected elem(3): %#v", e)
}
if e, err := itr.Next(); err != nil {
t.Fatal(err)
} else if !e.SeriesID.IsZero() {
t.Fatalf("expected nil elem: %#v", e)
}
}
// Index wraps a series file and index.
type Index struct {
rootPath string
config tsi1.Config
*tsi1.Index
sfile *seriesfile.SeriesFile
}
// MustNewIndex will initialize a new index using the provided type. It creates
// everything under the same root directory so it can be cleanly removed on Close.
//
// The index will not be opened.
func MustNewIndex(c tsi1.Config) *Index {
rootPath, err := ioutil.TempDir("", "influxdb-tsdb")
if err != nil {
panic(err)
}
seriesPath, err := ioutil.TempDir(rootPath, "_series")
if err != nil {
panic(err)
}
sfile := seriesfile.NewSeriesFile(seriesPath)
if err := sfile.Open(context.Background()); err != nil {
panic(err)
}
i := tsi1.NewIndex(sfile, c, tsi1.WithPath(filepath.Join(rootPath, "index")))
if testing.Verbose() {
i.WithLogger(logger.New(os.Stderr))
}
idx := &Index{
config: c,
Index: i,
rootPath: rootPath,
sfile: sfile,
}
return idx
}
// MustOpenNewIndex will initialize a new index using the provided type and
// open it.
func MustOpenNewIndex(c tsi1.Config) *Index {
idx := MustNewIndex(c)
idx.MustOpen()
return idx
}
// MustOpen opens the underlying index or panics.
func (i *Index) MustOpen() {
if err := i.Index.Open(context.Background()); err != nil {
panic(err)
}
}
// Reopen closes and re-opens the underlying index, without removing any data.
func (i *Index) Reopen() error {
if err := i.Index.Close(); err != nil {
return err
}
if err := i.sfile.Close(); err != nil {
return err
}
i.sfile = seriesfile.NewSeriesFile(i.sfile.Path())
if err := i.sfile.Open(context.Background()); err != nil {
return err
}
i.Index = tsi1.NewIndex(i.SeriesFile(), i.config,
tsi1.WithPath(filepath.Join(i.rootPath, "index")))
return i.Index.Open(context.Background())
}
// Close closes the index cleanly and removes all on-disk data.
func (i *Index) Close() error {
if err := i.Index.Close(); err != nil {
return err
}
if err := i.sfile.Close(); err != nil {
return err
}
return os.RemoveAll(i.rootPath)
}
// This benchmark compares the TagSets implementation across index types.
//
// In the case of the TSI index, TagSets has to merge results across all of the
// index partitions.
//
// Typical results on an i7 laptop.
//
// BenchmarkIndex_TagSets/1M_series/tsi1-8 100 18995530 ns/op 5221180 B/op 20379 allocs/op
func BenchmarkIndex_TagSets(b *testing.B) {
// Read line-protocol and coerce into tsdb format.
// 1M series generated with:
// $inch -b 10000 -c 1 -t 10,10,10,10,10,10 -f 1 -m 5 -p 1
fd, err := os.Open("testdata/line-protocol-1M.txt.gz")
if err != nil {
b.Fatal(err)
}
gzr, err := gzip.NewReader(fd)
if err != nil {
fd.Close()
b.Fatal(err)
}
data, err := ioutil.ReadAll(gzr)
if err != nil {
b.Fatal(err)
}
if err := fd.Close(); err != nil {
b.Fatal(err)
}
points, err := models.ParsePoints(data, []byte("mm"))
if err != nil {
b.Fatal(err)
}
// setup writes all of the above points to the index.
setup := func(idx *Index) {
batchSize := 10000
for j := 0; j < 1; j++ {
for i := 0; i < len(points); i += batchSize {
collection := tsdb.NewSeriesCollection(points[i : i+batchSize])
if err := idx.CreateSeriesListIfNotExists(collection); err != nil {
b.Fatal(err)
}
}
}
}
var errResult error
// This benchmark will merge eight bitsets each containing ~10,000 series IDs.
b.Run("1M series", func(b *testing.B) {
idx := MustOpenNewIndex(tsi1.NewConfig())
setup(idx)
defer idx.Close()
name := []byte("m4")
opt := query.IteratorOptions{Condition: influxql.MustParseExpr(`"tag5"::tag = 'value0'`)}
ts := func() ([]*query.TagSet, error) {
return idx.Index.TagSets(name, opt)
}
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
// Will call TagSets on the appropriate implementation.
_, errResult = ts()
if errResult != nil {
b.Fatal(errResult)
}
}
if err := idx.Close(); err != nil {
b.Fatal(err)
}
})
}
// This benchmark concurrently writes series to the index and fetches cached bitsets.
// The idea is to emphasize the performance difference when bitset caching is on and off.
//
// Typical results for an i7 laptop
//
// BenchmarkIndex_ConcurrentWriteQuery/inmem/queries_100000/cache-8 1 5963346204 ns/op 2499655768 B/op 23964183 allocs/op
// BenchmarkIndex_ConcurrentWriteQuery/inmem/queries_100000/no_cache-8 1 5314841090 ns/op 2499495280 B/op 23963322 allocs/op
// BenchmarkIndex_ConcurrentWriteQuery/tsi1/queries_100000/cache-8 1 1645048376 ns/op 2215402840 B/op 23048978 allocs/op
// BenchmarkIndex_ConcurrentWriteQuery/tsi1/queries_100000/no_cache-8 1 22242155616 ns/op 28277544136 B/op 79620463 allocs/op
func BenchmarkIndex_ConcurrentWriteQuery(b *testing.B) {
// Read line-protocol and coerce into tsdb format.
// 1M series generated with:
// $inch -b 10000 -c 1 -t 10,10,10,10,10,10 -f 1 -m 5 -p 1
fd, err := os.Open("testdata/line-protocol-1M.txt.gz")
if err != nil {
b.Fatal(err)
}
gzr, err := gzip.NewReader(fd)
if err != nil {
fd.Close()
b.Fatal(err)
}
data, err := ioutil.ReadAll(gzr)
if err != nil {
b.Fatal(err)
}
if err := fd.Close(); err != nil {
b.Fatal(err)
}
points, err := models.ParsePoints(data, []byte("mm"))
if err != nil {
b.Fatal(err)
}
runBenchmark := func(b *testing.B, queryN int, cacheSize uint64) {
config := tsi1.NewConfig()
config.SeriesIDSetCacheSize = cacheSize
idx := MustOpenNewIndex(config)
var wg sync.WaitGroup
begin := make(chan struct{})
// Run concurrent iterator...
runIter := func() {
keys := [][]string{
{"m0", "tag2", "value4"},
{"m1", "tag3", "value5"},
{"m2", "tag4", "value6"},
{"m3", "tag0", "value8"},
{"m4", "tag5", "value0"},
}
<-begin // Wait for writes to land
for i := 0; i < queryN/5; i++ {
for _, key := range keys {
itr, err := idx.TagValueSeriesIDIterator([]byte(key[0]), []byte(key[1]), []byte(key[2]))
if err != nil {
b.Fatal(err)
}
if itr == nil {
panic("should not happen")
}
if err := itr.Close(); err != nil {
b.Fatal(err)
}
}
}
}
batchSize := 10000
wg.Add(1)
go func() { defer wg.Done(); runIter() }()
var once sync.Once
for j := 0; j < b.N; j++ {
for i := 0; i < len(points); i += batchSize {
collection := tsdb.NewSeriesCollection(points[i : i+batchSize])
if err := idx.CreateSeriesListIfNotExists(collection); err != nil {
b.Fatal(err)
}
once.Do(func() { close(begin) })
}
// Wait for queries to finish
wg.Wait()
// Reset the index...
b.StopTimer()
if err := idx.Close(); err != nil {
b.Fatal(err)
}
// Re-open everything
idx = MustOpenNewIndex(config)
wg.Add(1)
begin = make(chan struct{})
once = sync.Once{}
go func() { defer wg.Done(); runIter() }()
b.StartTimer()
}
}
queries := []int{1e5}
for _, queryN := range queries {
b.Run(fmt.Sprintf("queries %d", queryN), func(b *testing.B) {
b.Run("cache", func(b *testing.B) {
runBenchmark(b, queryN, tsi1.DefaultSeriesIDSetCacheSize)
})
b.Run("no cache", func(b *testing.B) {
runBenchmark(b, queryN, 0)
})
})
}
}

View File

@@ -1,293 +0,0 @@
package tsdb
import (
"io"
"sync"
"unsafe"
"github.com/RoaringBitmap/roaring"
)
// SeriesIDSet represents a lockable bitmap of series ids.
type SeriesIDSet struct {
sync.RWMutex
bitmap *roaring.Bitmap
}
// NewSeriesIDSet returns a new instance of SeriesIDSet.
func NewSeriesIDSet(a ...SeriesID) *SeriesIDSet {
ss := &SeriesIDSet{bitmap: roaring.NewBitmap()}
if len(a) > 0 {
a32 := make([]uint32, len(a))
for i := range a {
a32[i] = uint32(a[i].RawID())
}
ss.bitmap.AddMany(a32)
}
return ss
}
// NewSeriesIDSetNegate returns a new SeriesIDSet containing all the elements in a
// that are not present in b. That is, the set difference between a and b.
func NewSeriesIDSetNegate(a, b *SeriesIDSet) *SeriesIDSet {
a.RLock()
defer a.RUnlock()
b.RLock()
defer b.RUnlock()
return &SeriesIDSet{bitmap: roaring.AndNot(a.bitmap, b.bitmap)}
}
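// Illustrative sketch (not part of the original file): the result contains the
// ids of a that are absent from b; neither input is modified.
//
//	a := NewSeriesIDSet(NewSeriesID(1), NewSeriesID(2))
//	b := NewSeriesIDSet(NewSeriesID(2))
//	diff := NewSeriesIDSetNegate(a, b) // contains only id 1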
// Bytes estimates the memory footprint of this SeriesIDSet, in bytes.
func (s *SeriesIDSet) Bytes() int {
var b int
s.RLock()
b += 24 // mu RWMutex is 24 bytes
b += int(unsafe.Sizeof(s.bitmap)) + int(s.bitmap.GetSizeInBytes())
s.RUnlock()
return b
}
// Add adds the series id to the set.
func (s *SeriesIDSet) Add(id SeriesID) {
s.Lock()
defer s.Unlock()
s.AddNoLock(id)
}
// AddNoLock adds the series id to the set. AddNoLock is not safe for use from
// multiple goroutines. Callers must manage synchronization.
func (s *SeriesIDSet) AddNoLock(id SeriesID) {
s.bitmap.Add(uint32(id.RawID()))
}
// AddMany adds multiple ids to the SeriesIDSet. AddMany takes a lock, so it may
// not be optimal to call it many times with few ids.
func (s *SeriesIDSet) AddMany(ids ...SeriesID) {
if len(ids) == 0 {
return
}
a32 := make([]uint32, len(ids))
for i := range ids {
a32[i] = uint32(ids[i].RawID())
}
s.Lock()
defer s.Unlock()
s.bitmap.AddMany(a32)
}
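// Illustrative sketch (not part of the original file): AddMany amortises the
// locking cost over a batch, so prefer it to repeated Add calls when inserting
// many ids at once.
//
//	ss := NewSeriesIDSet()
//	ss.AddMany(NewSeriesID(1), NewSeriesID(2), NewSeriesID(3))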
// Contains returns true if the id exists in the set.
func (s *SeriesIDSet) Contains(id SeriesID) bool {
s.RLock()
x := s.ContainsNoLock(id)
s.RUnlock()
return x
}
// ContainsNoLock returns true if the id exists in the set. ContainsNoLock is
// not safe for use from multiple goroutines. The caller must manage synchronization.
func (s *SeriesIDSet) ContainsNoLock(id SeriesID) bool {
return s.bitmap.Contains(uint32(id.RawID()))
}
// Remove removes the id from the set.
func (s *SeriesIDSet) Remove(id SeriesID) {
s.Lock()
defer s.Unlock()
s.RemoveNoLock(id)
}
// RemoveNoLock removes the id from the set. RemoveNoLock is not safe for use
// from multiple goroutines. The caller must manage synchronization.
func (s *SeriesIDSet) RemoveNoLock(id SeriesID) {
s.bitmap.Remove(uint32(id.RawID()))
}
// Cardinality returns the cardinality of the SeriesIDSet.
func (s *SeriesIDSet) Cardinality() uint64 {
s.RLock()
defer s.RUnlock()
return s.bitmap.GetCardinality()
}
// Merge merges the contents of others into s. The caller does not need to
// provide s as an argument, and the contents of s will always be present in s
// after Merge returns.
func (s *SeriesIDSet) Merge(others ...*SeriesIDSet) {
bms := make([]*roaring.Bitmap, 0, len(others)+1)
s.RLock()
bms = append(bms, s.bitmap) // Add ourself.
// Add other bitsets.
for _, other := range others {
other.RLock()
defer other.RUnlock() // Hold until we have merged all the bitmaps
bms = append(bms, other.bitmap)
}
result := roaring.FastOr(bms...)
s.RUnlock()
s.Lock()
s.bitmap = result
s.Unlock()
}
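// Illustrative sketch (not part of the original file): Merge builds a new
// bitmap with roaring.FastOr and leaves the other sets untouched.
//
//	dst := NewSeriesIDSet(NewSeriesID(1))
//	dst.Merge(NewSeriesIDSet(NewSeriesID(2)), NewSeriesIDSet(NewSeriesID(3)))
//	// dst now contains ids 1, 2 and 3.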
// MergeInPlace merges other into s, modifying s in the process.
func (s *SeriesIDSet) MergeInPlace(other *SeriesIDSet) {
if s == other {
return
}
other.RLock()
s.Lock()
s.bitmap.Or(other.bitmap)
s.Unlock()
other.RUnlock()
}
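// Design note (not part of the original file): MergeInPlace mutates s's
// existing bitmap rather than allocating a new one, so it is typically cheaper
// than Merge when s is reused across many merges, at the cost of holding s's
// write lock for the duration of the Or.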
// Equals returns true if other and s are the same set of ids.
func (s *SeriesIDSet) Equals(other *SeriesIDSet) bool {
if s == other {
return true
}
s.RLock()
defer s.RUnlock()
other.RLock()
defer other.RUnlock()
return s.bitmap.Equals(other.bitmap)
}
// And returns a new SeriesIDSet containing elements that were present in s and other.
func (s *SeriesIDSet) And(other *SeriesIDSet) *SeriesIDSet {
s.RLock()
defer s.RUnlock()
other.RLock()
defer other.RUnlock()
return &SeriesIDSet{bitmap: roaring.And(s.bitmap, other.bitmap)}
}
// RemoveSet removes all values in other from s, if they exist. Note that
// AndNot mutates s's bitmap in place, so s must be write-locked.
func (s *SeriesIDSet) RemoveSet(other *SeriesIDSet) {
other.RLock()
defer other.RUnlock()
s.Lock()
defer s.Unlock()
s.bitmap.AndNot(other.bitmap)
}
// ForEach calls f for each id in the set. The function is applied to the IDs
// in ascending order.
func (s *SeriesIDSet) ForEach(f func(id SeriesID)) {
s.RLock()
defer s.RUnlock()
itr := s.bitmap.Iterator()
for itr.HasNext() {
f(NewSeriesID(uint64(itr.Next())))
}
}
// ForEachNoLock calls f for each id in the set without taking a lock.
func (s *SeriesIDSet) ForEachNoLock(f func(id SeriesID)) {
itr := s.bitmap.Iterator()
for itr.HasNext() {
f(NewSeriesID(uint64(itr.Next())))
}
}
func (s *SeriesIDSet) String() string {
s.RLock()
defer s.RUnlock()
return s.bitmap.String()
}
// Diff removes from s any elements also present in other.
func (s *SeriesIDSet) Diff(other *SeriesIDSet) {
other.RLock()
defer other.RUnlock()
s.Lock()
defer s.Unlock()
s.bitmap = roaring.AndNot(s.bitmap, other.bitmap)
}
// Clone returns a new SeriesIDSet with a deep copy of the underlying bitmap.
func (s *SeriesIDSet) Clone() *SeriesIDSet {
// Cloning the SeriesIDSet involves cloning s's bitmap.
// Unfortunately, if the bitmap is set to COW, the original bitmap is modified during clone,
// so we have to take a write lock rather than a read lock.
// For now, we'll just hold a write lock for clone; if this shows up as a bottleneck later,
// we can conditionally RLock if we are not COW.
s.Lock()
defer s.Unlock()
return s.CloneNoLock()
}
// CloneNoLock clones the set without taking a lock.
func (s *SeriesIDSet) CloneNoLock() *SeriesIDSet {
clone := NewSeriesIDSet()
clone.bitmap = s.bitmap.Clone()
return clone
}
// Iterator returns an iterator to the underlying bitmap.
// This iterator is not protected by a lock.
func (s *SeriesIDSet) Iterator() SeriesIDSetIterable {
return s.bitmap.Iterator()
}
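// Illustrative sketch (not part of the original file): because the iterator is
// not protected by a lock, callers iterating a shared set should hold the read
// lock themselves, or iterate over a Clone.
//
//	itr := ss.Iterator()
//	for itr.HasNext() {
//		id := NewSeriesID(uint64(itr.Next()))
//		_ = id // process id
//	}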
// UnmarshalBinary unmarshals data into the set.
func (s *SeriesIDSet) UnmarshalBinary(data []byte) error {
s.Lock()
defer s.Unlock()
return s.bitmap.UnmarshalBinary(data)
}
// UnmarshalBinaryUnsafe unmarshals data into the set.
// References to the underlying data are used, so the data should not be reused
// by the caller.
func (s *SeriesIDSet) UnmarshalBinaryUnsafe(data []byte) error {
s.Lock()
defer s.Unlock()
_, err := s.bitmap.FromBuffer(data)
return err
}
// WriteTo writes the set to w.
func (s *SeriesIDSet) WriteTo(w io.Writer) (int64, error) {
s.RLock()
defer s.RUnlock()
return s.bitmap.WriteTo(w)
}
// Clear clears the underlying bitmap for re-use. Clear is safe for use by multiple goroutines.
func (s *SeriesIDSet) Clear() {
s.Lock()
defer s.Unlock()
s.ClearNoLock()
}
// ClearNoLock clears the underlying bitmap for re-use without taking a lock.
func (s *SeriesIDSet) ClearNoLock() {
s.bitmap.Clear()
}
// Slice returns a slice of series ids.
func (s *SeriesIDSet) Slice() []uint64 {
s.RLock()
defer s.RUnlock()
a := make([]uint64, 0, s.bitmap.GetCardinality())
for _, seriesID := range s.bitmap.ToArray() {
a = append(a, uint64(seriesID))
}
return a
}
type SeriesIDSetIterable interface {
HasNext() bool
Next() uint32
}

View File

@@ -1,778 +0,0 @@
package tsdb
import (
"bytes"
"fmt"
"math"
"math/rand"
"runtime"
"sync"
"testing"
)
func TestSeriesIDSet_NewSeriesIDSetNegate(t *testing.T) {
examples := [][3][]uint64{
[3][]uint64{
{1, 10, 20, 30},
{10, 12, 13, 14, 20},
{1, 30},
},
[3][]uint64{
{},
{10},
{},
},
[3][]uint64{
{1, 10, 20, 30},
{1, 10, 20, 30},
{},
},
[3][]uint64{
{1, 10},
{1, 10, 100},
{},
},
[3][]uint64{
{1, 10},
{},
{1, 10},
},
}
for i, example := range examples {
t.Run(fmt.Sprint(i), func(t *testing.T) {
// Build sets.
a, b := NewSeriesIDSet(), NewSeriesIDSet()
for _, v := range example[0] {
a.Add(NewSeriesID(v))
}
for _, v := range example[1] {
b.Add(NewSeriesID(v))
}
expected := NewSeriesIDSet()
for _, v := range example[2] {
expected.Add(NewSeriesID(v))
}
got := NewSeriesIDSetNegate(a, b)
if got.String() != expected.String() {
t.Fatalf("got %s, expected %s", got.String(), expected.String())
}
})
}
}
func TestSeriesIDSet_RemoveSet(t *testing.T) {
examples := [][3][]uint64{
[3][]uint64{
{1, 10, 20, 30},
{10, 12, 13, 14, 20},
{1, 30},
},
[3][]uint64{
{},
{10},
{},
},
[3][]uint64{
{1, 10, 20, 30},
{1, 10, 20, 30},
{},
},
[3][]uint64{
{1, 10},
{1, 10, 100},
{},
},
[3][]uint64{
{1, 10},
{},
{1, 10},
},
}
for i, example := range examples {
t.Run(fmt.Sprint(i), func(t *testing.T) {
// Build sets.
a, b := NewSeriesIDSet(), NewSeriesIDSet()
for _, v := range example[0] {
a.Add(NewSeriesID(v))
}
for _, v := range example[1] {
b.Add(NewSeriesID(v))
}
expected := NewSeriesIDSet()
for _, v := range example[2] {
expected.Add(NewSeriesID(v))
}
a.RemoveSet(b)
if a.String() != expected.String() {
t.Fatalf("got %s, expected %s", a.String(), expected.String())
}
})
}
}
// Ensure that cloning is race-free.
func TestSeriesIDSet_Clone_Race(t *testing.T) {
main := NewSeriesIDSet()
total := NewSeriesIDSet()
for i := uint64(0); i < 1024; i++ {
id := NewSeriesID(i)
main.AddNoLock(id)
total.AddNoLock(id)
}
// One test with a closure around the main SeriesIDSet,
// so that we can run a subtest with and without COW.
test := func(t *testing.T) {
n := 10 * (runtime.NumCPU() + 1)
clones := make([]*SeriesIDSet, n)
var wg sync.WaitGroup
wg.Add(n)
for i := 1; i <= n; i++ {
go func(i int) {
defer wg.Done()
clones[i-1] = main.Clone()
for j := 0; j < 1000; j++ {
id := NewSeriesID(uint64(j + (100000 * i)))
total.Add(id)
clones[i-1].AddNoLock(id)
}
}(i)
}
wg.Wait()
for _, o := range clones {
if got, exp := o.Cardinality(), uint64(2024); got != exp {
t.Errorf("got cardinality %d, expected %d", got, exp)
}
}
// The original set should be unaffected
if got, exp := main.Cardinality(), uint64(1024); got != exp {
t.Errorf("got cardinality %d, expected %d", got, exp)
}
// Merging the clones should result in only 1024 shared values.
union := NewSeriesIDSet()
for _, o := range clones {
o.ForEachNoLock(func(id SeriesID) {
union.AddNoLock(id)
})
}
if !union.Equals(total) {
t.Fatal("union not equal to total")
}
}
t.Run("clone", test)
}
var resultBool bool
// Contains should typically be a constant-time lookup. Example results on a laptop:
//
// BenchmarkSeriesIDSet_Contains/1-4 20000000 68.5 ns/op 0 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Contains/2-4 20000000 70.8 ns/op 0 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Contains/10-4 20000000 70.3 ns/op 0 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Contains/100-4 20000000 71.3 ns/op 0 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Contains/1000-4 20000000 80.5 ns/op 0 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Contains/10000-4 20000000 67.3 ns/op 0 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Contains/100000-4 20000000 73.1 ns/op 0 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Contains/1000000-4 20000000 77.3 ns/op 0 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Contains/10000000-4 20000000 75.3 ns/op 0 B/op 0 allocs/op
func BenchmarkSeriesIDSet_Contains(b *testing.B) {
cardinalities := []uint64{1, 2, 10, 100, 1000, 10000, 100000, 1000000, 10000000}
for _, cardinality := range cardinalities {
// Setup...
set := NewSeriesIDSet()
for i := uint64(0); i < cardinality; i++ {
set.Add(NewSeriesID(i))
}
lookup := cardinality / 2
b.Run(fmt.Sprint(cardinality), func(b *testing.B) {
for i := 0; i < b.N; i++ {
resultBool = set.Contains(NewSeriesID(lookup))
}
})
}
}
var set *SeriesIDSet
// Adding to a larger bitset shouldn't be significantly more expensive than adding
// to a smaller one. This benchmark adds a value to different cardinality sets.
//
// Example results from a laptop:
// BenchmarkSeriesIDSet_Add/1-4 1000000 1053 ns/op 48 B/op 2 allocs/op
// BenchmarkSeriesIDSet_Add/2-4 5000000 303 ns/op 0 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Add/10-4 5000000 348 ns/op 0 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Add/100-4 5000000 373 ns/op 0 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Add/1000-4 5000000 342 ns/op 0 B/op 0 allocs/op
func BenchmarkSeriesIDSet_AddMore(b *testing.B) {
cardinalities := []uint64{1, 2, 10, 100, 1000, 10000, 100000, 1000000, 10000000}
for _, cardinality := range cardinalities {
// Setup...
set = NewSeriesIDSet()
for i := uint64(0); i < cardinality-1; i++ {
set.Add(NewSeriesID(i))
}
b.Run(fmt.Sprint(cardinality), func(b *testing.B) {
for i := 0; i < b.N; i++ {
// Add next value
set.Add(NewSeriesID(cardinality))
b.StopTimer()
set.Remove(NewSeriesID(cardinality))
b.StartTimer()
}
})
}
}
// Add benchmarks the cost of adding the same element to a set versus the
// cost of checking if it exists before adding it.
//
// Typical benchmarks from a laptop:
//
// BenchmarkSeriesIDSet_Add/cardinality_1000000_add/same-8 20000000 64.8 ns/op 0 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Add/cardinality_1000000_add/random-8 2000000 704 ns/op 5 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Add/cardinality_1000000_add/same_no_lock-8 50000000 40.3 ns/op 0 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Add/cardinality_1000000_add/random_no_lock-8 2000000 644 ns/op 5 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Add/cardinality_1000000_check_add/same_no_lock-8 50000000 34.0 ns/op 0 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Add/cardinality_1000000_check_add/random_no_lock-8 2000000 860 ns/op 14 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Add/cardinality_1000000_check_add/same_global_lock-8 30000000 49.8 ns/op 0 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Add/cardinality_1000000_check_add/random_global_lock-8 2000000 914 ns/op 0 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Add/cardinality_1000000_check_add/same_multi_lock-8 30000000 39.7 ns/op 0 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Add/cardinality_1000000_check_add/random_multi_lock-8 1000000 1002 ns/op 0 B/op 0 allocs/op
//
func BenchmarkSeriesIDSet_Add(b *testing.B) {
// Setup...
set = NewSeriesIDSet()
for i := uint64(0); i < 1000000; i++ {
set.Add(NewSeriesID(i))
}
lookup := NewSeriesID(300032)
// Add the same value over and over.
b.Run("cardinality_1000000_add", func(b *testing.B) {
b.Run("same", func(b *testing.B) {
for i := 0; i < b.N; i++ {
set.Add(lookup)
}
})
b.Run("random", func(b *testing.B) {
for i := 0; i < b.N; i++ {
b.StopTimer()
x := NewSeriesID(uint64(rand.Intn(math.MaxInt32)))
b.StartTimer()
set.Add(x)
}
})
b.Run("same no lock", func(b *testing.B) {
for i := 0; i < b.N; i++ {
set.AddNoLock(lookup)
}
})
b.Run("random no lock", func(b *testing.B) {
for i := 0; i < b.N; i++ {
b.StopTimer()
x := NewSeriesID(uint64(rand.Intn(math.MaxInt32)))
b.StartTimer()
set.AddNoLock(x)
}
})
})
// Check whether the value exists before adding it, under various locking schemes.
b.Run("cardinality_1000000_check_add", func(b *testing.B) {
b.Run("same no lock", func(b *testing.B) {
for i := 0; i < b.N; i++ {
if !set.ContainsNoLock(lookup) {
set.AddNoLock(lookup)
}
}
})
b.Run("random no lock", func(b *testing.B) {
for i := 0; i < b.N; i++ {
b.StopTimer()
x := NewSeriesID(uint64(rand.Intn(math.MaxInt32)))
b.StartTimer()
if !set.ContainsNoLock(x) {
set.AddNoLock(x)
}
}
})
b.Run("same global lock", func(b *testing.B) {
for i := 0; i < b.N; i++ {
set.Lock()
if !set.ContainsNoLock(lookup) {
set.AddNoLock(lookup)
}
set.Unlock()
}
})
b.Run("random global lock", func(b *testing.B) {
for i := 0; i < b.N; i++ {
b.StopTimer()
x := NewSeriesID(uint64(rand.Intn(math.MaxInt32)))
b.StartTimer()
set.Lock()
if !set.ContainsNoLock(x) {
set.AddNoLock(x)
}
set.Unlock()
}
})
b.Run("same multi lock", func(b *testing.B) {
for i := 0; i < b.N; i++ {
if !set.Contains(lookup) {
set.Add(lookup)
}
}
})
b.Run("random multi lock", func(b *testing.B) {
for i := 0; i < b.N; i++ {
b.StopTimer()
x := NewSeriesID(uint64(rand.Intn(math.MaxInt32)))
b.StartTimer()
if !set.Contains(x) {
set.Add(x)
}
}
})
})
}
var ssResult *SeriesIDSet
// Benchmark various ways of creating a copy of a bitmap. Note, Clone_COW will result
// in a bitmap where future modifications will involve copies.
//
// Typical results from an i7 laptop.
// BenchmarkSeriesIDSet_Clone/cardinality_1000/re-use/Clone-8 30000 44171 ns/op 47200 B/op 1737 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_1000/re-use/Merge-8 100000 17877 ns/op 39008 B/op 30 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_1000/re-use/MergeInPlace-8 200000 7367 ns/op 0 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_1000/re-use/Add-8 10000 137460 ns/op 62336 B/op 2596 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_1000/re-use/WriteTo-8 30000 52896 ns/op 35872 B/op 866 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_1000/don't_re-use/Clone-8 30000 41940 ns/op 47200 B/op 1737 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_1000/don't_re-use/Merge-8 100000 17624 ns/op 39008 B/op 30 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_1000/don't_re-use/MergeInPlace-8 100000 17320 ns/op 38880 B/op 28 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_1000/don't_re-use/Add-8 10000 167544 ns/op 101216 B/op 2624 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_1000/don't_re-use/WriteTo-8 20000 66976 ns/op 52897 B/op 869 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_10000/re-use/Clone-8 10000 179933 ns/op 177072 B/op 5895 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_10000/re-use/Merge-8 20000 77574 ns/op 210656 B/op 42 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_10000/re-use/MergeInPlace-8 100000 23645 ns/op 0 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_10000/re-use/Add-8 2000 689254 ns/op 224161 B/op 9572 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_10000/re-use/WriteTo-8 10000 199052 ns/op 118791 B/op 2945 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_10000/don't_re-use/Clone-8 10000 183137 ns/op 177073 B/op 5895 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_10000/don't_re-use/Merge-8 20000 77502 ns/op 210656 B/op 42 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_10000/don't_re-use/MergeInPlace-8 20000 72610 ns/op 210528 B/op 40 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_10000/don't_re-use/Add-8 2000 724789 ns/op 434691 B/op 9612 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_10000/don't_re-use/WriteTo-8 10000 215734 ns/op 177159 B/op 2948 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_100000/re-use/Clone-8 5000 244971 ns/op 377648 B/op 6111 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_100000/re-use/Merge-8 20000 90580 ns/op 210656 B/op 42 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_100000/re-use/MergeInPlace-8 50000 24697 ns/op 0 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_100000/re-use/Add-8 500 3274456 ns/op 758996 B/op 19853 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_100000/re-use/WriteTo-8 5000 248791 ns/op 122392 B/op 3053 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_100000/don't_re-use/Clone-8 5000 269152 ns/op 377648 B/op 6111 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_100000/don't_re-use/Merge-8 20000 85948 ns/op 210657 B/op 42 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_100000/don't_re-use/MergeInPlace-8 20000 78142 ns/op 210528 B/op 40 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_100000/don't_re-use/Add-8 500 3123753 ns/op 969529 B/op 19893 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_100000/don't_re-use/WriteTo-8 10000 230657 ns/op 180684 B/op 3056 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_1000000/re-use/Clone-8 3000 551781 ns/op 2245424 B/op 6111 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_1000000/re-use/Merge-8 20000 92104 ns/op 210656 B/op 42 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_1000000/re-use/MergeInPlace-8 50000 27408 ns/op 0 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_1000000/re-use/Add-8 100 22573498 ns/op 6420446 B/op 30520 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_1000000/re-use/WriteTo-8 5000 284901 ns/op 123522 B/op 3053 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_1000000/don't_re-use/Clone-8 3000 679284 ns/op 2245424 B/op 6111 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_1000000/don't_re-use/Merge-8 20000 68965 ns/op 210656 B/op 42 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_1000000/don't_re-use/MergeInPlace-8 20000 64236 ns/op 210528 B/op 40 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_1000000/don't_re-use/Add-8 100 21960668 ns/op 6630979 B/op 30560 allocs/op
// BenchmarkSeriesIDSet_Clone/cardinality_1000000/don't_re-use/WriteTo-8 5000 298276 ns/op 181890 B/op 3056 allocs/op
func BenchmarkSeriesIDSet_Clone(b *testing.B) {
toAddCardinalities := []int{1e3, 1e4, 1e5, 1e6}
runBenchmarks := func(b *testing.B, other *SeriesIDSet, init func() *SeriesIDSet) {
b.Run("Clone", func(b *testing.B) {
for i := 0; i < b.N; i++ {
ssResult = other.Clone()
}
})
b.Run("Merge", func(b *testing.B) {
ssResult = init()
for i := 0; i < b.N; i++ {
ssResult.Merge(other)
b.StopTimer()
ssResult = init()
b.StartTimer()
}
})
b.Run("MergeInPlace", func(b *testing.B) {
ssResult = init()
for i := 0; i < b.N; i++ {
ssResult.MergeInPlace(other)
b.StopTimer()
ssResult = init()
b.StartTimer()
}
})
b.Run("Add", func(b *testing.B) {
ssResult = init()
for i := 0; i < b.N; i++ {
itr := other.Iterator()
ssResult.Lock()
for itr.HasNext() {
ssResult.AddNoLock(NewSeriesID(uint64(itr.Next())))
}
ssResult.Unlock()
b.StopTimer()
ssResult = init()
b.StartTimer()
}
})
b.Run("WriteTo", func(b *testing.B) {
var buf bytes.Buffer
ssResult = init()
for i := 0; i < b.N; i++ {
other.WriteTo(&buf)
ssResult.UnmarshalBinaryUnsafe(buf.Bytes())
b.StopTimer()
ssResult = init()
buf.Reset()
b.StartTimer()
}
})
}
for _, toAddCardinality := range toAddCardinalities {
b.Run(fmt.Sprintf("cardinality %d", toAddCardinality), func(b *testing.B) {
ids := make([]SeriesID, 0, toAddCardinality)
for i := 0; i < toAddCardinality; i++ {
ids = append(ids, NewSeriesID(uint64(rand.Intn(200000000))))
}
other := NewSeriesIDSet(ids...)
b.Run("re-use", func(b *testing.B) {
base := NewSeriesIDSet()
runBenchmarks(b, other, func() *SeriesIDSet {
base.Clear()
return base
})
})
b.Run("don't re-use", func(b *testing.B) {
runBenchmarks(b, other, func() *SeriesIDSet {
return NewSeriesIDSet()
})
})
})
}
}
func BenchmarkSeriesIDSet_AddMany(b *testing.B) {
cardinalities := []int{1, 1e3, 1e4, 1e5, 1e6}
toAddCardinalities := []int{1e3, 1e4, 1e5}
for _, cardinality := range cardinalities {
ids := make([]SeriesID, 0, cardinality)
for i := 0; i < cardinality; i++ {
ids = append(ids, NewSeriesID(uint64(rand.Intn(200000000))))
}
// Setup...
set = NewSeriesIDSet(ids...)
// Benchmark adding batches of ids of various sizes to sets of different cardinalities.
b.Run(fmt.Sprintf("cardinality %d", cardinality), func(b *testing.B) {
for _, toAddCardinality := range toAddCardinalities {
ids := make([]SeriesID, 0, toAddCardinality)
for i := 0; i < toAddCardinality; i++ {
ids = append(ids, NewSeriesID(uint64(rand.Intn(200000000))))
}
b.Run(fmt.Sprintf("adding %d", toAddCardinality), func(b *testing.B) {
b.Run("AddNoLock", func(b *testing.B) {
clone := set.Clone()
for i := 0; i < b.N; i++ {
for _, id := range ids {
clone.AddNoLock(id)
}
b.StopTimer()
clone = set.Clone()
b.StartTimer()
}
})
b.Run("AddMany", func(b *testing.B) {
clone := set.Clone()
for i := 0; i < b.N; i++ {
clone.AddMany(ids...)
b.StopTimer()
clone = set.Clone()
b.StartTimer()
}
})
// Merge will involve a new bitmap being allocated.
b.Run("Merge", func(b *testing.B) {
clone := set.Clone()
for i := 0; i < b.N; i++ {
other := NewSeriesIDSet(ids...)
clone.Merge(other)
b.StopTimer()
clone = set.Clone()
b.StartTimer()
}
})
b.Run("MergeInPlace", func(b *testing.B) {
clone := set.Clone()
for i := 0; i < b.N; i++ {
other := NewSeriesIDSet(ids...)
clone.MergeInPlace(other)
b.StopTimer()
clone = set.Clone()
b.StartTimer()
}
})
})
}
})
}
}
// Remove benchmarks the cost of removing the same element from a set versus the
// cost of checking if it exists before removing it.
//
// Typical benchmarks from a laptop:
//
// BenchmarkSeriesIDSet_Remove/cardinality_1000000_remove_same-4 20000000 99.1 ns/op 0 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Remove/cardinality_1000000_check_remove_global_lock-4 20000000 57.7 ns/op 0 B/op 0 allocs/op
// BenchmarkSeriesIDSet_Remove/cardinality_1000000_check_remove_multi_lock-4 20000000 80.1 ns/op 0 B/op 0 allocs/op
//
func BenchmarkSeriesIDSet_Remove(b *testing.B) {
// Setup...
set = NewSeriesIDSet()
for i := uint64(0); i < 1000000; i++ {
set.Add(NewSeriesID(i))
}
lookup := uint64(300032)
// Remove the same value over and over.
b.Run("cardinality_1000000_remove_same", func(b *testing.B) {
for i := 0; i < b.N; i++ {
set.Remove(NewSeriesID(lookup))
}
})
// Check if the value exists before removing it. Subsequent repeats of the code
// will result in contains checks.
b.Run("cardinality_1000000_check_remove_global_lock", func(b *testing.B) {
for i := 0; i < b.N; i++ {
set.Lock()
if set.ContainsNoLock(NewSeriesID(lookup)) {
set.RemoveNoLock(NewSeriesID(lookup))
}
set.Unlock()
}
})
// Check if the value exists before removing it under two locks.
b.Run("cardinality_1000000_check_remove_multi_lock", func(b *testing.B) {
for i := 0; i < b.N; i++ {
if set.Contains(NewSeriesID(lookup)) {
set.Remove(NewSeriesID(lookup))
}
}
})
}
// BenchmarkSeriesIDSet_MassRemove benchmarks the cost of removing a large set of values.
func BenchmarkSeriesIDSet_MassRemove(b *testing.B) {
var size = uint64(1000000)
// Setup...
set = NewSeriesIDSet()
for i := uint64(0); i < size; i++ {
set.Add(NewSeriesID(i))
}
// Remove one at a time
b.Run("cardinality_1000000_remove_each", func(b *testing.B) {
clone := set.Clone()
for i := 0; i < b.N; i++ {
for j := uint64(0); j < size/2; j++ {
clone.RemoveNoLock(NewSeriesID(j))
}
b.StopTimer()
clone = set.Clone()
b.StartTimer()
}
})
// This is the case where a target series id set exists.
b.Run("cardinality_1000000_remove_set_exists", func(b *testing.B) {
clone := set.Clone()
other := NewSeriesIDSet()
for j := uint64(0); j < size/2; j++ {
other.AddNoLock(NewSeriesID(j))
}
for i := 0; i < b.N; i++ {
clone.RemoveSet(other)
b.StopTimer()
clone = set.Clone()
b.StartTimer()
}
})
// Build the target series id set inside the timed section, then remove it.
b.Run("cardinality_1000000_remove_set", func(b *testing.B) {
clone := set.Clone()
for i := 0; i < b.N; i++ {
other := NewSeriesIDSet()
for j := uint64(0); j < size/2; j++ {
other.AddNoLock(NewSeriesID(j))
}
clone.RemoveSet(other)
b.StopTimer()
clone = set.Clone()
b.StartTimer()
}
})
// This is the case where a new result set is created.
b.Run("cardinality_1000000_remove_set_new", func(b *testing.B) {
clone := set.Clone()
other := NewSeriesIDSet()
for j := uint64(0); j < size/2; j++ {
other.AddNoLock(NewSeriesID(j))
}
for i := 0; i < b.N; i++ {
_ = NewSeriesIDSetNegate(clone, other)
b.StopTimer()
clone = set.Clone()
b.StartTimer()
}
})
}
// Typical benchmarks for a laptop:
//
// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_1/shards_1-4 200000 8095 ns/op 16656 B/op 11 allocs/op
// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_1/shards_10-4 200000 11755 ns/op 18032 B/op 47 allocs/op
// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_1/shards_100-4 50000 41632 ns/op 31794 B/op 407 allocs/op
// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_10000/shards_1-4 200000 6022 ns/op 8384 B/op 7 allocs/op
// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_10000/shards_10-4 100000 19674 ns/op 9760 B/op 43 allocs/op
// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_10000/shards_100-4 10000 152865 ns/op 23522 B/op 403 allocs/op
// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_1000000/shards_1-4 200000 8252 ns/op 9712 B/op 44 allocs/op
// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_1000000/shards_10-4 50000 29566 ns/op 15984 B/op 143 allocs/op
// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_1000000/shards_100-4 10000 237672 ns/op 78710 B/op 1133 allocs/op
// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_10000000/shards_1-4 100000 21559 ns/op 25968 B/op 330 allocs/op
// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_10000000/shards_10-4 20000 102326 ns/op 114325 B/op 537 allocs/op
// BenchmarkSeriesIDSet_Merge_Duplicates/cardinality_10000000/shards_100-4 2000 1042697 ns/op 997909 B/op 2608 allocs/op
func BenchmarkSeriesIDSet_Merge_Duplicates(b *testing.B) {
cardinalities := []int{1, 10000, 1000000, 10000000}
shards := []int{1, 10, 100}
for _, cardinality := range cardinalities {
set = NewSeriesIDSet()
for i := 0; i < cardinality; i++ {
set.Add(NewSeriesID(uint64(i)))
}
for _, shard := range shards {
others := make([]*SeriesIDSet, 0, shard)
for s := 0; s < shard; s++ {
others = append(others, &SeriesIDSet{bitmap: set.bitmap.Clone()})
}
b.Run(fmt.Sprintf("cardinality_%d/shards_%d", cardinality, shard), func(b *testing.B) {
base := &SeriesIDSet{bitmap: set.bitmap.Clone()}
for i := 0; i < b.N; i++ {
base.Merge(others...)
b.StopTimer()
base.bitmap = set.bitmap.Clone()
b.StartTimer()
}
})
}
}
}
// Typical benchmarks for a laptop:
//
// BenchmarkSeriesIDSet_Merge_Unique/cardinality_1/shards_1-4 200000 7841 ns/op 16656 B/op 11 allocs/op
// BenchmarkSeriesIDSet_Merge_Unique/cardinality_1/shards_10-4 200000 13093 ns/op 18048 B/op 47 allocs/op
// BenchmarkSeriesIDSet_Merge_Unique/cardinality_1/shards_100-4 30000 57399 ns/op 31985 B/op 407 allocs/op
// BenchmarkSeriesIDSet_Merge_Unique/cardinality_10000/shards_1-4 200000 7740 ns/op 8384 B/op 7 allocs/op
// BenchmarkSeriesIDSet_Merge_Unique/cardinality_10000/shards_10-4 50000 37116 ns/op 18208 B/op 52 allocs/op
// BenchmarkSeriesIDSet_Merge_Unique/cardinality_10000/shards_100-4 5000 409487 ns/op 210563 B/op 955 allocs/op
// BenchmarkSeriesIDSet_Merge_Unique/cardinality_1000000/shards_1-4 100000 19289 ns/op 19328 B/op 79 allocs/op
// BenchmarkSeriesIDSet_Merge_Unique/cardinality_1000000/shards_10-4 10000 129048 ns/op 159716 B/op 556 allocs/op
// BenchmarkSeriesIDSet_Merge_Unique/cardinality_1000000/shards_100-4 500 3482907 ns/op 5428116 B/op 6174 allocs/op
// BenchmarkSeriesIDSet_Merge_Unique/cardinality_10000000/shards_1-4 30000 43734 ns/op 51872 B/op 641 allocs/op
// BenchmarkSeriesIDSet_Merge_Unique/cardinality_10000000/shards_10-4 3000 514412 ns/op 748678 B/op 3687 allocs/op
// BenchmarkSeriesIDSet_Merge_Unique/cardinality_10000000/shards_100-4 30 61891687 ns/op 69626539 B/op 36038 allocs/op
func BenchmarkSeriesIDSet_Merge_Unique(b *testing.B) {
cardinalities := []int{1, 10000, 1000000, 10000000}
shards := []int{1, 10, 100}
for _, cardinality := range cardinalities {
set = NewSeriesIDSet()
for i := 0; i < cardinality; i++ {
set.Add(NewSeriesID(uint64(i)))
}
for _, shard := range shards {
others := make([]*SeriesIDSet, 0, shard)
for s := 1; s <= shard; s++ {
other := NewSeriesIDSet()
for i := 0; i < cardinality; i++ {
other.Add(NewSeriesID(uint64(i + (s * cardinality))))
}
others = append(others, other)
}
b.Run(fmt.Sprintf("cardinality_%d/shards_%d", cardinality, shard), func(b *testing.B) {
base := &SeriesIDSet{bitmap: set.bitmap.Clone()}
for i := 0; i < b.N; i++ {
base.Merge(others...)
b.StopTimer()
base.bitmap = set.bitmap.Clone()
b.StartTimer()
}
})
}
}
}

View File

@@ -1,21 +0,0 @@
package seriesfile
const (
// DefaultLargeSeriesWriteThreshold is the number of series per write
// that requires the series index to be pregrown before insert.
DefaultLargeSeriesWriteThreshold = 10000
)
// Config contains all of the configuration related to the series file.
type Config struct {
// LargeSeriesWriteThreshold is the threshold before a write requires
// preallocation to improve throughput. Currently used in the series file.
LargeSeriesWriteThreshold int `toml:"large-series-write-threshold"`
}
// NewConfig returns a new instance of config with default settings.
func NewConfig() Config {
return Config{
LargeSeriesWriteThreshold: DefaultLargeSeriesWriteThreshold,
}
}
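// Illustrative sketch (not part of the original file): the struct tag above
// implies the setting is surfaced in TOML roughly as:
//
//	large-series-write-threshold = 10000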

View File

@@ -1,126 +0,0 @@
package seriesfile
import (
"sort"
"sync"
"github.com/influxdata/influxdb/v2/pkg/rhh"
"github.com/prometheus/client_golang/prometheus"
)
// The following package variables act as singletons, to be shared by all
// storage.Engine instantiations. This allows multiple Series Files to be
// monitored within the same process.
var (
sms *seriesFileMetrics // main metrics
ims *rhh.Metrics // hashmap specific metrics
mmu sync.RWMutex
)
// PrometheusCollectors returns all the metrics associated with the tsdb package.
func PrometheusCollectors() []prometheus.Collector {
mmu.RLock()
defer mmu.RUnlock()
var collectors []prometheus.Collector
if sms != nil {
collectors = append(collectors, sms.PrometheusCollectors()...)
}
if ims != nil {
collectors = append(collectors, ims.PrometheusCollectors()...)
}
return collectors
}
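// Illustrative registration sketch (not part of the original file):
//
//	reg := prometheus.NewRegistry()
//	reg.MustRegister(PrometheusCollectors()...)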
// namespace is the leading part of all published metrics for the Storage service.
const namespace = "storage"
const seriesFileSubsystem = "series_file" // sub-system associated with metrics for the Series File.
type seriesFileMetrics struct {
SeriesCreated *prometheus.CounterVec // Number of series created in Series File.
Series *prometheus.GaugeVec // Number of series.
DiskSize *prometheus.GaugeVec // Size occupied on disk.
Segments *prometheus.GaugeVec // Number of segment files.
CompactionsActive *prometheus.GaugeVec // Number of active compactions.
CompactionDuration *prometheus.HistogramVec // Duration of compactions.
// The following metrics include a "status" = {ok, error} label.
Compactions *prometheus.CounterVec // Total number of compactions.
}
// newSeriesFileMetrics initialises the prometheus metrics for tracking the Series File.
func newSeriesFileMetrics(labels prometheus.Labels) *seriesFileMetrics {
names := []string{"series_file_partition"} // All metrics have this label.
for k := range labels {
names = append(names, k)
}
sort.Strings(names)
totalCompactions := append(append([]string(nil), names...), "status")
sort.Strings(totalCompactions)
durationCompaction := append(append([]string(nil), names...), "component")
sort.Strings(durationCompaction)
return &seriesFileMetrics{
SeriesCreated: prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: seriesFileSubsystem,
Name: "series_created",
Help: "Number of series created in Series File.",
}, names),
Series: prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: seriesFileSubsystem,
Name: "series_total",
Help: "Number of series in Series File.",
}, names),
DiskSize: prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: seriesFileSubsystem,
Name: "disk_bytes",
Help: "Number of bytes Series File is using on disk.",
}, names),
Segments: prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: seriesFileSubsystem,
Name: "segments_total",
Help: "Number of segment files in Series File.",
}, names),
CompactionsActive: prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: seriesFileSubsystem,
Name: "index_compactions_active",
Help: "Number of active index compactions.",
}, durationCompaction),
CompactionDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: seriesFileSubsystem,
Name: "index_compactions_duration_seconds",
Help: "Time taken for a successful compaction of index.",
// 30 buckets spaced exponentially between 5s and ~53 minutes.
Buckets: prometheus.ExponentialBuckets(5.0, 1.25, 30),
}, durationCompaction),
Compactions: prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: namespace,
Subsystem: seriesFileSubsystem,
Name: "compactions_total",
Help: "Number of compactions.",
}, totalCompactions),
}
}
// PrometheusCollectors satisfies the prom.PrometheusCollector interface.
func (m *seriesFileMetrics) PrometheusCollectors() []prometheus.Collector {
return []prometheus.Collector{
m.SeriesCreated,
m.Series,
m.DiskSize,
m.Segments,
m.CompactionsActive,
m.CompactionDuration,
m.Compactions,
}
}

View File

@@ -1,180 +0,0 @@
package seriesfile
import (
"context"
"io/ioutil"
"os"
"testing"
"time"
"github.com/influxdata/influxdb/v2/kit/prom/promtest"
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/tsdb"
"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
)
func TestMetrics_SeriesPartition(t *testing.T) {
// metrics to be shared by multiple series files.
metrics := newSeriesFileMetrics(prometheus.Labels{"engine_id": "", "node_id": ""})
t1 := newSeriesPartitionTracker(metrics, prometheus.Labels{"series_file_partition": "0", "engine_id": "0", "node_id": "0"})
t2 := newSeriesPartitionTracker(metrics, prometheus.Labels{"series_file_partition": "0", "engine_id": "1", "node_id": "0"})
reg := prometheus.NewRegistry()
reg.MustRegister(metrics.PrometheusCollectors()...)
base := namespace + "_" + seriesFileSubsystem + "_"
// All the metric names
gauges := []string{
base + "series_total",
base + "disk_bytes",
base + "segments_total",
base + "index_compactions_active",
}
counters := []string{
base + "series_created",
base + "compactions_total",
}
histograms := []string{
base + "index_compactions_duration_seconds",
}
// Generate some measurements.
for i, tracker := range []*seriesPartitionTracker{t1, t2} {
tracker.SetSeries(uint64(i + len(gauges[0])))
tracker.SetDiskSize(uint64(i + len(gauges[1])))
tracker.SetSegments(uint64(i + len(gauges[2])))
labels := tracker.Labels()
labels["component"] = "index"
tracker.metrics.CompactionsActive.With(labels).Add(float64(i + len(gauges[3])))
tracker.AddSeriesCreated(uint64(i + len(counters[0])))
labels = tracker.Labels()
labels["status"] = "ok"
tracker.metrics.Compactions.With(labels).Add(float64(i + len(counters[1])))
labels = tracker.Labels()
labels["component"] = "index"
tracker.metrics.CompactionDuration.With(labels).Observe(float64(i + len(histograms[0])))
}
// Test that all the correct metrics are present.
mfs, err := reg.Gather()
if err != nil {
t.Fatal(err)
}
// The label variants for the two series files.
labelVariants := []prometheus.Labels{
prometheus.Labels{"engine_id": "0", "node_id": "0"},
prometheus.Labels{"engine_id": "1", "node_id": "0"},
}
for i, labels := range labelVariants {
labels["series_file_partition"] = "0"
var metric *dto.Metric
for _, name := range gauges {
exp := float64(i + len(name))
if name == base+"index_compactions_active" {
// Make a copy since we need to add a label
l := make(prometheus.Labels, len(labels))
for k, v := range labels {
l[k] = v
}
l["component"] = "index"
metric = promtest.MustFindMetric(t, mfs, name, l)
} else {
metric = promtest.MustFindMetric(t, mfs, name, labels)
}
if got := metric.GetGauge().GetValue(); got != exp {
t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp)
}
}
for _, name := range counters {
exp := float64(i + len(name))
if name == base+"compactions_total" {
// Make a copy since we need to add a label
l := make(prometheus.Labels, len(labels))
for k, v := range labels {
l[k] = v
}
l["status"] = "ok"
metric = promtest.MustFindMetric(t, mfs, name, l)
} else {
metric = promtest.MustFindMetric(t, mfs, name, labels)
}
if got := metric.GetCounter().GetValue(); got != exp {
t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp)
}
}
for _, name := range histograms {
// Make a copy since we need to add a label
l := make(prometheus.Labels, len(labels))
for k, v := range labels {
l[k] = v
}
l["component"] = "index"
exp := float64(i + len(name))
metric := promtest.MustFindMetric(t, mfs, name, l)
if got := metric.GetHistogram().GetSampleSum(); got != exp {
t.Errorf("[%s %d] got %v, expected %v", name, i, got, exp)
}
}
}
}
// This test ensures that disabling metrics works even if a series file has been created before.
func TestMetrics_Disabled(t *testing.T) {
// This test messes with package-global metrics state, so it must restore that
// state afterwards; otherwise other tests panic.
defer func() {
mmu.Lock()
sms = nil
ims = nil
mmu.Unlock()
}()
path, err := ioutil.TempDir("", "sfile-metrics-")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(path)
// Step 1. make a series file with metrics and some labels
sfile := NewSeriesFile(path)
sfile.SetDefaultMetricLabels(prometheus.Labels{"foo": "bar"})
if err := sfile.Open(context.Background()); err != nil {
t.Fatal(err)
}
if err := sfile.Close(); err != nil {
t.Fatal(err)
}
// Step 2. open the series file again, but disable metrics
sfile = NewSeriesFile(path)
sfile.DisableMetrics()
if err := sfile.Open(context.Background()); err != nil {
t.Fatal(err)
}
defer sfile.Close()
// Step 3. add a series
points := []models.Point{models.MustNewPoint("a", models.Tags{}, models.Fields{"f": 1.0}, time.Now())}
if err := sfile.CreateSeriesListIfNotExists(tsdb.NewSeriesCollection(points)); err != nil {
t.Fatal(err)
}
}

View File

@ -1,628 +0,0 @@
package seriesfile
import (
"bytes"
"context"
"encoding/binary"
"errors"
"fmt"
"os"
"path/filepath"
"sort"
"sync"
"github.com/cespare/xxhash"
"github.com/influxdata/influxdb/v2/kit/tracing"
"github.com/influxdata/influxdb/v2/logger"
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/pkg/binaryutil"
"github.com/influxdata/influxdb/v2/pkg/lifecycle"
"github.com/influxdata/influxdb/v2/pkg/mincore"
"github.com/influxdata/influxdb/v2/pkg/rhh"
"github.com/influxdata/influxdb/v2/tsdb"
"github.com/prometheus/client_golang/prometheus"
"go.uber.org/multierr"
"go.uber.org/zap"
"golang.org/x/sync/errgroup"
"golang.org/x/time/rate"
)
var (
ErrSeriesFileClosed = errors.New("tsdb: series file closed")
ErrInvalidSeriesPartitionID = errors.New("tsdb: invalid series partition id")
)
const (
// SeriesFilePartitionN is the number of partitions a series file is split into.
SeriesFilePartitionN = 8
)
// SeriesFile represents the section of the index that holds series data.
type SeriesFile struct {
mu sync.Mutex // protects concurrent open and close
res lifecycle.Resource
path string
partitions []*SeriesPartition
// N.B. we have many partitions, but they must share the same metrics, so the
// metrics are managed in a single shared package variable and
// each partition decorates the same metric measurements with different
// partition id label values.
defaultMetricLabels prometheus.Labels
metricsEnabled bool
pageFaultLimiter *rate.Limiter // Limits page faults by the series file
LargeWriteThreshold int
Logger *zap.Logger
}
// NewSeriesFile returns a new instance of SeriesFile.
func NewSeriesFile(path string) *SeriesFile {
return &SeriesFile{
path: path,
metricsEnabled: true,
Logger: zap.NewNop(),
LargeWriteThreshold: DefaultLargeSeriesWriteThreshold,
}
}
// WithLogger sets the logger on the SeriesFile and all underlying partitions. It must be called before Open.
func (f *SeriesFile) WithLogger(log *zap.Logger) {
f.Logger = log.With(zap.String("service", "series-file"))
}
// SetDefaultMetricLabels sets the default labels for metrics on the Series File.
// It must be called before the SeriesFile is opened.
func (f *SeriesFile) SetDefaultMetricLabels(labels prometheus.Labels) {
f.defaultMetricLabels = make(prometheus.Labels, len(labels))
for k, v := range labels {
f.defaultMetricLabels[k] = v
}
}
// DisableMetrics ensures that activity is not collected via the prometheus metrics.
// DisableMetrics must be called before Open.
func (f *SeriesFile) DisableMetrics() {
f.metricsEnabled = false
}
// WithPageFaultLimiter sets a limiter to restrict the number of page faults.
func (f *SeriesFile) WithPageFaultLimiter(limiter *rate.Limiter) {
f.pageFaultLimiter = limiter
}
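// Editor's sketch (not part of this diff): typical construction using the
// setters above. The path and label values are illustrative only.
func exampleOpenSeriesFile(ctx context.Context, log *zap.Logger) (*SeriesFile, error) {
	sfile := NewSeriesFile("/tmp/series-file") // illustrative path
	sfile.WithLogger(log)
	sfile.SetDefaultMetricLabels(prometheus.Labels{"engine_id": "0"})
	if err := sfile.Open(ctx); err != nil {
		return nil, err
	}
	return sfile, nil
}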
// Open memory maps the data file at the file's path.
func (f *SeriesFile) Open(ctx context.Context) error {
f.mu.Lock()
defer f.mu.Unlock()
if f.res.Opened() {
return errors.New("series file already opened")
}
span, ctx := tracing.StartSpanFromContext(ctx)
defer span.Finish()
_, logEnd := logger.NewOperation(ctx, f.Logger, "Opening Series File", "series_file_open", zap.String("path", f.path))
defer logEnd()
// Create path if it doesn't exist.
if err := os.MkdirAll(filepath.Join(f.path), 0777); err != nil {
return err
}
// Initialise metrics for trackers.
mmu.Lock()
if sms == nil && f.metricsEnabled {
sms = newSeriesFileMetrics(f.defaultMetricLabels)
}
if ims == nil && f.metricsEnabled {
// Make a copy of the default labels so that another label can be provided.
labels := make(prometheus.Labels, len(f.defaultMetricLabels))
for k, v := range f.defaultMetricLabels {
labels[k] = v
}
labels["series_file_partition"] = "" // All partitions have this label.
ims = rhh.NewMetrics(namespace, seriesFileSubsystem+"_index", labels)
}
mmu.Unlock()
// Open partitions.
f.partitions = make([]*SeriesPartition, 0, SeriesFilePartitionN)
for i := 0; i < SeriesFilePartitionN; i++ {
// TODO(edd): This partition initialisation should be moved up to NewSeriesFile.
p := NewSeriesPartition(i, f.SeriesPartitionPath(i))
p.LargeWriteThreshold = f.LargeWriteThreshold
p.Logger = f.Logger.With(zap.Int("partition", p.ID()))
p.pageFaultLimiter = f.pageFaultLimiter
// For each series file index, rhh trackers are used to track the RHH Hashmap.
// Each of the trackers needs to be given slightly different default
// labels to ensure the correct partition_ids are set as labels.
labels := make(prometheus.Labels, len(f.defaultMetricLabels))
for k, v := range f.defaultMetricLabels {
labels[k] = v
}
labels["series_file_partition"] = fmt.Sprint(p.ID())
p.index.rhhMetrics = ims
p.index.rhhLabels = labels
p.index.rhhMetricsEnabled = f.metricsEnabled
// Set the metric trackers on the partition with any injected default labels.
p.tracker = newSeriesPartitionTracker(sms, labels)
p.tracker.enabled = f.metricsEnabled
if err := p.Open(); err != nil {
f.Logger.Error("Unable to open series file",
zap.String("path", f.path),
zap.Int("partition", p.ID()),
zap.Error(err))
f.closeNoLock()
return err
}
f.partitions = append(f.partitions, p)
}
// The resource is now open.
f.res.Open()
return nil
}
func (f *SeriesFile) closeNoLock() (err error) {
// Close the resource and wait for any outstanding references.
f.res.Close()
var errs []error
for _, p := range f.partitions {
errs = append(errs, p.Close())
}
return multierr.Combine(errs...)
}
// Close unmaps the data file.
func (f *SeriesFile) Close() error {
f.mu.Lock()
defer f.mu.Unlock()
return f.closeNoLock()
}
// Path returns the path to the file.
func (f *SeriesFile) Path() string { return f.path }
// SeriesPartitionPath returns the path to a given partition.
func (f *SeriesFile) SeriesPartitionPath(i int) string {
return filepath.Join(f.path, fmt.Sprintf("%02x", i))
}
// Partitions returns all partitions.
func (f *SeriesFile) Partitions() []*SeriesPartition { return f.partitions }
// Acquire ensures that the series file won't be closed until after the reference
// has been released.
func (f *SeriesFile) Acquire() (*lifecycle.Reference, error) {
return f.res.Acquire()
}
// EnableCompactions allows compactions to run.
func (f *SeriesFile) EnableCompactions() {
for _, p := range f.partitions {
p.EnableCompactions()
}
}
// DisableCompactions prevents new compactions from running.
func (f *SeriesFile) DisableCompactions() {
for _, p := range f.partitions {
p.DisableCompactions()
}
}
// FileSize returns the size of all partitions, in bytes.
func (f *SeriesFile) FileSize() (n int64, err error) {
for _, p := range f.partitions {
v, err := p.FileSize()
n += v
if err != nil {
return n, err
}
}
return n, err
}
// CreateSeriesListIfNotExists creates a list of series in bulk if they don't exist. It overwrites
// the collection's SeriesKeys and SeriesIDs fields. The collection's SeriesIDs slice will have IDs for
// every name+tags, creating new series IDs as needed. If any SeriesID is zero, then a type
// conflict has occurred for that series.
func (f *SeriesFile) CreateSeriesListIfNotExists(collection *tsdb.SeriesCollection) error {
collection.SeriesKeys = GenerateSeriesKeys(collection.Names, collection.Tags)
collection.SeriesIDs = make([]tsdb.SeriesID, len(collection.SeriesKeys))
keyPartitionIDs := f.SeriesKeysPartitionIDs(collection.SeriesKeys)
var g errgroup.Group
for i := range f.partitions {
p := f.partitions[i]
g.Go(func() error {
return p.CreateSeriesListIfNotExists(collection, keyPartitionIDs)
})
}
if err := g.Wait(); err != nil {
return err
}
collection.ApplyConcurrentDrops()
return nil
}
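// Editor's sketch (not part of this diff): bulk-creating series and checking
// for type conflicts, per the contract described above. Measurement, tag and
// type values are illustrative.
func exampleCreateSeries(f *SeriesFile) error {
	collection := &tsdb.SeriesCollection{
		Names: [][]byte{[]byte("cpu")},
		Tags:  []models.Tags{models.NewTags(map[string]string{"region": "east"})},
		Types: []models.FieldType{models.Integer},
	}
	if err := f.CreateSeriesListIfNotExists(collection); err != nil {
		return err
	}
	// A zero SeriesID signals a type conflict for that series (see above).
	return collection.PartialWriteError()
}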
// DeleteSeriesIDs flags a list of series as permanently deleted.
// If a series is reintroduced later then it must create a new id.
func (f *SeriesFile) DeleteSeriesIDs(ids []tsdb.SeriesID) error {
m := make(map[int][]tsdb.SeriesID)
for _, id := range ids {
partitionID := f.SeriesIDPartitionID(id)
m[partitionID] = append(m[partitionID], id)
}
var g errgroup.Group
for partitionID, partitionIDs := range m {
partitionID, partitionIDs := partitionID, partitionIDs
g.Go(func() error { return f.partitions[partitionID].DeleteSeriesIDs(partitionIDs) })
}
return g.Wait()
}
// IsDeleted returns true if the ID has been deleted before.
func (f *SeriesFile) IsDeleted(id tsdb.SeriesID) bool {
p := f.SeriesIDPartition(id)
if p == nil {
return false
}
return p.IsDeleted(id)
}
// SeriesKey returns the series key for a given id.
func (f *SeriesFile) SeriesKey(id tsdb.SeriesID) []byte {
if id.IsZero() {
return nil
}
p := f.SeriesIDPartition(id)
if p == nil {
return nil
}
return p.SeriesKey(id)
}
// SeriesKeyName returns the measurement name for a series id.
func (f *SeriesFile) SeriesKeyName(id tsdb.SeriesID) []byte {
if id.IsZero() {
return nil
}
data := f.SeriesIDPartition(id).SeriesKey(id)
if data == nil {
return nil
}
_, data = ReadSeriesKeyLen(data)
name, _ := ReadSeriesKeyMeasurement(data)
return name
}
// SeriesKeys returns a list of series keys from a list of ids.
func (f *SeriesFile) SeriesKeys(ids []tsdb.SeriesID) [][]byte {
keys := make([][]byte, len(ids))
for i := range ids {
keys[i] = f.SeriesKey(ids[i])
}
return keys
}
// Series returns the parsed series name and tags for an offset.
func (f *SeriesFile) Series(id tsdb.SeriesID) ([]byte, models.Tags) {
key := f.SeriesKey(id)
if key == nil {
return nil, nil
}
return ParseSeriesKey(key)
}
// SeriesID returns the series id for the series.
func (f *SeriesFile) SeriesID(name []byte, tags models.Tags, buf []byte) tsdb.SeriesID {
return f.SeriesIDTyped(name, tags, buf).SeriesID()
}
// SeriesIDTyped returns the typed series id for the series.
func (f *SeriesFile) SeriesIDTyped(name []byte, tags models.Tags, buf []byte) tsdb.SeriesIDTyped {
key := AppendSeriesKey(buf[:0], name, tags)
return f.SeriesIDTypedBySeriesKey(key)
}
// SeriesIDTypedBySeriesKey returns the typed series id for the series.
func (f *SeriesFile) SeriesIDTypedBySeriesKey(key []byte) tsdb.SeriesIDTyped {
keyPartition := f.SeriesKeyPartition(key)
if keyPartition == nil {
return tsdb.SeriesIDTyped{}
}
return keyPartition.FindIDTypedBySeriesKey(key)
}
// HasSeries return true if the series exists.
func (f *SeriesFile) HasSeries(name []byte, tags models.Tags, buf []byte) bool {
return !f.SeriesID(name, tags, buf).IsZero()
}
// SeriesCount returns the number of series.
func (f *SeriesFile) SeriesCount() uint64 {
var n uint64
for _, p := range f.partitions {
n += p.SeriesCount()
}
return n
}
// SeriesIDs returns a slice of series IDs in all partitions, sorted.
// This may return a lot of data at once, so use sparingly.
func (f *SeriesFile) SeriesIDs() []tsdb.SeriesID {
var ids []tsdb.SeriesID
for _, p := range f.partitions {
ids = p.AppendSeriesIDs(ids)
}
sort.Slice(ids, func(i, j int) bool { return ids[i].Less(ids[j]) })
return ids
}
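// SeriesIDPartitionID returns the partition that owns id; ids are assigned
// round-robin, so ownership is (id-1) mod SeriesFilePartitionN.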
func (f *SeriesFile) SeriesIDPartitionID(id tsdb.SeriesID) int {
return int((id.RawID() - 1) % SeriesFilePartitionN)
}
func (f *SeriesFile) SeriesIDPartition(id tsdb.SeriesID) *SeriesPartition {
partitionID := f.SeriesIDPartitionID(id)
if partitionID >= len(f.partitions) {
return nil
}
return f.partitions[partitionID]
}
func (f *SeriesFile) SeriesKeysPartitionIDs(keys [][]byte) []int {
partitionIDs := make([]int, len(keys))
for i := range keys {
partitionIDs[i] = f.SeriesKeyPartitionID(keys[i])
}
return partitionIDs
}
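// SeriesKeyPartitionID hashes the series key with xxhash so that a given key
// always routes to the same partition.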
func (f *SeriesFile) SeriesKeyPartitionID(key []byte) int {
return int(xxhash.Sum64(key) % SeriesFilePartitionN)
}
func (f *SeriesFile) SeriesKeyPartition(key []byte) *SeriesPartition {
partitionID := f.SeriesKeyPartitionID(key)
if partitionID >= len(f.partitions) {
return nil
}
return f.partitions[partitionID]
}
// AppendSeriesKey serializes name and tags to a byte slice.
// The total length is prepended as a uvarint.
func AppendSeriesKey(dst []byte, name []byte, tags models.Tags) []byte {
buf := make([]byte, binary.MaxVarintLen64)
origLen := len(dst)
// The tag count is variable encoded, so we need to know ahead of time what
// the size of the tag count value will be.
tcBuf := make([]byte, binary.MaxVarintLen64)
tcSz := binary.PutUvarint(tcBuf, uint64(len(tags)))
// Size of name/tags. Does not include total length.
size := 0 + //
2 + // size of measurement
len(name) + // measurement
tcSz + // size of number of tags
(4 * len(tags)) + // length of each tag key and value
tags.Size() // size of tag keys/values
// Variable encode length.
totalSz := binary.PutUvarint(buf, uint64(size))
// If caller doesn't provide a buffer then pre-allocate an exact one.
if dst == nil {
dst = make([]byte, 0, size+totalSz)
}
// Append total length.
dst = append(dst, buf[:totalSz]...)
// Append name.
binary.BigEndian.PutUint16(buf, uint16(len(name)))
dst = append(dst, buf[:2]...)
dst = append(dst, name...)
// Append tag count.
dst = append(dst, tcBuf[:tcSz]...)
// Append tags.
for _, tag := range tags {
binary.BigEndian.PutUint16(buf, uint16(len(tag.Key)))
dst = append(dst, buf[:2]...)
dst = append(dst, tag.Key...)
binary.BigEndian.PutUint16(buf, uint16(len(tag.Value)))
dst = append(dst, buf[:2]...)
dst = append(dst, tag.Value...)
}
// Verify that the total length equals the encoded byte count.
if got, exp := len(dst)-origLen, size+totalSz; got != exp {
panic(fmt.Sprintf("series key encoding does not match calculated total length: actual=%d, exp=%d, key=%x", got, exp, dst))
}
return dst
}
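// Editor's sketch (not part of this diff): round-tripping a series key with
// AppendSeriesKey and ParseSeriesKey (defined below). Values are illustrative.
func exampleSeriesKeyRoundTrip() {
	name := []byte("cpu")
	tags := models.NewTags(map[string]string{"region": "east"})
	key := AppendSeriesKey(nil, name, tags)
	gotName, gotTags := ParseSeriesKey(key)
	fmt.Printf("%s %v\n", gotName, gotTags)
}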
// ReadSeriesKey returns the series key from the beginning of the buffer.
func ReadSeriesKey(data []byte) (key, remainder []byte) {
sz, n := binary.Uvarint(data)
return data[:int(sz)+n], data[int(sz)+n:]
}
func ReadSeriesKeyLen(data []byte) (sz int, remainder []byte) {
sz64, i := binary.Uvarint(data)
return int(sz64), data[i:]
}
func ReadSeriesKeyMeasurement(data []byte) (name, remainder []byte) {
n, data := binary.BigEndian.Uint16(data), data[2:]
return data[:n], data[n:]
}
func ReadSeriesKeyTagN(data []byte) (n int, remainder []byte) {
n64, i := binary.Uvarint(data)
return int(n64), data[i:]
}
func ReadSeriesKeyTag(data []byte) (key, value, remainder []byte) {
n, data := binary.BigEndian.Uint16(data), data[2:]
key, data = data[:n], data[n:]
n, data = binary.BigEndian.Uint16(data), data[2:]
value, data = data[:n], data[n:]
return key, value, data
}
// ParseSeriesKey extracts the name & tags from a series key.
func ParseSeriesKey(data []byte) (name []byte, tags models.Tags) {
return parseSeriesKey(data, nil)
}
// ParseSeriesKeyInto extracts the name and tags from data, parsing the tags into
// dstTags, which is then returned.
//
// The returned dstTags may have a different length and capacity.
func ParseSeriesKeyInto(data []byte, dstTags models.Tags) ([]byte, models.Tags) {
return parseSeriesKey(data, dstTags)
}
// parseSeriesKey extracts the name and tags from data, attempting to re-use the
// provided tags value rather than allocating. The returned tags may have a
// different length and capacity to those provided.
func parseSeriesKey(data []byte, dst models.Tags) ([]byte, models.Tags) {
var name []byte
_, data = ReadSeriesKeyLen(data)
name, data = ReadSeriesKeyMeasurement(data)
tagN, data := ReadSeriesKeyTagN(data)
dst = dst[:cap(dst)] // Grow dst to use full capacity
if got, want := len(dst), tagN; got < want {
dst = append(dst, make(models.Tags, want-got)...)
} else if got > want {
dst = dst[:want]
}
dst = dst[:tagN]
for i := 0; i < tagN; i++ {
var key, value []byte
key, value, data = ReadSeriesKeyTag(data)
dst[i].Key, dst[i].Value = key, value
}
return name, dst
}
func CompareSeriesKeys(a, b []byte) int {
// Handle 'nil' keys.
if len(a) == 0 && len(b) == 0 {
return 0
} else if len(a) == 0 {
return -1
} else if len(b) == 0 {
return 1
}
// Read total size.
_, a = ReadSeriesKeyLen(a)
_, b = ReadSeriesKeyLen(b)
// Read names.
name0, a := ReadSeriesKeyMeasurement(a)
name1, b := ReadSeriesKeyMeasurement(b)
// Compare names, return if not equal.
if cmp := bytes.Compare(name0, name1); cmp != 0 {
return cmp
}
// Read tag counts.
tagN0, a := ReadSeriesKeyTagN(a)
tagN1, b := ReadSeriesKeyTagN(b)
// Compare each tag in order.
for i := 0; ; i++ {
// Check for EOF.
if i == tagN0 && i == tagN1 {
return 0
} else if i == tagN0 {
return -1
} else if i == tagN1 {
return 1
}
// Read keys.
var key0, key1, value0, value1 []byte
key0, value0, a = ReadSeriesKeyTag(a)
key1, value1, b = ReadSeriesKeyTag(b)
// Compare keys & values.
if cmp := bytes.Compare(key0, key1); cmp != 0 {
return cmp
} else if cmp := bytes.Compare(value0, value1); cmp != 0 {
return cmp
}
}
}
// GenerateSeriesKeys generates series keys for a list of names & tags using
// a single large memory block.
func GenerateSeriesKeys(names [][]byte, tagsSlice []models.Tags) [][]byte {
buf := make([]byte, 0, SeriesKeysSize(names, tagsSlice))
keys := make([][]byte, len(names))
for i := range names {
offset := len(buf)
buf = AppendSeriesKey(buf, names[i], tagsSlice[i])
keys[i] = buf[offset:]
}
return keys
}
// SeriesKeysSize returns the number of bytes required to encode a list of name/tags.
func SeriesKeysSize(names [][]byte, tagsSlice []models.Tags) int {
var n int
for i := range names {
n += SeriesKeySize(names[i], tagsSlice[i])
}
return n
}
// SeriesKeySize returns the number of bytes required to encode a series key.
func SeriesKeySize(name []byte, tags models.Tags) int {
var n int
n += 2 + len(name)
n += binaryutil.UvarintSize(uint64(len(tags)))
for _, tag := range tags {
n += 2 + len(tag.Key)
n += 2 + len(tag.Value)
}
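// Finally account for the uvarint total-length prefix that AppendSeriesKey
// prepends; its width depends on the size accumulated so far.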
n += binaryutil.UvarintSize(uint64(n))
return n
}
// wait rate limits page faults to the underlying data. Skipped if limiter is not set.
func wait(limiter *mincore.Limiter, b []byte) error {
if limiter == nil {
return nil
}
return limiter.WaitRange(context.Background(), b)
}

View File

@ -1,494 +0,0 @@
package seriesfile_test
import (
"bytes"
"context"
"fmt"
"io/ioutil"
"os"
"path"
"testing"
"github.com/influxdata/influxdb/v2/logger"
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/tsdb"
"github.com/influxdata/influxdb/v2/tsdb/seriesfile"
"golang.org/x/sync/errgroup"
)
func TestParseSeriesKeyInto(t *testing.T) {
name := []byte("cpu")
tags := models.NewTags(map[string]string{"region": "east", "server": "a"})
key := seriesfile.AppendSeriesKey(nil, name, tags)
dst := make(models.Tags, 0)
gotName, gotTags := seriesfile.ParseSeriesKeyInto(key, dst)
if !bytes.Equal(gotName, name) {
t.Fatalf("got %q, expected %q", gotName, name)
}
if got, exp := len(gotTags), 2; got != exp {
t.Fatalf("got tags length %d, expected %d", got, exp)
} else if got, exp := gotTags, tags; !got.Equal(exp) {
t.Fatalf("got tags %v, expected %v", got, exp)
}
dst = make(models.Tags, 0, 5)
_, gotTags = seriesfile.ParseSeriesKeyInto(key, dst)
if got, exp := len(gotTags), 2; got != exp {
t.Fatalf("got tags length %d, expected %d", got, exp)
} else if got, exp := cap(gotTags), 5; got != exp {
t.Fatalf("got tags capacity %d, expected %d", got, exp)
} else if got, exp := gotTags, tags; !got.Equal(exp) {
t.Fatalf("got tags %v, expected %v", got, exp)
}
dst = make(models.Tags, 1)
_, gotTags = seriesfile.ParseSeriesKeyInto(key, dst)
if got, exp := len(gotTags), 2; got != exp {
t.Fatalf("got tags length %d, expected %d", got, exp)
} else if got, exp := gotTags, tags; !got.Equal(exp) {
t.Fatalf("got tags %v, expected %v", got, exp)
}
}
// Ensure that opening a corrupt series file fails and reports an error.
func TestSeriesFile_Open_WhenFileCorrupt_ShouldReturnErr(t *testing.T) {
f := NewBrokenSeriesFile([]byte{0, 0, 0, 0, 0})
defer f.Close()
f.Logger = logger.New(os.Stdout)
err := f.Open(context.Background())
if err == nil {
t.Fatalf("should report error")
}
}
// Ensure series file contains the correct set of series.
func TestSeriesFile_Series(t *testing.T) {
sfile := MustOpenSeriesFile()
defer sfile.Close()
series := []Series{
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "east"}), Type: models.Integer},
{Name: []byte("cpu"), Tags: models.NewTags(map[string]string{"region": "west"}), Type: models.Integer},
{Name: []byte("mem"), Tags: models.NewTags(map[string]string{"region": "east"}), Type: models.Integer},
}
for _, s := range series {
collection := &tsdb.SeriesCollection{
Names: [][]byte{[]byte(s.Name)},
Tags: []models.Tags{s.Tags},
Types: []models.FieldType{s.Type},
}
if err := sfile.CreateSeriesListIfNotExists(collection); err != nil {
t.Fatal(err)
}
}
// Verify total number of series is correct.
if n := sfile.SeriesCount(); n != 3 {
t.Fatalf("unexpected series count: %d", n)
}
// Verify all series exist.
for i, s := range series {
if seriesID := sfile.SeriesID(s.Name, s.Tags, nil); seriesID.IsZero() {
t.Fatalf("series does not exist: i=%d", i)
}
}
// Verify non-existent series doesn't exist.
if sfile.HasSeries([]byte("foo"), models.NewTags(map[string]string{"region": "north"}), nil) {
t.Fatal("series should not exist")
}
}
// Ensure series file can be compacted.
func TestSeriesFileCompactor(t *testing.T) {
sfile := MustOpenSeriesFile()
defer sfile.Close()
// Disable automatic compactions.
for _, p := range sfile.Partitions() {
p.CompactThreshold = 0
}
collection := new(tsdb.SeriesCollection)
for i := 0; i < 10000; i++ {
collection.Names = append(collection.Names, []byte(fmt.Sprintf("m%d", i)))
collection.Tags = append(collection.Tags, models.NewTags(map[string]string{"foo": "bar"}))
collection.Types = append(collection.Types, models.Integer)
}
if err := sfile.CreateSeriesListIfNotExists(collection); err != nil {
t.Fatal(err)
}
if err := collection.PartialWriteError(); err != nil {
t.Fatal(err)
}
// Verify total number of series is correct.
if n := sfile.SeriesCount(); n != uint64(len(collection.Names)) {
t.Fatalf("unexpected series count: %d", n)
}
// Compact in-place for each partition.
for _, p := range sfile.Partitions() {
compactor := seriesfile.NewSeriesPartitionCompactor()
if _, err := compactor.Compact(p); err != nil {
t.Fatal(err)
}
}
// Verify all series exist.
for iter := collection.Iterator(); iter.Next(); {
if seriesID := sfile.SeriesID(iter.Name(), iter.Tags(), nil); seriesID.IsZero() {
t.Fatalf("series does not exist: %s,%s", iter.Name(), iter.Tags().String())
}
}
// Verify total number of series is correct.
if got, exp := sfile.SeriesCount(), uint64(len(collection.Names)); got != exp {
t.Fatalf("SeriesCount()=%d, expected %d (after compaction)", got, exp)
}
}
// Ensures that types are tracked and checked by the series file.
func TestSeriesFile_Type(t *testing.T) {
sfile := MustOpenSeriesFile()
defer sfile.Close()
// Add the series with some types.
collection := &tsdb.SeriesCollection{
Names: [][]byte{[]byte("a"), []byte("b"), []byte("c")},
Tags: []models.Tags{{}, {}, {}},
Types: []models.FieldType{models.Integer, models.Float, models.Boolean},
}
if err := sfile.CreateSeriesListIfNotExists(collection); err != nil {
t.Fatal(err)
}
// Attempt to add the series again but with different types.
collection = &tsdb.SeriesCollection{
Names: [][]byte{[]byte("a"), []byte("b"), []byte("c"), []byte("d")},
Tags: []models.Tags{{}, {}, {}, {}},
Types: []models.FieldType{models.String, models.String, models.String, models.String},
}
if err := sfile.CreateSeriesListIfNotExists(collection); err != nil {
t.Fatal(err)
}
// All of the series except d should be dropped.
if err := collection.PartialWriteError(); err == nil {
t.Fatal("expected partial write error")
}
if collection.Length() != 1 {
t.Fatal("expected one series to remain in collection")
}
if got := string(collection.Names[0]); got != "d" {
t.Fatal("got invalid name on remaining series:", got)
}
}
// Ensure series file deletions persist across compactions.
func TestSeriesFile_DeleteSeriesID(t *testing.T) {
sfile := MustOpenSeriesFile()
defer sfile.Close()
if err := sfile.CreateSeriesListIfNotExists(&tsdb.SeriesCollection{
Names: [][]byte{[]byte("m1")},
Tags: []models.Tags{{}},
Types: []models.FieldType{models.String},
}); err != nil {
t.Fatal(err)
} else if err := sfile.CreateSeriesListIfNotExists(&tsdb.SeriesCollection{
Names: [][]byte{[]byte("m2")},
Tags: []models.Tags{{}},
Types: []models.FieldType{models.String},
}); err != nil {
t.Fatal(err)
} else if err := sfile.ForceCompact(); err != nil {
t.Fatal(err)
}
id := sfile.SeriesID([]byte("m1"), nil, nil)
// Verify total number of series is correct.
if got, exp := sfile.SeriesCount(), uint64(2); got != exp {
t.Fatalf("SeriesCount()=%d, expected %d (before deleted)", got, exp)
}
// Delete and ensure deletion.
if err := sfile.DeleteSeriesIDs([]tsdb.SeriesID{id}); err != nil {
t.Fatal(err)
} else if !sfile.IsDeleted(id) {
t.Fatal("expected deletion before compaction")
}
// Verify total number of series is correct.
if got, exp := sfile.SeriesCount(), uint64(1); got != exp {
t.Fatalf("SeriesCount()=%d, expected %d (before compaction)", got, exp)
}
if err := sfile.ForceCompact(); err != nil {
t.Fatal(err)
} else if !sfile.IsDeleted(id) {
t.Fatal("expected deletion after compaction")
} else if got, exp := sfile.SeriesCount(), uint64(1); got != exp {
t.Fatalf("SeriesCount()=%d, expected %d (after compaction)", got, exp)
}
if err := sfile.Reopen(); err != nil {
t.Fatal(err)
} else if !sfile.IsDeleted(id) {
t.Fatal("expected deletion after reopen")
} else if got, exp := sfile.SeriesCount(), uint64(1); got != exp {
t.Fatalf("SeriesCount()=%d, expected %d (after reopen)", got, exp)
}
// Recreate series with new ID.
if err := sfile.CreateSeriesListIfNotExists(&tsdb.SeriesCollection{
Names: [][]byte{[]byte("m1")},
Tags: []models.Tags{{}},
Types: []models.FieldType{models.String},
}); err != nil {
t.Fatal(err)
} else if got, exp := sfile.SeriesCount(), uint64(2); got != exp {
t.Fatalf("SeriesCount()=%d, expected %d (after recreate)", got, exp)
}
if err := sfile.ForceCompact(); err != nil {
t.Fatal(err)
} else if !sfile.IsDeleted(id) {
t.Fatal("expected deletion after compaction")
} else if got, exp := sfile.SeriesCount(), uint64(2); got != exp {
t.Fatalf("SeriesCount()=%d, expected %d (after recreate & compaction)", got, exp)
}
if err := sfile.Reopen(); err != nil {
t.Fatal(err)
} else if !sfile.IsDeleted(id) {
t.Fatal("expected deletion after reopen")
} else if got, exp := sfile.SeriesCount(), uint64(2); got != exp {
t.Fatalf("SeriesCount()=%d, expected %d (after recreate & compaction)", got, exp)
}
}
func TestSeriesFile_Compaction(t *testing.T) {
const n = 1000
sfile := MustOpenSeriesFile()
defer sfile.Close()
// Generate a bunch of keys.
var collection tsdb.SeriesCollection
for i := 0; i < n; i++ {
collection.Names = append(collection.Names, []byte("cpu"))
collection.Tags = append(collection.Tags, models.NewTags(map[string]string{"region": fmt.Sprintf("r%d", i)}))
collection.Types = append(collection.Types, models.Integer)
}
// Add all to the series file.
err := sfile.CreateSeriesListIfNotExists(&collection)
if err != nil {
t.Fatal(err)
}
// Delete a subset of keys.
for i := 0; i < n; i++ {
if i%10 != 0 {
continue
}
if id := sfile.SeriesID(collection.Names[i], collection.Tags[i], nil); id.IsZero() {
t.Fatal("expected series id")
} else if err := sfile.DeleteSeriesIDs([]tsdb.SeriesID{id}); err != nil {
t.Fatal(err)
}
}
// Compute total size of all series data.
origSize, err := sfile.FileSize()
if err != nil {
t.Fatal(err)
}
// Compact all segments.
var paths []string
for _, p := range sfile.Partitions() {
for _, ss := range p.Segments() {
if err := ss.CompactToPath(ss.Path()+".tmp", p.Index()); err != nil {
t.Fatal(err)
}
paths = append(paths, ss.Path())
}
}
// Close index.
if err := sfile.SeriesFile.Close(); err != nil {
t.Fatal(err)
}
// Overwrite files.
for _, path := range paths {
if err := os.Rename(path+".tmp", path); err != nil {
t.Fatal(err)
}
}
// Reopen index.
sfile.SeriesFile = seriesfile.NewSeriesFile(sfile.SeriesFile.Path())
if err := sfile.SeriesFile.Open(context.Background()); err != nil {
t.Fatal(err)
}
// Ensure series status is correct.
for i := 0; i < n; i++ {
if id := sfile.SeriesID(collection.Names[i], collection.Tags[i], nil); id.IsZero() {
continue
} else if got, want := sfile.IsDeleted(id), (i%10) == 0; got != want {
t.Fatalf("IsDeleted(%d)=%v, want %v", id, got, want)
}
}
// Verify new size is smaller.
newSize, err := sfile.FileSize()
if err != nil {
t.Fatal(err)
} else if newSize >= origSize {
t.Fatalf("expected new size (%d) to be smaller than original size (%d)", newSize, origSize)
}
t.Logf("original size: %d, new size: %d", origSize, newSize)
}
var cachedCompactionSeriesFile *SeriesFile
func BenchmarkSeriesFile_Compaction(b *testing.B) {
const n = 1000000
if cachedCompactionSeriesFile == nil {
sfile := MustOpenSeriesFile()
// Generate a bunch of keys.
ids := make([]tsdb.SeriesID, n)
for i := 0; i < n; i++ {
collection := &tsdb.SeriesCollection{
Names: [][]byte{[]byte("cpu")},
Tags: []models.Tags{models.NewTags(map[string]string{"region": fmt.Sprintf("r%d", i)})},
Types: []models.FieldType{models.Integer},
}
if err := sfile.CreateSeriesListIfNotExists(collection); err != nil {
b.Fatal(err)
} else if ids[i] = sfile.SeriesID(collection.Names[0], collection.Tags[0], nil); ids[i].IsZero() {
b.Fatalf("expected series id: i=%d", i)
}
}
// Delete a subset of keys.
for i := 0; i < len(ids); i += 10 {
if err := sfile.DeleteSeriesIDs([]tsdb.SeriesID{ids[i]}); err != nil {
b.Fatal(err)
}
}
cachedCompactionSeriesFile = sfile
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
// Compact all segments in parallel.
var g errgroup.Group
for _, p := range cachedCompactionSeriesFile.Partitions() {
for _, segment := range p.Segments() {
p, segment := p, segment
g.Go(func() error {
return segment.CompactToPath(segment.Path()+".tmp", p.Index())
})
}
}
if err := g.Wait(); err != nil {
b.Fatal(err)
}
}
}
// Series represents name/tagset pairs that are used in testing.
type Series struct {
Name []byte
Tags models.Tags
Type models.FieldType
Deleted bool
}
// SeriesFile is a test wrapper for seriesfile.SeriesFile.
type SeriesFile struct {
*seriesfile.SeriesFile
}
// NewSeriesFile returns a new instance of SeriesFile with a temporary file path.
func NewSeriesFile() *SeriesFile {
dir, err := ioutil.TempDir("", "tsdb-series-file-")
if err != nil {
panic(err)
}
return &SeriesFile{SeriesFile: seriesfile.NewSeriesFile(dir)}
}
func NewBrokenSeriesFile(content []byte) *SeriesFile {
sFile := NewSeriesFile()
fPath := sFile.Path()
if err := sFile.Open(context.Background()); err != nil {
panic(err)
}
if err := sFile.SeriesFile.Close(); err != nil {
panic(err)
}
segPath := path.Join(fPath, "00", "0000")
if _, err := os.Stat(segPath); os.IsNotExist(err) {
panic(err)
}
err := ioutil.WriteFile(segPath, content, 0777)
if err != nil {
panic(err)
}
return sFile
}
// MustOpenSeriesFile returns a new, open instance of SeriesFile. Panic on error.
func MustOpenSeriesFile() *SeriesFile {
f := NewSeriesFile()
f.Logger = logger.New(os.Stdout)
if err := f.Open(context.Background()); err != nil {
panic(err)
}
return f
}
// Close closes the series file and removes it from disk.
func (f *SeriesFile) Close() error {
defer os.RemoveAll(f.Path())
return f.SeriesFile.Close()
}
// Reopen closes & reopens the series file.
func (f *SeriesFile) Reopen() error {
if err := f.SeriesFile.Close(); err != nil {
return err
}
f.SeriesFile = seriesfile.NewSeriesFile(f.SeriesFile.Path())
return f.SeriesFile.Open(context.Background())
}
// ForceCompact executes an immediate compaction across all partitions.
func (f *SeriesFile) ForceCompact() error {
for _, p := range f.Partitions() {
if _, err := seriesfile.NewSeriesPartitionCompactor().Compact(p); err != nil {
return err
}
}
return nil
}

View File

@ -1,436 +0,0 @@
package seriesfile
import (
"bytes"
"encoding/binary"
"errors"
"io"
"os"
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/pkg/mincore"
"github.com/influxdata/influxdb/v2/pkg/mmap"
"github.com/influxdata/influxdb/v2/pkg/rhh"
"github.com/influxdata/influxdb/v2/tsdb"
"github.com/prometheus/client_golang/prometheus"
"golang.org/x/time/rate"
)
const (
SeriesIndexVersion = 1
SeriesIndexMagic = "SIDX"
)
const (
// SeriesIDSize is the size in bytes of a series key ID.
SeriesIDSize = 8
SeriesOffsetSize = 8
SeriesIndexElemSize = SeriesOffsetSize + SeriesIDSize
SeriesIndexLoadFactor = 90 // rhh load factor
SeriesIndexHeaderSize = 0 +
4 + 1 + // magic + version
8 + 8 + // max series + max offset
8 + 8 + // count + capacity
8 + 8 + // key/id map offset & size
8 + 8 + // id/offset map offset & size
0
)
var ErrInvalidSeriesIndex = errors.New("invalid series index")
// SeriesIndex represents an index of key-to-id & id-to-offset mappings.
type SeriesIndex struct {
path string
count uint64
capacity int64
mask int64
maxSeriesID tsdb.SeriesID
maxOffset int64
// rhhMetrics stores a shared instance of the RHH Prometheus metrics. It
// must be set before Open is called.
rhhMetrics *rhh.Metrics
rhhLabels prometheus.Labels
rhhMetricsEnabled bool
data []byte // mmap data
keyIDData []byte // key/id mmap data
idOffsetData []byte // id/offset mmap data
// In-memory data since rebuild.
keyIDMap *rhh.HashMap
idOffsetMap map[tsdb.SeriesID]int64
tombstones map[tsdb.SeriesID]struct{}
limiter *mincore.Limiter // Limits page faults by the partition
}
func NewSeriesIndex(path string) *SeriesIndex {
return &SeriesIndex{
path: path,
rhhMetricsEnabled: true,
}
}
// Open memory-maps the index file.
func (idx *SeriesIndex) Open() (err error) {
// Map data file, if it exists.
if err := func() error {
if _, err := os.Stat(idx.path); err != nil && !os.IsNotExist(err) {
return err
} else if err == nil {
if idx.data, err = mmap.Map(idx.path, 0); err != nil {
return err
}
hdr, err := ReadSeriesIndexHeader(idx.data)
if err != nil {
return err
}
idx.count, idx.capacity, idx.mask = hdr.Count, hdr.Capacity, hdr.Capacity-1
idx.maxSeriesID, idx.maxOffset = hdr.MaxSeriesID, hdr.MaxOffset
idx.keyIDData = idx.data[hdr.KeyIDMap.Offset : hdr.KeyIDMap.Offset+hdr.KeyIDMap.Size]
idx.idOffsetData = idx.data[hdr.IDOffsetMap.Offset : hdr.IDOffsetMap.Offset+hdr.IDOffsetMap.Size]
}
return nil
}(); err != nil {
idx.Close()
return err
}
options := rhh.DefaultOptions
options.Metrics = idx.rhhMetrics
options.Labels = idx.rhhLabels
options.MetricsEnabled = idx.rhhMetricsEnabled
idx.keyIDMap = rhh.NewHashMap(options)
idx.idOffsetMap = make(map[tsdb.SeriesID]int64)
idx.tombstones = make(map[tsdb.SeriesID]struct{})
return nil
}
// Close unmaps the index file.
func (idx *SeriesIndex) Close() (err error) {
if idx.data != nil {
err = mmap.Unmap(idx.data)
}
idx.keyIDData = nil
idx.idOffsetData = nil
idx.keyIDMap = nil
idx.idOffsetMap = nil
idx.tombstones = nil
return err
}
// SetPageFaultLimiter sets the limiter used for rate limiting page faults.
// Must be called after Open().
func (idx *SeriesIndex) SetPageFaultLimiter(limiter *rate.Limiter) {
idx.limiter = mincore.NewLimiter(limiter, idx.data)
}
// Recover rebuilds the in-memory index for all new entries.
func (idx *SeriesIndex) Recover(segments []*SeriesSegment) error {
// Allocate new in-memory maps.
options := rhh.DefaultOptions
options.Metrics = idx.rhhMetrics
options.Labels = idx.rhhLabels
options.MetricsEnabled = idx.rhhMetricsEnabled
idx.keyIDMap = rhh.NewHashMap(options)
idx.idOffsetMap = make(map[tsdb.SeriesID]int64)
idx.tombstones = make(map[tsdb.SeriesID]struct{})
// Process all entries since the maximum offset in the on-disk index.
minSegmentID, _ := SplitSeriesOffset(idx.maxOffset)
for _, segment := range segments {
if segment.ID() < minSegmentID {
continue
}
if err := segment.ForEachEntry(func(flag uint8, id tsdb.SeriesIDTyped, offset int64, key []byte) error {
if offset <= idx.maxOffset {
return nil
}
idx.execEntry(flag, id, offset, key)
return nil
}); err != nil {
return err
}
}
return nil
}
// GrowBy preallocates the in-memory hashmap to a larger size.
func (idx *SeriesIndex) GrowBy(delta int) {
if delta < 0 {
return
}
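// Size the map so that, after delta more inserts, occupancy stays at the
// configured load factor: capacity = (len+delta) * 100 / loadFactor.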
idx.keyIDMap.Grow(((idx.keyIDMap.Len() + int64(delta)) * 100) / int64(idx.keyIDMap.LoadFactor()))
}
// Count returns the number of series in the index.
func (idx *SeriesIndex) Count() uint64 {
n := int64(idx.OnDiskCount()+idx.InMemCount()) - int64(len(idx.tombstones))
if n < 0 {
n = 0
}
return uint64(n)
}
// OnDiskCount returns the number of series in the on-disk index.
func (idx *SeriesIndex) OnDiskCount() uint64 { return idx.count }
// InMemCount returns the number of series in the in-memory index.
func (idx *SeriesIndex) InMemCount() uint64 { return uint64(len(idx.idOffsetMap)) }
// OnDiskSize returns the on-disk size of the index in bytes.
func (idx *SeriesIndex) OnDiskSize() uint64 { return uint64(len(idx.data)) }
// InMemSize returns the heap size of the index in bytes. The returned value is
// an estimate and does not include all allocated memory.
func (idx *SeriesIndex) InMemSize() uint64 {
n := len(idx.idOffsetMap)
return uint64(2*8*n) + uint64(len(idx.tombstones)*8)
}
func (idx *SeriesIndex) Insert(key []byte, id tsdb.SeriesIDTyped, offset int64) {
idx.execEntry(SeriesEntryInsertFlag, id, offset, key)
}
// Delete marks the series id as deleted.
func (idx *SeriesIndex) Delete(id tsdb.SeriesID) {
// NOTE: WithType(0) kinda sucks here, but we know it will be masked off.
idx.execEntry(SeriesEntryTombstoneFlag, id.WithType(0), 0, nil)
}
// IsDeleted returns true if series id has been deleted.
func (idx *SeriesIndex) IsDeleted(id tsdb.SeriesID) bool {
if _, ok := idx.tombstones[id]; ok {
return true
}
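// An id with no recorded offset was never inserted, so report it as deleted.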
return idx.FindOffsetByID(id) == 0
}
func (idx *SeriesIndex) execEntry(flag uint8, id tsdb.SeriesIDTyped, offset int64, key []byte) {
untypedID := id.SeriesID()
switch flag {
case SeriesEntryInsertFlag:
idx.keyIDMap.PutQuiet(key, id)
idx.idOffsetMap[untypedID] = offset
if untypedID.Greater(idx.maxSeriesID) {
idx.maxSeriesID = untypedID
}
if offset > idx.maxOffset {
idx.maxOffset = offset
}
case SeriesEntryTombstoneFlag:
// Only add a tombstone if the ID exists on disk or in memory; otherwise a
// tombstone for a nonexistent ID would skew the series count.
if idx.FindOffsetByID(untypedID) != 0 {
idx.tombstones[untypedID] = struct{}{}
}
default:
panic("unreachable")
}
}
func (idx *SeriesIndex) FindIDBySeriesKey(segments []*SeriesSegment, key []byte) tsdb.SeriesIDTyped {
if v := idx.keyIDMap.Get(key); v != nil {
if id, _ := v.(tsdb.SeriesIDTyped); !id.IsZero() && !idx.IsDeleted(id.SeriesID()) {
return id
}
}
if len(idx.data) == 0 {
return tsdb.SeriesIDTyped{}
}
hash := rhh.HashKey(key)
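// Probe the on-disk robin-hood hashmap: d tracks the probe distance from the
// ideal slot. Once d exceeds the resident element's own probe distance the
// key cannot be present, since robin-hood insertion would have displaced
// that element.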
for d, pos := int64(0), hash&idx.mask; ; d, pos = d+1, (pos+1)&idx.mask {
elem := idx.keyIDData[(pos * SeriesIndexElemSize):]
elemOffset := int64(binary.BigEndian.Uint64(elem[:SeriesOffsetSize]))
_ = wait(idx.limiter, elem[:SeriesOffsetSize]) // elem size is two uint64s
if elemOffset == 0 {
return tsdb.SeriesIDTyped{}
}
elemKey := ReadSeriesKeyFromSegments(segments, elemOffset+SeriesEntryHeaderSize)
elemHash := rhh.HashKey(elemKey)
if d > rhh.Dist(elemHash, pos, idx.capacity) {
return tsdb.SeriesIDTyped{}
} else if elemHash == hash && bytes.Equal(elemKey, key) {
id := tsdb.NewSeriesIDTyped(binary.BigEndian.Uint64(elem[SeriesOffsetSize:]))
if idx.IsDeleted(id.SeriesID()) {
return tsdb.SeriesIDTyped{}
}
return id
}
}
}
func (idx *SeriesIndex) FindIDByNameTags(segments []*SeriesSegment, name []byte, tags models.Tags, buf []byte) tsdb.SeriesIDTyped {
id := idx.FindIDBySeriesKey(segments, AppendSeriesKey(buf[:0], name, tags))
if _, ok := idx.tombstones[id.SeriesID()]; ok {
return tsdb.SeriesIDTyped{}
}
return id
}
func (idx *SeriesIndex) FindIDListByNameTags(segments []*SeriesSegment, names [][]byte, tagsSlice []models.Tags, buf []byte) (ids []tsdb.SeriesIDTyped, ok bool) {
ids, ok = make([]tsdb.SeriesIDTyped, len(names)), true
for i := range names {
id := idx.FindIDByNameTags(segments, names[i], tagsSlice[i], buf)
if id.IsZero() {
ok = false
continue
}
ids[i] = id
}
return ids, ok
}
func (idx *SeriesIndex) FindOffsetByID(id tsdb.SeriesID) int64 {
if offset := idx.idOffsetMap[id]; offset != 0 {
return offset
} else if len(idx.data) == 0 {
return 0
}
hash := rhh.HashUint64(id.RawID())
for d, pos := int64(0), hash&idx.mask; ; d, pos = d+1, (pos+1)&idx.mask {
elem := idx.idOffsetData[(pos * SeriesIndexElemSize):]
elemID := tsdb.NewSeriesID(binary.BigEndian.Uint64(elem[:SeriesIDSize]))
_ = wait(idx.limiter, elem[:SeriesIDSize])
if elemID == id {
return int64(binary.BigEndian.Uint64(elem[SeriesIDSize:]))
} else if elemID.IsZero() || d > rhh.Dist(rhh.HashUint64(elemID.RawID()), pos, idx.capacity) {
return 0
}
}
}
// Clone returns a copy of idx for use during compaction. The tombstones and
// id/offset maps are copied; the in-memory key/id hashmap is not.
func (idx *SeriesIndex) Clone() *SeriesIndex {
tombstones := make(map[tsdb.SeriesID]struct{}, len(idx.tombstones))
for id := range idx.tombstones {
tombstones[id] = struct{}{}
}
idOffsetMap := make(map[tsdb.SeriesID]int64)
for k, v := range idx.idOffsetMap {
idOffsetMap[k] = v
}
return &SeriesIndex{
path: idx.path,
count: idx.count,
capacity: idx.capacity,
mask: idx.mask,
maxSeriesID: idx.maxSeriesID,
maxOffset: idx.maxOffset,
data: idx.data,
keyIDData: idx.keyIDData,
idOffsetData: idx.idOffsetData,
tombstones: tombstones,
idOffsetMap: idOffsetMap,
}
}
// SeriesIndexHeader represents the header of a series index.
type SeriesIndexHeader struct {
Version uint8
MaxSeriesID tsdb.SeriesID
MaxOffset int64
Count uint64
Capacity int64
KeyIDMap struct {
Offset int64
Size int64
}
IDOffsetMap struct {
Offset int64
Size int64
}
}
// NewSeriesIndexHeader returns a new instance of SeriesIndexHeader.
func NewSeriesIndexHeader() SeriesIndexHeader {
return SeriesIndexHeader{Version: SeriesIndexVersion}
}
// ReadSeriesIndexHeader returns the header from data.
func ReadSeriesIndexHeader(data []byte) (hdr SeriesIndexHeader, err error) {
r := bytes.NewReader(data)
// Read magic number.
magic := make([]byte, len(SeriesIndexMagic))
if _, err := io.ReadFull(r, magic); err != nil {
return hdr, err
} else if !bytes.Equal([]byte(SeriesIndexMagic), magic) {
return hdr, ErrInvalidSeriesIndex
}
// Read version.
if err := binary.Read(r, binary.BigEndian, &hdr.Version); err != nil {
return hdr, err
}
// Read max series ID & max offset.
if err := binary.Read(r, binary.BigEndian, &hdr.MaxSeriesID.ID); err != nil {
return hdr, err
} else if err := binary.Read(r, binary.BigEndian, &hdr.MaxOffset); err != nil {
return hdr, err
}
// Read count & capacity.
if err := binary.Read(r, binary.BigEndian, &hdr.Count); err != nil {
return hdr, err
} else if err := binary.Read(r, binary.BigEndian, &hdr.Capacity); err != nil {
return hdr, err
}
// Read key/id map position.
if err := binary.Read(r, binary.BigEndian, &hdr.KeyIDMap.Offset); err != nil {
return hdr, err
} else if err := binary.Read(r, binary.BigEndian, &hdr.KeyIDMap.Size); err != nil {
return hdr, err
}
// Read id/offset map position.
if err := binary.Read(r, binary.BigEndian, &hdr.IDOffsetMap.Offset); err != nil {
return hdr, err
} else if err := binary.Read(r, binary.BigEndian, &hdr.IDOffsetMap.Size); err != nil {
return hdr, err
}
return hdr, nil
}
// WriteTo writes the header to w.
func (hdr *SeriesIndexHeader) WriteTo(w io.Writer) (n int64, err error) {
var buf bytes.Buffer
buf.WriteString(SeriesIndexMagic)
binary.Write(&buf, binary.BigEndian, hdr.Version)
binary.Write(&buf, binary.BigEndian, hdr.MaxSeriesID)
binary.Write(&buf, binary.BigEndian, hdr.MaxOffset)
binary.Write(&buf, binary.BigEndian, hdr.Count)
binary.Write(&buf, binary.BigEndian, hdr.Capacity)
binary.Write(&buf, binary.BigEndian, hdr.KeyIDMap.Offset)
binary.Write(&buf, binary.BigEndian, hdr.KeyIDMap.Size)
binary.Write(&buf, binary.BigEndian, hdr.IDOffsetMap.Offset)
binary.Write(&buf, binary.BigEndian, hdr.IDOffsetMap.Size)
return buf.WriteTo(w)
}
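// Editor's sketch (not part of this diff): a header round trip through
// WriteTo and ReadSeriesIndexHeader.
func exampleHeaderRoundTrip() (SeriesIndexHeader, error) {
	hdr := NewSeriesIndexHeader()
	var buf bytes.Buffer
	if _, err := hdr.WriteTo(&buf); err != nil {
		return SeriesIndexHeader{}, err
	}
	return ReadSeriesIndexHeader(buf.Bytes())
}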

Some files were not shown because too many files have changed in this diff.