influxdb/tsdb/shard_internal_test.go

418 lines
10 KiB
Go

package tsdb
import (
"context"
"fmt"
"os"
"path/filepath"
"regexp"
"sort"
"strings"
"testing"
"time"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxql"
"github.com/stretchr/testify/require"
"go.uber.org/zap/zaptest"
)
// TestShard_ErrorPrinting feeds strings containing control and invalid bytes
// through makePrintable and checks three things about the result: it passes
// models.ValidToken, no run of unPrintReplRune is longer than
// unPrintMaxReplRune, and the number of such runs matches the expectation.
func TestShard_ErrorPrinting(t *testing.T) {
	cases := []struct {
		nSeq int
		raw  string
	}{
		{1, string([]byte{'b', 'e', 'n', 't', 'e', 's', 't', '\t', '\n'})},
		{1, string([]byte{'b', 'e', 'n', 't', 'e', 's', 0, 0, 0xFE, 0, 0xFE, 't'})},
		{2, string([]byte{0, 0, 0, 0, 0xFE, '\t', '\n', '\t', 'b', 'e', 'n', 't', 'e', 's', 't', 0, 0, 0, 0, 0xFE, '\t', '\n', '\t', '\t', '\t'})},
	}

	for _, tc := range cases {
		printable := makePrintable(tc.raw)
		require.True(t, models.ValidToken([]byte(printable)))

		// runLen is the length of the current run of replacement runes;
		// runs counts how many distinct runs have been seen so far.
		runLen, runs := 0, 0
		for _, r := range printable {
			if r != unPrintReplRune {
				runLen = 0
				continue
			}
			runLen++
			if runLen == 1 {
				// First replacement rune after something else: a new run starts.
				runs++
			}
			require.LessOrEqual(t, runLen, unPrintMaxReplRune, "too many repeated %c", unPrintReplRune)
		}
		require.Equalf(t, tc.nSeq, runs, "wrong number of elided sequences of replacement characters")
	}
}
// TestShard_MapType checks, for every index returned by RegisteredIndexes,
// that Shard.mapType reports the expected influxql data type for written
// fields and tags, for names that were never written, and for the
// underscore-prefixed measurements _fieldKeys, _series, _tagKeys and _reserved.
func TestShard_MapType(t *testing.T) {
	// openShard builds a temp shard for the given index, opens it and seeds
	// it with the points the table below is written against.
	openShard := func(index string) *TempShard {
		shard := NewTempShard(t, index)
		if err := shard.Open(context.Background()); err != nil {
			t.Fatal(err)
		}
		shard.MustWritePointsString(`
cpu,host=serverA,region=uswest value=100 0
cpu,host=serverA,region=uswest value=50,val2=5 10
cpu,host=serverB,region=uswest value=25 0
mem,host=serverA value=25i 0
mem,host=serverB value=50i,val3=t 10
_reserved,region=uswest value="foo" 0
`)
		return shard
	}

	cases := []struct {
		measurement string
		field       string
		typ         influxql.DataType
	}{
		{measurement: "cpu", field: "value", typ: influxql.Float},
		{measurement: "cpu", field: "host", typ: influxql.Tag},
		{measurement: "cpu", field: "region", typ: influxql.Tag},
		{measurement: "cpu", field: "val2", typ: influxql.Float},
		{measurement: "cpu", field: "unknown", typ: influxql.Unknown},
		{measurement: "mem", field: "value", typ: influxql.Integer},
		{measurement: "mem", field: "val3", typ: influxql.Boolean},
		{measurement: "mem", field: "host", typ: influxql.Tag},
		{measurement: "unknown", field: "unknown", typ: influxql.Unknown},
		{measurement: "_fieldKeys", field: "fieldKey", typ: influxql.String},
		{measurement: "_fieldKeys", field: "fieldType", typ: influxql.String},
		{measurement: "_fieldKeys", field: "unknown", typ: influxql.Unknown},
		{measurement: "_series", field: "key", typ: influxql.String},
		{measurement: "_series", field: "unknown", typ: influxql.Unknown},
		{measurement: "_tagKeys", field: "tagKey", typ: influxql.String},
		{measurement: "_tagKeys", field: "unknown", typ: influxql.Unknown},
		{measurement: "_reserved", field: "value", typ: influxql.String},
		{measurement: "_reserved", field: "region", typ: influxql.Tag},
	}

	for _, index := range RegisteredIndexes() {
		sh := openShard(index)

		for _, tc := range cases {
			// Subtests run synchronously, so sh is still open while they execute.
			t.Run(fmt.Sprintf("%s_%s_%s", index, tc.measurement, tc.field), func(t *testing.T) {
				got, err := sh.mapType(tc.measurement, tc.field)
				if err != nil {
					t.Fatal(err)
				}
				if got != tc.typ {
					t.Errorf("unexpected data type: have=%#v want=%#v", got, tc.typ)
				}
			})
		}

		sh.Close()
	}
}
// TestShard_MeasurementsByRegex checks, for every index returned by
// RegisteredIndexes, that Shard.MeasurementNamesByRegex returns exactly the
// measurements whose names match the pattern. Results are sorted before
// comparison, and a nil result is treated as equal to an empty one.
func TestShard_MeasurementsByRegex(t *testing.T) {
	// openShard builds a temp shard for the given index, opens it and writes
	// points for the two measurements "cpu" and "mem".
	openShard := func(index string) *TempShard {
		shard := NewTempShard(t, index)
		if err := shard.Open(context.Background()); err != nil {
			t.Fatal(err)
		}
		shard.MustWritePointsString(`
cpu,host=serverA,region=uswest value=100 0
cpu,host=serverA,region=uswest value=50,val2=5 10
cpu,host=serverB,region=uswest value=25 0
mem,host=serverA value=25i 0
mem,host=serverB value=50i,val3=t 10
`)
		return shard
	}

	cases := []struct {
		pattern string
		want    []string
	}{
		{`cpu`, []string{"cpu"}},
		{`mem`, []string{"mem"}},
		{`cpu|mem`, []string{"cpu", "mem"}},
		{`gpu`, []string{}},
		{`pu`, []string{"cpu"}},
		{`p|m`, []string{"cpu", "mem"}},
	}

	for _, index := range RegisteredIndexes() {
		sh := openShard(index)

		for _, tc := range cases {
			t.Run(index+"_"+tc.pattern, func(t *testing.T) {
				matched, err := sh.MeasurementNamesByRegex(regexp.MustCompile(tc.pattern))
				if err != nil {
					t.Fatal(err)
				}

				// Convert to strings and sort so the comparison does not
				// depend on the order the shard returns names in.
				got := make([]string, 0, len(matched))
				for _, raw := range matched {
					got = append(got, string(raw))
				}
				sort.Strings(got)

				diff := cmp.Diff(tc.want, got, cmpopts.EquateEmpty())
				if diff != "" {
					t.Errorf("unexpected measurements:\n%s", diff)
				}
			})
		}

		sh.Close()
	}
}
// TestShard_MeasurementOptimization runs measurementOptimization with
// measurementKey over a table of parsed InfluxQL expressions. Each case
// states whether the optimization is expected to apply (ok) and, if so, which
// measurement names it should return; cases that leave ok and names unset
// expect (nil, false).
func TestShard_MeasurementOptimization(t *testing.T) {
	t.Parallel()

	parse := influxql.MustParseExpr

	// m builds the expected name list from plain strings.
	m := func(names ...string) [][]byte {
		out := make([][]byte, 0, len(names))
		for _, n := range names {
			out = append(out, []byte(n))
		}
		return out
	}

	cases := []struct {
		name  string
		expr  influxql.Expr
		ok    bool
		names [][]byte
	}{
		// Expressions the optimization is expected to handle.
		{
			name:  "single measurement",
			expr:  parse(`_name = 'm0'`),
			ok:    true,
			names: m("m0"),
		},
		{
			name:  "single measurement with AND",
			expr:  parse(`_something = 'f' AND _name = 'm0'`),
			ok:    true,
			names: m("m0"),
		},
		{
			name:  "single measurement with multiple AND",
			expr:  parse(`_something = 'f' AND (a =~ /x0/ AND _name = 'm0')`),
			ok:    true,
			names: m("m0"),
		},
		{
			name:  "multiple measurements alone",
			expr:  parse(`_name = 'm0' OR _name = 'm1' OR _name = 'm2'`),
			ok:    true,
			names: m("m0", "m1", "m2"),
		},
		{
			name:  "multiple measurements combined",
			expr:  parse(`(_name = 'm0' OR _name = 'm1' OR _name = 'm2') AND (_field = 'foo' OR _field = 'bar' OR _field = 'qux')`),
			ok:    true,
			names: m("m0", "m1", "m2"),
		},
		{
			name:  "parens in expression",
			expr:  parse(`(_name = 'm0' OR (_name = 'm1' OR _name = 'm2')) AND tag1 != 'foo'`),
			ok:    true,
			names: m("m0", "m1", "m2"),
		},
		{
			name:  "multiple AND",
			expr:  parse(`(tag1 != 'foo' OR tag2 = 'bar') AND (_name = 'm0' OR _name = 'm1' OR _name = 'm2') AND (_field = 'val1' OR _field = 'val2')`),
			ok:    true,
			names: m("m0", "m1", "m2"),
		},
		{
			name:  "measurements on in multiple groups, only one valid group",
			expr:  parse(`(_name = 'm0' OR _name = 'm1' OR _name = 'm2') AND (tag1 != 'foo' OR _name = 'm1')`),
			ok:    true,
			names: m("m0", "m1", "m2"),
		},

		// Expressions the optimization is expected to reject: ok stays false
		// and names stays nil.
		{
			name: "single measurement with OR",
			expr: parse(`_name = 'm0' OR tag1 != 'foo'`),
		},
		{
			name: "measurement with OR boolean literal",
			expr: parse(`_name = 'm0' OR true`),
		},
		{
			name: "single measurement with non-equal",
			expr: parse(`_name != 'm0' AND tag1 != 'foo'`),
		},
		{
			name: "multiple measurements with non-equal",
			expr: parse(`(_name = 'm0' OR _name != 'm1' OR _name = 'm2') AND (_field = 'foo' OR _field = 'bar' OR _field = 'qux')`),
		},
		{
			name: "no measurements - multiple tags",
			expr: parse(`tag1 = 'foo' AND tag2 = 'bar'`),
		},
		{
			name: "no measurements - single field",
			expr: parse(`_field = 'foo'`),
		},
		{
			name: "measurements with AND",
			expr: parse(`(_name = 'm0' OR _name = 'm1' AND _name = 'm2') AND tag1 != 'foo'`),
		},
		{
			name: "top level is not AND",
			expr: parse(`(_name = 'm0' OR _name = 'm1' OR _name = 'm2') OR (tag1 != 'foo' OR _name = 'm1')`),
		},
	}

	for _, tc := range cases {
		tc := tc // each parallel subtest needs its own copy of the case
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			gotNames, gotOK := measurementOptimization(tc.expr, measurementKey)
			require.Equal(t, tc.names, gotNames)
			require.Equal(t, tc.ok, gotOK)
		})
	}
}
// TempShard represents a test wrapper for Shard that uses temporary
// filesystem paths.
type TempShard struct {
// Shard is the wrapped shard; embedding it exposes the Shard methods
// directly on TempShard.
*Shard
// path is the root temporary directory created by NewTempShard; Close
// removes it and everything beneath it.
path string
// sfile is the series file opened by NewTempShard and handed to the shard;
// Close closes it.
sfile *SeriesFile
}
// NewTempShard returns a new instance of TempShard with temp paths.
//
// It creates a fresh temporary directory, opens a series file at
// <dir>/db0/<SeriesFileDirectory>, and constructs a shard whose data and WAL
// paths live under that same directory. index is stored as the engine
// options' IndexVersion. Callers in this file call Open on the returned shard
// themselves, and should call Close to release it and delete the directory.
//
// A setup failure is reported through tb.Fatalf, which fails only the calling
// test or benchmark instead of panicking. Like any FailNow-style call, this
// means NewTempShard must be invoked from the goroutine running the test.
func NewTempShard(tb testing.TB, index string) *TempShard {
	tb.Helper()

	// Create temporary path for data and WAL.
	dir, err := os.MkdirTemp("", "influxdb-tsdb-")
	if err != nil {
		tb.Fatalf("creating temp dir: %v", err)
	}

	// Create series file.
	sfile := NewSeriesFile(filepath.Join(dir, "db0", SeriesFileDirectory))
	sfile.Logger = zaptest.NewLogger(tb)
	if err := sfile.Open(); err != nil {
		// No TempShard is returned on this path, so nobody will call Close;
		// remove the directory here (best effort) rather than leaking it.
		_ = os.RemoveAll(dir)
		tb.Fatalf("opening series file: %v", err)
	}

	// Build engine options.
	opt := NewEngineOptions()
	opt.IndexVersion = index
	opt.Config.WALDir = filepath.Join(dir, "wal")

	return &TempShard{
		Shard: NewShard(0,
			filepath.Join(dir, "data", "db0", "rp0", "1"),
			filepath.Join(dir, "wal", "db0", "rp0", "1"),
			sfile,
			opt,
		),
		sfile: sfile,
		path:  dir,
	}
}
// Close closes the shard and removes all underlying data.
//
// The series file is closed first, then the shard. The temporary directory is
// removed on return regardless of either outcome. If the shard fails to close,
// that error is returned; otherwise any error from closing the series file is
// returned rather than being discarded.
func (sh *TempShard) Close() error {
	// Best-effort cleanup: a failure to delete the temp directory is not
	// reported to the caller.
	defer os.RemoveAll(sh.path)

	sfileErr := sh.sfile.Close()
	if err := sh.Shard.Close(); err != nil {
		return err
	}
	return sfileErr
}
// MustWritePointsString trims surrounding whitespace from s, parses it as
// line protocol with "s" (second) timestamp precision, and writes the
// resulting points to the shard. It panics if parsing or writing fails.
func (sh *TempShard) MustWritePointsString(s string) {
	lineProtocol := []byte(strings.TrimSpace(s))

	points, err := models.ParsePointsWithPrecision(lineProtocol, time.Time{}, "s")
	if err != nil {
		panic(err)
	}

	err = sh.WritePoints(context.Background(), points)
	if err != nil {
		panic(err)
	}
}