// Source: influxdb/tsdb/shard_internal_test.go (413 lines, 10 KiB, Go).

package tsdb
import (
"context"
"fmt"
"path/filepath"
"regexp"
"sort"
"strings"
"testing"
"time"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxql"
"github.com/stretchr/testify/require"
"go.uber.org/zap/zaptest"
)
// TestShard_ErrorPrinting feeds byte strings containing control and invalid
// bytes through makePrintable and checks three properties of the result:
// it is accepted by models.ValidToken, no run of unPrintReplRune is longer
// than unPrintMaxReplRune, and the number of distinct runs of
// unPrintReplRune matches the expectation for that input.
func TestShard_ErrorPrinting(t *testing.T) {
	cases := []struct {
		wantRuns int
		input    string
	}{
		{wantRuns: 1, input: "bentest\t\n"},
		{wantRuns: 1, input: "bentes\x00\x00\xfe\x00\xfet"},
		{wantRuns: 2, input: "\x00\x00\x00\x00\xfe\t\n\tbentest\x00\x00\x00\x00\xfe\t\n\t\t\t"},
	}

	for _, tc := range cases {
		printable := makePrintable(tc.input)
		require.True(t, models.ValidToken([]byte(printable)))

		runLen := 0 // length of the current run of replacement runes
		runs := 0   // number of runs seen so far
		for _, r := range printable {
			if r != unPrintReplRune {
				// Any other rune ends the current run.
				runLen = 0
				continue
			}
			if runLen == 0 {
				// First replacement rune after something else starts a new run.
				runs++
			}
			runLen++
			require.LessOrEqual(t, runLen, unPrintMaxReplRune, "too many repeated %c", unPrintReplRune)
		}
		require.Equalf(t, tc.wantRuns, runs, "wrong number of elided sequences of replacement characters")
	}
}
func TestShard_MapType(t *testing.T) {
var sh *TempShard
setup := func(index string) {
sh = NewTempShard(t, index)
if err := sh.Open(context.Background()); err != nil {
t.Fatal(err)
}
sh.MustWritePointsString(`
cpu,host=serverA,region=uswest value=100 0
cpu,host=serverA,region=uswest value=50,val2=5 10
cpu,host=serverB,region=uswest value=25 0
mem,host=serverA value=25i 0
mem,host=serverB value=50i,val3=t 10
_reserved,region=uswest value="foo" 0
`)
}
for _, index := range RegisteredIndexes() {
setup(index)
for _, tt := range []struct {
measurement string
field string
typ influxql.DataType
}{
{
measurement: "cpu",
field: "value",
typ: influxql.Float,
},
{
measurement: "cpu",
field: "host",
typ: influxql.Tag,
},
{
measurement: "cpu",
field: "region",
typ: influxql.Tag,
},
{
measurement: "cpu",
field: "val2",
typ: influxql.Float,
},
{
measurement: "cpu",
field: "unknown",
typ: influxql.Unknown,
},
{
measurement: "mem",
field: "value",
typ: influxql.Integer,
},
{
measurement: "mem",
field: "val3",
typ: influxql.Boolean,
},
{
measurement: "mem",
field: "host",
typ: influxql.Tag,
},
{
measurement: "unknown",
field: "unknown",
typ: influxql.Unknown,
},
{
measurement: "_fieldKeys",
field: "fieldKey",
typ: influxql.String,
},
{
measurement: "_fieldKeys",
field: "fieldType",
typ: influxql.String,
},
{
measurement: "_fieldKeys",
field: "unknown",
typ: influxql.Unknown,
},
{
measurement: "_series",
field: "key",
typ: influxql.String,
},
{
measurement: "_series",
field: "unknown",
typ: influxql.Unknown,
},
{
measurement: "_tagKeys",
field: "tagKey",
typ: influxql.String,
},
{
measurement: "_tagKeys",
field: "unknown",
typ: influxql.Unknown,
},
{
measurement: "_reserved",
field: "value",
typ: influxql.String,
},
{
measurement: "_reserved",
field: "region",
typ: influxql.Tag,
},
} {
name := fmt.Sprintf("%s_%s_%s", index, tt.measurement, tt.field)
t.Run(name, func(t *testing.T) {
typ, err := sh.mapType(tt.measurement, tt.field)
if err != nil {
t.Fatal(err)
}
if have, want := typ, tt.typ; have != want {
t.Errorf("unexpected data type: have=%#v want=%#v", have, want)
}
})
}
sh.Close()
}
}
func TestShard_MeasurementsByRegex(t *testing.T) {
var sh *TempShard
setup := func(index string) {
sh = NewTempShard(t, index)
if err := sh.Open(context.Background()); err != nil {
t.Fatal(err)
}
sh.MustWritePointsString(`
cpu,host=serverA,region=uswest value=100 0
cpu,host=serverA,region=uswest value=50,val2=5 10
cpu,host=serverB,region=uswest value=25 0
mem,host=serverA value=25i 0
mem,host=serverB value=50i,val3=t 10
`)
}
for _, index := range RegisteredIndexes() {
setup(index)
for _, tt := range []struct {
regex string
measurements []string
}{
{regex: `cpu`, measurements: []string{"cpu"}},
{regex: `mem`, measurements: []string{"mem"}},
{regex: `cpu|mem`, measurements: []string{"cpu", "mem"}},
{regex: `gpu`, measurements: []string{}},
{regex: `pu`, measurements: []string{"cpu"}},
{regex: `p|m`, measurements: []string{"cpu", "mem"}},
} {
t.Run(index+"_"+tt.regex, func(t *testing.T) {
re := regexp.MustCompile(tt.regex)
measurements, err := sh.MeasurementNamesByRegex(re)
if err != nil {
t.Fatal(err)
}
mstrings := make([]string, 0, len(measurements))
for _, name := range measurements {
mstrings = append(mstrings, string(name))
}
sort.Strings(mstrings)
if diff := cmp.Diff(tt.measurements, mstrings, cmpopts.EquateEmpty()); diff != "" {
t.Errorf("unexpected measurements:\n%s", diff)
}
})
}
sh.Close()
}
}
// TestShard_MeasurementOptimization runs measurementOptimization with
// measurementKey over a table of parsed InfluxQL conditions and checks both
// the returned measurement names and the returned ok flag. Cases that set
// neither wantNames nor wantOK expect a nil name list and ok == false.
func TestShard_MeasurementOptimization(t *testing.T) {
	t.Parallel()

	// toNames builds the expected [][]byte from plain strings.
	toNames := func(ms ...string) [][]byte {
		out := make([][]byte, 0, len(ms))
		for _, m := range ms {
			out = append(out, []byte(m))
		}
		return out
	}

	table := []struct {
		desc      string
		cond      influxql.Expr
		wantNames [][]byte
		wantOK    bool
	}{
		// Conditions for which a measurement list can be extracted.
		{
			desc:      "single measurement",
			cond:      influxql.MustParseExpr(`_name = 'm0'`),
			wantNames: toNames("m0"),
			wantOK:    true,
		},
		{
			desc:      "single measurement with AND",
			cond:      influxql.MustParseExpr(`_something = 'f' AND _name = 'm0'`),
			wantNames: toNames("m0"),
			wantOK:    true,
		},
		{
			desc:      "single measurement with multiple AND",
			cond:      influxql.MustParseExpr(`_something = 'f' AND (a =~ /x0/ AND _name = 'm0')`),
			wantNames: toNames("m0"),
			wantOK:    true,
		},
		{
			desc:      "multiple measurements alone",
			cond:      influxql.MustParseExpr(`_name = 'm0' OR _name = 'm1' OR _name = 'm2'`),
			wantNames: toNames("m0", "m1", "m2"),
			wantOK:    true,
		},
		{
			desc:      "multiple measurements combined",
			cond:      influxql.MustParseExpr(`(_name = 'm0' OR _name = 'm1' OR _name = 'm2') AND (_field = 'foo' OR _field = 'bar' OR _field = 'qux')`),
			wantNames: toNames("m0", "m1", "m2"),
			wantOK:    true,
		},
		{
			desc:      "parens in expression",
			cond:      influxql.MustParseExpr(`(_name = 'm0' OR (_name = 'm1' OR _name = 'm2')) AND tag1 != 'foo'`),
			wantNames: toNames("m0", "m1", "m2"),
			wantOK:    true,
		},
		{
			desc:      "multiple AND",
			cond:      influxql.MustParseExpr(`(tag1 != 'foo' OR tag2 = 'bar') AND (_name = 'm0' OR _name = 'm1' OR _name = 'm2') AND (_field = 'val1' OR _field = 'val2')`),
			wantNames: toNames("m0", "m1", "m2"),
			wantOK:    true,
		},
		{
			desc:      "measurements on in multiple groups, only one valid group",
			cond:      influxql.MustParseExpr(`(_name = 'm0' OR _name = 'm1' OR _name = 'm2') AND (tag1 != 'foo' OR _name = 'm1')`),
			wantNames: toNames("m0", "m1", "m2"),
			wantOK:    true,
		},

		// Conditions for which no measurement list is expected.
		{
			desc: "single measurement with OR",
			cond: influxql.MustParseExpr(`_name = 'm0' OR tag1 != 'foo'`),
		},
		{
			desc: "measurement with OR boolean literal",
			cond: influxql.MustParseExpr(`_name = 'm0' OR true`),
		},
		{
			desc: "single measurement with non-equal",
			cond: influxql.MustParseExpr(`_name != 'm0' AND tag1 != 'foo'`),
		},
		{
			desc: "multiple measurements with non-equal",
			cond: influxql.MustParseExpr(`(_name = 'm0' OR _name != 'm1' OR _name = 'm2') AND (_field = 'foo' OR _field = 'bar' OR _field = 'qux')`),
		},
		{
			desc: "no measurements - multiple tags",
			cond: influxql.MustParseExpr(`tag1 = 'foo' AND tag2 = 'bar'`),
		},
		{
			desc: "no measurements - single field",
			cond: influxql.MustParseExpr(`_field = 'foo'`),
		},
		{
			desc: "measurements with AND",
			cond: influxql.MustParseExpr(`(_name = 'm0' OR _name = 'm1' AND _name = 'm2') AND tag1 != 'foo'`),
		},
		{
			desc: "top level is not AND",
			cond: influxql.MustParseExpr(`(_name = 'm0' OR _name = 'm1' OR _name = 'm2') OR (tag1 != 'foo' OR _name = 'm1')`),
		},
	}

	for i := range table {
		// Copy the entry so each parallel subtest sees its own case.
		entry := table[i]
		t.Run(entry.desc, func(t *testing.T) {
			t.Parallel()
			gotNames, gotOK := measurementOptimization(entry.cond, measurementKey)
			require.Equal(t, entry.wantNames, gotNames)
			require.Equal(t, entry.wantOK, gotOK)
		})
	}
}
// TempShard represents a test wrapper for Shard that uses temporary
// filesystem paths.
type TempShard struct {
	// Shard is embedded, so its methods can be called directly on a TempShard.
	*Shard
	// path is the temporary root directory under which NewTempShard places
	// the shard's data, WAL and series file paths.
	path string
	// sfile is the series file passed to the shard by NewTempShard; it is
	// closed by TempShard.Close.
	sfile *SeriesFile
}
// NewTempShard returns a new instance of TempShard with temp paths.
func NewTempShard(tb testing.TB, index string) *TempShard {
tb.Helper()
// Create temporary path for data and WAL.
test: use `T.TempDir` to create temporary test directory (#23258) * test: use `T.TempDir` to create temporary test directory This commit replaces `os.MkdirTemp` with `t.TempDir` in tests. The directory created by `t.TempDir` is automatically removed when the test and all its subtests complete. Prior to this commit, temporary directory created using `os.MkdirTemp` needs to be removed manually by calling `os.RemoveAll`, which is omitted in some tests. The error handling boilerplate e.g. defer func() { if err := os.RemoveAll(dir); err != nil { t.Fatal(err) } } is also tedious, but `t.TempDir` handles this for us nicely. Reference: https://pkg.go.dev/testing#T.TempDir Signed-off-by: Eng Zer Jun <engzerjun@gmail.com> * test: fix failing TestSendWrite on Windows === FAIL: replications/internal TestSendWrite (0.29s) logger.go:130: 2022-06-23T13:00:54.290Z DEBUG Created new durable queue for replication stream {"id": "0000000000000001", "path": "C:\\Users\\circleci\\AppData\\Local\\Temp\\TestSendWrite1627281409\\001\\replicationq\\0000000000000001"} logger.go:130: 2022-06-23T13:00:54.457Z ERROR Error in replication stream {"replication_id": "0000000000000001", "error": "remote timeout", "retries": 1} testing.go:1090: TempDir RemoveAll cleanup: remove C:\Users\circleci\AppData\Local\Temp\TestSendWrite1627281409\001\replicationq\0000000000000001\1: The process cannot access the file because it is being used by another process. 
Signed-off-by: Eng Zer Jun <engzerjun@gmail.com> * test: fix failing TestStore_BadShard on Windows === FAIL: tsdb TestStore_BadShard (0.09s) logger.go:130: 2022-06-23T12:18:21.827Z INFO Using data dir {"service": "store", "path": "C:\\Users\\circleci\\AppData\\Local\\Temp\\TestStore_BadShard1363295568\\001"} logger.go:130: 2022-06-23T12:18:21.827Z INFO Compaction settings {"service": "store", "max_concurrent_compactions": 2, "throughput_bytes_per_second": 50331648, "throughput_bytes_per_second_burst": 50331648} logger.go:130: 2022-06-23T12:18:21.828Z INFO Open store (start) {"service": "store", "op_name": "tsdb_open", "op_event": "start"} logger.go:130: 2022-06-23T12:18:21.828Z INFO Open store (end) {"service": "store", "op_name": "tsdb_open", "op_event": "end", "op_elapsed": "77.3µs"} testing.go:1090: TempDir RemoveAll cleanup: remove C:\Users\circleci\AppData\Local\Temp\TestStore_BadShard1363295568\002\data\db0\rp0\1\index\0\L0-00000001.tsl: The process cannot access the file because it is being used by another process. Signed-off-by: Eng Zer Jun <engzerjun@gmail.com> * test: fix failing TestPartition_PrependLogFile_Write_Fail and TestPartition_Compact_Write_Fail on Windows === FAIL: tsdb/index/tsi1 TestPartition_PrependLogFile_Write_Fail/write_MANIFEST (0.06s) testing.go:1090: TempDir RemoveAll cleanup: remove C:\Users\circleci\AppData\Local\Temp\TestPartition_PrependLogFile_Write_Failwrite_MANIFEST656030081\002\0\L0-00000003.tsl: The process cannot access the file because it is being used by another process. --- FAIL: TestPartition_PrependLogFile_Write_Fail/write_MANIFEST (0.06s) === FAIL: tsdb/index/tsi1 TestPartition_Compact_Write_Fail/write_MANIFEST (0.08s) testing.go:1090: TempDir RemoveAll cleanup: remove C:\Users\circleci\AppData\Local\Temp\TestPartition_Compact_Write_Failwrite_MANIFEST3398667527\002\0\L0-00000003.tsl: The process cannot access the file because it is being used by another process. 
--- FAIL: TestPartition_Compact_Write_Fail/write_MANIFEST (0.08s) We must close the open file descriptor otherwise the temporary file cannot be cleaned up on Windows. Fixes: 619eb1cae6 ("fix: restore in-memory Manifest on write error") Signed-off-by: Eng Zer Jun <engzerjun@gmail.com> * test: fix failing TestReplicationStartMissingQueue on Windows === FAIL: TestReplicationStartMissingQueue (1.60s) logger.go:130: 2023-03-17T10:42:07.269Z DEBUG Created new durable queue for replication stream {"id": "0000000000000001", "path": "C:\\Users\\circleci\\AppData\\Local\\Temp\\TestReplicationStartMissingQueue76668607\\001\\replicationq\\0000000000000001"} logger.go:130: 2023-03-17T10:42:07.305Z INFO Opened replication stream {"id": "0000000000000001", "path": "C:\\Users\\circleci\\AppData\\Local\\Temp\\TestReplicationStartMissingQueue76668607\\001\\replicationq\\0000000000000001"} testing.go:1206: TempDir RemoveAll cleanup: remove C:\Users\circleci\AppData\Local\Temp\TestReplicationStartMissingQueue76668607\001\replicationq\0000000000000001\1: The process cannot access the file because it is being used by another process. Signed-off-by: Eng Zer Jun <engzerjun@gmail.com> * test: update TestWAL_DiskSize Signed-off-by: Eng Zer Jun <engzerjun@gmail.com> * test: fix failing TestWAL_DiskSize on Windows === FAIL: tsdb/engine/tsm1 TestWAL_DiskSize (2.65s) testing.go:1206: TempDir RemoveAll cleanup: remove C:\Users\circleci\AppData\Local\Temp\TestWAL_DiskSize2736073801\001\_00006.wal: The process cannot access the file because it is being used by another process. Signed-off-by: Eng Zer Jun <engzerjun@gmail.com> --------- Signed-off-by: Eng Zer Jun <engzerjun@gmail.com>
2023-03-21 20:22:11 +00:00
dir := tb.TempDir()
// Create series file.
sfile := NewSeriesFile(filepath.Join(dir, "db0", SeriesFileDirectory))
sfile.Logger = zaptest.NewLogger(tb)
if err := sfile.Open(); err != nil {
panic(err)
}
// Build engine options.
opt := NewEngineOptions()
opt.IndexVersion = index
opt.Config.WALDir = filepath.Join(dir, "wal")
return &TempShard{
Shard: NewShard(0,
filepath.Join(dir, "data", "db0", "rp0", "1"),
filepath.Join(dir, "wal", "db0", "rp0", "1"),
sfile,
opt,
),
sfile: sfile,
path: dir,
}
}
// Close closes the series file and then the underlying shard.
//
// It does not delete anything itself: the files live under the directory
// NewTempShard obtained from tb.TempDir. Both closes are always attempted.
// The shard's error is returned if there is one; otherwise the series
// file's close error is returned instead of being dropped.
func (sh *TempShard) Close() error {
	sfileErr := sh.sfile.Close()
	if err := sh.Shard.Close(); err != nil {
		return err
	}
	return sfileErr
}
// MustWritePointsString trims surrounding whitespace from s, parses it as
// line protocol with second ("s") timestamp precision, and writes the
// resulting points to the shard. It panics if parsing or writing fails.
func (sh *TempShard) MustWritePointsString(s string) {
	lineProtocol := []byte(strings.TrimSpace(s))

	points, parseErr := models.ParsePointsWithPrecision(lineProtocol, time.Time{}, "s")
	if parseErr != nil {
		panic(parseErr)
	}

	writeErr := sh.WritePoints(context.Background(), points)
	if writeErr != nil {
		panic(writeErr)
	}
}