influxdb/tsdb/index/tsi1/log_file_test.go

365 lines
10 KiB
Go
Raw Normal View History

2016-10-21 15:31:40 +00:00
package tsi1_test
import (
2017-02-10 15:48:11 +00:00
"bytes"
2016-10-25 14:36:58 +00:00
"fmt"
2016-10-21 15:31:40 +00:00
"io/ioutil"
2017-02-10 18:49:03 +00:00
"math/rand"
2016-10-21 15:31:40 +00:00
"os"
2017-02-10 15:48:11 +00:00
"path/filepath"
"regexp"
"runtime/pprof"
2017-02-10 18:49:03 +00:00
"sort"
2016-10-21 15:31:40 +00:00
"testing"
2017-02-10 18:49:03 +00:00
"time"
2016-10-21 15:31:40 +00:00
"github.com/influxdata/influxdb/models"
2017-04-26 17:02:48 +00:00
"github.com/influxdata/influxdb/pkg/bloom"
2017-11-22 15:30:02 +00:00
"github.com/influxdata/influxdb/tsdb"
2016-11-15 16:20:00 +00:00
"github.com/influxdata/influxdb/tsdb/index/tsi1"
2016-10-21 15:31:40 +00:00
)
// Ensure log file can append series.
2017-09-25 15:31:20 +00:00
func TestLogFile_AddSeriesList(t *testing.T) {
2017-09-14 15:41:58 +00:00
sfile := MustOpenSeriesFile()
defer sfile.Close()
f := MustOpenLogFile(sfile.SeriesFile)
2016-10-21 15:31:40 +00:00
defer f.Close()
2018-01-03 19:19:02 +00:00
seriesSet := tsi1.NewSeriesSet()
2016-10-21 15:31:40 +00:00
// Add test data.
2018-01-03 19:19:02 +00:00
if err := f.AddSeriesList(seriesSet, [][]byte{
2017-09-25 15:31:20 +00:00
[]byte("mem"),
[]byte("cpu"),
[]byte("cpu"),
}, []models.Tags{
{{Key: []byte("host"), Value: []byte("serverA")}},
{{Key: []byte("region"), Value: []byte("us-east")}},
{{Key: []byte("region"), Value: []byte("us-west")}},
}); err != nil {
2016-10-21 15:31:40 +00:00
t.Fatal(err)
}
// Verify data.
itr := f.MeasurementIterator()
2016-10-31 14:46:07 +00:00
if e := itr.Next(); e == nil || string(e.Name()) != "cpu" {
2016-10-21 15:31:40 +00:00
t.Fatalf("unexpected measurement: %#v", e)
2016-10-31 14:46:07 +00:00
} else if e := itr.Next(); e == nil || string(e.Name()) != "mem" {
2016-10-21 15:31:40 +00:00
t.Fatalf("unexpected measurement: %#v", e)
} else if e := itr.Next(); e != nil {
t.Fatalf("expected eof, got: %#v", e)
}
2016-11-11 16:25:53 +00:00
// Reopen file and re-verify.
if err := f.Reopen(); err != nil {
t.Fatal(err)
}
// Verify data.
itr = f.MeasurementIterator()
if e := itr.Next(); e == nil || string(e.Name()) != "cpu" {
t.Fatalf("unexpected measurement: %#v", e)
} else if e := itr.Next(); e == nil || string(e.Name()) != "mem" {
t.Fatalf("unexpected measurement: %#v", e)
} else if e := itr.Next(); e != nil {
t.Fatalf("expected eof, got: %#v", e)
}
2016-10-21 15:31:40 +00:00
}
2017-02-10 18:49:03 +00:00
func TestLogFile_SeriesStoredInOrder(t *testing.T) {
2017-09-14 15:41:58 +00:00
sfile := MustOpenSeriesFile()
defer sfile.Close()
f := MustOpenLogFile(sfile.SeriesFile)
2017-02-10 18:49:03 +00:00
defer f.Close()
2018-01-03 19:19:02 +00:00
seriesSet := tsi1.NewSeriesSet()
2017-02-10 18:49:03 +00:00
// Generate and add test data
tvm := make(map[string]struct{})
rand.Seed(time.Now().Unix())
for i := 0; i < 100; i++ {
tv := fmt.Sprintf("server-%d", rand.Intn(50)) // Encourage adding duplicate series.
tvm[tv] = struct{}{}
2018-01-03 19:19:02 +00:00
if err := f.AddSeriesList(seriesSet, [][]byte{
2017-09-25 15:31:20 +00:00
[]byte("mem"),
[]byte("cpu"),
}, []models.Tags{
{models.NewTag([]byte("host"), []byte(tv))},
{models.NewTag([]byte("host"), []byte(tv))},
}); err != nil {
2017-02-10 18:49:03 +00:00
t.Fatal(err)
}
}
// Sort the tag values so we know what order to expect.
tvs := make([]string, 0, len(tvm))
for tv := range tvm {
tvs = append(tvs, tv)
}
sort.Strings(tvs)
// Double the series values since we're adding them twice (two measurements)
tvs = append(tvs, tvs...)
// When we pull the series out via an iterator they should be in order.
2017-09-17 18:06:37 +00:00
itr := f.SeriesIDIterator()
2017-02-10 18:49:03 +00:00
if itr == nil {
t.Fatal("nil iterator")
}
2017-09-26 13:40:26 +00:00
var prevSeriesID uint64
2017-02-10 18:49:03 +00:00
for i := 0; i < len(tvs); i++ {
2017-12-02 23:52:34 +00:00
elem, err := itr.Next()
if err != nil {
t.Fatal(err)
} else if elem.SeriesID == 0 {
2017-02-10 18:49:03 +00:00
t.Fatal("got nil series")
2017-09-18 19:03:47 +00:00
} else if elem.SeriesID < prevSeriesID {
2017-09-27 11:47:17 +00:00
t.Fatalf("series out of order: %d !< %d ", elem.SeriesID, prevSeriesID)
2017-02-10 18:49:03 +00:00
}
2017-09-18 19:03:47 +00:00
prevSeriesID = elem.SeriesID
2017-02-10 18:49:03 +00:00
}
}
2016-10-21 15:31:40 +00:00
// Ensure log file can delete an existing measurement.
func TestLogFile_DeleteMeasurement(t *testing.T) {
2017-09-14 15:41:58 +00:00
sfile := MustOpenSeriesFile()
defer sfile.Close()
f := MustOpenLogFile(sfile.SeriesFile)
2016-10-21 15:31:40 +00:00
defer f.Close()
2018-01-03 19:19:02 +00:00
seriesSet := tsi1.NewSeriesSet()
2016-10-21 15:31:40 +00:00
// Add test data.
2018-01-03 19:19:02 +00:00
if err := f.AddSeriesList(seriesSet, [][]byte{
2017-09-25 15:31:20 +00:00
[]byte("mem"),
[]byte("cpu"),
[]byte("cpu"),
}, []models.Tags{
{{Key: []byte("host"), Value: []byte("serverA")}},
{{Key: []byte("region"), Value: []byte("us-east")}},
{{Key: []byte("region"), Value: []byte("us-west")}},
}); err != nil {
2016-10-21 15:31:40 +00:00
t.Fatal(err)
}
// Remove measurement.
if err := f.DeleteMeasurement([]byte("cpu")); err != nil {
t.Fatal(err)
}
// Verify data.
itr := f.MeasurementIterator()
2016-10-31 14:46:07 +00:00
if e := itr.Next(); string(e.Name()) != "cpu" || !e.Deleted() {
t.Fatalf("unexpected measurement: %s/%v", e.Name(), e.Deleted())
} else if e := itr.Next(); string(e.Name()) != "mem" || e.Deleted() {
t.Fatalf("unexpected measurement: %s/%v", e.Name(), e.Deleted())
2016-10-21 15:31:40 +00:00
} else if e := itr.Next(); e != nil {
t.Fatalf("expected eof, got: %#v", e)
}
}
// LogFile is a test wrapper for tsi1.LogFile.
//
// It embeds *tsi1.LogFile, so the wrapped file's methods are available
// directly on the wrapper. The wrapper's own Close additionally removes the
// file from disk, and Reopen closes and reopens the underlying file.
type LogFile struct {
*tsi1.LogFile
}
// NewLogFile returns a new instance of LogFile with a temporary file path.
2017-11-22 15:30:02 +00:00
func NewLogFile(sfile *tsdb.SeriesFile) *LogFile {
2016-10-21 15:31:40 +00:00
file, err := ioutil.TempFile("", "tsi1-log-file-")
if err != nil {
panic(err)
}
file.Close()
2017-09-14 15:41:58 +00:00
return &LogFile{LogFile: tsi1.NewLogFile(sfile, file.Name())}
2016-10-21 15:31:40 +00:00
}
// MustOpenLogFile returns a new, open instance of LogFile. Panic on error.
2017-11-22 15:30:02 +00:00
func MustOpenLogFile(sfile *tsdb.SeriesFile) *LogFile {
2017-09-14 15:41:58 +00:00
f := NewLogFile(sfile)
2016-10-21 15:31:40 +00:00
if err := f.Open(); err != nil {
panic(err)
}
return f
}
// Close closes the log file and removes it from disk.
func (f *LogFile) Close() error {
2017-02-06 18:14:13 +00:00
defer os.Remove(f.Path())
2016-10-21 15:31:40 +00:00
return f.LogFile.Close()
}
2016-10-25 14:36:58 +00:00
2016-11-11 16:25:53 +00:00
// Reopen closes and reopens the file.
//
// The embedded tsi1.LogFile is closed directly, so the file is not removed
// from disk in between. The first error encountered is returned.
func (f *LogFile) Reopen() error {
	if closeErr := f.LogFile.Close(); closeErr != nil {
		return closeErr
	}
	return f.LogFile.Open()
}
2016-10-25 14:36:58 +00:00
// CreateLogFile creates a new temporary log file and adds a list of series.
2017-11-22 15:30:02 +00:00
func CreateLogFile(sfile *tsdb.SeriesFile, series []Series) (*LogFile, error) {
2017-09-14 15:41:58 +00:00
f := MustOpenLogFile(sfile)
2018-01-03 19:19:02 +00:00
seriesSet := tsi1.NewSeriesSet()
2016-10-25 14:36:58 +00:00
for _, serie := range series {
2018-01-03 19:19:02 +00:00
if err := f.AddSeriesList(seriesSet, [][]byte{serie.Name}, []models.Tags{serie.Tags}); err != nil {
2016-10-25 14:36:58 +00:00
return nil, err
}
}
return f, nil
}
// GenerateLogFile generates a log file from a set of series based on the count arguments.
// Total series returned will equal measurementN * tagN * valueN.
2017-11-22 15:30:02 +00:00
func GenerateLogFile(sfile *tsdb.SeriesFile, measurementN, tagN, valueN int) (*LogFile, error) {
2016-10-25 14:36:58 +00:00
tagValueN := pow(valueN, tagN)
2017-09-14 15:41:58 +00:00
f := MustOpenLogFile(sfile)
2018-01-03 19:19:02 +00:00
seriesSet := tsi1.NewSeriesSet()
2016-10-25 14:36:58 +00:00
for i := 0; i < measurementN; i++ {
name := []byte(fmt.Sprintf("measurement%d", i))
// Generate tag sets.
for j := 0; j < tagValueN; j++ {
var tags models.Tags
for k := 0; k < tagN; k++ {
key := []byte(fmt.Sprintf("key%d", k))
value := []byte(fmt.Sprintf("value%d", (j / pow(valueN, k) % valueN)))
tags = append(tags, models.NewTag(key, value))
2016-10-25 14:36:58 +00:00
}
2018-01-03 19:19:02 +00:00
if err := f.AddSeriesList(seriesSet, [][]byte{name}, []models.Tags{tags}); err != nil {
2016-10-25 14:36:58 +00:00
return nil, err
}
}
}
return f, nil
}
2017-11-22 15:30:02 +00:00
func MustGenerateLogFile(sfile *tsdb.SeriesFile, measurementN, tagN, valueN int) *LogFile {
2017-09-14 15:41:58 +00:00
f, err := GenerateLogFile(sfile, measurementN, tagN, valueN)
2016-10-25 14:36:58 +00:00
if err != nil {
panic(err)
}
return f
}
2017-02-08 16:50:46 +00:00
func benchmarkLogFile_AddSeries(b *testing.B, measurementN, seriesKeyN, seriesValueN int) {
2017-09-14 15:41:58 +00:00
sfile := MustOpenSeriesFile()
defer sfile.Close()
2017-02-08 16:50:46 +00:00
b.StopTimer()
2017-09-14 15:41:58 +00:00
f := MustOpenLogFile(sfile.SeriesFile)
2018-01-03 19:19:02 +00:00
seriesSet := tsi1.NewSeriesSet()
2017-02-08 16:50:46 +00:00
type Datum struct {
Name []byte
Tags models.Tags
}
// Pre-generate everything.
var (
data []Datum
series int
)
tagValueN := pow(seriesValueN, seriesKeyN)
for i := 0; i < measurementN; i++ {
name := []byte(fmt.Sprintf("measurement%d", i))
for j := 0; j < tagValueN; j++ {
var tags models.Tags
for k := 0; k < seriesKeyN; k++ {
key := []byte(fmt.Sprintf("key%d", k))
value := []byte(fmt.Sprintf("value%d", (j / pow(seriesValueN, k) % seriesValueN)))
tags = append(tags, models.NewTag(key, value))
2017-02-08 16:50:46 +00:00
}
data = append(data, Datum{Name: name, Tags: tags})
series += len(tags)
}
}
b.StartTimer()
b.ResetTimer()
for i := 0; i < b.N; i++ {
for _, d := range data {
2018-01-03 19:19:02 +00:00
if err := f.AddSeriesList(seriesSet, [][]byte{d.Name}, []models.Tags{d.Tags}); err != nil {
2017-02-08 16:50:46 +00:00
b.Fatal(err)
}
}
}
}
// The BenchmarkLogFile_AddSeries_* benchmarks below all delegate to
// benchmarkLogFile_AddSeries. The numeric suffix of each name spells out the
// arguments passed, in the order measurementN_seriesKeyN_seriesValueN.

// BenchmarkLogFile_AddSeries_100_1_1 uses measurementN=100, seriesKeyN=1,
// seriesValueN=1 (previously annotated "100 series").
func BenchmarkLogFile_AddSeries_100_1_1(b *testing.B) {
	benchmarkLogFile_AddSeries(b, 100, 1, 1)
}

// BenchmarkLogFile_AddSeries_1000_1_1 uses measurementN=1000, seriesKeyN=1,
// seriesValueN=1 (previously annotated "1000 series").
func BenchmarkLogFile_AddSeries_1000_1_1(b *testing.B) {
	benchmarkLogFile_AddSeries(b, 1000, 1, 1)
}

// BenchmarkLogFile_AddSeries_10000_1_1 uses measurementN=10000, seriesKeyN=1,
// seriesValueN=1 (previously annotated "10000 series").
func BenchmarkLogFile_AddSeries_10000_1_1(b *testing.B) {
	benchmarkLogFile_AddSeries(b, 10000, 1, 1)
}

// BenchmarkLogFile_AddSeries_100_2_10 uses measurementN=100, seriesKeyN=2,
// seriesValueN=10 (previously annotated "~20K series").
func BenchmarkLogFile_AddSeries_100_2_10(b *testing.B) {
	benchmarkLogFile_AddSeries(b, 100, 2, 10)
}

// BenchmarkLogFile_AddSeries_100000_1_1 uses measurementN=100000, seriesKeyN=1,
// seriesValueN=1 (previously annotated "~100K series").
func BenchmarkLogFile_AddSeries_100000_1_1(b *testing.B) {
	benchmarkLogFile_AddSeries(b, 100000, 1, 1)
}

// BenchmarkLogFile_AddSeries_100_3_7 uses measurementN=100, seriesKeyN=3,
// seriesValueN=7 (previously annotated "~100K series").
func BenchmarkLogFile_AddSeries_100_3_7(b *testing.B) {
	benchmarkLogFile_AddSeries(b, 100, 3, 7)
}

// BenchmarkLogFile_AddSeries_200_3_7 uses measurementN=200, seriesKeyN=3,
// seriesValueN=7 (previously annotated "~200K series").
func BenchmarkLogFile_AddSeries_200_3_7(b *testing.B) {
	benchmarkLogFile_AddSeries(b, 200, 3, 7)
}

// BenchmarkLogFile_AddSeries_200_4_7 uses measurementN=200, seriesKeyN=4,
// seriesValueN=7 (previously annotated "~1.9M series").
func BenchmarkLogFile_AddSeries_200_4_7(b *testing.B) {
	benchmarkLogFile_AddSeries(b, 200, 4, 7)
}
2017-02-10 15:48:11 +00:00
func BenchmarkLogFile_WriteTo(b *testing.B) {
for _, seriesN := range []int{1000, 10000, 100000, 1000000} {
name := fmt.Sprintf("series=%d", seriesN)
b.Run(name, func(b *testing.B) {
2017-09-14 15:41:58 +00:00
sfile := MustOpenSeriesFile()
defer sfile.Close()
f := MustOpenLogFile(sfile.SeriesFile)
2017-02-10 15:48:11 +00:00
defer f.Close()
2018-01-03 19:19:02 +00:00
seriesSet := tsi1.NewSeriesSet()
2017-02-10 15:48:11 +00:00
2017-04-26 17:02:48 +00:00
// Estimate bloom filter size.
m, k := bloom.Estimate(uint64(seriesN), 0.02)
2017-02-10 15:48:11 +00:00
// Initialize log file with series data.
for i := 0; i < seriesN; i++ {
2017-09-25 15:31:20 +00:00
if err := f.AddSeriesList(
2018-01-03 19:19:02 +00:00
seriesSet,
2017-09-25 15:31:20 +00:00
[][]byte{[]byte("cpu")},
[]models.Tags{{
2017-02-10 15:48:11 +00:00
{Key: []byte("host"), Value: []byte(fmt.Sprintf("server-%d", i))},
{Key: []byte("location"), Value: []byte("us-west")},
2017-09-25 15:31:20 +00:00
}},
2017-02-10 15:48:11 +00:00
); err != nil {
b.Fatal(err)
}
}
b.ResetTimer()
// Create cpu profile for each subtest.
MustStartCPUProfile(name)
defer pprof.StopCPUProfile()
// Compact log file.
for i := 0; i < b.N; i++ {
buf := bytes.NewBuffer(make([]byte, 0, 150*seriesN))
2017-05-23 19:42:38 +00:00
if _, err := f.CompactTo(buf, m, k); err != nil {
2017-02-10 15:48:11 +00:00
b.Fatal(err)
}
b.Logf("sz=%db", buf.Len())
}
})
}
}
// MustStartCPUProfile starts a cpu profile in a temporary path based on name.
//
// Every run of non-word characters in name is replaced with "-", and the
// profile is written to "cpu-<name>.pprof" inside the directory reported by
// os.TempDir. On success the file is left open because the profiler keeps
// writing to it; callers stop profiling with pprof.StopCPUProfile.
//
// Panics if the file cannot be created or profiling cannot be started.
func MustStartCPUProfile(name string) {
	name = regexp.MustCompile(`\W+`).ReplaceAllString(name, "-")

	// Open file and start pprof. os.TempDir is used instead of a fixed "/tmp"
	// so the helper honours the platform's temporary directory.
	path := filepath.Join(os.TempDir(), fmt.Sprintf("cpu-%s.pprof", name))
	f, err := os.Create(path)
	if err != nil {
		panic(err)
	}
	if err := pprof.StartCPUProfile(f); err != nil {
		// Profiling never started, so nothing will write to f; release the
		// handle before panicking instead of leaking it.
		f.Close()
		panic(err)
	}
}