// Tests for the tsi1 log file (influxdb/tsdb/index/tsi1/log_file_test.go).

package tsi1_test
import (
"bytes"
"fmt"
"io/ioutil"
"math/rand"
"os"
"path/filepath"
"regexp"
"runtime/pprof"
"sort"
"testing"
"time"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/pkg/bloom"
"github.com/influxdata/influxdb/tsdb"
"github.com/influxdata/influxdb/tsdb/index/tsi1"
)
// Ensure log file can append series.
func TestLogFile_AddSeriesList(t *testing.T) {
	sfile := MustOpenSeriesFile()
	defer sfile.Close()

	f := MustOpenLogFile(sfile.SeriesFile)
	defer f.Close()
	seriesSet := tsi1.NewSeriesSet()

	// Add test data: two measurements, one of them with two series.
	names := [][]byte{
		[]byte("mem"),
		[]byte("cpu"),
		[]byte("cpu"),
	}
	tagSlices := []models.Tags{
		{{Key: []byte("host"), Value: []byte("serverA")}},
		{{Key: []byte("region"), Value: []byte("us-east")}},
		{{Key: []byte("region"), Value: []byte("us-west")}},
	}
	if err := f.AddSeriesList(seriesSet, names, tagSlices); err != nil {
		t.Fatal(err)
	}

	// Measurements must come back sorted by name, then EOF.
	verify := func() {
		itr := f.MeasurementIterator()
		for _, want := range []string{"cpu", "mem"} {
			if e := itr.Next(); e == nil || string(e.Name()) != want {
				t.Fatalf("unexpected measurement: %#v", e)
			}
		}
		if e := itr.Next(); e != nil {
			t.Fatalf("expected eof, got: %#v", e)
		}
	}

	// Verify data.
	verify()

	// Reopen file and re-verify that the data survived the replay.
	if err := f.Reopen(); err != nil {
		t.Fatal(err)
	}
	verify()
}
// TestLogFile_SeriesStoredInOrder verifies that series IDs come out of the
// iterator in non-decreasing order even when duplicates were written.
func TestLogFile_SeriesStoredInOrder(t *testing.T) {
	sfile := MustOpenSeriesFile()
	defer sfile.Close()

	f := MustOpenLogFile(sfile.SeriesFile)
	defer f.Close()
	seriesSet := tsi1.NewSeriesSet()

	// Generate and add test data.
	tvm := make(map[string]struct{})
	rand.Seed(time.Now().Unix())
	for i := 0; i < 100; i++ {
		// Small value space on purpose: encourages duplicate series.
		tv := fmt.Sprintf("server-%d", rand.Intn(50))
		tvm[tv] = struct{}{}

		names := [][]byte{[]byte("mem"), []byte("cpu")}
		tagSlices := []models.Tags{
			{models.NewTag([]byte("host"), []byte(tv))},
			{models.NewTag([]byte("host"), []byte(tv))},
		}
		if err := f.AddSeriesList(seriesSet, names, tagSlices); err != nil {
			t.Fatal(err)
		}
	}

	// Sort the tag values so we know what order to expect.
	tvs := make([]string, 0, len(tvm))
	for tv := range tvm {
		tvs = append(tvs, tv)
	}
	sort.Strings(tvs)

	// Double the series values since we're adding them twice (two measurements).
	tvs = append(tvs, tvs...)

	// When we pull the series out via an iterator they should be in order.
	itr := f.SeriesIDIterator()
	if itr == nil {
		t.Fatal("nil iterator")
	}

	var prevSeriesID uint64
	for range tvs {
		elem, err := itr.Next()
		switch {
		case err != nil:
			t.Fatal(err)
		case elem.SeriesID == 0:
			t.Fatal("got nil series")
		case elem.SeriesID < prevSeriesID:
			t.Fatalf("series out of order: %d !< %d ", elem.SeriesID, prevSeriesID)
		}
		prevSeriesID = elem.SeriesID
	}
}
// Ensure log file can delete an existing measurement.
func TestLogFile_DeleteMeasurement(t *testing.T) {
	sfile := MustOpenSeriesFile()
	defer sfile.Close()

	f := MustOpenLogFile(sfile.SeriesFile)
	defer f.Close()
	seriesSet := tsi1.NewSeriesSet()

	// Add test data.
	if err := f.AddSeriesList(seriesSet, [][]byte{
		[]byte("mem"),
		[]byte("cpu"),
		[]byte("cpu"),
	}, []models.Tags{
		{{Key: []byte("host"), Value: []byte("serverA")}},
		{{Key: []byte("region"), Value: []byte("us-east")}},
		{{Key: []byte("region"), Value: []byte("us-west")}},
	}); err != nil {
		t.Fatal(err)
	}

	// Remove measurement.
	if err := f.DeleteMeasurement([]byte("cpu")); err != nil {
		t.Fatal(err)
	}

	// Verify data: "cpu" remains but is tombstoned, "mem" is untouched.
	// Check e for nil before dereferencing so a short iterator fails the
	// test instead of panicking (matches TestLogFile_AddSeriesList).
	itr := f.MeasurementIterator()
	if e := itr.Next(); e == nil || string(e.Name()) != "cpu" || !e.Deleted() {
		t.Fatalf("unexpected measurement: %#v", e)
	} else if e := itr.Next(); e == nil || string(e.Name()) != "mem" || e.Deleted() {
		t.Fatalf("unexpected measurement: %#v", e)
	} else if e := itr.Next(); e != nil {
		t.Fatalf("expected eof, got: %#v", e)
	}
}
// LogFile is a test wrapper for tsi1.LogFile.
// It embeds the real type and overrides Close/Reopen to manage a temp file.
type LogFile struct {
	*tsi1.LogFile
}
// NewLogFile returns a new instance of LogFile with a temporary file path.
func NewLogFile(sfile *tsdb.SeriesFile) *LogFile {
	tmp, err := ioutil.TempFile("", "tsi1-log-file-")
	if err != nil {
		panic(err)
	}
	// Only the path is needed; tsi1.LogFile opens the file itself.
	path := tmp.Name()
	tmp.Close()

	return &LogFile{LogFile: tsi1.NewLogFile(sfile, path)}
}
// MustOpenLogFile returns a new, open instance of LogFile. Panic on error.
func MustOpenLogFile(sfile *tsdb.SeriesFile) *LogFile {
	lf := NewLogFile(sfile)
	if err := lf.Open(); err != nil {
		panic(err)
	}
	return lf
}
// Close closes the log file and removes it from disk.
func (f *LogFile) Close() error {
	err := f.LogFile.Close()
	os.Remove(f.Path())
	return err
}
// Reopen closes and reopens the file.
func (f *LogFile) Reopen() error {
	if err := f.LogFile.Close(); err != nil {
		return err
	}
	return f.LogFile.Open()
}
// CreateLogFile creates a new temporary log file and adds a list of series.
// On error the partially-written file is closed (removing its temp file)
// before the error is returned, so no file descriptor or disk space leaks.
func CreateLogFile(sfile *tsdb.SeriesFile, series []Series) (*LogFile, error) {
	f := MustOpenLogFile(sfile)
	seriesSet := tsi1.NewSeriesSet()
	for _, serie := range series {
		if err := f.AddSeriesList(seriesSet, [][]byte{serie.Name}, []models.Tags{serie.Tags}); err != nil {
			f.Close() // best-effort cleanup; AddSeriesList error takes precedence
			return nil, err
		}
	}
	return f, nil
}
// GenerateLogFile generates a log file from a set of series based on the count arguments.
// Total series returned will equal measurementN * valueN^tagN.
// On error the partially-written file is closed (removing its temp file).
func GenerateLogFile(sfile *tsdb.SeriesFile, measurementN, tagN, valueN int) (*LogFile, error) {
	tagValueN := pow(valueN, tagN)

	f := MustOpenLogFile(sfile)
	seriesSet := tsi1.NewSeriesSet()
	for i := 0; i < measurementN; i++ {
		name := []byte(fmt.Sprintf("measurement%d", i))

		// Generate tag sets: tag key k cycles every pow(valueN, k)
		// iterations of j, so all valueN^tagN combinations appear once.
		for j := 0; j < tagValueN; j++ {
			var tags models.Tags
			for k := 0; k < tagN; k++ {
				key := []byte(fmt.Sprintf("key%d", k))
				value := []byte(fmt.Sprintf("value%d", (j / pow(valueN, k) % valueN)))
				tags = append(tags, models.NewTag(key, value))
			}
			if err := f.AddSeriesList(seriesSet, [][]byte{name}, []models.Tags{tags}); err != nil {
				f.Close() // best-effort cleanup; AddSeriesList error takes precedence
				return nil, err
			}
		}
	}
	return f, nil
}
// MustGenerateLogFile is like GenerateLogFile but panics on error.
func MustGenerateLogFile(sfile *tsdb.SeriesFile, measurementN, tagN, valueN int) *LogFile {
	lf, err := GenerateLogFile(sfile, measurementN, tagN, valueN)
	if err != nil {
		panic(err)
	}
	return lf
}
// benchmarkLogFile_AddSeries benchmarks appending
// measurementN * seriesValueN^seriesKeyN pre-generated series to a log file.
// Series generation happens outside the timed region.
func benchmarkLogFile_AddSeries(b *testing.B, measurementN, seriesKeyN, seriesValueN int) {
	sfile := MustOpenSeriesFile()
	defer sfile.Close()

	b.StopTimer()
	f := MustOpenLogFile(sfile.SeriesFile)
	defer f.Close() // remove the temp log file when the benchmark ends
	seriesSet := tsi1.NewSeriesSet()

	type Datum struct {
		Name []byte
		Tags models.Tags
	}

	// Pre-generate everything so only AddSeriesList is measured.
	var data []Datum
	tagValueN := pow(seriesValueN, seriesKeyN)
	for i := 0; i < measurementN; i++ {
		name := []byte(fmt.Sprintf("measurement%d", i))
		for j := 0; j < tagValueN; j++ {
			var tags models.Tags
			for k := 0; k < seriesKeyN; k++ {
				key := []byte(fmt.Sprintf("key%d", k))
				value := []byte(fmt.Sprintf("value%d", (j / pow(seriesValueN, k) % seriesValueN)))
				tags = append(tags, models.NewTag(key, value))
			}
			data = append(data, Datum{Name: name, Tags: tags})
		}
	}
	b.StartTimer()
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		for _, d := range data {
			if err := f.AddSeriesList(seriesSet, [][]byte{d.Name}, []models.Tags{d.Tags}); err != nil {
				b.Fatal(err)
			}
		}
	}
}
// Benchmark variants named _<measurementN>_<tagKeyN>_<tagValueN>; the trailing
// comment on each line gives the approximate total series count.
func BenchmarkLogFile_AddSeries_100_1_1(b *testing.B) { benchmarkLogFile_AddSeries(b, 100, 1, 1) } // 100 series
func BenchmarkLogFile_AddSeries_1000_1_1(b *testing.B) { benchmarkLogFile_AddSeries(b, 1000, 1, 1) } // 1000 series
func BenchmarkLogFile_AddSeries_10000_1_1(b *testing.B) { benchmarkLogFile_AddSeries(b, 10000, 1, 1) } // 10000 series
func BenchmarkLogFile_AddSeries_100_2_10(b *testing.B) { benchmarkLogFile_AddSeries(b, 100, 2, 10) } // ~20K series
func BenchmarkLogFile_AddSeries_100000_1_1(b *testing.B) { benchmarkLogFile_AddSeries(b, 100000, 1, 1) } // ~100K series
func BenchmarkLogFile_AddSeries_100_3_7(b *testing.B) { benchmarkLogFile_AddSeries(b, 100, 3, 7) } // ~100K series
func BenchmarkLogFile_AddSeries_200_3_7(b *testing.B) { benchmarkLogFile_AddSeries(b, 200, 3, 7) } // ~200K series
func BenchmarkLogFile_AddSeries_200_4_7(b *testing.B) { benchmarkLogFile_AddSeries(b, 200, 4, 7) } // ~1.9M series
// BenchmarkLogFile_WriteTo measures compacting a populated log file into the
// TSI index-file format, at several series counts. Each subtest writes a CPU
// profile to the temp dir (see MustStartCPUProfile).
func BenchmarkLogFile_WriteTo(b *testing.B) {
	for _, seriesN := range []int{1000, 10000, 100000, 1000000} {
		name := fmt.Sprintf("series=%d", seriesN)
		b.Run(name, func(b *testing.B) {
			sfile := MustOpenSeriesFile()
			defer sfile.Close()
			f := MustOpenLogFile(sfile.SeriesFile)
			defer f.Close()
			seriesSet := tsi1.NewSeriesSet()
			// Estimate bloom filter size (m bits, k hashes) for a 2% false-positive rate.
			m, k := bloom.Estimate(uint64(seriesN), 0.02)
			// Initialize log file with series data: seriesN series under one
			// "cpu" measurement, each with a unique host tag.
			for i := 0; i < seriesN; i++ {
				if err := f.AddSeriesList(
					seriesSet,
					[][]byte{[]byte("cpu")},
					[]models.Tags{{
						{Key: []byte("host"), Value: []byte(fmt.Sprintf("server-%d", i))},
						{Key: []byte("location"), Value: []byte("us-west")},
					}},
				); err != nil {
					b.Fatal(err)
				}
			}
			// Exclude the setup above from the timed region.
			b.ResetTimer()
			// Create cpu profile for each subtest; stopped when the subtest closure returns.
			MustStartCPUProfile(name)
			defer pprof.StopCPUProfile()
			// Compact log file. Buffer is pre-sized to roughly 150 bytes per series.
			for i := 0; i < b.N; i++ {
				buf := bytes.NewBuffer(make([]byte, 0, 150*seriesN))
				if _, err := f.CompactTo(buf, m, k); err != nil {
					b.Fatal(err)
				}
				b.Logf("sz=%db", buf.Len())
			}
		})
	}
}
// MustStartCPUProfile starts a cpu profile in a temporary path based on name.
// Panics on any error. The caller must call pprof.StopCPUProfile when done.
func MustStartCPUProfile(name string) {
	// Sanitize the subtest name (e.g. "series=1000") into a filename-safe token.
	name = regexp.MustCompile(`\W+`).ReplaceAllString(name, "-")

	// Open file and start pprof. Use the platform temp directory rather than
	// a hard-coded "/tmp" so this also works on Windows; on Unix os.TempDir
	// still resolves to /tmp by default.
	f, err := os.Create(filepath.Join(os.TempDir(), fmt.Sprintf("cpu-%s.pprof", name)))
	if err != nil {
		panic(err)
	}
	if err := pprof.StartCPUProfile(f); err != nil {
		panic(err)
	}
}