// engine_schema.go implements schema exploration (TagKeys and TagValues)
// for the TSM1 storage engine.

package tsm1
import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"sort"
	"strings"

	"github.com/influxdata/influxdb"
	"github.com/influxdata/influxdb/models"
	"github.com/influxdata/influxdb/tsdb"
	"github.com/influxdata/influxdb/tsdb/cursors"
	"github.com/influxdata/influxql"
)
// cancelCheckInterval represents the period at which TagKeys and TagValues
// will check for a canceled context. Specifically after every 64 series
// scanned, the query context will be checked for cancellation, and if canceled,
// the calls will immediately return.
const cancelCheckInterval = 64
// TagValues returns an iterator which enumerates the values for the specific
// tagKey in the given bucket matching the predicate within the
// time range (start, end].
//
// TagValues will always return a StringIterator if there is no error.
//
// If the context is canceled before TagValues has finished processing, a non-nil
// error will be returned along with a partial result of the already scanned values.
func (e *Engine) TagValues(ctx context.Context, orgID, bucketID influxdb.ID, tagKey string, start, end int64, predicate influxql.Expr) (cursors.StringIterator, error) {
	orgBucket := tsdb.EncodeName(orgID, bucketID)
	keyBytes := []byte(tagKey)

	// Dispatch to the predicate-aware scan only when a predicate was supplied.
	if predicate != nil {
		return e.tagValuesPredicate(ctx, orgBucket[:], keyBytes, start, end, predicate)
	}
	return e.tagValuesNoPredicate(ctx, orgBucket[:], keyBytes, start, end)
}
func (e *Engine) tagValuesNoPredicate(ctx context.Context, orgBucket, tagKeyBytes []byte, start, end int64) (cursors.StringIterator, error) {
tsmValues := make(map[string]struct{})
var tags models.Tags
// TODO(edd): we need to clean up how we're encoding the prefix so that we
// don't have to remember to get it right everywhere we need to touch TSM data.
prefix := models.EscapeMeasurement(orgBucket)
// TODO(sgc): extend prefix when filtering by \x00 == <measurement>
var stats cursors.CursorStats
var canceled bool
e.FileStore.ForEachFile(func(f TSMFile) bool {
// Check the context before accessing each tsm file
select {
case <-ctx.Done():
canceled = true
return false
default:
}
if f.OverlapsTimeRange(start, end) && f.OverlapsKeyPrefixRange(prefix, prefix) {
// TODO(sgc): create f.TimeRangeIterator(minKey, maxKey, start, end)
iter := f.TimeRangeIterator(prefix, start, end)
for i := 0; iter.Next(); i++ {
sfkey := iter.Key()
if !bytes.HasPrefix(sfkey, prefix) {
// end of org+bucket
break
}
key, _ := SeriesAndFieldFromCompositeKey(sfkey)
tags = models.ParseTagsWithTags(key, tags[:0])
curVal := tags.Get(tagKeyBytes)
if len(curVal) == 0 {
continue
}
if _, ok := tsmValues[string(curVal)]; ok {
continue
}
if iter.HasData() {
tsmValues[string(curVal)] = struct{}{}
}
}
stats.Add(iter.Stats())
}
return true
})
if canceled {
return cursors.NewStringSliceIteratorWithStats(nil, stats), ctx.Err()
}
// With performance in mind, we explicitly do not check the context
// while scanning the entries in the cache.
perf(storage): reduce allocations when deleting from cache When deleting from the cache, each cache key must be checked to determine if it matches the prefix we're deleting. Since the keys are stored as strings in the cache (map keys) there were a lot of allocations happening because `applySerial` expects `[]byte` keys. It's beneficial to reduce allocations by refacting `applySerial` to work on strings. Whilst some allocations now have to happen the other way (string -> []byte), they only happen if we actually need to delete the key from the cache. Most of the keys don't get deleted so it's better doing it this way. Performance on the benchmark from the previous commit improved by ~40-50%. name old time/op new time/op delta Engine_DeletePrefixRange_Cache/exists-24 102ms ±11% 59ms ± 3% -41.95% (p=0.000 n=10+8) Engine_DeletePrefixRange_Cache/not_exists-24 97.1ms ± 4% 45.0ms ± 1% -53.66% (p=0.000 n=10+10) name old alloc/op new alloc/op delta Engine_DeletePrefixRange_Cache/exists-24 25.5MB ± 1% 3.1MB ± 2% -87.83% (p=0.000 n=10+10) Engine_DeletePrefixRange_Cache/not_exists-24 23.9MB ± 1% 0.1MB ±86% -99.65% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Engine_DeletePrefixRange_Cache/exists-24 305k ± 1% 28k ± 1% -90.77% (p=0.000 n=10+10) Engine_DeletePrefixRange_Cache/not_exists-24 299k ± 1% 1k ±63% -99.74% (p=0.000 n=9+10) Raw benchmarks on a 24T/32GB/NVME machine are as follows: goos: linux goarch: amd64 pkg: github.com/influxdata/influxdb/tsdb/tsm1 BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 50379720 ns/op 3054106 B/op 27859 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 57326032 ns/op 3124764 B/op 28217 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 58943855 ns/op 3162146 B/op 28527 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 60565115 ns/op 3138811 B/op 28176 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 200 59775969 ns/op 3087910 B/op 27921 allocs/op 
BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 59530451 ns/op 3120986 B/op 28207 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 59185532 ns/op 3113066 B/op 28302 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 59295867 ns/op 3100832 B/op 28108 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 59599776 ns/op 3100686 B/op 28113 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 200 62065907 ns/op 3048527 B/op 27879 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 44979062 ns/op 123026 B/op 1244 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 44733344 ns/op 52650 B/op 479 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 44534180 ns/op 35119 B/op 398 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 45179881 ns/op 105256 B/op 706 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 44918964 ns/op 47426 B/op 621 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 45000465 ns/op 63164 B/op 564 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 45332999 ns/op 117008 B/op 1146 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 45652342 ns/op 66221 B/op 616 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 45083957 ns/op 154354 B/op 1143 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 44560228 ns/op 65024 B/op 724 allocs/op PASS ok github.com/influxdata/influxdb/tsdb/tsm1 1690.583s
2019-08-30 15:41:47 +00:00
prefixStr := string(prefix)
_ = e.Cache.ApplyEntryFn(func(sfkey string, entry *entry) error {
if !strings.HasPrefix(sfkey, prefixStr) {
return nil
}
perf(storage): reduce allocations when deleting from cache When deleting from the cache, each cache key must be checked to determine if it matches the prefix we're deleting. Since the keys are stored as strings in the cache (map keys) there were a lot of allocations happening because `applySerial` expects `[]byte` keys. It's beneficial to reduce allocations by refacting `applySerial` to work on strings. Whilst some allocations now have to happen the other way (string -> []byte), they only happen if we actually need to delete the key from the cache. Most of the keys don't get deleted so it's better doing it this way. Performance on the benchmark from the previous commit improved by ~40-50%. name old time/op new time/op delta Engine_DeletePrefixRange_Cache/exists-24 102ms ±11% 59ms ± 3% -41.95% (p=0.000 n=10+8) Engine_DeletePrefixRange_Cache/not_exists-24 97.1ms ± 4% 45.0ms ± 1% -53.66% (p=0.000 n=10+10) name old alloc/op new alloc/op delta Engine_DeletePrefixRange_Cache/exists-24 25.5MB ± 1% 3.1MB ± 2% -87.83% (p=0.000 n=10+10) Engine_DeletePrefixRange_Cache/not_exists-24 23.9MB ± 1% 0.1MB ±86% -99.65% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Engine_DeletePrefixRange_Cache/exists-24 305k ± 1% 28k ± 1% -90.77% (p=0.000 n=10+10) Engine_DeletePrefixRange_Cache/not_exists-24 299k ± 1% 1k ±63% -99.74% (p=0.000 n=9+10) Raw benchmarks on a 24T/32GB/NVME machine are as follows: goos: linux goarch: amd64 pkg: github.com/influxdata/influxdb/tsdb/tsm1 BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 50379720 ns/op 3054106 B/op 27859 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 57326032 ns/op 3124764 B/op 28217 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 58943855 ns/op 3162146 B/op 28527 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 60565115 ns/op 3138811 B/op 28176 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 200 59775969 ns/op 3087910 B/op 27921 allocs/op 
BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 59530451 ns/op 3120986 B/op 28207 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 59185532 ns/op 3113066 B/op 28302 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 59295867 ns/op 3100832 B/op 28108 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 59599776 ns/op 3100686 B/op 28113 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 200 62065907 ns/op 3048527 B/op 27879 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 44979062 ns/op 123026 B/op 1244 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 44733344 ns/op 52650 B/op 479 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 44534180 ns/op 35119 B/op 398 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 45179881 ns/op 105256 B/op 706 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 44918964 ns/op 47426 B/op 621 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 45000465 ns/op 63164 B/op 564 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 45332999 ns/op 117008 B/op 1146 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 45652342 ns/op 66221 B/op 616 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 45083957 ns/op 154354 B/op 1143 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 44560228 ns/op 65024 B/op 724 allocs/op PASS ok github.com/influxdata/influxdb/tsdb/tsm1 1690.583s
2019-08-30 15:41:47 +00:00
// TODO(edd): consider the []byte() conversion here.
key, _ := SeriesAndFieldFromCompositeKey([]byte(sfkey))
tags = models.ParseTagsWithTags(key, tags[:0])
curVal := tags.Get(tagKeyBytes)
if len(curVal) == 0 {
return nil
}
if _, ok := tsmValues[string(curVal)]; ok {
return nil
}
stats.ScannedValues += entry.values.Len()
stats.ScannedBytes += entry.values.Len() * 8 // sizeof timestamp
if entry.values.Contains(start, end) {
tsmValues[string(curVal)] = struct{}{}
}
return nil
})
vals := make([]string, 0, len(tsmValues))
for val := range tsmValues {
vals = append(vals, val)
}
sort.Strings(vals)
return cursors.NewStringSliceIteratorWithStats(vals, stats), nil
}
// tagValuesPredicate returns the distinct values of tag key tagKeyBytes for
// series in orgBucket that match predicate and have data in (start, end].
// Candidate series are resolved through the index, then confirmed against
// the cache and the TSM files.
//
// It always returns a StringIterator; if the context is canceled, ctx.Err()
// is returned along with the statistics gathered so far.
func (e *Engine) tagValuesPredicate(ctx context.Context, orgBucket, tagKeyBytes []byte, start, end int64, predicate influxql.Expr) (cursors.StringIterator, error) {
	if err := ValidateTagPredicate(predicate); err != nil {
		return nil, err
	}

	keys, err := e.findCandidateKeys(ctx, orgBucket, predicate)
	if err != nil {
		return cursors.EmptyStringIterator, err
	}

	if len(keys) == 0 {
		return cursors.EmptyStringIterator, nil
	}

	var files []TSMFile
	defer func() {
		// Release every TSM file reference taken below.
		for _, f := range files {
			f.Unref()
		}
	}()
	var iters []*TimeRangeIterator

	// TODO(edd): we need to clean up how we're encoding the prefix so that we
	// don't have to remember to get it right everywhere we need to touch TSM data.
	prefix := models.EscapeMeasurement(orgBucket)

	var canceled bool

	e.FileStore.ForEachFile(func(f TSMFile) bool {
		// Check the context before accessing each tsm file
		select {
		case <-ctx.Done():
			canceled = true
			return false
		default:
		}
		if f.OverlapsTimeRange(start, end) && f.OverlapsKeyPrefixRange(prefix, prefix) {
			f.Ref()
			files = append(files, f)
			iters = append(iters, f.TimeRangeIterator(prefix, start, end))
		}
		return true
	})

	var stats cursors.CursorStats

	if canceled {
		stats = statsFromIters(stats, iters)
		return cursors.NewStringSliceIteratorWithStats(nil, stats), ctx.Err()
	}

	tsmValues := make(map[string]struct{})

	// reusable buffers
	var (
		tags   models.Tags
		keybuf []byte
		sfkey  []byte
	)

	for i := range keys {
		// to keep cache scans fast, check context every 'cancelCheckInterval' iterations
		if i%cancelCheckInterval == 0 {
			select {
			case <-ctx.Done():
				stats = statsFromIters(stats, iters)
				return cursors.NewStringSliceIteratorWithStats(nil, stats), ctx.Err()
			default:
			}
		}

		_, tags = tsdb.ParseSeriesKeyInto(keys[i], tags[:0])
		curVal := tags.Get(tagKeyBytes)
		if len(curVal) == 0 {
			continue
		}
		if _, ok := tsmValues[string(curVal)]; ok {
			continue
		}

		keybuf = models.AppendMakeKey(keybuf[:0], prefix, tags)
		sfkey = AppendSeriesFieldKeyBytes(sfkey[:0], keybuf, tags.Get(models.FieldKeyTagKeyBytes))

		// Check the cache first; a hit avoids seeking in the TSM files.
		values := e.Cache.Values(sfkey)
		stats.ScannedValues += values.Len()
		stats.ScannedBytes += values.Len() * 8 // sizeof timestamp

		if values.Contains(start, end) {
			tsmValues[string(curVal)] = struct{}{}
			continue
		}

		for _, iter := range iters {
			if exact, _ := iter.Seek(sfkey); !exact {
				continue
			}

			if iter.HasData() {
				tsmValues[string(curVal)] = struct{}{}
				break
			}
		}
	}

	vals := make([]string, 0, len(tsmValues))
	for val := range tsmValues {
		vals = append(vals, val)
	}
	sort.Strings(vals)
	stats = statsFromIters(stats, iters)
	// err is necessarily nil at this point (checked after findCandidateKeys),
	// so return nil explicitly rather than the stale variable.
	return cursors.NewStringSliceIteratorWithStats(vals, stats), nil
}
// findCandidateKeys returns the series keys within orgBucket whose series
// match predicate, consulting the index and the series file. If the context
// is canceled mid-scan, the keys gathered so far are returned with ctx.Err().
func (e *Engine) findCandidateKeys(ctx context.Context, orgBucket []byte, predicate influxql.Expr) ([][]byte, error) {
	// determine candidate series keys
	sitr, err := e.index.MeasurementSeriesByExprIterator(orgBucket, predicate)
	if err != nil {
		return nil, err
	}
	if sitr == nil {
		return nil, nil
	}
	defer sitr.Close()

	var keys [][]byte
	for n := 0; ; n++ {
		// To keep series file index scans fast, only poll the context for
		// cancellation once every cancelCheckInterval iterations.
		if n%cancelCheckInterval == 0 {
			select {
			case <-ctx.Done():
				return keys, ctx.Err()
			default:
			}
		}

		elem, err := sitr.Next()
		if err != nil {
			return nil, err
		}
		if elem.SeriesID.IsZero() {
			// Iterator exhausted.
			break
		}

		// Skip series that are missing from the series file.
		if key := e.sfile.SeriesKey(elem.SeriesID); len(key) > 0 {
			keys = append(keys, key)
		}
	}
	return keys, nil
}
// TagKeys returns an iterator which enumerates the tag keys for the given
// bucket matching the predicate within the time range (start, end].
//
// TagKeys will always return a StringIterator if there is no error.
//
// If the context is canceled before TagKeys has finished processing, a non-nil
// error will be returned along with a partial result of the already scanned keys.
func (e *Engine) TagKeys(ctx context.Context, orgID, bucketID influxdb.ID, start, end int64, predicate influxql.Expr) (cursors.StringIterator, error) {
	orgBucket := tsdb.EncodeName(orgID, bucketID)

	// Dispatch to the predicate-aware scan only when a predicate was supplied.
	if predicate != nil {
		return e.tagKeysPredicate(ctx, orgBucket[:], start, end, predicate)
	}
	return e.tagKeysNoPredicate(ctx, orgBucket[:], start, end)
}
func (e *Engine) tagKeysNoPredicate(ctx context.Context, orgBucket []byte, start, end int64) (cursors.StringIterator, error) {
var tags models.Tags
// TODO(edd): we need to clean up how we're encoding the prefix so that we
// don't have to remember to get it right everywhere we need to touch TSM data.
prefix := models.EscapeMeasurement(orgBucket)
var keyset models.TagKeysSet
// TODO(sgc): extend prefix when filtering by \x00 == <measurement>
var stats cursors.CursorStats
var canceled bool
e.FileStore.ForEachFile(func(f TSMFile) bool {
// Check the context before touching each tsm file
select {
case <-ctx.Done():
canceled = true
return false
default:
}
if f.OverlapsTimeRange(start, end) && f.OverlapsKeyPrefixRange(prefix, prefix) {
// TODO(sgc): create f.TimeRangeIterator(minKey, maxKey, start, end)
iter := f.TimeRangeIterator(prefix, start, end)
for i := 0; iter.Next(); i++ {
sfkey := iter.Key()
if !bytes.HasPrefix(sfkey, prefix) {
// end of org+bucket
break
}
key, _ := SeriesAndFieldFromCompositeKey(sfkey)
tags = models.ParseTagsWithTags(key, tags[:0])
if keyset.IsSupersetKeys(tags) {
continue
}
if iter.HasData() {
keyset.UnionKeys(tags)
}
}
stats.Add(iter.Stats())
}
return true
})
if canceled {
return cursors.NewStringSliceIteratorWithStats(nil, stats), ctx.Err()
}
// With performance in mind, we explicitly do not check the context
// while scanning the entries in the cache.
perf(storage): reduce allocations when deleting from cache When deleting from the cache, each cache key must be checked to determine if it matches the prefix we're deleting. Since the keys are stored as strings in the cache (map keys) there were a lot of allocations happening because `applySerial` expects `[]byte` keys. It's beneficial to reduce allocations by refacting `applySerial` to work on strings. Whilst some allocations now have to happen the other way (string -> []byte), they only happen if we actually need to delete the key from the cache. Most of the keys don't get deleted so it's better doing it this way. Performance on the benchmark from the previous commit improved by ~40-50%. name old time/op new time/op delta Engine_DeletePrefixRange_Cache/exists-24 102ms ±11% 59ms ± 3% -41.95% (p=0.000 n=10+8) Engine_DeletePrefixRange_Cache/not_exists-24 97.1ms ± 4% 45.0ms ± 1% -53.66% (p=0.000 n=10+10) name old alloc/op new alloc/op delta Engine_DeletePrefixRange_Cache/exists-24 25.5MB ± 1% 3.1MB ± 2% -87.83% (p=0.000 n=10+10) Engine_DeletePrefixRange_Cache/not_exists-24 23.9MB ± 1% 0.1MB ±86% -99.65% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Engine_DeletePrefixRange_Cache/exists-24 305k ± 1% 28k ± 1% -90.77% (p=0.000 n=10+10) Engine_DeletePrefixRange_Cache/not_exists-24 299k ± 1% 1k ±63% -99.74% (p=0.000 n=9+10) Raw benchmarks on a 24T/32GB/NVME machine are as follows: goos: linux goarch: amd64 pkg: github.com/influxdata/influxdb/tsdb/tsm1 BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 50379720 ns/op 3054106 B/op 27859 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 57326032 ns/op 3124764 B/op 28217 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 58943855 ns/op 3162146 B/op 28527 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 60565115 ns/op 3138811 B/op 28176 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 200 59775969 ns/op 3087910 B/op 27921 allocs/op 
BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 59530451 ns/op 3120986 B/op 28207 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 59185532 ns/op 3113066 B/op 28302 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 59295867 ns/op 3100832 B/op 28108 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 59599776 ns/op 3100686 B/op 28113 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 200 62065907 ns/op 3048527 B/op 27879 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 44979062 ns/op 123026 B/op 1244 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 44733344 ns/op 52650 B/op 479 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 44534180 ns/op 35119 B/op 398 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 45179881 ns/op 105256 B/op 706 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 44918964 ns/op 47426 B/op 621 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 45000465 ns/op 63164 B/op 564 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 45332999 ns/op 117008 B/op 1146 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 45652342 ns/op 66221 B/op 616 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 45083957 ns/op 154354 B/op 1143 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 44560228 ns/op 65024 B/op 724 allocs/op PASS ok github.com/influxdata/influxdb/tsdb/tsm1 1690.583s
2019-08-30 15:41:47 +00:00
_ = e.Cache.ApplyEntryFn(func(sfkey string, entry *entry) error {
if !strings.HasPrefix(sfkey, string(prefix)) {
return nil
}
perf(storage): reduce allocations when deleting from cache When deleting from the cache, each cache key must be checked to determine if it matches the prefix we're deleting. Since the keys are stored as strings in the cache (map keys) there were a lot of allocations happening because `applySerial` expects `[]byte` keys. It's beneficial to reduce allocations by refacting `applySerial` to work on strings. Whilst some allocations now have to happen the other way (string -> []byte), they only happen if we actually need to delete the key from the cache. Most of the keys don't get deleted so it's better doing it this way. Performance on the benchmark from the previous commit improved by ~40-50%. name old time/op new time/op delta Engine_DeletePrefixRange_Cache/exists-24 102ms ±11% 59ms ± 3% -41.95% (p=0.000 n=10+8) Engine_DeletePrefixRange_Cache/not_exists-24 97.1ms ± 4% 45.0ms ± 1% -53.66% (p=0.000 n=10+10) name old alloc/op new alloc/op delta Engine_DeletePrefixRange_Cache/exists-24 25.5MB ± 1% 3.1MB ± 2% -87.83% (p=0.000 n=10+10) Engine_DeletePrefixRange_Cache/not_exists-24 23.9MB ± 1% 0.1MB ±86% -99.65% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Engine_DeletePrefixRange_Cache/exists-24 305k ± 1% 28k ± 1% -90.77% (p=0.000 n=10+10) Engine_DeletePrefixRange_Cache/not_exists-24 299k ± 1% 1k ±63% -99.74% (p=0.000 n=9+10) Raw benchmarks on a 24T/32GB/NVME machine are as follows: goos: linux goarch: amd64 pkg: github.com/influxdata/influxdb/tsdb/tsm1 BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 50379720 ns/op 3054106 B/op 27859 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 57326032 ns/op 3124764 B/op 28217 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 58943855 ns/op 3162146 B/op 28527 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 60565115 ns/op 3138811 B/op 28176 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 200 59775969 ns/op 3087910 B/op 27921 allocs/op 
BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 59530451 ns/op 3120986 B/op 28207 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 59185532 ns/op 3113066 B/op 28302 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 59295867 ns/op 3100832 B/op 28108 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 300 59599776 ns/op 3100686 B/op 28113 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 200 62065907 ns/op 3048527 B/op 27879 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 44979062 ns/op 123026 B/op 1244 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 44733344 ns/op 52650 B/op 479 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 44534180 ns/op 35119 B/op 398 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 45179881 ns/op 105256 B/op 706 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 44918964 ns/op 47426 B/op 621 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 45000465 ns/op 63164 B/op 564 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 45332999 ns/op 117008 B/op 1146 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 45652342 ns/op 66221 B/op 616 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 45083957 ns/op 154354 B/op 1143 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 300 44560228 ns/op 65024 B/op 724 allocs/op PASS ok github.com/influxdata/influxdb/tsdb/tsm1 1690.583s
2019-08-30 15:41:47 +00:00
// TODO(edd): consider []byte conversion here.
key, _ := SeriesAndFieldFromCompositeKey([]byte(sfkey))
tags = models.ParseTagsWithTags(key, tags[:0])
if keyset.IsSupersetKeys(tags) {
return nil
}
stats.ScannedValues += entry.values.Len()
stats.ScannedBytes += entry.values.Len() * 8 // sizeof timestamp
if entry.values.Contains(start, end) {
keyset.UnionKeys(tags)
}
return nil
})
return cursors.NewStringSliceIteratorWithStats(keyset.Keys(), stats), nil
}
// tagKeysPredicate returns the distinct tag keys for series in orgBucket that
// match predicate and have data in (start, end]. Candidate series are
// resolved through the index, then confirmed against the cache and the TSM
// files.
//
// It always returns a StringIterator; if the context is canceled, ctx.Err()
// is returned along with the statistics gathered so far.
func (e *Engine) tagKeysPredicate(ctx context.Context, orgBucket []byte, start, end int64, predicate influxql.Expr) (cursors.StringIterator, error) {
	if err := ValidateTagPredicate(predicate); err != nil {
		return nil, err
	}

	keys, err := e.findCandidateKeys(ctx, orgBucket, predicate)
	if err != nil {
		return cursors.EmptyStringIterator, err
	}

	if len(keys) == 0 {
		return cursors.EmptyStringIterator, nil
	}

	var files []TSMFile
	defer func() {
		// Release every TSM file reference taken below.
		for _, f := range files {
			f.Unref()
		}
	}()
	var iters []*TimeRangeIterator

	// TODO(edd): we need to clean up how we're encoding the prefix so that we
	// don't have to remember to get it right everywhere we need to touch TSM data.
	prefix := models.EscapeMeasurement(orgBucket)

	var canceled bool

	e.FileStore.ForEachFile(func(f TSMFile) bool {
		// Check the context before touching each tsm file
		select {
		case <-ctx.Done():
			canceled = true
			return false
		default:
		}
		if f.OverlapsTimeRange(start, end) && f.OverlapsKeyPrefixRange(prefix, prefix) {
			f.Ref()
			files = append(files, f)
			iters = append(iters, f.TimeRangeIterator(prefix, start, end))
		}
		return true
	})

	var stats cursors.CursorStats

	if canceled {
		stats = statsFromIters(stats, iters)
		return cursors.NewStringSliceIteratorWithStats(nil, stats), ctx.Err()
	}

	var keyset models.TagKeysSet

	// reusable buffers
	var (
		tags   models.Tags
		keybuf []byte
		sfkey  []byte
	)

	for i := range keys {
		// to keep cache scans fast, check context every 'cancelCheckInterval' iterations
		if i%cancelCheckInterval == 0 {
			select {
			case <-ctx.Done():
				stats = statsFromIters(stats, iters)
				return cursors.NewStringSliceIteratorWithStats(nil, stats), ctx.Err()
			default:
			}
		}

		_, tags = tsdb.ParseSeriesKeyInto(keys[i], tags[:0])
		if keyset.IsSupersetKeys(tags) {
			continue
		}

		keybuf = models.AppendMakeKey(keybuf[:0], prefix, tags)
		sfkey = AppendSeriesFieldKeyBytes(sfkey[:0], keybuf, tags.Get(models.FieldKeyTagKeyBytes))

		// Check the cache first; a hit avoids seeking in the TSM files.
		values := e.Cache.Values(sfkey)
		stats.ScannedValues += values.Len()
		stats.ScannedBytes += values.Len() * 8 // sizeof timestamp

		if values.Contains(start, end) {
			keyset.UnionKeys(tags)
			continue
		}

		for _, iter := range iters {
			if exact, _ := iter.Seek(sfkey); !exact {
				continue
			}

			if iter.HasData() {
				keyset.UnionKeys(tags)
				break
			}
		}
	}

	stats = statsFromIters(stats, iters)
	// err is necessarily nil at this point (checked after findCandidateKeys),
	// so return nil explicitly rather than the stale variable.
	return cursors.NewStringSliceIteratorWithStats(keyset.Keys(), stats), nil
}
// statsFromIters folds the statistics of every iterator in iters into stats
// and returns the combined result.
func statsFromIters(stats cursors.CursorStats, iters []*TimeRangeIterator) cursors.CursorStats {
	for i := range iters {
		stats.Add(iters[i].Stats())
	}
	return stats
}
// errUnexpectedTagComparisonOperator is returned by ValidateTagPredicate when
// a binary expression uses an operator other than =, !=, =~, !~, OR or AND.
var errUnexpectedTagComparisonOperator = errors.New("unexpected tag comparison operator")

// ValidateTagPredicate walks expr and verifies that every binary expression
// uses a supported tag comparison operator, has a tag key reference (or a
// nested binary expression) on the left-hand side, and a string literal,
// regex literal or nested binary expression on the right-hand side. The
// first violation found is returned; nil means the predicate is valid.
func ValidateTagPredicate(expr influxql.Expr) (err error) {
	influxql.WalkFunc(expr, func(node influxql.Node) {
		if err != nil {
			return // already invalid; skip the remainder of the walk
		}

		n, ok := node.(*influxql.BinaryExpr)
		if !ok {
			return
		}

		switch n.Op {
		case influxql.EQ, influxql.NEQ, influxql.EQREGEX, influxql.NEQREGEX, influxql.OR, influxql.AND:
			// supported operator
		default:
			err = errUnexpectedTagComparisonOperator
		}

		switch lhs := n.LHS.(type) {
		case *influxql.VarRef, *influxql.BinaryExpr:
			// valid left-hand side
		default:
			err = fmt.Errorf("binary expression: LHS must be tag key reference, got: %T", lhs)
		}

		switch rhs := n.RHS.(type) {
		case *influxql.StringLiteral, *influxql.RegexLiteral, *influxql.BinaryExpr:
			// valid right-hand side
		default:
			err = fmt.Errorf("binary expression: RHS must be string or regex, got: %T", rhs)
		}
	})
	return err
}