perf(storage): reduce amount of tracing
In a previous PR I added some tracing to help investigate delete performance within the cache. Ironically this makes performance significantly worse when you have a very high cardinality cache. This keeps the main benefits of the tracing, but reduces the number of spans created. The remaining spans are smarter with context, and include useful information about the size of the operation being traced. Performance on a benchmark shows a significant improvement: name old time/op new time/op delta Engine_DeletePrefixRange_Cache/exists-24 262ms ± 6% 113ms ± 8% -57.06% (p=0.000 n=10+10) Engine_DeletePrefixRange_Cache/not_exists-24 266ms ± 4% 96ms ± 2% -64.09% (p=0.000 n=8+10) name old alloc/op new alloc/op delta Engine_DeletePrefixRange_Cache/exists-24 62.7MB ± 0% 29.6MB ± 1% -52.82% (p=0.000 n=9+10) Engine_DeletePrefixRange_Cache/not_exists-24 59.2MB ± 0% 24.3MB ± 2% -59.03% (p=0.000 n=8+10) name old allocs/op new allocs/op delta Engine_DeletePrefixRange_Cache/exists-24 711k ± 0% 334k ± 0% -53.07% (p=0.000 n=9+8) Engine_DeletePrefixRange_Cache/not_exists-24 700k ± 0% 302k ± 1% -56.79% (p=0.000 n=8+10) Raw benchmarks on a 24T/32GB/Nvme machine: goos: linux goarch: amd64 pkg: github.com/influxdata/influxdb/tsdb/tsm1 BenchmarkEngine_DeletePrefixRange_Cache/exists-24 100 104851012 ns/op 29442514 B/op 333599 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 100 107838824 ns/op 29485649 B/op 334369 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 100 108020671 ns/op 29443324 B/op 333610 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 100 106507506 ns/op 29977931 B/op 338597 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 100 116393032 ns/op 29443516 B/op 333614 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 100 112581877 ns/op 29691455 B/op 334699 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 50 119833106 ns/op 29444712 B/op 333625 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 100 113851895 ns/op 29921119 B/op 337419 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 50 121735395 ns/op 29445551 B/op 333634 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/exists-24 50 115387319 ns/op 29444513 B/op 333627 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 100 94474658 ns/op 24696698 B/op 306702 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 100 94767020 ns/op 24004763 B/op 300066 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 100 97869523 ns/op 24556560 B/op 305827 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 100 93916119 ns/op 24172163 B/op 301244 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 100 96591891 ns/op 24006021 B/op 300081 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 100 93521244 ns/op 24266467 B/op 303190 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 100 95419569 ns/op 24006501 B/op 300087 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 100 96694570 ns/op 24521126 B/op 306041 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 100 95075965 ns/op 24299409 B/op 301649 allocs/op BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24 100 97182864 ns/op 24007644 B/op 300101 allocs/op PASS ok github.com/influxdata/influxdb/tsdb/tsm1 490.287spull/14892/head
parent
15ade8c162
commit
da2fb27cb9
|
@ -76,6 +76,7 @@ func (e *Engine) DeletePrefixRange(rootCtx context.Context, name []byte, min, ma
|
|||
// TODO(edd): tracing this deep down is currently speculative, so I have
|
||||
// not added the tracing into the TSMReader API.
|
||||
span, _ := tracing.StartSpanFromContextWithOperationName(rootCtx, "TSMFile delete prefix")
|
||||
span.LogKV("file_path", r.Path())
|
||||
defer span.Finish()
|
||||
|
||||
return r.DeletePrefix(name, min, max, pred, func(key []byte) {
|
||||
|
@ -87,13 +88,12 @@ func (e *Engine) DeletePrefixRange(rootCtx context.Context, name []byte, min, ma
|
|||
return err
|
||||
}
|
||||
|
||||
span, _ = tracing.StartSpanFromContextWithOperationName(rootCtx, "Cache find delete keys")
|
||||
span.LogKV("cache_size", e.Cache.Size())
|
||||
var keysChecked int // For tracing information.
|
||||
// ApplySerialEntryFn cannot return an error in this invocation.
|
||||
_ = e.Cache.ApplyEntryFn(func(k []byte, _ *entry) error {
|
||||
// TODO(edd): tracing this deep down is currently speculative, so I have
|
||||
// not added the tracing into the Cache API.
|
||||
span, _ := tracing.StartSpanFromContextWithOperationName(rootCtx, "Cache find delete keys")
|
||||
defer span.Finish()
|
||||
|
||||
keysChecked++
|
||||
if !bytes.HasPrefix(k, name) {
|
||||
return nil
|
||||
}
|
||||
|
@ -107,11 +107,10 @@ func (e *Engine) DeletePrefixRange(rootCtx context.Context, name []byte, min, ma
|
|||
|
||||
return nil
|
||||
})
|
||||
span.LogKV("cache_cardinality", keysChecked)
|
||||
span.Finish()
|
||||
|
||||
sortSpan, _ := tracing.StartSpanFromContextWithOperationName(rootCtx, "Cache sort keys")
|
||||
sortSpan.Finish()
|
||||
|
||||
// Delete from the cache.
|
||||
// Delete from the cache (traced in cache).
|
||||
e.Cache.DeleteBucketRange(ctx, name, min, max, pred)
|
||||
|
||||
// Now that all of the data is purged, we need to find if some keys are fully deleted
|
||||
|
@ -120,11 +119,13 @@ func (e *Engine) DeletePrefixRange(rootCtx context.Context, name []byte, min, ma
|
|||
// TODO(edd): tracing this deep down is currently speculative, so I have
|
||||
// not added the tracing into the Engine API.
|
||||
span, _ := tracing.StartSpanFromContextWithOperationName(rootCtx, "TSMFile determine fully deleted")
|
||||
span.LogKV("file_path", r.Path())
|
||||
defer span.Finish()
|
||||
|
||||
possiblyDead.RLock()
|
||||
defer possiblyDead.RUnlock()
|
||||
|
||||
var keysChecked int
|
||||
iter := r.Iterator(name)
|
||||
for i := 0; iter.Next(); i++ {
|
||||
key := iter.Key()
|
||||
|
@ -149,19 +150,18 @@ func (e *Engine) DeletePrefixRange(rootCtx context.Context, name []byte, min, ma
|
|||
possiblyDead.RLock()
|
||||
}
|
||||
}
|
||||
|
||||
span.LogKV("keys_checked", keysChecked)
|
||||
return iter.Err()
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
span, _ = tracing.StartSpanFromContextWithOperationName(rootCtx, "Cache find delete keys")
|
||||
span.LogKV("cache_size", e.Cache.Size())
|
||||
keysChecked = 0
|
||||
// ApplySerialEntryFn cannot return an error in this invocation.
|
||||
_ = e.Cache.ApplyEntryFn(func(k []byte, _ *entry) error {
|
||||
// TODO(edd): tracing this deep down is currently speculative, so I have
|
||||
// not added the tracing into the Cache API.
|
||||
span, _ := tracing.StartSpanFromContextWithOperationName(rootCtx, "Cache find delete keys")
|
||||
defer span.Finish()
|
||||
|
||||
keysChecked++
|
||||
if !bytes.HasPrefix(k, name) {
|
||||
return nil
|
||||
}
|
||||
|
@ -172,6 +172,8 @@ func (e *Engine) DeletePrefixRange(rootCtx context.Context, name []byte, min, ma
|
|||
delete(possiblyDead.keys, string(k))
|
||||
return nil
|
||||
})
|
||||
span.LogKV("cache_cardinality", keysChecked)
|
||||
span.Finish()
|
||||
|
||||
if len(possiblyDead.keys) > 0 {
|
||||
buf := make([]byte, 1024)
|
||||
|
@ -218,7 +220,8 @@ func (e *Engine) DeletePrefixRange(rootCtx context.Context, name []byte, min, ma
|
|||
|
||||
// Iterate over the series ids we previously extracted from the index
|
||||
// and remove from the series file.
|
||||
span, _ = tracing.StartSpanFromContextWithOperationName(rootCtx, "SFile Delete Series ID")
|
||||
span, _ = tracing.StartSpanFromContextWithOperationName(rootCtx, "SFile Delete Series IDs")
|
||||
span.LogKV("series_id_set_size", set.Cardinality())
|
||||
set.ForEachNoLock(func(id tsdb.SeriesID) {
|
||||
if err = e.sfile.DeleteSeriesID(id); err != nil {
|
||||
return
|
||||
|
@ -230,6 +233,7 @@ func (e *Engine) DeletePrefixRange(rootCtx context.Context, name []byte, min, ma
|
|||
|
||||
// This is the slow path, when not dropping the entire bucket (measurement)
|
||||
span, _ = tracing.StartSpanFromContextWithOperationName(rootCtx, "TSI/SFile Delete keys")
|
||||
span.LogKV("keys_to_delete", len(possiblyDead.keys))
|
||||
for key := range possiblyDead.keys {
|
||||
// TODO(jeff): ugh reduce copies here
|
||||
keyb := []byte(key)
|
||||
|
|
Loading…
Reference in New Issue