perf(storage): reduce amount of tracing

In a previous PR I added some tracing to help investigate delete
performance within the cache. Ironically this makes performance
significantly worse when you have a very high cardinality cache.

This keeps the main benefits of the tracing, but reduces the number of
spans created. The remaining spans are smarter with context, and include
useful information about the size of the operation being traced.

Performance on a benchmark shows a significant improvement:

name                                          old time/op    new time/op    delta
Engine_DeletePrefixRange_Cache/exists-24         262ms ± 6%     113ms ± 8%  -57.06%  (p=0.000 n=10+10)
Engine_DeletePrefixRange_Cache/not_exists-24     266ms ± 4%      96ms ± 2%  -64.09%  (p=0.000 n=8+10)

name                                          old alloc/op   new alloc/op   delta
Engine_DeletePrefixRange_Cache/exists-24        62.7MB ± 0%    29.6MB ± 1%  -52.82%  (p=0.000 n=9+10)
Engine_DeletePrefixRange_Cache/not_exists-24    59.2MB ± 0%    24.3MB ± 2%  -59.03%  (p=0.000 n=8+10)

name                                          old allocs/op  new allocs/op  delta
Engine_DeletePrefixRange_Cache/exists-24          711k ± 0%      334k ± 0%  -53.07%  (p=0.000 n=9+8)
Engine_DeletePrefixRange_Cache/not_exists-24      700k ± 0%      302k ± 1%  -56.79%  (p=0.000 n=8+10)

Raw benchmarks on a 24T/32GB/Nvme machine:

goos: linux
goarch: amd64
pkg: github.com/influxdata/influxdb/tsdb/tsm1
BenchmarkEngine_DeletePrefixRange_Cache/exists-24         	     100	 104851012 ns/op	29442514 B/op	  333599 allocs/op
BenchmarkEngine_DeletePrefixRange_Cache/exists-24         	     100	 107838824 ns/op	29485649 B/op	  334369 allocs/op
BenchmarkEngine_DeletePrefixRange_Cache/exists-24         	     100	 108020671 ns/op	29443324 B/op	  333610 allocs/op
BenchmarkEngine_DeletePrefixRange_Cache/exists-24         	     100	 106507506 ns/op	29977931 B/op	  338597 allocs/op
BenchmarkEngine_DeletePrefixRange_Cache/exists-24         	     100	 116393032 ns/op	29443516 B/op	  333614 allocs/op
BenchmarkEngine_DeletePrefixRange_Cache/exists-24         	     100	 112581877 ns/op	29691455 B/op	  334699 allocs/op
BenchmarkEngine_DeletePrefixRange_Cache/exists-24         	      50	 119833106 ns/op	29444712 B/op	  333625 allocs/op
BenchmarkEngine_DeletePrefixRange_Cache/exists-24         	     100	 113851895 ns/op	29921119 B/op	  337419 allocs/op
BenchmarkEngine_DeletePrefixRange_Cache/exists-24         	      50	 121735395 ns/op	29445551 B/op	  333634 allocs/op
BenchmarkEngine_DeletePrefixRange_Cache/exists-24         	      50	 115387319 ns/op	29444513 B/op	  333627 allocs/op
BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24     	     100	  94474658 ns/op	24696698 B/op	  306702 allocs/op
BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24     	     100	  94767020 ns/op	24004763 B/op	  300066 allocs/op
BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24     	     100	  97869523 ns/op	24556560 B/op	  305827 allocs/op
BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24     	     100	  93916119 ns/op	24172163 B/op	  301244 allocs/op
BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24     	     100	  96591891 ns/op	24006021 B/op	  300081 allocs/op
BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24     	     100	  93521244 ns/op	24266467 B/op	  303190 allocs/op
BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24     	     100	  95419569 ns/op	24006501 B/op	  300087 allocs/op
BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24     	     100	  96694570 ns/op	24521126 B/op	  306041 allocs/op
BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24     	     100	  95075965 ns/op	24299409 B/op	  301649 allocs/op
BenchmarkEngine_DeletePrefixRange_Cache/not_exists-24     	     100	  97182864 ns/op	24007644 B/op	  300101 allocs/op
PASS
ok  	github.com/influxdata/influxdb/tsdb/tsm1	490.287s
pull/14892/head
Edd Robinson 2019-08-30 11:48:58 +01:00
parent 15ade8c162
commit da2fb27cb9
1 changed files with 20 additions and 16 deletions

View File

@ -76,6 +76,7 @@ func (e *Engine) DeletePrefixRange(rootCtx context.Context, name []byte, min, ma
// TODO(edd): tracing this deep down is currently speculative, so I have
// not added the tracing into the TSMReader API.
span, _ := tracing.StartSpanFromContextWithOperationName(rootCtx, "TSMFile delete prefix")
span.LogKV("file_path", r.Path())
defer span.Finish()
return r.DeletePrefix(name, min, max, pred, func(key []byte) {
@ -87,13 +88,12 @@ func (e *Engine) DeletePrefixRange(rootCtx context.Context, name []byte, min, ma
return err
}
span, _ = tracing.StartSpanFromContextWithOperationName(rootCtx, "Cache find delete keys")
span.LogKV("cache_size", e.Cache.Size())
var keysChecked int // For tracing information.
// ApplySerialEntryFn cannot return an error in this invocation.
_ = e.Cache.ApplyEntryFn(func(k []byte, _ *entry) error {
// TODO(edd): tracing this deep down is currently speculative, so I have
// not added the tracing into the Cache API.
span, _ := tracing.StartSpanFromContextWithOperationName(rootCtx, "Cache find delete keys")
defer span.Finish()
keysChecked++
if !bytes.HasPrefix(k, name) {
return nil
}
@ -107,11 +107,10 @@ func (e *Engine) DeletePrefixRange(rootCtx context.Context, name []byte, min, ma
return nil
})
span.LogKV("cache_cardinality", keysChecked)
span.Finish()
sortSpan, _ := tracing.StartSpanFromContextWithOperationName(rootCtx, "Cache sort keys")
sortSpan.Finish()
// Delete from the cache.
// Delete from the cache (traced in cache).
e.Cache.DeleteBucketRange(ctx, name, min, max, pred)
// Now that all of the data is purged, we need to find if some keys are fully deleted
@ -120,11 +119,13 @@ func (e *Engine) DeletePrefixRange(rootCtx context.Context, name []byte, min, ma
// TODO(edd): tracing this deep down is currently speculative, so I have
// not added the tracing into the Engine API.
span, _ := tracing.StartSpanFromContextWithOperationName(rootCtx, "TSMFile determine fully deleted")
span.LogKV("file_path", r.Path())
defer span.Finish()
possiblyDead.RLock()
defer possiblyDead.RUnlock()
var keysChecked int
iter := r.Iterator(name)
for i := 0; iter.Next(); i++ {
key := iter.Key()
@ -149,19 +150,18 @@ func (e *Engine) DeletePrefixRange(rootCtx context.Context, name []byte, min, ma
possiblyDead.RLock()
}
}
span.LogKV("keys_checked", keysChecked)
return iter.Err()
}); err != nil {
return err
}
span, _ = tracing.StartSpanFromContextWithOperationName(rootCtx, "Cache find delete keys")
span.LogKV("cache_size", e.Cache.Size())
keysChecked = 0
// ApplySerialEntryFn cannot return an error in this invocation.
_ = e.Cache.ApplyEntryFn(func(k []byte, _ *entry) error {
// TODO(edd): tracing this deep down is currently speculative, so I have
// not added the tracing into the Cache API.
span, _ := tracing.StartSpanFromContextWithOperationName(rootCtx, "Cache find delete keys")
defer span.Finish()
keysChecked++
if !bytes.HasPrefix(k, name) {
return nil
}
@ -172,6 +172,8 @@ func (e *Engine) DeletePrefixRange(rootCtx context.Context, name []byte, min, ma
delete(possiblyDead.keys, string(k))
return nil
})
span.LogKV("cache_cardinality", keysChecked)
span.Finish()
if len(possiblyDead.keys) > 0 {
buf := make([]byte, 1024)
@ -218,7 +220,8 @@ func (e *Engine) DeletePrefixRange(rootCtx context.Context, name []byte, min, ma
// Iterate over the series ids we previously extracted from the index
// and remove from the series file.
span, _ = tracing.StartSpanFromContextWithOperationName(rootCtx, "SFile Delete Series ID")
span, _ = tracing.StartSpanFromContextWithOperationName(rootCtx, "SFile Delete Series IDs")
span.LogKV("series_id_set_size", set.Cardinality())
set.ForEachNoLock(func(id tsdb.SeriesID) {
if err = e.sfile.DeleteSeriesID(id); err != nil {
return
@ -230,6 +233,7 @@ func (e *Engine) DeletePrefixRange(rootCtx context.Context, name []byte, min, ma
// This is the slow path, when not dropping the entire bucket (measurement)
span, _ = tracing.StartSpanFromContextWithOperationName(rootCtx, "TSI/SFile Delete keys")
span.LogKV("keys_to_delete", len(possiblyDead.keys))
for key := range possiblyDead.keys {
// TODO(jeff): ugh reduce copies here
keyb := []byte(key)