From e9db11a3e972685449c194a3026339e93ba8b387 Mon Sep 17 00:00:00 2001 From: Jason Wilder Date: Wed, 31 Jan 2018 13:15:07 -0700 Subject: [PATCH 1/2] Reduce cache partitions to 16 The large number of partitions causes big HeapInUse swings at higher cardinality which can lead to OOMs. Reducing this to 16 lowers write throughput to some extent at lower cardinalities, keeps memory more stable over the long run. --- tsdb/engine/tsm1/cache.go | 2 +- tsdb/engine/tsm1/ring.go | 2 +- tsdb/engine/tsm1/ring_test.go | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tsdb/engine/tsm1/cache.go b/tsdb/engine/tsm1/cache.go index c49406eca4..ef5e20368a 100644 --- a/tsdb/engine/tsm1/cache.go +++ b/tsdb/engine/tsm1/cache.go @@ -19,7 +19,7 @@ import ( // testing, a value above the number of cores on the machine does not provide // any additional benefit. For now we'll set it to the number of cores on the // largest box we could imagine running influx. -const ringShards = 4096 +const ringShards = 16 var ( // ErrSnapshotInProgress is returned if a snapshot is attempted while one is already running. diff --git a/tsdb/engine/tsm1/ring.go b/tsdb/engine/tsm1/ring.go index f78b5d760c..a80763a377 100644 --- a/tsdb/engine/tsm1/ring.go +++ b/tsdb/engine/tsm1/ring.go @@ -13,7 +13,7 @@ import ( // basically defines the maximum number of partitions you can have in the ring. // If a smaller number of partitions are chosen when creating a ring, then // they're evenly spread across this many partitions in the ring. -const partitions = 4096 +const partitions = 16 // ring is a structure that maps series keys to entries. 
// diff --git a/tsdb/engine/tsm1/ring_test.go b/tsdb/engine/tsm1/ring_test.go index 868f79beeb..394de7246e 100644 --- a/tsdb/engine/tsm1/ring_test.go +++ b/tsdb/engine/tsm1/ring_test.go @@ -12,8 +12,8 @@ func TestRing_newRing(t *testing.T) { n int returnErr bool }{ - {n: 1}, {n: 2}, {n: 4}, {n: 8}, {n: 16}, {n: 32}, {n: 64}, {n: 128}, {n: 256}, - {n: 0, returnErr: true}, {n: 3, returnErr: true}, {n: 512, returnErr: true}, + {n: 1}, {n: 2}, {n: 4}, {n: 8}, {n: 16}, {n: 32, returnErr: true}, + {n: 0, returnErr: true}, {n: 3, returnErr: true}, } for i, example := range examples { From 3299e549aa192efce8a9b55b15cb12823b61d25c Mon Sep 17 00:00:00 2001 From: Jason Wilder Date: Wed, 31 Jan 2018 13:16:36 -0700 Subject: [PATCH 2/2] Increase WAL write buffer size The default of 4096 results in writes to the WAL still requiring multiple IOs. We had previously bumped this to 1M, but that was too high when there are many shards. Increasing to around 16k reduces the IOs to one or two for the workloads tested. We may want to make this configurable in the future. --- tsdb/engine/tsm1/wal.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tsdb/engine/tsm1/wal.go b/tsdb/engine/tsm1/wal.go index b0670e44ce..7b50e0207e 100644 --- a/tsdb/engine/tsm1/wal.go +++ b/tsdb/engine/tsm1/wal.go @@ -1025,7 +1025,7 @@ type WALSegmentWriter struct { // NewWALSegmentWriter returns a new WALSegmentWriter writing to w. func NewWALSegmentWriter(w io.WriteCloser) *WALSegmentWriter { return &WALSegmentWriter{ - bw: bufio.NewWriter(w), + bw: bufio.NewWriterSize(w, 16*1024), w: w, } }