chore: increase replications batch size limits (#22983)

pull/22990/head
William Baker 2021-12-13 11:02:38 -06:00 committed by GitHub
parent a7a5233432
commit 0e5b14fa5e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 88 additions and 50 deletions

View File

@ -21,9 +21,14 @@ import (
"golang.org/x/sync/errgroup" "golang.org/x/sync/errgroup"
) )
// This is the same batch size limit used by the influx write command // InfluxDB docs suggest a batch size of 5000 lines for optimal write performance.
// https://github.com/influxdata/influx-cli/blob/a408c02bd462946ac6ebdedf6f62f5e3d81c1f6f/clients/write/buffer_batcher.go#L14 // https://docs.influxdata.com/influxdb/v2.1/write-data/best-practices/optimize-writes/
const maxRemoteWriteBatchSize = 500000 const maxRemoteWritePointSize = 5000
// Uncompressed size (bytes) is used as a secondary limit to prevent network issues and stay below cloud maximum payload
// limitations. 2.5 MB is about 50% of the limit on a basic cloud plan.
// https://docs.influxdata.com/influxdb/cloud/account-management/pricing-plans/#data-limits
const maxRemoteWriteBatchSize = 2500000
func errLocalBucketNotFound(id platform.ID, cause error) error { func errLocalBucketNotFound(id platform.ID, cause error) error {
return &ierrors.Error{ return &ierrors.Error{
@ -51,6 +56,7 @@ func NewService(sqlStore *sqlite.SqlStore, bktSvc BucketService, localWriter sto
store, store,
), ),
maxRemoteWriteBatchSize: maxRemoteWriteBatchSize, maxRemoteWriteBatchSize: maxRemoteWriteBatchSize,
maxRemoteWritePointSize: maxRemoteWritePointSize,
}, metrs }, metrs
} }
@ -96,6 +102,7 @@ type service struct {
localWriter storage.PointsWriter localWriter storage.PointsWriter
log *zap.Logger log *zap.Logger
maxRemoteWriteBatchSize int maxRemoteWriteBatchSize int
maxRemoteWritePointSize int
} }
func (s service) ListReplications(ctx context.Context, filter influxdb.ReplicationListFilter) (*influxdb.Replications, error) { func (s service) ListReplications(ctx context.Context, filter influxdb.ReplicationListFilter) (*influxdb.Replications, error) {
@ -337,9 +344,9 @@ func (s service) WritePoints(ctx context.Context, orgID platform.ID, bucketID pl
gzw := gzip.NewWriter(batches[0].data) gzw := gzip.NewWriter(batches[0].data)
// Iterate through points and compress in batches // Iterate through points and compress in batches
for _, p := range points { for count, p := range points {
// If current point will cause this batch to exceed max size, start a new batch for it first // If current point will cause this batch to exceed max size, start a new batch for it first
if currentBatchSize+p.StringSize() > s.maxRemoteWriteBatchSize { if s.startNewBatch(currentBatchSize, p.StringSize(), count) {
batches = append(batches, &batch{ batches = append(batches, &batch{
data: &bytes.Buffer{}, data: &bytes.Buffer{},
numPoints: 0, numPoints: 0,
@ -416,3 +423,8 @@ func (s service) Close() error {
} }
return nil return nil
} }
// startNewBatch reports whether the next point must open a new batch instead
// of being appended to the current one. A new batch is started when either
// limit would be crossed: adding the point's uncompressed size would push the
// batch past the byte-size cap, or the running point count has hit a multiple
// of the per-batch point cap.
//
// NOTE(review): pointCount appears to be the loop index over ALL points, not
// a per-batch counter, so a byte-size split mid-batch does not reset the
// point-count boundary — confirm this is the intended batching behavior.
func (s service) startNewBatch(currentSize, nextSize, pointCount int) bool {
	// Byte-size limit: would this point overflow the current batch?
	if currentSize+nextSize > s.maxRemoteWriteBatchSize {
		return true
	}
	// Point-count limit: every maxRemoteWritePointSize points begins a new
	// batch; the pointCount > 0 guard keeps the very first point from
	// spuriously opening an empty leading batch.
	return pointCount > 0 && (pointCount%s.maxRemoteWritePointSize == 0)
}

View File

@ -689,10 +689,33 @@ disk,host=C value=1.3 1000000000`)
func TestWritePointsBatches(t *testing.T) { func TestWritePointsBatches(t *testing.T) {
t.Parallel() t.Parallel()
svc, mocks := newTestService(t) tests := []struct {
name string
setupFn func(*testing.T, *service)
}{
{
name: "batch bytes size",
setupFn: func(t *testing.T, svc *service) {
t.Helper()
// Set batch size to smaller size for testing (should result in 3 batches sized 93, 93, and 63 - total size 249) // Set batch size to smaller size for testing (should result in 3 batches sized 93, 93, and 63 - total size 249)
svc.maxRemoteWriteBatchSize = 100 svc.maxRemoteWriteBatchSize = 100
},
},
{
name: "batch point size",
setupFn: func(t *testing.T, svc *service) {
t.Helper()
// Set point size to smaller size for testing (should result in 3 batches with 3 points, 3 points, and 2 points)
svc.maxRemoteWritePointSize = 3
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
svc, mocks := newTestService(t)
tt.setupFn(t, svc)
// Define metadata for two replications // Define metadata for two replications
list := &influxdb.Replications{ list := &influxdb.Replications{
@ -750,6 +773,8 @@ disk,host=C value=1.3 1000000000`)
} }
require.NoError(t, svc.WritePoints(ctx, orgID, id1, points)) require.NoError(t, svc.WritePoints(ctx, orgID, id1, points))
})
}
} }
func TestWritePoints_LocalFailure(t *testing.T) { func TestWritePoints_LocalFailure(t *testing.T) {
@ -897,6 +922,7 @@ func newTestService(t *testing.T) (*service, mocks) {
durableQueueManager: mocks.durableQueueManager, durableQueueManager: mocks.durableQueueManager,
localWriter: mocks.pointWriter, localWriter: mocks.pointWriter,
maxRemoteWriteBatchSize: maxRemoteWriteBatchSize, maxRemoteWriteBatchSize: maxRemoteWriteBatchSize,
maxRemoteWritePointSize: maxRemoteWritePointSize,
} }
return &svc, mocks return &svc, mocks