2018-10-24 20:51:28 +00:00
|
|
|
package write
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
|
|
|
"bytes"
|
|
|
|
"context"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"time"
|
|
|
|
|
2019-01-08 00:37:16 +00:00
|
|
|
platform "github.com/influxdata/influxdb"
|
2018-10-24 20:51:28 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// Default flush thresholds for batching.
const (
	// DefaultMaxBytes is the default size threshold for a batch: 500KB,
	// which is typically 250 to 500 lines.
	DefaultMaxBytes = 500 * 1000

	// DefaultInterval is the default time threshold for a batch: a flush
	// every 10 seconds.
	DefaultInterval = 10 * time.Second
)
|
|
|
|
|
|
|
|
// batcher is a write service that batches for another write service.
|
|
|
|
// Compile-time assertion that *Batcher implements platform.WriteService.
var _ platform.WriteService = (*Batcher)(nil)
|
|
|
|
|
|
|
|
// Batcher batches line protocol for sends to output.
|
|
|
|
type Batcher struct {
|
|
|
|
MaxFlushBytes int // MaxFlushBytes is the maximum number of bytes to buffer before flushing
|
|
|
|
MaxFlushInterval time.Duration // MaxFlushInterval is the maximum amount of time to wait before flushing
|
|
|
|
Service platform.WriteService // Service receives batches flushed from Batcher.
|
|
|
|
}
|
|
|
|
|
|
|
|
// Write reads r in batches and sends to the output.
|
|
|
|
func (b *Batcher) Write(ctx context.Context, org, bucket platform.ID, r io.Reader) error {
|
2018-10-25 17:31:22 +00:00
|
|
|
ctx, cancel := context.WithCancel(ctx)
|
|
|
|
defer cancel()
|
|
|
|
|
2018-10-24 20:51:28 +00:00
|
|
|
if b.Service == nil {
|
|
|
|
return fmt.Errorf("destination write service required")
|
|
|
|
}
|
|
|
|
|
|
|
|
lines := make(chan []byte)
|
|
|
|
|
fix(write): fix close logic to avoid race from read error
In the case that there is a read error, we would close the lines
channel before sending the error into the read error channel. closing
lines then allows the write goroutine to possibly send in a nil error
before read is able to, causing the main function driving both to
return a nil error. Additionally, it is possible for both reads and
writes to race sending errors into their channels, and the main
goroutine will only read from one, causing the other goroutine to leak.
To fix this, we close lines only after we have sent an error into
the channel, we ensure we read from both errors to make sure that
both have exited, and we unify the channels and add a buffer of size
two to the channel. It is possible for write to exit leaving read blocked
forever, but write only exits with a nil error when read has exited, so
this only happens during an actual write error, just like before.
Channels are hard.
2018-12-29 22:08:09 +00:00
|
|
|
errC := make(chan error, 2)
|
|
|
|
go b.write(ctx, org, bucket, lines, errC)
|
|
|
|
go b.read(ctx, r, lines, errC)
|
2018-10-24 20:51:28 +00:00
|
|
|
|
fix(write): fix close logic to avoid race from read error
In the case that there is a read error, we would close the lines
channel before sending the error into the read error channel. closing
lines then allows the write goroutine to possibly send in a nil error
before read is able to, causing the main function driving both to
return a nil error. Additionally, it is possible for both reads and
writes to race sending errors into their channels, and the main
goroutine will only read from one, causing the other goroutine to leak.
To fix this, we close lines only after we have sent an error into
the channel, we ensure we read from both errors to make sure that
both have exited, and we unify the channels and add a buffer of size
two to the channel. It is possible for write to exit leaving read blocked
forever, but write only exits with a nil error when read has exited, so
this only happens during an actual write error, just like before.
Channels are hard.
2018-12-29 22:08:09 +00:00
|
|
|
// we loop twice to check if both read and write have an error. if read exits
|
|
|
|
// cleanly, then we still want to wait for write.
|
|
|
|
for i := 0; i < 2; i++ {
|
2018-10-24 20:51:28 +00:00
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return ctx.Err()
|
fix(write): fix close logic to avoid race from read error
In the case that there is a read error, we would close the lines
channel before sending the error into the read error channel. closing
lines then allows the write goroutine to possibly send in a nil error
before read is able to, causing the main function driving both to
return a nil error. Additionally, it is possible for both reads and
writes to race sending errors into their channels, and the main
goroutine will only read from one, causing the other goroutine to leak.
To fix this, we close lines only after we have sent an error into
the channel, we ensure we read from both errors to make sure that
both have exited, and we unify the channels and add a buffer of size
two to the channel. It is possible for write to exit leaving read blocked
forever, but write only exits with a nil error when read has exited, so
this only happens during an actual write error, just like before.
Channels are hard.
2018-12-29 22:08:09 +00:00
|
|
|
case err := <-errC:
|
|
|
|
// onky if there is any error, exit immediately.
|
2018-10-24 20:51:28 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
fix(write): fix close logic to avoid race from read error
In the case that there is a read error, we would close the lines
channel before sending the error into the read error channel. closing
lines then allows the write goroutine to possibly send in a nil error
before read is able to, causing the main function driving both to
return a nil error. Additionally, it is possible for both reads and
writes to race sending errors into their channels, and the main
goroutine will only read from one, causing the other goroutine to leak.
To fix this, we close lines only after we have sent an error into
the channel, we ensure we read from both errors to make sure that
both have exited, and we unify the channels and add a buffer of size
two to the channel. It is possible for write to exit leaving read blocked
forever, but write only exits with a nil error when read has exited, so
this only happens during an actual write error, just like before.
Channels are hard.
2018-12-29 22:08:09 +00:00
|
|
|
return nil
|
2018-10-24 20:51:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// read will close the line channel when there is no more data, or an error occurs.
|
|
|
|
// it is possible for an io.Reader to block forever; Write's context can be
|
|
|
|
// used to cancel, but, it's possible there will be dangling read go routines.
|
|
|
|
func (b *Batcher) read(ctx context.Context, r io.Reader, lines chan<- []byte, errC chan<- error) {
|
fix(write): fix close logic to avoid race from read error
In the case that there is a read error, we would close the lines
channel before sending the error into the read error channel. closing
lines then allows the write goroutine to possibly send in a nil error
before read is able to, causing the main function driving both to
return a nil error. Additionally, it is possible for both reads and
writes to race sending errors into their channels, and the main
goroutine will only read from one, causing the other goroutine to leak.
To fix this, we close lines only after we have sent an error into
the channel, we ensure we read from both errors to make sure that
both have exited, and we unify the channels and add a buffer of size
two to the channel. It is possible for write to exit leaving read blocked
forever, but write only exits with a nil error when read has exited, so
this only happens during an actual write error, just like before.
Channels are hard.
2018-12-29 22:08:09 +00:00
|
|
|
defer close(lines)
|
2018-10-24 20:51:28 +00:00
|
|
|
scanner := bufio.NewScanner(r)
|
|
|
|
scanner.Split(ScanLines)
|
|
|
|
for scanner.Scan() {
|
|
|
|
// exit early if the context is done
|
|
|
|
select {
|
2020-02-14 17:47:11 +00:00
|
|
|
case lines <- []byte(scanner.Text()):
|
2018-10-24 20:51:28 +00:00
|
|
|
case <-ctx.Done():
|
|
|
|
errC <- ctx.Err()
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
errC <- scanner.Err()
|
|
|
|
}
|
|
|
|
|
|
|
|
// finishes when the lines channel is closed or context is done.
|
|
|
|
// if an error occurs while writing data to the write service, the error is send in the
|
|
|
|
// errC channel and the function returns.
|
|
|
|
func (b *Batcher) write(ctx context.Context, org, bucket platform.ID, lines <-chan []byte, errC chan<- error) {
|
|
|
|
flushInterval := b.MaxFlushInterval
|
|
|
|
if flushInterval == 0 {
|
|
|
|
flushInterval = DefaultInterval
|
|
|
|
}
|
|
|
|
|
|
|
|
maxBytes := b.MaxFlushBytes
|
|
|
|
if maxBytes == 0 {
|
|
|
|
maxBytes = DefaultMaxBytes
|
|
|
|
}
|
|
|
|
|
2018-10-26 00:10:06 +00:00
|
|
|
timer := time.NewTimer(flushInterval)
|
|
|
|
defer func() { _ = timer.Stop() }()
|
|
|
|
|
2018-10-24 20:51:28 +00:00
|
|
|
buf := make([]byte, 0, maxBytes)
|
2018-10-25 23:51:24 +00:00
|
|
|
r := bytes.NewReader(buf)
|
2018-10-26 00:10:06 +00:00
|
|
|
|
2018-10-24 20:51:28 +00:00
|
|
|
var line []byte
|
|
|
|
var more = true
|
|
|
|
// if read closes the channel normally, exit the loop
|
|
|
|
for more {
|
|
|
|
select {
|
|
|
|
case line, more = <-lines:
|
|
|
|
if more {
|
|
|
|
buf = append(buf, line...)
|
|
|
|
}
|
|
|
|
// write if we exceed the max lines OR read routine has finished
|
|
|
|
if len(buf) >= maxBytes || (!more && len(buf) > 0) {
|
2018-10-25 23:51:24 +00:00
|
|
|
r.Reset(buf)
|
2018-10-26 00:10:06 +00:00
|
|
|
timer.Reset(flushInterval)
|
2018-10-24 20:51:28 +00:00
|
|
|
if err := b.Service.Write(ctx, org, bucket, r); err != nil {
|
|
|
|
errC <- err
|
|
|
|
return
|
|
|
|
}
|
|
|
|
buf = buf[:0]
|
|
|
|
}
|
2018-10-26 00:10:06 +00:00
|
|
|
case <-timer.C:
|
2018-10-24 20:51:28 +00:00
|
|
|
if len(buf) > 0 {
|
2018-10-25 23:51:24 +00:00
|
|
|
r.Reset(buf)
|
2018-10-26 00:10:06 +00:00
|
|
|
timer.Reset(flushInterval)
|
2018-10-24 20:51:28 +00:00
|
|
|
if err := b.Service.Write(ctx, org, bucket, r); err != nil {
|
|
|
|
errC <- err
|
|
|
|
return
|
|
|
|
}
|
|
|
|
buf = buf[:0]
|
|
|
|
}
|
|
|
|
case <-ctx.Done():
|
|
|
|
errC <- ctx.Err()
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
errC <- nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// ScanLines is a split function for bufio.Scanner that splits line protocol
// into lines. Each token runs up to and including its terminating newline;
// a final line without a newline is returned as-is once the input is at EOF.
// When no complete line is available yet, it asks the scanner for more data.
func ScanLines(data []byte, atEOF bool) (advance int, token []byte, err error) {
	newline := bytes.IndexByte(data, '\n')

	switch {
	case atEOF && len(data) == 0:
		// Nothing left to tokenize.
		return 0, nil, nil
	case newline >= 0:
		// A full newline-terminated line is available; keep the newline.
		end := newline + 1
		return end, data[:end], nil
	case atEOF:
		// Final, non-terminated line.
		return len(data), data, nil
	default:
		// Request more data.
		return 0, nil, nil
	}
}
|