2015-06-02 16:40:52 +00:00
|
|
|
package tsdb
|
2015-05-26 21:45:39 +00:00
|
|
|
|
|
|
|
import (
|
2015-06-09 02:44:42 +00:00
|
|
|
"sync"
|
2015-05-26 21:45:39 +00:00
|
|
|
"sync/atomic"
|
|
|
|
"time"
|
2015-09-16 20:33:08 +00:00
|
|
|
|
|
|
|
"github.com/influxdb/influxdb/models"
|
2015-05-26 21:45:39 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// PointBatcher accepts Points and will emit a batch of those points when either
|
|
|
|
// a) the batch reaches a certain size, or b) a certain time passes.
|
|
|
|
type PointBatcher struct {
|
2015-08-10 23:37:16 +00:00
|
|
|
stats PointBatcherStats
|
|
|
|
|
2015-05-26 21:45:39 +00:00
|
|
|
size int
|
|
|
|
duration time.Duration
|
|
|
|
|
2015-06-09 02:44:42 +00:00
|
|
|
stop chan struct{}
|
2015-09-16 20:33:08 +00:00
|
|
|
in chan models.Point
|
|
|
|
out chan []models.Point
|
2015-05-29 00:07:51 +00:00
|
|
|
flush chan struct{}
|
|
|
|
|
2015-06-09 02:44:42 +00:00
|
|
|
wg *sync.WaitGroup
|
2015-05-26 21:45:39 +00:00
|
|
|
}
|
|
|
|
|
2015-09-08 22:18:14 +00:00
|
|
|
// NewPointBatcher returns a new PointBatcher. sz is the batching size,
|
|
|
|
// bp is the maximum number of batches that may be pending. d is the time
|
|
|
|
// after which a batch will be emitted after the first point is received
|
|
|
|
// for the batch, regardless of its size.
|
|
|
|
func NewPointBatcher(sz int, bp int, d time.Duration) *PointBatcher {
|
2015-05-29 00:07:51 +00:00
|
|
|
return &PointBatcher{
|
|
|
|
size: sz,
|
|
|
|
duration: d,
|
2015-06-09 02:44:42 +00:00
|
|
|
stop: make(chan struct{}),
|
2015-09-16 20:33:08 +00:00
|
|
|
in: make(chan models.Point, bp*sz),
|
|
|
|
out: make(chan []models.Point),
|
2015-05-29 00:07:51 +00:00
|
|
|
flush: make(chan struct{}),
|
|
|
|
}
|
2015-05-26 21:45:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// PointBatcherStats holds the counters maintained by a PointBatcher.
type PointBatcherStats struct {
	// BatchTotal is the total count of batches transmitted.
	BatchTotal uint64
	// PointTotal is the total count of points processed.
	PointTotal uint64
	// SizeTotal is the number of batches that reached the size threshold.
	SizeTotal uint64
	// TimeoutTotal is the number of timeouts that occurred.
	TimeoutTotal uint64
}
|
|
|
|
|
2015-05-28 18:00:21 +00:00
|
|
|
// Start starts the batching process. Returns the in and out channels for points
|
|
|
|
// and point-batches respectively.
|
2015-05-29 00:07:51 +00:00
|
|
|
func (b *PointBatcher) Start() {
|
2015-06-09 02:44:42 +00:00
|
|
|
// Already running?
|
|
|
|
if b.wg != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2015-05-26 21:45:39 +00:00
|
|
|
var timer *time.Timer
|
2015-09-16 20:33:08 +00:00
|
|
|
var batch []models.Point
|
2015-05-26 21:45:39 +00:00
|
|
|
var timerCh <-chan time.Time
|
|
|
|
|
2015-05-29 00:07:51 +00:00
|
|
|
emit := func() {
|
|
|
|
b.out <- batch
|
|
|
|
atomic.AddUint64(&b.stats.BatchTotal, 1)
|
|
|
|
batch = nil
|
|
|
|
}
|
2015-05-28 18:00:21 +00:00
|
|
|
|
2015-06-09 02:44:42 +00:00
|
|
|
b.wg = &sync.WaitGroup{}
|
|
|
|
b.wg.Add(1)
|
|
|
|
|
2015-05-28 18:00:21 +00:00
|
|
|
go func() {
|
2015-06-09 02:44:42 +00:00
|
|
|
defer b.wg.Done()
|
2015-05-28 18:00:21 +00:00
|
|
|
for {
|
|
|
|
select {
|
2015-06-09 02:44:42 +00:00
|
|
|
case <-b.stop:
|
|
|
|
if len(batch) > 0 {
|
|
|
|
emit()
|
|
|
|
timerCh = nil
|
|
|
|
}
|
|
|
|
return
|
2015-05-29 00:07:51 +00:00
|
|
|
case p := <-b.in:
|
2015-05-28 18:00:21 +00:00
|
|
|
atomic.AddUint64(&b.stats.PointTotal, 1)
|
|
|
|
if batch == nil {
|
2015-09-16 20:33:08 +00:00
|
|
|
batch = make([]models.Point, 0, b.size)
|
2015-07-06 21:28:14 +00:00
|
|
|
if b.duration > 0 {
|
|
|
|
timer = time.NewTimer(b.duration)
|
|
|
|
timerCh = timer.C
|
|
|
|
}
|
2015-05-28 18:00:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
batch = append(batch, p)
|
|
|
|
if len(batch) >= b.size { // 0 means send immediately.
|
|
|
|
atomic.AddUint64(&b.stats.SizeTotal, 1)
|
2015-05-29 00:07:51 +00:00
|
|
|
emit()
|
|
|
|
timerCh = nil
|
|
|
|
}
|
|
|
|
|
|
|
|
case <-b.flush:
|
|
|
|
if len(batch) > 0 {
|
|
|
|
emit()
|
2015-05-28 18:00:21 +00:00
|
|
|
timerCh = nil
|
|
|
|
}
|
2015-05-26 21:45:39 +00:00
|
|
|
|
2015-05-28 18:00:21 +00:00
|
|
|
case <-timerCh:
|
|
|
|
atomic.AddUint64(&b.stats.TimeoutTotal, 1)
|
2015-05-29 00:07:51 +00:00
|
|
|
emit()
|
2015-05-26 21:45:39 +00:00
|
|
|
}
|
|
|
|
}
|
2015-05-28 18:00:21 +00:00
|
|
|
}()
|
2015-05-29 00:07:51 +00:00
|
|
|
}
|
|
|
|
|
2015-06-09 02:44:42 +00:00
|
|
|
func (b *PointBatcher) Stop() {
|
|
|
|
// If not running, nothing to stop.
|
|
|
|
if b.wg == nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
close(b.stop)
|
|
|
|
b.wg.Wait()
|
|
|
|
}
|
|
|
|
|
2015-05-29 00:07:51 +00:00
|
|
|
// In returns the channel to which points should be written.
|
2015-09-16 20:33:08 +00:00
|
|
|
func (b *PointBatcher) In() chan<- models.Point {
|
2015-06-09 02:44:42 +00:00
|
|
|
return b.in
|
|
|
|
}
|
2015-05-29 00:07:51 +00:00
|
|
|
|
|
|
|
// Out returns the channel from which batches should be read.
|
2015-09-16 20:33:08 +00:00
|
|
|
func (b *PointBatcher) Out() <-chan []models.Point {
|
2015-06-09 02:44:42 +00:00
|
|
|
return b.out
|
|
|
|
}
|
2015-05-28 18:00:21 +00:00
|
|
|
|
2015-05-29 00:07:51 +00:00
|
|
|
// Flush instructs the batcher to emit any pending points in a batch, regardless of batch size.
|
|
|
|
// If there are no pending points, no batch is emitted.
|
|
|
|
func (b *PointBatcher) Flush() {
|
|
|
|
b.flush <- struct{}{}
|
2015-05-26 21:45:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Stats returns a PointBatcherStats object for the PointBatcher. While the each statistic should be
|
|
|
|
// closely correlated with each other statistic, it is not guaranteed.
|
|
|
|
func (b *PointBatcher) Stats() *PointBatcherStats {
|
|
|
|
stats := PointBatcherStats{}
|
|
|
|
stats.BatchTotal = atomic.LoadUint64(&b.stats.BatchTotal)
|
|
|
|
stats.PointTotal = atomic.LoadUint64(&b.stats.PointTotal)
|
|
|
|
stats.SizeTotal = atomic.LoadUint64(&b.stats.SizeTotal)
|
|
|
|
stats.TimeoutTotal = atomic.LoadUint64(&b.stats.TimeoutTotal)
|
|
|
|
return &stats
|
|
|
|
}
|