package engine

import (
	"code.google.com/p/log4go"

	"github.com/influxdb/influxdb/protocol"
)
// Merger merges a number of StreamQuery into one stream of points
// where the output stream of points has a monotonic timestamp order
// (increasing or decreasing depending on the SeriesHeap that is
// passed to NewCME)
type Merger struct {
	// name identifies this merger in log messages, since Merger is
	// used in multiple places.
	name string
	// s holds the streams being merged. Streams that close during
	// initialization are removed from this slice.
	s []StreamQuery
	// size is the number of streams still open; set when
	// initialization finishes and decremented as streams close.
	size int
	// h orders the buffered next point of each stream by timestamp;
	// smallest- vs. largest-first depends on the heap passed to NewCME.
	h SeriesHeap
	// n receives the merged points.
	n Processor
	// lastStreamIdx is the index (into s) of the stream whose point
	// was consumed most recently, i.e. the stream to refill from next.
	lastStreamIdx int
	// initializing is true until one point has been read from every
	// stream (or the stream closed without producing one).
	initializing bool
	// mergeColumns, when true, makes the result series carry the
	// union of the fields of all streams (absent values become nil).
	mergeColumns bool
	// fields accumulates the set of field names seen during
	// initialization; used to build resultFields.
	fields map[string]struct{}
	// resultFields is the merged field list, populated only when
	// mergeColumns is enabled.
	resultFields []string
	// resultFieldsPerStream maps a stream index to a per-result-field
	// index into that stream's own field list (-1 when the stream
	// lacks the field).
	resultFieldsPerStream map[int][]int
}
// Creates a new merger that will merge the given slice of StreamQuery
|
|
// and yields the result to the processor `n`. `name` is used to
|
|
// identify the merger in the logs since it's being used in multiple
|
|
// places. The SeriesHeap `h` is used to keep track of the next point
|
|
// (whether it's the smallest or largest timestamp depends on the
|
|
// SeriesHeap). If `mergeColumns` is true, the resulting time series
|
|
// will have the fields from all StreamQueries, i.e. if the first
|
|
// stream yields `column0` and `column1` while the second stream
|
|
// yields `column2` and `column3` then the result time series will
|
|
// have all 4 columns with two columns set to `nil` depending on which
|
|
// side the point came from.
|
|
func NewCME(name string, s []StreamQuery, h SeriesHeap, n Processor, mergeColumns bool) *Merger {
|
|
log4go.Debug("%sMerger: created with %d streams", name, len(s))
|
|
return &Merger{
|
|
name: name,
|
|
s: s,
|
|
h: h,
|
|
n: n,
|
|
lastStreamIdx: 0,
|
|
mergeColumns: mergeColumns,
|
|
initializing: true,
|
|
fields: make(map[string]struct{}),
|
|
resultFieldsPerStream: make(map[int][]int),
|
|
}
|
|
}
|
|
|
|
// public api
|
|
|
|
// Consume and yield as many points as we can until one of the streams
|
|
// runs out of points (i.e. the call to HasPoint() returns false)
|
|
func (cme *Merger) Update() (bool, error) {
|
|
if cme.initializing {
|
|
return cme.initialize()
|
|
}
|
|
return cme.tryYieldNextPoint()
|
|
}
|
|
|
|
// private api
|
|
|
|
// initialize the state of the Merger by reading one point from each
// StreamQuery. if `mergeColumns` is true the union of the fields
// received on the first point from each StreamQuery will be the
// fields of the result Series
func (cme *Merger) initialize() (bool, error) {
	// Loop until the heap holds exactly one point per remaining
	// stream. lastStreamIdx doubles as the count of streams already
	// represented on the heap during this phase.
	for cme.h.Size() != len(cme.s) {
		stream := cme.s[cme.lastStreamIdx]
		// Neither a buffered point nor closed: data isn't ready yet;
		// bail out and resume on the next Update() call.
		if !stream.HasPoint() && !stream.Closed() {
			log4go.Debug("%sMerger: data not ready for stream %d, still initializing", cme.name, cme.lastStreamIdx)
			return true, nil
		}

		if stream.HasPoint() {
			p := stream.Next()
			cme.h.Add(cme.lastStreamIdx, p)
			// Collect this stream's field names so the union of all
			// fields can be built below (used when mergeColumns is set).
			for _, f := range p.Fields {
				cme.fields[f] = struct{}{}
			}
			cme.lastStreamIdx++
		} else if stream.Closed() {
			// Stream closed without ever producing a point: remove it
			// by swapping in the last stream and shrinking the slice.
			// lastStreamIdx is intentionally not advanced so the
			// swapped-in stream is examined next.
			s := len(cme.s)
			cme.s[cme.lastStreamIdx] = cme.s[s-1]
			cme.s = cme.s[:s-1]
		}

	}

	if cme.mergeColumns {
		// finished initialization
		// NOTE(review): resultFields order comes from map iteration
		// and is therefore nondeterministic across runs — confirm
		// that consumers don't rely on a stable column order.
		cme.resultFields = make([]string, 0, len(cme.fields))
		for f := range cme.fields {
			cme.resultFields = append(cme.resultFields, f)
		}
	}

	log4go.Debug("%sMerger initialization finished", cme.name)
	cme.initializing = false
	cme.size = len(cme.s)
	return cme.yieldNextPoint()
}
// Try to get more data points from the StreamQueries and if
|
|
// successful call yieldNextPoint()
|
|
func (cme *Merger) tryYieldNextPoint() (bool, error) {
|
|
stream := cme.s[cme.lastStreamIdx]
|
|
// If the stream has new points, added to the heap
|
|
if stream.HasPoint() {
|
|
cme.h.Add(cme.lastStreamIdx, stream.Next())
|
|
} else if stream.Closed() {
|
|
cme.size--
|
|
}
|
|
|
|
// If all streams have yielded one point. Then we can get the next
|
|
// point with the smallest (or largest) timestamp and yield it to the
|
|
// next processor.
|
|
if cme.h.Size() != cme.size {
|
|
return true, nil
|
|
}
|
|
|
|
return cme.yieldNextPoint()
|
|
}
|
|
|
|
// yield as many points as we can to the Processor `n`
|
|
func (cme *Merger) yieldNextPoint() (bool, error) {
|
|
// If we consumed all the input data points, return
|
|
// immediately. This can be the case for example if we finished
|
|
// initialization and the first call to yieldNextPoint() consumed
|
|
// all the data points. Without this check the call to the heap's
|
|
// Next() method will cause a panic
|
|
if cme.size == 0 {
|
|
return true, nil
|
|
}
|
|
|
|
for {
|
|
var s *protocol.Series
|
|
cme.lastStreamIdx, s = cme.h.Next()
|
|
log4go.Debug("cme.lastStreamIdx: %d, s: %s", cme.lastStreamIdx, s)
|
|
cme.fixFields(s)
|
|
log4go.Debug("%sMerger yielding to %s: %s", cme.name, cme.n.Name(), s)
|
|
ok, err := cme.n.Yield(s)
|
|
if !ok || err != nil {
|
|
return ok, err
|
|
}
|
|
|
|
stream := cme.s[cme.lastStreamIdx]
|
|
if stream.HasPoint() {
|
|
s := stream.Next()
|
|
log4go.Debug("%sMerger received %s from %d", s, cme.lastStreamIdx)
|
|
cme.h.Add(cme.lastStreamIdx, s)
|
|
continue
|
|
} else if stream.Closed() {
|
|
cme.size--
|
|
if cme.size != 0 {
|
|
continue
|
|
}
|
|
}
|
|
|
|
return true, nil
|
|
}
|
|
}
|
|
|
|
// modify the series to have the union of the columns from all
|
|
// StreamQueries
|
|
func (cme *Merger) fixFields(s *protocol.Series) {
|
|
if !cme.mergeColumns {
|
|
return
|
|
}
|
|
|
|
idx := cme.lastStreamIdx
|
|
mapping := cme.resultFieldsPerStream[idx]
|
|
if mapping == nil {
|
|
for _, f := range cme.resultFields {
|
|
index := -1
|
|
for i, sf := range s.Fields {
|
|
if sf == f {
|
|
index = i
|
|
break
|
|
}
|
|
}
|
|
mapping = append(mapping, index)
|
|
cme.resultFieldsPerStream[idx] = mapping
|
|
}
|
|
}
|
|
|
|
s.Fields = cme.resultFields
|
|
p := s.Points[0]
|
|
originalValues := p.Values
|
|
p.Values = nil
|
|
for _, i := range mapping {
|
|
if i == -1 {
|
|
p.Values = append(p.Values, nil)
|
|
continue
|
|
}
|
|
p.Values = append(p.Values, originalValues[i])
|
|
}
|
|
}
|