// influxdb/engine/merger.go

package engine

import (
	"code.google.com/p/log4go"
	"github.com/influxdb/influxdb/protocol"
)

// Merger merges a number of StreamQuery into one stream of points
// where the output stream of points has a monotonic timestamp order
// (increasing or decreasing depending on the SeriesHeap that is
// passed to NewCME)
type Merger struct {
	name                  string
	s                     []StreamQuery
	size                  int
	h                     SeriesHeap
	n                     Processor
	lastStreamIdx         int
	initializing          bool
	mergeColumns          bool
	fields                map[string]struct{}
	resultFields          []string
	resultFieldsPerStream map[int][]int
}

// NewCME creates a new Merger that merges the given slice of
// StreamQuery and yields the result to the processor `n`. `name` is
// used to identify the merger in the logs since it's being used in
// multiple places. The SeriesHeap `h` is used to keep track of the
// next point (whether it's the smallest or largest timestamp depends
// on the SeriesHeap). If `mergeColumns` is true, the resulting time
// series will have the fields from all StreamQueries, i.e. if the
// first stream yields `column0` and `column1` while the second
// stream yields `column2` and `column3`, then the resulting time
// series will have all 4 columns, with two of the columns set to
// `nil` depending on which side the point came from.
func NewCME(name string, s []StreamQuery, h SeriesHeap, n Processor, mergeColumns bool) *Merger {
	log4go.Debug("%sMerger: created with %d streams", name, len(s))
	return &Merger{
		name:                  name,
		s:                     s,
		h:                     h,
		n:                     n,
		lastStreamIdx:         0,
		mergeColumns:          mergeColumns,
		initializing:          true,
		fields:                make(map[string]struct{}),
		resultFieldsPerStream: make(map[int][]int),
	}
}
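
// Usage sketch (illustrative, not part of the original file; the
// `streams`, `heap`, and `next` values are assumed to be constructed
// elsewhere in the engine):
//
//	merger := NewCME("Aggregator", streams, heap, next, false)
//	for {
//		ok, err := merger.Update()
//		if !ok || err != nil {
//			break
//		}
//	}
//
// Update returns false once the downstream processor stops accepting
// points; it is meant to be driven whenever new data may be
// available on one of the streams.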

// public api

// Update consumes and yields as many points as it can until one of
// the streams runs out of points (i.e. the call to HasPoint()
// returns false)
func (cme *Merger) Update() (bool, error) {
	if cme.initializing {
		return cme.initialize()
	}
	return cme.tryYieldNextPoint()
}

// private api

// initialize the state of the Merger by reading one point from each
// StreamQuery. If `mergeColumns` is true, the union of the fields
// received on the first point from each StreamQuery will be the
// fields of the resulting Series
func (cme *Merger) initialize() (bool, error) {
	for cme.h.Size() != len(cme.s) {
		stream := cme.s[cme.lastStreamIdx]
		if !stream.HasPoint() && !stream.Closed() {
			log4go.Debug("%sMerger: data not ready for stream %d, still initializing", cme.name, cme.lastStreamIdx)
			return true, nil
		}

		if stream.HasPoint() {
			p := stream.Next()
			cme.h.Add(cme.lastStreamIdx, p)
			for _, f := range p.Fields {
				cme.fields[f] = struct{}{}
			}
			cme.lastStreamIdx++
		} else if stream.Closed() {
			// The stream closed without yielding a point; remove it by
			// swapping in the last stream. lastStreamIdx isn't
			// incremented, so the swapped-in stream is examined next.
			s := len(cme.s)
			cme.s[cme.lastStreamIdx] = cme.s[s-1]
			cme.s = cme.s[:s-1]
		}
	}

	// Initialization is finished; if we're merging columns, the
	// result fields are the union of the fields seen on the first
	// point from each stream.
	if cme.mergeColumns {
		cme.resultFields = make([]string, 0, len(cme.fields))
		for f := range cme.fields {
			cme.resultFields = append(cme.resultFields, f)
		}
	}

	log4go.Debug("%sMerger initialization finished", cme.name)
	cme.initializing = false
	cme.size = len(cme.s)
	return cme.yieldNextPoint()
}
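
// Illustrative trace (not from the original source): with streams
// [A, B, C] where B closed without yielding a point, initialization
// adds A's first point to the heap, swaps C into B's slot (the slice
// becomes [A, C]), adds C's first point, and finishes with
// cme.size == 2 and two entries on the heap.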

// Try to get more data points from the StreamQueries and, if
// successful, call yieldNextPoint()
func (cme *Merger) tryYieldNextPoint() (bool, error) {
	stream := cme.s[cme.lastStreamIdx]
	// If the stream has a new point, add it to the heap
	if stream.HasPoint() {
		cme.h.Add(cme.lastStreamIdx, stream.Next())
	} else if stream.Closed() {
		cme.size--
	}

	// Only when every remaining stream has contributed a point to
	// the heap can we pick the point with the smallest (or largest)
	// timestamp and yield it to the next processor.
	if cme.h.Size() != cme.size {
		return true, nil
	}
	return cme.yieldNextPoint()
}
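
// Illustrative trace (not from the original source): suppose stream
// 0's point was just yielded, so the heap holds points for streams 1
// and 2 only. If stream 0 has no pending point and isn't closed,
// tryYieldNextPoint returns (true, nil) and waits for the next
// Update call; once stream 0 produces a point (or closes, shrinking
// cme.size), the heap is popped again via yieldNextPoint.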

// yield as many points as we can to the Processor `n`
func (cme *Merger) yieldNextPoint() (bool, error) {
	// If we consumed all the input data points, return immediately.
	// This can be the case, for example, if we finished
	// initialization and the first call to yieldNextPoint() consumed
	// all the data points. Without this check the call to the heap's
	// Next() method would panic.
	if cme.size == 0 {
		return true, nil
	}

	for {
		var s *protocol.Series
		cme.lastStreamIdx, s = cme.h.Next()
		log4go.Debug("cme.lastStreamIdx: %d, s: %s", cme.lastStreamIdx, s)
		cme.fixFields(s)
		log4go.Debug("%sMerger yielding to %s: %s", cme.name, cme.n.Name(), s)
		ok, err := cme.n.Yield(s)
		if !ok || err != nil {
			return ok, err
		}

		// Refill the heap from the stream we just popped so the heap
		// invariant (one pending point per live stream) is restored.
		stream := cme.s[cme.lastStreamIdx]
		if stream.HasPoint() {
			s := stream.Next()
			log4go.Debug("%sMerger received %s from %d", cme.name, s, cme.lastStreamIdx)
			cme.h.Add(cme.lastStreamIdx, s)
			continue
		} else if stream.Closed() {
			cme.size--
			if cme.size != 0 {
				continue
			}
		}
		return true, nil
	}
}

// modify the series to have the union of the columns from all
// StreamQueries
func (cme *Merger) fixFields(s *protocol.Series) {
	if !cme.mergeColumns {
		return
	}

	// Lazily build (and cache) the mapping from the result fields to
	// this stream's field indices; -1 marks a result field that this
	// stream doesn't have.
	idx := cme.lastStreamIdx
	mapping := cme.resultFieldsPerStream[idx]
	if mapping == nil {
		for _, f := range cme.resultFields {
			index := -1
			for i, sf := range s.Fields {
				if sf == f {
					index = i
					break
				}
			}
			mapping = append(mapping, index)
		}
		cme.resultFieldsPerStream[idx] = mapping
	}

	// Rewrite the point's values in result-field order, padding the
	// missing fields with nil.
	s.Fields = cme.resultFields
	p := s.Points[0]
	originalValues := p.Values
	p.Values = nil
	for _, i := range mapping {
		if i == -1 {
			p.Values = append(p.Values, nil)
			continue
		}
		p.Values = append(p.Values, originalValues[i])
	}
}
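
// Illustrative example (not from the original source): if the merged
// result fields are [c0, c1, c2, c3] and this stream's series has
// Fields [c2, c0], the cached mapping is [1, -1, 0, -1], so a point
// with Values [v2, v0] is rewritten to [v0, nil, v2, nil].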