Merge points at the shard level
parent a9d92301f6
commit 78b99f60c6

@@ -238,7 +238,7 @@ func (self *ShardData) getProcessor(querySpec *parser.QuerySpec, processor engin
 	// We should aggregate at the shard level
 	if self.ShouldAggregateLocally(querySpec) {
 		log.Debug("creating a query engine")
-		processor, err = engine.NewQueryEngine(processor, query)
+		processor, err = engine.NewQueryEngine(processor, query, nil)
 		if err != nil {
 			return nil, err
 		}

@@ -300,6 +300,8 @@ func (self *ShardData) Query(querySpec *parser.QuerySpec, response chan<- *p.Res
 	var processor engine.Processor = NewResponseChannelProcessor(NewResponseChannelWrapper(response))
 	var err error
 
+	processor = NewShardIdInserterProcessor(self.Id(), processor)
+
 	processor, err = self.getProcessor(querySpec, processor)
 	if err != nil {
 		response <- &p.Response{

@@ -0,0 +1,31 @@
+package cluster
+
+import (
+	"fmt"
+
+	"github.com/influxdb/influxdb/engine"
+	"github.com/influxdb/influxdb/protocol"
+)
+
+// A processor to set the ShardId on the series to `id`
+type ShardIdInserterProcessor struct {
+	id   uint32
+	next engine.Processor
+}
+
+func NewShardIdInserterProcessor(id uint32, next engine.Processor) ShardIdInserterProcessor {
+	return ShardIdInserterProcessor{id, next}
+}
+
+func (sip ShardIdInserterProcessor) Yield(s *protocol.Series) (bool, error) {
+	s.ShardId = &sip.id
+	return sip.next.Yield(s)
+}
+
+func (sip ShardIdInserterProcessor) Close() error {
+	return sip.next.Close()
+}
+
+func (sip ShardIdInserterProcessor) Name() string {
+	return fmt.Sprintf("ShardIdInserterProcessor (%d)", sip.id)
+}

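The new processor is a thin decorator: it stamps every series with the id of the shard it came from and then forwards it, so that downstream merge logic can tell apart identical series names arriving from different shards. A minimal, self-contained sketch of the same pattern (the Processor interface and Series type below are simplified stand-ins, not the real influxdb types):

package main

import "fmt"

// Simplified stand-ins for the real engine.Processor and protocol.Series types.
type Series struct {
	Name    string
	ShardId *uint32
}

type Processor interface {
	Yield(s *Series) (bool, error)
	Close() error
}

// printProcessor is a terminal processor that just prints what it receives.
type printProcessor struct{}

func (printProcessor) Yield(s *Series) (bool, error) {
	fmt.Printf("series %q from shard %d\n", s.Name, *s.ShardId)
	return true, nil
}

func (printProcessor) Close() error { return nil }

// shardIdInserter mirrors ShardIdInserterProcessor: set the shard id, then forward.
type shardIdInserter struct {
	id   uint32
	next Processor
}

func (p shardIdInserter) Yield(s *Series) (bool, error) {
	s.ShardId = &p.id
	return p.next.Yield(s)
}

func (p shardIdInserter) Close() error { return p.next.Close() }

func main() {
	var proc Processor = printProcessor{}
	proc = shardIdInserter{id: 42, next: proc} // wrap, as ShardData.Query now does
	proc.Yield(&Series{Name: "cpu.load"})
	proc.Close()
}

ShardData.Query wraps its response-channel processor in exactly this way before handing it to getProcessor, as shown in the hunk above.
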
@@ -237,7 +237,11 @@ func (self *Coordinator) getShardsAndProcessor(querySpec *parser.QuerySpec, writ
 	if !shouldAggregateLocally {
 		// if we should aggregate in the coordinator (i.e. aggregation
 		// isn't happening locally at the shard level), create an engine
-		writer, err = engine.NewQueryEngine(writer, q)
+		shardIds := make([]uint32, len(shards))
+		for i, s := range shards {
+			shardIds[i] = s.Id()
+		}
+		writer, err = engine.NewQueryEngine(writer, q, shardIds)
 		return shards, writer, err
 	}
 

@@ -30,7 +30,7 @@ type PointIterator struct {
 // there's no need to call Next() after the call to NewPointIterator,
 // but the user should check Valid() to make sure the iterator is
 // pointing at a valid point.
-func NewPointIterator(itrs []storage.Iterator, fields []*metastore.Field, startTime, endTime time.Time, asc bool) PointIterator {
+func NewPointIterator(itrs []storage.Iterator, fields []*metastore.Field, startTime, endTime time.Time, asc bool) *PointIterator {
 	pi := PointIterator{
 		valid: true,
 		err:   nil,

@@ -44,7 +44,7 @@ func NewPointIterator(itrs []storage.Iterator, fields []*metastore.Field, startT
 
 	// seek to the first point
 	pi.Next()
-	return pi
+	return &pi
 }
 
 // public api

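NewPointIterator now hands back a *PointIterator rather than a value, presumably so a single iterator's cursor state can be shared; the shard code later in this commit stores the iterators in a []*PointIterator and advances them from several places. A toy illustration of the difference between returning a copy and returning a handle (a hypothetical counter, not the real iterator):

package main

import "fmt"

// A hypothetical cursor with mutable state, standing in for PointIterator.
type cursor struct{ pos int }

func (c *cursor) Next() { c.pos++ }

// byValue returns a copy of the cursor; byPointer returns a shared handle.
func byValue() cursor    { return cursor{} }
func byPointer() *cursor { return &cursor{} }

func main() {
	v := byValue()
	copyOfV := v
	copyOfV.Next()
	fmt.Println(v.pos, copyOfV.pos) // 0 1: advancing the copy left the original behind

	p := byPointer()
	alias := p
	alias.Next()
	fmt.Println(p.pos, alias.pos) // 1 1: both names see the same underlying state
}
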
@@ -0,0 +1,35 @@
+package datastore
+
+import "github.com/influxdb/influxdb/protocol"
+
+// PointIteratorStream is a struct that implements the StreamQuery
+// interface and is used by the shard with the Merger to merge the
+// data points locally to form a monotonic stream of points (increasing
+// or decreasing timestamps)
+type PointIteratorStream struct {
+	pi     *PointIterator
+	name   string
+	fields []string
+}
+
+// Returns true if the point iterator is still valid
+func (pis PointIteratorStream) HasPoint() bool {
+	return pis.pi.Valid()
+}
+
+// Returns the next point from the point iterator
+func (pis PointIteratorStream) Next() *protocol.Series {
+	p := pis.pi.Point()
+	s := &protocol.Series{
+		Name:   &pis.name,
+		Fields: pis.fields,
+		Points: []*protocol.Point{p},
+	}
+	pis.pi.Next()
+	return s
+}
+
+// Returns true if the point iterator is not valid
+func (pis PointIteratorStream) Closed() bool {
+	return !pis.pi.Valid()
+}

@@ -102,12 +102,57 @@ func (self *Shard) Query(querySpec *parser.QuerySpec, processor engine.Processor
 		return self.executeDeleteQuery(querySpec, processor)
 	}
 
-	seriesAndColumns := querySpec.SelectQuery().GetReferencedColumns()
-
 	if !self.hasReadAccess(querySpec) {
 		return errors.New("User does not have access to one or more of the series requested.")
 	}
 
+	switch t := querySpec.SelectQuery().FromClause.Type; t {
+	case parser.FromClauseArray:
+		log.Debug("Shard %s: running a regular query", self.db.Path())
+		return self.executeArrayQuery(querySpec, processor)
+	case parser.FromClauseMerge, parser.FromClauseInnerJoin:
+		log.Debug("Shard %s: running a merge query", self.db.Path())
+		return self.executeMergeQuery(querySpec, processor, t)
+	default:
+		panic(fmt.Errorf("Unknown from clause type %s", t))
+	}
+}
+
+func (self *Shard) IsClosed() bool {
+	return self.closed
+}
+
+func (self *Shard) executeMergeQuery(querySpec *parser.QuerySpec, processor engine.Processor, t parser.FromClauseType) error {
+	seriesAndColumns := querySpec.SelectQuery().GetReferencedColumns()
+	iterators := make([]*PointIterator, len(seriesAndColumns))
+	streams := make([]engine.StreamQuery, len(iterators))
+	i := 0
+	var err error
+	for s, c := range seriesAndColumns {
+		c, iterators[i], err = self.getPointIteratorForSeries(querySpec, s.Name, c)
+		if err != nil {
+			log.Error(err)
+			return err
+		}
+		streams[i] = PointIteratorStream{
+			pi:     iterators[i],
+			name:   s.Name,
+			fields: c,
+		}
+		i++
+	}
+
+	h := &engine.SeriesHeap{Ascending: querySpec.IsAscending()}
+	merger := engine.NewCME("Shard", streams, h, processor, t == parser.FromClauseMerge)
+	if _, err := merger.Update(); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (self *Shard) executeArrayQuery(querySpec *parser.QuerySpec, processor engine.Processor) error {
+	seriesAndColumns := querySpec.SelectQuery().GetReferencedColumns()
+
 	for series, columns := range seriesAndColumns {
 		if regex, ok := series.GetCompiledRegex(); ok {
 			seriesNames := self.metaStore.GetSeriesForDatabaseAndRegex(querySpec.Database(), regex)

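executeMergeQuery opens one PointIterator per referenced series, wraps each in a PointIteratorStream, and hands them to the heap-backed merger (engine.SeriesHeap via NewCME), which interleaves them into a single stream ordered by timestamp. The sketch below shows the underlying k-way merge idea with container/heap over already-sorted inputs; it is only an illustration of the technique, not the engine's actual Merger:

package main

import (
	"container/heap"
	"fmt"
)

// item is the head of one input stream: its timestamp and which stream it came from.
type item struct {
	ts     int64
	stream int
}

type minHeap []item

func (h minHeap) Len() int            { return len(h) }
func (h minHeap) Less(i, j int) bool  { return h[i].ts < h[j].ts }
func (h minHeap) Swap(i, j int)       { h[i], h[j] = h[j], h[i] }
func (h *minHeap) Push(x interface{}) { *h = append(*h, x.(item)) }
func (h *minHeap) Pop() interface{} {
	old := *h
	n := len(old)
	x := old[n-1]
	*h = old[:n-1]
	return x
}

// mergeSorted interleaves several ascending streams into one ascending stream,
// always emitting the smallest head and refilling from the stream it came from.
func mergeSorted(streams [][]int64) []int64 {
	pos := make([]int, len(streams))
	h := &minHeap{}
	for i, s := range streams {
		if len(s) > 0 {
			heap.Push(h, item{ts: s[0], stream: i})
			pos[i] = 1
		}
	}
	var out []int64
	for h.Len() > 0 {
		head := heap.Pop(h).(item)
		out = append(out, head.ts)
		if pos[head.stream] < len(streams[head.stream]) {
			heap.Push(h, item{ts: streams[head.stream][pos[head.stream]], stream: head.stream})
			pos[head.stream]++
		}
	}
	return out
}

func main() {
	fmt.Println(mergeSorted([][]int64{{1, 4, 9}, {2, 3, 8}, {5, 6, 7}}))
	// [1 2 3 4 5 6 7 8 9]
}

Each pop takes the smallest (or, for descending queries, largest) head among the inputs, which only works because every input is already sorted; that is what the per-series PointIterator provides.
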
@@ -127,37 +172,27 @@ func (self *Shard) Query(querySpec *parser.QuerySpec, processor engine.Processor
 			}
 		}
 	}
 
 	return nil
 }
 
-func (self *Shard) IsClosed() bool {
-	return self.closed
-}
-
-func (self *Shard) executeQueryForSeries(querySpec *parser.QuerySpec, seriesName string, columns []string, processor engine.Processor) error {
-	fields, err := self.getFieldsForSeries(querySpec.Database(), seriesName, columns)
+func (self *Shard) executeQueryForSeries(querySpec *parser.QuerySpec, name string, columns []string, processor engine.Processor) error {
+	if querySpec.IsSinglePointQuery() {
+		log.Debug("Running single query for series %s", name)
+		return self.executeSinglePointQuery(querySpec, name, columns, processor)
+	}
+	var pi *PointIterator
+	var err error
+	columns, pi, err = self.getPointIteratorForSeries(querySpec, name, columns)
 	if err != nil {
-		log.Error("Error looking up fields for %s: %s", seriesName, err)
 		return err
 	}
-
-	if querySpec.IsSinglePointQuery() {
-		log.Debug("Running single query for series %s, fields %v", seriesName, fields)
-		return self.executeSinglePointQuery(querySpec, seriesName, fields, processor)
-	}
-
-	startTime := querySpec.GetStartTime()
-	endTime := querySpec.GetEndTime()
-
-	query := querySpec.SelectQuery()
-
-	aliases := query.GetTableAliases(seriesName)
-
-	fieldNames, iterators := self.getIterators(fields, startTime, endTime, query.Ascending)
-	seriesOutgoing := &protocol.Series{Name: protocol.String(seriesName), Fields: fieldNames, Points: make([]*protocol.Point, 0, self.pointBatchSize)}
-	pi := NewPointIterator(iterators, fields, querySpec.GetStartTime(), querySpec.GetEndTime(), query.Ascending)
 	defer pi.Close()
+
+	query := querySpec.SelectQuery()
+	aliases := query.GetTableAliases(name)
+
+	seriesOutgoing := &protocol.Series{Name: protocol.String(name), Fields: columns, Points: make([]*protocol.Point, 0, self.pointBatchSize)}
 	for pi.Valid() {
 		p := pi.Point()
 		seriesOutgoing.Points = append(seriesOutgoing.Points, p)

@@ -170,7 +205,7 @@ func (self *Shard) executeQueryForSeries(querySpec *parser.QuerySpec, seriesName
 					return err
 				}
 			}
-			seriesOutgoing = &protocol.Series{Name: protocol.String(seriesName), Fields: fieldNames, Points: make([]*protocol.Point, 0, self.pointBatchSize)}
+			seriesOutgoing = &protocol.Series{Name: protocol.String(name), Fields: columns, Points: make([]*protocol.Point, 0, self.pointBatchSize)}
 		}
 
 		pi.Next()

@@ -193,6 +228,79 @@ func (self *Shard) executeQueryForSeries(querySpec *parser.QuerySpec, seriesName
 	return nil
 }
 
+func (self *Shard) executeSinglePointQuery(querySpec *parser.QuerySpec, name string, columns []string, p engine.Processor) error {
+	fields, err := self.getFieldsForSeries(querySpec.Database(), name, columns)
+	if err != nil {
+		log.Error("Error looking up fields for %s: %s", name, err)
+		return err
+	}
+
+	query := querySpec.SelectQuery()
+	fieldCount := len(fields)
+	fieldNames := make([]string, 0, fieldCount)
+	point := &protocol.Point{Values: make([]*protocol.FieldValue, 0, fieldCount)}
+	timestamp := common.TimeToMicroseconds(query.GetStartTime())
+	sequenceNumber, err := query.GetSinglePointQuerySequenceNumber()
+	if err != nil {
+		return err
+	}
+
+	// set the timestamp and sequence number
+	point.SequenceNumber = &sequenceNumber
+	point.SetTimestampInMicroseconds(timestamp)
+
+	for _, field := range fields {
+		sk := newStorageKey(field.Id, timestamp, sequenceNumber)
+		data, err := self.db.Get(sk.bytes())
+		if err != nil {
+			return err
+		}
+
+		if data == nil {
+			continue
+		}
+
+		fieldValue := &protocol.FieldValue{}
+		err = proto.Unmarshal(data, fieldValue)
+		if err != nil {
+			return err
+		}
+		fieldNames = append(fieldNames, field.Name)
+		point.Values = append(point.Values, fieldValue)
+	}
+
+	result := &protocol.Series{Name: &name, Fields: fieldNames, Points: []*protocol.Point{point}}
+
+	if len(result.Points) > 0 {
+		_, err := p.Yield(result)
+		return err
+	}
+	return nil
+}
+
+func (self *Shard) getPointIteratorForSeries(querySpec *parser.QuerySpec, name string, columns []string) ([]string, *PointIterator, error) {
+	fields, err := self.getFieldsForSeries(querySpec.Database(), name, columns)
+	if err != nil {
+		log.Error("Error looking up fields for %s: %s", name, err)
+		return nil, nil, err
+	}
+
+	startTime := querySpec.GetStartTime()
+	endTime := querySpec.GetEndTime()
+
+	query := querySpec.SelectQuery()
+
+	iterators := self.getIterators(fields, startTime, endTime, query.Ascending)
+	pi := NewPointIterator(iterators, fields, querySpec.GetStartTime(), querySpec.GetEndTime(), query.Ascending)
+
+	columns = make([]string, len(fields))
+	for i := range fields {
+		columns[i] = fields[i].Name
+	}
+
+	return columns, pi, nil
+}
+
 func (self *Shard) executeDeleteQuery(querySpec *parser.QuerySpec, processor engine.Processor) error {
 	query := querySpec.DeleteQuery()
 	series := query.GetFromClause()

@@ -282,57 +390,11 @@ func (self *Shard) close() {
 	self.db = nil
 }
 
-func (self *Shard) executeSinglePointQuery(querySpec *parser.QuerySpec, series string, fields []*metastore.Field, p engine.Processor) error {
-	query := querySpec.SelectQuery()
-	fieldCount := len(fields)
-	fieldNames := make([]string, 0, fieldCount)
-	point := &protocol.Point{Values: make([]*protocol.FieldValue, 0, fieldCount)}
-	timestamp := common.TimeToMicroseconds(query.GetStartTime())
-	sequenceNumber, err := query.GetSinglePointQuerySequenceNumber()
-	if err != nil {
-		return err
-	}
-
-	// set the timestamp and sequence number
-	point.SequenceNumber = &sequenceNumber
-	point.SetTimestampInMicroseconds(timestamp)
-
-	for _, field := range fields {
-		sk := newStorageKey(field.Id, timestamp, sequenceNumber)
-		data, err := self.db.Get(sk.bytes())
-		if err != nil {
-			return err
-		}
-
-		if data == nil {
-			continue
-		}
-
-		fieldValue := &protocol.FieldValue{}
-		err = proto.Unmarshal(data, fieldValue)
-		if err != nil {
-			return err
-		}
-		fieldNames = append(fieldNames, field.Name)
-		point.Values = append(point.Values, fieldValue)
-	}
-
-	result := &protocol.Series{Name: &series, Fields: fieldNames, Points: []*protocol.Point{point}}
-
-	if len(result.Points) > 0 {
-		_, err := p.Yield(result)
-		return err
-	}
-	return nil
-}
-
-func (self *Shard) getIterators(fields []*metastore.Field, start, end time.Time, isAscendingQuery bool) (fieldNames []string, iterators []storage.Iterator) {
+func (self *Shard) getIterators(fields []*metastore.Field, start, end time.Time, isAscendingQuery bool) (iterators []storage.Iterator) {
 	iterators = make([]storage.Iterator, len(fields))
-	fieldNames = make([]string, len(fields))
 
 	// start the iterators to go through the series data
 	for i, field := range fields {
-		fieldNames[i] = field.Name
 		iterators[i] = self.db.Iterator()
 
 		t := start

@@ -353,7 +415,7 @@ func (self *Shard) getIterators(fields []*metastore.Field, start, end time.Time,
 
 		if err := iterators[i].Error(); err != nil {
 			log.Error("Error while getting iterators: %s", err)
-			return nil, nil
+			return nil
 		}
 	}
 	return

@@ -4,22 +4,22 @@ import "github.com/influxdb/influxdb/protocol"
 
 type CommonMergeEngine struct {
 	merger  *Merger
-	streams map[string]StreamUpdate
+	streams map[uint32]StreamUpdate
 	next    Processor
 }
 
 // returns a yield function that will sort points from table1 and
 // table2 no matter what the order in which they are received.
-func NewCommonMergeEngine(tables []string, mergeColumns bool, ascending bool, next Processor) *CommonMergeEngine {
+func NewCommonMergeEngine(shards []uint32, mergeColumns bool, ascending bool, next Processor) *CommonMergeEngine {
 	cme := &CommonMergeEngine{
-		streams: make(map[string]StreamUpdate, len(tables)),
+		streams: make(map[uint32]StreamUpdate, len(shards)),
 		next:    next,
 	}
-	streams := make([]StreamQuery, len(tables))
-	for i, t := range tables {
+	streams := make([]StreamQuery, len(shards))
+	for i, sh := range shards {
 		s := NewStream()
 		streams[i] = s
-		cme.streams[t] = s
+		cme.streams[sh] = s
 	}
 	h := &SeriesHeap{Ascending: ascending}
 	cme.merger = NewCME("Engine", streams, h, next, mergeColumns)

@@ -39,7 +39,7 @@ func (cme *CommonMergeEngine) Close() error {
 }
 
 func (cme *CommonMergeEngine) Yield(s *protocol.Series) (bool, error) {
-	stream := cme.streams[*s.Name]
+	stream := cme.streams[s.GetShardId()]
 	stream.Yield(s)
 	return cme.merger.Update()
 }

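CommonMergeEngine previously keyed its per-input streams by series name, which worked while each input was a distinct table; now that shards pre-merge their own data, several shards can return series with the same name, so the streams are keyed by shard id and Yield routes on s.GetShardId(). A toy demultiplexer contrasting the two keyings (simplified types, not the real StreamUpdate machinery):

package main

import "fmt"

// A simplified series: the same name can arrive from different shards.
type series struct {
	shardID uint32
	name    string
	points  int
}

func main() {
	incoming := []series{
		{shardID: 1, name: "cpu", points: 3},
		{shardID: 2, name: "cpu", points: 5}, // same name, different shard
		{shardID: 1, name: "cpu", points: 2},
	}

	// Keyed by name, the two shards' data collapses into one bucket.
	byName := map[string]int{}
	// Keyed by shard id, each shard keeps its own stream.
	byShard := map[uint32]int{}

	for _, s := range incoming {
		byName[s.name] += s.points
		byShard[s.shardID] += s.points
	}

	fmt.Println(byName)  // map[cpu:10] (shard boundaries lost)
	fmt.Println(byShard) // map[1:5 2:5] (one stream per shard, as the merger needs)
}
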
@@ -2,7 +2,7 @@ package engine
 
 import "github.com/influxdb/influxdb/parser"
 
-func NewQueryEngine(next Processor, query *parser.SelectQuery) (Processor, error) {
+func NewQueryEngine(next Processor, query *parser.SelectQuery, shards []uint32) (Processor, error) {
 	limit := query.Limit
 
 	var engine Processor = NewPassthroughEngineWithLimit(next, 1, limit)

@@ -18,14 +18,16 @@ func NewQueryEngine(next Processor, query *parser.SelectQuery) (Processor, error
 
 	switch fromClause.Type {
 	case parser.FromClauseInnerJoin:
-		engine = NewJoinEngine(query, engine)
+		engine = NewJoinEngine(shards, query, engine)
 	case parser.FromClauseMerge:
-		tables := make([]string, len(fromClause.Names))
-		for i, name := range fromClause.Names {
-			tables[i] = name.Name.Name
-		}
-		engine = NewMergeEngine(tables, query.Ascending, engine)
+		engine = NewMergeEngine(shards, query.Ascending, engine)
 	case parser.FromClauseMergeFun:
 		// At this point the regex should be expanded to the list of
 		// tables that will be queries
 		panic("QueryEngine cannot be called with merge function")
 	}
 

@@ -14,7 +14,7 @@ type JoinEngine struct {
 	lastFields1, lastFields2 []string
 }
 
-func NewJoinEngine(query *parser.SelectQuery, next Processor) Processor {
+func NewJoinEngine(shards []uint32, query *parser.SelectQuery, next Processor) Processor {
 	table1 := query.GetFromClause().Names[0].GetAlias()
 	table2 := query.GetFromClause().Names[1].GetAlias()
 	name := table1 + "_join_" + table2

@@ -26,7 +26,7 @@ func NewJoinEngine(query *parser.SelectQuery, next Processor) Processor {
 		table2: table2,
 		query:  query,
 	}
-	mergeEngine := NewCommonMergeEngine([]string{table1, table2}, false, query.Ascending, joinEngine)
+	mergeEngine := NewCommonMergeEngine(shards, false, query.Ascending, joinEngine)
 	return mergeEngine
 }
 

@@ -7,12 +7,12 @@ type MergeEngine struct {
 	next Processor
 }
 
-func NewMergeEngine(tables []string, ascending bool, next Processor) Processor {
+func NewMergeEngine(shards []uint32, ascending bool, next Processor) Processor {
 	name := "merged"
 
 	me := &MergeEngine{name: name, next: next}
 
-	return NewCommonMergeEngine(tables, true, ascending, me)
+	return NewCommonMergeEngine(shards, true, ascending, me)
 }
 
 func (me *MergeEngine) Yield(s *protocol.Series) (bool, error) {

@@ -126,6 +126,15 @@ func (cme *Merger) tryYieldNextPoint() (bool, error) {
 
 // yield as many points as we can to the Processor `n`
 func (cme *Merger) yieldNextPoint() (bool, error) {
+	// If we consumed all the input data points, return
+	// immediately. This can be the case for example if we finished
+	// initialization and the first call to yieldNextPoint() consumed
+	// all the data points. Without this check the call to the heap's
+	// Next() method will cause a panic
+	if cme.size == 0 {
+		return true, nil
+	}
+
 	for {
 		var s *protocol.Series
 		cme.lastStreamIdx, s = cme.h.Next()

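The new guard in yieldNextPoint protects the heap: once every input stream has been drained, another pop would panic, so the merger returns early when its bookkeeping says no points remain. The same failure mode is easy to reproduce directly with container/heap (illustrative only, not the Merger's own heap):

package main

import (
	"container/heap"
	"fmt"
)

type ints []int

func (h ints) Len() int            { return len(h) }
func (h ints) Less(i, j int) bool  { return h[i] < h[j] }
func (h ints) Swap(i, j int)       { h[i], h[j] = h[j], h[i] }
func (h *ints) Push(x interface{}) { *h = append(*h, x.(int)) }
func (h *ints) Pop() interface{} {
	old := *h
	x := old[len(old)-1]
	*h = old[:len(old)-1]
	return x
}

func main() {
	h := &ints{}
	heap.Push(h, 7)
	fmt.Println(heap.Pop(h)) // 7

	// Without a size check, popping the now-empty heap panics with an
	// index-out-of-range error; hence the `if cme.size == 0` guard above.
	if h.Len() == 0 {
		fmt.Println("nothing left to yield")
		return
	}
	heap.Pop(h)
}
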
@@ -19,6 +19,7 @@ message Series {
   required string name = 2;
   repeated string fields = 3;
   repeated uint64 fieldIds = 4;
+  optional uint32 shard_id = 5;
 }
 
 message QueryResponseChunk {
