Merge points at the shard level

pull/72/head
John Shahid 2014-09-11 17:33:21 -04:00
parent a9d92301f6
commit 78b99f60c6
12 changed files with 238 additions and 92 deletions
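Taken together, the changes below tag every series with the shard that produced it (the new ShardIdInserterProcessor plus a shard_id protobuf field), let each shard pre-merge its own points (Shard.executeMergeQuery), and re-key CommonMergeEngine's streams from table names to shard ids so the coordinator can interleave the per-shard results into one timestamp-ordered stream. The following is only an illustration of that final k-way merge, with made-up types and values; it is not code from this commit:

package main

import (
    "container/heap"
    "fmt"
)

// stream stands in for one shard's timestamp-ordered output.
type stream struct {
    shardID uint32
    times   []int64 // sorted within a shard
}

type streamHeap []*stream

func (h streamHeap) Len() int            { return len(h) }
func (h streamHeap) Less(i, j int) bool  { return h[i].times[0] < h[j].times[0] }
func (h streamHeap) Swap(i, j int)       { h[i], h[j] = h[j], h[i] }
func (h *streamHeap) Push(x interface{}) { *h = append(*h, x.(*stream)) }
func (h *streamHeap) Pop() interface{} {
    old := *h
    s := old[len(old)-1]
    *h = old[:len(old)-1]
    return s
}

func main() {
    // Two shards, each already sorted; the heap always yields the
    // globally smallest timestamp next: 10, 20, 30, 40.
    h := &streamHeap{
        {shardID: 1, times: []int64{10, 40}},
        {shardID: 2, times: []int64{20, 30}},
    }
    heap.Init(h)
    for h.Len() > 0 {
        s := heap.Pop(h).(*stream)
        fmt.Printf("shard %d: t=%d\n", s.shardID, s.times[0])
        if s.times = s.times[1:]; len(s.times) > 0 {
            heap.Push(h, s)
        }
    }
}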

View File

@@ -238,7 +238,7 @@ func (self *ShardData) getProcessor(querySpec *parser.QuerySpec, processor engin
     // We should aggregate at the shard level
     if self.ShouldAggregateLocally(querySpec) {
         log.Debug("creating a query engine")
-        processor, err = engine.NewQueryEngine(processor, query)
+        processor, err = engine.NewQueryEngine(processor, query, nil)
         if err != nil {
             return nil, err
         }
@@ -300,6 +300,8 @@ func (self *ShardData) Query(querySpec *parser.QuerySpec, response chan<- *p.Res
     var processor engine.Processor = NewResponseChannelProcessor(NewResponseChannelWrapper(response))
     var err error
+
+    processor = NewShardIdInserterProcessor(self.Id(), processor)
     processor, err = self.getProcessor(querySpec, processor)
     if err != nil {
         response <- &p.Response{

View File

@@ -0,0 +1,31 @@
+package cluster
+
+import (
+    "fmt"
+
+    "github.com/influxdb/influxdb/engine"
+    "github.com/influxdb/influxdb/protocol"
+)
+
+// A processor to set the ShardId on the series to `id`
+type ShardIdInserterProcessor struct {
+    id   uint32
+    next engine.Processor
+}
+
+func NewShardIdInserterProcessor(id uint32, next engine.Processor) ShardIdInserterProcessor {
+    return ShardIdInserterProcessor{id, next}
+}
+
+func (sip ShardIdInserterProcessor) Yield(s *protocol.Series) (bool, error) {
+    s.ShardId = &sip.id
+    return sip.next.Yield(s)
+}
+
+func (sip ShardIdInserterProcessor) Close() error {
+    return sip.next.Close()
+}
+
+func (sip ShardIdInserterProcessor) Name() string {
+    return fmt.Sprintf("ShardIdInserterProcessor (%d)", sip.id)
+}
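The processor is a thin decorator: it stamps each series with the shard it came from and otherwise passes everything through, which is what lets CommonMergeEngine further down key its streams by s.GetShardId(). A self-contained sketch of the same pattern with stubbed types (illustrative only; the real Processor interface lives in the engine package):

package main

import "fmt"

type Series struct{ ShardId *uint32 }

type Processor interface {
    Yield(s *Series) (bool, error)
}

// printer stands in for whatever processor comes next in the chain.
type printer struct{}

func (printer) Yield(s *Series) (bool, error) {
    fmt.Println("shard id:", *s.ShardId)
    return true, nil
}

// inserter mirrors ShardIdInserterProcessor: set the id, then forward.
type inserter struct {
    id   uint32
    next Processor
}

func (p inserter) Yield(s *Series) (bool, error) {
    s.ShardId = &p.id
    return p.next.Yield(s)
}

func main() {
    p := inserter{id: 42, next: printer{}}
    p.Yield(&Series{}) // prints: shard id: 42
}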

View File

@@ -237,7 +237,11 @@ func (self *Coordinator) getShardsAndProcessor(querySpec *parser.QuerySpec, writ
     if !shouldAggregateLocally {
         // if we should aggregate in the coordinator (i.e. aggregation
         // isn't happening locally at the shard level), create an engine
-        writer, err = engine.NewQueryEngine(writer, q)
+        shardIds := make([]uint32, len(shards))
+        for i, s := range shards {
+            shardIds[i] = s.Id()
+        }
+        writer, err = engine.NewQueryEngine(writer, q, shardIds)
         return shards, writer, err
     }

View File

@@ -30,7 +30,7 @@ type PointIterator struct {
 // there's no need to call Next() after the call to NewPointIterator,
 // but the user should check Valid() to make sure the iterator is
 // pointing at a valid point.
-func NewPointIterator(itrs []storage.Iterator, fields []*metastore.Field, startTime, endTime time.Time, asc bool) PointIterator {
+func NewPointIterator(itrs []storage.Iterator, fields []*metastore.Field, startTime, endTime time.Time, asc bool) *PointIterator {
     pi := PointIterator{
         valid: true,
         err:   nil,
@@ -44,7 +44,7 @@ func NewPointIterator(itrs []storage.Iterator, fields []*metastore.Field, startT
     // seek to the first point
     pi.Next()
-    return pi
+    return &pi
 }

 // public api

View File

@@ -0,0 +1,35 @@
+package datastore
+
+import "github.com/influxdb/influxdb/protocol"
+
+// PointIteratorStream is a struct that implements the StreamQuery
+// interface and is used by the shard with the Merger to merge the
+// data points locally to form a monotonic stream of points (increasing
+// or decreasing timestamps)
+type PointIteratorStream struct {
+    pi     *PointIterator
+    name   string
+    fields []string
+}
+
+// Returns true if the point iterator is still valid
+func (pis PointIteratorStream) HasPoint() bool {
+    return pis.pi.Valid()
+}
+
+// Returns the next point from the point iterator
+func (pis PointIteratorStream) Next() *protocol.Series {
+    p := pis.pi.Point()
+    s := &protocol.Series{
+        Name:   &pis.name,
+        Fields: pis.fields,
+        Points: []*protocol.Point{p},
+    }
+    pis.pi.Next()
+    return s
+}
+
+// Returns true if the point iterator is not valid
+func (pis PointIteratorStream) Closed() bool {
+    return !pis.pi.Valid()
+}

View File

@@ -102,12 +102,57 @@ func (self *Shard) Query(querySpec *parser.QuerySpec, processor engine.Processor
         return self.executeDeleteQuery(querySpec, processor)
     }
-    seriesAndColumns := querySpec.SelectQuery().GetReferencedColumns()
     if !self.hasReadAccess(querySpec) {
         return errors.New("User does not have access to one or more of the series requested.")
     }
+    switch t := querySpec.SelectQuery().FromClause.Type; t {
+    case parser.FromClauseArray:
+        log.Debug("Shard %s: running a regular query", self.db.Path())
+        return self.executeArrayQuery(querySpec, processor)
+    case parser.FromClauseMerge, parser.FromClauseInnerJoin:
+        log.Debug("Shard %s: running a merge query", self.db.Path())
+        return self.executeMergeQuery(querySpec, processor, t)
+    default:
+        panic(fmt.Errorf("Unknown from clause type %s", t))
+    }
+}
+
+func (self *Shard) IsClosed() bool {
+    return self.closed
+}
+
+func (self *Shard) executeMergeQuery(querySpec *parser.QuerySpec, processor engine.Processor, t parser.FromClauseType) error {
+    seriesAndColumns := querySpec.SelectQuery().GetReferencedColumns()
+    iterators := make([]*PointIterator, len(seriesAndColumns))
+    streams := make([]engine.StreamQuery, len(iterators))
+    i := 0
+    var err error
+    for s, c := range seriesAndColumns {
+        c, iterators[i], err = self.getPointIteratorForSeries(querySpec, s.Name, c)
+        if err != nil {
+            log.Error(err)
+            return err
+        }
+        streams[i] = PointIteratorStream{
+            pi:     iterators[i],
+            name:   s.Name,
+            fields: c,
+        }
+        i++
+    }
+
+    h := &engine.SeriesHeap{Ascending: querySpec.IsAscending()}
+    merger := engine.NewCME("Shard", streams, h, processor, t == parser.FromClauseMerge)
+    if _, err := merger.Update(); err != nil {
+        return err
+    }
+    return nil
+}
+
+func (self *Shard) executeArrayQuery(querySpec *parser.QuerySpec, processor engine.Processor) error {
+    seriesAndColumns := querySpec.SelectQuery().GetReferencedColumns()
+
     for series, columns := range seriesAndColumns {
         if regex, ok := series.GetCompiledRegex(); ok {
             seriesNames := self.metaStore.GetSeriesForDatabaseAndRegex(querySpec.Database(), regex)
@@ -127,37 +172,27 @@ func (self *Shard) Query(querySpec *parser.QuerySpec, processor engine.Processor
             }
         }
     }
     return nil
 }

-func (self *Shard) IsClosed() bool {
-    return self.closed
-}
-
-func (self *Shard) executeQueryForSeries(querySpec *parser.QuerySpec, seriesName string, columns []string, processor engine.Processor) error {
-    fields, err := self.getFieldsForSeries(querySpec.Database(), seriesName, columns)
+func (self *Shard) executeQueryForSeries(querySpec *parser.QuerySpec, name string, columns []string, processor engine.Processor) error {
+    if querySpec.IsSinglePointQuery() {
+        log.Debug("Running single query for series %s", name)
+        return self.executeSinglePointQuery(querySpec, name, columns, processor)
+    }
+
+    var pi *PointIterator
+    var err error
+    columns, pi, err = self.getPointIteratorForSeries(querySpec, name, columns)
     if err != nil {
-        log.Error("Error looking up fields for %s: %s", seriesName, err)
         return err
     }
-
-    if querySpec.IsSinglePointQuery() {
-        log.Debug("Running single query for series %s, fields %v", seriesName, fields)
-        return self.executeSinglePointQuery(querySpec, seriesName, fields, processor)
-    }
-
-    startTime := querySpec.GetStartTime()
-    endTime := querySpec.GetEndTime()
-
-    query := querySpec.SelectQuery()
-    aliases := query.GetTableAliases(seriesName)
-
-    fieldNames, iterators := self.getIterators(fields, startTime, endTime, query.Ascending)
-    seriesOutgoing := &protocol.Series{Name: protocol.String(seriesName), Fields: fieldNames, Points: make([]*protocol.Point, 0, self.pointBatchSize)}
-
-    pi := NewPointIterator(iterators, fields, querySpec.GetStartTime(), querySpec.GetEndTime(), query.Ascending)
     defer pi.Close()
+
+    query := querySpec.SelectQuery()
+    aliases := query.GetTableAliases(name)
+    seriesOutgoing := &protocol.Series{Name: protocol.String(name), Fields: columns, Points: make([]*protocol.Point, 0, self.pointBatchSize)}

     for pi.Valid() {
         p := pi.Point()
         seriesOutgoing.Points = append(seriesOutgoing.Points, p)
@@ -170,7 +205,7 @@ func (self *Shard) executeQueryForSeries(querySpec *parser.QuerySpec, seriesName
                     return err
                 }
             }
-            seriesOutgoing = &protocol.Series{Name: protocol.String(seriesName), Fields: fieldNames, Points: make([]*protocol.Point, 0, self.pointBatchSize)}
+            seriesOutgoing = &protocol.Series{Name: protocol.String(name), Fields: columns, Points: make([]*protocol.Point, 0, self.pointBatchSize)}
         }

         pi.Next()
@@ -193,6 +228,79 @@ func (self *Shard) executeQueryForSeries(querySpec *parser.QuerySpec, seriesName
     return nil
 }

+func (self *Shard) executeSinglePointQuery(querySpec *parser.QuerySpec, name string, columns []string, p engine.Processor) error {
+    fields, err := self.getFieldsForSeries(querySpec.Database(), name, columns)
+    if err != nil {
+        log.Error("Error looking up fields for %s: %s", name, err)
+        return err
+    }
+
+    query := querySpec.SelectQuery()
+    fieldCount := len(fields)
+    fieldNames := make([]string, 0, fieldCount)
+    point := &protocol.Point{Values: make([]*protocol.FieldValue, 0, fieldCount)}
+    timestamp := common.TimeToMicroseconds(query.GetStartTime())
+    sequenceNumber, err := query.GetSinglePointQuerySequenceNumber()
+    if err != nil {
+        return err
+    }
+
+    // set the timestamp and sequence number
+    point.SequenceNumber = &sequenceNumber
+    point.SetTimestampInMicroseconds(timestamp)
+
+    for _, field := range fields {
+        sk := newStorageKey(field.Id, timestamp, sequenceNumber)
+        data, err := self.db.Get(sk.bytes())
+        if err != nil {
+            return err
+        }
+        if data == nil {
+            continue
+        }
+
+        fieldValue := &protocol.FieldValue{}
+        err = proto.Unmarshal(data, fieldValue)
+        if err != nil {
+            return err
+        }
+        fieldNames = append(fieldNames, field.Name)
+        point.Values = append(point.Values, fieldValue)
+    }
+
+    result := &protocol.Series{Name: &name, Fields: fieldNames, Points: []*protocol.Point{point}}
+    if len(result.Points) > 0 {
+        _, err := p.Yield(result)
+        return err
+    }
+    return nil
+}
+
+func (self *Shard) getPointIteratorForSeries(querySpec *parser.QuerySpec, name string, columns []string) ([]string, *PointIterator, error) {
+    fields, err := self.getFieldsForSeries(querySpec.Database(), name, columns)
+    if err != nil {
+        log.Error("Error looking up fields for %s: %s", name, err)
+        return nil, nil, err
+    }
+
+    startTime := querySpec.GetStartTime()
+    endTime := querySpec.GetEndTime()
+
+    query := querySpec.SelectQuery()
+    iterators := self.getIterators(fields, startTime, endTime, query.Ascending)
+    pi := NewPointIterator(iterators, fields, querySpec.GetStartTime(), querySpec.GetEndTime(), query.Ascending)
+
+    columns = make([]string, len(fields))
+    for i := range fields {
+        columns[i] = fields[i].Name
+    }
+
+    return columns, pi, nil
+}
func (self *Shard) executeDeleteQuery(querySpec *parser.QuerySpec, processor engine.Processor) error {
query := querySpec.DeleteQuery()
series := query.GetFromClause()
@@ -282,57 +390,11 @@ func (self *Shard) close() {
     self.db = nil
 }

-func (self *Shard) executeSinglePointQuery(querySpec *parser.QuerySpec, series string, fields []*metastore.Field, p engine.Processor) error {
-    query := querySpec.SelectQuery()
-    fieldCount := len(fields)
-    fieldNames := make([]string, 0, fieldCount)
-    point := &protocol.Point{Values: make([]*protocol.FieldValue, 0, fieldCount)}
-    timestamp := common.TimeToMicroseconds(query.GetStartTime())
-    sequenceNumber, err := query.GetSinglePointQuerySequenceNumber()
-    if err != nil {
-        return err
-    }
-
-    // set the timestamp and sequence number
-    point.SequenceNumber = &sequenceNumber
-    point.SetTimestampInMicroseconds(timestamp)
-
-    for _, field := range fields {
-        sk := newStorageKey(field.Id, timestamp, sequenceNumber)
-        data, err := self.db.Get(sk.bytes())
-        if err != nil {
-            return err
-        }
-        if data == nil {
-            continue
-        }
-
-        fieldValue := &protocol.FieldValue{}
-        err = proto.Unmarshal(data, fieldValue)
-        if err != nil {
-            return err
-        }
-        fieldNames = append(fieldNames, field.Name)
-        point.Values = append(point.Values, fieldValue)
-    }
-
-    result := &protocol.Series{Name: &series, Fields: fieldNames, Points: []*protocol.Point{point}}
-    if len(result.Points) > 0 {
-        _, err := p.Yield(result)
-        return err
-    }
-    return nil
-}
-
-func (self *Shard) getIterators(fields []*metastore.Field, start, end time.Time, isAscendingQuery bool) (fieldNames []string, iterators []storage.Iterator) {
+func (self *Shard) getIterators(fields []*metastore.Field, start, end time.Time, isAscendingQuery bool) (iterators []storage.Iterator) {
     iterators = make([]storage.Iterator, len(fields))
-    fieldNames = make([]string, len(fields))

     // start the iterators to go through the series data
     for i, field := range fields {
-        fieldNames[i] = field.Name
         iterators[i] = self.db.Iterator()

         t := start
@@ -353,7 +415,7 @@ func (self *Shard) getIterators(fields []*metastore.Field, start, end time.Time,
         if err := iterators[i].Error(); err != nil {
             log.Error("Error while getting iterators: %s", err)
-            return nil, nil
+            return nil
         }
     }

     return

View File

@@ -4,22 +4,22 @@ import "github.com/influxdb/influxdb/protocol"

 type CommonMergeEngine struct {
     merger  *Merger
-    streams map[string]StreamUpdate
+    streams map[uint32]StreamUpdate
     next    Processor
 }

 // returns a yield function that will sort points from table1 and
 // table2 no matter what the order in which they are received.
-func NewCommonMergeEngine(tables []string, mergeColumns bool, ascending bool, next Processor) *CommonMergeEngine {
+func NewCommonMergeEngine(shards []uint32, mergeColumns bool, ascending bool, next Processor) *CommonMergeEngine {
     cme := &CommonMergeEngine{
-        streams: make(map[string]StreamUpdate, len(tables)),
+        streams: make(map[uint32]StreamUpdate, len(shards)),
         next:    next,
     }
-    streams := make([]StreamQuery, len(tables))
-    for i, t := range tables {
+    streams := make([]StreamQuery, len(shards))
+    for i, sh := range shards {
         s := NewStream()
         streams[i] = s
-        cme.streams[t] = s
+        cme.streams[sh] = s
     }
     h := &SeriesHeap{Ascending: ascending}
     cme.merger = NewCME("Engine", streams, h, next, mergeColumns)
@@ -39,7 +39,7 @@ func (cme *CommonMergeEngine) Close() error {
 }

 func (cme *CommonMergeEngine) Yield(s *protocol.Series) (bool, error) {
-    stream := cme.streams[*s.Name]
+    stream := cme.streams[s.GetShardId()]
     stream.Yield(s)
     return cme.merger.Update()
 }
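Keying the streams by shard id instead of series name matters once shards pre-merge their own points: for a merge query, two shards can both emit series under the same name, so a name-keyed map would route two distinct inputs into one stream and starve the heap. A tiny demonstration of the collision (illustrative values only, not code from this commit):

package main

import "fmt"

func main() {
    inputs := []struct {
        shardID uint32
        name    string
    }{{1, "cpu"}, {2, "cpu"}} // same series name arriving from two shards

    byName, byShard := map[string]int{}, map[uint32]int{}
    for _, s := range inputs {
        byName[s.name]++
        byShard[s.shardID]++
    }
    // byName collapses the two inputs into one stream; byShard keeps both.
    fmt.Println(len(byName), len(byShard)) // 1 2
}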

View File

@@ -2,7 +2,7 @@ package engine

 import "github.com/influxdb/influxdb/parser"

-func NewQueryEngine(next Processor, query *parser.SelectQuery) (Processor, error) {
+func NewQueryEngine(next Processor, query *parser.SelectQuery, shards []uint32) (Processor, error) {
     limit := query.Limit

     var engine Processor = NewPassthroughEngineWithLimit(next, 1, limit)
@@ -18,14 +18,16 @@ func NewQueryEngine(next Processor, query *parser.SelectQuery) (Processor, error
     switch fromClause.Type {
     case parser.FromClauseInnerJoin:
-        engine = NewJoinEngine(query, engine)
+        engine = NewJoinEngine(shards, query, engine)
     case parser.FromClauseMerge:
-        tables := make([]string, len(fromClause.Names))
-        for i, name := range fromClause.Names {
-            tables[i] = name.Name.Name
-        }
-        engine = NewMergeEngine(tables, query.Ascending, engine)
+        engine = NewMergeEngine(shards, query.Ascending, engine)
     case parser.FromClauseMergeFun:
         // At this point the regex should be expanded to the list of
         // tables that will be queried
         panic("QueryEngine cannot be called with merge function")
     }

View File

@@ -14,7 +14,7 @@ type JoinEngine struct {
     lastFields1, lastFields2 []string
 }

-func NewJoinEngine(query *parser.SelectQuery, next Processor) Processor {
+func NewJoinEngine(shards []uint32, query *parser.SelectQuery, next Processor) Processor {
     table1 := query.GetFromClause().Names[0].GetAlias()
     table2 := query.GetFromClause().Names[1].GetAlias()
     name := table1 + "_join_" + table2
@@ -26,7 +26,7 @@ func NewJoinEngine(query *parser.SelectQuery, next Processor) Processor {
         table2: table2,
         query:  query,
     }

-    mergeEngine := NewCommonMergeEngine([]string{table1, table2}, false, query.Ascending, joinEngine)
+    mergeEngine := NewCommonMergeEngine(shards, false, query.Ascending, joinEngine)
     return mergeEngine
 }

View File

@@ -7,12 +7,12 @@ type MergeEngine struct {
     next Processor
 }

-func NewMergeEngine(tables []string, ascending bool, next Processor) Processor {
+func NewMergeEngine(shards []uint32, ascending bool, next Processor) Processor {
     name := "merged"

     me := &MergeEngine{name: name, next: next}

-    return NewCommonMergeEngine(tables, true, ascending, me)
+    return NewCommonMergeEngine(shards, true, ascending, me)
 }

 func (me *MergeEngine) Yield(s *protocol.Series) (bool, error) {

View File

@@ -126,6 +126,15 @@ func (cme *Merger) tryYieldNextPoint() (bool, error) {

 // yield as many points as we can to the Processor `n`
 func (cme *Merger) yieldNextPoint() (bool, error) {
+    // If we consumed all the input data points, return
+    // immediately. This can be the case, for example, if we finished
+    // initialization and the first call to yieldNextPoint() consumed
+    // all the data points. Without this check the call to the heap's
+    // Next() method will cause a panic
+    if cme.size == 0 {
+        return true, nil
+    }
+
     for {
         var s *protocol.Series
         cme.lastStreamIdx, s = cme.h.Next()
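The guard reflects the general rule that popping an empty heap is a programming error rather than a no-op. The same shape expressed with the standard library's container/heap (an analogy only; the Merger uses its own SeriesHeap, not container/heap):

package main

import (
    "container/heap"
    "fmt"
)

type ints []int

func (h ints) Len() int            { return len(h) }
func (h ints) Less(i, j int) bool  { return h[i] < h[j] }
func (h ints) Swap(i, j int)       { h[i], h[j] = h[j], h[i] }
func (h *ints) Push(x interface{}) { *h = append(*h, x.(int)) }
func (h *ints) Pop() interface{} {
    old := *h
    x := old[len(old)-1]
    *h = old[:len(old)-1]
    return x
}

func main() {
    h := &ints{}
    heap.Init(h)
    if h.Len() == 0 { // same guard as cme.size == 0 above
        fmt.Println("nothing to yield")
        return
    }
    heap.Pop(h) // reached only when the heap is non-empty
}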

View File

@@ -19,6 +19,7 @@ message Series {
     required string name = 2;
     repeated string fields = 3;
     repeated uint64 fieldIds = 4;
+    optional uint32 shard_id = 5;
 }

 message QueryResponseChunk {
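Because shard_id is an optional proto2 field, the generated GetShardId() accessor falls back to the zero value when the field was never set, which is why ShardIdInserterProcessor must stamp each series before CommonMergeEngine.Yield looks the id up. Standard proto2 Go codegen produces an accessor roughly like this (a sketch of the usual generated code, not quoted from the generated file):

// Returns the field's value, or the default (0) when it is unset.
func (m *Series) GetShardId() uint32 {
    if m != nil && m.ShardId != nil {
        return *m.ShardId
    }
    return 0
}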