package influxql

import (
	"bytes"
	"errors"
	"hash/fnv"
	"sort"
	"time"
)

// DB represents an interface for creating transactions.
type DB interface {
	Begin() (Tx, error)
}

const (
	// Return an error if the user is trying to select more than this number of points in a group by statement.
	// Most likely they specified a group by interval without time boundaries.
	MaxGroupByPoints = 100000

	// All queries that return raw, non-aggregated data will have 2 results returned from the output of a reduce run.
	// The first element will be a time that we ignore, and the second element will be an array of []*rawQueryMapOutput.
	ResultCountInRawResults = 2

	// Since time is always selected, the column count when selecting only a single other value will be 2.
	SelectColumnCountWithOneValue = 2
)

// Tx represents a transaction.
// The Tx must be opened before being used.
type Tx interface {
	// CreateMapReduceJobs creates MapReduceJobs for the given select statement.
	// One MRJob will be created per unique tagset that matches the query.
	CreateMapReduceJobs(stmt *SelectStatement, tagKeys []string) ([]*MapReduceJob, error)
}

type MapReduceJob struct {
	MeasurementName string
	TagSet          *TagSet
	Mappers         []Mapper         // the mappers to hit all shards for this MRJob
	TMin            int64            // minimum time specified in the query
	TMax            int64            // maximum time specified in the query
	key             []byte           // a key that identifies the MRJob so it can be sorted
	interval        int64            // the group by interval of the query
	stmt            *SelectStatement // the select statement this job was created for
}

func (m *MapReduceJob) Open() error {
	for _, mm := range m.Mappers {
		if err := mm.Open(); err != nil {
			m.Close()
			return err
		}
	}
	return nil
}

func (m *MapReduceJob) Close() {
	for _, mm := range m.Mappers {
		mm.Close()
	}
}

func (m *MapReduceJob) Key() []byte {
	if m.key == nil {
		m.key = append([]byte(m.MeasurementName), m.TagSet.Key...)
	}
	return m.key
}

func (m *MapReduceJob) Execute(out chan *Row, filterEmptyResults bool) {
	aggregates := m.stmt.FunctionCalls()
	reduceFuncs := make([]ReduceFunc, len(aggregates))
	for i, c := range aggregates {
		reduceFunc, err := InitializeReduceFunc(c)
		if err != nil {
			out <- &Row{Err: err}
			return
		}
		reduceFuncs[i] = reduceFunc
	}

	isRaw := false
	// modify if it's a raw data query
	if len(aggregates) == 0 {
		isRaw = true
		aggregates = []*Call{nil}
		r, _ := InitializeReduceFunc(nil)
		reduceFuncs = append(reduceFuncs, r)
	}

	// we'll have a fixed number of points with timestamps in buckets. Initialize those times and a slice to hold the associated values.
	var pointCountInResult int

	// if the user didn't specify a start time or a group by interval, we're returning a single point that describes the entire range
	if m.TMin == 0 || m.interval == 0 || isRaw {
		// they want a single aggregate point for the entire time range
		m.interval = m.TMax - m.TMin
		pointCountInResult = 1
	} else {
		intervalTop := m.TMax/m.interval*m.interval + m.interval
		intervalBottom := m.TMin / m.interval * m.interval
		pointCountInResult = int((intervalTop - intervalBottom) / m.interval)
	}
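
	// For example (illustrative, treating times as small integers rather than
	// nanosecond epochs): with TMin=95, TMax=225 and interval=60,
	// intervalTop = 225/60*60+60 = 240 and intervalBottom = 95/60*60 = 60, so
	// pointCountInResult = (240-60)/60 = 3 buckets: [60,120), [120,180), [180,240).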

	// For group by time queries, limit the number of data points returned by the limit and offset.
	// Raw query limits are handled elsewhere.
	if !m.stmt.IsRawQuery && (m.stmt.Limit > 0 || m.stmt.Offset > 0) {
		// ensure that the offset isn't higher than the number of points we'd get
		if m.stmt.Offset > pointCountInResult {
			return
		}

		// take the lesser of either the precomputed number of group by buckets that
		// will be in the result or the limit passed in by the user
		if m.stmt.Limit < pointCountInResult {
			pointCountInResult = m.stmt.Limit
		}
	}

	// If we are exceeding our MaxGroupByPoints and we aren't a raw query, error out
	if !m.stmt.IsRawQuery && pointCountInResult > MaxGroupByPoints {
		out <- &Row{
			Err: errors.New("too many points in the group by interval. maybe you forgot to specify a where time clause?"),
		}
		return
	}

	// initialize the times of the aggregate points
	resultValues := make([][]interface{}, pointCountInResult)

	// ensure that the start time for the results is on the start of the window
	startTimeBucket := m.TMin / m.interval * m.interval

	for i := range resultValues {
		var t int64
		if m.stmt.Offset > 0 {
			t = startTimeBucket + (int64(i+1) * m.interval * int64(m.stmt.Offset))
		} else {
			t = startTimeBucket + (int64(i+1) * m.interval) - m.interval
		}

		// If we start getting out of our max time range, then truncate values and return
		if t > m.TMax && !isRaw {
			resultValues = resultValues[:i]
			break
		}

		// we always include time so we need one more column than we have aggregates
		vals := make([]interface{}, 0, len(aggregates)+1)
		resultValues[i] = append(vals, time.Unix(0, t).UTC())
	}

	// This just makes sure that if they specify a start time less than what the start time would be with the offset,
	// we just reset the start time to the later time to avoid going over data that won't show up in the result.
	if m.stmt.Offset > 0 && !m.stmt.IsRawQuery {
		m.TMin = resultValues[0][0].(time.Time).UnixNano()
	}

	// now loop through the aggregate functions and populate everything
	for i, c := range aggregates {
		if err := m.processAggregate(c, reduceFuncs[i], resultValues); err != nil {
			out <- &Row{
				Name: m.MeasurementName,
				Tags: m.TagSet.Tags,
				Err:  err,
			}
			return
		}
	}

	if isRaw {
		row := m.processRawResults(resultValues)
		if filterEmptyResults && m.resultsEmpty(row.Values) {
			return
		}
		// do any post processing like math and stuff
		row.Values = m.processResults(row.Values)
		out <- row
		return
	}

	// filter out empty results
	if filterEmptyResults && m.resultsEmpty(resultValues) {
		return
	}

	// put together the row to return
	columnNames := make([]string, len(m.stmt.Fields)+1)
	columnNames[0] = "time"
	for i, f := range m.stmt.Fields {
		columnNames[i+1] = f.Name()
	}

	// process the result values if there's any math in there
	resultValues = m.processResults(resultValues)

	// handle any fill options
	resultValues = m.processFill(resultValues)

	row := &Row{
		Name:    m.MeasurementName,
		Tags:    m.TagSet.Tags,
		Columns: columnNames,
		Values:  resultValues,
	}

	// and we out
	out <- row
}

func (m *MapReduceJob) processResults(results [][]interface{}) [][]interface{} {
	hasMath := false
	for _, f := range m.stmt.Fields {
		if _, ok := f.Expr.(*BinaryExpr); ok {
			hasMath = true
		} else if _, ok := f.Expr.(*ParenExpr); ok {
			hasMath = true
		}
	}

	if !hasMath {
		return results
	}

	processors := make([]processor, len(m.stmt.Fields))
	startIndex := 1
	for i, f := range m.stmt.Fields {
		processors[i], startIndex = getProcessor(f.Expr, startIndex)
	}

	mathResults := make([][]interface{}, len(results))
	for i := range mathResults {
		mathResults[i] = make([]interface{}, len(m.stmt.Fields)+1)
		// put the time in
		mathResults[i][0] = results[i][0]
		for j, p := range processors {
			mathResults[i][j+1] = p(results[i])
		}
	}
	return mathResults
}

// processFill will take the results and return new results (or the same if no fill modifications are needed) with whatever fill options the query has.
func (m *MapReduceJob) processFill(results [][]interface{}) [][]interface{} {
	// don't do anything if it's raw query results or we're supposed to leave the nulls
	if m.stmt.IsRawQuery || m.stmt.Fill == NullFill {
		return results
	}

	if m.stmt.Fill == NoFill {
		// remove any rows that have even one nil value. This one is tricky because they could have multiple
		// aggregates, but this option means that any row that has even one nil gets purged.
		newResults := make([][]interface{}, 0, len(results))
		for _, vals := range results {
			hasNil := false
			// start at 1 because the first value is always time
			for j := 1; j < len(vals); j++ {
				if vals[j] == nil {
					hasNil = true
					break
				}
			}
			if !hasNil {
				newResults = append(newResults, vals)
			}
		}
		return newResults
	}

	// they're either filling with previous values or a specific number
	for i, vals := range results {
		// start at 1 because the first value is always time
		for j := 1; j < len(vals); j++ {
			if vals[j] == nil {
				switch m.stmt.Fill {
				case PreviousFill:
					if i != 0 {
						vals[j] = results[i-1][j]
					}
				case NumberFill:
					vals[j] = m.stmt.FillValue
				}
			}
		}
	}
	return results
}
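
// Example (illustrative): given fill(previous) and the aggregate rows
//
//	[t0, 1.0]
//	[t1, nil]
//	[t2, 2.0]
//
// processFill rewrites the middle row as [t1, 1.0]. fill(none) would drop
// that row entirely, and fill(5) would rewrite it as [t1, 5].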

func getProcessor(expr Expr, startIndex int) (processor, int) {
	switch expr := expr.(type) {
	case *VarRef:
		return newEchoProcessor(startIndex), startIndex + 1
	case *Call:
		return newEchoProcessor(startIndex), startIndex + 1
	case *BinaryExpr:
		return getBinaryProcessor(expr, startIndex)
	case *ParenExpr:
		return getProcessor(expr.Expr, startIndex)
	case *NumberLiteral:
		return newLiteralProcessor(expr.Val), startIndex
	case *StringLiteral:
		return newLiteralProcessor(expr.Val), startIndex
	case *BooleanLiteral:
		return newLiteralProcessor(expr.Val), startIndex
	case *TimeLiteral:
		return newLiteralProcessor(expr.Val), startIndex
	case *DurationLiteral:
		return newLiteralProcessor(expr.Val), startIndex
	}
	panic("unreachable")
}
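
// Example (illustrative): for the field expression "value * 2", i.e.
// &BinaryExpr{Op: MUL, LHS: &VarRef{Val: "value"}, RHS: &NumberLiteral{Val: 2}},
// getProcessor(expr, 1) returns a binary evaluator whose LHS echoes column 1
// of each result row and whose RHS always yields 2. startIndex advances only
// for expressions that consume a result column (VarRef and Call); literals
// leave it unchanged.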

type processor func(values []interface{}) interface{}

func newEchoProcessor(index int) processor {
	return func(values []interface{}) interface{} {
		return values[index]
	}
}

func newLiteralProcessor(val interface{}) processor {
	return func(values []interface{}) interface{} {
		return val
	}
}

func getBinaryProcessor(expr *BinaryExpr, startIndex int) (processor, int) {
	lhs, index := getProcessor(expr.LHS, startIndex)
	rhs, index := getProcessor(expr.RHS, index)
	return newBinaryExprEvaluator(expr.Op, lhs, rhs), index
}

func newBinaryExprEvaluator(op Token, lhs, rhs processor) processor {
	switch op {
	case ADD:
		return func(values []interface{}) interface{} {
			l := lhs(values)
			r := rhs(values)
			if lv, ok := l.(float64); ok {
				if rv, ok := r.(float64); ok {
					return lv + rv
				}
			}
			return nil
		}
	case SUB:
		return func(values []interface{}) interface{} {
			l := lhs(values)
			r := rhs(values)
			if lv, ok := l.(float64); ok {
				if rv, ok := r.(float64); ok {
					return lv - rv
				}
			}
			return nil
		}
	case MUL:
		return func(values []interface{}) interface{} {
			l := lhs(values)
			r := rhs(values)
			if lv, ok := l.(float64); ok {
				if rv, ok := r.(float64); ok {
					return lv * rv
				}
			}
			return nil
		}
	case DIV:
		return func(values []interface{}) interface{} {
			l := lhs(values)
			r := rhs(values)
			if lv, ok := l.(float64); ok {
				if rv, ok := r.(float64); ok {
					// guard against division by zero
					if rv != 0 {
						return lv / rv
					}
				}
			}
			return nil
		}
	default:
		// we shouldn't get here, but give them back nils if it goes this way
		return func(values []interface{}) interface{} {
			return nil
		}
	}
}

func (m *MapReduceJob) resultsEmpty(resultValues [][]interface{}) bool {
	for _, vals := range resultValues {
		// start the loop at 1 because we want to skip over the time value
		for i := 1; i < len(vals); i++ {
			if vals[i] != nil {
				return false
			}
		}
	}
	return true
}

// processRawResults will handle converting the reduce results from a raw query into a Row
func (m *MapReduceJob) processRawResults(resultValues [][]interface{}) *Row {
	selectNames := m.stmt.NamesInSelect()

	// ensure that time is in the select names and in the first position
	hasTime := false
	for i, n := range selectNames {
		if n == "time" {
			if i != 0 {
				tmp := selectNames[0]
				selectNames[0] = "time"
				selectNames[i] = tmp
			}
			hasTime = true
		}
	}

	// time should always be in the list of names they get back
	if !hasTime {
		selectNames = append([]string{"time"}, selectNames...)
	}

	// if they've selected only a single value we have to handle things a little differently
	singleValue := len(selectNames) == SelectColumnCountWithOneValue

	row := &Row{
		Name:    m.MeasurementName,
		Tags:    m.TagSet.Tags,
		Columns: selectNames,
	}

	// return an empty row if there are no results.
	// resultValues should have exactly one array of interfaces. For that array, the first element
	// will be a time that we ignore, and the second element will be an array of []*rawQueryMapOutput.
	if len(resultValues) == 0 || len(resultValues[0]) != ResultCountInRawResults {
		return row
	}

	// the results will have all of the raw mapper results, convert into the row
	for _, v := range resultValues[0][1].([]*rawQueryMapOutput) {
		vals := make([]interface{}, len(selectNames))

		if singleValue {
			vals[0] = time.Unix(0, v.timestamp).UTC()
			vals[1] = v.values
		} else {
			fields := v.values.(map[string]interface{})

			// time is always the first value
			vals[0] = time.Unix(0, v.timestamp).UTC()

			// populate the other values
			for i := 1; i < len(selectNames); i++ {
				vals[i] = fields[selectNames[i]]
			}
		}

		row.Values = append(row.Values, vals)
	}

	// apply limit and offset, if applicable
	// TODO: make this so it doesn't read the whole result set into memory
	if m.stmt.Limit > 0 || m.stmt.Offset > 0 {
		if m.stmt.Offset > len(row.Values) {
			row.Values = nil
		} else {
			limit := m.stmt.Limit
			if m.stmt.Offset+m.stmt.Limit > len(row.Values) {
				limit = len(row.Values) - m.stmt.Offset
			}
			row.Values = row.Values[m.stmt.Offset : m.stmt.Offset+limit]
		}
	}

	return row
}
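
// For example (illustrative): with 10 raw rows, LIMIT 5 OFFSET 8 clamps the
// limit to 10-8 = 2 and returns rows 8 and 9, while OFFSET 12 returns no rows.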

func (m *MapReduceJob) processAggregate(c *Call, reduceFunc ReduceFunc, resultValues [][]interface{}) error {
	mapperOutputs := make([]interface{}, len(m.Mappers))

	// initialize the mappers
	for _, mm := range m.Mappers {
		if err := mm.Begin(c, m.TMin); err != nil {
			return err
		}
	}

	// the first interval can be shorter than the rest when TMin doesn't fall
	// on an interval boundary
	firstInterval := m.interval
	if !m.stmt.IsRawQuery {
		firstInterval = (m.TMin/m.interval*m.interval + m.interval) - m.TMin
	}

	// populate the result values for each interval of time
	for i := range resultValues {
		// collect the results from each mapper
		for j, mm := range m.Mappers {
			interval := m.interval
			if i == 0 {
				interval = firstInterval
			}
			res, err := mm.NextInterval(interval)
			if err != nil {
				return err
			}
			mapperOutputs[j] = res
		}
		resultValues[i] = append(resultValues[i], reduceFunc(mapperOutputs))
	}

	return nil
}

type MapReduceJobs []*MapReduceJob

func (a MapReduceJobs) Len() int           { return len(a) }
func (a MapReduceJobs) Less(i, j int) bool { return bytes.Compare(a[i].Key(), a[j].Key()) == -1 }
func (a MapReduceJobs) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
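
// Example (illustrative): sort.Sort(MapReduceJobs(jobs)) orders jobs by key,
// i.e. by measurement name followed by tag set key.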

// Mapper will run through a map function. A single mapper will be created
// for each shard for each tagset that must be hit to satisfy a query.
// Mappers can point either to a local shard or to a remote server.
type Mapper interface {
	// Open will open the necessary resources to begin the map job. Could be connections to remote servers or
	// hitting the local bolt store.
	Open() error

	// Close will close the mapper (either the bolt transaction or the request).
	Close()

	// Begin will set up the mapper to run the map function for a given aggregate call starting at the passed in time.
	Begin(*Call, int64) error

	// NextInterval will get the time ordered next interval of the given interval size from the mapper. This is a
	// forward only operation from the start time passed into Begin. Will return nil when there is no more data to be read.
	// We pass the interval in here so that it can be varied over the period of the query. This is useful for the raw
	// data queries where we'd like to gradually adjust the amount of time we scan over.
	NextInterval(interval int64) (interface{}, error)
}
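
// A minimal sketch (illustrative; error handling elided) of how a MapReduceJob
// drives a Mapper for one aggregate call:
//
//	_ = mm.Open()
//	defer mm.Close()
//	_ = mm.Begin(call, tmin)
//	for {
//		res, err := mm.NextInterval(interval)
//		if res == nil || err != nil {
//			break
//		}
//		// feed res into the reduce function
//	}
//
// Here mm, call, tmin, and interval are placeholder names, not package API.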

type TagSet struct {
	Tags      map[string]string
	Filters   []Expr
	SeriesIDs []uint32
	Key       []byte
}

func (t *TagSet) AddFilter(id uint32, filter Expr) {
	t.SeriesIDs = append(t.SeriesIDs, id)
	t.Filters = append(t.Filters, filter)
}

// Planner represents an object for creating execution plans.
type Planner struct {
	DB DB

	// Returns the current time. Defaults to time.Now().
	Now func() time.Time
}

// NewPlanner returns a new instance of Planner.
func NewPlanner(db DB) *Planner {
	return &Planner{
		DB:  db,
		Now: time.Now,
	}
}
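
// A minimal usage sketch (illustrative; db is any DB implementation and stmt
// a parsed *SelectStatement, both placeholder names):
//
//	p := NewPlanner(db)
//	e, err := p.Plan(stmt)
//	if err != nil {
//		// handle error
//	}
//	ch, _ := e.Execute()
//	for row := range ch {
//		// consume each *Row; the channel closes when execution finishes
//	}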

// Plan creates an execution plan for the given SelectStatement and returns an Executor.
func (p *Planner) Plan(stmt *SelectStatement) (*Executor, error) {
	now := p.Now().UTC()

	// Replace instances of "now()" with the current time.
	stmt.Condition = Reduce(stmt.Condition, &nowValuer{Now: now})

	// Begin an unopened transaction.
	tx, err := p.DB.Begin()
	if err != nil {
		return nil, err
	}

	// Determine group by tag keys.
	interval, tags, err := stmt.Dimensions.Normalize()
	if err != nil {
		return nil, err
	}

	// TODO: handle queries that select from multiple measurements. This assumes that we're only selecting from a single one.
	jobs, err := tx.CreateMapReduceJobs(stmt, tags)
	if err != nil {
		return nil, err
	}

	// LIMIT and OFFSET the unique series
	if stmt.SLimit > 0 || stmt.SOffset > 0 {
		if stmt.SOffset > len(jobs) {
			jobs = nil
		} else {
			if stmt.SOffset+stmt.SLimit > len(jobs) {
				stmt.SLimit = len(jobs) - stmt.SOffset
			}
			jobs = jobs[stmt.SOffset : stmt.SOffset+stmt.SLimit]
		}
	}
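
	// For example (illustrative): with 4 jobs, SLIMIT 3 SOFFSET 2 clamps the
	// limit to 4-2 = 2 and keeps jobs[2:4], while SOFFSET 5 drops all jobs.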

	for _, j := range jobs {
		j.interval = interval.Nanoseconds()
		j.stmt = stmt
	}

	return &Executor{tx: tx, stmt: stmt, jobs: jobs, interval: interval.Nanoseconds()}, nil
}

// Executor executes all of a query's MapReduceJobs and combines their
// results into rows.
type Executor struct {
	tx       Tx               // transaction
	stmt     *SelectStatement // original statement
	jobs     []*MapReduceJob  // one job per unique tag set that will return in the query
	interval int64            // the group by interval of the query in nanoseconds
}

// Execute begins execution of the query and returns a channel to receive rows.
func (e *Executor) Execute() (<-chan *Row, error) {
	// Open transaction.
	for _, j := range e.jobs {
		if err := j.Open(); err != nil {
			e.close()
			return nil, err
		}
	}

	// Create output channel and stream data in a separate goroutine.
	out := make(chan *Row, 0)
	go e.execute(out)

	return out, nil
}

func (e *Executor) close() {
	for _, j := range e.jobs {
		j.Close()
	}
}

// execute runs in a separate goroutine and streams data from processors.
func (e *Executor) execute(out chan *Row) {
	// Ensure the MRJobs close after execution.
	defer e.close()

	// If we have multiple tag sets we'll want to filter out the empty ones
	filterEmptyResults := len(e.jobs) > 1

	// Execute each MRJob serially
	for _, j := range e.jobs {
		j.Execute(out, filterEmptyResults)
	}

	// Mark the end of the output channel.
	close(out)
}

// Row represents a single row returned from the execution of a statement.
type Row struct {
	Name    string            `json:"name,omitempty"`
	Tags    map[string]string `json:"tags,omitempty"`
	Columns []string          `json:"columns"`
	Values  [][]interface{}   `json:"values,omitempty"`
	Err     error             `json:"err,omitempty"`
}

// tagsHash returns a hash of tag key/value pairs.
func (r *Row) tagsHash() uint64 {
	h := fnv.New64a()
	keys := r.tagsKeys()
	for _, k := range keys {
		h.Write([]byte(k))
		h.Write([]byte(r.Tags[k]))
	}
	return h.Sum64()
}

// tagsKeys returns a sorted list of tag keys.
func (r *Row) tagsKeys() []string {
	a := make([]string, 0, len(r.Tags))
	for k := range r.Tags {
		a = append(a, k)
	}
	sort.Strings(a)
	return a
}

// Rows represents a list of rows that can be sorted consistently by name/tag.
type Rows []*Row

func (p Rows) Len() int { return len(p) }

func (p Rows) Less(i, j int) bool {
	// Sort by name first.
	if p[i].Name != p[j].Name {
		return p[i].Name < p[j].Name
	}

	// Sort by tag set hash. Tags don't have a meaningful sort order so we
	// just compute a hash and sort by that instead. This allows the tests
	// to receive rows in a predictable order every time.
	return p[i].tagsHash() < p[j].tagsHash()
}

func (p Rows) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
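
// Example (illustrative): sort.Sort(rows) on a Rows value yields a
// deterministic order: by name first, then by tag set hash.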