initial copy of ifql repo
# Version string taken from `git describe`; only assigned when VERSION is not
# already set (for example on the command line or in the environment).
VERSION ?= $(shell git describe --always --tags)
# Subdirectories that double as targets (see `all`, `clean` and `.PHONY`).
SUBDIRS := ast parser promql
# Build tags handed to the go tool. GO_TAGS is not assigned in the visible
# part of this file, so it is presumably supplied by the caller.
GO_ARGS=-tags '$(GO_TAGS)'
# Go command lines carrying GO_ARGS; exported so that processes started from
# recipes see them in their environment.
export GO_BUILD=go build $(GO_ARGS)
export GO_TEST=go test $(GO_ARGS)
export GO_GENERATE=go generate $(GO_ARGS)
# Every non-test Go source file below the current directory.
SOURCES := $(shell find . -name '*.go' -not -name '*_test.go')
# Same search as SOURCES, but with the ./vendor tree pruned.
SOURCES_NO_VENDOR := $(shell find . -path ./vendor -prune -o -name "*.go" -not -name '*_test.go' -print)
all: Gopkg.lock $(SUBDIRS) bin/ifql bin/ifqld
$(SUBDIRS): bin/pigeon bin/cmpgen
bin/ifql: $(SOURCES) bin/pigeon bin/cmpgen
$(GO_BUILD) -i -o bin/ifql ./cmd/ifql
bin/ifqld: $(SOURCES) bin/pigeon bin/cmpgen
$(GO_BUILD) -i -o bin/ifqld ./cmd/ifqld
bin/pigeon: ./vendor/
go build -i -o bin/pigeon ./vendor/
bin/cmpgen: ./ast/asttest/cmpgen/main.go
go build -i -o bin/cmpgen ./ast/asttest/cmpgen
Gopkg.lock: Gopkg.toml
dep ensure -v
vendor/ Gopkg.lock
dep ensure -v
goimports -w $^
dep ensure -v -update
test: Gopkg.lock bin/ifql
$(GO_TEST) ./...
test-race: Gopkg.lock bin/ifql
$(GO_TEST) -race ./...
bench: Gopkg.lock bin/ifql
$(GO_TEST) -bench=. -run=^$$ ./...
go build -i -o bin/goreleaser ./vendor/
dist: bin/goreleaser
PATH=./bin:${PATH} goreleaser --rm-dist --release-notes
release: dist release-docker
docker build -t .
docker tag${VERSION}
docker push
docker push${VERSION}
clean: $(SUBDIRS)
rm -rf bin dist
# Targets that name commands rather than files. release-docker is included
# because `release` lists it as a prerequisite; `docker` is kept so existing
# invocations are unaffected.
.PHONY: all clean $(SUBDIRS) update test test-race bench release release-docker docker dist fmt
# IFQL (Influx Query Language)
`ifqld` is an HTTP server for running **IFQL** queries to one or more InfluxDB
`ifqld` runs on port `8093` by default
### Specification
Here is the rough design specification for details until we get documentation up:
1. Upgrade to InfluxDB >= 1.4.1
2. Update the InfluxDB configuration file to enable **IFQL** processing; restart
the InfluxDB server. InfluxDB will open port `8082` to accept **IFQL** queries.
> **This port has no authentication.**
enabled = true
log-enabled = true
bind-address = ":8082"
3. Download `ifqld` and install from
4. Start `ifqld` with the InfluxDB host and port of `8082`. To run in federated
mode (see below), add the `--host` option for each InfluxDB host.
ifqld --verbose --host localhost:8082
5. To run a query POST an **IFQL** query string to `/query` as the `q` parameter:
curl -XPOST --data-urlencode \
|> filter(fn: (r) => r["_measurement"] == "cpu" AND r["_field"] == "usage_user")
|> range(start:-170h)
|> sum()' \
#### docker compose
To spin up a testing environment you can run:
docker-compose up
Inside the `root` directory. It will spin up an `influxdb` and `ifqld` daemon
ready to be used. `influxd` is exposed on port `8086` and port `8082`.
### Prometheus metrics
Metrics are exposed on `/metrics`.
`ifqld` records the number of queries and the number of different functions within **IFQL** queries
### Federated Mode
By passing the `--host` option multiple times `ifqld` will query multiple
InfluxDB servers.
For example:
ifqld --host influxdb1:8082 --host influxdb2:8082
The results from multiple InfluxDB are merged together as if there was
one server.
### Basic Syntax
IFQL constructs a query by starting with a table of data and passing the table through transformation steps to describe the desired query operations.
Transformations are represented as functions which take a table of data as an input argument and return a new table that has been transformed.
There is a special function `from` which is a source function, meaning it does not accept a table as input, but rather produces a table.
All other transformation functions accept at least one table and return a table as a result.
For example to get the last point for each series in a database you start by creating a table using `from` and then pass that table into the `limit` function.
// Select the last point per series in the telegraf database.
limit(table:from(db:"telegraf"), n:1)
Since it is common to chain long lists of transformations together the pipe forward operator `|>` can be used to make reading the code easier.
These two expressions are equivalent:
// Select the last point per series in the telegraf database.
limit(table:from(db:"telegraf"), n:1)
// Same as above, but uses the pipe forward operator to indicate the flow of data.
from(db:"telegraf") |> limit(n:1)
Long lists of functions can thus be chained together:
// Get the first point per host from the last minute of data.
from(db:"telegraf") |> range(start:-1m) |> group(by:["host"]) |> first()
### Supported Functions
Below is a list of supported functions.
#### from
Starting point for all queries. Get data from the specified database.
Example: `from(db:"telegraf")`
##### options
* `db` string
* `hosts` array of strings
`from(db:"telegraf", hosts:["host1", "host2"])`
#### count
Counts the number of results
##### options
* `useStartTime` boolean
Use the start time as the timestamp of the resulting aggregate.
Example: `from(db:"telegraf") |> count()`
#### filter
Filters the results using an expression
|> filter(fn: (r) => r["_measurement"]=="cpu" AND
r["_field"] == "usage_system" AND
r["service"] == "app-server")
|> range(start:-12h)
|> max()
##### options
* `fn` function(record) bool
Function to use when filtering the records.
The function must accept a single parameter which will be the records and return a boolean value.
Records which evaluate to true, will be included in the results.
#### first
Returns the first result of the query
##### options
* `useStartTime` boolean
Use the start time as the timestamp of the resulting aggregate.
Example: `from(db:"telegraf") |> first()`
#### group
Groups results by a user-specified set of tags
##### options
* `by` array of strings
Group by these specific tag names
Cannot be used with `except` option
Example: `from(db: "telegraf") |> range(start: -30m) |> group(by: ["tag_a", "tag_b"])`
* `keep` array of strings
Keep specific tag keys that were not in `by` in the results
Example: `from(db: "telegraf") |> range(start: -30m) |> group(by: ["tag_a", "tag_b"], keep:["tag_c"])`
* `except` array of strings
Group by all but these tag keys
Cannot be used with `by` option
Example: `from(db: "telegraf") |> range(start: -30m) |> group(except: ["tag_a"], keep:["tag_b", "tag_c"])`
#### join
Join two time series together on time and the list of `on` keys.
cpu = from(db: "telegraf") |> filter(fn: (r) => r["_measurement"] == "cpu" and r["_field"] == "usage_user") |> range(start: -30m)
mem = from(db: "telegraf") |> filter(fn: (r) => r["_measurement"] == "mem" and r["_field"] == "used_percent") |> range(start: -30m)
join(tables:{cpu:cpu, mem:mem}, on:["host"], fn: (tables) => tables.cpu["_value"] + tables.mem["_value"])
##### options
* `tables` map of tables
Map of tables to join. Currently only two tables are allowed.
* `on` array of strings
List of tag keys that when equal produces a result set.
* `fn`
Defines the function that merges the values of the tables.
The function must be defined to accept a single parameter.
The parameter is a map, which uses the same keys found in the `tables` map.
The function is called for each joined set of records from the tables.
#### last
Returns the last result of the query
##### options
* `useStartTime` boolean
Use the start time as the timestamp of the resulting aggregate.
Example: `from(db: "telegraf") |> last()`
#### limit
Restricts the number of rows returned in the results.
Example: `from(db: "telegraf") |> limit(n: 10)`
#### map
Applies a function to each row of the table.
##### options
* `fn` function
Function to apply to each row. The return value of the function may be a single value or an object.
|> filter(fn: (r) => r["_measurement"]=="cpu" AND
r["_field"] == "usage_system" AND
r["service"] == "app-server")
|> range(start:-12h)
// Square the value
|> map(fn: (r) => r._value * r._value)
|> filter(fn: (r) => r["_measurement"]=="cpu" AND
r["_field"] == "usage_system" AND
r["service"] == "app-server")
|> range(start:-12h)
// Square the value and keep the original value
|> map(fn: (r) => ({value: r._value, value2:r._value * r._value}))
#### max
Returns the max value within the results
##### options
* `useStartTime` boolean
Use the start time as the timestamp of the resulting aggregate.
|> filter(fn: (r) => r["_measurement"]=="cpu" AND
r["_field"] == "usage_system" AND
r["service"] == "app-server")
|> range(start:-12h)
|> window(every:10m)
|> max()
#### mean
Returns the mean of the values within the results
##### options
* `useStartTime` boolean
Use the start time as the timestamp of the resulting aggregate.
|> filter(fn: (r) => r["_measurement"] == "mem" AND
r["_field"] == "used_percent")
|> range(start:-12h)
|> window(every:10m)
|> mean()
#### min
Returns the min value within the results
##### options
* `useStartTime` boolean
Use the start time as the timestamp of the resulting aggregate.
|> filter(fn: (r) => r[ "_measurement"] == "cpu" AND
r["_field" ]== "usage_system")
|> range(start:-12h)
|> window(every:10m, period: 5m)
|> min()
#### range
Filters the results by time boundaries
|> filter(fn: (r) => r["_measurement"] == "cpu" AND
r["_field"] == "usage_system")
|> range(start:-12h, stop: -15m)
##### options
* start duration
Specifies the oldest time to be included in the results
* stop duration or timestamp
Specifies exclusive upper time bound
Defaults to "now"
#### sample
Sample values from a table.
##### options
* `useStartTime` boolean
Use the start time as the timestamp of the resulting aggregate.
* `n`
Sample every Nth element
* `pos`
Position offset from start of results to begin sampling
`pos` must be less than `n`
If `pos` less than 0, a random offset is used.
Default is -1 (random offset)
Example to sample every fifth point starting from the second element:
|> filter(fn: (r) => r["_measurement"] == "cpu" AND
r["_field"] == "usage_system")
|> range(start:-1d)
|> sample(n: 5, pos: 1)
#### set
Add tag of key and value to set
Example: `from(db: "telegraf") |> set(key: "mykey", value: "myvalue")`
##### options
* `key` string
* `value` string
#### skew
Skew of the results
##### options
* `useStartTime` boolean
Use the start time as the timestamp of the resulting aggregate.
Example: `from(db: "telegraf") |> range(start: -30m, stop: -15m) |> skew()`
#### sort
Sorts the results by the specified columns
Default sort is ascending
|> filter(fn: (r) => r["_measurement"] == "system" AND
r["_field"] == "uptime")
|> range(start:-12h)
|> sort(cols:["region", "host", "value"])
##### options
* `cols` array of strings
List of columns used to sort; precedence from left to right.
Default is `["value"]`
For example, this sorts by uptime descending to find the longest
running instances.
|> filter(fn: (r) => r["_measurement"] == "system" AND
r["_field"] == "uptime")
|> range(start:-12h)
|> sort(desc: true)
* `desc` bool
Sort results descending
#### spread
Difference between min and max values
##### options
* `useStartTime` boolean
Use the start time as the timestamp of the resulting aggregate.
Example: `from(db: "telegraf") |> range(start: -30m) |> spread()`
#### stddev
Standard Deviation of the results
##### options
* `useStartTime` boolean
Use the start time as the timestamp of the resulting aggregate.
Example: `from(db: "telegraf") |> range(start: -30m, stop: -15m) |> stddev()`
#### sum
Sum of the results
##### options
* `useStartTime` boolean
Use the start time as the timestamp of the resulting aggregate.
Example: `from(db: "telegraf") |> range(start: -30m, stop: -15m) |> sum()`
#### toBool
Convert a value to a bool.
Example: `from(db: "telegraf") |> filter(fn:(r) => r._measurement == "mem" and r._field == "used") |> toBool()`
The function `toBool` is defined as `toBool = (table=<-) => table |> map(fn:(r) => bool(v:r._value))`.
If you need to convert other columns use the `map` function directly with the `bool` function.
#### toInt
Convert a value to an int.
Example: `from(db: "telegraf") |> filter(fn:(r) => r._measurement == "mem" and r._field == "used") |> toInt()`
The function `toInt` is defined as `toInt = (table=<-) => table |> map(fn:(r) => int(v:r._value))`.
If you need to convert other columns use the `map` function directly with the `int` function.
#### toFloat
Convert a value to a float.
Example: `from(db: "telegraf") |> filter(fn:(r) => r._measurement == "mem" and r._field == "used") |> toFloat()`
The function `toFloat` is defined as `toFloat = (table=<-) => table |> map(fn:(r) => float(v:r._value))`.
If you need to convert other columns use the `map` function directly with the `float` function.
#### toDuration
Convert a value to a duration.
Example: `from(db: "telegraf") |> filter(fn:(r) => r._measurement == "mem" and r._field == "used") |> toDuration()`
The function `toDuration` is defined as `toDuration = (table=<-) => table |> map(fn:(r) => duration(v:r._value))`.
If you need to convert other columns use the `map` function directly with the `duration` function.
#### toString
Convert a value to a string.
Example: `from(db: "telegraf") |> filter(fn:(r) => r._measurement == "mem" and r._field == "used") |> toString()`
The function `toString` is defined as `toString = (table=<-) => table |> map(fn:(r) => string(v:r._value))`.
If you need to convert other columns use the `map` function directly with the `string` function.
#### toTime
Convert a value to a time.
Example: `from(db: "telegraf") |> filter(fn:(r) => r._measurement == "mem" and r._field == "used") |> toTime()`
The function `toTime` is defined as `toTime = (table=<-) => table |> map(fn:(r) => time(v:r._value))`.
If you need to convert other columns use the `map` function directly with the `time` function.
#### toUInt
Convert a value to a uint.
Example: `from(db: "telegraf") |> filter(fn:(r) => r._measurement == "mem" and r._field == "used") |> toUInt()`
The function `toUInt` is defined as `toUInt = (table=<-) => table |> map(fn:(r) => uint(v:r._value))`.
If you need to convert other columns use the `map` function directly with the `uint` function.
#### window
Partitions the results by a given time range
##### options
* `every` duration
Duration of time between windows
Defaults to `period`'s value
|> range(start:-12h)
|> window(every:10m)
|> max()
* `period` duration
Duration of the windowed partition
|> range(start:-12h)
|> window(every:10m)
|> max()
Default to `every`'s value
* `start` time
The time of the initial window partition.
* `round` duration
Rounds a window's bounds to the nearest duration
|> range(start:-12h)
|> window(every:10m)
|> max()
### Custom Functions
IFQL also allows the user to define their own functions.
The function syntax is:
(parameter list) => <function body>
The list of parameters is simply a list of identifiers with optional default values.
The function body is either a single expression which is returned or a block of statements.
Functions may be assigned to identifiers to give them a name.
// Define a simple addition function
add = (a,b) => a + b
// Define a helper function to get data from a telegraf measurement.
// By default the database is expected to be named "telegraf".
telegrafM = (measurement, db="telegraf") =>
|> filter(fn: (r) => r._measurement == measurement)
// Define a helper function for a common join operation
// Use block syntax since we have more than a single expression
abJoin = (measurementA, measurementB, on) => {
a = telegrafM(measurement:measurementA)
b = telegrafM(measurement:measurementB)
return join(
tables:{a:a, b:b},
// Return a map from the join fn,
// this creates a table with a column for each key in the map.
// Note the () around the map to indicate a single map expression instead of function block.
fn: (t) => ({
a: t.a._value,
b: t.b._value,
#### Pipe Arguments
Functions may also declare that an argument can be piped into from a pipe forward operator by specifying a special default value:
// Define add function which accepts `a` as the piped argument.
add = (a=<-, b) => a + b
// Call add using the pipe forward syntax.
1 |> add(b:3) // 4
// Define measurement function which accepts table as the piped argument.
measurement = (m, table=<-) => table |> filter(fn: (r) => r._measurement == m)
// Define field function which accepts table as the piped argument
field = (field, table=<-) => table |> filter(fn: (r) => r._field == field)
// Query usage_idle from the cpu measurement and the telegraf database.
// Using the measurement and field functions.
|> measurement(m:"cpu")
|> field(field:"usage_idle")
SUBDIRS := asttest
all: $(SUBDIRS)
# `all` is a command target, not a file, so it is declared phony alongside
# the subdirectory targets and `clean`.
.PHONY: all $(SUBDIRS) clean
@ -0,0 +1,850 @@
package ast
import (
// Position represents a specific location in the source
type Position struct {
Line int `json:"line"` // Line is the line in the source marked by this position
Column int `json:"column"` // Column is the column in the source marked by this position
// SourceLocation represents the location of a node in the AST
type SourceLocation struct {
Start Position `json:"start"` // Start is the location in the source the node starts
End Position `json:"end"` // End is the location in the source the node ends
Source *string `json:"source,omitempty"` // Source is optional raw source
// Node represents a node in the InfluxDB abstract syntax tree.
type Node interface {
// Type reports the variant type of the node as a string, e.g. "Program".
Type() string // Type property is a string that contains the variant type of the node
// Location returns the source location of the node.
Location() *SourceLocation
// Copy returns a copy of the node.
Copy() Node
// All nodes must support JSON marshalling.
func (*Program) node() {}
func (*BlockStatement) node() {}
func (*ExpressionStatement) node() {}
func (*ReturnStatement) node() {}
func (*VariableDeclaration) node() {}
func (*VariableDeclarator) node() {}
func (*ArrayExpression) node() {}
func (*ArrowFunctionExpression) node() {}
func (*BinaryExpression) node() {}
func (*CallExpression) node() {}
func (*ConditionalExpression) node() {}
func (*LogicalExpression) node() {}
func (*MemberExpression) node() {}
func (*PipeExpression) node() {}
func (*ObjectExpression) node() {}
func (*UnaryExpression) node() {}
func (*Property) node() {}
func (*Identifier) node() {}
func (*BooleanLiteral) node() {}
func (*DateTimeLiteral) node() {}
func (*DurationLiteral) node() {}
func (*FloatLiteral) node() {}
func (*IntegerLiteral) node() {}
func (*PipeLiteral) node() {}
func (*RegexpLiteral) node() {}
func (*StringLiteral) node() {}
func (*UnsignedIntegerLiteral) node() {}
// BaseNode holds the attributes every expression or statement should have
type BaseNode struct {
Loc *SourceLocation `json:"location,omitempty"`
// Location is the source location of the Node
func (b *BaseNode) Location() *SourceLocation { return b.Loc }
// Program represents a complete program source tree
type Program struct {
Body []Statement `json:"body"`
// Type is the abstract type
func (*Program) Type() string { return "Program" }
func (p *Program) Copy() Node {
np := new(Program)
*np = *p
if len(p.Body) > 0 {
np.Body = make([]Statement, len(p.Body))
for i, s := range p.Body {
np.Body[i] = s.Copy().(Statement)
return np
// Statement Perhaps we don't even want statements nor expression statements
type Statement interface {
func (*BlockStatement) stmt() {}
func (*ExpressionStatement) stmt() {}
func (*ReturnStatement) stmt() {}
func (*VariableDeclaration) stmt() {}
// BlockStatement is a set of statements
type BlockStatement struct {
Body []Statement `json:"body"`
// Type is the abstract type
func (*BlockStatement) Type() string { return "BlockStatement" }
func (s *BlockStatement) Copy() Node {
ns := new(BlockStatement)
*ns = *s
if len(s.Body) > 0 {
ns.Body = make([]Statement, len(s.Body))
for i, stmt := range s.Body {
ns.Body[i] = stmt.Copy().(Statement)
return ns
// ExpressionStatement may consist of an expression that does not return a value and is executed solely for its side-effects.
type ExpressionStatement struct {
Expression Expression `json:"expression"`
// Type is the abstract type
func (*ExpressionStatement) Type() string { return "ExpressionStatement" }
func (s *ExpressionStatement) Copy() Node {
if s == nil {
return s
ns := new(ExpressionStatement)
*ns = *s
ns.Expression = s.Expression.Copy().(Expression)
return ns
// ReturnStatement defines an Expression to return
type ReturnStatement struct {
Argument Expression `json:"argument"`
// Type is the abstract type
func (*ReturnStatement) Type() string { return "ReturnStatement" }
func (s *ReturnStatement) Copy() Node {
if s == nil {
return s
ns := new(ReturnStatement)
*ns = *s
ns.Argument = s.Argument.Copy().(Expression)
return ns
// VariableDeclaration declares one or more variables using assignment
type VariableDeclaration struct {
Declarations []*VariableDeclarator `json:"declarations"`
// Type is the abstract type
func (*VariableDeclaration) Type() string { return "VariableDeclaration" }
func (d *VariableDeclaration) Copy() Node {
if d == nil {
return d
nd := new(VariableDeclaration)
*nd = *d
if len(d.Declarations) > 0 {
nd.Declarations = make([]*VariableDeclarator, len(d.Declarations))
for i, decl := range d.Declarations {
nd.Declarations[i] = decl.Copy().(*VariableDeclarator)
return nd
// VariableDeclarator represents the declaration of a variable
type VariableDeclarator struct {
ID *Identifier `json:"id"`
Init Expression `json:"init"`
// Type is the abstract type
func (*VariableDeclarator) Type() string { return "VariableDeclarator" }
func (d *VariableDeclarator) Copy() Node {
if d == nil {
return d
nd := new(VariableDeclarator)
*nd = *d
nd.Init = d.Init.Copy().(Expression)
return nd
// Expression represents an action that can be performed by InfluxDB that can be evaluated to a value.
type Expression interface {
func (*ArrayExpression) expression() {}
func (*ArrowFunctionExpression) expression() {}
func (*BinaryExpression) expression() {}
func (*BooleanLiteral) expression() {}
func (*CallExpression) expression() {}
func (*ConditionalExpression) expression() {}
func (*DateTimeLiteral) expression() {}
func (*DurationLiteral) expression() {}
func (*FloatLiteral) expression() {}
func (*Identifier) expression() {}
func (*IntegerLiteral) expression() {}
func (*LogicalExpression) expression() {}
func (*MemberExpression) expression() {}
func (*ObjectExpression) expression() {}
func (*PipeExpression) expression() {}
func (*PipeLiteral) expression() {}
func (*RegexpLiteral) expression() {}
func (*StringLiteral) expression() {}
func (*UnaryExpression) expression() {}
func (*UnsignedIntegerLiteral) expression() {}
// CallExpression represents a function call whose callee may be an Identifier or MemberExpression
type CallExpression struct {
Callee Expression `json:"callee"`
Arguments []Expression `json:"arguments,omitempty"`
// Type is the abstract type
func (*CallExpression) Type() string { return "CallExpression" }
// Copy returns a copy of the CallExpression. The struct is copied by value,
// then Callee and each entry of Arguments are replaced with their own copies.
// A nil receiver is returned unchanged. Callee is copied unconditionally, so
// it must not be nil.
func (e *CallExpression) Copy() Node {
if e == nil {
return e
ne := new(CallExpression)
*ne = *e
ne.Callee = e.Callee.Copy().(Expression)
// An empty Arguments slice keeps whatever the value copy above produced.
if len(e.Arguments) > 0 {
ne.Arguments = make([]Expression, len(e.Arguments))
for i, arg := range e.Arguments {
ne.Arguments[i] = arg.Copy().(Expression)
return ne
type PipeExpression struct {
Argument Expression `json:"argument"`
Call *CallExpression `json:"call"`
// Type is the abstract type
func (*PipeExpression) Type() string { return "PipeExpression" }
func (e *PipeExpression) Copy() Node {
if e == nil {
return e
ne := new(PipeExpression)
*ne = *e
ne.Argument = e.Argument.Copy().(Expression)
ne.Call = e.Call.Copy().(*CallExpression)
return ne
// MemberExpression represents calling a property of a CallExpression
type MemberExpression struct {
Object Expression `json:"object"`
Property Expression `json:"property"`
// Type is the abstract type
func (*MemberExpression) Type() string { return "MemberExpression" }
func (e *MemberExpression) Copy() Node {
if e == nil {
return e
ne := new(MemberExpression)
*ne = *e
ne.Object = e.Object.Copy().(Expression)
ne.Property = e.Property.Copy().(Expression)
return ne
type ArrowFunctionExpression struct {
Params []*Property `json:"params"`
Body Node `json:"body"`
// Type is the abstract type
func (*ArrowFunctionExpression) Type() string { return "ArrowFunctionExpression" }
func (e *ArrowFunctionExpression) Copy() Node {
if e == nil {
return e
ne := new(ArrowFunctionExpression)
*ne = *e
if len(e.Params) > 0 {
ne.Params = make([]*Property, len(e.Params))
for i, param := range e.Params {
ne.Params[i] = param.Copy().(*Property)
ne.Body = e.Body.Copy()
return ne
// OperatorKind are Equality and Arithmetic operators.
// Result of evaluating an equality operator is always of type Boolean based on whether the
// comparison is true
// Arithmetic operators take numerical values (either literals or variables) as their operands
// and return a single numerical value.
type OperatorKind int
const (
opBegin OperatorKind = iota
// String returns the token text registered for the operator in
// OperatorTokens. An operator without an entry yields the empty string.
func (o OperatorKind) String() string {
return OperatorTokens[o]
// OperatorLookup converts the operators to OperatorKind
func OperatorLookup(op string) OperatorKind {
return operators[op]
// MarshalText encodes the operator as its token text from OperatorTokens.
// It returns an error when the operator has no entry in that map.
func (o OperatorKind) MarshalText() ([]byte, error) {
text, ok := OperatorTokens[o]
if !ok {
return nil, fmt.Errorf("unknown operator %d", int(o))
return []byte(text), nil
// UnmarshalText sets the operator from its token text by looking data up in
// the operators map. It returns an error when the text is not a known
// operator.
func (o *OperatorKind) UnmarshalText(data []byte) error {
var ok bool
// The lookup result is stored in *o even when the key is missing, so on
// failure *o holds the zero OperatorKind.
*o, ok = operators[string(data)]
if !ok {
return fmt.Errorf("unknown operator %q", string(data))
return nil
// BinaryExpression uses binary operators to act on two operands in an expression.
// BinaryExpression includes relational and arithmetic operators
type BinaryExpression struct {
Operator OperatorKind `json:"operator"`
Left Expression `json:"left"`
Right Expression `json:"right"`
// Type is the abstract type
func (*BinaryExpression) Type() string { return "BinaryExpression" }
func (e *BinaryExpression) Copy() Node {
if e == nil {
return e
ne := new(BinaryExpression)
*ne = *e
ne.Left = e.Left.Copy().(Expression)
ne.Right = e.Right.Copy().(Expression)
return ne
// UnaryExpression uses operators to act on a single operand in an expression.
type UnaryExpression struct {
Operator OperatorKind `json:"operator"`
Argument Expression `json:"argument"`
// Type is the abstract type
func (*UnaryExpression) Type() string { return "UnaryExpression" }
func (e *UnaryExpression) Copy() Node {
if e == nil {
return e
ne := new(UnaryExpression)
*ne = *e
ne.Argument = e.Argument.Copy().(Expression)
return ne
// LogicalOperatorKind are used with boolean (logical) values
type LogicalOperatorKind int
const (
logOpBegin LogicalOperatorKind = iota
func (o LogicalOperatorKind) String() string {
return LogicalOperatorTokens[o]
// LogicalOperatorLookup converts the operators to LogicalOperatorKind
func LogicalOperatorLookup(op string) LogicalOperatorKind {
return logOperators[op]
func (o LogicalOperatorKind) MarshalText() ([]byte, error) {
text, ok := LogicalOperatorTokens[o]
if !ok {
return nil, fmt.Errorf("unknown logical operator %d", int(o))
return []byte(text), nil
func (o *LogicalOperatorKind) UnmarshalText(data []byte) error {
var ok bool
*o, ok = logOperators[string(data)]
if !ok {
return fmt.Errorf("unknown logical operator %q", string(data))
return nil
// LogicalExpression represent the rule conditions that collectively evaluate to either true or false.
// `or` expressions compute the disjunction of two boolean expressions and return boolean values.
// `and` expressions compute the conjunction of two boolean expressions and return boolean values.
type LogicalExpression struct {
Operator LogicalOperatorKind `json:"operator"`
Left Expression `json:"left"`
Right Expression `json:"right"`
// Type is the abstract type
func (*LogicalExpression) Type() string { return "LogicalExpression" }
func (e *LogicalExpression) Copy() Node {
if e == nil {
return e
ne := new(LogicalExpression)
*ne = *e
ne.Left = e.Left.Copy().(Expression)
ne.Right = e.Right.Copy().(Expression)
return ne
// ArrayExpression is used to create and directly specify the elements of an array object
type ArrayExpression struct {
Elements []Expression `json:"elements"`
// Type is the abstract type
func (*ArrayExpression) Type() string { return "ArrayExpression" }
func (e *ArrayExpression) Copy() Node {
if e == nil {
return e
ne := new(ArrayExpression)
*ne = *e
if len(e.Elements) > 0 {
ne.Elements = make([]Expression, len(e.Elements))
for i, el := range e.Elements {
ne.Elements[i] = el.Copy().(Expression)
return ne
// ObjectExpression allows the declaration of an anonymous object within a declaration.
type ObjectExpression struct {
Properties []*Property `json:"properties"`
// Type is the abstract type
func (*ObjectExpression) Type() string { return "ObjectExpression" }
func (e *ObjectExpression) Copy() Node {
if e == nil {
return e
ne := new(ObjectExpression)
*ne = *e
if len(e.Properties) > 0 {
ne.Properties = make([]*Property, len(e.Properties))
for i, p := range e.Properties {
ne.Properties[i] = p.Copy().(*Property)
return ne
// ConditionalExpression selects one of two expressions, `Alternate` or `Consequent`
// depending on a third, boolean, expression, `Test`.
type ConditionalExpression struct {
Test Expression `json:"test"`
Alternate Expression `json:"alternate"`
Consequent Expression `json:"consequent"`
// Type is the abstract type
func (*ConditionalExpression) Type() string { return "ConditionalExpression" }
func (e *ConditionalExpression) Copy() Node {
if e == nil {
return e
ne := new(ConditionalExpression)
*ne = *e
ne.Test = e.Test.Copy().(Expression)
ne.Alternate = e.Alternate.Copy().(Expression)
ne.Consequent = e.Consequent.Copy().(Expression)
return ne
// Property is the value associated with a key
type Property struct {
Key *Identifier `json:"key"`
Value Expression `json:"value"`
func (p *Property) Copy() Node {
if p == nil {
return p
np := new(Property)
*np = *p
if p.Value != nil {
np.Value = p.Value.Copy().(Expression)
return np
// Type is the abstract type
func (*Property) Type() string { return "Property" }
// Identifier represents a name that identifies a unique Node
type Identifier struct {
Name string `json:"name"`
// Type is the abstract type
func (*Identifier) Type() string { return "Identifier" }
func (i *Identifier) Copy() Node {
if i == nil {
return i
ni := new(Identifier)
*ni = *i
return ni
// Literal are the lexical forms for literal expressions which define
// boolean, string, integer, number, duration, datetime and field values.
// Literals must be coerced explicitly.
type Literal interface {
func (*BooleanLiteral) literal() {}
func (*DateTimeLiteral) literal() {}
func (*DurationLiteral) literal() {}
func (*FloatLiteral) literal() {}
func (*IntegerLiteral) literal() {}
func (*PipeLiteral) literal() {}
func (*RegexpLiteral) literal() {}
func (*StringLiteral) literal() {}
func (*UnsignedIntegerLiteral) literal() {}
// PipeLiteral represents a specialized literal value, indicating the left hand value of a pipe expression.
type PipeLiteral struct {
// Type is the abstract type
func (*PipeLiteral) Type() string { return "PipeLiteral" }
func (i *PipeLiteral) Copy() Node {
if i == nil {
return i
ni := new(PipeLiteral)
*ni = *i
return ni
// StringLiteral expressions begin and end with double quote marks.
type StringLiteral struct {
Value string `json:"value"`
func (*StringLiteral) Type() string { return "StringLiteral" }
func (l *StringLiteral) Copy() Node {
if l == nil {
return l
nl := new(StringLiteral)
*nl = *l
return nl
// BooleanLiteral represent boolean values
type BooleanLiteral struct {
Value bool `json:"value"`
// Type is the abstract type
func (*BooleanLiteral) Type() string { return "BooleanLiteral" }
func (l *BooleanLiteral) Copy() Node {
if l == nil {
return l
nl := new(BooleanLiteral)
*nl = *l
return nl
// FloatLiteral represent floating point numbers according to the double representations defined by the IEEE-754-1985
type FloatLiteral struct {
Value float64 `json:"value"`
// Type is the abstract type
func (*FloatLiteral) Type() string { return "FloatLiteral" }
func (l *FloatLiteral) Copy() Node {
if l == nil {
return l
nl := new(FloatLiteral)
*nl = *l
return nl
// IntegerLiteral represent integer numbers.
type IntegerLiteral struct {
Value int64 `json:"value"`
// Type is the abstract type
func (*IntegerLiteral) Type() string { return "IntegerLiteral" }
func (l *IntegerLiteral) Copy() Node {
if l == nil {
return l
nl := new(IntegerLiteral)
*nl = *l
return nl
// UnsignedIntegerLiteral represents unsigned integer numbers.
type UnsignedIntegerLiteral struct {
// Value is held as a uint64; the type's MarshalJSON writes it as a
// base-10 string rather than a JSON number.
Value uint64 `json:"value"`
// Type is the abstract type; it always returns "UnsignedIntegerLiteral".
func (*UnsignedIntegerLiteral) Type() string { return "UnsignedIntegerLiteral" }
func (l *UnsignedIntegerLiteral) Copy() Node {
if l == nil {
return l
nl := new(UnsignedIntegerLiteral)
*nl = *l
return nl
// RegexpLiteral expressions begin and end with `/` and are regular
// expressions with syntax accepted by RE2.
type RegexpLiteral struct {
// Value is the compiled pattern; the type's MarshalJSON writes its source
// text (Value.String()) under the JSON key "value".
Value *regexp.Regexp `json:"value"`
// Type is the abstract type; it always returns "RegexpLiteral".
func (*RegexpLiteral) Type() string { return "RegexpLiteral" }
func (l *RegexpLiteral) Copy() Node {
if l == nil {
return l
nl := new(RegexpLiteral)
*nl = *l
return nl
// DurationLiteral represents the elapsed time between two instants as an
// int64 nanosecond count, using the syntax of Go's time.Duration.
// TODO: this may be better as a class initialization
type DurationLiteral struct {
// Value is the duration; the type's MarshalJSON writes it in
// time.Duration's String form under the JSON key "value".
Value time.Duration `json:"value"`
// Type is the abstract type; it always returns "DurationLiteral".
func (*DurationLiteral) Type() string { return "DurationLiteral" }
func (l *DurationLiteral) Copy() Node {
if l == nil {
return l
nl := new(DurationLiteral)
*nl = *l
return nl
// DateTimeLiteral represents an instant in time with nanosecond precision,
// using the syntax of Go's RFC3339 Nanosecond variant.
// TODO: this may be better as a class initialization
type DateTimeLiteral struct {
// Value is the instant, serialized under the JSON key "value".
Value time.Time `json:"value"`
// Type is the abstract type; it always returns "DateTimeLiteral".
func (*DateTimeLiteral) Type() string { return "DateTimeLiteral" }
func (l *DateTimeLiteral) Copy() Node {
if l == nil {
return l
nl := new(DateTimeLiteral)
*nl = *l
return nl
// OperatorTokens maps each OperatorKind to the token string that spells it.
// The package init function walks this table to build the reverse lookup
// (token string to OperatorKind).
var OperatorTokens = map[OperatorKind]string{
MultiplicationOperator: "*",
DivisionOperator: "/",
AdditionOperator: "+",
SubtractionOperator: "-",
LessThanEqualOperator: "<=",
LessThanOperator: "<",
GreaterThanOperator: ">",
GreaterThanEqualOperator: ">=",
InOperator: "in",
NotOperator: "not",
NotEmptyOperator: "not empty",
EmptyOperator: "empty",
StartsWithOperator: "startswith",
EqualOperator: "==",
NotEqualOperator: "!=",
RegexpMatchOperator: "=~",
NotRegexpMatchOperator: "!~",
// LogicalOperatorTokens maps each LogicalOperatorKind to the token string
// that spells it. The package init function walks this table to build the
// reverse lookup (token string to LogicalOperatorKind).
var LogicalOperatorTokens = map[LogicalOperatorKind]string{
AndOperator: "and",
OrOperator: "or",
var operators map[string]OperatorKind
var logOperators map[string]LogicalOperatorKind
func init() {
operators = make(map[string]OperatorKind)
for op := opBegin + 1; op < opEnd; op++ {
operators[OperatorTokens[op]] = op
logOperators = make(map[string]LogicalOperatorKind)
for op := logOpBegin + 1; op < logOpEnd; op++ {
logOperators[LogicalOperatorTokens[op]] = op
# Regenerates cmpopts.go, the go-cmp options that ignore the BaseNode field
# of every AST struct.
#
# The top-level Makefile exports GO_GENERATE (including its build tags).
# The default below only applies when make is run directly in this
# directory, where the variable would otherwise be empty and the recipe
# would try to execute "-x ./...".
GO_GENERATE ?= go generate

# Remove a half-written cmpopts.go if generation fails, so a broken file is
# never mistaken for an up-to-date one.
.DELETE_ON_ERROR:

all: cmpopts.go

# The //go:generate directive runs cmpgen by name, so the repository's bin
# directory is put first on PATH for the duration of the command.
cmpopts.go: ../ast.go gen.go ../../bin/cmpgen
	PATH=../../bin:${PATH} $(GO_GENERATE) -x ./...

clean:
	rm -f cmpopts.go

.PHONY: all clean
package main
import (
// main generates the Go source file that declares IgnoreBaseNodeOptions.
// It takes exactly one argument, the output path, imports a package from
// source through go/importer, and writes one cmpopts.IgnoreFields entry for
// every exported name in the package scope whose underlying type is a
// struct containing a field named "BaseNode".
func main() {
// Exactly one argument is expected: the path of the file to write.
if len(os.Args) != 2 {
fmt.Println("Usage: cmpgen <path to output file>")
f, err := os.Create(os.Args[1])
if err != nil {
defer f.Close()
// NOTE(review): the import path is an empty string in this view;
// presumably it names the ast package — confirm against the repository.
pkg, err := importer.For("source", nil).Import("")
if err != nil {
scope := pkg.Scope()
// Fixed file header: package clause, generated-file marker, imports.
fmt.Fprintln(f, "package asttest")
fmt.Fprintln(f, "// DO NOT EDIT: This file is autogenerated via the cmpgen command.")
fmt.Fprintln(f, `import (`)
fmt.Fprintln(f, ` ""`)
fmt.Fprintln(f, ` ""`)
fmt.Fprintln(f, ` ""`)
fmt.Fprintln(f, `)`)
fmt.Fprintln(f, `var IgnoreBaseNodeOptions = []cmp.Option{`)
// scope.Names() is what fixes the order of the emitted entries.
for _, name := range scope.Names() {
obj := scope.Lookup(name)
// Only exported names whose underlying type is a struct are considered.
if strct, ok := obj.Type().Underlying().(*types.Struct); obj.Exported() && ok {
for i := 0; i < strct.NumFields(); i++ {
field := strct.Field(i)
if field.Name() == "BaseNode" {
fmt.Fprintf(f, "\tcmpopts.IgnoreFields(ast.%s{}, \"BaseNode\"),\n", obj.Name())
fmt.Fprintln(f, `}`)
package asttest
// DO NOT EDIT: This file is autogenerated via the cmpgen command.
import (
var IgnoreBaseNodeOptions = []cmp.Option{
cmpopts.IgnoreFields(ast.ArrayExpression{}, "BaseNode"),
cmpopts.IgnoreFields(ast.ArrowFunctionExpression{}, "BaseNode"),
cmpopts.IgnoreFields(ast.BinaryExpression{}, "BaseNode"),
cmpopts.IgnoreFields(ast.BlockStatement{}, "BaseNode"),
cmpopts.IgnoreFields(ast.BooleanLiteral{}, "BaseNode"),
cmpopts.IgnoreFields(ast.CallExpression{}, "BaseNode"),
cmpopts.IgnoreFields(ast.ConditionalExpression{}, "BaseNode"),
cmpopts.IgnoreFields(ast.DateTimeLiteral{}, "BaseNode"),
cmpopts.IgnoreFields(ast.DurationLiteral{}, "BaseNode"),
cmpopts.IgnoreFields(ast.ExpressionStatement{}, "BaseNode"),
cmpopts.IgnoreFields(ast.FloatLiteral{}, "BaseNode"),
cmpopts.IgnoreFields(ast.Identifier{}, "BaseNode"),
cmpopts.IgnoreFields(ast.IntegerLiteral{}, "BaseNode"),
cmpopts.IgnoreFields(ast.LogicalExpression{}, "BaseNode"),
cmpopts.IgnoreFields(ast.MemberExpression{}, "BaseNode"),
cmpopts.IgnoreFields(ast.ObjectExpression{}, "BaseNode"),
cmpopts.IgnoreFields(ast.PipeExpression{}, "BaseNode"),
cmpopts.IgnoreFields(ast.PipeLiteral{}, "BaseNode"),
cmpopts.IgnoreFields(ast.Program{}, "BaseNode"),
cmpopts.IgnoreFields(ast.Property{}, "BaseNode"),
cmpopts.IgnoreFields(ast.RegexpLiteral{}, "BaseNode"),
cmpopts.IgnoreFields(ast.ReturnStatement{}, "BaseNode"),
cmpopts.IgnoreFields(ast.StringLiteral{}, "BaseNode"),
cmpopts.IgnoreFields(ast.UnaryExpression{}, "BaseNode"),
cmpopts.IgnoreFields(ast.UnsignedIntegerLiteral{}, "BaseNode"),
cmpopts.IgnoreFields(ast.VariableDeclaration{}, "BaseNode"),
cmpopts.IgnoreFields(ast.VariableDeclarator{}, "BaseNode"),
package asttest
import (
//go:generate cmpgen cmpopts.go
var CompareOptions = append(IgnoreBaseNodeOptions,
cmp.Comparer(func(x, y *regexp.Regexp) bool { return x.String() == y.String() }),
package ast
import (
// MarshalJSON encodes the Program with an added "type" field set to
// p.Type(). Alias has Program's fields but not its methods, so the nested
// json.Marshal call does not re-enter this method.
func (p *Program) MarshalJSON() ([]byte, error) {
type Alias Program
raw := struct {
Type string `json:"type"`
Type: p.Type(),
Alias: (*Alias)(p),
return json.Marshal(raw)
// UnmarshalJSON decodes a Program. Body is captured as raw JSON messages
// and each element is resolved with unmarshalStatement, since the concrete
// statement type is only known from the element's "type" field.
func (p *Program) UnmarshalJSON(data []byte) error {
type Alias Program
raw := struct {
Body []json.RawMessage `json:"body"`
if err := json.Unmarshal(data, &raw); err != nil {
return err
// Take over the fields decoded through the alias, when there are any.
if raw.Alias != nil {
*p = *(*Program)(raw.Alias)
p.Body = make([]Statement, len(raw.Body))
for i, r := range raw.Body {
s, err := unmarshalStatement(r)
if err != nil {
return err
p.Body[i] = s
return nil
// MarshalJSON encodes the BlockStatement with an added "type" field set to
// s.Type(). The method-less Alias type keeps json.Marshal from re-entering
// this method.
func (s *BlockStatement) MarshalJSON() ([]byte, error) {
type Alias BlockStatement
raw := struct {
Type string `json:"type"`
Type: s.Type(),
Alias: (*Alias)(s),
return json.Marshal(raw)
// UnmarshalJSON decodes a BlockStatement. Body is captured as raw JSON
// messages and each element is resolved with unmarshalStatement, which
// selects the concrete statement from the element's "type" field.
func (s *BlockStatement) UnmarshalJSON(data []byte) error {
type Alias BlockStatement
raw := struct {
Body []json.RawMessage `json:"body"`
if err := json.Unmarshal(data, &raw); err != nil {
return err
// Take over the fields decoded through the alias, when there are any.
if raw.Alias != nil {
*s = *(*BlockStatement)(raw.Alias)
s.Body = make([]Statement, len(raw.Body))
for i, r := range raw.Body {
stmt, err := unmarshalStatement(r)
if err != nil {
return err
s.Body[i] = stmt
return nil
// MarshalJSON encodes the ExpressionStatement with an added "type" field
// set to s.Type(). The method-less Alias type keeps json.Marshal from
// re-entering this method.
func (s *ExpressionStatement) MarshalJSON() ([]byte, error) {
type Alias ExpressionStatement
raw := struct {
Type string `json:"type"`
Type: s.Type(),
Alias: (*Alias)(s),
return json.Marshal(raw)
// UnmarshalJSON decodes an ExpressionStatement. The "expression" value is
// captured as raw JSON and resolved with unmarshalExpression, which selects
// the concrete expression from its "type" field.
func (s *ExpressionStatement) UnmarshalJSON(data []byte) error {
type Alias ExpressionStatement
raw := struct {
Expression json.RawMessage `json:"expression"`
if err := json.Unmarshal(data, &raw); err != nil {
return err
// Take over the fields decoded through the alias, when there are any.
if raw.Alias != nil {
*s = *(*ExpressionStatement)(raw.Alias)
e, err := unmarshalExpression(raw.Expression)
if err != nil {
return err
s.Expression = e
return nil
// MarshalJSON encodes the ReturnStatement with an added "type" field set to
// s.Type(). The method-less Alias type keeps json.Marshal from re-entering
// this method.
func (s *ReturnStatement) MarshalJSON() ([]byte, error) {
type Alias ReturnStatement
raw := struct {
Type string `json:"type"`
Type: s.Type(),
Alias: (*Alias)(s),
return json.Marshal(raw)
// UnmarshalJSON decodes a ReturnStatement. The "argument" value is captured
// as raw JSON and resolved with unmarshalExpression, which selects the
// concrete expression from its "type" field.
func (s *ReturnStatement) UnmarshalJSON(data []byte) error {
type Alias ReturnStatement
raw := struct {
Argument json.RawMessage `json:"argument"`
if err := json.Unmarshal(data, &raw); err != nil {
return err
// Take over the fields decoded through the alias, when there are any.
if raw.Alias != nil {
*s = *(*ReturnStatement)(raw.Alias)
e, err := unmarshalExpression(raw.Argument)
if err != nil {
return err
s.Argument = e
return nil
// MarshalJSON encodes the VariableDeclaration with an added "type" field
// set to d.Type(). The method-less Alias type keeps json.Marshal from
// re-entering this method.
func (d *VariableDeclaration) MarshalJSON() ([]byte, error) {
type Alias VariableDeclaration
raw := struct {
Type string `json:"type"`
Type: d.Type(),
Alias: (*Alias)(d),
return json.Marshal(raw)
// MarshalJSON encodes the VariableDeclarator with an added "type" field set
// to d.Type(). The method-less Alias type keeps json.Marshal from
// re-entering this method.
func (d *VariableDeclarator) MarshalJSON() ([]byte, error) {
type Alias VariableDeclarator
raw := struct {
Type string `json:"type"`
Type: d.Type(),
Alias: (*Alias)(d),
return json.Marshal(raw)
// UnmarshalJSON decodes a VariableDeclarator. The "init" value is captured
// as raw JSON and resolved with unmarshalExpression, which selects the
// concrete expression from its "type" field.
func (d *VariableDeclarator) UnmarshalJSON(data []byte) error {
type Alias VariableDeclarator
raw := struct {
Init json.RawMessage `json:"init"`
if err := json.Unmarshal(data, &raw); err != nil {
return err
// Take over the fields decoded through the alias, when there are any.
if raw.Alias != nil {
*d = *(*VariableDeclarator)(raw.Alias)
e, err := unmarshalExpression(raw.Init)
if err != nil {
return err
d.Init = e
return nil
// MarshalJSON encodes the CallExpression with an added "type" field set to
// e.Type(). The method-less Alias type keeps json.Marshal from re-entering
// this method.
func (e *CallExpression) MarshalJSON() ([]byte, error) {
type Alias CallExpression
raw := struct {
Type string `json:"type"`
Type: e.Type(),
Alias: (*Alias)(e),
return json.Marshal(raw)
// UnmarshalJSON decodes a CallExpression. "callee" and every element of
// "arguments" are captured as raw JSON and resolved with
// unmarshalExpression, which selects the concrete expression from its
// "type" field.
func (e *CallExpression) UnmarshalJSON(data []byte) error {
type Alias CallExpression
raw := struct {
Callee json.RawMessage `json:"callee"`
Arguments []json.RawMessage `json:"arguments"`
if err := json.Unmarshal(data, &raw); err != nil {
return err
// Take over the fields decoded through the alias, when there are any.
if raw.Alias != nil {
*e = *(*CallExpression)(raw.Alias)
callee, err := unmarshalExpression(raw.Callee)
if err != nil {
return err
e.Callee = callee
e.Arguments = make([]Expression, len(raw.Arguments))
for i, r := range raw.Arguments {
expr, err := unmarshalExpression(r)
if err != nil {
return err
e.Arguments[i] = expr
return nil
// MarshalJSON encodes the PipeExpression with an added "type" field set to
// e.Type(). The method-less Alias type keeps json.Marshal from re-entering
// this method.
func (e *PipeExpression) MarshalJSON() ([]byte, error) {
type Alias PipeExpression
raw := struct {
Type string `json:"type"`
Type: e.Type(),
Alias: (*Alias)(e),
return json.Marshal(raw)
// UnmarshalJSON decodes a PipeExpression. Only "argument" is captured as
// raw JSON and resolved with unmarshalExpression; the remaining fields are
// decoded through the alias.
func (e *PipeExpression) UnmarshalJSON(data []byte) error {
type Alias PipeExpression
raw := struct {
Argument json.RawMessage `json:"argument"`
if err := json.Unmarshal(data, &raw); err != nil {
return err
// Take over the fields decoded through the alias, when there are any.
if raw.Alias != nil {
*e = *(*PipeExpression)(raw.Alias)
arg, err := unmarshalExpression(raw.Argument)
if err != nil {
return err
e.Argument = arg
return nil
// MarshalJSON encodes the MemberExpression with an added "type" field set
// to e.Type(). The method-less Alias type keeps json.Marshal from
// re-entering this method.
func (e *MemberExpression) MarshalJSON() ([]byte, error) {
type Alias MemberExpression
raw := struct {
Type string `json:"type"`
Type: e.Type(),
Alias: (*Alias)(e),
return json.Marshal(raw)
// UnmarshalJSON decodes a MemberExpression. "object" and "property" are
// captured as raw JSON and each is resolved with unmarshalExpression, which
// selects the concrete expression from its "type" field.
func (e *MemberExpression) UnmarshalJSON(data []byte) error {
type Alias MemberExpression
raw := struct {
Object json.RawMessage `json:"object"`
Property json.RawMessage `json:"property"`
if err := json.Unmarshal(data, &raw); err != nil {
return err
// Take over the fields decoded through the alias, when there are any.
if raw.Alias != nil {
*e = *(*MemberExpression)(raw.Alias)
object, err := unmarshalExpression(raw.Object)
if err != nil {
return err
e.Object = object
property, err := unmarshalExpression(raw.Property)
if err != nil {
return err
e.Property = property
return nil
// MarshalJSON encodes the ArrowFunctionExpression with an added "type"
// field set to e.Type(). The method-less Alias type keeps json.Marshal from
// re-entering this method.
func (e *ArrowFunctionExpression) MarshalJSON() ([]byte, error) {
type Alias ArrowFunctionExpression
raw := struct {
Type string `json:"type"`
Type: e.Type(),
Alias: (*Alias)(e),
return json.Marshal(raw)
// UnmarshalJSON decodes an ArrowFunctionExpression. "body" is captured as
// raw JSON and resolved with unmarshalNode, not unmarshalExpression, so any
// known node type is accepted as the body.
func (e *ArrowFunctionExpression) UnmarshalJSON(data []byte) error {
type Alias ArrowFunctionExpression
raw := struct {
Body json.RawMessage `json:"body"`
if err := json.Unmarshal(data, &raw); err != nil {
return err
// Take over the fields decoded through the alias, when there are any.
if raw.Alias != nil {
*e = *(*ArrowFunctionExpression)(raw.Alias)
body, err := unmarshalNode(raw.Body)
if err != nil {
return err
e.Body = body
return nil
// MarshalJSON encodes the BinaryExpression with an added "type" field set
// to e.Type(). The method-less Alias type keeps json.Marshal from
// re-entering this method.
func (e *BinaryExpression) MarshalJSON() ([]byte, error) {
type Alias BinaryExpression
raw := struct {
Type string `json:"type"`
Type: e.Type(),
Alias: (*Alias)(e),
return json.Marshal(raw)
// UnmarshalJSON decodes a BinaryExpression. "left" and "right" are captured
// as raw JSON and each is resolved with unmarshalExpression; the remaining
// fields are decoded through the alias.
func (e *BinaryExpression) UnmarshalJSON(data []byte) error {
type Alias BinaryExpression
raw := struct {
Left json.RawMessage `json:"left"`
Right json.RawMessage `json:"right"`
if err := json.Unmarshal(data, &raw); err != nil {
return err
// Take over the fields decoded through the alias, when there are any.
if raw.Alias != nil {
*e = *(*BinaryExpression)(raw.Alias)
l, err := unmarshalExpression(raw.Left)
if err != nil {
return err
e.Left = l
r, err := unmarshalExpression(raw.Right)
if err != nil {
return err
e.Right = r
return nil
// MarshalJSON encodes the UnaryExpression with an added "type" field set to
// e.Type(). The method-less Alias type keeps json.Marshal from re-entering
// this method.
func (e *UnaryExpression) MarshalJSON() ([]byte, error) {
type Alias UnaryExpression
raw := struct {
Type string `json:"type"`
Type: e.Type(),
Alias: (*Alias)(e),
return json.Marshal(raw)
// UnmarshalJSON decodes a UnaryExpression. "argument" is captured as raw
// JSON and resolved with unmarshalExpression; the remaining fields are
// decoded through the alias.
func (e *UnaryExpression) UnmarshalJSON(data []byte) error {
type Alias UnaryExpression
raw := struct {
Argument json.RawMessage `json:"argument"`
if err := json.Unmarshal(data, &raw); err != nil {
return err
// Take over the fields decoded through the alias, when there are any.
if raw.Alias != nil {
*e = *(*UnaryExpression)(raw.Alias)
argument, err := unmarshalExpression(raw.Argument)
if err != nil {
return err
e.Argument = argument
return nil
// MarshalJSON encodes the LogicalExpression with an added "type" field set
// to e.Type(). The method-less Alias type keeps json.Marshal from
// re-entering this method.
func (e *LogicalExpression) MarshalJSON() ([]byte, error) {
type Alias LogicalExpression
raw := struct {
Type string `json:"type"`
Type: e.Type(),
Alias: (*Alias)(e),
return json.Marshal(raw)
// UnmarshalJSON decodes a LogicalExpression. "left" and "right" are
// captured as raw JSON and each is resolved with unmarshalExpression; the
// remaining fields are decoded through the alias.
func (e *LogicalExpression) UnmarshalJSON(data []byte) error {
type Alias LogicalExpression
raw := struct {
Left json.RawMessage `json:"left"`
Right json.RawMessage `json:"right"`
if err := json.Unmarshal(data, &raw); err != nil {
return err
// Take over the fields decoded through the alias, when there are any.
if raw.Alias != nil {
*e = *(*LogicalExpression)(raw.Alias)
l, err := unmarshalExpression(raw.Left)
if err != nil {
return err
e.Left = l
r, err := unmarshalExpression(raw.Right)
if err != nil {
return err
e.Right = r
return nil
// MarshalJSON encodes the ArrayExpression with an added "type" field set to
// e.Type(). The method-less Alias type keeps json.Marshal from re-entering
// this method.
func (e *ArrayExpression) MarshalJSON() ([]byte, error) {
type Alias ArrayExpression
raw := struct {
Type string `json:"type"`
Type: e.Type(),
Alias: (*Alias)(e),
return json.Marshal(raw)
// UnmarshalJSON decodes an ArrayExpression. Every element of "elements" is
// captured as raw JSON and resolved with unmarshalExpression, which selects
// the concrete expression from its "type" field.
func (e *ArrayExpression) UnmarshalJSON(data []byte) error {
type Alias ArrayExpression
raw := struct {
Elements []json.RawMessage `json:"elements"`
if err := json.Unmarshal(data, &raw); err != nil {
return err
// Take over the fields decoded through the alias, when there are any.
if raw.Alias != nil {
*e = *(*ArrayExpression)(raw.Alias)
e.Elements = make([]Expression, len(raw.Elements))
for i, r := range raw.Elements {
expr, err := unmarshalExpression(r)
if err != nil {
return err
e.Elements[i] = expr
return nil
// MarshalJSON encodes the ObjectExpression with an added "type" field set
// to e.Type(). The method-less Alias type keeps json.Marshal from
// re-entering this method.
func (e *ObjectExpression) MarshalJSON() ([]byte, error) {
type Alias ObjectExpression
raw := struct {
Type string `json:"type"`
Type: e.Type(),
Alias: (*Alias)(e),
return json.Marshal(raw)
// MarshalJSON encodes the ConditionalExpression with an added "type" field
// set to e.Type(). The method-less Alias type keeps json.Marshal from
// re-entering this method.
func (e *ConditionalExpression) MarshalJSON() ([]byte, error) {
type Alias ConditionalExpression
raw := struct {
Type string `json:"type"`
Type: e.Type(),
Alias: (*Alias)(e),
return json.Marshal(raw)
// UnmarshalJSON decodes a ConditionalExpression. "test", "alternate" and
// "consequent" are captured as raw JSON and each is resolved with
// unmarshalExpression, which selects the concrete expression from its
// "type" field.
func (e *ConditionalExpression) UnmarshalJSON(data []byte) error {
type Alias ConditionalExpression
raw := struct {
Test json.RawMessage `json:"test"`
Alternate json.RawMessage `json:"alternate"`
Consequent json.RawMessage `json:"consequent"`
if err := json.Unmarshal(data, &raw); err != nil {
return err
// Take over the fields decoded through the alias, when there are any.
if raw.Alias != nil {
*e = *(*ConditionalExpression)(raw.Alias)
test, err := unmarshalExpression(raw.Test)
if err != nil {
return err
e.Test = test
alternate, err := unmarshalExpression(raw.Alternate)
if err != nil {
return err
e.Alternate = alternate
consequent, err := unmarshalExpression(raw.Consequent)
if err != nil {
return err
e.Consequent = consequent
return nil
// MarshalJSON encodes the Property with an added "type" field set to
// p.Type(). The method-less Alias type keeps json.Marshal from re-entering
// this method.
func (p *Property) MarshalJSON() ([]byte, error) {
type Alias Property
raw := struct {
Type string `json:"type"`
Type: p.Type(),
Alias: (*Alias)(p),
return json.Marshal(raw)
// UnmarshalJSON decodes a Property. "value" is captured as raw JSON and,
// when the key is present, resolved with unmarshalExpression; an absent
// "value" leaves p.Value as decoded through the alias.
func (p *Property) UnmarshalJSON(data []byte) error {
type Alias Property
raw := struct {
Value json.RawMessage `json:"value"`
if err := json.Unmarshal(data, &raw); err != nil {
return err
// Take over the fields decoded through the alias, when there are any.
if raw.Alias != nil {
*p = *(*Property)(raw.Alias)
// A nil raw.Value means the key was not in the input at all; an explicit
// JSON null arrives as the bytes "null" and is handled by
// unmarshalExpression.
if raw.Value != nil {
value, err := unmarshalExpression(raw.Value)
if err != nil {
return err
p.Value = value
return nil
// MarshalJSON encodes the Identifier with an added "type" field set to
// i.Type(). The method-less Alias type keeps json.Marshal from re-entering
// this method.
func (i *Identifier) MarshalJSON() ([]byte, error) {
type Alias Identifier
raw := struct {
Type string `json:"type"`
Type: i.Type(),
Alias: (*Alias)(i),
return json.Marshal(raw)
// MarshalJSON encodes the PipeLiteral with an added "type" field set to
// l.Type(). The method-less Alias type keeps json.Marshal from re-entering
// this method.
func (l *PipeLiteral) MarshalJSON() ([]byte, error) {
type Alias PipeLiteral
raw := struct {
Type string `json:"type"`
Type: l.Type(),
Alias: (*Alias)(l),
return json.Marshal(raw)
// MarshalJSON encodes the StringLiteral with an added "type" field set to
// l.Type(). The method-less Alias type keeps json.Marshal from re-entering
// this method.
func (l *StringLiteral) MarshalJSON() ([]byte, error) {
type Alias StringLiteral
raw := struct {
Type string `json:"type"`
Type: l.Type(),
Alias: (*Alias)(l),
return json.Marshal(raw)
// MarshalJSON encodes the BooleanLiteral with an added "type" field set to
// l.Type(). The method-less Alias type keeps json.Marshal from re-entering
// this method.
func (l *BooleanLiteral) MarshalJSON() ([]byte, error) {
type Alias BooleanLiteral
raw := struct {
Type string `json:"type"`
Type: l.Type(),
Alias: (*Alias)(l),
return json.Marshal(raw)
// MarshalJSON encodes the FloatLiteral with an added "type" field set to
// l.Type(). The method-less Alias type keeps json.Marshal from re-entering
// this method.
func (l *FloatLiteral) MarshalJSON() ([]byte, error) {
type Alias FloatLiteral
raw := struct {
Type string `json:"type"`
Type: l.Type(),
Alias: (*Alias)(l),
return json.Marshal(raw)
// MarshalJSON encodes the IntegerLiteral with an added "type" field and
// with "value" written as a base-10 string instead of a JSON number.
// NOTE(review): presumably the string form is there so full 64-bit values
// survive JSON consumers that read numbers as float64 — confirm.
func (l *IntegerLiteral) MarshalJSON() ([]byte, error) {
type Alias IntegerLiteral
raw := struct {
Type string `json:"type"`
// This string Value is declared directly on the wrapper struct and is the
// one filled in below with the formatted number.
Value string `json:"value"`
Type: l.Type(),
Alias: (*Alias)(l),
Value: strconv.FormatInt(l.Value, 10),
return json.Marshal(raw)
// UnmarshalJSON decodes an IntegerLiteral whose "value" is a base-10
// string, parsing it with strconv.ParseInt as a 64-bit signed integer. A
// string that does not parse is returned as the ParseInt error.
func (l *IntegerLiteral) UnmarshalJSON(data []byte) error {
type Alias IntegerLiteral
raw := struct {
Value string `json:"value"`
if err := json.Unmarshal(data, &raw); err != nil {
return err
// Take over the fields decoded through the alias, when there are any.
if raw.Alias != nil {
*l = *(*IntegerLiteral)(raw.Alias)
value, err := strconv.ParseInt(raw.Value, 10, 64)
if err != nil {
return err
l.Value = value
return nil
// MarshalJSON encodes the UnsignedIntegerLiteral with an added "type" field
// and with "value" written as a base-10 string instead of a JSON number.
// NOTE(review): presumably the string form is there so full 64-bit values
// survive JSON consumers that read numbers as float64 — confirm.
func (l *UnsignedIntegerLiteral) MarshalJSON() ([]byte, error) {
type Alias UnsignedIntegerLiteral
raw := struct {
Type string `json:"type"`
// This string Value is declared directly on the wrapper struct and is the
// one filled in below with the formatted number.
Value string `json:"value"`
Type: l.Type(),
Alias: (*Alias)(l),
Value: strconv.FormatUint(l.Value, 10),
return json.Marshal(raw)
// UnmarshalJSON decodes an UnsignedIntegerLiteral whose "value" is a
// base-10 string, parsing it with strconv.ParseUint as a 64-bit unsigned
// integer. A string that does not parse is returned as the ParseUint error.
func (l *UnsignedIntegerLiteral) UnmarshalJSON(data []byte) error {
type Alias UnsignedIntegerLiteral
raw := struct {
Value string `json:"value"`
if err := json.Unmarshal(data, &raw); err != nil {
return err
// Take over the fields decoded through the alias, when there are any.
if raw.Alias != nil {
*l = *(*UnsignedIntegerLiteral)(raw.Alias)
value, err := strconv.ParseUint(raw.Value, 10, 64)
if err != nil {
return err
l.Value = value
return nil
// MarshalJSON encodes the RegexpLiteral with an added "type" field and with
// "value" written as the pattern's source text (l.Value.String()).
// NOTE(review): String is called on l.Value unconditionally, so a literal
// with a nil Value would panic here — confirm callers never marshal one.
func (l *RegexpLiteral) MarshalJSON() ([]byte, error) {
type Alias RegexpLiteral
raw := struct {
Type string `json:"type"`
Value string `json:"value"`
Type: l.Type(),
Alias: (*Alias)(l),
Value: l.Value.String(),
return json.Marshal(raw)
// UnmarshalJSON decodes a RegexpLiteral whose "value" is the pattern's
// source text, compiling it with regexp.Compile. A pattern that does not
// compile is returned as the Compile error.
func (l *RegexpLiteral) UnmarshalJSON(data []byte) error {
type Alias RegexpLiteral
raw := struct {
Value string `json:"value"`
if err := json.Unmarshal(data, &raw); err != nil {
return err
// Take over the fields decoded through the alias, when there are any.
if raw.Alias != nil {
*l = *(*RegexpLiteral)(raw.Alias)
value, err := regexp.Compile(raw.Value)
if err != nil {
return err
l.Value = value
return nil
// MarshalJSON encodes the DurationLiteral with an added "type" field and
// with "value" written in time.Duration's String form (for example
// "1h1m0s") instead of a number.
func (l *DurationLiteral) MarshalJSON() ([]byte, error) {
type Alias DurationLiteral
raw := struct {
Type string `json:"type"`
Value string `json:"value"`
Type: l.Type(),
Alias: (*Alias)(l),
Value: l.Value.String(),
return json.Marshal(raw)
// UnmarshalJSON decodes a DurationLiteral whose "value" is a duration
// string, parsing it with time.ParseDuration. A string that does not parse
// is returned as the ParseDuration error.
func (l *DurationLiteral) UnmarshalJSON(data []byte) error {
type Alias DurationLiteral
raw := struct {
Value string `json:"value"`
if err := json.Unmarshal(data, &raw); err != nil {
return err
// Take over the fields decoded through the alias, when there are any.
if raw.Alias != nil {
*l = *(*DurationLiteral)(raw.Alias)
value, err := time.ParseDuration(raw.Value)
if err != nil {
return err
l.Value = value
return nil
// MarshalJSON encodes the DateTimeLiteral with an added "type" field set to
// l.Type(); Value is encoded through the alias by time.Time's own JSON
// encoding. The method-less Alias type keeps json.Marshal from re-entering
// this method.
func (l *DateTimeLiteral) MarshalJSON() ([]byte, error) {
type Alias DateTimeLiteral
raw := struct {
Type string `json:"type"`
Type: l.Type(),
Alias: (*Alias)(l),
return json.Marshal(raw)
func checkNullMsg(msg json.RawMessage) bool {
switch len(msg) {
case 0:
return true
case 4:
return string(msg) == "null"
return false
func unmarshalStatement(msg json.RawMessage) (Statement, error) {
if checkNullMsg(msg) {
return nil, nil
n, err := unmarshalNode(msg)
if err != nil {
return nil, err
s, ok := n.(Statement)
if !ok {
return nil, fmt.Errorf("node %q is not a statement", n.Type())
return s, nil
func unmarshalExpression(msg json.RawMessage) (Expression, error) {
if checkNullMsg(msg) {
return nil, nil
n, err := unmarshalNode(msg)
if err != nil {
return nil, err
e, ok := n.(Expression)
if !ok {
return nil, fmt.Errorf("node %q is not an expression", n.Type())
return e, nil
func unmarshalLiteral(msg json.RawMessage) (Literal, error) {
if checkNullMsg(msg) {
return nil, nil
n, err := unmarshalNode(msg)
if err != nil {
return nil, err
e, ok := n.(Literal)
if !ok {
return nil, fmt.Errorf("node %q is not a literal", n.Type())
return e, nil
func unmarshalNode(msg json.RawMessage) (Node, error) {
if checkNullMsg(msg) {
return nil, nil
type typeRawMessage struct {
Type string `json:"type"`
typ := typeRawMessage{}
if err := json.Unmarshal(msg, &typ); err != nil {
return nil, err
var node Node
switch typ.Type {
case "Program":
node = new(Program)
case "BlockStatement":
node = new(BlockStatement)
case "ExpressionStatement":
node = new(ExpressionStatement)
case "ReturnStatement":
node = new(ReturnStatement)
case "VariableDeclaration":
node = new(VariableDeclaration)
case "VariableDeclarator":
node = new(VariableDeclarator)
case "CallExpression":
node = new(CallExpression)
case "PipeExpression":
node = new(PipeExpression)
case "MemberExpression":
node = new(MemberExpression)
case "BinaryExpression":
node = new(BinaryExpression)
case "UnaryExpression":
node = new(UnaryExpression)
case "LogicalExpression":
node = new(LogicalExpression)
case "ObjectExpression":
node = new(ObjectExpression)
case "ConditionalExpression":
node = new(ConditionalExpression)
case "ArrayExpression":
node = new(ArrayExpression)
case "Identifier":
node = new(Identifier)
case "PipeLiteral":
node = new(PipeLiteral)
case "StringLiteral":
node = new(StringLiteral)
case "BooleanLiteral":
node = new(BooleanLiteral)
case "FloatLiteral":
node = new(FloatLiteral)
case "IntegerLiteral":
node = new(IntegerLiteral)
case "UnsignedIntegerLiteral":
node = new(UnsignedIntegerLiteral)
case "RegexpLiteral":
node = new(RegexpLiteral)
case "DurationLiteral":
node = new(DurationLiteral)
case "DateTimeLiteral":
node = new(DateTimeLiteral)
case "ArrowFunctionExpression":
node = new(ArrowFunctionExpression)
case "Property":
node = new(Property)
return nil, fmt.Errorf("unknown type %q", typ.Type)
if err := json.Unmarshal(msg, node); err != nil {
return nil, err
return node, nil
func UnmarshalNode(data []byte) (Node, error) {
return unmarshalNode((json.RawMessage)(data))
package ast_test
import (
// TestJSONMarshal is a table test with one case per node kind. For each
// case it checks that json.Marshal produces exactly the expected JSON text,
// and that ast.UnmarshalNode turns that text back into a node equal to the
// original under asttest.CompareOptions.
func TestJSONMarshal(t *testing.T) {
testCases := []struct {
name string
node ast.Node
want string
name: "simple program",
node: &ast.Program{
Body: []ast.Statement{
Expression: &ast.StringLiteral{Value: "hello"},
want: `{"type":"Program","body":[{"type":"ExpressionStatement","expression":{"type":"StringLiteral","value":"hello"}}]}`,
name: "block statement",
node: &ast.BlockStatement{
Body: []ast.Statement{
Expression: &ast.StringLiteral{Value: "hello"},
want: `{"type":"BlockStatement","body":[{"type":"ExpressionStatement","expression":{"type":"StringLiteral","value":"hello"}}]}`,
name: "expression statement",
node: &ast.ExpressionStatement{
Expression: &ast.StringLiteral{Value: "hello"},
want: `{"type":"ExpressionStatement","expression":{"type":"StringLiteral","value":"hello"}}`,
name: "return statement",
node: &ast.ReturnStatement{
Argument: &ast.StringLiteral{Value: "hello"},
want: `{"type":"ReturnStatement","argument":{"type":"StringLiteral","value":"hello"}}`,
name: "variable declaration",
node: &ast.VariableDeclaration{
Declarations: []*ast.VariableDeclarator{
ID: &ast.Identifier{Name: "a"},
Init: &ast.StringLiteral{Value: "hello"},
want: `{"type":"VariableDeclaration","declarations":[{"type":"VariableDeclarator","id":{"type":"Identifier","name":"a"},"init":{"type":"StringLiteral","value":"hello"}}]}`,
name: "variable declarator",
node: &ast.VariableDeclarator{
ID: &ast.Identifier{Name: "a"},
Init: &ast.StringLiteral{Value: "hello"},
want: `{"type":"VariableDeclarator","id":{"type":"Identifier","name":"a"},"init":{"type":"StringLiteral","value":"hello"}}`,
name: "call expression",
node: &ast.CallExpression{
Callee: &ast.Identifier{Name: "a"},
Arguments: []ast.Expression{&ast.StringLiteral{Value: "hello"}},
want: `{"type":"CallExpression","callee":{"type":"Identifier","name":"a"},"arguments":[{"type":"StringLiteral","value":"hello"}]}`,
name: "pipe expression",
node: &ast.PipeExpression{
Argument: &ast.Identifier{Name: "a"},
Call: &ast.CallExpression{
Callee: &ast.Identifier{Name: "a"},
Arguments: []ast.Expression{&ast.StringLiteral{Value: "hello"}},
want: `{"type":"PipeExpression","argument":{"type":"Identifier","name":"a"},"call":{"type":"CallExpression","callee":{"type":"Identifier","name":"a"},"arguments":[{"type":"StringLiteral","value":"hello"}]}}`,
name: "member expression",
node: &ast.MemberExpression{
Object: &ast.Identifier{Name: "a"},
Property: &ast.StringLiteral{Value: "hello"},
want: `{"type":"MemberExpression","object":{"type":"Identifier","name":"a"},"property":{"type":"StringLiteral","value":"hello"}}`,
name: "arrow function expression",
node: &ast.ArrowFunctionExpression{
Params: []*ast.Property{{Key: &ast.Identifier{Name: "a"}}},
Body: &ast.StringLiteral{Value: "hello"},
want: `{"type":"ArrowFunctionExpression","params":[{"type":"Property","key":{"type":"Identifier","name":"a"},"value":null}],"body":{"type":"StringLiteral","value":"hello"}}`,
name: "binary expression",
node: &ast.BinaryExpression{
Operator: ast.AdditionOperator,
Left: &ast.StringLiteral{Value: "hello"},
Right: &ast.StringLiteral{Value: "world"},
want: `{"type":"BinaryExpression","operator":"+","left":{"type":"StringLiteral","value":"hello"},"right":{"type":"StringLiteral","value":"world"}}`,
name: "unary expression",
node: &ast.UnaryExpression{
Operator: ast.NotOperator,
Argument: &ast.BooleanLiteral{Value: true},
want: `{"type":"UnaryExpression","operator":"not","argument":{"type":"BooleanLiteral","value":true}}`,
name: "logical expression",
node: &ast.LogicalExpression{
Operator: ast.OrOperator,
Left: &ast.BooleanLiteral{Value: false},
Right: &ast.BooleanLiteral{Value: true},
want: `{"type":"LogicalExpression","operator":"or","left":{"type":"BooleanLiteral","value":false},"right":{"type":"BooleanLiteral","value":true}}`,
name: "array expression",
node: &ast.ArrayExpression{
Elements: []ast.Expression{&ast.StringLiteral{Value: "hello"}},
want: `{"type":"ArrayExpression","elements":[{"type":"StringLiteral","value":"hello"}]}`,
name: "object expression",
node: &ast.ObjectExpression{
Properties: []*ast.Property{{
Key: &ast.Identifier{Name: "a"},
Value: &ast.StringLiteral{Value: "hello"},
want: `{"type":"ObjectExpression","properties":[{"type":"Property","key":{"type":"Identifier","name":"a"},"value":{"type":"StringLiteral","value":"hello"}}]}`,
name: "conditional expression",
node: &ast.ConditionalExpression{
Test: &ast.BooleanLiteral{Value: true},
Alternate: &ast.StringLiteral{Value: "false"},
Consequent: &ast.StringLiteral{Value: "true"},
want: `{"type":"ConditionalExpression","test":{"type":"BooleanLiteral","value":true},"alternate":{"type":"StringLiteral","value":"false"},"consequent":{"type":"StringLiteral","value":"true"}}`,
name: "property",
node: &ast.Property{
Key: &ast.Identifier{Name: "a"},
Value: &ast.StringLiteral{Value: "hello"},
want: `{"type":"Property","key":{"type":"Identifier","name":"a"},"value":{"type":"StringLiteral","value":"hello"}}`,
name: "identifier",
node: &ast.Identifier{
Name: "a",
want: `{"type":"Identifier","name":"a"}`,
name: "string literal",
node: &ast.StringLiteral{
Value: "hello",
want: `{"type":"StringLiteral","value":"hello"}`,
name: "boolean literal",
node: &ast.BooleanLiteral{
Value: true,
want: `{"type":"BooleanLiteral","value":true}`,
name: "float literal",
node: &ast.FloatLiteral{
Value: 42.1,
want: `{"type":"FloatLiteral","value":42.1}`,
// The two integer cases use the largest representable values and expect
// them back as JSON strings, digit for digit.
name: "integer literal",
node: &ast.IntegerLiteral{
Value: math.MaxInt64,
want: `{"type":"IntegerLiteral","value":"9223372036854775807"}`,
name: "unsigned integer literal",
node: &ast.UnsignedIntegerLiteral{
Value: math.MaxUint64,
want: `{"type":"UnsignedIntegerLiteral","value":"18446744073709551615"}`,
name: "regexp literal",
node: &ast.RegexpLiteral{
Value: regexp.MustCompile(`.*`),
want: `{"type":"RegexpLiteral","value":".*"}`,
name: "duration literal",
node: &ast.DurationLiteral{
Value: time.Hour + time.Minute,
want: `{"type":"DurationLiteral","value":"1h1m0s"}`,
name: "datetime literal",
node: &ast.DateTimeLiteral{
Value: time.Date(2017, 8, 8, 8, 8, 8, 8, time.UTC),
want: `{"type":"DateTimeLiteral","value":"2017-08-08T08:08:08.000000008Z"}`,
for _, tc := range testCases {
// Re-bind tc so the closure below sees this iteration's case.
tc := tc
// NOTE(review): the subtest name argument is missing in this view;
// presumably it is tc.name — confirm.
t.Run(, func(t *testing.T) {
// Encode and compare against the expected text byte for byte.
data, err := json.Marshal(tc.node)
if err != nil {
if got := string(data); got != tc.want {
t.Errorf("unexpected json data:\nwant:%s\ngot: %s\n", tc.want, got)
// Decode the produced text again and require an equal node.
node, err := ast.UnmarshalNode(data)
if err != nil {
if !cmp.Equal(tc.node, node, asttest.CompareOptions...) {
t.Errorf("unexpected node after unmarshalling: -want/+got:\n%s", cmp.Diff(tc.node, node, asttest.CompareOptions...))
## Benchmarks
The output generated by `curl` is formatted by `curl-format.txt`, which sits alongside this readme.
Telegraf is configured with the supplied `telegraf.conf` to capture two kinds of metrics: those exposed by ifqld and influxdb
through the Prometheus `/metrics` HTTP endpoint, and machine metrics including CPU usage and disk I/O. Note that the benchmarked
`influxd` is running on port `8186`, which leaves the default port free for a separate `influxd` that receives the metrics from Telegraf.
## Dataset #1
| | |
| ----- | ----- |
| series | 100,000 |
| pps | 3,000 |
| shards | 12 |
| pps / shard | 250 |
| total points | 300,000,000 |
**pps**: points per series
### Hardware
| | |
| ----- | ----- |
| AWS instance type | c3.4xlarge |
### Generate dataset
1. Use [ingen][ingen] to populate a database with data.
$ ingen -p=250 -t=1000,100 -shards=12 -start-time="2017-11-01T00:00:00Z" -data-path=~/.influxdb/data -meta-path=~/.influxdb/meta
The previous command will
* populate a database named `db` (default),
* create 100,000 series (1000×100),
* made up of 2 tag keys (`tag0` and `tag1`) each with 1000 and 100 tag values respectively.
* 250 points per series, per shard, for a total of 3,000 points per series.
* Points will start from `2017-11-01 00:00:00 UTC` and
* span 12 shards.
### ifql queries
Query #1
HOST=localhost:8093; curl -w "@curl-format.txt" -H 'Accept: text/plain' -o /dev/null -s http://${HOST}/query \
--data-urlencode 'q=from(db:"db").range(start:2017-11-01T00:00:00Z, stop:2017-11-02T00:00:00Z).filter(exp:{"_measurement" == "m0" and "_field" == "v0" and $ > 0}).sum()'
time_starttransfer: 0.138
size_download: 5800000
time_total: 7.578
Query #2
HOST=localhost:8093; curl -w "@curl-format.txt" -H 'Accept: text/plain' -o /dev/null -s http://${HOST}/query \
--data-urlencode 'q=from(db:"db").range(start:2017-11-01T00:00:00Z, stop:2017-11-05T00:00:00Z).filter(exp:{"_measurement" == "m0" and "_field" == "v0" and $ > 0}).sum()'
time_starttransfer: 0.305
size_download: 5900000
time_total: 17.909
Query #3
HOST=localhost:8093; curl -w "@curl-format.txt" -H 'Accept: text/plain' -o /dev/null -s http://${HOST}/query \
--data-urlencode 'q=from(db:"db").range(start:2017-11-01T00:00:00Z, stop:2017-11-05T00:00:00Z).filter(exp:{"_measurement" == "m0" and "_field" == "v0" and $ > 0}).group(by:["tag0"]).sum()'
time_starttransfer: 22.727
size_download: 60000
time_total: 22.730
Query #4
HOST=localhost:8093; curl -w "@curl-format.txt" -H 'Accept: text/plain' -o /dev/null -s http://${HOST}/query \
--data-urlencode 'q=from(db:"db").range(start:2017-11-01T00:00:00Z, stop:2017-11-13T00:00:00Z).filter(exp:{"_measurement" == "m0" and "_field" == "v0" and $ > 0}).sum()'
time_starttransfer: 0.713
size_download: 5900000
time_total: 44.159
Query #5
HOST=localhost:8093; curl -w "@curl-format.txt" -H 'Accept: text/plain' -o /dev/null -s http://${HOST}/query \
--data-urlencode 'q=from(db:"db").range(start:2017-11-01T00:00:00Z, stop:2017-11-13T00:00:00Z).filter(exp:{"_measurement" == "m0" and "_field" == "v0" and $ > 0}).group(by:["tag0"]).sum()'
time_starttransfer: 56.257
size_download: 60000
time_total: 56.261
## Dataset #2
| | |
| ----- | ----- |
| series | 10,000,000 |
| pps | 1,000 |
| shards | 4 |
| pps / shard | 250 |
| total points | 10,000,000,000 |
**pps**: points per series
### Hardware
| | |
| ----- | ----- |
| AWS instance type | c5.4xlarge |
### Generate dataset
1. Use [ingen][ingen] to populate a database with data.
$ ingen -p=250 -t=10000,100,10 -shards=4 -start-time="2017-11-01T00:00:00Z" -data-path=~/.influxdb/data -meta-path=~/.influxdb/meta
The previous command will
* populate a database named `db` (default),
* create 10,000,000 series (10000×100×10),
* made up of 3 tag keys (`tag0`, `tag1`, `tag2`) each with 10000, 100 and 10 tag values respectively.
* 250 points per series, per shard, for a total of 1,000 points per series.
* Points will start from `2017-11-01 00:00:00 UTC` and
* span 4 shards.
### ifql queries
Query #1
HOST=localhost:8093; curl -w "@curl-format.txt" -H 'Accept: text/plain' -o /dev/null -s http://${HOST}/query \
--data-urlencode 'q=from(db:"db").range(start:2017-11-01T00:00:00Z, stop:2017-11-05T00:00:00Z).filter(exp:{"_measurement" == "m0" and "_field" == "v0" and "tag1" == "value00"}).group(by:["tag0"]).sum()'
time_starttransfer: 0.325
size_download: 7200000
time_total: 11.437
Query #2
HOST=localhost:8093; curl -w "@curl-format.txt" -H 'Accept: text/plain' -o /dev/null -s http://${HOST}/query \
--data-urlencode 'q=from(db:"db").range(start:2017-11-01T00:00:00Z, stop:2017-11-05T00:00:00Z).filter(exp:{"_measurement" == "m0" and "_field" == "v0" and "tag1" == "value00"}).group(by:["tag0"]).sum()'
time_starttransfer: 13.174
size_download: 600000
time_total: 13.215
Query #3
HOST=localhost:8093; curl -w "@curl-format.txt" -H 'Accept: text/plain' -o /dev/null -s http://${HOST}/query \
--data-urlencode 'q=from(db:"db").range(start:2017-11-01T00:00:00Z, stop:2017-11-05T00:00:00Z).filter(exp:{"_measurement" == "m0" and "_field" == "v0"}).group(by:["tag0"]).sum()'
time_starttransfer: 1190.204
size_download: 620000
time_total: 1190.244
Query #4
HOST=localhost:8093; curl -w "@curl-format.txt" -H 'Accept: text/plain' -o /dev/null -s http://${HOST}/query \
--data-urlencode 'q=from(db:"db").range(start:2017-11-01T00:00:00Z, stop:2017-11-05T00:00:00Z).filter(exp:{"_measurement" == "m0" and "_field" == "v0"}).sum()'
time_starttransfer: 23.975
size_download: 720000000
time_total: 803.254
reporting-disabled = false
bind-address = ":8188"
dir = "/home/ubuntu/.influxdb/meta"
retention-autocreate = true
logging-enabled = true
dir = "/home/ubuntu/.influxdb/data"
index-version = "inmem"
wal-dir = "/home/ubuntu/.influxdb/wal"
wal-fsync-delay = "0s"
query-log-enabled = true
cache-max-memory-size = 1073741824
cache-snapshot-memory-size = 26214400
cache-snapshot-write-cold-duration = "10m0s"
compact-full-write-cold-duration = "4h0m0s"
max-series-per-database = 1000000
max-values-per-tag = 100000
max-concurrent-compactions = 0
trace-logging-enabled = false
write-timeout = "30s"
max-concurrent-queries = 0
#query-timeout = "5s"
log-queries-after = "0s"
max-select-point = 0
max-select-series = 0
max-select-buckets = 0
enabled = true
check-interval = "30m0s"
enabled = true
check-interval = "10m0s"
advance-period = "30m0s"
store-enabled = false
store-database = "_internal"
store-interval = "10s"
enabled = true
http-timeout = "30s"
insecure-skip-verify = false
ca-certs = ""
write-concurrency = 40
write-buffer-size = 1000
enabled = true
bind-address = ":8186"
auth-enabled = false
log-enabled = false
write-tracing = false
pprof-enabled = true
https-enabled = false
https-certificate = "/etc/ssl/influxdb.pem"
https-private-key = ""
max-row-limit = 0
max-connection-limit = 0
shared-secret = ""
realm = "InfluxDB"
unix-socket-enabled = false
bind-socket = "/var/run/influxdb.sock"
#max-body-size = 5
enabled = true
log-enabled = true
bind-address = ":8082"
enabled = false
bind-address = ":2003"
database = "graphite"
retention-policy = ""
protocol = "tcp"
batch-size = 5000
batch-pending = 10
batch-timeout = "1s"
consistency-level = "one"
separator = "."
udp-read-buffer = 0
enabled = false
bind-address = ":25826"
database = "collectd"
retention-policy = ""
batch-size = 5000
batch-pending = 10
batch-timeout = "10s"
read-buffer = 0
typesdb = "/usr/share/collectd/types.db"
security-level = "none"
auth-file = "/etc/collectd/auth_file"
enabled = false
bind-address = ":4242"
database = "opentsdb"
retention-policy = ""
consistency-level = "one"
tls-enabled = false
certificate = "/etc/ssl/influxdb.pem"
batch-size = 1000
batch-pending = 5
batch-timeout = "1s"
log-point-errors = true
enabled = false
bind-address = ":8089"
database = "udp"
retention-policy = ""
batch-size = 5000
batch-pending = 10
read-buffer = 0
batch-timeout = "1s"
precision = ""
log-enabled = true
enabled = true
run-interval = "1s"
time_starttransfer: %{time_starttransfer}\n
size_download: %{size_download}\n
time_total: %{time_total}\n
interval = "2s"
round_interval = true
metric_batch_size = 1000
metric_buffer_limit = 10000
collection_jitter = "10ms"
flush_interval = "10s"
flush_jitter = "1s"
precision = ""
debug = false
quiet = false
hostname = "stuart-bench-oss-0"
omit_hostname = false
percpu = true
totalcpu = true
fielddrop = ["time_*"]
ignore_fs = ["tmpfs", "devtmpfs"]
# Read metrics about disk IO by device
## By default, telegraf will gather stats for all devices including
## disk partitions.
## Setting devices will restrict the stats to the specified devices.
# devices = ["sda", "sdb"]
## Uncomment the following line if you need disk serial numbers.
# skip_serial_number = false
pattern = "influx"
prefix = ""
fielddrop = ["cpu_time_*"]
# Read metrics from one or many prometheus clients
## An array of urls to scrape metrics from.
urls = ["http://localhost:8093/metrics","http://localhost:8186/metrics"]
package query
import (
opentracing ""
// Names of the piped table parameter and of the properties a TableObject exposes.
const (
// TableParameter is the parameter name used for the piped-in table argument.
TableParameter = "table"
// Property keys served by TableObject.Get and TableObject.Range.
tableIDKey = "id"
tableKindKey = "kind"
tableParentsKey = "parents"
//tableSpecKey = "spec"
// Option adjusts the settings used by Compile.
type Option func(*options)

// Verbose returns an Option that sets the verbose flag to v.
func Verbose(v bool) Option {
	setVerbose := func(cfg *options) {
		cfg.verbose = v
	}
	return setVerbose
}

// options collects the settings that Option values can change.
type options struct {
	// verbose makes Compile log the resulting query spec.
	verbose bool
}
// Compile evaluates an IFQL script producing a query Spec.
func Compile(ctx context.Context, q string, opts ...Option) (*Spec, error) {
o := new(options)
for _, opt := range opts {
s, _ := opentracing.StartSpanFromContext(ctx, "parse")
astProg, err := parser.NewAST(q)
if err != nil {
return nil, err
s, _ = opentracing.StartSpanFromContext(ctx, "compile")
defer s.Finish()
qd := new(queryDomain)
scope, decls := builtIns(qd)
interpScope := interpreter.NewScopeWithValues(scope)
// Convert AST program to a semantic program
semProg, err := semantic.New(astProg, decls)
if err != nil {
return nil, err
if err := interpreter.Eval(semProg, interpScope); err != nil {
return nil, err
spec := qd.ToSpec()
if o.verbose {
log.Println("Query Spec: ", Formatted(spec, FmtJSON))
return spec, nil
// CreateOperationSpec builds the OperationSpec for one call of a builtin function
// from the call's arguments and the Administration created for that call.
type CreateOperationSpec func(args Arguments, a *Administration) (OperationSpec, error)
// builtinScope maps every registered builtin name to its value.
var builtinScope = make(map[string]values.Value)
// builtinDeclarations holds an external variable declaration for every registered builtin value.
var builtinDeclarations = make(semantic.DeclarationScope)
// list of builtin scripts
var builtins = make(map[string]string)
// finalized is set by FinalizeRegistration; the Register* functions panic once it is true.
var finalized bool
// RegisterBuiltIn adds any variable declarations in the script to the builtin scope.
func RegisterBuiltIn(name, script string) {
if finalized {
panic(errors.New("already finalized, cannot register builtin"))
builtins[name] = script
// RegisterFunction adds a new builtin top level function.
func RegisterFunction(name string, c CreateOperationSpec, sig semantic.FunctionSignature) {
f := function{
t: semantic.NewFunctionType(sig),
name: name,
createOpSpec: c,
RegisterBuiltInValue(name, f)
// RegisterBuiltInValue adds the value to the builtin scope.
func RegisterBuiltInValue(name string, v values.Value) {
if finalized {
panic(errors.New("already finalized, cannot register builtin"))
if _, ok := builtinScope[name]; ok {
panic(fmt.Errorf("duplicate registration for builtin %q", name))
builtinDeclarations[name] = semantic.NewExternalVariableDeclaration(name, v.Type())
builtinScope[name] = v
// FinalizeRegistration must be called to complete registration.
// Future calls to RegisterFunction, RegisterBuiltIn or RegisterBuiltInValue will panic.
// A second call to FinalizeRegistration itself panics as well.
func FinalizeRegistration() {
if finalized {
panic("already finalized")
finalized = true
// NOTE(review): the eager evaluation of the builtin scripts below is disabled; the same
// parse / semantic.New / interpreter.Eval sequence is run by builtIns on every call.
//for name, script := range builtins {
// astProg, err := parser.NewAST(script)
// if err != nil {
// panic(errors.Wrapf(err, "failed to parse builtin %q", name))
// }
// semProg, err := semantic.New(astProg, builtinDeclarations)
// if err != nil {
// panic(errors.Wrapf(err, "failed to create semantic graph for builtin %q", name))
// }
// if err := interpreter.Eval(semProg, builtinScope); err != nil {
// panic(errors.Wrapf(err, "failed to evaluate builtin %q", name))
// }
//// free builtins list
//builtins = nil
// TableObjectType is the semantic type reported by every TableObject: an object with
// string "id" and "kind" properties and a "parents" array of empty objects.
var TableObjectType = semantic.NewObjectType(map[string]semantic.Type{
tableIDKey: semantic.String,
tableKindKey: semantic.String,
// TODO(nathanielc): The spec types vary significantly making type comparisons impossible, for now the solution is to state the type as an empty object.
//tableSpecKey: semantic.EmptyObject,
// TODO(nathanielc): Support recursive types, for now we state that the array has empty objects.
tableParentsKey: semantic.NewArrayType(semantic.EmptyObject),
// TableObject is the value returned by a builtin function call. It describes one
// operation of the query together with the table objects that feed into it.
type TableObject struct {
// ID identifies the operation within the query.
ID OperationID
// Kind is the kind reported by Spec.
Kind OperationKind
// Spec is the operation spec created for the call.
Spec OperationSpec
// Parents holds the TableObject values this operation receives input from.
Parents values.Array
func (t TableObject) Operation() *Operation {
return &Operation{
ID: t.ID,
Spec: t.Spec,
func (t TableObject) String() string {
return fmt.Sprintf("{id: %q, kind: %q}", t.ID, t.Kind)
func (t TableObject) ToSpec() *Spec {
visited := make(map[OperationID]bool)
spec := new(Spec)
t.buildSpec(spec, visited)
return spec
func (t TableObject) buildSpec(spec *Spec, visited map[OperationID]bool) {
id := t.ID
t.Parents.Range(func(i int, v values.Value) {
p := v.(TableObject)
if !visited[p.ID] {
// rescurse up parents
p.buildSpec(spec, visited)
spec.Edges = append(spec.Edges, Edge{
Parent: p.ID,
Child: id,
visited[id] = true
spec.Operations = append(spec.Operations, t.Operation())
func (t TableObject) Type() semantic.Type {
return TableObjectType
func (t TableObject) Str() string {
panic(values.UnexpectedKind(semantic.Object, semantic.String))
func (t TableObject) Int() int64 {
panic(values.UnexpectedKind(semantic.Object, semantic.Int))
func (t TableObject) UInt() uint64 {
panic(values.UnexpectedKind(semantic.Object, semantic.UInt))
func (t TableObject) Float() float64 {
panic(values.UnexpectedKind(semantic.Object, semantic.Float))
func (t TableObject) Bool() bool {
panic(values.UnexpectedKind(semantic.Object, semantic.Bool))
func (t TableObject) Time() values.Time {
panic(values.UnexpectedKind(semantic.Object, semantic.Time))
func (t TableObject) Duration() values.Duration {
panic(values.UnexpectedKind(semantic.Object, semantic.Duration))
func (t TableObject) Regexp() *regexp.Regexp {
panic(values.UnexpectedKind(semantic.Object, semantic.Regexp))
func (t TableObject) Array() values.Array {
panic(values.UnexpectedKind(semantic.Object, semantic.Array))
func (t TableObject) Object() values.Object {
return t
func (t TableObject) Function() values.Function {
panic(values.UnexpectedKind(semantic.Object, semantic.Function))
func (t TableObject) Get(name string) (values.Value, bool) {
switch name {
case tableIDKey:
return values.NewStringValue(string(t.ID)), true
case tableKindKey:
return values.NewStringValue(string(t.Kind)), true
case tableParentsKey:
return t.Parents, true
return nil, false
func (t TableObject) Set(name string, v values.Value) {
//TableObject is immutable
func (t TableObject) Len() int {
return 3
func (t TableObject) Range(f func(name string, v values.Value)) {
f(tableIDKey, values.NewStringValue(string(t.ID)))
f(tableKindKey, values.NewStringValue(string(t.Kind)))
f(tableParentsKey, t.Parents)
// DefaultFunctionSignature returns a FunctionSignature for standard functions which accept a table piped argument.
// It is safe to modify the returned signature.
func DefaultFunctionSignature() semantic.FunctionSignature {
return semantic.FunctionSignature{
Params: map[string]semantic.Type{
TableParameter: TableObjectType,
ReturnType: TableObjectType,
PipeArgument: TableParameter,
func BuiltIns() (map[string]values.Value, semantic.DeclarationScope) {
qd := new(queryDomain)
return builtIns(qd)
func builtIns(qd *queryDomain) (map[string]values.Value, semantic.DeclarationScope) {
decls := builtinDeclarations.Copy()
scope := make(map[string]values.Value, len(builtinScope))
for k, v := range builtinScope {
if v.Type().Kind() == semantic.Function {
if f, ok := v.Function().(function); ok {
f.qd = qd
v = f
scope[k] = v
interpScope := interpreter.NewScopeWithValues(scope)
for name, script := range builtins {
astProg, err := parser.NewAST(script)
if err != nil {
panic(errors.Wrapf(err, "failed to parse builtin %q", name))
semProg, err := semantic.New(astProg, decls)
if err != nil {
panic(errors.Wrapf(err, "failed to create semantic graph for builtin %q", name))
if err := interpreter.Eval(semProg, interpScope); err != nil {
panic(errors.Wrapf(err, "failed to evaluate builtin %q", name))
return scope, decls
type Administration struct {
id OperationID
parents values.Array
func newAdministration(id OperationID) *Administration {
return &Administration{
id: id,
// TODO(nathanielc): Once we can support recursive types change this to,
// interpreter.NewArray(TableObjectType)
parents: values.NewArray(semantic.EmptyObject),
// AddParentFromArgs reads the args for the `table` argument and adds the value as a parent.
func (a *Administration) AddParentFromArgs(args Arguments) error {
parent, err := args.GetRequiredObject(TableParameter)
if err != nil {
return err
p, ok := parent.(TableObject)
if !ok {
return fmt.Errorf("argument is not a table object: got %T", parent)
return nil
// AddParent instructs the evaluation Context that a new edge should be created from the parent to the current operation.
// Duplicate parents will be removed, so the caller need not concern itself with which parents have already been added.
func (a *Administration) AddParent(np TableObject) {
// Check for duplicates
// by scanning the recorded parents for one whose ID equals np.ID.
found := false
a.parents.Range(func(i int, p values.Value) {
if p.(TableObject).ID == np.ID {
found = true
// NOTE(review): no statement follows this guard in the visible text; presumably np is
// appended to a.parents here. Confirm against the upstream source.
if !found {
type Domain interface {
ToSpec() *Spec
func NewDomain() Domain {
return new(queryDomain)
type queryDomain struct {
id int
operations []TableObject
func (d *queryDomain) NewID(name string) OperationID {
return OperationID(fmt.Sprintf("%s%d", name, d.nextID()))
func (d *queryDomain) nextID() int {
id :=
return id
func (d *queryDomain) ToSpec() *Spec {
spec := new(Spec)
visited := make(map[OperationID]bool)
for _, t := range d.operations {
t.buildSpec(spec, visited)
return spec
// function is the value registered for a builtin top level function.
type function struct {
// name is the name the function was registered under.
name string
// t is the function type built from the registered signature.
t semantic.Type
// createOpSpec produces the operation spec for a call.
createOpSpec CreateOperationSpec
// qd is the query domain the function records its operations in; builtIns sets it on a copy.
qd *queryDomain
func (f function) Type() semantic.Type {
return f.t
func (f function) Str() string {
panic(values.UnexpectedKind(semantic.Function, semantic.String))
func (f function) Int() int64 {
panic(values.UnexpectedKind(semantic.Function, semantic.Int))
func (f function) UInt() uint64 {
panic(values.UnexpectedKind(semantic.Function, semantic.UInt))
func (f function) Float() float64 {
panic(values.UnexpectedKind(semantic.Function, semantic.Float))
func (f function) Bool() bool {
panic(values.UnexpectedKind(semantic.Function, semantic.Bool))
func (f function) Time() values.Time {
panic(values.UnexpectedKind(semantic.Function, semantic.Time))
func (f function) Duration() values.Duration {
panic(values.UnexpectedKind(semantic.Function, semantic.Duration))
func (f function) Regexp() *regexp.Regexp {
panic(values.UnexpectedKind(semantic.Function, semantic.Regexp))
func (f function) Array() values.Array {
panic(values.UnexpectedKind(semantic.Function, semantic.Array))
func (f function) Object() values.Object {
panic(values.UnexpectedKind(semantic.Function, semantic.Object))
func (f function) Function() values.Function {
return f
func (f function) Call(argsObj values.Object) (values.Value, error) {
return interpreter.DoFunctionCall(, argsObj)
func (f function) call(args interpreter.Arguments) (values.Value, error) {
id := f.qd.NewID(
a := newAdministration(id)
spec, err := f.createOpSpec(Arguments{Arguments: args}, a)
if err != nil {
return nil, err
if a.parents.Len() > 1 {
// Always add parents in a consistent order
a.parents.Sort(func(i, j values.Value) bool {
return i.(TableObject).ID < j.(TableObject).ID
t := TableObject{
ID: id,
Kind: spec.Kind(),
Spec: spec,
Parents: a.parents,
f.qd.operations = append(f.qd.operations, t)
return t, nil
type specValue struct {
spec OperationSpec
func (v specValue) Type() semantic.Type {
return semantic.EmptyObject
func (v specValue) Value() interface{} {
return v.spec
func (v specValue) Property(name string) (interpreter.Value, error) {
return nil, errors.New("spec does not have properties")
type Arguments struct {
func (a Arguments) GetTime(name string) (Time, bool, error) {
v, ok := a.Get(name)
if !ok {
return Time{}, false, nil
qt, err := ToQueryTime(v)
if err != nil {
return Time{}, ok, err
return qt, ok, nil
func (a Arguments) GetRequiredTime(name string) (Time, error) {
qt, ok, err := a.GetTime(name)
if err != nil {
return Time{}, err
if !ok {
return Time{}, fmt.Errorf("missing required keyword argument %q", name)
return qt, nil
func (a Arguments) GetDuration(name string) (Duration, bool, error) {
v, ok := a.Get(name)
if !ok {
return 0, false, nil
return Duration(v.Duration()), true, nil
func (a Arguments) GetRequiredDuration(name string) (Duration, error) {
d, ok, err := a.GetDuration(name)
if err != nil {
return 0, err
if !ok {
return 0, fmt.Errorf("missing required keyword argument %q", name)
return d, nil
func ToQueryTime(value values.Value) (Time, error) {
switch value.Type().Kind() {
case semantic.Time:
return Time{
Absolute: value.Time().Time(),
}, nil
case semantic.Duration:
return Time{
Relative: value.Duration().Duration(),
IsRelative: true,
}, nil
case semantic.Int:
return Time{
Absolute: time.Unix(value.Int(), 0),
}, nil
return Time{}, fmt.Errorf("value is not a time, got %v", value.Type())
package compiler
import (
func Compile(f *semantic.FunctionExpression, inTypes map[string]semantic.Type, builtinScope Scope, builtinDeclarations semantic.DeclarationScope) (Func, error) {
if builtinDeclarations == nil {
builtinDeclarations = make(semantic.DeclarationScope)
for k, t := range inTypes {
builtinDeclarations[k] = semantic.NewExternalVariableDeclaration(k, t)
semantic.SolveTypes(f, builtinDeclarations)
declarations := make(map[string]semantic.VariableDeclaration, len(inTypes))
for k, t := range inTypes {
declarations[k] = semantic.NewExternalVariableDeclaration(k, t)
f = f.Copy().(*semantic.FunctionExpression)
semantic.ApplyNewDeclarations(f, declarations)
root, err := compile(f.Body, builtinScope)
if err != nil {
return nil, err
cpy := make(map[string]semantic.Type)
for k, v := range inTypes {
cpy[k] = v
return compiledFn{
root: root,
inTypes: cpy,
}, nil
func compile(n semantic.Node, builtIns Scope) (Evaluator, error) {
switch n := n.(type) {
case *semantic.BlockStatement:
body := make([]Evaluator, len(n.Body))
for i, s := range n.Body {
node, err := compile(s, builtIns)
if err != nil {
return nil, err
body[i] = node
return &blockEvaluator{
t: n.ReturnStatement().Argument.Type(),
body: body,
}, nil
case *semantic.ExpressionStatement:
return nil, errors.New("statement does nothing, sideffects are not supported by the compiler")
case *semantic.ReturnStatement:
node, err := compile(n.Argument, builtIns)
if err != nil {
return nil, err
return returnEvaluator{
Evaluator: node,
}, nil
case *semantic.NativeVariableDeclaration:
node, err := compile(n.Init, builtIns)
if err != nil {
return nil, err
return &declarationEvaluator{
t: n.Init.Type(),
id: n.Identifier.Name,
init: node,
}, nil
case *semantic.ObjectExpression:
properties := make(map[string]Evaluator, len(n.Properties))
propertyTypes := make(map[string]semantic.Type, len(n.Properties))
for _, p := range n.Properties {
node, err := compile(p.Value, builtIns)
if err != nil {
return nil, err
properties[p.Key.Name] = node
propertyTypes[p.Key.Name] = node.Type()
return &objEvaluator{
t: semantic.NewObjectType(propertyTypes),
properties: properties,
}, nil
case *semantic.IdentifierExpression:
if v, ok := builtIns[n.Name]; ok {
//Resolve any built in identifiers now
return &valueEvaluator{
value: v,
}, nil
return &identifierEvaluator{
t: n.Type(),
name: n.Name,
}, nil
case *semantic.MemberExpression:
object, err := compile(n.Object, builtIns)
if err != nil {
return nil, err
return &memberEvaluator{
t: n.Type(),
object: object,
property: n.Property,
}, nil
case *semantic.BooleanLiteral:
return &booleanEvaluator{
t: n.Type(),
b: n.Value,
}, nil
case *semantic.IntegerLiteral:
return &integerEvaluator{
t: n.Type(),
i: n.Value,
}, nil
case *semantic.FloatLiteral:
return &floatEvaluator{
t: n.Type(),
f: n.Value,
}, nil
case *semantic.StringLiteral:
return &stringEvaluator{
t: n.Type(),
s: n.Value,
}, nil
case *semantic.RegexpLiteral:
return ®expEvaluator{
t: n.Type(),
r: n.Value,
}, nil
case *semantic.DateTimeLiteral:
return &timeEvaluator{
t: n.Type(),
time: values.ConvertTime(n.Value),
}, nil
case *semantic.UnaryExpression:
node, err := compile(n.Argument, builtIns)
if err != nil {
return nil, err
return &unaryEvaluator{
t: n.Type(),
node: node,
}, nil
case *semantic.LogicalExpression:
l, err := compile(n.Left, builtIns)
if err != nil {
return nil, err
r, err := compile(n.Right, builtIns)
if err != nil {
return nil, err
return &logicalEvaluator{
t: n.Type(),
operator: n.Operator,
left: l,
right: r,
}, nil
case *semantic.BinaryExpression:
l, err := compile(n.Left, builtIns)
if err != nil {
return nil, err
lt := l.Type()
r, err := compile(n.Right, builtIns)
if err != nil {
return nil, err
rt := r.Type()
f, err := values.LookupBinaryFunction(values.BinaryFuncSignature{
Operator: n.Operator,
Left: lt,
Right: rt,
if err != nil {
return nil, err
return &binaryEvaluator{
t: n.Type(),
left: l,
right: r,
f: f,
}, nil
case *semantic.CallExpression:
callee, err := compile(n.Callee, builtIns)
if err != nil {
return nil, err
args, err := compile(n.Arguments, builtIns)
if err != nil {
return nil, err
return &callEvaluator{
t: n.Type(),
callee: callee,
args: args,
}, nil
case *semantic.FunctionExpression:
body, err := compile(n.Body, builtIns)
if err != nil {
return nil, err
params := make([]functionParam, len(n.Params))
for i, param := range n.Params {
params[i] = functionParam{
Key: param.Key.Name,
Type: param.Type(),
if param.Default != nil {
d, err := compile(param.Default, builtIns)
if err != nil {
return nil, err
params[i].Default = d
return &functionEvaluator{
t: n.Type(),
params: params,
body: body,
}, nil
return nil, fmt.Errorf("unknown semantic node of type %T", n)
// CompilationCache caches compilation results based on the types of the input parameters.
type CompilationCache struct {
fn *semantic.FunctionExpression
root *compilationCacheNode
func NewCompilationCache(fn *semantic.FunctionExpression, scope Scope, decls semantic.DeclarationScope) *CompilationCache {
return &CompilationCache{
fn: fn,
root: &compilationCacheNode{
scope: scope,
decls: decls,
// Compile returnes a compiled function bsaed on the provided types.
// The result will be cached for subsequent calls.
func (c *CompilationCache) Compile(types map[string]semantic.Type) (Func, error) {
return c.root.compile(c.fn, 0, types)
// compilationCacheNode is one level of the cache tree: each level branches on the type
// of one function parameter, and the node reached after the last parameter holds the result.
type compilationCacheNode struct {
// scope and decls are passed to Compile and copied into every child node.
scope Scope
decls semantic.DeclarationScope
// children maps the type of the parameter at this level to the next node; nil until first use.
children map[semantic.Type]*compilationCacheNode
// fn and err hold the cached result of Compile once it has run for this node.
fn Func
err error
// compile recursively searches for a matching child node that has compiled the function.
// If the compilation has not been performed previously its result is cached and returned.
func (c *compilationCacheNode) compile(fn *semantic.FunctionExpression, idx int, types map[string]semantic.Type) (Func, error) {
if idx == len(fn.Params) {
// We are the matching child, return the cached result or do the compilation.
if c.fn == nil && c.err == nil {
c.fn, c.err = Compile(fn, types, c.scope, c.decls)
return c.fn, c.err
// Find the matching child based on the order.
next := fn.Params[idx].Key.Name
t := types[next]
child := c.children[t]
if child == nil {
child = &compilationCacheNode{
scope: c.scope,
decls: c.decls,
if c.children == nil {
c.children = make(map[semantic.Type]*compilationCacheNode)
c.children[t] = child
return child.compile(fn, idx+1, types)
package compiler_test
import (
var CmpOptions []cmp.Option
func init() {
CmpOptions = append(semantictest.CmpOptions, cmp.Comparer(ValueEqual))
func ValueEqual(x, y values.Value) bool {
if x.Type() != y.Type() {
return false
switch k := x.Type().Kind(); k {
case semantic.Bool:
return x.Bool() == y.Bool()
case semantic.UInt:
return x.UInt() == y.UInt()
case semantic.Int:
return x.Int() == y.Int()
case semantic.Float:
return x.Float() == y.Float()
case semantic.String:
return x.Str() == y.Str()
case semantic.Time:
return x.Time() == y.Time()
case semantic.Object:
return cmp.Equal(x.Object(), y.Object(), CmpOptions...)
return false
// TestCompilationCache compiles an addition function through one shared cache for float,
// int and uint inputs. For each case it compiles twice, expects both results to be deeply
// equal, and expects both to evaluate to the wanted sum.
// NOTE(review): the closing delimiters of the literals, the bodies of the `if err != nil`
// branches and the first argument of t.Run are not present in the visible text; restore
// them from the upstream source before building.
func TestCompilationCache(t *testing.T) {
// add is the function under test: (a, b) => a + b.
add := &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{
{Key: &semantic.Identifier{Name: "a"}},
{Key: &semantic.Identifier{Name: "b"}},
Body: &semantic.BinaryExpression{
Operator: ast.AdditionOperator,
Left: &semantic.IdentifierExpression{Name: "a"},
Right: &semantic.IdentifierExpression{Name: "b"},
testCases := []struct {
name string
types map[string]semantic.Type
scope map[string]values.Value
want values.Value
name: "floats",
types: map[string]semantic.Type{
"a": semantic.Float,
"b": semantic.Float,
scope: map[string]values.Value{
"a": values.NewFloatValue(5),
"b": values.NewFloatValue(4),
want: values.NewFloatValue(9),
name: "ints",
types: map[string]semantic.Type{
"a": semantic.Int,
"b": semantic.Int,
scope: map[string]values.Value{
"a": values.NewIntValue(5),
"b": values.NewIntValue(4),
want: values.NewIntValue(9),
name: "uints",
types: map[string]semantic.Type{
"a": semantic.UInt,
"b": semantic.UInt,
scope: map[string]values.Value{
"a": values.NewUIntValue(5),
"b": values.NewUIntValue(4),
want: values.NewUIntValue(9),
//Reuse the same cache for all test cases
cache := compiler.NewCompilationCache(add, nil, nil)
for _, tc := range testCases {
tc := tc
t.Run(, func(t *testing.T) {
// Compile twice with identical types; the second call should be served from the cache.
f0, err := cache.Compile(tc.types)
if err != nil {
f1, err := cache.Compile(tc.types)
if err != nil {
if !reflect.DeepEqual(f0, f1) {
t.Errorf("unexpected new compilation result")
got0, err := f0.Eval(tc.scope)
if err != nil {
got1, err := f1.Eval(tc.scope)
if err != nil {
if !cmp.Equal(got0, tc.want, CmpOptions...) {
t.Errorf("unexpected eval result -want/+got\n%s", cmp.Diff(tc.want, got0, CmpOptions...))
if !cmp.Equal(got0, got1, CmpOptions...) {
t.Errorf("unexpected differing results -got0/+got1\n%s", cmp.Diff(got0, got1, CmpOptions...))
// TestCompileAndEval compiles each case's function expression with compiler.Compile,
// evaluates it against the case's scope and compares the result with the wanted value.
// NOTE(review): the closing delimiters of the literals, some opening literal lines in the
// third case and the first argument of t.Run are not present in the visible text; restore
// them from the upstream source before building.
func TestCompileAndEval(t *testing.T) {
testCases := []struct {
name string
fn *semantic.FunctionExpression
types map[string]semantic.Type
scope map[string]values.Value
want values.Value
wantErr bool
// Case: the body is just the parameter itself.
name: "simple ident return",
fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{
{Key: &semantic.Identifier{Name: "r"}},
Body: &semantic.IdentifierExpression{Name: "r"},
types: map[string]semantic.Type{
"r": semantic.Int,
scope: map[string]values.Value{
"r": values.NewIntValue(4),
want: values.NewIntValue(4),
wantErr: false,
// Case: the body calls an inline function expression with a=1 and b=r.
name: "call function",
fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{
{Key: &semantic.Identifier{Name: "r"}},
Body: &semantic.CallExpression{
Callee: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{
{Key: &semantic.Identifier{Name: "a"}, Default: &semantic.IntegerLiteral{Value: 1}},
{Key: &semantic.Identifier{Name: "b"}, Default: &semantic.IntegerLiteral{Value: 1}},
Body: &semantic.BinaryExpression{
Operator: ast.AdditionOperator,
Left: &semantic.IdentifierExpression{Name: "a"},
Right: &semantic.IdentifierExpression{Name: "b"},
Arguments: &semantic.ObjectExpression{
Properties: []*semantic.Property{
{Key: &semantic.Identifier{Name: "a"}, Value: &semantic.IntegerLiteral{Value: 1}},
{Key: &semantic.Identifier{Name: "b"}, Value: &semantic.IdentifierExpression{Name: "r"}},
types: map[string]semantic.Type{
"r": semantic.Int,
scope: map[string]values.Value{
"r": values.NewIntValue(4),
want: values.NewIntValue(5),
wantErr: false,
// Case: the same function is first bound to the identifier f and then called through it.
name: "call function via identifier",
fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{
{Key: &semantic.Identifier{Name: "r"}},
Body: &semantic.BlockStatement{
Body: []semantic.Statement{
Identifier: &semantic.Identifier{Name: "f"}, Init: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{
{Key: &semantic.Identifier{Name: "a"}, Default: &semantic.IntegerLiteral{Value: 1}},
{Key: &semantic.Identifier{Name: "b"}, Default: &semantic.IntegerLiteral{Value: 1}},
Body: &semantic.BinaryExpression{
Operator: ast.AdditionOperator,
Left: &semantic.IdentifierExpression{Name: "a"},
Right: &semantic.IdentifierExpression{Name: "b"},
Argument: &semantic.CallExpression{
Callee: &semantic.IdentifierExpression{Name: "f"},
Arguments: &semantic.ObjectExpression{
Properties: []*semantic.Property{
{Key: &semantic.Identifier{Name: "a"}, Value: &semantic.IntegerLiteral{Value: 1}},
{Key: &semantic.Identifier{Name: "b"}, Value: &semantic.IdentifierExpression{Name: "r"}},
types: map[string]semantic.Type{
"r": semantic.Int,
scope: map[string]values.Value{
"r": values.NewIntValue(4),
want: values.NewIntValue(5),
wantErr: false,
for _, tc := range testCases {
tc := tc
t.Run(, func(t *testing.T) {
f, err := compiler.Compile(tc.fn, tc.types, nil, nil)
// A compile error is fatal unless the case expects one.
if tc.wantErr != (err != nil) {
t.Fatalf("unexpected error %s", err)
got, err := f.Eval(tc.scope)
if tc.wantErr != (err != nil) {
t.Errorf("unexpected error %s", err)
if !cmp.Equal(tc.want, got, CmpOptions...) {
t.Errorf("unexpected value -want/+got\n%s", cmp.Diff(tc.want, got, CmpOptions...))
// The compiler package provides a compiler and Go runtime for a subset of the IFQL language.
// Only pure functions are supported by the compiler.
// A function is compiled and then may be called repeatedly with different arguments.
// The function must be pure, meaning it has no side effects. Other language features are not supported.
// This runtime is not portable by design. The runtime consists of Go types that have been constructed based on the IFQL function being compiled.
// Those types are not serializable and cannot be transported to other systems or environments.
// This design is intended to limit the scope under which compilation must be supported.
package compiler
File diff suppressed because it is too large
Load Diff
package complete
import (
// functionType is implemented by semantic types that can list their parameters by name.
type functionType interface {
Params() map[string]semantic.Type
// FunctionSuggestion provides information about a function
type FunctionSuggestion struct {
// Params maps each parameter name to the string form of its kind.
Params map[string]string
// Completer provides methods for suggestions in IFQL queries
type Completer struct {
// scope supplies the names returned by Names.
scope *interpreter.Scope
// declarations backs Declaration, FunctionNames and FunctionSuggestion.
declarations semantic.DeclarationScope
// NewCompleter creates a new completer from scope and declarations
func NewCompleter(scope *interpreter.Scope, declarations semantic.DeclarationScope) Completer {
return Completer{scope: scope, declarations: declarations}
// Names returns the slice of names of declared expressions
func (c Completer) Names() []string {
names := c.scope.Names()
return names
// Declaration returns a declaration based on the expression name, if one exists
func (c Completer) Declaration(name string) (semantic.VariableDeclaration, error) {
d, ok := c.declarations[name]
if !ok {
return d, errors.New("could not find declaration")
return d, nil
// FunctionNames returns all declaration names of the Function Kind
func (c Completer) FunctionNames() []string {
funcs := []string{}
for name, d := range c.declarations {
if isFunction(d) {
funcs = append(funcs, name)
return funcs
// FunctionSuggestion returns information needed for autocomplete suggestions for a function
func (c Completer) FunctionSuggestion(name string) (FunctionSuggestion, error) {
var s FunctionSuggestion
d, err := c.Declaration(name)
if err != nil {
return s, err
if !isFunction(d) {
return s, fmt.Errorf("name ( %s ) is not a function", name)
funcType, ok := d.InitType().(functionType)
if !ok {
return s, errors.New("could not cast function type")
params := map[string]string{}
for k, v := range funcType.Params() {
params[k] = v.Kind().String()
s = FunctionSuggestion{
Params: params,
return s, nil
func isFunction(d semantic.VariableDeclaration) bool {
return d.InitType().Kind() == semantic.Function
package complete
import (
_ ""
var scope *interpreter.Scope
var declarations semantic.DeclarationScope
func init() {
s, d := query.BuiltIns()
scope = interpreter.NewScopeWithValues(s)
declarations = d
// TestNames sets two names on a fresh scope and compares Completer.Names with an expected list.
// NOTE(review): the elements of `expected` and the closing delimiters are not present in the
// visible text; presumably the list names "boom" and "tick". Confirm against upstream.
func TestNames(t *testing.T) {
s := interpreter.NewScope()
// v stays nil: only the names matter to this test.
var v values.Value
s.Set("boom", v)
s.Set("tick", v)
c := NewCompleter(s, semantic.DeclarationScope{})
results := c.Names()
expected := []string{
if !cmp.Equal(results, expected) {
t.Error(cmp.Diff(results, expected), "unexpected names from declarations")
func TestDeclaration(t *testing.T) {
name := "range"
expected := declarations[name].ID()
declaration, _ := NewCompleter(scope, declarations).Declaration(name)
result := declaration.ID()
if !cmp.Equal(result, expected) {
t.Error(cmp.Diff(result, expected), "unexpected declaration for name")
// TestFunctionNames declares one function ("boom") and one string ("noBoom") and compares
// Completer.FunctionNames with an expected list.
// NOTE(review): the elements of `expected` and the closing delimiters are not present in the
// visible text; presumably the list holds only "boom". Confirm against upstream.
func TestFunctionNames(t *testing.T) {
d := make(semantic.DeclarationScope)
d["boom"] = semantic.NewExternalVariableDeclaration(
"boom", semantic.NewFunctionType(semantic.FunctionSignature{}))
d["noBoom"] = semantic.NewExternalVariableDeclaration("noBoom", semantic.String)
s := interpreter.NewScope()
c := NewCompleter(s, d)
results := c.FunctionNames()
expected := []string{
if !cmp.Equal(results, expected) {
t.Error(cmp.Diff(results, expected), "unexpected function names")
func TestFunctionSuggestion(t *testing.T) {
name := "range"
result, _ := NewCompleter(scope, declarations).FunctionSuggestion(name)
expected := FunctionSuggestion{
Params: map[string]string{
"start": semantic.Time.String(),
"stop": semantic.Time.String(),
"table": query.TableObjectType.Kind().String(),
if !cmp.Equal(result, expected) {
t.Error(cmp.Diff(result, expected), "does not match expected suggestion")
package control
import (
opentracing ""
// Controller provides a central location to manage all incoming queries.
// The controller is responsible for queueing, planning, and executing queries.
type Controller struct {
newQueries chan *Query
lastID QueryID
queriesMu sync.RWMutex
queries map[QueryID]*Query
queryDone chan *Query
cancelRequest chan QueryID
verbose bool
lplanner plan.LogicalPlanner
pplanner plan.Planner
executor execute.Executor
maxConcurrency int
availableConcurrency int
availableMemory int64
type Config struct {
ConcurrencyQuota int
MemoryBytesQuota int64
ExecutorDependencies execute.Dependencies
Verbose bool
// QueryID identifies a Query within a Controller; it is the key type of
// Controller.queries and the element type of the cancelRequest channel.
type QueryID uint64
// New creates a Controller configured from c. Both maxConcurrency and the
// initial availableConcurrency come from c.ConcurrencyQuota, and the initial
// availableMemory comes from c.MemoryBytesQuota.
// NOTE(review): this excerpt does not show where the run loop is started — confirm.
func New(c Config) *Controller {
ctrl := &Controller{
// All three channels are unbuffered, so a send blocks until the
// receiving side is ready.
newQueries: make(chan *Query),
queries: make(map[QueryID]*Query),
queryDone: make(chan *Query),
cancelRequest: make(chan QueryID),
maxConcurrency: c.ConcurrencyQuota,
availableConcurrency: c.ConcurrencyQuota,
availableMemory: c.MemoryBytesQuota,
lplanner: plan.NewLogicalPlanner(),
pplanner: plan.NewPlanner(),
executor: execute.NewExecutor(c.ExecutorDependencies),
verbose: c.Verbose,
return ctrl
// QueryWithCompile submits a query for execution returning immediately.
// The query will first be compiled before submitting for execution.
// Done must be called on any returned Query objects.
//
// If compilation fails, a nil Query is returned together with the error.
// If enqueueing fails, the created Query is still returned alongside the error.
func (c *Controller) QueryWithCompile(ctx context.Context, orgID id.ID, queryStr string) (*Query, error) {
q := c.createQuery(ctx, orgID)
err := c.compileQuery(q, queryStr)
if err != nil {
return nil, err
err = c.enqueueQuery(q)
return q, err
// Query submits a query for execution returning immediately.
// The spec must not be modified while the query is still active.
// Done must be called on any returned Query objects.
//
// The created Query is returned even when enqueueing fails; callers must
// check the returned error.
func (c *Controller) Query(ctx context.Context, orgID id.ID, qSpec *query.Spec) (*Query, error) {
q := c.createQuery(ctx, orgID)
// The spec struct is copied by value into the query.
q.spec = *qSpec
err := c.enqueueQuery(q)
return q, err
// createQuery allocates a new Query in the Created state for the given
// organization. The query gets a fresh ID from nextID, a cancellable context
// derived from ctx, and the current UTC time as its "now".
func (c *Controller) createQuery(ctx context.Context, orgID id.ID) *Query {
id := c.nextID()
cctx, cancel := context.WithCancel(ctx)
// Capacity 1 lets a single result be sent without a waiting receiver.
ready := make(chan map[string]execute.Result, 1)
return &Query{
id: id,
orgID: orgID,
labelValues: []string{
state: Created,
c: c,
now: time.Now().UTC(),
ready: ready,
parentCtx: cctx,
cancel: cancel,
// compileQuery moves q into the Compiling state and compiles queryStr into a
// spec, which is stored (by value) on q. It returns an error if the state
// transition is refused or if compilation fails.
func (c *Controller) compileQuery(q *Query, queryStr string) error {
if !q.tryCompile() {
return errors.New("failed to transition query to compiling state")
spec, err := query.Compile(q.compilingCtx, queryStr, query.Verbose(c.verbose))
if err != nil {
return errors.Wrap(err, "failed to compile query")
q.spec = *spec
return nil
// enqueueQuery moves q into the Queueing state, validates its spec and hands
// it to the run loop via c.newQueries. It returns an error if the state
// transition is refused or the spec is invalid.
func (c *Controller) enqueueQuery(q *Query) error {
// In verbose mode the spec is logged before it is validated.
if c.verbose {
log.Println("query", query.Formatted(&q.spec, query.FmtJSON))
if !q.tryQueue() {
return errors.New("failed to transition query to queueing state")
if err := q.spec.Validate(); err != nil {
return errors.Wrap(err, "invalid query")
// Add query to the queue
c.newQueries <- q
return nil
// nextID returns c.lastID once the loop has found a value that is not
// already a key in c.queries.
// NOTE(review): only a read-unlock is visible here; if lastID is advanced
// inside this loop, concurrent callers would need an exclusive lock — confirm.
func (c *Controller) nextID() QueryID {
defer c.queriesMu.RUnlock()
ok := true
// Keep looping while the candidate ID is still in use.
for ok {
_, ok = c.queries[c.lastID]
return c.lastID
// Queries reports the active queries.
// The result is a newly allocated slice of the values in c.queries; because
// it is filled by ranging over a map, its order is unspecified.
func (c *Controller) Queries() []*Query {
defer c.queriesMu.RUnlock()
queries := make([]*Query, 0, len(c.queries))
for _, q := range c.queries {
queries = append(queries, q)
return queries
// run is the controller's scheduling loop. Each iteration waits on one of the
// controller's channels (finished queries, new queries, cancel requests) and
// then tries to make progress on the query at the head of the priority queue.
func (c *Controller) run() {
pq := newPriorityQueue()
for {
select {
// Wait for resources to free
case q := <-c.queryDone:
// Wait for new queries
case q := <-c.newQueries:
c.queries[] = q
// Wait for cancel query requests
case id := <-c.cancelRequest:
q := c.queries[id]
// Peek at head of priority queue
q := pq.Peek()
if q != nil {
err := c.processQuery(pq, q)
if err != nil {
// setErr is run on its own goroutine rather than inline.
// NOTE(review): presumably because it ends up sending on queryDone,
// which only this loop receives — confirm.
go q.setErr(err)
// processQuery advances q, the current head of pq, by one scheduling step.
// If q can move to the Planning state it is planned (logical, then physical)
// and its concurrency and memory needs are recorded, with concurrency capped
// at c.maxConcurrency. If the controller then has enough free resources the
// query is moved to Executing and handed to the executor; otherwise it is
// moved to Requeueing. Any failed step is returned as an error.
func (c *Controller) processQuery(pq *PriorityQueue, q *Query) error {
if q.tryPlan() {
// Plan query to determine needed resources
lp, err := c.lplanner.Plan(&q.spec)
if err != nil {
return errors.Wrap(err, "failed to create logical plan")
if c.verbose {
log.Println("logical plan", plan.Formatted(lp))
p, err := c.pplanner.Plan(lp, nil,
if err != nil {
return errors.Wrap(err, "failed to create physical plan")
q.plan = p
q.concurrency = p.Resources.ConcurrencyQuota
// Never ask for more concurrency than the controller can ever grant.
if q.concurrency > c.maxConcurrency {
q.concurrency = c.maxConcurrency
q.memory = p.Resources.MemoryBytesQuota
if c.verbose {
log.Println("physical plan", plan.Formatted(q.plan))
// Check if we have enough resources
if c.check(q) {
// Update resource gauges
// Remove the query from the queue
// Execute query
if !q.tryExec() {
return errors.New("failed to transition query into executing state")
r, err := c.executor.Execute(q.executeCtx, q.orgID, q.plan)
if err != nil {
return errors.Wrap(err, "failed to execute query")
} else {
// update state to queueing
if !q.tryRequeue() {
return errors.New("failed to transition query into requeueing state")
return nil
// check reports whether the controller currently has enough free concurrency
// and memory to run q. A memory requirement of math.MaxInt64 skips the memory
// comparison entirely.
func (c *Controller) check(q *Query) bool {
return c.availableConcurrency >= q.concurrency && (q.memory == math.MaxInt64 || c.availableMemory >= q.memory)
// consume subtracts q's concurrency from the available pool, and its memory
// too unless the memory requirement is math.MaxInt64.
func (c *Controller) consume(q *Query) {
c.availableConcurrency -= q.concurrency
if q.memory != math.MaxInt64 {
c.availableMemory -= q.memory
// free is the inverse of consume: it returns q's concurrency to the available
// pool, and its memory too unless the memory requirement is math.MaxInt64.
func (c *Controller) free(q *Query) {
c.availableConcurrency += q.concurrency
if q.memory != math.MaxInt64 {
c.availableMemory += q.memory
// Query represents a single request.
type Query struct {
id QueryID
orgID id.ID
labelValues []string
// c is the Controller that created this query.
c *Controller
// spec is held by value; it is filled in by Controller.Query or by
// compileQuery.
spec query.Spec
// now is the UTC time at which createQuery built this query.
now time.Time
err error
// ready is the channel exposed by Ready; setResults sends on it.
ready chan map[string]execute.Result
mu sync.Mutex
state State
cancel func()
executeCtx context.Context
executeSpan *span
// plan, concurrency and memory are populated by processQuery once the
// query has been planned.
plan *plan.PlanSpec
concurrency int
memory int64
// ID reports an ephemeral unique ID for the query.
// The ID is of type QueryID, the key type of the controller's query map.
func (q *Query) ID() QueryID {
// OrganizationID returns the organization ID the query was created with.
func (q *Query) OrganizationID() id.ID {
return q.orgID
// Spec returns a pointer to the query's own spec (not a copy), so changes
// made through the pointer affect the query.
func (q *Query) Spec() *query.Spec {
return &q.spec
// Cancel will stop the query execution.
// Unless the query has already errored, its state becomes Canceled.
func (q *Query) Cancel() {
// call cancel func
// Finish the query immediately.
// This allows for receiving from the Ready channel in the same goroutine
// that has called defer q.Done()
if q.state != Errored {
q.state = Canceled
// Ready returns a channel that will deliver the query results.
// It's possible that the channel is closed before any results arrive, in which case the query should be
// inspected for an error using Err().
// The channel is returned receive-only.
func (q *Query) Ready() <-chan map[string]execute.Result {
return q.ready
// finish informs the controller and the Ready channel that the query is finished.
// It switches on the current state and ends by sending the query on the
// controller's queryDone channel, which the run loop receives from.
func (q *Query) finish() {
switch q.state {
case Compiling:
case Queueing:
case Planning:
case Requeueing:
case Executing:
case Errored:
// The query has already been finished in the call to setErr.
case Canceled:
// The query has already been finished in the call to Cancel.
case Finished:
// The query has already finished
panic("unreachable, all states have been accounted for")
q.c.queryDone <- q
// Done must always be called to free resources.
// It leaves the query in the Finished state.
func (q *Query) Done() {
q.state = Finished
// State reports the current state of the query.
// The value is copied into a local before being returned.
func (q *Query) State() State {
s := q.state
return s
// isOK reports whether the query is neither Canceled nor Errored.
// The priority queue uses it to decide whether a query may be returned.
func (q *Query) isOK() bool {
ok := q.state != Canceled && q.state != Errored
return ok
// Err reports any error the query may have encountered.
// It returns the value most recently stored by setErr, or nil if none was stored.
func (q *Query) Err() error {
err := q.err
return err
// setErr records err on the query and moves it to the Errored state.
func (q *Query) setErr(err error) {
q.err = err
// Finish the query immediately.
// This allows for receiving from the Ready channel in the same goroutine
// that has called defer q.Done()
q.state = Errored
// setResults delivers r on the ready channel, but only while the query is in
// the Executing state; in any other state the results are not sent.
func (q *Query) setResults(r map[string]execute.Result) {
if q.state == Executing {
q.ready <- r
// tryCompile attempts to transition the query into the Compiling state.
// The transition is only allowed from Created; on success a compile span and
// context are started and true is returned, otherwise false.
func (q *Query) tryCompile() bool {
if q.state == Created {
q.compileSpan, q.compilingCtx = StartSpanFromContext(
q.state = Compiling
return true
return false
// tryQueue attempts to transition the query into the Queueing state.
// The transition is allowed from Compiling (query submitted as text) or
// directly from Created (query submitted as a ready-made spec). It returns
// true on success and false from any other state.
func (q *Query) tryQueue() bool {
if q.state == Compiling || q.state == Created {
// Extra work applies only when coming from the Compiling state.
if q.state == Compiling {
q.queueSpan, q.queueCtx = StartSpanFromContext(
q.state = Queueing
return true
return false
// tryRequeue attempts to transition the query into the Requeueing state.
// The transition is only allowed from Planning; on success a requeue span and
// context are started and true is returned, otherwise false.
func (q *Query) tryRequeue() bool {
if q.state == Planning {
q.requeueSpan, q.requeueCtx = StartSpanFromContext(
q.state = Requeueing
return true
return false
// tryPlan attempts to transition the query into the Planning state.
// The transition is only allowed from Queueing; on success a plan span and
// context are started and true is returned, otherwise false.
func (q *Query) tryPlan() bool {
if q.state == Queueing {
q.planSpan, q.planCtx = StartSpanFromContext(
q.state = Planning
return true
return false
// tryExec attempts to transition the query into the Executing state.
// The transition is allowed from Requeueing or Planning; on success an
// execute span and context are started and true is returned, otherwise false.
func (q *Query) tryExec() bool {
if q.state == Requeueing || q.state == Planning {
// The two source states are handled separately before execution starts.
switch q.state {
case Requeueing:
case Planning:
q.executeSpan, q.executeCtx = StartSpanFromContext(
q.state = Executing
return true
return false
// State is the query state.
type State int
const (
// Created is the zero value of State and the state createQuery assigns
// to every new query.
Created State = iota
// String returns the lower-case name of the state, or "unknown" for a value
// that does not match any of the states handled below.
func (s State) String() string {
switch s {
case Created:
return "created"
case Compiling:
return "compiling"
case Queueing:
return "queueing"
case Planning:
return "planning"
case Requeueing:
// Spelled out in full so it matches the state name and the
// "requeueing_*" metric names (previously misspelled "requeing").
return "requeueing"
case Executing:
return "executing"
case Errored:
return "errored"
case Finished:
return "finished"
case Canceled:
return "canceled"
return "unknown"
// span is a simple wrapper around opentracing.Span in order to
// get access to the duration of the span for metrics reporting.
type span struct {
// s is the wrapped opentracing span.
s opentracing.Span
// start is the time the span was started.
start time.Time
// Duration is set by Finish to the time elapsed since start.
Duration time.Duration
// hist and gauge are the metrics supplied to StartSpanFromContext.
hist prometheus.Observer
gauge prometheus.Gauge
// StartSpanFromContext starts an opentracing span named operationName from
// ctx and wraps it in a span that remembers its start time and the supplied
// histogram and gauge. It returns the wrapper and the context carrying the
// new opentracing span.
func StartSpanFromContext(ctx context.Context, operationName string, hist prometheus.Observer, gauge prometheus.Gauge) (*span, context.Context) {
// The same timestamp is used for the opentracing span and the wrapper.
start := time.Now()
s, sctx := opentracing.StartSpanFromContext(ctx, operationName, opentracing.StartTime(start))
return &span{
s: s,
start: start,
hist: hist,
gauge: gauge,
}, sctx
// Finish records the span's Duration as the time elapsed since start, using
// a single timestamp that is also supplied as the finish time.
func (s *span) Finish() {
finish := time.Now()
s.Duration = finish.Sub(s.start)
FinishTime: finish,
package control
import ""
// namespace and subsystem are shared by every metric declared in this file.
const (
namespace = "query"
subsystem = "control"
// labels lists the label names attached to every metric vector in this file.
var (
labels = []string{"org"}
// One gauge vector and one histogram vector are declared for each of the five
// query phases: compiling, queueing, requeueing, planning and executing.
var (
// Gauges: number of queries currently in each phase.
compilingGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "compiling_active",
Help: "Number of queries actively compiling",
}, labels)
queueingGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "queueing_active",
Help: "Number of queries actively queueing",
}, labels)
requeueingGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "requeueing_active",
Help: "Number of queries actively requeueing",
}, labels)
planningGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "planning_active",
Help: "Number of queries actively planning",
}, labels)
executingGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "executing_active",
Help: "Number of queries actively executing",
}, labels)
// Histograms: time spent in each phase. Each uses 7 exponential buckets
// with a growth factor of 5.
compilingHist = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "compiling_duration_seconds",
Help: "Histogram of times spent compiling queries",
Buckets: prometheus.ExponentialBuckets(1e-3, 5, 7),
}, labels)
queueingHist = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "queueing_duration_seconds",
Help: "Histogram of times spent queueing queries",
Buckets: prometheus.ExponentialBuckets(1e-3, 5, 7),
}, labels)
requeueingHist = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "requeueing_duration_seconds",
Help: "Histogram of times spent requeueing queries",
Buckets: prometheus.ExponentialBuckets(1e-3, 5, 7),
}, labels)
planningHist = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "planning_duration_seconds",
Help: "Histogram of times spent planning queries",
// Planning starts its buckets at 1e-5 rather than the 1e-3 used by
// the other histograms.
Buckets: prometheus.ExponentialBuckets(1e-5, 5, 7),
}, labels)
executingHist = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "executing_duration_seconds",
Help: "Histogram of times spent executing queries",
Buckets: prometheus.ExponentialBuckets(1e-3, 5, 7),
}, labels)
// NOTE(review): presumably registers the metric vectors declared above with
// Prometheus — the body is not visible in this excerpt; confirm.
func init() {
package control
import "container/heap"
// priorityQueue implements heap.Interface and holds Query objects.
// It is the unexported backing slice wrapped by PriorityQueue.
type priorityQueue []*Query
// Len returns the number of queries in the queue (heap.Interface).
func (pq priorityQueue) Len() int { return len(pq) }
// Less orders queries by the Priority in their spec's resources; the query
// with the numerically smaller Priority sorts first (heap.Interface).
func (pq priorityQueue) Less(i, j int) bool {
return pq[i].spec.Resources.Priority < pq[j].spec.Resources.Priority
// Swap exchanges the queries at positions i and j (heap.Interface).
func (pq priorityQueue) Swap(i, j int) {
pq[i], pq[j] = pq[j], pq[i]
// Push appends x to the end of the slice (heap.Interface). x must be a
// *Query; any other type makes the type assertion panic.
func (pq *priorityQueue) Push(x interface{}) {
q := x.(*Query)
*pq = append(*pq, q)
// Pop removes and returns the last element of the slice (heap.Interface).
// It indexes old[n-1] without a length check, so it must not be called on an
// empty queue.
func (pq *priorityQueue) Pop() interface{} {
old := *pq
n := len(old)
q := old[n-1]
*pq = old[0 : n-1]
return q
// PriorityQueue wraps priorityQueue behind typed Push, Peek and Pop methods
// that work with *Query values.
type PriorityQueue struct {
queue priorityQueue
// newPriorityQueue returns an empty PriorityQueue whose backing slice has an
// initial capacity of 100.
func newPriorityQueue() *PriorityQueue {
return &PriorityQueue{
queue: make(priorityQueue, 0, 100),
// Push adds q to the queue through heap.Push.
func (p *PriorityQueue) Push(q *Query) {
heap.Push(&p.queue, q)
// Peek returns the query at the head of the queue without removing it, or nil
// if the queue is empty. Only a query for which isOK reports true is returned.
// NOTE(review): how a head query that is not OK is discarded before the loop
// repeats is not visible in this excerpt — confirm.
func (p *PriorityQueue) Peek() *Query {
for {
if p.queue.Len() == 0 {
return nil
q := p.queue[0]
if q.isOK() {
return q
// Pop removes queries from the head of the queue until it finds one for which
// isOK reports true, and returns it. Queries that are not OK are dropped along
// the way. It returns nil once the queue is empty.
func (p *PriorityQueue) Pop() *Query {
for {
if p.queue.Len() == 0 {
return nil
q := heap.Pop(&p.queue).(*Query)
if q.isOK() {
return q
# Datasets
Datasets are the container for data between transformations of a query.
Datasets and transformations come in pairs.
A transformation receives data for processing and writes its results to its downstream dataset.
A dataset decides when data should be materialized and passed down to the next transformation dataset pair.
A dataset is made up of blocks.
A block is a time bounded set of data for a given grouping key.
Blocks are modeled as matrices where row labels are series keys and column labels are timestamps.
The query engine is capable of processing data out of order and still providing complete and correct results.
This is accomplished through a mechanism of watermarks and triggers.
A watermark gives an indication that no data older than the mark is likely to arrive.
A trigger defines when a block of data should be materialized for processing.
Then if late data does arrive the block can be updated and rematerialized.
This mechanism allows for a trade off between three aspects of stream processing.
* Completeness - Did the query process all of the data?
* Latency - How quickly is a result produced once data is received?
* Cost - How much compute resources are used to process the pipeline?
Datasets cache active blocks, materialize them whenever they are triggered, and remove them once they are finished.
## Resilience
The plan is to implement datasets as resilient data stores like Spark's RDD, so that if a given dataset is lost, a replacement can be rebuilt quickly.
## Performance
The Dataset and Block interfaces are designed to allow different implementations to make various performance trade offs.
### Batching
Blocks represent time and group bounded data.
It is possible that data for a single block is too large to maintain in RAM.
Bounds on data indicate how aggregate transformations, etc. should behave.
Batching the data so that it can be processed with available resources is an orthogonal issue to the bounds of the data.
As such, it is not part of the Dataset or Block interfaces and is left up to the implementation of the interfaces as needed.
### Sparse vs. Dense
There will be three different implementations of the Block interface.
* Dense
* Sparse Row Optimized
* Sparse Column Optimized
A dense matrix implementation assumes that there is little to no missing data.
A dense matrix is typically "row-major", meaning it's optimized for row-based operations; at this point it doesn't seem helpful to have a column-major dense implementation.
A sparse matrix implementation assumes that there is a significant amount of missing data.
Sparse implementations can be optimized for either row or column operations.
Since different processes access data in different patterns the planning step will be responsible for deciding which implementation is best at which steps in a query.
The planner will add transformation procedures for conversions between the different implementations.
@ -0,0 +1,39 @@
# Executor Design
This document lays out the design of the executor.
## Interface
The Executor interface is defined as:
type Executor interface {
Execute(context.Context, Plan) ([]Result, ErrorState)
The executor is responsible for taking a specific plan from the Planner and executing it to produce the result, which is a list of Results. Returning a list of Results will allow for streaming of the various results to a client.
## Concepts
The executor interacts with many different systems and has its own internal systems.
Below is a list of concepts within the executor.
| Concept | Description |
| ------- | ----------- |
| Bounded Data | Datasets that are finite, in other words `batch` data. |
| Unbounded Data | Datasets that have no known end, or are infinite, in other words `stream` data. |
| Event time | The time the event actually occurred. |
| Processing time | The time the event is processed. This time may be completely out of order with respect to its event time and the event time of other events with similar processing time. |
| Watermarks | Watermarks communicate the lag between event time and processing time. Watermarks define a bound on the event time of data that has been observed. |
| Triggers | Triggers communicate when data should be materialized. |
| Accumulation | Accumulation defines how different results from events of the same window can be combined into a single result. |
| Dataset | A set of data produced from a transformation. The dataset is resilient because its lineage and watermarks are known, therefore it can be recreated in the event of loss. |
| Block | A subset of a dataset. Rows represent series and columns represent time. |
| Transformation | Performs a transformation on data received from a parent dataset and writes results to a child dataset. |
| Execution State | Execution state tracks the state of an execution. |
## Execution State
While both queries and plans are specifications the execution state encapsulates the implementation and state of executing a query.
# IFQL Language
This document details the design of the IFQL language.
If you are looking for usage information on the language see the
# Overview
The IFQL language is used to construct query specifications.
# Syntax
The language syntax is defined by the ifql/ifql.peg grammar.
## Keyword Arguments
IFQL uses keyword arguments for ALL arguments to ALL functions.
Keyword arguments enable iterative improvements to the language while remaining backwards compatible.
Since IFQL is functional in style it is important to note that the choice of keyword arguments means that many functional concepts that deal with positional arguments have to be mapped into a space where only keyword arguments exist.
### Default Arguments
Since all arguments are keyword arguments and there are no positional arguments it is possible for any argument to have a default value.
If an argument is not specified at call time, then if the argument has a default it is used, otherwise an error occurs.
## Abstract Syntax Tree
The abstract syntax tree (AST) of IFQL is closely modeled after the javascript AST.
Using the javascript AST provides a good foundation for organization and structure of the syntax tree.
Since IFQL is so similar to javascript this design works well.
# Semantics
The `semantic` package provides a graph structure that represents the meaning of an IFQL script.
An AST is converted into a semantic graph for use with other systems.
Using a semantic graph representation of IFQL enables high-level meaning to be specified programmatically.
For example since IFQL uses the javascript AST structures, arguments to a function are represented as a single positional argument that is always an object expression.
The semantic graph validates that the AST correctly follows these semantics, and uses structures that are strongly typed for this expectation.
The semantic structures are to be designed to facilitate the interpretation and compilation of IFQL.
# Interpretation
IFQL is primarily an interpreted language.
The implementation of the IFQL interpreter can be found in the `interpreter` package.
# Compilation and Go Runtime
A subset of IFQL can be compiled into a runtime hosted in Go.
The subset consists of only pure functions.
Meaning a function definition in IFQL can be compiled and then called repeatedly with different arguments.
The function must be pure, meaning it has no side effects.
Other language features, such as imports, are not supported.
This runtime is entirely not portable.
The runtime consists of Go types that have been constructed based on the IFQL function being compiled.
Those types are not serializable and cannot be transported to other systems or environments.
This design is intended to limit the scope under which compilation must be supported.
# Features
This section details various features of the language.
## Functions
IFQL supports defining functions.
add = (a,b) => a + b
add(a:1, b:2) // 3
Functions can be assigned to identifiers and can call other functions.
Functions are first class types within IFQL.
## Scoping
IFQL uses lexical scoping.
Scoping boundaries occur at functions.
x = 5
addX = (a) => a + x
addX(a:1) // 6
The `x` referred to in the `addX` function is the same as is defined in the toplevel scope.
Scope names can be changed for more specific scopes.
x = 5
add = (x,y) => x + y
add(x:1,y:2) // 3
In this example the `x = 5` definition is unused, as the `add` function defines its own local identifier `x` as a parameter.
# Design Overview
This document provides an overview of the design of the query engine.
## Concepts
There are several different concepts that make up the complete query engine.
* Query - A query defines work to be performed on time series data and a result.
A query is represented as a directed acyclic graph (DAG).
* IFQL - Functional Language for defining a query to execute.
* Parser - Parses an IFQL script and produces a query.
* Data Frame - A data frame is a matrix of time series data where one dimension is time and the other is series.
* Query Node - A query node represents a single step in the DAG of a query.
* Planner - The planner creates a plan of execution from a query.
* Plan - A plan is also a DAG of nodes that explicitly state how a query will be performed.
* Plan Node - A plan node represents a single step in the DAG of a plan.
* Executor - The executor is responsible for processing a query plan.
The executor processes data via data frames.
* Storage - The Storage interface provides a mechanism for accessing the underlying data as data frames.
* Capabilities - The Storage interface exposes its capabilities.
The planner uses the available capabilities to determine the best plan.
* Hints - The Storage interface exposes hints about the data.
The planner uses the hints to determine the best plan.
* Query Engine - Query Engine is the name given to the entire system being described in this document.
Both a query and a plan are represented by a DAG and describe an operation that needs to be performed.
The difference is that a plan in addition to describing what the operation is, also describes how that operation will be performed.
In short, a query describes what the operation is and a plan describes how that operation will be carried out.
## Relations
Below is a high level description, using the Go language, of the relations between the different components and concepts of the query engine.
type Parser interface {
Parse(ifql string) Query
// Query defines work to be performed on time series data and a result.
// A query is represented as a directed acyclic graph (DAG).
type Query interface {
Nodes() []QueryNode
Edges() []Edge
// QueryNode is a single step in the DAG of a query
type QueryNode interface {
ID() NodeID
// More details about what the node does
// NodeID uniquely identifies a node.
type NodeID
// Edge establishes a parent child relationship between two nodes.
type Edge interface {
Parent() NodeID
Child() NodeID
// Planner computes a plan from a query and available storage interfaces
type Planner interface {
Plan(Query, []Storage) Plan
// Plan is a DAG of the specific steps to execute.
type Plan interface {
Nodes() []PlanNode
Edges() []Edge
// PlanNode is a single step in the plan DAG.
type PlanNode interface {
ID() NodeID
Predicates() []Predicate
// Predicate filters data.
type Predicate interface {}
// Storage provides an interface to the storage layer.
type Storage interface {
// Read gets data from the underlying storage system and returns a data frame or error state.
Read(context.Context, []Predicate, TimeRange, Grouping) (DataFrame, ErrorState)
// Capabilities exposes the capabilities of the storage interface.
Capabilities() []Capability
// Hints provides hints about the characteristics of the data.
Hints(context.Context, []Predicate, TimeRange, Grouping) Hints
// TimeRange is the beginning time and ending time
type TimeRange interface {
Begin() int64
End() int64
// Grouping are key groups
type Grouping interface {
Keys() []string
// Hints provide insight into the size and shape of the data that would likely be returned
// from a storage read operation.
type Hints interface {
Cardinality() // Count tag values
ByteSize() int64
Blocks() int64
// Capability represents a single capability of a storage interface.
type Capability interface{
Name() string
// Executor processes a plan and returns the resulting data frames or an error state.
type Executor interface{
Execute(context.Context, Plan) ([]DataFrame, ErrorState)
// ErrorState describes precisely the state of an errored operation such that appropriate recovery may be attempted.
type ErrorState interface {
Error() error
// Retryable() bool ?
# Plan DAG
The plan is represented as a DAG, where each node performs an operation and produces a result.
The plan DAG is separate and distinct from the query DAG.
The plan DAG specifies details about how the query will be executed, while the query DAG only specifies what the query is.
There may be multiple roots to the DAG where each root represents a source of data.
A root or source node may retrieve data from multiple different systems.
Primarily data will be read from the storage interface, but may be streamed from the write ingest system or potentially external systems as well.
The leaves of the DAG represent the results of the operation.
The results are collected and returned.
@ -0,0 +1,63 @@
# Planner Design
This document lays out the design of the planner.
## Interface
The Planner interface is defined as:
type Planner interface {
Plan(Query, []Storage) Plan
The planner is responsible for taking a query DAG and a set of available storage interfaces and producing a plan DAG.
## Plans
Plans are created via a two step process:
1. Create a general plan from the query.
2. Create a specific plan from the general plan and available storage interface.
The general plan specifies all the needed data frames and their lineage needed to produce the final query result.
The specific plan specifies how the general plan will be executed, which storage interfaces will be consumed and how.
The general plan does not leave the scope of the Planner and is not part of the API of the planner.
Hence the Plan type above is the specific plan.
## Plan DAG
Both the general and specific plans are represented as DAGs.
The nodes of the DAG represent data frames to be produced, while the edges of the DAG represent the operations needed to construct the data frames.
This is inverted from the Query DAG where the nodes are operations and edges represent data sets.
The leaves of the plan DAG represent sources of data and the data flows from bottom up through the tree.
Again this is inverted from the Query DAG where data flows top down.
## Data Frames
Data frames are a set of data and their lineage is known.
Meaning it is known what parent data frames and operations are needed to construct the data frame.
Using this concept of lineage allows a data frame to be reconstructed if it is lost due to node failure or if its parent data frames are modified.
### Windowing
Data frames will specify their windowing properties. ????
## Operations
Operations are a definition of a transformation to be applied on one data frame resulting in another.
### Narrow vs Wide
Operations are classified as either narrow or wide:
* Narrow operations map each parent data frame to exactly one child data frame.
Specifically a narrow operation is a one-to-one mapping of parent to child data frames.
* Wide operations map multiple parent data frames to multiple child data frames.
Specifically a wide operation is a many-to-many mapping of parent to child data frames.
This distinction is necessary to precisely define the lineage of a data frame.
@ -0,0 +1,22 @@
# Query DAG
The query is represented as a DAG, where each node represents an operation to be performed.
There may be multiple roots to the DAG where each root represents a source of data.
Root nodes will specify whether they are selecting data from the database or consuming data as a stream.
The leaves of the DAG represent the results of the operation.
A result node may be added as a child to any node to make that intermediate representation a finalized result.
## Specification
A query DAG consists of a set of nodes and a set of edges that form a directed acyclic graph (DAG).
Each node has the following properties:
* ID - A unique identifier for the node within the graph.
* Kind - The kind of operation the node performs.
* Spec - The spec, specifies the parameters provided to the node detailing the specifics of the operation.
The parameters vary by the kind of node.
### Creating Release tag
We are using semantic versioning with the format "vMajor.Minor.Patch"
git tag -s v0.0.1
make release
@ -0,0 +1,39 @@
# Learning Resources
This document contains a list of papers, articles, etc. that are useful for understanding the design of IFQL.
## Stream Processing
* Set of articles by Tyler Akidau on building stream processing engines with correctness guarantees.
* Published paper by Tyler Akidau on building stream processing engines with correctness guarantees.
* Paper from UC Berkeley introducing Spark and RDDs
* A summary of the different data APIs in Spark
## Map Reduce
* Google research paper on Dremel
## DataFrames
* Good overview on various sparse matrix implementations.
## Query Optimization
* Volcano Optimizer Generator
* The Cascades Framework for Query Optimization
* Chapter 7: Query Optimization
From Readings in Database Systems, 5th Edition (2015)
This chapter references various other valuable readings.
* Cost-based Optimization in Parallel Data Frameworks
File diff suppressed because it is too large
Load Diff
# Transpiler Design
IFQL will support transpiling various other languages into query specification that can be executed.
Executing a transpiled query involves two steps outside the normal execution process.
1. Transpile the query to a query spec.
2. Write the result in the desired format.
The following interfaces found in the `crossexecute` package represent these concepts.
type QueryTranspiler interface {
Transpile(ctx context.Context, txt string) (*query.Spec, error)
type ResultWriter interface {
WriteTo(w io.Writer, results map[string]execute.Result) error
Each different language/system need only implement a query transpiler and result writer.
## Producing IFQL txt via transpilation
The various transpilers only define the `somelang txt -> spec` transformation.
In general the reverse process will be possible, `spec -> ifql txt`.
Once any transpiler has been implemented then IFQL txt can be produced from that source language.
## InfluxQL
Specific to writing the InfluxQL transpiler there is a major problem to overcome:
### How can the transpiler disambiguate fields and tags?
The transpiler will need a service that can report whether an identifier is a field or a tag for a given measurement.
The service will also need to be able to report which measurements exist for a given regexp pattern.
With this extra information the transpiler should be able to process any InfluxQL query.
#### Open Questions
* What to do about measurements that have different schemas and are queried as if they have the same schema?
select sum("value") from /(a|b)/ where "c" == 'foo'
`c` is a tag on measurement `a`.
`c` is a field on measurement `b`.
For 1.x does this error or return some result?
* Does a query spec contain enough information to convert 2.0 results into InfluxQL 1.x result JSON?
The final table will contain the correct column names etc. to produce the correct JSON output.
IFQL Table -> InfluxQL JSON
@ -0,0 +1,48 @@
The following benchmarks measure reading 10,000,000 points via TCP, yamux and gRPC.
These were performed using a VM and host machine on the same physical hardware.
`iperf` reported about 390 MB/s as the maximum throughput between the machines.
`influxd` running on VM, client on host.
All protocols used the same protobuf message (to provide framing) with an embedded `[]byte`
array to serialize batches of points – large arrays of structures for points are simply too slow.
The underlying storage engine cursor can read 10,000,000 points in about 230ms, therefore
the overhead for each protocol is as follows:
TCP → 470ms
yamux → 620ms
gRPC → 970ms
Maximum transfer rates are therefore:
TCP → 340 MB/s or ~21e6 points / sec
yamux → 258 MB/s or ~16e6 points / sec
gRPC → 164 MB/s or ~10e6 points / sec
It is worth noting that I have not tested Go's network libraries to determine maximum throughput;
however, I suspect it may be close to the TCP maximum. Whilst we will benchmark using independent
machines in AWS, these tests helped me understand relative performance of the various transports
and the impact different serialization mechanisms have on our throughput. Protobuf is ok as long
as we keep the graph small, meaning we customize the serialization of the points.
As a comparison, I also tested client and server on localhost, to compare the protocols without the
network stack overhead. gRPC was very inconsistent, varying anywhere from 463ms to 793ms so the result
represents the average of a number of runs.
TCP → 95ms
yamux → 108ms
gRPC → 441ms
These numbers bring TCP and yamux within about 10% of each other. The majority of the difference
between TCP and yamux is due to the additional frames sent by yamux to manage flow control,
which add latency. If that overhead is a concern, we may need to tune the flow control algorithm.
package execute_test
import (
// TestAggregate_Process is a table-driven test of the aggregate transformation.
// Each case names an aggregate, an AggregateConfig, the input blocks (data)
// and the blocks expected in the builder cache afterwards (want).
func TestAggregate_Process(t *testing.T) {
// Aggregate implementations shared by the cases below.
sumAgg := new(functions.SumAgg)
countAgg := new(functions.CountAgg)
testCases := []struct {
// name labels the case.
name string
// agg is the aggregate under test.
agg execute.Aggregate
// config selects the aggregated columns and the time source/destination columns.
config execute.AggregateConfig
// data holds the input blocks.
data []*executetest.Block
// want holds the blocks expected after processing.
want []*executetest.Block
name: "single",
config: execute.DefaultAggregateConfig,
agg: sumAgg,
data: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(0), 0.0},
{execute.Time(0), execute.Time(100), execute.Time(10), 1.0},
{execute.Time(0), execute.Time(100), execute.Time(20), 2.0},
{execute.Time(0), execute.Time(100), execute.Time(30), 3.0},
{execute.Time(0), execute.Time(100), execute.Time(40), 4.0},
{execute.Time(0), execute.Time(100), execute.Time(50), 5.0},
{execute.Time(0), execute.Time(100), execute.Time(60), 6.0},
{execute.Time(0), execute.Time(100), execute.Time(70), 7.0},
{execute.Time(0), execute.Time(100), execute.Time(80), 8.0},
{execute.Time(0), execute.Time(100), execute.Time(90), 9.0},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(100), 45.0},
name: "single use start time",
config: execute.AggregateConfig{
Columns: []string{execute.DefaultValueColLabel},
TimeSrc: execute.DefaultStartColLabel,
TimeDst: execute.DefaultTimeColLabel,
agg: sumAgg,
data: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(0), 0.0},
{execute.Time(0), execute.Time(100), execute.Time(10), 1.0},
{execute.Time(0), execute.Time(100), execute.Time(20), 2.0},
{execute.Time(0), execute.Time(100), execute.Time(30), 3.0},
{execute.Time(0), execute.Time(100), execute.Time(40), 4.0},
{execute.Time(0), execute.Time(100), execute.Time(50), 5.0},
{execute.Time(0), execute.Time(100), execute.Time(60), 6.0},
{execute.Time(0), execute.Time(100), execute.Time(70), 7.0},
{execute.Time(0), execute.Time(100), execute.Time(80), 8.0},
{execute.Time(0), execute.Time(100), execute.Time(90), 9.0},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(0), 45.0},
name: "multiple blocks",
config: execute.DefaultAggregateConfig,
agg: sumAgg,
data: []*executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(0), 0.0},
{execute.Time(0), execute.Time(100), execute.Time(10), 1.0},
{execute.Time(0), execute.Time(100), execute.Time(20), 2.0},
{execute.Time(0), execute.Time(100), execute.Time(30), 3.0},
{execute.Time(0), execute.Time(100), execute.Time(40), 4.0},
{execute.Time(0), execute.Time(100), execute.Time(50), 5.0},
{execute.Time(0), execute.Time(100), execute.Time(60), 6.0},
{execute.Time(0), execute.Time(100), execute.Time(70), 7.0},
{execute.Time(0), execute.Time(100), execute.Time(80), 8.0},
{execute.Time(0), execute.Time(100), execute.Time(90), 9.0},
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(100), execute.Time(200), execute.Time(100), 10.0},
{execute.Time(100), execute.Time(200), execute.Time(110), 11.0},
{execute.Time(100), execute.Time(200), execute.Time(120), 12.0},
{execute.Time(100), execute.Time(200), execute.Time(130), 13.0},
{execute.Time(100), execute.Time(200), execute.Time(140), 14.0},
{execute.Time(100), execute.Time(200), execute.Time(150), 15.0},
{execute.Time(100), execute.Time(200), execute.Time(160), 16.0},
{execute.Time(100), execute.Time(200), execute.Time(170), 17.0},
{execute.Time(100), execute.Time(200), execute.Time(180), 18.0},
{execute.Time(100), execute.Time(200), execute.Time(190), 19.0},
want: []*executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(100), 45.0},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(100), execute.Time(200), execute.Time(200), 145.0},
name: "multiple blocks with keyed columns",
config: execute.DefaultAggregateConfig,
agg: sumAgg,
data: []*executetest.Block{
KeyCols: []string{"_start", "_stop", "t1"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), "a", execute.Time(0), 0.0},
{execute.Time(0), execute.Time(100), "a", execute.Time(10), 1.0},
{execute.Time(0), execute.Time(100), "a", execute.Time(20), 2.0},
{execute.Time(0), execute.Time(100), "a", execute.Time(30), 3.0},
{execute.Time(0), execute.Time(100), "a", execute.Time(40), 4.0},
{execute.Time(0), execute.Time(100), "a", execute.Time(50), 5.0},
{execute.Time(0), execute.Time(100), "a", execute.Time(60), 6.0},
{execute.Time(0), execute.Time(100), "a", execute.Time(70), 7.0},
{execute.Time(0), execute.Time(100), "a", execute.Time(80), 8.0},
{execute.Time(0), execute.Time(100), "a", execute.Time(90), 9.0},
KeyCols: []string{"_start", "_stop", "t1"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), "b", execute.Time(0), 0.3},
{execute.Time(0), execute.Time(100), "b", execute.Time(10), 1.3},
{execute.Time(0), execute.Time(100), "b", execute.Time(20), 2.3},
{execute.Time(0), execute.Time(100), "b", execute.Time(30), 3.3},
{execute.Time(0), execute.Time(100), "b", execute.Time(40), 4.3},
{execute.Time(0), execute.Time(100), "b", execute.Time(50), 5.3},
{execute.Time(0), execute.Time(100), "b", execute.Time(60), 6.3},
{execute.Time(0), execute.Time(100), "b", execute.Time(70), 7.3},
{execute.Time(0), execute.Time(100), "b", execute.Time(80), 8.3},
{execute.Time(0), execute.Time(100), "b", execute.Time(90), 9.3},
KeyCols: []string{"_start", "_stop", "t1"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(100), execute.Time(200), "a", execute.Time(100), 10.0},
{execute.Time(100), execute.Time(200), "a", execute.Time(110), 11.0},
{execute.Time(100), execute.Time(200), "a", execute.Time(120), 12.0},
{execute.Time(100), execute.Time(200), "a", execute.Time(130), 13.0},
{execute.Time(100), execute.Time(200), "a", execute.Time(140), 14.0},
{execute.Time(100), execute.Time(200), "a", execute.Time(150), 15.0},
{execute.Time(100), execute.Time(200), "a", execute.Time(160), 16.0},
{execute.Time(100), execute.Time(200), "a", execute.Time(170), 17.0},
{execute.Time(100), execute.Time(200), "a", execute.Time(180), 18.0},
{execute.Time(100), execute.Time(200), "a", execute.Time(190), 19.0},
KeyCols: []string{"_start", "_stop", "t1"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(100), execute.Time(200), "b", execute.Time(100), 10.3},
{execute.Time(100), execute.Time(200), "b", execute.Time(110), 11.3},
{execute.Time(100), execute.Time(200), "b", execute.Time(120), 12.3},
{execute.Time(100), execute.Time(200), "b", execute.Time(130), 13.3},
{execute.Time(100), execute.Time(200), "b", execute.Time(140), 14.3},
{execute.Time(100), execute.Time(200), "b", execute.Time(150), 15.3},
{execute.Time(100), execute.Time(200), "b", execute.Time(160), 16.3},
{execute.Time(100), execute.Time(200), "b", execute.Time(170), 17.3},
{execute.Time(100), execute.Time(200), "b", execute.Time(180), 18.3},
{execute.Time(100), execute.Time(200), "b", execute.Time(190), 19.3},
want: []*executetest.Block{
KeyCols: []string{"_start", "_stop", "t1"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), "a", execute.Time(100), 45.0},
KeyCols: []string{"_start", "_stop", "t1"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(100), execute.Time(200), "a", execute.Time(200), 145.0},
KeyCols: []string{"_start", "_stop", "t1"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), "b", execute.Time(100), 48.0},
KeyCols: []string{"_start", "_stop", "t1"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(100), execute.Time(200), "b", execute.Time(200), 148.0},
name: "multiple values",
config: execute.AggregateConfig{
Columns: []string{"x", "y"},
TimeSrc: execute.DefaultStopColLabel,
TimeDst: execute.DefaultTimeColLabel,
agg: sumAgg,
data: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(0), 0.0, 0.0},
{execute.Time(0), execute.Time(100), execute.Time(10), 1.0, -1.0},
{execute.Time(0), execute.Time(100), execute.Time(20), 2.0, -2.0},
{execute.Time(0), execute.Time(100), execute.Time(30), 3.0, -3.0},
{execute.Time(0), execute.Time(100), execute.Time(40), 4.0, -4.0},
{execute.Time(0), execute.Time(100), execute.Time(50), 5.0, -5.0},
{execute.Time(0), execute.Time(100), execute.Time(60), 6.0, -6.0},
{execute.Time(0), execute.Time(100), execute.Time(70), 7.0, -7.0},
{execute.Time(0), execute.Time(100), execute.Time(80), 8.0, -8.0},
{execute.Time(0), execute.Time(100), execute.Time(90), 9.0, -9.0},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(100), 45.0, -45.0},
name: "multiple values changing types",
config: execute.AggregateConfig{
Columns: []string{"x", "y"},
TimeSrc: execute.DefaultStopColLabel,
TimeDst: execute.DefaultTimeColLabel,
agg: countAgg,
data: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(0), 0.0, 0.0},
{execute.Time(0), execute.Time(100), execute.Time(10), 1.0, -1.0},
{execute.Time(0), execute.Time(100), execute.Time(20), 2.0, -2.0},
{execute.Time(0), execute.Time(100), execute.Time(30), 3.0, -3.0},
{execute.Time(0), execute.Time(100), execute.Time(40), 4.0, -4.0},
{execute.Time(0), execute.Time(100), execute.Time(50), 5.0, -5.0},
{execute.Time(0), execute.Time(100), execute.Time(60), 6.0, -6.0},
{execute.Time(0), execute.Time(100), execute.Time(70), 7.0, -7.0},
{execute.Time(0), execute.Time(100), execute.Time(80), 8.0, -8.0},
{execute.Time(0), execute.Time(100), execute.Time(90), 9.0, -9.0},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TInt},
{Label: "y", Type: execute.TInt},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(100), int64(10), int64(10)},
// Run every case as a subtest with its own dataset and builder cache.
for _, tc := range testCases {
// Shadow tc so the closure captures this iteration's value.
tc := tc
t.Run(, func(t *testing.T) {
d := executetest.NewDataset(executetest.RandomDatasetID())
c := execute.NewBlockBuilderCache(executetest.UnlimitedAllocator)
agg := execute.NewAggregateTransformation(d, c, tc.agg, tc.config)
parentID := executetest.RandomDatasetID()
// Push each input block through the transformation.
for _, b := range {
if err := agg.Process(parentID, b); err != nil {
// Read back the blocks the transformation built in the cache.
got, err := executetest.BlocksFromCache(c)
if err != nil {
// EquateNaNs lets NaN results compare equal to an expected NaN.
if !cmp.Equal(tc.want, got, cmpopts.EquateNaNs()) {
t.Errorf("unexpected blocks -want/+got\n%s", cmp.Diff(tc.want, got))
@ -0,0 +1,275 @@
package execute
import (
// aggregateTransformation applies an Aggregate to the configured columns of
// each block it processes.
type aggregateTransformation struct {
// d receives retractions, watermark updates and processing-time updates.
d Dataset
// cache supplies the block builders that results are written into.
cache BlockBuilderCache
// agg creates the per-type aggregators.
agg Aggregate
// config names the aggregated columns and the time source/destination columns.
config AggregateConfig
// AggregateConfig holds the options shared by aggregate transformations.
type AggregateConfig struct {
// Columns lists the labels of the columns to aggregate.
Columns []string `json:"columns"`
// TimeSrc is the partition-key column whose value is used as the result time.
TimeSrc string `json:"time_src"`
// TimeDst is the output column that receives the result time.
TimeDst string `json:"time_dst"`
// DefaultAggregateConfig aggregates the default value column and copies the
// default stop column into the default time column.
// ReadArgs falls back to these values for arguments that are not supplied.
var DefaultAggregateConfig = AggregateConfig{
Columns: []string{DefaultValueColLabel},
TimeSrc: DefaultStopColLabel,
TimeDst: DefaultTimeColLabel,
func (c AggregateConfig) Copy() AggregateConfig {
nc := c
if c.Columns != nil {
nc.Columns = make([]string, len(c.Columns))
copy(nc.Columns, c.Columns)
return nc
func (c *AggregateConfig) ReadArgs(args query.Arguments) error {
if label, ok, err := args.GetString("timeDst"); err != nil {
return err
} else if ok {
c.TimeDst = label
} else {
c.TimeDst = DefaultAggregateConfig.TimeDst
if timeValue, ok, err := args.GetString("timeSrc"); err != nil {
return err
} else if ok {
c.TimeSrc = timeValue
} else {
c.TimeSrc = DefaultAggregateConfig.TimeSrc
if cols, ok, err := args.GetArray("columns", semantic.String); err != nil {
return err
} else if ok {
columns, err := interpreter.ToStringArray(cols)
if err != nil {
return err
c.Columns = columns
} else {
c.Columns = DefaultAggregateConfig.Columns
return nil
// NewAggregateTransformation builds an aggregate transformation that writes
// into cache c, reports to dataset d and aggregates with agg according to config.
// The config is stored as given; it is not copied here.
func NewAggregateTransformation(d Dataset, c BlockBuilderCache, agg Aggregate, config AggregateConfig) *aggregateTransformation {
return &aggregateTransformation{
d: d,
cache: c,
agg: agg,
config: config,
// NewAggregateTransformationAndDataset creates a block builder cache backed by
// allocator a, wraps it in a new dataset with the given id and accumulation
// mode, and returns an aggregate transformation bound to both, together with
// the dataset.
func NewAggregateTransformationAndDataset(id DatasetID, mode AccumulationMode, agg Aggregate, config AggregateConfig, a *Allocator) (*aggregateTransformation, Dataset) {
cache := NewBlockBuilderCache(a)
d := NewDataset(id, mode, cache)
return NewAggregateTransformation(d, cache, agg, config), d
// RetractBlock forwards the retraction of key to the downstream dataset.
// The id argument is not used.
func (t *aggregateTransformation) RetractBlock(id DatasetID, key PartitionKey) error {
//TODO(nathanielc): Store intermediate state for retractions
return t.d.RetractBlock(key)
// Process aggregates the configured columns of block b and appends the
// results to a builder obtained from the cache for b's partition key.
//
// It returns an error when a builder already exists for that key, when a
// configured column is missing from b, belongs to the partition key or has an
// unsupported type, or when AppendAggregateTime fails.
// NOTE(review): several statements of this function are not visible in this
// excerpt (see the notes below); confirm details against the full file.
func (t *aggregateTransformation) Process(id DatasetID, b Block) error {
// Each partition key may be processed only once: an existing builder is an error.
builder, new := t.cache.BlockBuilder(b.Key())
if !new {
return fmt.Errorf("aggregate found duplicate block with key: %v", b.Key())
AddBlockKeyCols(b.Key(), builder)
// NOTE(review): the two fields below describe a TTime column labelled
// config.TimeDst; the enclosing statement is not visible here.
Label: t.config.TimeDst,
Type: TTime,
// Per configured column: its index in the builder, its index in b, and its aggregator.
builderColMap := make([]int, len(t.config.Columns))
blockColMap := make([]int, len(t.config.Columns))
aggregates := make([]ValueFunc, len(t.config.Columns))
cols := b.Cols()
for j, label := range t.config.Columns {
// Find the configured label among the block's columns.
idx := -1
for bj, bc := range cols {
if bc.Label == label {
idx = bj
if idx < 0 {
return fmt.Errorf("column %q does not exist", label)
c := cols[idx]
if b.Key().HasCol(c.Label) {
return errors.New("cannot aggregate columns that are part of the partition key")
// Pick the aggregator that matches the input column type.
var vf ValueFunc
switch c.Type {
case TBool:
vf = t.agg.NewBoolAgg()
case TInt:
vf = t.agg.NewIntAgg()
case TUInt:
vf = t.agg.NewUIntAgg()
case TFloat:
vf = t.agg.NewFloatAgg()
case TString:
vf = t.agg.NewStringAgg()
return fmt.Errorf("unsupported aggregate column type %v", c.Type)
aggregates[j] = vf
// The output column keeps the input label but takes the aggregator's result type.
builderColMap[j] = builder.AddCol(ColMeta{
Label: c.Label,
Type: vf.Type(),
blockColMap[j] = idx
if err := AppendAggregateTime(t.config.TimeSrc, t.config.TimeDst, b.Key(), builder); err != nil {
return err
// Visit the block's data once per configured column.
// NOTE(review): the per-type case bodies are not visible in this excerpt.
b.Do(func(cr ColReader) error {
for j := range t.config.Columns {
vf := aggregates[j]
tj := blockColMap[j]
c := b.Cols()[tj]
switch c.Type {
case TBool:
case TInt:
case TUInt:
case TFloat:
case TString:
return fmt.Errorf("unsupport aggregate type %v", c.Type)
return nil
for j, vf := range aggregates {
bj := builderColMap[j]
// Append aggregated value
switch vf.Type() {
case TBool:
builder.AppendBool(bj, vf.(BoolValueFunc).ValueBool())
case TInt:
builder.AppendInt(bj, vf.(IntValueFunc).ValueInt())
case TUInt:
builder.AppendUInt(bj, vf.(UIntValueFunc).ValueUInt())
case TFloat:
builder.AppendFloat(bj, vf.(FloatValueFunc).ValueFloat())
case TString:
builder.AppendString(bj, vf.(StringValueFunc).ValueString())
AppendKeyValues(b.Key(), builder)
return nil
// UpdateWatermark forwards the watermark to the downstream dataset.
// The id argument is not used.
func (t *aggregateTransformation) UpdateWatermark(id DatasetID, mark Time) error {
return t.d.UpdateWatermark(mark)
// UpdateProcessingTime forwards the processing time to the downstream dataset.
// The id argument is not used.
func (t *aggregateTransformation) UpdateProcessingTime(id DatasetID, pt Time) error {
return t.d.UpdateProcessingTime(pt)
// Finish is called with the parent dataset id and the error, if any, that ended processing.
// NOTE(review): the body is not visible in this excerpt; confirm against the full file.
func (t *aggregateTransformation) Finish(id DatasetID, err error) {
func AppendAggregateTime(srcTime, dstTime string, key PartitionKey, builder BlockBuilder) error {
srcTimeIdx := ColIdx(srcTime, key.Cols())
if srcTimeIdx < 0 {
return fmt.Errorf("timeValue column %q does not exist", srcTime)
srcTimeCol := key.Cols()[srcTimeIdx]
if srcTimeCol.Type != TTime {
return fmt.Errorf("timeValue column %q does not have type time", srcTime)
dstTimeIdx := ColIdx(dstTime, builder.Cols())
if dstTimeIdx < 0 {
return fmt.Errorf("timeValue column %q does not exist", dstTime)
dstTimeCol := builder.Cols()[dstTimeIdx]
if dstTimeCol.Type != TTime {
return fmt.Errorf("timeValue column %q does not have type time", dstTime)
builder.AppendTime(dstTimeIdx, key.ValueTime(srcTimeIdx))
return nil
// Aggregate creates the aggregators used for each supported column type.
// Process calls the method matching the type of every configured column.
type Aggregate interface {
NewBoolAgg() DoBoolAgg
NewIntAgg() DoIntAgg
NewUIntAgg() DoUIntAgg
NewFloatAgg() DoFloatAgg
NewStringAgg() DoStringAgg
// ValueFunc reports the data type of the value an aggregator produces.
// Callers switch on Type and then assert the matching *ValueFunc interface below.
type ValueFunc interface {
Type() DataType
// The Do*Agg interfaces are the per-input-type aggregators returned by Aggregate.
// NOTE(review): their method sets are not visible in this excerpt.
type DoBoolAgg interface {
type DoFloatAgg interface {
type DoIntAgg interface {
type DoUIntAgg interface {
type DoStringAgg interface {
// BoolValueFunc is implemented by aggregators whose result is a bool.
type BoolValueFunc interface {
ValueBool() bool
// FloatValueFunc is implemented by aggregators whose result is a float64.
type FloatValueFunc interface {
ValueFloat() float64
// IntValueFunc is implemented by aggregators whose result is an int64.
type IntValueFunc interface {
ValueInt() int64
// UIntValueFunc is implemented by aggregators whose result is a uint64.
type UIntValueFunc interface {
ValueUInt() uint64
// StringValueFunc is implemented by aggregators whose result is a string.
type StringValueFunc interface {
ValueString() string
package execute
import (
// Per-element sizes, in bytes, that the Allocator charges for each slice type.
const (
boolSize = 1
int64Size = 8
uint64Size = 8
float64Size = 8
// stringSize covers the string header only, not the string contents.
stringSize = 16
timeSize = 8
// Allocator tracks the amount of memory being consumed by a query.
// The allocator provides methods similar to make and append, to allocate large slices of data.
// The allocator also provides a Free method to account for when memory will be freed.
type Allocator struct {
Limit int64
bytesAllocated int64
maxAllocated int64
func (a *Allocator) count(n, size int) (c int64) {
c = atomic.AddInt64(&a.bytesAllocated, int64(n*size))
for max := atomic.LoadInt64(&a.maxAllocated); c > max; max = atomic.LoadInt64(&a.maxAllocated) {
if atomic.CompareAndSwapInt64(&a.maxAllocated, max, c) {
// Free informs the allocator that memory has been freed.
// It subtracts n*size bytes from the running total; the recorded maximum is not lowered.
func (a *Allocator) Free(n, size int) {
a.count(-n, size)
// Max reports the maximum amount of allocated memory at any point in the query.
// The value is read atomically.
func (a *Allocator) Max() int64 {
return atomic.LoadInt64(&a.maxAllocated)
// account charges n*size bytes to the allocator and compares the new total with Limit.
// When the total exceeds Limit the charge is rolled back, and the limit, the
// rolled-back total and the amount that was requested are gathered.
// NOTE(review): the statement that consumes those three fields is not visible
// in this excerpt — presumably an AllocError is raised; confirm against the full file.
func (a *Allocator) account(n, size int) {
if want := a.count(n, size); want > a.Limit {
// Undo the charge so the total reflects only memory actually granted.
allocated := a.count(-n, size)
Limit: a.Limit,
Allocated: allocated,
Wanted: want - allocated,
// Bools makes a slice of bool values with length l and capacity c.
// The capacity is charged to the allocator before the slice is made.
func (a *Allocator) Bools(l, c int) []bool {
a.account(c, boolSize)
return make([]bool, l, c)
// AppendBools appends bools to a slice
func (a *Allocator) AppendBools(slice []bool, vs ...bool) []bool {
if cap(slice)-len(slice) > len(vs) {
return append(slice, vs...)
s := append(slice, vs...)
diff := cap(s) - cap(slice)
a.account(diff, boolSize)
return s
// Ints makes a slice of int64 values with length l and capacity c.
// The capacity is charged to the allocator before the slice is made.
func (a *Allocator) Ints(l, c int) []int64 {
a.account(c, int64Size)
return make([]int64, l, c)
// AppendInts appends int64s to a slice
func (a *Allocator) AppendInts(slice []int64, vs ...int64) []int64 {
if cap(slice)-len(slice) > len(vs) {
return append(slice, vs...)
s := append(slice, vs...)
diff := cap(s) - cap(slice)
a.account(diff, int64Size)
return s
// UInts makes a slice of uint64 values with length l and capacity c.
// The capacity is charged to the allocator before the slice is made.
func (a *Allocator) UInts(l, c int) []uint64 {
a.account(c, uint64Size)
return make([]uint64, l, c)
// AppendUInts appends uint64s to a slice
func (a *Allocator) AppendUInts(slice []uint64, vs ...uint64) []uint64 {
if cap(slice)-len(slice) > len(vs) {
return append(slice, vs...)
s := append(slice, vs...)
diff := cap(s) - cap(slice)
a.account(diff, uint64Size)
return s
// Floats makes a slice of float64 values with length l and capacity c.
// The capacity is charged to the allocator before the slice is made.
func (a *Allocator) Floats(l, c int) []float64 {
a.account(c, float64Size)
return make([]float64, l, c)
// AppendFloats appends float64s to a slice
func (a *Allocator) AppendFloats(slice []float64, vs ...float64) []float64 {
if cap(slice)-len(slice) > len(vs) {
return append(slice, vs...)
s := append(slice, vs...)
diff := cap(s) - cap(slice)
a.account(diff, float64Size)
return s
// Strings makes a slice of string values with length l and capacity c.
// Only the string headers are accounted for.
// The capacity is charged to the allocator before the slice is made.
func (a *Allocator) Strings(l, c int) []string {
a.account(c, stringSize)
return make([]string, l, c)
// AppendStrings appends strings to a slice.
// Only the string headers are accounted for.
func (a *Allocator) AppendStrings(slice []string, vs ...string) []string {
//TODO(nathanielc): Account for actual size of strings
if cap(slice)-len(slice) > len(vs) {
return append(slice, vs...)
s := append(slice, vs...)
diff := cap(s) - cap(slice)
a.account(diff, stringSize)
return s
// Times makes a slice of Time values with length l and capacity c.
// The capacity is charged to the allocator before the slice is made.
func (a *Allocator) Times(l, c int) []Time {
a.account(c, timeSize)
return make([]Time, l, c)
// AppendTimes appends Times to a slice
func (a *Allocator) AppendTimes(slice []Time, vs ...Time) []Time {
if cap(slice)-len(slice) > len(vs) {
return append(slice, vs...)
s := append(slice, vs...)
diff := cap(s) - cap(slice)
a.account(diff, timeSize)
return s
// AllocError describes an allocation request that exceeded the allocator's limit.
type AllocError struct {
// Limit is the allocator's configured limit in bytes.
Limit int64
// Allocated is the number of bytes accounted for, excluding the rejected request.
Allocated int64
// Wanted is the number of bytes the rejected request asked for.
Wanted int64
// Error implements the error interface, reporting the limit, the bytes
// already allocated and the bytes wanted.
func (a AllocError) Error() string {
return fmt.Sprintf("allocation limit reached: limit %d, allocated: %d, wanted: %d", a.Limit, a.Allocated, a.Wanted)
File diff suppressed because it is too large
Load Diff
package execute
import (
// Time and Duration are aliases of the corresponding types in the values package.
type Time = values.Time
type Duration = values.Duration
// MaxTime and MinTime are the largest and smallest int64 values.
const (
MaxTime = math.MaxInt64
MinTime = math.MinInt64
// Bounds is a time range. Contains treats it as half-open: Start is included
// and Stop is excluded.
type Bounds struct {
Start Time
Stop Time
// AllTime is the bounds running from MinTime to MaxTime.
var AllTime = Bounds{
Start: MinTime,
Stop: MaxTime,
// String formats the bounds in half-open interval notation, "[start, stop)".
func (b Bounds) String() string {
return fmt.Sprintf("[%v, %v)", b.Start, b.Stop)
// Contains reports whether t lies within the bounds: Start is inclusive, Stop is exclusive.
func (b Bounds) Contains(t Time) bool {
return t >= b.Start && t < b.Stop
func (b Bounds) Overlaps(o Bounds) bool {
return b.Contains(o.Start) || b.Contains(o.Stop)
// Equal reports whether b and o have the same Start and Stop.
func (b Bounds) Equal(o Bounds) bool {
return b == o
// Shift returns new bounds with d added to both Start and Stop; b is not modified.
func (b Bounds) Shift(d Duration) Bounds {
return Bounds{Start: b.Start.Add(d), Stop: b.Stop.Add(d)}
// Now returns the current wall-clock time converted to a Time.
func Now() Time {
return values.ConvertTime(time.Now())
@ -0,0 +1,188 @@
package execute
import (
uuid ""
// Dataset represents the set of data produced by a transformation.
// NOTE(review): the method set may be incomplete in this excerpt.
type Dataset interface {
// RetractBlock retracts the block identified by key.
RetractBlock(key PartitionKey) error
// UpdateProcessingTime records a new processing time.
UpdateProcessingTime(t Time) error
// UpdateWatermark records a new watermark.
UpdateWatermark(mark Time) error
SetTriggerSpec(t query.TriggerSpec)
// DataCache holds all working data for a transformation.
// NOTE(review): the method set may be incomplete in this excerpt (the dataset
// also calls ForEach on its cache).
type DataCache interface {
// Block returns the block stored for the given partition key.
Block(PartitionKey) (Block, error)
// ForEachWithContext calls the function for each key with its trigger and block context.
ForEachWithContext(func(PartitionKey, Trigger, BlockContext))
SetTriggerSpec(t query.TriggerSpec)
// AccumulationMode selects how a dataset hands a triggered block to its
// transformations (see triggerBlock).
type AccumulationMode int
// NOTE(review): only DiscardingMode is visible in this excerpt; triggerBlock
// also refers to AccumulatingMode and AccumulatingRetractingMode.
const (
DiscardingMode AccumulationMode = iota
// DatasetID identifies a dataset; its underlying type is a UUID.
type DatasetID uuid.UUID
// String returns the UUID string form of the id.
func (id DatasetID) String() string {
return uuid.UUID(id).String()
// ZeroDatasetID is the zero value of DatasetID.
var ZeroDatasetID DatasetID
// IsZero reports whether id equals ZeroDatasetID.
func (id DatasetID) IsZero() bool {
return id == ZeroDatasetID
// dataset pairs a data cache with the transformations that consume it.
type dataset struct {
id DatasetID
// ts are the downstream transformations notified by this dataset.
ts []Transformation
accMode AccumulationMode
// watermark and processingTime are the latest values received and are passed to triggers.
watermark Time
processingTime Time
cache DataCache
// NewDataset returns a dataset with the given id, accumulation mode and cache.
// It starts with no transformations attached.
func NewDataset(id DatasetID, accMode AccumulationMode, cache DataCache) *dataset {
return &dataset{
id: id,
accMode: accMode,
cache: cache,
// AddTransformation registers t as a downstream consumer of this dataset.
func (d *dataset) AddTransformation(t Transformation) {
d.ts = append(d.ts, t)
// SetTriggerSpec receives the trigger specification for this dataset.
// NOTE(review): the body is not visible in this excerpt; confirm against the full file.
func (d *dataset) SetTriggerSpec(spec query.TriggerSpec) {
// UpdateWatermark records the new watermark, evaluates the triggers and then
// passes the watermark to every downstream transformation.
// The first error encountered is returned and stops further propagation.
func (d *dataset) UpdateWatermark(mark Time) error {
d.watermark = mark
if err := d.evalTriggers(); err != nil {
return err
for _, t := range d.ts {
if err := t.UpdateWatermark(, mark); err != nil {
return err
return nil
// UpdateProcessingTime records the new processing time, evaluates the triggers
// and then passes the time to every downstream transformation.
// The first error encountered is returned and stops further propagation.
func (d *dataset) UpdateProcessingTime(time Time) error {
d.processingTime = time
if err := d.evalTriggers(); err != nil {
return err
for _, t := range d.ts {
if err := t.UpdateProcessingTime(, time); err != nil {
return err
return nil
// evalTriggers walks every cached block, asks its trigger whether it fires for
// the current watermark and processing time, and triggers the blocks that do.
// It returns the error from triggerBlock, if any.
func (d *dataset) evalTriggers() (err error) {
d.cache.ForEachWithContext(func(key PartitionKey, trigger Trigger, bc BlockContext) {
if err != nil {
// Skip the rest once we have encountered an error
// The trigger decides based on the block context and the dataset's current times.
c := TriggerContext{
Block: bc,
Watermark: d.watermark,
CurrentProcessingTime: d.processingTime,
if trigger.Triggered(c) {
err = d.triggerBlock(key)
// NOTE(review): the action taken for a finished trigger is not visible in this excerpt.
if trigger.Finished() {
return err
// triggerBlock fetches the block for key from the cache and hands it to every
// downstream transformation according to the accumulation mode.
// The first error from the cache or a transformation is returned.
func (d *dataset) triggerBlock(key PartitionKey) error {
b, err := d.cache.Block(key)
if err != nil {
return err
switch d.accMode {
case DiscardingMode:
for _, t := range d.ts {
if err := t.Process(, b); err != nil {
return err
// In retracting mode each transformation is first asked to retract the block's key.
case AccumulatingRetractingMode:
for _, t := range d.ts {
if err := t.RetractBlock(, b.Key()); err != nil {
return err
case AccumulatingMode:
for _, t := range d.ts {
if err := t.Process(, b); err != nil {
return err
return nil
// expireBlock takes the partition key of a block to expire.
// NOTE(review): the body is not visible in this excerpt; confirm against the full file.
func (d *dataset) expireBlock(key PartitionKey) {
// RetractBlock asks every downstream transformation to retract the block for key.
// The first error encountered is returned.
func (d *dataset) RetractBlock(key PartitionKey) error {
for _, t := range d.ts {
if err := t.RetractBlock(, key); err != nil {
return err
return nil
// Finish triggers the cached blocks when err is nil and then calls Finish on
// every downstream transformation, passing along err (which may have been set
// by a failed trigger).
func (d *dataset) Finish(err error) {
if err == nil {
// Only trigger blocks if we are not finishing because of an error.
d.cache.ForEach(func(bk PartitionKey) {
// NOTE(review): the statement guarded by this check is not visible in this excerpt.
if err != nil {
err = d.triggerBlock(bk)
for _, t := range d.ts {
t.Finish(, err)
package execute
import (
// Dispatcher schedules work for a query.
// Each transformation submits work to be done to the dispatcher.
// Then the dispatcher schedules the work based on the available resources.
type Dispatcher interface {
// Schedule fn to be executed
Schedule(fn ScheduleFunc)
// ScheduleFunc is a function that represents work to do.
// The throughput is the maximum number of messages to process for this scheduling.
type ScheduleFunc func(throughput int)
// poolDispatcher implements Dispatcher using a pool of goroutines.
type poolDispatcher struct {
// work queues the scheduled functions for the workers.
work chan ScheduleFunc
// throughput is the value the dispatcher was constructed with.
throughput int
// NOTE(review): presumably mu guards closed and err; the locking calls are
// not visible in this excerpt.
mu sync.Mutex
closed bool
// closing is selected on by Schedule and run to observe shutdown.
closing chan struct{}
// wg tracks the worker goroutines.
wg sync.WaitGroup
// err is the first error recorded by setErr; errC delivers it to Err's caller.
err error
errC chan error
// newPoolDispatcher returns a dispatcher with the given throughput, a work
// queue buffered to 100 entries and an error channel buffered to one entry.
// No workers run until Start is called.
func newPoolDispatcher(throughput int) *poolDispatcher {
return &poolDispatcher{
throughput: throughput,
work: make(chan ScheduleFunc, 100),
closing: make(chan struct{}),
errC: make(chan error, 1),
// Schedule submits fn to the dispatcher, giving up if the dispatcher is closing.
// NOTE(review): the send target of the first case is not visible in this
// excerpt — presumably the work channel; confirm against the full file.
func (d *poolDispatcher) Schedule(fn ScheduleFunc) {
select {
case <- fn:
case <-d.closing:
// Start launches n worker goroutines.
// A panic in a worker is recovered and recorded through setErr together with a stack trace.
// NOTE(review): the wg.Add call and the invocation of the worker loop are not
// visible in this excerpt.
func (d *poolDispatcher) Start(n int, ctx context.Context) {
for i := 0; i < n; i++ {
go func() {
defer d.wg.Done()
// Setup panic handling on the worker goroutines
defer func() {
if e := recover(); e != nil {
// Keep error values as they are; format anything else into an error.
var err error
switch e := e.(type) {
case error:
err = e
err = fmt.Errorf("%v", e)
d.setErr(fmt.Errorf("panic: %v\n%s", err, debug.Stack()))
// Err returns a channel that will produce an error if one is encountered.
func (d *poolDispatcher) Err() <-chan error {
return d.errC
// setErr records err and publishes it on the error channel, but only when no
// error has been recorded before; later errors are dropped.
func (d *poolDispatcher) setErr(err error) {
// TODO(nathanielc): Collect all error information.
if d.err == nil {
d.err = err
d.errC <- err
// Stop stops the dispatcher and returns the recorded error, if any.
// Calling Stop on an already closed dispatcher just returns that error.
// NOTE(review): the shutdown steps between the two returns are only partly
// visible in this excerpt.
func (d *poolDispatcher) Stop() error {
if d.closed {
return d.err
d.closed = true
return d.err
// run is the logic executed by each worker goroutine in the pool.
// It loops, selecting between context cancellation, dispatcher shutdown and
// incoming work.
// NOTE(review): the work-receiving case is truncated in this excerpt.
func (d *poolDispatcher) run(ctx context.Context) {
for {
select {
case <-ctx.Done():
// Immediately return, do not process any more work
case <-d.closing:
// We are done, nothing left to do.
case fn := <
package executetest
import (
// AggFuncTestHelper splits the data in half, runs Do over each split and compares
// the Value to want.
// The result is read through the ValueFunc interface matching vf.Type(), and
// NaN values are treated as equal when comparing.
// NOTE(review): the calls that feed data to the aggregator are not visible in
// this excerpt.
func AggFuncTestHelper(t *testing.T, agg execute.Aggregate, data []float64, want interface{}) {
// Call Do twice, since this is possible according to the interface.
h := len(data) / 2
vf := agg.NewFloatAgg()
if h < len(data) {
// Extract the result using the interface that matches the aggregator's output type.
var got interface{}
switch vf.Type() {
case execute.TBool:
got = vf.(execute.BoolValueFunc).ValueBool()
case execute.TInt:
got = vf.(execute.IntValueFunc).ValueInt()
case execute.TUInt:
got = vf.(execute.UIntValueFunc).ValueUInt()
case execute.TFloat:
got = vf.(execute.FloatValueFunc).ValueFloat()
case execute.TString:
got = vf.(execute.StringValueFunc).ValueString()
if !cmp.Equal(want, got, cmpopts.EquateNaNs()) {
t.Errorf("unexpected value -want/+got\n%s", cmp.Diff(want, got))
// AggFuncBenchmarkHelper benchmarks the aggregate function over data and compares to wantValue
func AggFuncBenchmarkHelper(b *testing.B, agg execute.Aggregate, data []float64, want interface{}) {
for n := 0; n < b.N; n++ {
vf := agg.NewFloatAgg()
var got interface{}
switch vf.Type() {
case execute.TBool:
got = vf.(execute.BoolValueFunc).ValueBool()
case execute.TInt:
got = vf.(execute.IntValueFunc).ValueInt()
case execute.TUInt:
got = vf.(execute.UIntValueFunc).ValueUInt()
case execute.TFloat:
got = vf.(execute.FloatValueFunc).ValueFloat()
case execute.TString:
got = vf.(execute.StringValueFunc).ValueString()
if !cmp.Equal(want, got) {
b.Errorf("unexpected value -want/+got\n%s", cmp.Diff(want, got))
package executetest
import (
var UnlimitedAllocator = &execute.Allocator{
Limit: math.MaxInt64,
package executetest
import (
// Block is an implementation of execute.Block.
// It is designed to make it easy to statically declare the data within the block.
// Not all fields need to be set. See comments on each field.
// Use Normalize to ensure that all fields are set before equality comparisons.
type Block struct {
// PartitionKey of the block. Does not need to be set explicitly;
// Normalize builds it from KeyCols and KeyValues when it is nil.
PartitionKey execute.PartitionKey
// KeyCols is a list of columns that are part of the partition key.
// The column type is deduced from the ColMeta slice.
KeyCols []string
// KeyValues is a list of values for the partition key columns.
// Only needs to be set when no data is present on the Block;
// otherwise Normalize fills it from the first row of Data.
KeyValues []interface{}
// ColMeta is a list of columns of the block.
ColMeta []execute.ColMeta
// Data is a list of rows, i.e. Data[row][col].
// Each row must be a list with length equal to len(ColMeta).
Data [][]interface{}
// Normalize ensures all fields of the Block are set correctly.
func (b *Block) Normalize() {
if b.PartitionKey == nil {
cols := make([]execute.ColMeta, len(b.KeyCols))
if len(b.KeyValues) != len(b.KeyCols) {
b.KeyValues = make([]interface{}, len(b.KeyCols))
for j, label := range b.KeyCols {
idx := execute.ColIdx(label, b.ColMeta)
if idx < 0 {
panic(fmt.Errorf("block invalid: missing partition column %q", label))
cols[j] = b.ColMeta[idx]
if len(b.Data) > 0 {
b.KeyValues[j] = b.Data[0][idx]
b.PartitionKey = execute.NewPartitionKey(cols, b.KeyValues)
func (b *Block) RefCount(n int) {}
func (b *Block) Cols() []execute.ColMeta {
return b.ColMeta
func (b *Block) Key() execute.PartitionKey {
return b.PartitionKey
func (b *Block) Do(f func(execute.ColReader) error) error {
for _, r := range b.Data {
if err := f(ColReader{
key: b.Key(),
cols: b.ColMeta,
row: r,
}); err != nil {
return err
return nil
type ColReader struct {
key execute.PartitionKey
cols []execute.ColMeta
row []interface{}
func (cr ColReader) Cols() []execute.ColMeta {
return cr.cols
func (cr ColReader) Key() execute.PartitionKey {
return cr.key
func (cr ColReader) Len() int {
return 1
func (cr ColReader) Bools(j int) []bool {
return []bool{cr.row[j].(bool)}
func (cr ColReader) Ints(j int) []int64 {
return []int64{cr.row[j].(int64)}
func (cr ColReader) UInts(j int) []uint64 {
return []uint64{cr.row[j].(uint64)}
func (cr ColReader) Floats(j int) []float64 {
return []float64{cr.row[j].(float64)}
func (cr ColReader) Strings(j int) []string {
return []string{cr.row[j].(string)}
func (cr ColReader) Times(j int) []execute.Time {
return []execute.Time{cr.row[j].(execute.Time)}
func BlocksFromCache(c execute.DataCache) (blocks []*Block, err error) {
c.ForEach(func(key execute.PartitionKey) {
if err != nil {
var b execute.Block
b, err = c.Block(key)
if err != nil {
var cb *Block
cb, err = ConvertBlock(b)
if err != nil {
blocks = append(blocks, cb)
return blocks, nil
func ConvertBlock(b execute.Block) (*Block, error) {
key := b.Key()
blk := &Block{
PartitionKey: key,
ColMeta: b.Cols(),
keyCols := key.Cols()
if len(keyCols) > 0 {
blk.KeyCols = make([]string, len(keyCols))
blk.KeyValues = make([]interface{}, len(keyCols))
for j, c := range keyCols {
blk.KeyCols[j] = c.Label
blk.KeyValues[j] = key.Value(j)
err := b.Do(func(cr execute.ColReader) error {
l := cr.Len()
for i := 0; i < l; i++ {
row := make([]interface{}, len(blk.ColMeta))
for j, c := range blk.ColMeta {
var v interface{}
switch c.Type {
case execute.TBool:
v = cr.Bools(j)[i]
case execute.TInt:
v = cr.Ints(j)[i]
case execute.TUInt:
v = cr.UInts(j)[i]
case execute.TFloat:
v = cr.Floats(j)[i]
case execute.TString:
v = cr.Strings(j)[i]
case execute.TTime:
v = cr.Times(j)[i]
panic(fmt.Errorf("unknown column type %s", c.Type))
row[j] = v
blk.Data = append(blk.Data, row)
return nil
if err != nil {
return nil, err
return blk, nil
type SortedBlocks []*Block
func (b SortedBlocks) Len() int {
return len(b)
func (b SortedBlocks) Less(i int, j int) bool {
return b[i].Key().Less(b[j].Key())
func (b SortedBlocks) Swap(i int, j int) {
b[i], b[j] = b[j], b[i]
// NormalizeBlocks ensures that each block is normalized
func NormalizeBlocks(bs []*Block) {
for _, b := range bs {
@ -0,0 +1,92 @@
package executetest
import (
uuid ""
func RandomDatasetID() execute.DatasetID {
return execute.DatasetID(uuid.NewV4())
type Dataset struct {
ID execute.DatasetID
Retractions []execute.PartitionKey
ProcessingTimeUpdates []execute.Time
WatermarkUpdates []execute.Time
Finished bool
FinishedErr error
func NewDataset(id execute.DatasetID) *Dataset {
return &Dataset{
ID: id,
func (d *Dataset) AddTransformation(t execute.Transformation) {
panic("not implemented")
func (d *Dataset) RetractBlock(key execute.PartitionKey) error {
d.Retractions = append(d.Retractions, key)
return nil
func (d *Dataset) UpdateProcessingTime(t execute.Time) error {
d.ProcessingTimeUpdates = append(d.ProcessingTimeUpdates, t)
return nil
func (d *Dataset) UpdateWatermark(mark execute.Time) error {
d.WatermarkUpdates = append(d.WatermarkUpdates, mark)
return nil
func (d *Dataset) Finish(err error) {
if d.Finished {
panic("finish has already been called")
d.Finished = true
d.FinishedErr = err
func (d *Dataset) SetTriggerSpec(t query.TriggerSpec) {
panic("not implemented")
type NewTransformation func(execute.Dataset, execute.BlockBuilderCache) execute.Transformation
func TransformationPassThroughTestHelper(t *testing.T, newTr NewTransformation) {
now := execute.Now()
d := NewDataset(RandomDatasetID())
c := execute.NewBlockBuilderCache(UnlimitedAllocator)
parentID := RandomDatasetID()
tr := newTr(d, c)
if err := tr.UpdateWatermark(parentID, now); err != nil {
if err := tr.UpdateProcessingTime(parentID, now); err != nil {
tr.Finish(parentID, nil)
exp := &Dataset{
ID: d.ID,
ProcessingTimeUpdates: []execute.Time{now},
WatermarkUpdates: []execute.Time{now},
Finished: true,
FinishedErr: nil,
if !cmp.Equal(d, exp) {
t.Errorf("unexpected dataset -want/+got\n%s", cmp.Diff(exp, d))
@ -0,0 +1,34 @@
package executetest
import ""
type Result struct {
Blks []*Block
func NewResult(blocks []*Block) *Result {
return &Result{Blks: blocks}
func (r *Result) Blocks() execute.BlockIterator {
return &BlockIterator{
func (r *Result) Normalize() {
type BlockIterator struct {
blocks []*Block
func (bi *BlockIterator) Do(f func(execute.Block) error) error {
for _, b := range bi.blocks {
if err := f(b); err != nil {
return err
return nil
package executetest
import (
func RowSelectorFuncTestHelper(t *testing.T, selector execute.RowSelector, data execute.Block, want []execute.Row) {
s := selector.NewFloatSelector()
valueIdx := execute.ColIdx(execute.DefaultValueColLabel, data.Cols())
if valueIdx < 0 {
t.Fatal("no _value column found")
data.Do(func(cr execute.ColReader) error {
s.DoFloat(cr.Floats(valueIdx), cr)
return nil
got := s.Rows()
if !cmp.Equal(want, got) {
t.Errorf("unexpected value -want/+got\n%s", cmp.Diff(want, got))
var rows []execute.Row
func RowSelectorFuncBenchmarkHelper(b *testing.B, selector execute.RowSelector, data execute.Block) {
valueIdx := execute.ColIdx(execute.DefaultValueColLabel, data.Cols())
if valueIdx < 0 {
b.Fatal("no _value column found")
for n := 0; n < b.N; n++ {
s := selector.NewFloatSelector()
data.Do(func(cr execute.ColReader) error {
s.DoFloat(cr.Floats(valueIdx), cr)
return nil
rows = s.Rows()
func IndexSelectorFuncTestHelper(t *testing.T, selector execute.IndexSelector, data execute.Block, want [][]int) {
var got [][]int
s := selector.NewFloatSelector()
valueIdx := execute.ColIdx(execute.DefaultValueColLabel, data.Cols())
if valueIdx < 0 {
t.Fatal("no _value column found")
data.Do(func(cr execute.ColReader) error {
var cpy []int
selected := s.DoFloat(cr.Floats(valueIdx))
if len(selected) > 0 {
cpy = make([]int, len(selected))
copy(cpy, selected)
got = append(got, cpy)
return nil
if !cmp.Equal(want, got) {
t.Errorf("unexpected value -want/+got\n%s", cmp.Diff(want, got))
func IndexSelectorFuncBenchmarkHelper(b *testing.B, selector execute.IndexSelector, data execute.Block) {
valueIdx := execute.ColIdx(execute.DefaultValueColLabel, data.Cols())
if valueIdx < 0 {
b.Fatal("no _value column found")
var got [][]int
for n := 0; n < b.N; n++ {
s := selector.NewFloatSelector()
data.Do(func(cr execute.ColReader) error {
got = append(got, s.DoFloat(cr.Floats(valueIdx)))
return nil
package executetest
import (
func ProcessTestHelper(
t *testing.T,
data []execute.Block,
want []*Block,
create func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation,
) {
d := NewDataset(RandomDatasetID())
c := execute.NewBlockBuilderCache(UnlimitedAllocator)
tx := create(d, c)
parentID := RandomDatasetID()
for _, b := range data {
if err := tx.Process(parentID, b); err != nil {
got, err := BlocksFromCache(c)
if err != nil {
if !cmp.Equal(want, got, cmpopts.EquateNaNs()) {
t.Errorf("unexpected blocks -want/+got\n%s", cmp.Diff(want, got))
@ -0,0 +1,244 @@
package execute
import (
type Executor interface {
Execute(ctx context.Context, orgID id.ID, p *plan.PlanSpec) (map[string]Result, error)
type executor struct {
deps Dependencies
func NewExecutor(deps Dependencies) Executor {
e := &executor{
deps: deps,
return e
type executionState struct {
p *plan.PlanSpec
deps Dependencies
orgID id.ID
alloc *Allocator
resources query.ResourceManagement
bounds Bounds
results map[string]Result
sources []Source
transports []Transport
dispatcher *poolDispatcher
func (e *executor) Execute(ctx context.Context, orgID id.ID, p *plan.PlanSpec) (map[string]Result, error) {
es, err := e.createExecutionState(ctx, orgID, p)
if err != nil {
return nil, errors.Wrap(err, "failed to initialize execute state")
return es.results, nil
func validatePlan(p *plan.PlanSpec) error {
if p.Resources.ConcurrencyQuota == 0 {
return errors.New("plan must have a non-zero concurrency quota")
return nil
func (e *executor) createExecutionState(ctx context.Context, orgID id.ID, p *plan.PlanSpec) (*executionState, error) {
if err := validatePlan(p); err != nil {
return nil, errors.Wrap(err, "invalid plan")
es := &executionState{
orgID: orgID,
p: p,
deps: e.deps,
alloc: &Allocator{
Limit: p.Resources.MemoryBytesQuota,
resources: p.Resources,
results: make(map[string]Result, len(p.Results)),
// TODO(nathanielc): Have the planner specify the dispatcher throughput
dispatcher: newPoolDispatcher(10),
bounds: Bounds{
Start: Time(p.Bounds.Start.Time(p.Now).UnixNano()),
Stop: Time(p.Bounds.Stop.Time(p.Now).UnixNano()),
nodes := make(map[plan.ProcedureID]Node, len(p.Procedures))
for name, yield := range p.Results {
ds, err := es.createNode(ctx, p.Procedures[yield.ID], nodes)
if err != nil {
return nil, err
r := newResult(yield)
es.results[name] = r
return es, nil
// DefaultTriggerSpec defines the triggering that should be used for datasets
// whose parent transformation is not a windowing transformation.
var DefaultTriggerSpec = query.AfterWatermarkTriggerSpec{}
type triggeringSpec interface {
TriggerSpec() query.TriggerSpec
func (es *executionState) createNode(ctx context.Context, pr *plan.Procedure, nodes map[plan.ProcedureID]Node) (Node, error) {
// Check if we already created this node
if n, ok := nodes[pr.ID]; ok {
return n, nil
// Build execution context
ec := executionContext{
es: es,
if len(pr.Parents) > 0 {
ec.parents = make([]DatasetID, len(pr.Parents))
for i, parentID := range pr.Parents {
ec.parents[i] = DatasetID(parentID)
// If source create source
if createS, ok := procedureToSource[pr.Spec.Kind()]; ok {
s, err := createS(pr.Spec, DatasetID(pr.ID), ec)
if err != nil {
return nil, err
es.sources = append(es.sources, s)
nodes[pr.ID] = s
return s, nil
createT, ok := procedureToTransformation[pr.Spec.Kind()]
if !ok {
return nil, fmt.Errorf("unsupported procedure %v", pr.Spec.Kind())
// Create the transformation
t, ds, err := createT(DatasetID(pr.ID), AccumulatingMode, pr.Spec, ec)
if err != nil {
return nil, err
nodes[pr.ID] = ds
// Setup triggering
var ts query.TriggerSpec = DefaultTriggerSpec
if t, ok := pr.Spec.(triggeringSpec); ok {
ts = t.TriggerSpec()
// Recurse creating parents
for _, parentID := range pr.Parents {
parent, err := es.createNode(ctx, es.p.Procedures[parentID], nodes)
if err != nil {
return nil, err
transport := newConescutiveTransport(es.dispatcher, t)
es.transports = append(es.transports, transport)
return ds, nil
func (es *executionState) abort(err error) {
for _, r := range es.results {
func (es *executionState) do(ctx context.Context) {
for _, src := range es.sources {
go func(src Source) {
// Setup panic handling on the source goroutines
defer func() {
if e := recover(); e != nil {
// We had a panic, abort the entire execution.
var err error
switch e := e.(type) {
case error:
err = e
err = fmt.Errorf("%v", e)
es.abort(fmt.Errorf("panic: %v\n%s", err, debug.Stack()))
es.dispatcher.Start(es.resources.ConcurrencyQuota, ctx)
go func() {
// Wait for all transports to finish
for _, t := range es.transports {
select {
case <-t.Finished():
case <-ctx.Done():
es.abort(errors.New("context done"))
case err := <-es.dispatcher.Err():
if err != nil {
// Check for any errors on the dispatcher
err := es.dispatcher.Stop()
if err != nil {
type executionContext struct {
es *executionState
parents []DatasetID
// Satisfy the ExecutionContext interface
func (ec executionContext) OrganizationID() id.ID {
func (ec executionContext) ResolveTime(qt query.Time) Time {
return Time(qt.Time(
func (ec executionContext) Bounds() Bounds {
func (ec executionContext) Allocator() *Allocator {
func (ec executionContext) Parents() []DatasetID {
return ec.parents
func (ec executionContext) ConvertID(id plan.ProcedureID) DatasetID {
return DatasetID(id)
@ -0,0 +1,429 @@
package execute_test
import (
uuid ""
var epoch = time.Unix(0, 0)
var orgID id.ID
func init() {
func TestExecutor_Execute(t *testing.T) {
testCases := []struct {
name string
plan *plan.PlanSpec
want map[string][]*executetest.Block
name: "simple aggregate",
plan: &plan.PlanSpec{
Now: epoch.Add(5),
Resources: query.ResourceManagement{
ConcurrencyQuota: 1,
MemoryBytesQuota: math.MaxInt64,
Bounds: plan.BoundsSpec{
Start: query.Time{Absolute: time.Unix(0, 1)},
Stop: query.Time{Absolute: time.Unix(0, 5)},
Procedures: map[plan.ProcedureID]*plan.Procedure{
plan.ProcedureIDFromOperationID("from"): {
ID: plan.ProcedureIDFromOperationID("from"),
Spec: &testFromProcedureSource{
data: []execute.Block{&executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(0), 1.0},
{execute.Time(0), execute.Time(5), execute.Time(1), 2.0},
{execute.Time(0), execute.Time(5), execute.Time(2), 3.0},
{execute.Time(0), execute.Time(5), execute.Time(3), 4.0},
{execute.Time(0), execute.Time(5), execute.Time(4), 5.0},
Parents: nil,
Children: []plan.ProcedureID{plan.ProcedureIDFromOperationID("sum")},
plan.ProcedureIDFromOperationID("sum"): {
ID: plan.ProcedureIDFromOperationID("sum"),
Spec: &functions.SumProcedureSpec{
AggregateConfig: execute.DefaultAggregateConfig,
Parents: []plan.ProcedureID{
Children: nil,
Results: map[string]plan.YieldSpec{
plan.DefaultYieldName: {ID: plan.ProcedureIDFromOperationID("sum")},
want: map[string][]*executetest.Block{
plan.DefaultYieldName: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(5), 15.0},
name: "simple join",
plan: &plan.PlanSpec{
Now: epoch.Add(5),
Resources: query.ResourceManagement{
ConcurrencyQuota: 1,
MemoryBytesQuota: math.MaxInt64,
Bounds: plan.BoundsSpec{
Start: query.Time{Absolute: time.Unix(0, 1)},
Stop: query.Time{Absolute: time.Unix(0, 5)},
Procedures: map[plan.ProcedureID]*plan.Procedure{
plan.ProcedureIDFromOperationID("from"): {
ID: plan.ProcedureIDFromOperationID("from"),
Spec: &testFromProcedureSource{
data: []execute.Block{&executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(0), int64(1)},
{execute.Time(0), execute.Time(5), execute.Time(1), int64(2)},
{execute.Time(0), execute.Time(5), execute.Time(2), int64(3)},
{execute.Time(0), execute.Time(5), execute.Time(3), int64(4)},
{execute.Time(0), execute.Time(5), execute.Time(4), int64(5)},
Parents: nil,
Children: []plan.ProcedureID{plan.ProcedureIDFromOperationID("sum")},
plan.ProcedureIDFromOperationID("sum"): {
ID: plan.ProcedureIDFromOperationID("sum"),
Spec: &functions.SumProcedureSpec{
AggregateConfig: execute.DefaultAggregateConfig,
Parents: []plan.ProcedureID{
Children: []plan.ProcedureID{plan.ProcedureIDFromOperationID("join")},
plan.ProcedureIDFromOperationID("count"): {
ID: plan.ProcedureIDFromOperationID("count"),
Spec: &functions.CountProcedureSpec{
AggregateConfig: execute.DefaultAggregateConfig,
Parents: []plan.ProcedureID{
Children: []plan.ProcedureID{plan.ProcedureIDFromOperationID("join")},
plan.ProcedureIDFromOperationID("join"): {
ID: plan.ProcedureIDFromOperationID("join"),
Spec: &functions.MergeJoinProcedureSpec{
TableNames: map[plan.ProcedureID]string{
plan.ProcedureIDFromOperationID("sum"): "sum",
plan.ProcedureIDFromOperationID("count"): "count",
On: []string{"_time", "_start", "_stop"},
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "t"}}},
Body: &semantic.ObjectExpression{
Properties: []*semantic.Property{
Key: &semantic.Identifier{Name: "_time"},
Value: &semantic.MemberExpression{
Object: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "t",
Property: "sum",
Property: "_time",
Key: &semantic.Identifier{Name: "_start"},
Value: &semantic.MemberExpression{
Object: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "t",
Property: "sum",
Property: "_start",
Key: &semantic.Identifier{Name: "_stop"},
Value: &semantic.MemberExpression{
Object: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "t",
Property: "sum",
Property: "_stop",
Key: &semantic.Identifier{Name: "_value"},
Value: &semantic.BinaryExpression{
Operator: ast.DivisionOperator,
Left: &semantic.MemberExpression{
Object: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "t",
Property: "sum",
Property: "_value",
Right: &semantic.MemberExpression{
Object: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "t",
Property: "count",
Property: "_value",
Parents: []plan.ProcedureID{
Children: nil,
Results: map[string]plan.YieldSpec{
plan.DefaultYieldName: {ID: plan.ProcedureIDFromOperationID("join")},
want: map[string][]*executetest.Block{
plan.DefaultYieldName: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(5), int64(3)},
name: "multiple aggregates",
plan: &plan.PlanSpec{
Now: epoch.Add(5),
Resources: query.ResourceManagement{
ConcurrencyQuota: 1,
MemoryBytesQuota: math.MaxInt64,
Bounds: plan.BoundsSpec{
Start: query.Time{Absolute: time.Unix(0, 1)},
Stop: query.Time{Absolute: time.Unix(0, 5)},
Procedures: map[plan.ProcedureID]*plan.Procedure{
plan.ProcedureIDFromOperationID("from"): {
ID: plan.ProcedureIDFromOperationID("from"),
Spec: &testFromProcedureSource{
data: []execute.Block{&executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(0), 1.0},
{execute.Time(0), execute.Time(5), execute.Time(1), 2.0},
{execute.Time(0), execute.Time(5), execute.Time(2), 3.0},
{execute.Time(0), execute.Time(5), execute.Time(3), 4.0},
{execute.Time(0), execute.Time(5), execute.Time(4), 5.0},
Parents: nil,
Children: []plan.ProcedureID{
plan.ProcedureIDFromOperationID("sum"): {
ID: plan.ProcedureIDFromOperationID("sum"),
Spec: &functions.SumProcedureSpec{
AggregateConfig: execute.DefaultAggregateConfig,
Parents: []plan.ProcedureID{
Children: nil,
plan.ProcedureIDFromOperationID("mean"): {
ID: plan.ProcedureIDFromOperationID("mean"),
Spec: &functions.MeanProcedureSpec{
AggregateConfig: execute.DefaultAggregateConfig,
Parents: []plan.ProcedureID{
Children: nil,
Results: map[string]plan.YieldSpec{
"sum": {ID: plan.ProcedureIDFromOperationID("sum")},
"mean": {ID: plan.ProcedureIDFromOperationID("mean")},
want: map[string][]*executetest.Block{
"sum": []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(5), 15.0},
"mean": []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(5), 3.0},
for _, tc := range testCases {
tc := tc
t.Run(, func(t *testing.T) {
exe := execute.NewExecutor(nil)
results, err := exe.Execute(context.Background(), orgID, tc.plan)
if err != nil {
got := make(map[string][]*executetest.Block, len(results))
for name, r := range results {
if err := r.Blocks().Do(func(b execute.Block) error {
cb, err := executetest.ConvertBlock(b)
if err != nil {
return err
got[name] = append(got[name], cb)
return nil
}); err != nil {
for _, g := range got {
for _, w := range tc.want {
if !cmp.Equal(got, tc.want) {
t.Error("unexpected results -want/+got", cmp.Diff(tc.want, got))
type testFromProcedureSource struct {
data []execute.Block
ts []execute.Transformation
func (p *testFromProcedureSource) Kind() plan.ProcedureKind {
return "from-test"
func (p *testFromProcedureSource) Copy() plan.ProcedureSpec {
return p
func (p *testFromProcedureSource) AddTransformation(t execute.Transformation) {
p.ts = append(p.ts, t)
func (p *testFromProcedureSource) Run(ctx context.Context) {
id := execute.DatasetID(uuid.NewV4())
for _, t := range p.ts {
var max execute.Time
for _, b := range {
t.Process(id, b)
stopIdx := execute.ColIdx(execute.DefaultStopColLabel, b.Cols())
if stopIdx >= 0 {
if s := b.Key().ValueTime(stopIdx); s > max {
max = s
t.UpdateWatermark(id, max)
t.Finish(id, nil)
// },
// },
// {
// name: "float addition",
// expr: expression.Expression{
// Root: &expression.BinaryNode{
// Operator: expression.AdditionOperator,
// Left: &expression.FloatLiteralNode{
// Value: 18,
// },
// Right: &expression.FloatLiteralNode{
// Value: 24,
// },
// },
// },
// want: execute.Value{
// Type: execute.TFloat,
// Value: float64(42),
// },
// },
// {
// name: "boolean and",
// expr: expression.Expression{
// Root: &expression.BinaryNode{
// Operator: expression.AndOperator,
// Left: &expression.BooleanLiteralNode{
// Value: true,
// },
// Right: &expression.BooleanLiteralNode{
// Value: true,
// },
// },
// },
// want: execute.Value{
// Type: execute.TBool,
// Value: true,
// },
// },
// {
// name: "boolean or",
// expr: expression.Expression{
// Root: &expression.BinaryNode{
// Operator: expression.OrOperator,
// Left: &expression.BooleanLiteralNode{
// Value: false,
// },
// Right: &expression.BooleanLiteralNode{
// Value: true,
// },
// },
// },
// want: execute.Value{
// Type: execute.TBool,
// Value: true,
// },
// },
// }
// for _, tc := range testCases {
// tc := tc
// t.Run(, func(t *testing.T) {
// ce, err := execute.CompileExpression(tc.expr, tc.types)
// if err != nil {
// t.Fatal(err)
// }
// got, err := ce.Eval(tc.scope)
// if err != nil {
// if !tc.wantErr {
// t.Fatal(err)
// }
// } else if tc.wantErr {
// t.Fatal("expected evaluation error")
// }
// if !cmp.Equal(got, tc.want) {
// t.Errorf("unexpected value: -want/+got\n%s", cmp.Diff(tc.want, got))
// }
// })
// }
@ -0,0 +1,286 @@
package execute
import (
const fixedWidthTimeFmt = "2006-01-02T15:04:05.000000000Z"
// Formatter writes a block to a Writer.
type Formatter struct {
b Block
widths []int
maxWidth int
newWidths []int
pad []byte
dash []byte
// fmtBuf is used to format values
fmtBuf [64]byte
opts FormatOptions
cols orderedCols
type FormatOptions struct {
// RepeatHeaderCount is the number of rows to print before printing the header again.
// If zero then the headers are not repeated.
RepeatHeaderCount int
func DefaultFormatOptions() *FormatOptions {
return &FormatOptions{}
var eol = []byte{'\n'}
// NewFormatter creates a Formatter for a given block.
// If opts is nil, the DefaultFormatOptions are used.
func NewFormatter(b Block, opts *FormatOptions) *Formatter {
if opts == nil {
opts = DefaultFormatOptions()
return &Formatter{
b: b,
opts: *opts,
type writeToHelper struct {
w io.Writer
n int64
err error
func (w *writeToHelper) write(data []byte) {
if w.err != nil {
n, err := w.w.Write(data)
w.n += int64(n)
w.err = err
var minWidthsByType = map[DataType]int{
TBool: 12,
TInt: 26,
TUInt: 27,
TFloat: 28,
TString: 22,
TTime: len(fixedWidthTimeFmt),
TInvalid: 10,
// WriteTo writes the formatted block data to w.
func (f *Formatter) WriteTo(out io.Writer) (int64, error) {
w := &writeToHelper{w: out}
// Sort cols
cols := f.b.Cols()
f.cols = newOrderedCols(cols)
// Compute header widths
f.widths = make([]int, len(cols))
for j, c := range cols {
l := len(c.Label)
min := minWidthsByType[c.Type]
if min > l {
l = min
if l > f.widths[j] {
f.widths[j] = l
if l > f.maxWidth {
f.maxWidth = l
// Write Block header
w.write([]byte("Block: keys: ["))
labels := make([]string, len(f.b.Key().Cols()))
for i, c := range f.b.Key().Cols() {
labels[i] = c.Label
w.write([]byte(strings.Join(labels, ", ")))
// Check err and return early
if w.err != nil {
return w.n, w.err
// Write rows
r := 0
f.b.Do(func(cr ColReader) error {
if r == 0 {
l := cr.Len()
for i := 0; i < l; i++ {
for oj, c := range f.cols.cols {
j := f.cols.Idx(oj)
buf := f.valueBuf(i, j, c.Type, cr)
l := len(buf)
if l > f.widths[j] {
f.widths[j] = l
if l > f.maxWidth {
f.maxWidth = l
f.newWidths = make([]int, len(f.widths))
copy(f.newWidths, f.widths)
l := cr.Len()
for i := 0; i < l; i++ {
for oj, c := range f.cols.cols {
j := f.cols.Idx(oj)
buf := f.valueBuf(i, j, c.Type, cr)
l := len(buf)
padding := f.widths[j] - l
if padding >= 0 {
} else {
//TODO make unicode friendly
w.write([]byte{'.', '.', '.'})
if l > f.newWidths[j] {
f.newWidths[j] = l
if l > f.maxWidth {
f.maxWidth = l
if f.opts.RepeatHeaderCount > 0 && r%f.opts.RepeatHeaderCount == 0 {
copy(f.widths, f.newWidths)
return w.err
return w.n, w.err
func (f *Formatter) makePaddingBuffers() {
if len(f.pad) != f.maxWidth {
f.pad = make([]byte, f.maxWidth)
for i := range f.pad {
f.pad[i] = ' '
if len(f.dash) != f.maxWidth {
f.dash = make([]byte, f.maxWidth)
for i := range f.dash {
f.dash[i] = '-'
func (f *Formatter) writeHeader(w *writeToHelper) {
for oj, c := range f.cols.cols {
j := f.cols.Idx(oj)
buf := append(append([]byte(c.Label), ':'), []byte(c.Type.String())...)
func (f *Formatter) writeHeaderSeparator(w *writeToHelper) {
for oj := range f.cols.cols {
j := f.cols.Idx(oj)
func (f *Formatter) valueBuf(i, j int, typ DataType, cr ColReader) (buf []byte) {
switch typ {
case TBool:
buf = strconv.AppendBool(f.fmtBuf[0:0], cr.Bools(j)[i])
case TInt:
buf = strconv.AppendInt(f.fmtBuf[0:0], cr.Ints(j)[i], 10)
case TUInt:
buf = strconv.AppendUint(f.fmtBuf[0:0], cr.UInts(j)[i], 10)
case TFloat:
// TODO allow specifying format and precision
buf = strconv.AppendFloat(f.fmtBuf[0:0], cr.Floats(j)[i], 'f', -1, 64)
case TString:
buf = []byte(cr.Strings(j)[i])
case TTime:
buf = []byte(cr.Times(j)[i].String())
// orderedCols sorts a list of columns:
// * time
// * common tags sorted by label
// * other tags sorted by label
// * value
type orderedCols struct {
indexMap []int
cols []ColMeta
func newOrderedCols(cols []ColMeta) orderedCols {
indexMap := make([]int, len(cols))
for i := range indexMap {
indexMap[i] = i
cpy := make([]ColMeta, len(cols))
copy(cpy, cols)
return orderedCols{
indexMap: indexMap,
cols: cpy,
func (o orderedCols) Idx(oj int) int {
return o.indexMap[oj]
func (o orderedCols) Len() int { return len(o.cols) }
func (o orderedCols) Swap(i int, j int) {
o.cols[i], o.cols[j] = o.cols[j], o.cols[i]
o.indexMap[i], o.indexMap[j] = o.indexMap[j], o.indexMap[i]
func (o orderedCols) Less(i int, j int) bool {
// Time column is always first
if o.cols[i].Label == DefaultTimeColLabel {
return true
if o.cols[j].Label == DefaultTimeColLabel {
return false
// Value column is always last
if o.cols[i].Label == DefaultValueColLabel {
return false
if o.cols[j].Label == DefaultValueColLabel {
return true
// within a class sort by label
return o.cols[i].Label < o.cols[j].Label
@ -0,0 +1,102 @@
package execute
import (
type PartitionLookup struct {
partitions map[uint64][]partitionEntry
type partitionEntry struct {
key PartitionKey
value interface{}
func NewPartitionLookup() *PartitionLookup {
return &PartitionLookup{
partitions: make(map[uint64][]partitionEntry),
func (l *PartitionLookup) Lookup(key PartitionKey) (interface{}, bool) {
if key == nil {
return nil, false
h := key.Hash()
entries := l.partitions[h]
if len(entries) == 1 {
return entries[0].value, true
for _, entry := range entries {
if entry.key.Equal(key) {
return entry.value, true
return nil, false
func (l *PartitionLookup) Set(key PartitionKey, value interface{}) {
h := key.Hash()
entries := l.partitions[h]
l.partitions[h] = append(entries, partitionEntry{
key: key,
value: value,
func (l *PartitionLookup) Delete(key PartitionKey) (interface{}, bool) {
if key == nil {
return nil, false
h := key.Hash()
entries := l.partitions[h]
if len(entries) == 1 {
delete(l.partitions, h)
return entries[0].value, true
for i, entry := range entries {
if entry.key.Equal(key) {
l.partitions[h] = append(entries[:i+1], entries[i+1:]...)
return entry.value, true
return nil, false
func (l *PartitionLookup) Range(f func(key PartitionKey, value interface{})) {
for _, entries := range l.partitions {
for _, entry := range entries {
f(entry.key, entry.value)
func computeKeyHash(key PartitionKey) uint64 {
h := fnv.New64()
for j, c := range key.Cols() {
switch c.Type {
case TBool:
if key.ValueBool(j) {
} else {
case TInt:
binary.Write(h, binary.BigEndian, key.ValueInt(j))
case TUInt:
binary.Write(h, binary.BigEndian, key.ValueUInt(j))
case TFloat:
binary.Write(h, binary.BigEndian, math.Float64bits(key.ValueFloat(j)))
case TString:
case TTime:
binary.Write(h, binary.BigEndian, uint64(key.ValueTime(j)))
return h.Sum64()
@ -0,0 +1,64 @@
package execute
import (
// MessageQueue provides a concurrency safe queue for messages.
// The queue must have a single consumer calling Pop.
type MessageQueue interface {
// Pop removes and returns the next queued message.
// The unboundedMessageQueue implementation returns nil when the queue is empty.
Pop() Message
// unboundedMessageQueue is a ring buffer of messages that doubles its
// backing slice whenever a push would make tail catch up with head.
type unboundedMessageQueue struct {
// buf is the ring buffer storage.
buf []Message
// head is the index of the most recently popped slot; the next message is at head+1.
head int
// tail is the index of the most recently pushed message.
tail int
mu sync.Mutex
// len is the number of queued messages; it is read and updated atomically.
len int32
func newMessageQueue(n int) *unboundedMessageQueue {
return &unboundedMessageQueue{
buf: make([]Message, n),
func (q *unboundedMessageQueue) Push(m Message) {
size := len(q.buf)
q.tail = (q.tail + 1) % size
if q.tail == q.head {
// Resize
buf := make([]Message, size*2)
copy(buf, q.buf[q.head:])
copy(buf[size-q.head:], q.buf[:q.head])
q.head = 0
q.tail = size
q.buf = buf
atomic.AddInt32(&q.len, 1)
q.buf[q.tail] = m
func (q *unboundedMessageQueue) Len() int {
return int(atomic.LoadInt32(&q.len))
func (q *unboundedMessageQueue) Pop() Message {
if q.Len() == 0 {
return nil
size := len(q.buf)
q.head = (q.head + 1) % size
m := q.buf[q.head]
q.buf[q.head] = nil
atomic.AddInt32(&q.len, -1)
return m
@ -0,0 +1,119 @@
package execute
import (
// Result is the pull based view of the blocks produced by a query.
type Result interface {
// Blocks returns a BlockIterator for iterating through results
Blocks() BlockIterator
// result implements both the Transformation and Result interfaces,
// mapping the pushed based Transformation API to the pull based Result interface.
type result struct {
mu sync.Mutex
// blocks carries messages from Process and Finish to the consumer in Do.
blocks chan resultMessage
// abortErr hands the error passed to abort over to Do.
abortErr chan error
// aborted is selected on by Process and Finish as the alternative to sending on blocks.
aborted chan struct{}
// resultMessage is the unit sent over result.blocks: Process sets block, Finish sets err.
type resultMessage struct {
block Block
err error
func newResult(plan.YieldSpec) *result {
return &result{
// TODO(nathanielc): Currently this buffer needs to be big enough hold all result blocks :(
blocks: make(chan resultMessage, 1000),
abortErr: make(chan error, 1),
aborted: make(chan struct{}),
func (s *result) RetractBlock(DatasetID, PartitionKey) error {
//TODO implement
return nil
func (s *result) Process(id DatasetID, b Block) error {
select {
case s.blocks <- resultMessage{
block: b,
case <-s.aborted:
return nil
func (s *result) Blocks() BlockIterator {
return s
func (s *result) Do(f func(Block) error) error {
for {
select {
case err := <-s.abortErr:
return err
case msg, more := <-s.blocks:
if !more {
return nil
if msg.err != nil {
return msg.err
if err := f(msg.block); err != nil {
return err
func (s *result) UpdateWatermark(id DatasetID, mark Time) error {
//Nothing to do
return nil
func (s *result) UpdateProcessingTime(id DatasetID, t Time) error {
//Nothing to do
return nil
func (s *result) setTrigger(Trigger) {
//TODO: Change interfaces so that resultSink, does not need to implement this method.
func (s *result) Finish(id DatasetID, err error) {
if err != nil {
select {
case s.blocks <- resultMessage{
err: err,
case <-s.aborted:
// Abort the result with the given error
func (s *result) abort(err error) {
// Check if we have already aborted
aborted := false
select {
case <-s.aborted:
aborted = true
if aborted {
return // already aborted
s.abortErr <- err
@ -0,0 +1,335 @@
package execute
import (
// rowFn holds the state needed to evaluate a single-parameter function
// expression against individual rows.
type rowFn struct {
fn *semantic.FunctionExpression
// compilationCache compiles the function for a given set of argument types (see prepare).
compilationCache *compiler.CompilationCache
// scope is the scope passed to preparedFn on every eval; it holds the record under recordName.
scope compiler.Scope
// preparedFn is the compiled function produced by prepare.
preparedFn compiler.Func
// recordName is the name of the function's only parameter.
recordName string
// record is refilled with the referenced column values on every eval.
record *Record
// recordCols maps a referenced column label to its column index.
recordCols map[string]int
// references lists the column labels the function body reads from its parameter.
references []string
func newRowFn(fn *semantic.FunctionExpression) (rowFn, error) {
if len(fn.Params) != 1 {
return rowFn{}, fmt.Errorf("function should only have a single parameter, got %d", len(fn.Params))
scope, decls := query.BuiltIns()
return rowFn{
compilationCache: compiler.NewCompilationCache(fn, scope, decls),
scope: make(compiler.Scope, 1),
recordName: fn.Params[0].Key.Name,
references: findColReferences(fn),
recordCols: make(map[string]int),
}, nil
// prepare binds the function to the column layout cols: it resolves every
// referenced column label to an index, builds the record object type from the
// column types and compiles the function for that record type.
// It returns an error when a referenced column is missing or compilation fails.
func (f *rowFn) prepare(cols []ColMeta) error {
// Prepare types and recordCols
propertyTypes := make(map[string]semantic.Type, len(f.references))
for _, r := range f.references {
found := false
for j, c := range cols {
if r == c.Label {
f.recordCols[r] = j
found = true
propertyTypes[r] = ConvertToKind(c.Type)
if !found {
return fmt.Errorf("function references unknown column %q", r)
// The record type only contains the referenced columns.
f.record = NewRecord(semantic.NewObjectType(propertyTypes))
// Compile fn for given types
fn, err := f.compilationCache.Compile(map[string]semantic.Type{
f.recordName: f.record.Type(),
if err != nil {
return err
f.preparedFn = fn
return nil
func ConvertToKind(t DataType) semantic.Kind {
// TODO make this an array lookup.
switch t {
case TInvalid:
return semantic.Invalid
case TBool:
return semantic.Bool
case TInt:
return semantic.Int
case TUInt:
return semantic.UInt
case TFloat:
return semantic.Float
case TString:
return semantic.String
case TTime:
return semantic.Time
return semantic.Invalid
func ConvertFromKind(k semantic.Kind) DataType {
// TODO make this an array lookup.
switch k {
case semantic.Invalid:
return TInvalid
case semantic.Bool:
return TBool
case semantic.Int:
return TInt
case semantic.UInt:
return TUInt
case semantic.Float:
return TFloat
case semantic.String:
return TString
case semantic.Time:
return TTime
return TInvalid
func (f *rowFn) eval(row int, cr ColReader) (values.Value, error) {
for _, r := range f.references {
f.record.Set(r, ValueForRow(row, f.recordCols[r], cr))
f.scope[f.recordName] = f.record
return f.preparedFn.Eval(f.scope)
type RowPredicateFn struct {
func NewRowPredicateFn(fn *semantic.FunctionExpression) (*RowPredicateFn, error) {
r, err := newRowFn(fn)
if err != nil {
return nil, err
return &RowPredicateFn{
rowFn: r,
}, nil
func (f *RowPredicateFn) Prepare(cols []ColMeta) error {
err := f.rowFn.prepare(cols)
if err != nil {
return err
if f.preparedFn.Type() != semantic.Bool {
return errors.New("row predicate function does not evaluate to a boolean")
return nil
func (f *RowPredicateFn) Eval(row int, cr ColReader) (bool, error) {
v, err := f.rowFn.eval(row, cr)
if err != nil {
return false, err
return v.Bool(), nil
type RowMapFn struct {
isWrap bool
wrapObj *Record
func NewRowMapFn(fn *semantic.FunctionExpression) (*RowMapFn, error) {
r, err := newRowFn(fn)
if err != nil {
return nil, err
return &RowMapFn{
rowFn: r,
}, nil
func (f *RowMapFn) Prepare(cols []ColMeta) error {
err := f.rowFn.prepare(cols)
if err != nil {
return err
k := f.preparedFn.Type().Kind()
f.isWrap = k != semantic.Object
if f.isWrap {
f.wrapObj = NewRecord(semantic.NewObjectType(map[string]semantic.Type{
DefaultValueColLabel: f.preparedFn.Type(),
return nil
func (f *RowMapFn) Type() semantic.Type {
if f.isWrap {
return f.wrapObj.Type()
return f.preparedFn.Type()
func (f *RowMapFn) Eval(row int, cr ColReader) (values.Object, error) {
v, err := f.rowFn.eval(row, cr)
if err != nil {
return nil, err
if f.isWrap {
f.wrapObj.Set(DefaultValueColLabel, v)
return f.wrapObj, nil
return v.Object(), nil
// ValueForRow returns the value at row i of column j of cr, wrapped in the
// values.Value that matches the column's type.
// NOTE(review): the handling of column types not listed below is not fully visible here — confirm.
func ValueForRow(i, j int, cr ColReader) values.Value {
t := cr.Cols()[j].Type
switch t {
case TString:
return values.NewStringValue(cr.Strings(j)[i])
case TInt:
return values.NewIntValue(cr.Ints(j)[i])
case TUInt:
return values.NewUIntValue(cr.UInts(j)[i])
case TFloat:
return values.NewFloatValue(cr.Floats(j)[i])
case TBool:
return values.NewBoolValue(cr.Bools(j)[i])
case TTime:
return values.NewTimeValue(cr.Times(j)[i])
return nil
// AppendValue appends v to column j of builder using the append method that
// matches the kind of v.
// NOTE(review): the handling of kinds not listed below is not visible here — confirm.
func AppendValue(builder BlockBuilder, j int, v values.Value) {
switch k := v.Type().Kind(); k {
case semantic.Bool:
builder.AppendBool(j, v.Bool())
case semantic.Int:
builder.AppendInt(j, v.Int())
case semantic.UInt:
builder.AppendUInt(j, v.UInt())
case semantic.Float:
builder.AppendFloat(j, v.Float())
case semantic.String:
builder.AppendString(j, v.Str())
case semantic.Time:
builder.AppendTime(j, v.Time())
func findColReferences(fn *semantic.FunctionExpression) []string {
v := &colReferenceVisitor{
recordName: fn.Params[0].Key.Name,
semantic.Walk(v, fn)
return v.refs
type colReferenceVisitor struct {
recordName string
refs []string
func (c *colReferenceVisitor) Visit(node semantic.Node) semantic.Visitor {
if me, ok := node.(*semantic.MemberExpression); ok {
if obj, ok := me.Object.(*semantic.IdentifierExpression); ok && obj.Name == c.recordName {
c.refs = append(c.refs, me.Property)
return c
func (c *colReferenceVisitor) Done() {}
type Record struct {
t semantic.Type
values map[string]values.Value
func NewRecord(t semantic.Type) *Record {
return &Record{
t: t,
values: make(map[string]values.Value),
func (r *Record) Type() semantic.Type {
return r.t
func (r *Record) Str() string {
panic(values.UnexpectedKind(semantic.Object, semantic.String))
func (r *Record) Int() int64 {
panic(values.UnexpectedKind(semantic.Object, semantic.Int))
func (r *Record) UInt() uint64 {
panic(values.UnexpectedKind(semantic.Object, semantic.UInt))
func (r *Record) Float() float64 {
panic(values.UnexpectedKind(semantic.Object, semantic.Float))
func (r *Record) Bool() bool {
panic(values.UnexpectedKind(semantic.Object, semantic.Bool))
func (r *Record) Time() values.Time {
panic(values.UnexpectedKind(semantic.Object, semantic.Time))
func (r *Record) Duration() values.Duration {
panic(values.UnexpectedKind(semantic.Object, semantic.Duration))
func (r *Record) Regexp() *regexp.Regexp {
panic(values.UnexpectedKind(semantic.Object, semantic.Regexp))
func (r *Record) Array() values.Array {
panic(values.UnexpectedKind(semantic.Object, semantic.Array))
func (r *Record) Object() values.Object {
return r
func (r *Record) Function() values.Function {
panic(values.UnexpectedKind(semantic.Object, semantic.Function))
func (r *Record) Set(name string, v values.Value) {
r.values[name] = v
func (r *Record) Get(name string) (values.Value, bool) {
v, ok := r.values[name]
return v, ok
func (r *Record) Len() int {
return len(r.values)
func (r *Record) Range(f func(name string, v values.Value)) {
for k, v := range r.values {
f(k, v)
@ -0,0 +1,326 @@
package execute
import (
// selectorTransformation holds the state shared by the row and index
// selector transformations.
type selectorTransformation struct {
// d is the downstream dataset that watermark, processing time and retraction updates are forwarded to.
d Dataset
// cache provides the block builders the selected rows are written to.
cache BlockBuilderCache
config SelectorConfig
// SelectorConfig configures which column a selector operates on.
type SelectorConfig struct {
// Column is the label of the column to select on; newSelectorTransformation
// replaces an empty value with DefaultValueColLabel.
Column string `json:"column"`
func (c *SelectorConfig) ReadArgs(args query.Arguments) error {
if col, ok, err := args.GetString("column"); err != nil {
return err
} else if ok {
c.Column = col
return nil
type rowSelectorTransformation struct {
selector RowSelector
type indexSelectorTransformation struct {
selector IndexSelector
func NewRowSelectorTransformationAndDataset(id DatasetID, mode AccumulationMode, selector RowSelector, config SelectorConfig, a *Allocator) (*rowSelectorTransformation, Dataset) {
cache := NewBlockBuilderCache(a)
d := NewDataset(id, mode, cache)
return NewRowSelectorTransformation(d, cache, selector, config), d
func NewRowSelectorTransformation(d Dataset, c BlockBuilderCache, selector RowSelector, config SelectorConfig) *rowSelectorTransformation {
return &rowSelectorTransformation{
selectorTransformation: newSelectorTransformation(d, c, config),
selector: selector,
func NewIndexSelectorTransformationAndDataset(id DatasetID, mode AccumulationMode, selector IndexSelector, config SelectorConfig, a *Allocator) (*indexSelectorTransformation, Dataset) {
cache := NewBlockBuilderCache(a)
d := NewDataset(id, mode, cache)
return NewIndexSelectorTransformation(d, cache, selector, config), d
func NewIndexSelectorTransformation(d Dataset, c BlockBuilderCache, selector IndexSelector, config SelectorConfig) *indexSelectorTransformation {
return &indexSelectorTransformation{
selectorTransformation: newSelectorTransformation(d, c, config),
selector: selector,
func newSelectorTransformation(d Dataset, c BlockBuilderCache, config SelectorConfig) selectorTransformation {
if config.Column == "" {
config.Column = DefaultValueColLabel
return selectorTransformation{
d: d,
cache: c,
config: config,
func (t *selectorTransformation) RetractBlock(id DatasetID, key PartitionKey) error {
//TODO(nathanielc): Store intermediate state for retractions
return t.d.RetractBlock(key)
func (t *selectorTransformation) UpdateWatermark(id DatasetID, mark Time) error {
return t.d.UpdateWatermark(mark)
func (t *selectorTransformation) UpdateProcessingTime(id DatasetID, pt Time) error {
return t.d.UpdateProcessingTime(pt)
func (t *selectorTransformation) Finish(id DatasetID, err error) {
func (t *selectorTransformation) setupBuilder(b Block) (BlockBuilder, int, error) {
builder, new := t.cache.BlockBuilder(b.Key())
if !new {
return nil, 0, fmt.Errorf("found duplicate block with key: %v", b.Key())
AddBlockCols(b, builder)
cols := builder.Cols()
valueIdx := ColIdx(t.config.Column, cols)
if valueIdx < 0 {
return nil, 0, fmt.Errorf("no column %q exists", t.config.Column)
return builder, valueIdx, nil
func (t *indexSelectorTransformation) Process(id DatasetID, b Block) error {
builder, valueIdx, err := t.setupBuilder(b)
if err != nil {
return err
valueCol := builder.Cols()[valueIdx]
var s interface{}
switch valueCol.Type {
case TBool:
s = t.selector.NewBoolSelector()
case TInt:
s = t.selector.NewIntSelector()
case TUInt:
s = t.selector.NewUIntSelector()
case TFloat:
s = t.selector.NewFloatSelector()
case TString:
s = t.selector.NewStringSelector()
return fmt.Errorf("unsupported selector type %v", valueCol.Type)
return b.Do(func(cr ColReader) error {
switch valueCol.Type {
case TBool:
selected := s.(DoBoolIndexSelector).DoBool(cr.Bools(valueIdx))
t.appendSelected(selected, builder, cr)
case TInt:
selected := s.(DoIntIndexSelector).DoInt(cr.Ints(valueIdx))
t.appendSelected(selected, builder, cr)
case TUInt:
selected := s.(DoUIntIndexSelector).DoUInt(cr.UInts(valueIdx))
t.appendSelected(selected, builder, cr)
case TFloat:
selected := s.(DoFloatIndexSelector).DoFloat(cr.Floats(valueIdx))
t.appendSelected(selected, builder, cr)
case TString:
selected := s.(DoStringIndexSelector).DoString(cr.Strings(valueIdx))
t.appendSelected(selected, builder, cr)
return fmt.Errorf("unsupported selector type %v", valueCol.Type)
return nil
func (t *rowSelectorTransformation) Process(id DatasetID, b Block) error {
builder, valueIdx, err := t.setupBuilder(b)
if err != nil {
return err
valueCol := builder.Cols()[valueIdx]
var rower Rower
switch valueCol.Type {
case TBool:
rower = t.selector.NewBoolSelector()
case TInt:
rower = t.selector.NewIntSelector()
case TUInt:
rower = t.selector.NewUIntSelector()
case TFloat:
rower = t.selector.NewFloatSelector()
case TString:
rower = t.selector.NewStringSelector()
return fmt.Errorf("unsupported selector type %v", valueCol.Type)
b.Do(func(cr ColReader) error {
switch valueCol.Type {
case TBool:
rower.(DoBoolRowSelector).DoBool(cr.Bools(valueIdx), cr)
case TInt:
rower.(DoIntRowSelector).DoInt(cr.Ints(valueIdx), cr)
case TUInt:
rower.(DoUIntRowSelector).DoUInt(cr.UInts(valueIdx), cr)
case TFloat:
rower.(DoFloatRowSelector).DoFloat(cr.Floats(valueIdx), cr)
case TString:
rower.(DoStringRowSelector).DoString(cr.Strings(valueIdx), cr)
return fmt.Errorf("unsupported selector type %v", valueCol.Type)
return nil
rows := rower.Rows()
t.appendRows(builder, rows)
return nil
// appendSelected copies the rows of cr whose indexes are listed in selected
// into builder, column by column, using the append method that matches each
// builder column's type.
func (t *indexSelectorTransformation) appendSelected(selected []int, builder BlockBuilder, cr ColReader) {
if len(selected) == 0 {
cols := builder.Cols()
// The same column index j is used for the builder and for cr.
for j, c := range cols {
for _, i := range selected {
switch c.Type {
case TBool:
builder.AppendBool(j, cr.Bools(j)[i])
case TInt:
builder.AppendInt(j, cr.Ints(j)[i])
case TUInt:
builder.AppendUInt(j, cr.UInts(j)[i])
case TFloat:
builder.AppendFloat(j, cr.Floats(j)[i])
case TString:
builder.AppendString(j, cr.Strings(j)[i])
case TTime:
builder.AppendTime(j, cr.Times(j)[i])
// appendRows appends the given rows to builder, column by column. Each row
// value is type asserted to the Go type that matches the builder column's
// type, so a value of a different dynamic type panics.
func (t *rowSelectorTransformation) appendRows(builder BlockBuilder, rows []Row) {
cols := builder.Cols()
for j, c := range cols {
for _, row := range rows {
// Row values are indexed by the builder's column position.
v := row.Values[j]
switch c.Type {
case TBool:
builder.AppendBool(j, v.(bool))
case TInt:
builder.AppendInt(j, v.(int64))
case TUInt:
builder.AppendUInt(j, v.(uint64))
case TFloat:
builder.AppendFloat(j, v.(float64))
case TString:
builder.AppendString(j, v.(string))
case TTime:
builder.AppendTime(j, v.(Time))
// IndexSelector creates per-type selectors that report the selected rows as
// indexes into the values they were given.
type IndexSelector interface {
NewBoolSelector() DoBoolIndexSelector
NewIntSelector() DoIntIndexSelector
NewUIntSelector() DoUIntIndexSelector
NewFloatSelector() DoFloatIndexSelector
NewStringSelector() DoStringIndexSelector
// DoBoolIndexSelector selects from bool values and returns the selected indexes.
type DoBoolIndexSelector interface {
DoBool([]bool) []int
// DoIntIndexSelector selects from int64 values and returns the selected indexes.
type DoIntIndexSelector interface {
DoInt([]int64) []int
// DoUIntIndexSelector selects from uint64 values and returns the selected indexes.
type DoUIntIndexSelector interface {
DoUInt([]uint64) []int
// DoFloatIndexSelector selects from float64 values and returns the selected indexes.
type DoFloatIndexSelector interface {
DoFloat([]float64) []int
// DoStringIndexSelector selects from string values and returns the selected indexes.
type DoStringIndexSelector interface {
DoString([]string) []int
// RowSelector creates per-type selectors that are fed values together with
// their ColReader and report the selected rows through Rower.
type RowSelector interface {
NewBoolSelector() DoBoolRowSelector
NewIntSelector() DoIntRowSelector
NewUIntSelector() DoUIntRowSelector
NewFloatSelector() DoFloatRowSelector
NewStringSelector() DoStringRowSelector
// Rower exposes the rows a row selector has selected.
type Rower interface {
Rows() []Row
// DoBoolRowSelector is a Rower fed with bool values.
type DoBoolRowSelector interface {
DoBool(vs []bool, cr ColReader)
// DoIntRowSelector is a Rower fed with int64 values.
type DoIntRowSelector interface {
DoInt(vs []int64, cr ColReader)
// DoUIntRowSelector is a Rower fed with uint64 values.
type DoUIntRowSelector interface {
DoUInt(vs []uint64, cr ColReader)
// DoFloatRowSelector is a Rower fed with float64 values.
type DoFloatRowSelector interface {
DoFloat(vs []float64, cr ColReader)
// DoStringRowSelector is a Rower fed with string values.
type DoStringRowSelector interface {
DoString(vs []string, cr ColReader)
// Row is a single row with one value per column, in column order.
type Row struct {
Values []interface{}
// ReadRow copies row i of cr into a Row holding one value per column of cr,
// each stored with the Go type that matches its column type.
func ReadRow(i int, cr ColReader) (row Row) {
cols := cr.Cols()
row.Values = make([]interface{}, len(cols))
for j, c := range cols {
switch c.Type {
case TBool:
row.Values[j] = cr.Bools(j)[i]
case TInt:
row.Values[j] = cr.Ints(j)[i]
case TUInt:
row.Values[j] = cr.UInts(j)[i]
case TFloat:
row.Values[j] = cr.Floats(j)[i]
case TString:
row.Values[j] = cr.Strings(j)[i]
case TTime:
row.Values[j] = cr.Times(j)[i]
@ -0,0 +1,367 @@
package execute_test
import (
func TestRowSelector_Process(t *testing.T) {
// All test cases use a simple MinSelector
testCases := []struct {
name string
config execute.SelectorConfig
data []*executetest.Block
want []*executetest.Block
name: "single",
config: execute.SelectorConfig{
Column: "_value",
data: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(1), 0.0},
{execute.Time(0), execute.Time(100), execute.Time(10), 1.0},
{execute.Time(0), execute.Time(100), execute.Time(20), 2.0},
{execute.Time(0), execute.Time(100), execute.Time(30), 3.0},
{execute.Time(0), execute.Time(100), execute.Time(40), 4.0},
{execute.Time(0), execute.Time(100), execute.Time(50), 5.0},
{execute.Time(0), execute.Time(100), execute.Time(60), 6.0},
{execute.Time(0), execute.Time(100), execute.Time(70), 7.0},
{execute.Time(0), execute.Time(100), execute.Time(80), 8.0},
{execute.Time(0), execute.Time(100), execute.Time(90), 9.0},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(1), 0.0},
name: "single custom column",
config: execute.SelectorConfig{
Column: "x",
data: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(1), 0.0},
{execute.Time(0), execute.Time(100), execute.Time(10), 1.0},
{execute.Time(0), execute.Time(100), execute.Time(20), 2.0},
{execute.Time(0), execute.Time(100), execute.Time(30), 3.0},
{execute.Time(0), execute.Time(100), execute.Time(40), 4.0},
{execute.Time(0), execute.Time(100), execute.Time(50), 5.0},
{execute.Time(0), execute.Time(100), execute.Time(60), 6.0},
{execute.Time(0), execute.Time(100), execute.Time(70), 7.0},
{execute.Time(0), execute.Time(100), execute.Time(80), 8.0},
{execute.Time(0), execute.Time(100), execute.Time(90), 9.0},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(1), 0.0},
name: "multiple blocks",
config: execute.SelectorConfig{
Column: "_value",
data: []*executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(1), 0.0},
{execute.Time(0), execute.Time(100), execute.Time(10), 1.0},
{execute.Time(0), execute.Time(100), execute.Time(20), 2.0},
{execute.Time(0), execute.Time(100), execute.Time(30), 3.0},
{execute.Time(0), execute.Time(100), execute.Time(40), 4.0},
{execute.Time(0), execute.Time(100), execute.Time(50), 5.0},
{execute.Time(0), execute.Time(100), execute.Time(60), 6.0},
{execute.Time(0), execute.Time(100), execute.Time(70), 7.0},
{execute.Time(0), execute.Time(100), execute.Time(80), 8.0},
{execute.Time(0), execute.Time(100), execute.Time(90), 9.0},
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(100), execute.Time(200), execute.Time(101), 10.0},
{execute.Time(100), execute.Time(200), execute.Time(110), 11.0},
{execute.Time(100), execute.Time(200), execute.Time(120), 12.0},
{execute.Time(100), execute.Time(200), execute.Time(130), 13.0},
{execute.Time(100), execute.Time(200), execute.Time(140), 14.0},
{execute.Time(100), execute.Time(200), execute.Time(150), 15.0},
{execute.Time(100), execute.Time(200), execute.Time(160), 16.0},
{execute.Time(100), execute.Time(200), execute.Time(170), 17.0},
{execute.Time(100), execute.Time(200), execute.Time(180), 18.0},
{execute.Time(100), execute.Time(200), execute.Time(190), 19.0},
want: []*executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(1), 0.0},
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(100), execute.Time(200), execute.Time(101), 10.0},
for _, tc := range testCases {
tc := tc
t.Run(, func(t *testing.T) {
d := executetest.NewDataset(executetest.RandomDatasetID())
c := execute.NewBlockBuilderCache(executetest.UnlimitedAllocator)
selector := execute.NewRowSelectorTransformation(d, c, new(functions.MinSelector), tc.config)
parentID := executetest.RandomDatasetID()
for _, b := range {
if err := selector.Process(parentID, b); err != nil {
got, err := executetest.BlocksFromCache(c)
if err != nil {
if !cmp.Equal(tc.want, got, cmpopts.EquateNaNs()) {
t.Errorf("unexpected blocks -want/+got\n%s", cmp.Diff(tc.want, got))
func TestIndexSelector_Process(t *testing.T) {
// All test cases use a simple FirstSelector
testCases := []struct {
name string
config execute.SelectorConfig
data []*executetest.Block
want []*executetest.Block
name: "single",
config: execute.SelectorConfig{
Column: "_value",
data: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(1), 0.0},
{execute.Time(0), execute.Time(100), execute.Time(10), 1.0},
{execute.Time(0), execute.Time(100), execute.Time(20), 2.0},
{execute.Time(0), execute.Time(100), execute.Time(30), 3.0},
{execute.Time(0), execute.Time(100), execute.Time(40), 4.0},
{execute.Time(0), execute.Time(100), execute.Time(50), 5.0},
{execute.Time(0), execute.Time(100), execute.Time(60), 6.0},
{execute.Time(0), execute.Time(100), execute.Time(70), 7.0},
{execute.Time(0), execute.Time(100), execute.Time(80), 8.0},
{execute.Time(0), execute.Time(100), execute.Time(90), 9.0},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(1), 0.0},
name: "multiple blocks",
config: execute.SelectorConfig{
Column: "_value",
data: []*executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(1), 0.0},
{execute.Time(0), execute.Time(100), execute.Time(10), 1.0},
{execute.Time(0), execute.Time(100), execute.Time(20), 2.0},
{execute.Time(0), execute.Time(100), execute.Time(30), 3.0},
{execute.Time(0), execute.Time(100), execute.Time(40), 4.0},
{execute.Time(0), execute.Time(100), execute.Time(50), 5.0},
{execute.Time(0), execute.Time(100), execute.Time(60), 6.0},
{execute.Time(0), execute.Time(100), execute.Time(70), 7.0},
{execute.Time(0), execute.Time(100), execute.Time(80), 8.0},
{execute.Time(0), execute.Time(100), execute.Time(90), 9.0},
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(100), execute.Time(200), execute.Time(101), 10.0},
{execute.Time(100), execute.Time(200), execute.Time(110), 11.0},
{execute.Time(100), execute.Time(200), execute.Time(120), 12.0},
{execute.Time(100), execute.Time(200), execute.Time(130), 13.0},
{execute.Time(100), execute.Time(200), execute.Time(140), 14.0},
{execute.Time(100), execute.Time(200), execute.Time(150), 15.0},
{execute.Time(100), execute.Time(200), execute.Time(160), 16.0},
{execute.Time(100), execute.Time(200), execute.Time(170), 17.0},
{execute.Time(100), execute.Time(200), execute.Time(180), 18.0},
{execute.Time(100), execute.Time(200), execute.Time(190), 19.0},
want: []*executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(1), 0.0},
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(100), execute.Time(200), execute.Time(101), 10.0},
for _, tc := range testCases {
tc := tc
t.Run(, func(t *testing.T) {
d := executetest.NewDataset(executetest.RandomDatasetID())
c := execute.NewBlockBuilderCache(executetest.UnlimitedAllocator)
selector := execute.NewIndexSelectorTransformation(d, c, new(functions.FirstSelector), tc.config)
parentID := executetest.RandomDatasetID()
for _, b := range {
if err := selector.Process(parentID, b); err != nil {
got, err := executetest.BlocksFromCache(c)
if err != nil {
if !cmp.Equal(tc.want, got, cmpopts.EquateNaNs()) {
t.Errorf("unexpected blocks -want/+got\n%s", cmp.Diff(tc.want, got))
@ -0,0 +1,28 @@
package execute
import (
// Node is a step of the execution graph that downstream transformations can
// be attached to.
type Node interface {
AddTransformation(t Transformation)
// Source is a step of the execution graph that is started with Run.
type Source interface {
Run(ctx context.Context)
// CreateSource constructs the Source for a procedure spec.
type CreateSource func(spec plan.ProcedureSpec, id DatasetID, ctx Administration) (Source, error)
// procedureToSource holds the CreateSource registered for each procedure kind.
var procedureToSource = make(map[plan.ProcedureKind]CreateSource)
func RegisterSource(k plan.ProcedureKind, c CreateSource) {
if procedureToSource[k] != nil {
panic(fmt.Errorf("duplicate registration for source with procedure kind %v", k))
procedureToSource[k] = c
@ -0,0 +1,44 @@
package execute
import (
// Transformation is the push based API through which an upstream dataset,
// identified by id, delivers blocks and progress updates.
type Transformation interface {
RetractBlock(id DatasetID, key PartitionKey) error
Process(id DatasetID, b Block) error
UpdateWatermark(id DatasetID, t Time) error
UpdateProcessingTime(id DatasetID, t Time) error
// Finish signals that the upstream dataset is done; err is non-nil when it ended in failure.
Finish(id DatasetID, err error)
// Administration is handed to source and transformation constructors and
// gives them access to the execution environment.
type Administration interface {
OrganizationID() id.ID
ResolveTime(qt query.Time) Time
Bounds() Bounds
Allocator() *Allocator
Parents() []DatasetID
ConvertID(plan.ProcedureID) DatasetID
Dependencies() Dependencies
// Dependencies represents the provided dependencies to the execution environment.
// The dependencies is opaque.
type Dependencies map[string]interface{}
// CreateTransformation constructs the Transformation and its Dataset for a procedure spec.
type CreateTransformation func(id DatasetID, mode AccumulationMode, spec plan.ProcedureSpec, a Administration) (Transformation, Dataset, error)
// procedureToTransformation holds the CreateTransformation registered for each procedure kind.
var procedureToTransformation = make(map[plan.ProcedureKind]CreateTransformation)
func RegisterTransformation(k plan.ProcedureKind, c CreateTransformation) {
if procedureToTransformation[k] != nil {
panic(fmt.Errorf("duplicate registration for transformation with procedure kind %v", k))
procedureToTransformation[k] = c
@ -0,0 +1,314 @@
package execute
import (
// Transport moves messages from an upstream dataset to a Transformation.
type Transport interface {
// Finished reports when the Transport has completed and there is no more work to do.
Finished() <-chan struct{}
// consecutiveTransport implements Transport by transporting data consecutively to the downstream Transformation.
type consecutiveTransport struct {
dispatcher Dispatcher
// t is the downstream Transformation that queued messages are applied to.
t Transformation
// messages is the queue drained by processMessages.
messages MessageQueue
// finished is the channel returned by Finished.
finished chan struct{}
errMu sync.Mutex
// errValue is the error reported to callers once the transport has finished.
errValue error
// schedulerState holds one of the schedule states and is accessed atomically.
schedulerState int32
// inflight counts messages pushed but not yet processed and is accessed atomically.
inflight int32
func newConescutiveTransport(dispatcher Dispatcher, t Transformation) *consecutiveTransport {
return &consecutiveTransport{
dispatcher: dispatcher,
t: t,
// TODO(nathanielc): Have planner specify message queue initial buffer size.
messages: newMessageQueue(64),
finished: make(chan struct{}),
func (t *consecutiveTransport) setErr(err error) {
t.errValue = err
func (t *consecutiveTransport) err() error {
err := t.errValue
return err
func (t *consecutiveTransport) Finished() <-chan struct{} {
return t.finished
// RetractBlock implements Transformation. Once the transport has finished it
// returns the transport's recorded error instead of accepting the retraction.
// NOTE(review): the statement that builds and enqueues the message is not
// visible in this view; only its srcMessage and key fields are.
func (t *consecutiveTransport) RetractBlock(id DatasetID, key PartitionKey) error {
select {
case <-t.finished:
return t.err()
srcMessage: srcMessage(id),
key: key,
return nil
// Process implements Transformation. Once the transport has finished it
// returns the transport's recorded error instead of accepting the block.
// NOTE(review): message construction is not visible here; only the srcMessage and block fields are.
func (t *consecutiveTransport) Process(id DatasetID, b Block) error {
select {
case <-t.finished:
return t.err()
srcMessage: srcMessage(id),
block: b,
return nil
// UpdateWatermark implements Transformation. Once the transport has finished
// it returns the transport's recorded error instead of accepting the update.
// NOTE(review): message construction is not visible here; only the srcMessage and time fields are.
func (t *consecutiveTransport) UpdateWatermark(id DatasetID, time Time) error {
select {
case <-t.finished:
return t.err()
srcMessage: srcMessage(id),
time: time,
return nil
// UpdateProcessingTime implements Transformation. Once the transport has
// finished it returns the transport's recorded error instead of accepting the update.
// NOTE(review): message construction is not visible here; only the srcMessage and time fields are.
func (t *consecutiveTransport) UpdateProcessingTime(id DatasetID, time Time) error {
select {
case <-t.finished:
return t.err()
srcMessage: srcMessage(id),
time: time,
return nil
// Finish implements Transformation.
// NOTE(review): message construction is not visible here; only the srcMessage and err fields are.
func (t *consecutiveTransport) Finish(id DatasetID, err error) {
select {
case <-t.finished:
srcMessage: srcMessage(id),
err: err,
// pushMsg accounts for a newly submitted message by atomically incrementing
// the inflight counter.
// NOTE(review): no enqueue of m onto t.messages and no call to schedule is
// visible in this view — confirm against the full file.
func (t *consecutiveTransport) pushMsg(m Message) {
atomic.AddInt32(&t.inflight, 1)
const (
// consecutiveTransport schedule states
idle int32 = iota
// schedule indicates that there is work available to schedule.
// schedule attempts to move the transport from the idle to the running
// state; the attempt only succeeds when the transport is currently idle.
// NOTE(review): the action taken after a successful transition is not visible in this view.
func (t *consecutiveTransport) schedule() {
if t.tryTransition(idle, running) {
// tryTransition attempts to transition into the new state and returns true on success.
func (t *consecutiveTransport) tryTransition(old, new int32) bool {
return atomic.CompareAndSwapInt32(&t.schedulerState, old, new)
// transition sets the new state.
func (t *consecutiveTransport) transition(new int32) {
atomic.StoreInt32(&t.schedulerState, new)
// processMessages pops queued messages one at a time and hands each to
// processMessage together with the downstream transformation. Every popped
// message decrements the inflight counter.
//
// throughput is compared against the local counter i to bound how much work
// a single invocation performs.
//
// NOTE(review): several statements appear to be missing from the visible body
// (the increment of i, recording the error, signalling the finished channel,
// the idle transition and the rescheduling calls, plus all closing braces).
// The existing comments describe the intended flow; confirm the exact
// statements against the upstream source before relying on this block.
func (t *consecutiveTransport) processMessages(throughput int) {
i := 0
for m := t.messages.Pop(); m != nil; m = t.messages.Pop() {
atomic.AddInt32(&t.inflight, -1)
// f reports that m was a finish message; err is the downstream error.
if f, err := processMessage(t.t, m); err != nil || f {
// Set the error if there was any
// Transition to the finished state.
if t.tryTransition(running, finished) {
// Call Finish if we have not already
// (processMessage already called Finish when f is true).
if !f {
t.t.Finish(m.SrcDatasetID(), err)
// We are finished
if i >= throughput {
// We have done enough work.
// Transition to the idle state and reschedule for later.
// Check if more messages arrived after the above loop finished.
// This check must happen in the idle state.
if atomic.LoadInt32(&t.inflight) > 0 {
if t.tryTransition(idle, running) {
} // else we have already been scheduled again, we can return
// processMessage processes the message on t.
// The return value is true if the message was a FinishMsg.
func processMessage(t Transformation, m Message) (finished bool, err error) {
switch m := m.(type) {
case RetractBlockMsg:
err = t.RetractBlock(m.SrcDatasetID(), m.Key())
case ProcessMsg:
b := m.Block()
err = t.Process(m.SrcDatasetID(), b)
case UpdateWatermarkMsg:
err = t.UpdateWatermark(m.SrcDatasetID(), m.WatermarkTime())
case UpdateProcessingTimeMsg:
err = t.UpdateProcessingTime(m.SrcDatasetID(), m.ProcessingTime())
case FinishMsg:
t.Finish(m.SrcDatasetID(), m.Error())
finished = true
// Message is the unit of work queued on a transport.
type Message interface {
// Type identifies which kind of message this is.
Type() MessageType
// SrcDatasetID returns the id of the dataset the message originated from.
SrcDatasetID() DatasetID
// MessageType enumerates the kinds of Message.
type MessageType int
const (
// RetractBlockType is the first MessageType value (iota == 0).
// NOTE(review): ProcessType, UpdateWatermarkType, UpdateProcessingTimeType
// and FinishType are returned by Type methods in this file but are not
// declared in the visible lines of this block — confirm.
RetractBlockType MessageType = iota
// srcMessage stores the source dataset id of a message.
type srcMessage DatasetID
// SrcDatasetID converts the receiver back to the DatasetID it wraps.
func (m srcMessage) SrcDatasetID() DatasetID {
return DatasetID(m)
// RetractBlockMsg is a message asking for a block to be retracted.
// NOTE(review): processMessage also calls SrcDatasetID() on values of this
// type, so it presumably embeds Message — confirm.
type RetractBlockMsg interface {
// Key returns the partition key of the block to retract.
Key() PartitionKey
// retractBlockMsg is the concrete RetractBlockMsg built by the transport.
type retractBlockMsg struct {
key PartitionKey
// Type reports RetractBlockType.
func (m *retractBlockMsg) Type() MessageType {
return RetractBlockType
// Key returns the stored partition key.
func (m *retractBlockMsg) Key() PartitionKey {
return m.key
// ProcessMsg is a message carrying a block to be processed.
// NOTE(review): processMessage also calls SrcDatasetID() on values of this
// type, so it presumably embeds Message — confirm.
type ProcessMsg interface {
// Block returns the block to process.
Block() Block
// processMsg is the concrete ProcessMsg built by the transport.
type processMsg struct {
block Block
// Type reports ProcessType.
func (m *processMsg) Type() MessageType {
return ProcessType
// Block returns the stored block.
func (m *processMsg) Block() Block {
return m.block
// UpdateWatermarkMsg is a message carrying a new watermark time.
// NOTE(review): processMessage also calls SrcDatasetID() on values of this
// type, so it presumably embeds Message — confirm.
type UpdateWatermarkMsg interface {
// WatermarkTime returns the watermark carried by the message.
WatermarkTime() Time
// updateWatermarkMsg is the concrete UpdateWatermarkMsg built by the transport.
type updateWatermarkMsg struct {
time Time
// Type reports UpdateWatermarkType.
func (m *updateWatermarkMsg) Type() MessageType {
return UpdateWatermarkType
// WatermarkTime returns the stored time.
func (m *updateWatermarkMsg) WatermarkTime() Time {
return m.time
// UpdateProcessingTimeMsg is a message carrying a new processing time.
// NOTE(review): processMessage also calls SrcDatasetID() on values of this
// type, so it presumably embeds Message — confirm.
type UpdateProcessingTimeMsg interface {
// ProcessingTime returns the processing time carried by the message.
ProcessingTime() Time
// updateProcessingTimeMsg is the concrete UpdateProcessingTimeMsg built by
// the transport.
type updateProcessingTimeMsg struct {
time Time
// Type reports UpdateProcessingTimeType.
func (m *updateProcessingTimeMsg) Type() MessageType {
return UpdateProcessingTimeType
// ProcessingTime returns the stored time.
func (m *updateProcessingTimeMsg) ProcessingTime() Time {
return m.time
// FinishMsg is the message that tells the downstream transformation to finish.
// NOTE(review): processMessage also calls SrcDatasetID() on values of this
// type, so it presumably embeds Message — confirm.
type FinishMsg interface {
// Error returns the error the sender finished with; it may be nil.
Error() error
// finishMsg is the concrete FinishMsg built by the transport.
type finishMsg struct {
err error
// Type reports FinishType.
func (m *finishMsg) Type() MessageType {
return FinishType
// Error returns the stored error.
func (m *finishMsg) Error() error {
return m.err
@ -0,0 +1,153 @@
package execute
import (
// Trigger decides, from a TriggerContext, whether a block should be emitted.
// NOTE(review): every implementation in this file also defines Reset(), which
// is not listed in the visible method set — confirm.
type Trigger interface {
// Triggered reports whether the trigger fires for the given context.
Triggered(TriggerContext) bool
// Finished reports whether the trigger considers itself done.
Finished() bool
// TriggerContext is the input a Trigger is evaluated against.
type TriggerContext struct {
// Block describes the block under consideration.
Block BlockContext
// Watermark is compared against the block's stop time by afterWatermarkTrigger.
Watermark Time
// CurrentProcessingTime is read by afterProcessingTimeTrigger.
CurrentProcessingTime Time
// BlockContext describes a block for trigger evaluation.
type BlockContext struct {
// Key is the block's partition key.
Key PartitionKey
// Count is compared against a threshold by afterAtLeastCount.
// NOTE(review): presumably the number of rows in the block — confirm.
Count int
func NewTriggerFromSpec(spec query.TriggerSpec) Trigger {
switch s := spec.(type) {
case query.AfterWatermarkTriggerSpec:
return &afterWatermarkTrigger{
allowedLateness: Duration(s.AllowedLateness),
case query.RepeatedTriggerSpec:
return &repeatedlyForever{
t: NewTriggerFromSpec(s.Trigger),
case query.AfterProcessingTimeTriggerSpec:
return &afterProcessingTimeTrigger{
duration: Duration(s.Duration),
case query.AfterAtLeastCountTriggerSpec:
return &afterAtLeastCount{
atLeast: s.Count,
case query.OrFinallyTriggerSpec:
return &orFinally{
main: NewTriggerFromSpec(s.Main),
finally: NewTriggerFromSpec(s.Finally),
//TODO(nathanielc): Add proper error handling here.
// Maybe separate validation of a spec and creation of a spec so we know we cannot error during creation?
panic(fmt.Sprintf("unsupported trigger spec provided %T", spec))
// afterWatermarkTrigger triggers once the watermark is greater than the bounds of the block.
type afterWatermarkTrigger struct {
allowedLateness Duration
finished bool
func (t *afterWatermarkTrigger) Triggered(c TriggerContext) bool {
timeIdx := ColIdx(DefaultStopColLabel, c.Block.Key.Cols())
if timeIdx < 0 {
return false
stop := c.Block.Key.ValueTime(timeIdx)
if c.Watermark >= stop+Time(t.allowedLateness) {
t.finished = true
return c.Watermark >= stop
func (t *afterWatermarkTrigger) Finished() bool {
return t.finished
func (t *afterWatermarkTrigger) Reset() {
t.finished = false
// repeatedlyForever wraps another Trigger and never reports itself finished.
type repeatedlyForever struct {
// t is the wrapped trigger.
t Trigger
// Triggered delegates to the wrapped trigger.
func (t *repeatedlyForever) Triggered(c TriggerContext) bool {
return t.t.Triggered(c)
// Finished always returns false.
// NOTE(review): the body of the if below is not visible; presumably it
// resets the wrapped trigger once that trigger finishes — confirm.
func (t *repeatedlyForever) Finished() bool {
if t.t.Finished() {
return false
// Reset has no visible body.
// NOTE(review): presumably it resets the wrapped trigger — confirm.
func (t *repeatedlyForever) Reset() {
type afterProcessingTimeTrigger struct {
duration Duration
triggerTimeSet bool
triggerTime Time
current Time
func (t *afterProcessingTimeTrigger) Triggered(c TriggerContext) bool {
if !t.triggerTimeSet {
t.triggerTimeSet = true
t.triggerTime = c.CurrentProcessingTime + Time(t.duration)
t.current = c.CurrentProcessingTime
return t.current >= t.triggerTime
func (t *afterProcessingTimeTrigger) Finished() bool {
return t.triggerTimeSet && t.current >= t.triggerTime
func (t *afterProcessingTimeTrigger) Reset() {
t.triggerTimeSet = false
type afterAtLeastCount struct {
n, atLeast int
func (t *afterAtLeastCount) Triggered(c TriggerContext) bool {
t.n = c.Block.Count
return t.n >= t.atLeast
func (t *afterAtLeastCount) Finished() bool {
return t.n >= t.atLeast
func (t *afterAtLeastCount) Reset() {
t.n = 0
type orFinally struct {
main Trigger
finally Trigger
finished bool
func (t *orFinally) Triggered(c TriggerContext) bool {
if t.finally.Triggered(c) {
t.finished = true
return true
return t.main.Triggered(c)
func (t *orFinally) Finished() bool {
return t.finished
func (t *orFinally) Reset() {
t.finished = false
@ -0,0 +1,8 @@
package execute
// Window groups the parameters that describe a time window.
// NOTE(review): field meanings below are inferred from the names only;
// confirm against the code that consumes Window.
type Window struct {
// Every is presumably the interval between window starts.
Every Duration
// Period is presumably the length of each window.
Period Duration
// Round is presumably the granularity window bounds are rounded to.
Round Duration
// Start is presumably the time the first window is anchored at.
Start Time
@ -0,0 +1,55 @@
package query
import (
// TODO(nathanielc): Add better options for formatting plans as Graphviz dot format.

// FormatOption mutates a formatter; options are passed to Formatted.
type FormatOption func(*formatter)
func Formatted(q *Spec, opts ...FormatOption) fmt.Formatter {
f := formatter{
q: q,
for _, o := range opts {
return f
// FmtJSON is a FormatOption that selects JSON output.
func FmtJSON(f *formatter) {
	f.json = true
}
// formatter implements fmt.Formatter for a query Spec.
type formatter struct {
// q is the spec being formatted.
q *Spec
// json selects JSON output; it is set by the FmtJSON option.
json bool
func (f formatter) Format(fs fmt.State, c rune) {
if c == 'v' && fs.Flag('#') {
fmt.Fprintf(fs, "%#v", f.q)
if f.json {
} else {
func (f formatter) formatJSON(fs fmt.State) {
e := json.NewEncoder(fs)
e.SetIndent("", " ")
func (f formatter) formatDAG(fs fmt.State) {
fmt.Fprint(fs, "digraph QuerySpec {\n")
_ = f.q.Walk(func(o *Operation) error {
fmt.Fprintf(fs, "%s[kind=%q];\n", o.ID, o.Spec.Kind())
for _, child := range f.q.Children(o.ID) {
fmt.Fprintf(fs, "%s->%s;\n", o.ID, child.ID)
return nil
fmt.Fprintln(fs, "}")
@ -0,0 +1,151 @@
package functions
import (
// CountKind is the name under which the count operation is registered.
const CountKind = "count"
// CountOpSpec is the query-level spec of the count operation.
// NOTE(review): other code reads s.AggregateConfig on this type, so it
// presumably embeds execute.AggregateConfig — the field is not visible here.
type CountOpSpec struct {
// countSignature is the function signature registered for count.
var countSignature = query.DefaultFunctionSignature()
// init registers count with the query, plan and execute packages under
// CountKind.
func init() {
query.RegisterFunction(CountKind, createCountOpSpec, countSignature)
query.RegisterOpSpec(CountKind, newCountOp)
plan.RegisterProcedureSpec(CountKind, newCountProcedure, CountKind)
execute.RegisterTransformation(CountKind, createCountTransformation)
func createCountOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
s := new(CountOpSpec)
if err := s.AggregateConfig.ReadArgs(args); err != nil {
return nil, err
return s, nil
func newCountOp() query.OperationSpec {
return new(CountOpSpec)
func (s *CountOpSpec) Kind() query.OperationKind {
return CountKind
// CountProcedureSpec is the plan-level spec of the count operation.
// NOTE(review): constructors set an AggregateConfig field on this type; the
// field itself is not visible here — confirm it is an embedded
// execute.AggregateConfig.
type CountProcedureSpec struct {
func newCountProcedure(qs query.OperationSpec, a plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*CountOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
return &CountProcedureSpec{
AggregateConfig: spec.AggregateConfig,
}, nil
func (s *CountProcedureSpec) Kind() plan.ProcedureKind {
return CountKind
func (s *CountProcedureSpec) Copy() plan.ProcedureSpec {
return &CountProcedureSpec{
AggregateConfig: s.AggregateConfig,
func (s *CountProcedureSpec) AggregateMethod() string {
return CountKind
func (s *CountProcedureSpec) ReAggregateSpec() plan.ProcedureSpec {
return new(SumProcedureSpec)
func (s *CountProcedureSpec) PushDownRules() []plan.PushDownRule {
return []plan.PushDownRule{{
Root: FromKind,
Through: nil,
Match: func(spec plan.ProcedureSpec) bool {
selectSpec := spec.(*FromProcedureSpec)
return !selectSpec.GroupingSet
func (s *CountProcedureSpec) PushDown(root *plan.Procedure, dup func() *plan.Procedure) {
selectSpec := root.Spec.(*FromProcedureSpec)
if selectSpec.AggregateSet {
root = dup()
selectSpec = root.Spec.(*FromProcedureSpec)
selectSpec.AggregateSet = false
selectSpec.AggregateMethod = ""
selectSpec.AggregateSet = true
selectSpec.AggregateMethod = s.AggregateMethod()
// CountAgg is the aggregate that counts values of any supported column type.
type CountAgg struct {
// count is the running number of values seen by the Do* methods.
count int64
func createCountTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*CountProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
t, d := execute.NewAggregateTransformationAndDataset(id, mode, new(CountAgg), s.AggregateConfig, a.Allocator())
return t, d, nil
func (a *CountAgg) NewBoolAgg() execute.DoBoolAgg {
return new(CountAgg)
func (a *CountAgg) NewIntAgg() execute.DoIntAgg {
return new(CountAgg)
func (a *CountAgg) NewUIntAgg() execute.DoUIntAgg {
return new(CountAgg)
func (a *CountAgg) NewFloatAgg() execute.DoFloatAgg {
return new(CountAgg)
func (a *CountAgg) NewStringAgg() execute.DoStringAgg {
return new(CountAgg)
func (a *CountAgg) DoBool(vs []bool) {
a.count += int64(len(vs))
func (a *CountAgg) DoUInt(vs []uint64) {
a.count += int64(len(vs))
func (a *CountAgg) DoInt(vs []int64) {
a.count += int64(len(vs))
func (a *CountAgg) DoFloat(vs []float64) {
a.count += int64(len(vs))
func (a *CountAgg) DoString(vs []string) {
a.count += int64(len(vs))
func (a *CountAgg) Type() execute.DataType {
return execute.TInt
func (a *CountAgg) ValueInt() int64 {
return a.count
@ -0,0 +1,134 @@
package functions_test
import (
// TestCount_NewQuery checks that a from |> range |> count pipeline is turned
// into the expected query.Spec (three operations chained by two edges).
func TestCount_NewQuery(t *testing.T) {
tests := []querytest.NewQueryTestCase{
Name: "from with range and count",
Raw: `from(db:"mydb") |> range(start:-4h, stop:-2h) |> count()`,
Want: &query.Spec{
Operations: []*query.Operation{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
ID: "range1",
Spec: &functions.RangeOpSpec{
Start: query.Time{
Relative: -4 * time.Hour,
IsRelative: true,
Stop: query.Time{
Relative: -2 * time.Hour,
IsRelative: true,
ID: "count2",
Spec: &functions.CountOpSpec{
// count() without arguments is expected to carry the default config.
AggregateConfig: execute.DefaultAggregateConfig,
Edges: []query.Edge{
{Parent: "from0", Child: "range1"},
{Parent: "range1", Child: "count2"},
for _, tc := range tests {
// Re-bind tc so the closure below captures this iteration's value.
tc := tc
t.Run(tc.Name, func(t *testing.T) {
querytest.NewQueryTestHelper(t, tc)
// TestCountOperation_Marshaling passes a JSON document and the matching
// query.Operation for count to the shared marshaling test helper.
func TestCountOperation_Marshaling(t *testing.T) {
data := []byte(`{"id":"count","kind":"count"}`)
op := &query.Operation{
ID: "count",
Spec: &functions.CountOpSpec{},
querytest.OperationMarshalingTestHelper(t, data, op)
// TestCount_Process exercises the count aggregate on ten float values.
// NOTE(review): the helper call and the expected result are not visible in
// this block — confirm against the upstream source.
func TestCount_Process(t *testing.T) {
[]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
// BenchmarkCount benchmarks the count aggregate.
// NOTE(review): the benchmark body is not visible in this block.
func BenchmarkCount(b *testing.B) {
// TestCount_PushDown_Match checks the Match predicate of count's push-down
// rule against a from procedure with and without a grouping set.
func TestCount_PushDown_Match(t *testing.T) {
spec := new(functions.CountProcedureSpec)
from := new(functions.FromProcedureSpec)
// Should not match when a grouping is set
from.GroupingSet = true
plantest.PhysicalPlan_PushDown_Match_TestHelper(t, spec, from, []bool{false})
// Should match when no grouping is set
from.GroupingSet = false
plantest.PhysicalPlan_PushDown_Match_TestHelper(t, spec, from, []bool{true})
// TestCount_PushDown checks that pushing count into a plain from procedure
// marks it as aggregating with the count method. The helper's boolean
// argument is false here.
// NOTE(review): presumably that flag means "expect a duplicate" — confirm.
func TestCount_PushDown(t *testing.T) {
spec := new(functions.CountProcedureSpec)
root := &plan.Procedure{
Spec: new(functions.FromProcedureSpec),
want := &plan.Procedure{
Spec: &functions.FromProcedureSpec{
AggregateSet: true,
AggregateMethod: functions.CountKind,
plantest.PhysicalPlan_PushDown_TestHelper(t, spec, root, false, want)
// TestCount_PushDown_Duplicate checks the case where the root from procedure
// already carries an aggregate. The helper's boolean argument is true here.
// NOTE(review): presumably that flag means "expect a duplicate" — confirm.
func TestCount_PushDown_Duplicate(t *testing.T) {
spec := new(functions.CountProcedureSpec)
root := &plan.Procedure{
Spec: &functions.FromProcedureSpec{
AggregateSet: true,
AggregateMethod: functions.CountKind,
want := &plan.Procedure{
// Expect the duplicate has been reset to zero values
Spec: new(functions.FromProcedureSpec),
plantest.PhysicalPlan_PushDown_TestHelper(t, spec, root, true, want)
@ -0,0 +1,251 @@
package functions
import (
// CovarianceKind is the name under which the covariance operation is registered.
const CovarianceKind = "covariance"
// CovarianceOpSpec is the query-level spec of the covariance operation.
// NOTE(review): other code reads spec.AggregateConfig and spec.Columns on
// this type, so it presumably embeds execute.AggregateConfig — not visible here.
type CovarianceOpSpec struct {
// PearsonCorrelation is set from the "pearsonr" argument.
PearsonCorrelation bool `json:"pearsonr"`
// ValueDst is the label of the output value column.
ValueDst string `json:"value_dst"`
// covarianceSignature is the function signature registered for covariance;
// init adds the operation-specific parameters to it.
var covarianceSignature = query.DefaultFunctionSignature()
// init declares the extra covariance parameters, registers the IFQL built-in
// source and registers covariance with the query, plan and execute packages.
// NOTE(review): createCovarianceOpSpec also reads a "valueDst" argument that
// is not declared on the signature here — confirm whether it should be.
func init() {
covarianceSignature.Params["pearsonr"] = semantic.Bool
covarianceSignature.Params["columns"] = semantic.Array
query.RegisterBuiltIn("covariance", covarianceBuiltIn)
query.RegisterFunction(CovarianceKind, createCovarianceOpSpec, covarianceSignature)
query.RegisterOpSpec(CovarianceKind, newCovarianceOp)
plan.RegisterProcedureSpec(CovarianceKind, newCovarianceProcedure, CovarianceKind)
execute.RegisterTransformation(CovarianceKind, createCovarianceTransformation)
// covarianceBuiltIn is the IFQL source registered by init as a built-in.
// It defines a `cov` function with an automatic join that feeds the joined
// x and y values to covariance, and a `pearsonr` helper that calls cov with
// pearsonr:true.
var covarianceBuiltIn = `
cov = (x,y,on,pearsonr=false) =>
tables:{x:x, y:y},
fn: (t) => ({x:t.x._value, y:t.y._value}),
|> covariance(pearsonr:pearsonr, columns:["x","y"])
pearsonr = (x,y,on) => cov(x:x, y:y, on:on, pearsonr:true)
func createCovarianceOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
spec := new(CovarianceOpSpec)
pearsonr, ok, err := args.GetBool("pearsonr")
if err != nil {
return nil, err
} else if ok {
spec.PearsonCorrelation = pearsonr
label, ok, err := args.GetString("valueDst")
if err != nil {
return nil, err
} else if ok {
spec.ValueDst = label
} else {
spec.ValueDst = execute.DefaultValueColLabel
if err := spec.AggregateConfig.ReadArgs(args); err != nil {
return nil, err
if len(spec.Columns) != 2 {
return nil, errors.New("must provide exactly two columns")
return spec, nil
func newCovarianceOp() query.OperationSpec {
return new(CovarianceOpSpec)
func (s *CovarianceOpSpec) Kind() query.OperationKind {
return CovarianceKind
// CovarianceProcedureSpec is the plan-level spec of the covariance operation.
// NOTE(review): code in this file also sets and reads AggregateConfig,
// Columns, TimeSrc and TimeDst through this type, so it presumably embeds
// execute.AggregateConfig — not visible here.
type CovarianceProcedureSpec struct {
// PearsonCorrelation selects the correlation branch in value().
PearsonCorrelation bool
// ValueLabel is the label given to the output value column.
ValueLabel string
func newCovarianceProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*CovarianceOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
return &CovarianceProcedureSpec{
PearsonCorrelation: spec.PearsonCorrelation,
ValueLabel: spec.ValueDst,
AggregateConfig: spec.AggregateConfig,
}, nil
func (s *CovarianceProcedureSpec) Kind() plan.ProcedureKind {
return CovarianceKind
func (s *CovarianceProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(CovarianceProcedureSpec)
*ns = *s
ns.AggregateConfig = s.AggregateConfig.Copy()
return ns
// CovarianceTransformation computes the covariance (or Pearson correlation)
// of two columns for each block it processes.
// NOTE(review): methods also use the fields n, xm1, ym1, xm2 and ym2, which
// are not visible in this block — confirm their declarations.
type CovarianceTransformation struct {
// d is the downstream dataset that watermark, processing-time and retract
// calls are forwarded to.
d execute.Dataset
// cache supplies one block builder per block key.
cache execute.BlockBuilderCache
bounds execute.Bounds
// spec is a copy of the procedure spec given to the constructor.
spec CovarianceProcedureSpec
yIdx int
// xym2 accumulates the co-moment sum used by value().
xym2 float64
func createCovarianceTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*CovarianceProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
cache := execute.NewBlockBuilderCache(a.Allocator())
d := execute.NewDataset(id, mode, cache)
t := NewCovarianceTransformation(d, cache, s)
return t, d, nil
func NewCovarianceTransformation(d execute.Dataset, cache execute.BlockBuilderCache, spec *CovarianceProcedureSpec) *CovarianceTransformation {
return &CovarianceTransformation{
d: d,
cache: cache,
spec: *spec,
func (t *CovarianceTransformation) RetractBlock(id execute.DatasetID, key execute.PartitionKey) error {
return t.d.RetractBlock(key)
func (t *CovarianceTransformation) Process(id execute.DatasetID, b execute.Block) error {
cols := b.Cols()
builder, created := t.cache.BlockBuilder(b.Key())
if !created {
return fmt.Errorf("covariance found duplicate block with key: %v", b.Key())
execute.AddBlockKeyCols(b.Key(), builder)
Label: t.spec.TimeDst,
Type: execute.TTime,
valueIdx := builder.AddCol(execute.ColMeta{
Label: t.spec.ValueLabel,
Type: execute.TFloat,
xIdx := execute.ColIdx(t.spec.Columns[0], cols)
yIdx := execute.ColIdx(t.spec.Columns[1], cols)
if cols[xIdx].Type != cols[yIdx].Type {
return errors.New("cannot compute the covariance between different types")
if err := execute.AppendAggregateTime(t.spec.TimeSrc, t.spec.TimeDst, b.Key(), builder); err != nil {
return err
b.Do(func(cr execute.ColReader) error {
switch typ := cols[xIdx].Type; typ {
case execute.TFloat:
t.DoFloat(cr.Floats(xIdx), cr.Floats(yIdx))
return fmt.Errorf("covariance does not support %v", typ)
return nil
execute.AppendKeyValues(b.Key(), builder)
builder.AppendFloat(valueIdx, t.value())
return nil
func (t *CovarianceTransformation) reset() {
t.n = 0
t.xm1 = 0
t.ym1 = 0
t.xm2 = 0
t.ym2 = 0
t.xym2 = 0
func (t *CovarianceTransformation) DoFloat(xs, ys []float64) {
var xdelta, ydelta, xdelta2, ydelta2 float64
for i, x := range xs {
y := ys[i]
// Update means
xdelta = x - t.xm1
ydelta = y - t.ym1
t.xm1 += xdelta / t.n
t.ym1 += ydelta / t.n
// Update variance sums
xdelta2 = x - t.xm1
ydelta2 = y - t.ym1
t.xm2 += xdelta * xdelta2
t.ym2 += ydelta * ydelta2
// Update covariance sum
// Covariance is symetric so we do not need to compute the yxm2 value.
t.xym2 += xdelta * ydelta2
func (t *CovarianceTransformation) value() float64 {
if t.n < 2 {
return math.NaN()
if t.spec.PearsonCorrelation {
return (t.xym2) / math.Sqrt(t.xm2*t.ym2)
return t.xym2 / (t.n - 1)
func (t *CovarianceTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
return t.d.UpdateWatermark(mark)
func (t *CovarianceTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
return t.d.UpdateProcessingTime(pt)
// Finish is called when the upstream dataset id has finished, with err set
// if it finished with an error.
// NOTE(review): no body is visible here; presumably it forwards err to the
// downstream dataset — confirm.
func (t *CovarianceTransformation) Finish(id execute.DatasetID, err error) {
@ -0,0 +1,399 @@
package functions_test
import (
// TestCovariance_NewQuery checks the query.Spec produced for three inputs:
// a direct covariance call, a direct call with pearsonr:true, and the `cov`
// built-in, which is expected to expand to two froms, a join and a covariance.
func TestCovariance_NewQuery(t *testing.T) {
tests := []querytest.NewQueryTestCase{
// Case 1: covariance with default arguments.
Name: "simple covariance",
Raw: `from(db:"mydb") |> covariance(columns:["a","b"],)`,
Want: &query.Spec{
Operations: []*query.Operation{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
ID: "covariance1",
Spec: &functions.CovarianceOpSpec{
ValueDst: execute.DefaultValueColLabel,
AggregateConfig: execute.AggregateConfig{
TimeSrc: execute.DefaultStopColLabel,
TimeDst: execute.DefaultTimeColLabel,
Columns: []string{"a", "b"},
Edges: []query.Edge{
{Parent: "from0", Child: "covariance1"},
// Case 2: covariance with the pearsonr flag set.
Name: "pearsonr",
Raw: `from(db:"mydb")|>covariance(columns:["a","b"],pearsonr:true)`,
Want: &query.Spec{
Operations: []*query.Operation{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
ID: "covariance1",
Spec: &functions.CovarianceOpSpec{
ValueDst: execute.DefaultValueColLabel,
PearsonCorrelation: true,
AggregateConfig: execute.AggregateConfig{
TimeSrc: execute.DefaultStopColLabel,
TimeDst: execute.DefaultTimeColLabel,
Columns: []string{"a", "b"},
Edges: []query.Edge{
{Parent: "from0", Child: "covariance1"},
// Case 3: the cov built-in joins x and y before calling covariance.
Name: "global covariance",
Raw: `cov(x: from(db:"mydb"), y:from(db:"mydb"), on:["host"], pearsonr:true)`,
Want: &query.Spec{
Operations: []*query.Operation{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
ID: "from1",
Spec: &functions.FromOpSpec{
Database: "mydb",
ID: "join2",
Spec: &functions.JoinOpSpec{
On: []string{"host"},
TableNames: map[query.OperationID]string{
"from0": "x",
"from1": "y",
// Expected semantic form of the join function from the built-in source.
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{
{Key: &semantic.Identifier{Name: "t"}},
Body: &semantic.ObjectExpression{
Properties: []*semantic.Property{
Key: &semantic.Identifier{Name: "x"},
Value: &semantic.MemberExpression{
Object: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "t"},
Property: "x",
Property: "_value",
Key: &semantic.Identifier{Name: "y"},
Value: &semantic.MemberExpression{
Object: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "t"},
Property: "y",
Property: "_value",
ID: "covariance3",
Spec: &functions.CovarianceOpSpec{
ValueDst: execute.DefaultValueColLabel,
PearsonCorrelation: true,
AggregateConfig: execute.AggregateConfig{
TimeSrc: execute.DefaultStopColLabel,
TimeDst: execute.DefaultTimeColLabel,
Columns: []string{"x", "y"},
Edges: []query.Edge{
{Parent: "from0", Child: "join2"},
{Parent: "from1", Child: "join2"},
{Parent: "join2", Child: "covariance3"},
for _, tc := range tests {
// Re-bind tc so the closure below captures this iteration's value.
tc := tc
t.Run(tc.Name, func(t *testing.T) {
querytest.NewQueryTestHelper(t, tc)
// TestCovarianceOperation_Marshaling passes a JSON document and the matching
// query.Operation for covariance (with PearsonCorrelation set) to the shared
// marshaling test helper.
// NOTE(review): most of the JSON literal is not visible in this block.
func TestCovarianceOperation_Marshaling(t *testing.T) {
data := []byte(`{
op := &query.Operation{
ID: "covariance",
Spec: &functions.CovarianceOpSpec{
PearsonCorrelation: true,
querytest.OperationMarshalingTestHelper(t, data, op)
// TestCovariance_Process is a table-driven test of the covariance
// transformation. Every case feeds one block of five (x, y) float rows keyed
// by _start/_stop and expects one output row whose _time equals the block's
// _stop and whose _value is the covariance or Pearson correlation.
func TestCovariance_Process(t *testing.T) {
testCases := []struct {
name string
spec *functions.CovarianceProcedureSpec
data []execute.Block
want []*executetest.Block
// x == y, so the covariance equals the sample variance of 1..5 (2.5).
name: "variance",
spec: &functions.CovarianceProcedureSpec{
ValueLabel: execute.DefaultValueColLabel,
AggregateConfig: execute.AggregateConfig{
TimeSrc: execute.DefaultStopColLabel,
TimeDst: execute.DefaultTimeColLabel,
Columns: []string{"x", "y"},
data: []execute.Block{&executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(0), 1.0, 1.0},
{execute.Time(0), execute.Time(5), execute.Time(1), 2.0, 2.0},
{execute.Time(0), execute.Time(5), execute.Time(2), 3.0, 3.0},
{execute.Time(0), execute.Time(5), execute.Time(3), 4.0, 4.0},
{execute.Time(0), execute.Time(5), execute.Time(4), 5.0, 5.0},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(5), 2.5},
// y decreases as x increases, so the covariance is negative.
name: "negative covariance",
spec: &functions.CovarianceProcedureSpec{
ValueLabel: execute.DefaultValueColLabel,
AggregateConfig: execute.AggregateConfig{
TimeSrc: execute.DefaultStopColLabel,
TimeDst: execute.DefaultTimeColLabel,
Columns: []string{"x", "y"},
data: []execute.Block{&executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(0), 1.0, 5.0},
{execute.Time(0), execute.Time(5), execute.Time(1), 2.0, 4.0},
{execute.Time(0), execute.Time(5), execute.Time(2), 3.0, 3.0},
{execute.Time(0), execute.Time(5), execute.Time(3), 4.0, 2.0},
{execute.Time(0), execute.Time(5), execute.Time(4), 5.0, 1.0},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(5), -2.5},
// y is constant except for the last row.
name: "small covariance",
spec: &functions.CovarianceProcedureSpec{
ValueLabel: execute.DefaultValueColLabel,
AggregateConfig: execute.AggregateConfig{
TimeSrc: execute.DefaultStopColLabel,
TimeDst: execute.DefaultTimeColLabel,
Columns: []string{"x", "y"},
data: []execute.Block{&executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(0), 1.0, 1.0},
{execute.Time(0), execute.Time(5), execute.Time(1), 2.0, 1.0},
{execute.Time(0), execute.Time(5), execute.Time(2), 3.0, 1.0},
{execute.Time(0), execute.Time(5), execute.Time(3), 4.0, 1.0},
{execute.Time(0), execute.Time(5), execute.Time(4), 5.0, 2.0},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(5), 0.5},
// Pearson correlation of perfectly correlated columns is expected to be 1.
name: "pearson correlation",
spec: &functions.CovarianceProcedureSpec{
ValueLabel: execute.DefaultValueColLabel,
PearsonCorrelation: true,
AggregateConfig: execute.AggregateConfig{
TimeSrc: execute.DefaultStopColLabel,
TimeDst: execute.DefaultTimeColLabel,
Columns: []string{"x", "y"},
data: []execute.Block{&executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(0), 1.0, 1.0},
{execute.Time(0), execute.Time(5), execute.Time(1), 2.0, 2.0},
{execute.Time(0), execute.Time(5), execute.Time(2), 3.0, 3.0},
{execute.Time(0), execute.Time(5), execute.Time(3), 4.0, 4.0},
{execute.Time(0), execute.Time(5), execute.Time(4), 5.0, 5.0},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(5), 1.0},
// Pearson correlation of perfectly anti-correlated columns is expected to be -1.
name: "pearson correlation opposite",
spec: &functions.CovarianceProcedureSpec{
ValueLabel: execute.DefaultValueColLabel,
PearsonCorrelation: true,
AggregateConfig: execute.AggregateConfig{
TimeSrc: execute.DefaultStopColLabel,
TimeDst: execute.DefaultTimeColLabel,
Columns: []string{"x", "y"},
data: []execute.Block{&executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(0), 1.0, 5.0},
{execute.Time(0), execute.Time(5), execute.Time(1), 2.0, 4.0},
{execute.Time(0), execute.Time(5), execute.Time(2), 3.0, 3.0},
{execute.Time(0), execute.Time(5), execute.Time(3), 4.0, 2.0},
{execute.Time(0), execute.Time(5), execute.Time(4), 5.0, 1.0},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(5), -1.0},
for _, tc := range testCases {
// Re-bind tc so the closure below captures this iteration's value.
tc := tc
// NOTE(review): the subtest name argument and the process-helper call are
// not visible in the lines below — confirm against the upstream source.
t.Run(, func(t *testing.T) {
func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
return functions.NewCovarianceTransformation(d, c, tc.spec)
@ -0,0 +1,199 @@
package functions
import (
// CumulativeSumKind is the name under which the cumulativeSum operation is
// registered.
const CumulativeSumKind = "cumulativeSum"
// CumulativeSumOpSpec is the query-level spec of the cumulativeSum operation.
type CumulativeSumOpSpec struct {
// Columns lists the labels of the columns to sum cumulatively.
Columns []string `json:"columns"`
// cumulativeSumSignature is the function signature registered for cumulativeSum.
var cumulativeSumSignature = query.DefaultFunctionSignature()
// init registers cumulativeSum with the query, plan and execute packages
// under CumulativeSumKind.
func init() {
query.RegisterFunction(CumulativeSumKind, createCumulativeSumOpSpec, cumulativeSumSignature)
query.RegisterOpSpec(CumulativeSumKind, newCumulativeSumOp)
plan.RegisterProcedureSpec(CumulativeSumKind, newCumulativeSumProcedure, CumulativeSumKind)
execute.RegisterTransformation(CumulativeSumKind, createCumulativeSumTransformation)
func createCumulativeSumOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
spec := new(CumulativeSumOpSpec)
if cols, ok, err := args.GetArray("columns", semantic.String); err != nil {
return nil, err
} else if ok {
columns, err := interpreter.ToStringArray(cols)
if err != nil {
return nil, err
spec.Columns = columns
} else {
spec.Columns = []string{execute.DefaultValueColLabel}
return spec, nil
func newCumulativeSumOp() query.OperationSpec {
return new(CumulativeSumOpSpec)
func (s *CumulativeSumOpSpec) Kind() query.OperationKind {
return CumulativeSumKind
// CumulativeSumProcedureSpec is the plan-level spec of the cumulativeSum
// operation.
type CumulativeSumProcedureSpec struct {
// Columns lists the labels of the columns to sum cumulatively.
Columns []string
func newCumulativeSumProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*CumulativeSumOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
return &CumulativeSumProcedureSpec{
Columns: spec.Columns,
}, nil
func (s *CumulativeSumProcedureSpec) Kind() plan.ProcedureKind {
return CumulativeSumKind
func (s *CumulativeSumProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(CumulativeSumProcedureSpec)
*ns = *s
if s.Columns != nil {
ns.Columns = make([]string, len(s.Columns))
copy(ns.Columns, s.Columns)
return ns
func createCumulativeSumTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*CumulativeSumProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
cache := execute.NewBlockBuilderCache(a.Allocator())
d := execute.NewDataset(id, mode, cache)
t := NewCumulativeSumTransformation(d, cache, s)
return t, d, nil
// cumulativeSumTransformation replaces the configured columns of each block
// with their running totals and copies all other columns unchanged.
type cumulativeSumTransformation struct {
// d is the downstream dataset that watermark, processing-time and retract
// calls are forwarded to.
d execute.Dataset
// cache supplies one block builder per block key.
cache execute.BlockBuilderCache
// spec is a copy of the procedure spec given to the constructor.
spec CumulativeSumProcedureSpec
func NewCumulativeSumTransformation(d execute.Dataset, cache execute.BlockBuilderCache, spec *CumulativeSumProcedureSpec) *cumulativeSumTransformation {
return &cumulativeSumTransformation{
d: d,
cache: cache,
spec: *spec,
func (t *cumulativeSumTransformation) RetractBlock(id execute.DatasetID, key execute.PartitionKey) error {
return t.d.RetractBlock(key)
func (t *cumulativeSumTransformation) Process(id execute.DatasetID, b execute.Block) error {
builder, created := t.cache.BlockBuilder(b.Key())
if !created {
return fmt.Errorf("cumulative sum found duplicate block with key: %v", b.Key())
execute.AddBlockCols(b, builder)
cols := b.Cols()
sumers := make([]*cumulativeSum, len(cols))
for j, c := range cols {
for _, label := range t.spec.Columns {
if c.Label == label {
sumers[j] = &cumulativeSum{}
return b.Do(func(cr execute.ColReader) error {
l := cr.Len()
for j, c := range cols {
switch c.Type {
case execute.TBool:
builder.AppendBools(j, cr.Bools(j))
case execute.TInt:
if sumers[j] != nil {
for i := 0; i < l; i++ {
builder.AppendInt(j, sumers[j].sumInt(cr.Ints(j)[i]))
} else {
builder.AppendInts(j, cr.Ints(j))
case execute.TUInt:
if sumers[j] != nil {
for i := 0; i < l; i++ {
builder.AppendUInt(j, sumers[j].sumUInt(cr.UInts(j)[i]))
} else {
builder.AppendUInts(j, cr.UInts(j))
case execute.TFloat:
if sumers[j] != nil {
for i := 0; i < l; i++ {
builder.AppendFloat(j, sumers[j].sumFloat(cr.Floats(j)[i]))
} else {
builder.AppendFloats(j, cr.Floats(j))
case execute.TString:
builder.AppendStrings(j, cr.Strings(j))
case execute.TTime:
builder.AppendTimes(j, cr.Times(j))
return nil
// UpdateWatermark forwards the watermark mark to the dataset t.d.
// The id argument is not used.
func (t *cumulativeSumTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
return t.d.UpdateWatermark(mark)
// UpdateProcessingTime forwards the processing time pt to the dataset t.d.
// The id argument is not used.
func (t *cumulativeSumTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
return t.d.UpdateProcessingTime(pt)
// Finish receives the ID of a dataset and an error value.
// NOTE(review): the body is not visible in this view; presumably it hands err
// on to t.d — confirm.
func (t *cumulativeSumTransformation) Finish(id execute.DatasetID, err error) {
// cumulativeSum keeps one running total per supported numeric type.
// Each sum method only touches the total of its own type.
type cumulativeSum struct {
	intVal   int64
	uintVal  uint64
	floatVal float64
}

// sumInt adds val to the int64 total and returns the new total.
func (s *cumulativeSum) sumInt(val int64) int64 {
	total := s.intVal + val
	s.intVal = total
	return total
}

// sumUInt adds val to the uint64 total and returns the new total.
func (s *cumulativeSum) sumUInt(val uint64) uint64 {
	total := s.uintVal + val
	s.uintVal = total
	return total
}

// sumFloat adds val to the float64 total and returns the new total.
func (s *cumulativeSum) sumFloat(val float64) float64 {
	total := s.floatVal + val
	s.floatVal = total
	return total
}
@ -0,0 +1,232 @@
package functions_test
import (
func TestCumulativeSumOperation_Marshaling(t *testing.T) {
data := []byte(`{"id":"cumulativeSum","kind":"cumulativeSum","spec":{}}`)
op := &query.Operation{
ID: "cumulativeSum",
Spec: &functions.CumulativeSumOpSpec{},
querytest.OperationMarshalingTestHelper(t, data, op)
// TestCumulativeSum_PassThrough runs executetest.TransformationPassThroughTestHelper
// with a factory that returns a cumulative sum transformation.
// NOTE(review): the arguments passed to NewCumulativeSumTransformation are not
// visible in this view.
func TestCumulativeSum_PassThrough(t *testing.T) {
executetest.TransformationPassThroughTestHelper(t, func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
s := functions.NewCumulativeSumTransformation(
return s
// TestCumulativeSum_Process is a table-driven test: each case supplies a
// procedure spec, the input blocks and the blocks expected after the
// cumulative sum transformation built from that spec has processed them.
// NOTE(review): the t.Run name argument and the helper call that drives the
// transformation are not fully visible in this view.
func TestCumulativeSum_Process(t *testing.T) {
testCases := []struct {
name string
spec *functions.CumulativeSumProcedureSpec
data []execute.Block
want []*executetest.Block
// Running total over the default value column only.
name: "float",
spec: &functions.CumulativeSumProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), 2.0},
{execute.Time(1), 1.0},
{execute.Time(2), 3.0},
{execute.Time(3), 4.0},
{execute.Time(4), 2.0},
{execute.Time(5), 6.0},
{execute.Time(6), 2.0},
{execute.Time(7), 7.0},
{execute.Time(8), 3.0},
{execute.Time(9), 8.0},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), 2.0},
{execute.Time(1), 3.0},
{execute.Time(2), 6.0},
{execute.Time(3), 10.0},
{execute.Time(4), 12.0},
{execute.Time(5), 18.0},
{execute.Time(6), 20.0},
{execute.Time(7), 27.0},
{execute.Time(8), 30.0},
{execute.Time(9), 38.0},
// Int, uint and float columns are summed independently; negative
// inputs lower the int and float totals.
name: "multiple value columns",
spec: &functions.CumulativeSumProcedureSpec{
Columns: []string{"int", "uint", "float"},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "int", Type: execute.TInt},
{Label: "uint", Type: execute.TUInt},
{Label: "float", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), int64(2), uint64(1), 1.0},
{execute.Time(1), int64(1), uint64(2), 1.0},
{execute.Time(2), int64(3), uint64(3), 2.0},
{execute.Time(3), int64(4), uint64(4), 13.0},
{execute.Time(4), int64(2), uint64(5), 4.0},
{execute.Time(5), int64(6), uint64(6), 5.0},
{execute.Time(6), int64(2), uint64(7), -7.0},
{execute.Time(7), int64(-7), uint64(8), 2.0},
{execute.Time(8), int64(3), uint64(9), -6.0},
{execute.Time(9), int64(8), uint64(11), 3.0},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "int", Type: execute.TInt},
{Label: "uint", Type: execute.TUInt},
{Label: "float", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), int64(2), uint64(1), 1.0},
{execute.Time(1), int64(3), uint64(3), 2.0},
{execute.Time(2), int64(6), uint64(6), 4.0},
{execute.Time(3), int64(10), uint64(10), 17.0},
{execute.Time(4), int64(12), uint64(15), 21.0},
{execute.Time(5), int64(18), uint64(21), 26.0},
{execute.Time(6), int64(20), uint64(28), 19.0},
{execute.Time(7), int64(13), uint64(36), 21.0},
{execute.Time(8), int64(16), uint64(45), 15.0},
{execute.Time(9), int64(24), uint64(56), 18.0},
// A second time column is copied through unchanged.
name: "multiple time columns",
spec: &functions.CumulativeSumProcedureSpec{
Columns: []string{"int", "float"},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "time2", Type: execute.TTime},
{Label: "int", Type: execute.TInt},
{Label: "float", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(0), int64(2), 1.0},
{execute.Time(1), execute.Time(1), int64(1), 1.0},
{execute.Time(2), execute.Time(2), int64(3), 2.0},
{execute.Time(3), execute.Time(3), int64(4), 13.0},
{execute.Time(4), execute.Time(4), int64(2), 4.0},
{execute.Time(5), execute.Time(5), int64(6), 5.0},
{execute.Time(6), execute.Time(6), int64(2), -7.0},
{execute.Time(7), execute.Time(7), int64(7), 2.0},
{execute.Time(8), execute.Time(8), int64(3), -6.0},
{execute.Time(9), execute.Time(9), int64(8), 3.0},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "time2", Type: execute.TTime},
{Label: "int", Type: execute.TInt},
{Label: "float", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(0), execute.Time(0), int64(2), 1.0},
{execute.Time(1), execute.Time(1), int64(3), 2.0},
{execute.Time(2), execute.Time(2), int64(6), 4.0},
{execute.Time(3), execute.Time(3), int64(10), 17.0},
{execute.Time(4), execute.Time(4), int64(12), 21.0},
{execute.Time(5), execute.Time(5), int64(18), 26.0},
{execute.Time(6), execute.Time(6), int64(20), 19.0},
{execute.Time(7), execute.Time(7), int64(27), 21.0},
{execute.Time(8), execute.Time(8), int64(30), 15.0},
{execute.Time(9), execute.Time(9), int64(38), 18.0},
// A string column is copied through unchanged; the running total does
// not restart when the string value changes.
name: "tag columns",
spec: &functions.CumulativeSumProcedureSpec{
Columns: []string{"int"},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "int", Type: execute.TInt},
{Label: "t", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(0), int64(2), "tag0"},
{execute.Time(1), int64(1), "tag0"},
{execute.Time(2), int64(3), "tag1"},
{execute.Time(3), int64(4), "tag1"},
{execute.Time(4), int64(2), "tag0"},
{execute.Time(5), int64(6), "tag0"},
{execute.Time(6), int64(2), "tag1"},
{execute.Time(7), int64(7), "tag1"},
{execute.Time(8), int64(3), "tag0"},
{execute.Time(9), int64(8), "tag0"},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "int", Type: execute.TInt},
{Label: "t", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(0), int64(2), "tag0"},
{execute.Time(1), int64(3), "tag0"},
{execute.Time(2), int64(6), "tag1"},
{execute.Time(3), int64(10), "tag1"},
{execute.Time(4), int64(12), "tag0"},
{execute.Time(5), int64(18), "tag0"},
{execute.Time(6), int64(20), "tag1"},
{execute.Time(7), int64(27), "tag1"},
{execute.Time(8), int64(30), "tag0"},
{execute.Time(9), int64(38), "tag0"},
for _, tc := range testCases {
// Re-bind tc so the closure below captures this iteration's case.
tc := tc
t.Run(, func(t *testing.T) {
func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
return functions.NewCumulativeSumTransformation(d, c, tc.spec)
@ -0,0 +1,97 @@
package functions_test
import (
// Parameters of the generated test data.
const (
// N is the number of values in NormalData.
N = 1e6
// Mu is the mean of the normal distribution the values are drawn from.
Mu = 10
// Sigma is the standard deviation of that distribution.
Sigma = 3
// seed seeds the random source so the generated data is reproducible.
seed = 42
// NOTE(review): the body of this init function is not visible in this view.
func init() {
// NormalData is a slice of N random values that are normally distributed with mean Mu and standard deviation Sigma.
var NormalData []float64
// NormalBlock is a block of data whose value col is NormalData.
var NormalBlock execute.Block
// init fills NormalData with N values drawn from a seeded normal distribution
// and builds NormalBlock: a six-column block (time, start, stop, value, t1,
// t2) with one row per value.
// NOTE(review): part of the NewPartitionKey argument list is not visible in
// this view.
func init() {
dist := distuv.Normal{
Mu: Mu,
Sigma: Sigma,
Source: rand.New(rand.NewSource(seed)),
NormalData = make([]float64, N)
for i := range NormalData {
NormalData[i] = dist.Rand()
start := execute.Time(time.Date(2016, 10, 10, 0, 0, 0, 0, time.UTC).UnixNano())
stop := execute.Time(time.Date(2017, 10, 10, 0, 0, 0, 0, time.UTC).UnixNano())
t1Value := "a"
key := execute.NewPartitionKey(
{Label: execute.DefaultStartColLabel, Type: execute.TTime},
{Label: execute.DefaultStopColLabel, Type: execute.TTime},
{Label: "t1", Type: execute.TString},
normalBlockBuilder := execute.NewColListBlockBuilder(key, executetest.UnlimitedAllocator)
// Column order here fixes the indexes used by the Append* calls below.
normalBlockBuilder.AddCol(execute.ColMeta{Label: execute.DefaultTimeColLabel, Type: execute.TTime})
normalBlockBuilder.AddCol(execute.ColMeta{Label: execute.DefaultStartColLabel, Type: execute.TTime})
normalBlockBuilder.AddCol(execute.ColMeta{Label: execute.DefaultStopColLabel, Type: execute.TTime})
normalBlockBuilder.AddCol(execute.ColMeta{Label: execute.DefaultValueColLabel, Type: execute.TFloat})
normalBlockBuilder.AddCol(execute.ColMeta{Label: "t1", Type: execute.TString})
normalBlockBuilder.AddCol(execute.ColMeta{Label: "t2", Type: execute.TString})
times := make([]execute.Time, N)
startTimes := make([]execute.Time, N)
stopTimes := make([]execute.Time, N)
values := NormalData
t1 := make([]string, N)
t2 := make([]string, N)
for i, v := range values {
startTimes[i] = start
stopTimes[i] = stop
t1[i] = t1Value
// There are roughly 1 million, 31 second intervals in a year.
times[i] = start + execute.Time(time.Duration(i*31)*time.Second)
// Pick t2 based off the value
// NOTE(review): for v <= -1 the remainder can be -1 or -2, which matches
// no case and leaves t2[i] as the empty string — confirm this is acceptable.
switch int(v) % 3 {
case 0:
t2[i] = "x"
case 1:
t2[i] = "y"
case 2:
t2[i] = "z"
normalBlockBuilder.AppendTimes(0, times)
normalBlockBuilder.AppendTimes(1, startTimes)
normalBlockBuilder.AppendTimes(2, stopTimes)
normalBlockBuilder.AppendFloats(3, values)
normalBlockBuilder.AppendStrings(4, t1)
normalBlockBuilder.AppendStrings(5, t2)
// The error returned by Block is discarded.
NormalBlock, _ = normalBlockBuilder.Block()
@ -0,0 +1,355 @@
package functions
import (
// DerivativeKind is the name under which the derivative function, operation
// spec, procedure spec and transformation are registered.
const DerivativeKind = "derivative"
// DerivativeOpSpec holds the arguments of a derivative operation.
type DerivativeOpSpec struct {
// Unit is the duration the elapsed time between two rows is divided by.
Unit query.Duration `json:"unit"`
// NonNegative, when set, replaces a negative difference with the current value.
NonNegative bool `json:"non_negative"`
// Columns lists the labels of the columns to differentiate.
Columns []string `json:"columns"`
// TimeSrc is the label of the column that supplies the row timestamps.
TimeSrc string `json:"time_src"`
// derivativeSignature starts as the default function signature; init adds the
// derivative-specific parameters to it.
var derivativeSignature = query.DefaultFunctionSignature()
// init declares the parameters accepted by derivative and registers the
// function, operation spec, procedure spec and transformation under
// DerivativeKind.
func init() {
derivativeSignature.Params["unit"] = semantic.Duration
derivativeSignature.Params["nonNegative"] = semantic.Bool
derivativeSignature.Params["columns"] = semantic.NewArrayType(semantic.String)
derivativeSignature.Params["timeSrc"] = semantic.String
query.RegisterFunction(DerivativeKind, createDerivativeOpSpec, derivativeSignature)
query.RegisterOpSpec(DerivativeKind, newDerivativeOp)
plan.RegisterProcedureSpec(DerivativeKind, newDerivativeProcedure, DerivativeKind)
execute.RegisterTransformation(DerivativeKind, createDerivativeTransformation)
func createDerivativeOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
spec := new(DerivativeOpSpec)
if unit, ok, err := args.GetDuration("unit"); err != nil {
return nil, err
} else if ok {
spec.Unit = unit
} else {
//Default is 1s
spec.Unit = query.Duration(time.Second)
if nn, ok, err := args.GetBool("nonNegative"); err != nil {
return nil, err
} else if ok {
spec.NonNegative = nn
if timeCol, ok, err := args.GetString("timeSrc"); err != nil {
return nil, err
} else if ok {
spec.TimeSrc = timeCol
} else {
spec.TimeSrc = execute.DefaultTimeColLabel
if cols, ok, err := args.GetArray("columns", semantic.String); err != nil {
return nil, err
} else if ok {
columns, err := interpreter.ToStringArray(cols)
if err != nil {
return nil, err
spec.Columns = columns
} else {
spec.Columns = []string{execute.DefaultValueColLabel}
return spec, nil
func newDerivativeOp() query.OperationSpec {
return new(DerivativeOpSpec)
// Kind returns DerivativeKind.
func (s *DerivativeOpSpec) Kind() query.OperationKind {
return DerivativeKind
// DerivativeProcedureSpec is the planner-level spec for derivative. Its fields
// are filled from the matching DerivativeOpSpec fields.
type DerivativeProcedureSpec struct {
// Unit is the duration the elapsed time between two rows is divided by.
Unit query.Duration `json:"unit"`
// NonNegative, when set, replaces a negative difference with the current value.
NonNegative bool `json:"non_negative"`
// Columns lists the labels of the columns to differentiate.
Columns []string `json:"columns"`
// TimeCol is the label of the timestamp column (DerivativeOpSpec.TimeSrc).
TimeCol string `json:"time_col"`
func newDerivativeProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*DerivativeOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
return &DerivativeProcedureSpec{
Unit: spec.Unit,
NonNegative: spec.NonNegative,
Columns: spec.Columns,
TimeCol: spec.TimeSrc,
}, nil
// Kind returns DerivativeKind.
func (s *DerivativeProcedureSpec) Kind() plan.ProcedureKind {
return DerivativeKind
func (s *DerivativeProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(DerivativeProcedureSpec)
*ns = *s
if s.Columns != nil {
ns.Columns = make([]string, len(s.Columns))
copy(ns.Columns, s.Columns)
return ns
func createDerivativeTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*DerivativeProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
cache := execute.NewBlockBuilderCache(a.Allocator())
d := execute.NewDataset(id, mode, cache)
t := NewDerivativeTransformation(d, cache, s)
return t, d, nil
// derivativeTransformation replaces the values of the configured columns with
// their rate of change between consecutive rows.
type derivativeTransformation struct {
// d is the dataset that retractions and watermark/processing-time updates are forwarded to.
d execute.Dataset
// cache hands out one block builder per partition key.
cache execute.BlockBuilderCache
// unit is the duration the elapsed time between two rows is divided by.
unit time.Duration
// nonNegative, when set, replaces a negative difference with the current value.
nonNegative bool
// columns lists the labels of the columns to differentiate.
columns []string
// timeCol is the label of the column that supplies the row timestamps.
timeCol string
func NewDerivativeTransformation(d execute.Dataset, cache execute.BlockBuilderCache, spec *DerivativeProcedureSpec) *derivativeTransformation {
return &derivativeTransformation{
d: d,
cache: cache,
unit: time.Duration(spec.Unit),
nonNegative: spec.NonNegative,
columns: spec.Columns,
timeCol: spec.TimeCol,
// RetractBlock forwards the retraction of the block identified by key to the
// dataset t.d. The id argument is not used.
func (t *derivativeTransformation) RetractBlock(id execute.DatasetID, key execute.PartitionKey) error {
return t.d.RetractBlock(key)
// Process writes block b into a new builder. For every column named in
// t.columns the values are replaced by the float64 result of the matching
// derivative helper; all other columns are copied. The first row of the block
// is dropped from every column.
//
// It returns an error if a builder for b's key already exists in the cache or
// if no column is labelled t.timeCol.
// NOTE(review): the statements that add the column metadata to the builder are
// not visible in this view (dc is prepared but its use is not shown).
func (t *derivativeTransformation) Process(id execute.DatasetID, b execute.Block) error {
builder, created := t.cache.BlockBuilder(b.Key())
if !created {
return fmt.Errorf("derivative found duplicate block with key: %v", b.Key())
cols := b.Cols()
// derivatives[j] is non-nil when column j is one of the requested columns.
derivatives := make([]*derivative, len(cols))
timeIdx := -1
for j, c := range cols {
found := false
for _, label := range t.columns {
if c.Label == label {
found = true
if c.Label == t.timeCol {
timeIdx = j
if found {
dc := c
// Derivative always results in a float
dc.Type = execute.TFloat
derivatives[j] = newDerivative(j, t.unit, t.nonNegative)
} else {
if timeIdx < 0 {
return fmt.Errorf("no column %q exists", t.timeCol)
// We need to drop the first row since its derivative is undefined
firstIdx := 1
return b.Do(func(cr execute.ColReader) error {
l := cr.Len()
for j, c := range cols {
d := derivatives[j]
switch c.Type {
case execute.TBool:
// NOTE(review): slicing with [firstIdx:] panics if the first batch has no
// rows — confirm batches are never empty. Same for the other copies below.
builder.AppendBools(j, cr.Bools(j)[firstIdx:])
case execute.TInt:
if d != nil {
for i := 0; i < l; i++ {
time := cr.Times(timeIdx)[i]
// Every row updates the helper's state, including the skipped one.
v := d.updateInt(time, cr.Ints(j)[i])
// Skip only row 0 of the first batch.
if i != 0 || firstIdx == 0 {
builder.AppendFloat(j, v)
} else {
builder.AppendInts(j, cr.Ints(j)[firstIdx:])
case execute.TUInt:
if d != nil {
for i := 0; i < l; i++ {
time := cr.Times(timeIdx)[i]
v := d.updateUInt(time, cr.UInts(j)[i])
if i != 0 || firstIdx == 0 {
builder.AppendFloat(j, v)
} else {
builder.AppendUInts(j, cr.UInts(j)[firstIdx:])
case execute.TFloat:
if d != nil {
for i := 0; i < l; i++ {
time := cr.Times(timeIdx)[i]
v := d.updateFloat(time, cr.Floats(j)[i])
if i != 0 || firstIdx == 0 {
builder.AppendFloat(j, v)
} else {
builder.AppendFloats(j, cr.Floats(j)[firstIdx:])
case execute.TString:
builder.AppendStrings(j, cr.Strings(j)[firstIdx:])
case execute.TTime:
builder.AppendTimes(j, cr.Times(j)[firstIdx:])
// Now that we skipped the first row, start at 0 for the rest of the batches
firstIdx = 0
return nil
// UpdateWatermark forwards the watermark mark to the dataset t.d.
// The id argument is not used.
func (t *derivativeTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
return t.d.UpdateWatermark(mark)
// UpdateProcessingTime forwards the processing time pt to the dataset t.d.
// The id argument is not used.
func (t *derivativeTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
return t.d.UpdateProcessingTime(pt)
// Finish receives the ID of a dataset and an error value.
// NOTE(review): the body is not visible in this view; presumably it hands err
// on to t.d — confirm.
func (t *derivativeTransformation) Finish(id execute.DatasetID, err error) {
func newDerivative(col int, unit time.Duration, nonNegative bool) *derivative {
return &derivative{
col: col,
first: true,
unit: float64(unit),
nonNegative: nonNegative,
// derivative holds the state needed to compute the rate of change of one
// column from one row to the next.
type derivative struct {
// col is the index of the column this helper was created for.
col int
// first is true until the first value has been recorded.
first bool
// unit is the time unit, as a float64, that elapsed time is divided by.
unit float64
// nonNegative, when set, replaces a negative difference with the current value.
nonNegative bool
// Previous value, one field per supported value type.
pIntValue int64
pUIntValue uint64
pFloatValue float64
// pTime is the timestamp of the previous value.
pTime execute.Time
func (d *derivative) updateInt(t execute.Time, v int64) float64 {
if d.first {
d.pTime = t
d.pIntValue = v
d.first = false
return math.NaN()
diff := float64(v - d.pIntValue)
if d.nonNegative && diff < 0 {
//TODO(nathanielc): Should we return null when we have null support
// Or should we assume the previous is 0?
diff = float64(v)
elapsed := float64(time.Duration(t-d.pTime)) / d.unit
d.pTime = t
d.pIntValue = v
if d.nonNegative && diff < 0 {
//TODO(nathanielc): Should we return null when we have null support
// Or should we assume the previous is 0?
return float64(v)
return diff / elapsed
func (d *derivative) updateUInt(t execute.Time, v uint64) float64 {
if d.first {
d.pTime = t
d.pUIntValue = v
d.first = false
return math.NaN()
var diff float64
if d.pUIntValue > v {
// Prevent uint64 overflow by applying the negative sign after the conversion to a float64.
diff = float64(d.pUIntValue-v) * -1
} else {
diff = float64(v - d.pUIntValue)
if d.nonNegative && diff < 0 {
//TODO(nathanielc): Should we return null when we have null support
// Or should we assume the previous is 0?
diff = float64(v)
elapsed := float64(time.Duration(t-d.pTime)) / d.unit
d.pTime = t
d.pUIntValue = v
return diff / elapsed
func (d *derivative) updateFloat(t execute.Time, v float64) float64 {
if d.first {
d.pTime = t
d.pFloatValue = v
d.first = false
return math.NaN()
diff := v - d.pFloatValue
if d.nonNegative && diff < 0 {
//TODO(nathanielc): Should we return null when we have null support
// Or should we assume the previous is 0?
diff = v
elapsed := float64(time.Duration(t-d.pTime)) / d.unit
d.pTime = t
d.pFloatValue = v
return diff / elapsed
@ -0,0 +1,427 @@
package functions_test
import (
func TestDerivativeOperation_Marshaling(t *testing.T) {
data := []byte(`{"id":"derivative","kind":"derivative","spec":{"unit":"1m","non_negative":true}}`)
op := &query.Operation{
ID: "derivative",
Spec: &functions.DerivativeOpSpec{
Unit: query.Duration(time.Minute),
NonNegative: true,
querytest.OperationMarshalingTestHelper(t, data, op)
// TestDerivative_PassThrough runs executetest.TransformationPassThroughTestHelper
// with a factory that returns a derivative transformation.
// NOTE(review): the arguments passed to NewDerivativeTransformation are not
// visible in this view.
func TestDerivative_PassThrough(t *testing.T) {
executetest.TransformationPassThroughTestHelper(t, func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
s := functions.NewDerivativeTransformation(
return s
// TestDerivative_Process is a table-driven test: each case supplies a
// procedure spec, the input blocks and the blocks expected after the
// derivative transformation built from that spec has processed them. In every
// case the expected output has one row fewer than the input, and the value
// column is float regardless of the input type.
// NOTE(review): the t.Run name argument and the helper call that drives the
// transformation are not fully visible in this view.
func TestDerivative_Process(t *testing.T) {
testCases := []struct {
name string
spec *functions.DerivativeProcedureSpec
data []execute.Block
want []*executetest.Block
// Float input, unit of 1 (nanosecond timestamps one apart).
name: "float",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
TimeCol: execute.DefaultTimeColLabel,
Unit: 1,
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(1), 2.0},
{execute.Time(2), 1.0},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(2), -1.0},
// A drop of 1 over two seconds with a one-second unit gives -0.5.
name: "float with units",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
TimeCol: execute.DefaultTimeColLabel,
Unit: query.Duration(time.Second),
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(1 * time.Second), 2.0},
{execute.Time(3 * time.Second), 1.0},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(3 * time.Second), -0.5},
// Int input produces a float output column.
name: "int",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
TimeCol: execute.DefaultTimeColLabel,
Unit: 1,
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
Data: [][]interface{}{
{execute.Time(1), int64(20)},
{execute.Time(2), int64(10)},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(2), -10.0},
name: "int with units",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
TimeCol: execute.DefaultTimeColLabel,
Unit: query.Duration(time.Second),
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
Data: [][]interface{}{
{execute.Time(1 * time.Second), int64(20)},
{execute.Time(3 * time.Second), int64(10)},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(3 * time.Second), -5.0},
// With NonNegative set, the drop from 20 to 10 is reported as 10 (the
// current value) instead of -10.
name: "int non negative",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
TimeCol: execute.DefaultTimeColLabel,
Unit: 1,
NonNegative: true,
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
Data: [][]interface{}{
{execute.Time(1), int64(20)},
{execute.Time(2), int64(10)},
{execute.Time(3), int64(20)},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(2), 10.0},
{execute.Time(3), 10.0},
name: "uint",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
TimeCol: execute.DefaultTimeColLabel,
Unit: 1,
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TUInt},
Data: [][]interface{}{
{execute.Time(1), uint64(10)},
{execute.Time(2), uint64(20)},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(2), 10.0},
// A decreasing uint yields a negative float rather than wrapping.
name: "uint with negative result",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
TimeCol: execute.DefaultTimeColLabel,
Unit: 1,
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TUInt},
Data: [][]interface{}{
{execute.Time(1), uint64(20)},
{execute.Time(2), uint64(10)},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(2), -10.0},
name: "uint with non negative",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
TimeCol: execute.DefaultTimeColLabel,
Unit: 1,
NonNegative: true,
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TUInt},
Data: [][]interface{}{
{execute.Time(1), uint64(20)},
{execute.Time(2), uint64(10)},
{execute.Time(3), uint64(20)},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(2), 10.0},
{execute.Time(3), 10.0},
name: "uint with units",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
TimeCol: execute.DefaultTimeColLabel,
Unit: query.Duration(time.Second),
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TUInt},
Data: [][]interface{}{
{execute.Time(1 * time.Second), uint64(20)},
{execute.Time(3 * time.Second), uint64(10)},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(3 * time.Second), -5.0},
// Float variant of the non-negative behaviour.
name: "non negative one block",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
TimeCol: execute.DefaultTimeColLabel,
Unit: 1,
NonNegative: true,
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(1), 2.0},
{execute.Time(2), 1.0},
{execute.Time(3), 2.0},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(2), 1.0},
{execute.Time(3), 1.0},
// A string column is copied through, minus its first row.
name: "float with tags",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
TimeCol: execute.DefaultTimeColLabel,
Unit: 1,
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(1), 2.0, "a"},
{execute.Time(2), 1.0, "b"},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(2), -1.0, "b"},
// Two requested columns are differentiated independently.
name: "float with multiple values",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{"x", "y"},
TimeCol: execute.DefaultTimeColLabel,
Unit: 1,
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(1), 2.0, 20.0},
{execute.Time(2), 1.0, 10.0},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(2), -1.0, -10.0},
name: "float non negative with multiple values",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{"x", "y"},
TimeCol: execute.DefaultTimeColLabel,
Unit: 1,
NonNegative: true,
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(1), 2.0, 20.0},
{execute.Time(2), 1.0, 10.0},
{execute.Time(3), 2.0, 0.0},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(2), 1.0, 10.0},
{execute.Time(3), 1.0, 0.0},
for _, tc := range testCases {
// Re-bind tc so the closure below captures this iteration's case.
tc := tc
t.Run(, func(t *testing.T) {
func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
return functions.NewDerivativeTransformation(d, c, tc.spec)
@ -0,0 +1,313 @@
package functions
import (
// DifferenceKind is the name under which the difference function, operation
// spec, procedure spec and transformation are registered.
const DifferenceKind = "difference"
// DifferenceOpSpec holds the arguments of a difference operation.
type DifferenceOpSpec struct {
// NonNegative, when set, replaces a negative difference with the current value.
NonNegative bool `json:"non_negative"`
// Columns lists the labels of the columns to take the difference of.
Columns []string `json:"columns"`
// differenceSignature starts as the default function signature; init adds the
// difference-specific parameters to it.
var differenceSignature = query.DefaultFunctionSignature()
func init() {
differenceSignature.Params["nonNegative"] = semantic.Bool
derivativeSignature.Params["columns"] = semantic.NewArrayType(semantic.String)
query.RegisterFunction(DifferenceKind, createDifferenceOpSpec, differenceSignature)
query.RegisterOpSpec(DifferenceKind, newDifferenceOp)
plan.RegisterProcedureSpec(DifferenceKind, newDifferenceProcedure, DifferenceKind)
execute.RegisterTransformation(DifferenceKind, createDifferenceTransformation)
func createDifferenceOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
err := a.AddParentFromArgs(args)
if err != nil {
return nil, err
spec := new(DifferenceOpSpec)
if nn, ok, err := args.GetBool("nonNegative"); err != nil {
return nil, err
} else if ok {
spec.NonNegative = nn
if cols, ok, err := args.GetArray("columns", semantic.String); err != nil {
return nil, err
} else if ok {
columns, err := interpreter.ToStringArray(cols)
if err != nil {
return nil, err
spec.Columns = columns
} else {
spec.Columns = []string{execute.DefaultValueColLabel}
return spec, nil
func newDifferenceOp() query.OperationSpec {
return new(DifferenceOpSpec)
// Kind returns DifferenceKind.
func (s *DifferenceOpSpec) Kind() query.OperationKind {
return DifferenceKind
// DifferenceProcedureSpec is the planner-level spec for difference. Its fields
// are filled from the matching DifferenceOpSpec fields.
type DifferenceProcedureSpec struct {
// NonNegative, when set, replaces a negative difference with the current value.
NonNegative bool `json:"non_negative"`
// Columns lists the labels of the columns to take the difference of.
Columns []string `json:"columns"`
func newDifferenceProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*DifferenceOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
return &DifferenceProcedureSpec{
NonNegative: spec.NonNegative,
Columns: spec.Columns,
}, nil
// Kind returns DifferenceKind.
func (s *DifferenceProcedureSpec) Kind() plan.ProcedureKind {
return DifferenceKind
func (s *DifferenceProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(DifferenceProcedureSpec)
*ns = *s
if s.Columns != nil {
ns.Columns = make([]string, len(s.Columns))
copy(ns.Columns, s.Columns)
return ns
func createDifferenceTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*DifferenceProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
cache := execute.NewBlockBuilderCache(a.Allocator())
d := execute.NewDataset(id, mode, cache)
t := NewDifferenceTransformation(d, cache, s)
return t, d, nil
// differenceTransformation replaces the values of the configured columns with
// the difference between consecutive rows.
type differenceTransformation struct {
// d is the dataset that retractions and watermark/processing-time updates are forwarded to.
d execute.Dataset
// cache hands out one block builder per partition key.
cache execute.BlockBuilderCache
// nonNegative, when set, replaces a negative difference with the current value.
nonNegative bool
// columns lists the labels of the columns to take the difference of.
columns []string
func NewDifferenceTransformation(d execute.Dataset, cache execute.BlockBuilderCache, spec *DifferenceProcedureSpec) *differenceTransformation {
return &differenceTransformation{
d: d,
cache: cache,
nonNegative: spec.NonNegative,
columns: spec.Columns,
// RetractBlock forwards the retraction of the block identified by key to the
// dataset t.d. The id argument is not used.
func (t *differenceTransformation) RetractBlock(id execute.DatasetID, key execute.PartitionKey) error {
return t.d.RetractBlock(key)
// Process writes block b into a new builder. For every column named in
// t.columns the values are replaced by the result of the matching difference
// helper; all other columns are copied. The first row of the block is dropped
// from every column.
//
// It returns an error if a builder for b's key already exists in the cache.
// NOTE(review): the statements that add the column metadata to the builder are
// only partly visible in this view.
func (t *differenceTransformation) Process(id execute.DatasetID, b execute.Block) error {
builder, created := t.cache.BlockBuilder(b.Key())
if !created {
return fmt.Errorf("difference found duplicate block with key: %v", b.Key())
cols := b.Cols()
// differences[j] is non-nil when column j is one of the requested columns.
differences := make([]*difference, len(cols))
for j, c := range cols {
found := false
for _, label := range t.columns {
if c.Label == label {
found = true
if found {
// Output type: int and uint inputs both become int, float stays float.
// NOTE(review): for any other input type typ keeps its zero value.
var typ execute.DataType
switch c.Type {
case execute.TInt, execute.TUInt:
typ = execute.TInt
case execute.TFloat:
typ = execute.TFloat
Label: c.Label,
Type: typ,
differences[j] = newDifference(j, t.nonNegative)
} else {
// We need to drop the first row since its difference is undefined
firstIdx := 1
return b.Do(func(cr execute.ColReader) error {
l := cr.Len()
for j, c := range cols {
d := differences[j]
switch c.Type {
case execute.TBool:
// NOTE(review): slicing with [firstIdx:] panics if the first batch has no
// rows — confirm batches are never empty. Same for the other copies below.
builder.AppendBools(j, cr.Bools(j)[firstIdx:])
case execute.TInt:
if d != nil {
for i := 0; i < l; i++ {
// Every row updates the helper's state, including the skipped one.
v := d.updateInt(cr.Ints(j)[i])
// Skip only row 0 of the first batch.
if i != 0 || firstIdx == 0 {
builder.AppendInt(j, v)
} else {
builder.AppendInts(j, cr.Ints(j)[firstIdx:])
case execute.TUInt:
if d != nil {
for i := 0; i < l; i++ {
v := d.updateUInt(cr.UInts(j)[i])
if i != 0 || firstIdx == 0 {
// updateUInt returns int64, so the result is appended as an int.
builder.AppendInt(j, v)
} else {
builder.AppendUInts(j, cr.UInts(j)[firstIdx:])
case execute.TFloat:
if d != nil {
for i := 0; i < l; i++ {
v := d.updateFloat(cr.Floats(j)[i])
if i != 0 || firstIdx == 0 {
builder.AppendFloat(j, v)
} else {
builder.AppendFloats(j, cr.Floats(j)[firstIdx:])
case execute.TString:
builder.AppendStrings(j, cr.Strings(j)[firstIdx:])
case execute.TTime:
builder.AppendTimes(j, cr.Times(j)[firstIdx:])
// Now that we skipped the first row, start at 0 for the rest of the batches
firstIdx = 0
return nil
// UpdateWatermark forwards the watermark mark to the dataset t.d.
// The id argument is not used.
func (t *differenceTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
return t.d.UpdateWatermark(mark)
// UpdateProcessingTime forwards the processing time pt to the dataset t.d.
// The id argument is not used.
func (t *differenceTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
return t.d.UpdateProcessingTime(pt)
// Finish receives the ID of a dataset and an error value.
// NOTE(review): the body is not visible in this view; presumably it hands err
// on to t.d — confirm.
func (t *differenceTransformation) Finish(id execute.DatasetID, err error) {
func newDifference(col int, nonNegative bool) *difference {
return &difference{
col: col,
first: true,
nonNegative: nonNegative,
type difference struct {
col int
first bool
nonNegative bool
pIntValue int64
pUIntValue uint64
pFloatValue float64
func (d *difference) updateInt(v int64) int64 {
if d.first {
d.pIntValue = v
d.first = false
return 0
diff := v - d.pIntValue
d.pIntValue = v
if d.nonNegative && diff < 0 {
//TODO(nathanielc): Return null when we have null support
// Also see
return v
return diff
func (d *difference) updateUInt(v uint64) int64 {
if d.first {
d.pUIntValue = v
d.first = false
return 0
var diff int64
if d.pUIntValue > v {
// Prevent uint64 overflow by applying the negative sign after the conversion to an int64.
diff = int64(d.pUIntValue-v) * -1
} else {
diff = int64(v - d.pUIntValue)
d.pUIntValue = v
if d.nonNegative && diff < 0 {
//TODO(nathanielc): Return null when we have null support
// Also see
return int64(v)
return diff
func (d *difference) updateFloat(v float64) float64 {
if d.first {
d.pFloatValue = v
d.first = false
return math.NaN()
diff := v - d.pFloatValue
d.pFloatValue = v
if d.nonNegative && diff < 0 {
//TODO(nathanielc): Return null when we have null support
// Also see
return v
return diff
@ -0,0 +1,324 @@
package functions_test
import (
// TestDifferenceOperation_Marshaling pairs the JSON form of a "difference"
// operation with non_negative set to true against the equivalent
// query.Operation value and hands both to the shared marshaling test helper.
func TestDifferenceOperation_Marshaling(t *testing.T) {
data := []byte(`{"id":"difference","kind":"difference","spec":{"non_negative":true}}`)
// In-memory counterpart of the JSON document above.
op := &query.Operation{
ID: "difference",
Spec: &functions.DifferenceOpSpec{
NonNegative: true,
querytest.OperationMarshalingTestHelper(t, data, op)
// TestDifference_PassThrough runs the shared pass-through test helper against
// a transformation built by functions.NewDifferenceTransformation.
// NOTE(review): the constructor's argument list is not visible in this
// extract — confirm against the file.
func TestDifference_PassThrough(t *testing.T) {
executetest.TransformationPassThroughTestHelper(t, func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
s := functions.NewDifferenceTransformation(
return s
// TestDifference_Process is a table-driven test: each case feeds the input
// blocks in data through a difference transformation built from spec and
// compares the result with want.
func TestDifference_Process(t *testing.T) {
testCases := []struct {
// name labels the case.
name string
// spec configures the transformation under test.
spec *functions.DifferenceProcedureSpec
// data holds the input blocks.
data []execute.Block
// want holds the expected output blocks.
want []*executetest.Block
name: "float",
spec: &functions.DifferenceProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(1), 2.0},
{execute.Time(2), 1.0},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(2), -1.0},
name: "int",
spec: &functions.DifferenceProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
Data: [][]interface{}{
{execute.Time(1), int64(20)},
{execute.Time(2), int64(10)},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
Data: [][]interface{}{
{execute.Time(2), int64(-10)},
name: "int non negative",
spec: &functions.DifferenceProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
NonNegative: true,
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
Data: [][]interface{}{
{execute.Time(1), int64(20)},
{execute.Time(2), int64(10)},
{execute.Time(3), int64(20)},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
Data: [][]interface{}{
{execute.Time(2), int64(10)},
{execute.Time(3), int64(10)},
name: "uint",
spec: &functions.DifferenceProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TUInt},
Data: [][]interface{}{
{execute.Time(1), uint64(10)},
{execute.Time(2), uint64(20)},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
Data: [][]interface{}{
{execute.Time(2), int64(10)},
name: "uint with negative result",
spec: &functions.DifferenceProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TUInt},
Data: [][]interface{}{
{execute.Time(1), uint64(20)},
{execute.Time(2), uint64(10)},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
Data: [][]interface{}{
{execute.Time(2), int64(-10)},
name: "uint with non negative",
spec: &functions.DifferenceProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
NonNegative: true,
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TUInt},
Data: [][]interface{}{
{execute.Time(1), uint64(20)},
{execute.Time(2), uint64(10)},
{execute.Time(3), uint64(20)},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
Data: [][]interface{}{
{execute.Time(2), int64(10)},
{execute.Time(3), int64(10)},
name: "non negative one block",
spec: &functions.DifferenceProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
NonNegative: true,
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(1), 2.0},
{execute.Time(2), 1.0},
{execute.Time(3), 2.0},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(2), 1.0},
{execute.Time(3), 1.0},
name: "float with tags",
spec: &functions.DifferenceProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(1), 2.0, "a"},
{execute.Time(2), 1.0, "b"},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(2), -1.0, "b"},
name: "float with multiple values",
spec: &functions.DifferenceProcedureSpec{
Columns: []string{"x", "y"},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(1), 2.0, 20.0},
{execute.Time(2), 1.0, 10.0},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(2), -1.0, -10.0},
name: "float non negative with multiple values",
spec: &functions.DifferenceProcedureSpec{
Columns: []string{"x", "y"},
NonNegative: true,
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(1), 2.0, 20.0},
{execute.Time(2), 1.0, 10.0},
{execute.Time(3), 2.0, 0.0},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(2), 1.0, 10.0},
{execute.Time(3), 1.0, 0.0},
for _, tc := range testCases {
tc := tc
t.Run(, func(t *testing.T) {
func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
return functions.NewDifferenceTransformation(d, c, tc.spec)
@ -0,0 +1,273 @@
package functions
import (
// DistinctKind is the name under which the distinct function, operation spec,
// procedure spec and transformation are registered.
const DistinctKind = "distinct"
// DistinctOpSpec is the operation spec for distinct; Column is serialised
// under the JSON key "column".
type DistinctOpSpec struct {
Column string `json:"column"`
// distinctSignature starts from the default function signature; init adds
// the "column" parameter to it.
var distinctSignature = query.DefaultFunctionSignature()
// init declares the string parameter "column" on distinctSignature and then
// registers the distinct function, operation spec, procedure spec and
// transformation under DistinctKind.
func init() {
// The parameter is added before the signature is handed to RegisterFunction.
distinctSignature.Params["column"] = semantic.String
query.RegisterFunction(DistinctKind, createDistinctOpSpec, distinctSignature)
query.RegisterOpSpec(DistinctKind, newDistinctOp)
plan.RegisterProcedureSpec(DistinctKind, newDistinctProcedure, DistinctKind)
execute.RegisterTransformation(DistinctKind, createDistinctTransformation)
func createDistinctOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
spec := new(DistinctOpSpec)
if col, ok, err := args.GetString("column"); err != nil {
return nil, err
} else if ok {
spec.Column = col
} else {
spec.Column = execute.DefaultValueColLabel
return spec, nil
// newDistinctOp returns a zero-valued DistinctOpSpec; init registers it as the
// op-spec factory for DistinctKind.
func newDistinctOp() query.OperationSpec {
return new(DistinctOpSpec)
// Kind reports DistinctKind as the operation kind of this spec.
func (s *DistinctOpSpec) Kind() query.OperationKind {
return DistinctKind
// DistinctProcedureSpec is the planner-level spec for distinct; Column names
// the column whose distinct values are produced.
type DistinctProcedureSpec struct {
Column string
func newDistinctProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*DistinctOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
return &DistinctProcedureSpec{
Column: spec.Column,
}, nil
// Kind reports DistinctKind as the procedure kind of this spec.
func (s *DistinctProcedureSpec) Kind() plan.ProcedureKind {
return DistinctKind
func (s *DistinctProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(DistinctProcedureSpec)
*ns = *s
return ns
// DistinctPointLimitRewriteRule is a plan rewrite rule rooted at "from"
// procedures; see Rewrite for what it changes.
type DistinctPointLimitRewriteRule struct {
// Root reports FromKind as the procedure kind this rule is rooted at.
func (r DistinctPointLimitRewriteRule) Root() plan.ProcedureKind {
return FromKind
func (r DistinctPointLimitRewriteRule) Rewrite(pr *plan.Procedure, planner plan.PlanRewriter) error {
fromSpec, ok := pr.Spec.(*FromProcedureSpec)
if !ok {
return nil
var distinct *DistinctProcedureSpec
pr.DoChildren(func(child *plan.Procedure) {
if d, ok := child.Spec.(*DistinctProcedureSpec); ok {
distinct = d
if distinct == nil {
return nil
groupStar := !fromSpec.GroupingSet && distinct.Column != execute.DefaultValueColLabel
groupByColumn := fromSpec.GroupingSet && ((len(fromSpec.GroupKeys) > 0 && execute.ContainsStr(fromSpec.GroupKeys, distinct.Column)) || (len(fromSpec.GroupExcept) > 0 && !execute.ContainsStr(fromSpec.GroupExcept, distinct.Column)))
if groupStar || groupByColumn {
fromSpec.LimitSet = true
fromSpec.PointsLimit = -1
return nil
return nil
func createDistinctTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*DistinctProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
cache := execute.NewBlockBuilderCache(a.Allocator())
d := execute.NewDataset(id, mode, cache)
t := NewDistinctTransformation(d, cache, s)
return t, d, nil
// distinctTransformation is the execution-time state of distinct.
type distinctTransformation struct {
// d is the dataset that watermark and processing-time updates are forwarded to.
d execute.Dataset
// cache supplies one block builder per partition key.
cache execute.BlockBuilderCache
// column is the label of the column whose distinct values are emitted.
column string
func NewDistinctTransformation(d execute.Dataset, cache execute.BlockBuilderCache, spec *DistinctProcedureSpec) *distinctTransformation {
return &distinctTransformation{
d: d,
cache: cache,
column: spec.Column,
// RetractBlock forwards the retraction of the block identified by key to the
// dataset t.d. The id argument is not used.
func (t *distinctTransformation) RetractBlock(id execute.DatasetID, key execute.PartitionKey) error {
return t.d.RetractBlock(key)
func (t *distinctTransformation) Process(id execute.DatasetID, b execute.Block) error {
builder, created := t.cache.BlockBuilder(b.Key())
if !created {
return fmt.Errorf("distinct found duplicate block with key: %v", b.Key())
colIdx := execute.ColIdx(t.column, b.Cols())
if colIdx < 0 {
return fmt.Errorf("no column %q exists", t.column)
col := b.Cols()[colIdx]
execute.AddBlockKeyCols(b.Key(), builder)
colIdx = builder.AddCol(execute.ColMeta{
Label: execute.DefaultValueColLabel,
Type: col.Type,
if b.Key().HasCol(t.column) {
j := execute.ColIdx(t.column, b.Key().Cols())
switch col.Type {
case execute.TBool:
builder.AppendBool(colIdx, b.Key().ValueBool(j))
case execute.TInt:
builder.AppendInt(colIdx, b.Key().ValueInt(j))
case execute.TUInt:
builder.AppendUInt(colIdx, b.Key().ValueUInt(j))
case execute.TFloat:
builder.AppendFloat(colIdx, b.Key().ValueFloat(j))
case execute.TString:
builder.AppendString(colIdx, b.Key().ValueString(j))
case execute.TTime:
builder.AppendTime(colIdx, b.Key().ValueTime(j))
execute.AppendKeyValues(b.Key(), builder)
// TODO: this is a hack
return b.Do(func(execute.ColReader) error {
return nil
var (
boolDistinct map[bool]bool
intDistinct map[int64]bool
uintDistinct map[uint64]bool
floatDistinct map[float64]bool
stringDistinct map[string]bool
timeDistinct map[execute.Time]bool
switch col.Type {
case execute.TBool:
boolDistinct = make(map[bool]bool)
case execute.TInt:
intDistinct = make(map[int64]bool)
case execute.TUInt:
uintDistinct = make(map[uint64]bool)
case execute.TFloat:
floatDistinct = make(map[float64]bool)
case execute.TString:
stringDistinct = make(map[string]bool)
case execute.TTime:
timeDistinct = make(map[execute.Time]bool)
return b.Do(func(cr execute.ColReader) error {
l := cr.Len()
for i := 0; i < l; i++ {
// Check distinct
switch col.Type {
case execute.TBool:
v := cr.Bools(colIdx)[i]
if boolDistinct[v] {
boolDistinct[v] = true
builder.AppendBool(colIdx, v)
case execute.TInt:
v := cr.Ints(colIdx)[i]
if intDistinct[v] {
intDistinct[v] = true
builder.AppendInt(colIdx, v)
case execute.TUInt:
v := cr.UInts(colIdx)[i]
if uintDistinct[v] {
uintDistinct[v] = true
builder.AppendUInt(colIdx, v)
case execute.TFloat:
v := cr.Floats(colIdx)[i]
if floatDistinct[v] {
floatDistinct[v] = true
builder.AppendFloat(colIdx, v)
case execute.TString:
v := cr.Strings(colIdx)[i]
if stringDistinct[v] {
stringDistinct[v] = true
builder.AppendString(colIdx, v)
case execute.TTime:
v := cr.Times(colIdx)[i]
if timeDistinct[v] {
timeDistinct[v] = true
builder.AppendTime(colIdx, v)
execute.AppendKeyValues(b.Key(), builder)
return nil
// UpdateWatermark forwards the watermark mark to the dataset t.d and returns
// whatever error that call reports. The id argument is not used.
func (t *distinctTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
return t.d.UpdateWatermark(mark)
// UpdateProcessingTime forwards the processing time pt to the dataset t.d and
// returns whatever error that call reports. The id argument is not used.
func (t *distinctTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
return t.d.UpdateProcessingTime(pt)
// Finish is called with the upstream dataset id and its terminal error.
// NOTE(review): the method body is not visible in this extract; presumably it
// propagates err to t.d like the sibling methods do — confirm against the file.
func (t *distinctTransformation) Finish(id execute.DatasetID, err error) {
@ -0,0 +1,263 @@
package functions
import (
// FilterKind is the name under which the filter function, operation spec,
// procedure spec and transformation are registered.
const FilterKind = "filter"
// FilterOpSpec is the operation spec for filter; Fn is the predicate function
// expression and is serialised under the JSON key "fn".
type FilterOpSpec struct {
Fn *semantic.FunctionExpression `json:"fn"`
// filterSignature starts from the default function signature; init adds the
// "fn" parameter to it.
var filterSignature = query.DefaultFunctionSignature()
// init declares the function-typed parameter "fn" on filterSignature and then
// registers the filter function, operation spec, procedure spec and
// transformation under FilterKind.
func init() {
//TODO(nathanielc): Use complete function signature here, or formalize soft kind validation instead of complete function validation.
filterSignature.Params["fn"] = semantic.Function
query.RegisterFunction(FilterKind, createFilterOpSpec, filterSignature)
query.RegisterOpSpec(FilterKind, newFilterOp)
plan.RegisterProcedureSpec(FilterKind, newFilterProcedure, FilterKind)
execute.RegisterTransformation(FilterKind, createFilterTransformation)
func createFilterOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
f, err := args.GetRequiredFunction("fn")
if err != nil {
return nil, err
fn, err := interpreter.ResolveFunction(f)
if err != nil {
return nil, err
return &FilterOpSpec{
Fn: fn,
}, nil
// newFilterOp returns a zero-valued FilterOpSpec; init registers it as the
// op-spec factory for FilterKind.
func newFilterOp() query.OperationSpec {
return new(FilterOpSpec)
// Kind reports FilterKind as the operation kind of this spec.
func (s *FilterOpSpec) Kind() query.OperationKind {
return FilterKind
// FilterProcedureSpec is the planner-level spec for filter; Fn is the
// predicate function expression.
type FilterProcedureSpec struct {
Fn *semantic.FunctionExpression
func newFilterProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*FilterOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
return &FilterProcedureSpec{
Fn: spec.Fn,
}, nil
// Kind reports FilterKind as the procedure kind of this spec.
func (s *FilterProcedureSpec) Kind() plan.ProcedureKind {
return FilterKind
func (s *FilterProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(FilterProcedureSpec)
ns.Fn = s.Fn.Copy().(*semantic.FunctionExpression)
return ns
func (s *FilterProcedureSpec) PushDownRules() []plan.PushDownRule {
return []plan.PushDownRule{
Root: FromKind,
Through: []plan.ProcedureKind{GroupKind, LimitKind, RangeKind},
Match: func(spec plan.ProcedureSpec) bool {
// TODO(nathanielc): Remove once row functions support calling functions
if _, ok := s.Fn.Body.(semantic.Expression); !ok {
return false
fs := spec.(*FromProcedureSpec)
if fs.Filter != nil {
if _, ok := fs.Filter.Body.(semantic.Expression); !ok {
return false
return true
Root: FilterKind,
Through: []plan.ProcedureKind{GroupKind, LimitKind, RangeKind},
Match: func(spec plan.ProcedureSpec) bool {
// TODO(nathanielc): Remove once row functions support calling functions
if _, ok := s.Fn.Body.(semantic.Expression); !ok {
return false
fs := spec.(*FilterProcedureSpec)
if _, ok := fs.Fn.Body.(semantic.Expression); !ok {
return false
return true
func (s *FilterProcedureSpec) PushDown(root *plan.Procedure, dup func() *plan.Procedure) {
switch spec := root.Spec.(type) {
case *FromProcedureSpec:
if spec.FilterSet {
spec.Filter = mergeArrowFunction(spec.Filter, s.Fn)
spec.FilterSet = true
spec.Filter = s.Fn
case *FilterProcedureSpec:
spec.Fn = mergeArrowFunction(spec.Fn, s.Fn)
func mergeArrowFunction(a, b *semantic.FunctionExpression) *semantic.FunctionExpression {
fn := a.Copy().(*semantic.FunctionExpression)
aExp, aOK := a.Body.(semantic.Expression)
bExp, bOK := b.Body.(semantic.Expression)
if aOK && bOK {
fn.Body = &semantic.LogicalExpression{
Operator: ast.AndOperator,
Left: aExp,
Right: bExp,
return fn
// TODO(nathanielc): This code is unreachable while the current PushDownRule Match function is inplace.
and := &semantic.LogicalExpression{
Operator: ast.AndOperator,
Left: aExp,
Right: bExp,
// Create pass through arguments expression
passThroughArgs := &semantic.ObjectExpression{
Properties: make([]*semantic.Property, len(a.Params)),
for i, p := range a.Params {
passThroughArgs.Properties[i] = &semantic.Property{
Key: p.Key,
//TODO(nathanielc): Construct valid IdentifierExpression with Declaration for the value.
//Value: p.Key,
if !aOK {
// Rewrite left expression as a function call.
and.Left = &semantic.CallExpression{
Callee: a.Copy().(*semantic.FunctionExpression),
Arguments: passThroughArgs.Copy().(*semantic.ObjectExpression),
if !bOK {
// Rewrite right expression as a function call.
and.Right = &semantic.CallExpression{
Callee: b.Copy().(*semantic.FunctionExpression),
Arguments: passThroughArgs.Copy().(*semantic.ObjectExpression),
return fn
func createFilterTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*FilterProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
cache := execute.NewBlockBuilderCache(a.Allocator())
d := execute.NewDataset(id, mode, cache)
t, err := NewFilterTransformation(d, cache, s)
if err != nil {
return nil, nil, err
return t, d, nil
// filterTransformation is the execution-time state of filter.
type filterTransformation struct {
// d is the dataset that watermark and processing-time updates are forwarded to.
d execute.Dataset
// cache supplies one block builder per partition key.
cache execute.BlockBuilderCache
// fn is the row predicate built from the spec's function expression.
fn *execute.RowPredicateFn
func NewFilterTransformation(d execute.Dataset, cache execute.BlockBuilderCache, spec *FilterProcedureSpec) (*filterTransformation, error) {
fn, err := execute.NewRowPredicateFn(spec.Fn)
if err != nil {
return nil, err
return &filterTransformation{
d: d,
cache: cache,
fn: fn,
}, nil
// RetractBlock forwards the retraction of the block identified by key to the
// dataset t.d. The id argument is not used.
func (t *filterTransformation) RetractBlock(id execute.DatasetID, key execute.PartitionKey) error {
return t.d.RetractBlock(key)
func (t *filterTransformation) Process(id execute.DatasetID, b execute.Block) error {
builder, created := t.cache.BlockBuilder(b.Key())
if !created {
return fmt.Errorf("filter found duplicate block with key: %v", b.Key())
execute.AddBlockCols(b, builder)
// Prepare the function for the column types.
cols := b.Cols()
if err := t.fn.Prepare(cols); err != nil {
// TODO(nathanielc): Should we not fail the query for failed compilation?
return err
// Append only matching rows to block
return b.Do(func(cr execute.ColReader) error {
l := cr.Len()
for i := 0; i < l; i++ {
if pass, err := t.fn.Eval(i, cr); err != nil {
log.Printf("failed to evaluate filter expression: %v", err)
} else if !pass {
// No match, skipping
execute.AppendRecord(i, cr, builder)
return nil
// UpdateWatermark forwards the watermark mark to the dataset t.d and returns
// whatever error that call reports. The id argument is not used.
func (t *filterTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
return t.d.UpdateWatermark(mark)
// UpdateProcessingTime forwards the processing time pt to the dataset t.d and
// returns whatever error that call reports. The id argument is not used.
func (t *filterTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
return t.d.UpdateProcessingTime(pt)
// Finish is called with the upstream dataset id and its terminal error.
// NOTE(review): the method body is not visible in this extract; presumably it
// propagates err to t.d like the sibling methods do — confirm against the file.
func (t *filterTransformation) Finish(id execute.DatasetID, err error) {
@ -0,0 +1,897 @@
package functions_test
import (
// TestFilter_NewQuery is a table-driven test: each case pairs a raw query
// containing a filter call (Raw) with the query.Spec it is expected to
// produce (Want) and is run through querytest.NewQueryTestHelper.
func TestFilter_NewQuery(t *testing.T) {
tests := []querytest.NewQueryTestCase{
Name: "from with database filter and range",
Raw: `from(db:"mydb") |> filter(fn: (r) => r["t1"]=="val1" and r["t2"]=="val2") |> range(start:-4h, stop:-2h) |> count()`,
Want: &query.Spec{
Operations: []*query.Operation{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
ID: "filter1",
Spec: &functions.FilterOpSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.LogicalExpression{
Operator: ast.AndOperator,
Left: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t1",
Right: &semantic.StringLiteral{Value: "val1"},
Right: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t2",
Right: &semantic.StringLiteral{Value: "val2"},
ID: "range2",
Spec: &functions.RangeOpSpec{
Start: query.Time{
Relative: -4 * time.Hour,
IsRelative: true,
Stop: query.Time{
Relative: -2 * time.Hour,
IsRelative: true,
ID: "count3",
Spec: &functions.CountOpSpec{
AggregateConfig: execute.DefaultAggregateConfig,
Edges: []query.Edge{
{Parent: "from0", Child: "filter1"},
{Parent: "filter1", Child: "range2"},
{Parent: "range2", Child: "count3"},
Name: "from with database filter (and with or) and range",
Raw: `from(db:"mydb")
|> filter(fn: (r) =>
|> range(start:-4h, stop:-2h)
|> count()`,
Want: &query.Spec{
Operations: []*query.Operation{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
ID: "filter1",
Spec: &functions.FilterOpSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.LogicalExpression{
Operator: ast.OrOperator,
Left: &semantic.LogicalExpression{
Operator: ast.AndOperator,
Left: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t1",
Right: &semantic.StringLiteral{Value: "val1"},
Right: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t2",
Right: &semantic.StringLiteral{Value: "val2"},
Right: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t3",
Right: &semantic.StringLiteral{Value: "val3"},
ID: "range2",
Spec: &functions.RangeOpSpec{
Start: query.Time{
Relative: -4 * time.Hour,
IsRelative: true,
Stop: query.Time{
Relative: -2 * time.Hour,
IsRelative: true,
ID: "count3",
Spec: &functions.CountOpSpec{
AggregateConfig: execute.DefaultAggregateConfig,
Edges: []query.Edge{
{Parent: "from0", Child: "filter1"},
{Parent: "filter1", Child: "range2"},
{Parent: "range2", Child: "count3"},
Name: "from with database filter including fields",
Raw: `from(db:"mydb")
|> filter(fn: (r) =>
(r["t1"] =="val1")
(r["_field"] == 10)
|> range(start:-4h, stop:-2h)
|> count()`,
Want: &query.Spec{
Operations: []*query.Operation{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
ID: "filter1",
Spec: &functions.FilterOpSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.LogicalExpression{
Operator: ast.AndOperator,
Left: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t1",
Right: &semantic.StringLiteral{Value: "val1"},
Right: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_field",
Right: &semantic.IntegerLiteral{Value: 10},
ID: "range2",
Spec: &functions.RangeOpSpec{
Start: query.Time{
Relative: -4 * time.Hour,
IsRelative: true,
Stop: query.Time{
Relative: -2 * time.Hour,
IsRelative: true,
ID: "count3",
Spec: &functions.CountOpSpec{
AggregateConfig: execute.DefaultAggregateConfig,
Edges: []query.Edge{
{Parent: "from0", Child: "filter1"},
{Parent: "filter1", Child: "range2"},
{Parent: "range2", Child: "count3"},
Name: "from with database filter with no parens including fields",
Raw: `from(db:"mydb")
|> filter(fn: (r) =>
r["_field"] == 10
|> range(start:-4h, stop:-2h)
|> count()`,
Want: &query.Spec{
Operations: []*query.Operation{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
ID: "filter1",
Spec: &functions.FilterOpSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.LogicalExpression{
Operator: ast.AndOperator,
Left: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t1",
Right: &semantic.StringLiteral{Value: "val1"},
Right: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_field",
Right: &semantic.IntegerLiteral{Value: 10},
ID: "range2",
Spec: &functions.RangeOpSpec{
Start: query.Time{
Relative: -4 * time.Hour,
IsRelative: true,
Stop: query.Time{
Relative: -2 * time.Hour,
IsRelative: true,
ID: "count3",
Spec: &functions.CountOpSpec{
AggregateConfig: execute.DefaultAggregateConfig,
Edges: []query.Edge{
{Parent: "from0", Child: "filter1"},
{Parent: "filter1", Child: "range2"},
{Parent: "range2", Child: "count3"},
Name: "from with database filter with no parens including regex and field",
Raw: `from(db:"mydb")
|> filter(fn: (r) =>
r["_field"] == 10.5
|> range(start:-4h, stop:-2h)
|> count()`,
Want: &query.Spec{
Operations: []*query.Operation{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
ID: "filter1",
Spec: &functions.FilterOpSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.LogicalExpression{
Operator: ast.AndOperator,
Left: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t1",
Right: &semantic.RegexpLiteral{Value: regexp.MustCompile("val1")},
Right: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_field",
Right: &semantic.FloatLiteral{Value: 10.5},
ID: "range2",
Spec: &functions.RangeOpSpec{
Start: query.Time{
Relative: -4 * time.Hour,
IsRelative: true,
Stop: query.Time{
Relative: -2 * time.Hour,
IsRelative: true,
ID: "count3",
Spec: &functions.CountOpSpec{
AggregateConfig: execute.DefaultAggregateConfig,
Edges: []query.Edge{
{Parent: "from0", Child: "filter1"},
{Parent: "filter1", Child: "range2"},
{Parent: "range2", Child: "count3"},
Name: "from with database regex with escape",
Raw: `from(db:"mydb")
|> filter(fn: (r) =>
Want: &query.Spec{
Operations: []*query.Operation{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
ID: "filter1",
Spec: &functions.FilterOpSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t1",
Right: &semantic.RegexpLiteral{Value: regexp.MustCompile(`va/l1`)},
Edges: []query.Edge{
{Parent: "from0", Child: "filter1"},
Name: "from with database with two regex",
Raw: `from(db:"mydb")
|> filter(fn: (r) =>
r["t2"] != /val2/
Want: &query.Spec{
Operations: []*query.Operation{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
ID: "filter1",
Spec: &functions.FilterOpSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.LogicalExpression{
Operator: ast.AndOperator,
Left: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t1",
Right: &semantic.RegexpLiteral{Value: regexp.MustCompile(`va/l1`)},
Right: &semantic.BinaryExpression{
Operator: ast.NotEqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t2",
Right: &semantic.RegexpLiteral{Value: regexp.MustCompile(`val2`)},
Edges: []query.Edge{
{Parent: "from0", Child: "filter1"},
for _, tc := range tests {
tc := tc
t.Run(tc.Name, func(t *testing.T) {
querytest.NewQueryTestHelper(t, tc)
// TestFilterOperation_Marshaling pairs the JSON form of a filter operation
// with the equivalent query.Operation value (predicate: r._measurement !=
// "mem") and hands both to the shared marshaling test helper.
// NOTE(review): several lines of the JSON literal are missing from this
// extract — confirm the document against the file.
func TestFilterOperation_Marshaling(t *testing.T) {
data := []byte(`{
"type": "ArrowFunctionExpression",
"params": [{"type":"FunctionParam","key":{"type":"Identifier","name":"r"}}],
"operator": "!=",
"object": {
"type": "IdentifierExpression",
"property": "_measurement"
op := &query.Operation{
ID: "filter",
Spec: &functions.FilterOpSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.NotEqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_measurement",
Right: &semantic.StringLiteral{Value: "mem"},
querytest.OperationMarshalingTestHelper(t, data, op)
// TestFilter_Process is a table-driven test: each case feeds the input blocks
// in data through a filter transformation built from spec and compares the
// result with want.
func TestFilter_Process(t *testing.T) {
testCases := []struct {
// name labels the case.
name string
// spec configures the transformation under test.
spec *functions.FilterProcedureSpec
// data holds the input blocks.
data []execute.Block
// want holds the expected output blocks.
want []*executetest.Block
name: `_value>5`,
spec: &functions.FilterProcedureSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.GreaterThanOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_value",
Right: &semantic.FloatLiteral{Value: 5},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(1), 1.0},
{execute.Time(2), 6.0},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(2), 6.0},
name: "_value>5 multiple blocks",
spec: &functions.FilterProcedureSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.GreaterThanOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_value",
Right: &semantic.FloatLiteral{
Value: 5,
data: []execute.Block{
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{"a", execute.Time(1), 3.0},
{"a", execute.Time(2), 6.0},
{"a", execute.Time(2), 1.0},
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{"b", execute.Time(3), 3.0},
{"b", execute.Time(3), 2.0},
{"b", execute.Time(4), 8.0},
want: []*executetest.Block{
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{"a", execute.Time(2), 6.0},
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{"b", execute.Time(4), 8.0},
name: "_value>5 and t1 = a and t2 = y",
spec: &functions.FilterProcedureSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.LogicalExpression{
Operator: ast.AndOperator,
Left: &semantic.BinaryExpression{
Operator: ast.GreaterThanOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_value",
Right: &semantic.FloatLiteral{
Value: 5,
Right: &semantic.LogicalExpression{
Operator: ast.AndOperator,
Left: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t1",
Right: &semantic.StringLiteral{
Value: "a",
Right: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t2",
Right: &semantic.StringLiteral{
Value: "y",
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(1), 1.0, "a", "x"},
{execute.Time(2), 6.0, "a", "x"},
{execute.Time(3), 8.0, "a", "y"},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(3), 8.0, "a", "y"},
for _, tc := range testCases {
tc := tc
t.Run(, func(t *testing.T) {
func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
f, err := functions.NewFilterTransformation(d, c, tc.spec)
if err != nil {
return f
// TestFilter_PushDown verifies that a filter pushed down onto an unfiltered
// from procedure ends up as the from spec's Filter, with FilterSet raised.
func TestFilter_PushDown(t *testing.T) {
// Filter under test: r._measurement != "mem".
spec := &functions.FilterProcedureSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.NotEqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_measurement",
Right: &semantic.StringLiteral{Value: "mem"},
// The push-down target: a from procedure with nothing set on it.
root := &plan.Procedure{
Spec: new(functions.FromProcedureSpec),
// Expected result: the same predicate now lives on the from spec.
want := &plan.Procedure{
Spec: &functions.FromProcedureSpec{
FilterSet: true,
Filter: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.NotEqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_measurement",
Right: &semantic.StringLiteral{Value: "mem"},
plantest.PhysicalPlan_PushDown_TestHelper(t, spec, root, false, want)
// TestFilter_PushDown_MergeExpressions verifies that pushing a filter onto a
// root that already carries a predicate combines the two predicates with a
// logical AND, existing predicate on the left and new predicate on the right.
func TestFilter_PushDown_MergeExpressions(t *testing.T) {
testCases := []struct {
name string
spec *functions.FilterProcedureSpec
root *plan.Procedure
want *plan.Procedure
// Root is a from procedure that already filters on _measurement != "mem".
name: "merge with from",
spec: &functions.FilterProcedureSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.NotEqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_measurement",
Right: &semantic.StringLiteral{Value: "cpu"},
root: &plan.Procedure{
Spec: &functions.FromProcedureSpec{
FilterSet: true,
Filter: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.NotEqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_measurement",
Right: &semantic.StringLiteral{Value: "mem"},
// Expected: (_measurement != "mem") and (_measurement != "cpu") on the from spec.
want: &plan.Procedure{
Spec: &functions.FromProcedureSpec{
FilterSet: true,
Filter: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.LogicalExpression{
Operator: ast.AndOperator,
Left: &semantic.BinaryExpression{
Operator: ast.NotEqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_measurement",
Right: &semantic.StringLiteral{Value: "mem"},
Right: &semantic.BinaryExpression{
Operator: ast.NotEqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_measurement",
Right: &semantic.StringLiteral{Value: "cpu"},
// Root is itself a filter procedure (_measurement != "mem").
name: "merge with filter",
spec: &functions.FilterProcedureSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.NotEqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_measurement",
Right: &semantic.StringLiteral{Value: "cpu"},
root: &plan.Procedure{
Spec: &functions.FilterProcedureSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.NotEqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_measurement",
Right: &semantic.StringLiteral{Value: "mem"},
// Expected: a single filter procedure holding the AND of both predicates.
want: &plan.Procedure{
Spec: &functions.FilterProcedureSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.LogicalExpression{
Operator: ast.AndOperator,
Left: &semantic.BinaryExpression{
Operator: ast.NotEqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_measurement",
Right: &semantic.StringLiteral{Value: "mem"},
Right: &semantic.BinaryExpression{
Operator: ast.NotEqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_measurement",
Right: &semantic.StringLiteral{Value: "cpu"},
// Each case runs as its own subtest; tc is re-bound so the closure captures
// the current case rather than the loop variable.
for _, tc := range testCases {
tc := tc
t.Run(, func(t *testing.T) {
plantest.PhysicalPlan_PushDown_TestHelper(t, tc.spec, tc.root, false, tc.want)
@ -0,0 +1,170 @@
package functions
import (
// FirstKind is the name under which the first operation, procedure and
// transformation are registered.
const FirstKind = "first"
// FirstOpSpec is the operation spec for first. createFirstOpSpec fills its
// SelectorConfig from the call arguments.
type FirstOpSpec struct {
// firstSignature starts from the default function signature; init adds the
// parameters specific to first.
var firstSignature = query.DefaultFunctionSignature()
// init declares the "column" and "useRowTime" parameters and registers first
// with the query, plan and execute packages.
func init() {
firstSignature.Params["column"] = semantic.String
firstSignature.Params["useRowTime"] = semantic.Bool
query.RegisterFunction(FirstKind, createFirstOpSpec, firstSignature)
query.RegisterOpSpec(FirstKind, newFirstOp)
plan.RegisterProcedureSpec(FirstKind, newFirstProcedure, FirstKind)
execute.RegisterTransformation(FirstKind, createFirstTransformation)
func createFirstOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
spec := new(FirstOpSpec)
if err := spec.SelectorConfig.ReadArgs(args); err != nil {
return nil, err
return spec, nil
func newFirstOp() query.OperationSpec {
return new(FirstOpSpec)
func (s *FirstOpSpec) Kind() query.OperationKind {
return FirstKind
// FirstProcedureSpec is the plan-level spec for first. newFirstProcedure
// copies the operation spec's SelectorConfig into it.
type FirstProcedureSpec struct {
func newFirstProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*FirstOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
return &FirstProcedureSpec{
SelectorConfig: spec.SelectorConfig,
}, nil
func (s *FirstProcedureSpec) Kind() plan.ProcedureKind {
return FirstKind
func (s *FirstProcedureSpec) PushDownRules() []plan.PushDownRule {
return []plan.PushDownRule{{
Root: FromKind,
Through: []plan.ProcedureKind{GroupKind, LimitKind, FilterKind},
Match: func(spec plan.ProcedureSpec) bool {
selectSpec := spec.(*FromProcedureSpec)
return !selectSpec.AggregateSet
// PushDown folds first into the from procedure at root by constraining the
// from spec's bounds, point limit and sort direction.
//
// dup is invoked to obtain a duplicate of root when root already has any of
// those three settings claimed.
func (s *FirstProcedureSpec) PushDown(root *plan.Procedure, dup func() *plan.Procedure) {
// Unchecked assertion: root is expected to be the from procedure.
selectSpec := root.Spec.(*FromProcedureSpec)
// Bounds, limit or direction are already set on this from: switch to a
// duplicate and clear those settings on it.
if selectSpec.BoundsSet || selectSpec.LimitSet || selectSpec.DescendingSet {
root = dup()
selectSpec = root.Spec.(*FromProcedureSpec)
selectSpec.BoundsSet = false
selectSpec.Bounds = plan.BoundsSpec{}
selectSpec.LimitSet = false
selectSpec.PointsLimit = 0
selectSpec.SeriesLimit = 0
selectSpec.SeriesOffset = 0
selectSpec.DescendingSet = false
selectSpec.Descending = false
// NOTE(review): TestFirst_PushDown_Duplicate expects the duplicate to be left
// at zero values, so the duplicate branch presumably ends here — confirm.
// Read the whole time range, a single point, in ascending order.
selectSpec.BoundsSet = true
selectSpec.Bounds = plan.BoundsSpec{
Start: query.MinTime,
Stop: query.Now,
selectSpec.LimitSet = true
selectSpec.PointsLimit = 1
selectSpec.DescendingSet = true
selectSpec.Descending = false
func (s *FirstProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(FirstProcedureSpec)
*ns = *s
ns.SelectorConfig = s.SelectorConfig
return ns
// FirstSelector is the index selector behind first. It serves as the selector
// for every value type (see the New*Selector methods).
type FirstSelector struct {
// selected becomes true once an index has been returned; reset clears it.
selected bool
func createFirstTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
ps, ok := spec.(*FirstProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", ps)
t, d := execute.NewIndexSelectorTransformationAndDataset(id, mode, new(FirstSelector), ps.SelectorConfig, a.Allocator())
return t, d, nil
func (s *FirstSelector) reset() {
s.selected = false
func (s *FirstSelector) NewBoolSelector() execute.DoBoolIndexSelector {
return s
func (s *FirstSelector) NewIntSelector() execute.DoIntIndexSelector {
return s
func (s *FirstSelector) NewUIntSelector() execute.DoUIntIndexSelector {
return s
func (s *FirstSelector) NewFloatSelector() execute.DoFloatIndexSelector {
return s
func (s *FirstSelector) NewStringSelector() execute.DoStringIndexSelector {
return s
func (s *FirstSelector) selectFirst(l int) []int {
if !s.selected && l > 0 {
s.selected = true
return []int{0}
return nil
func (s *FirstSelector) DoBool(vs []bool) []int {
return s.selectFirst(len(vs))
func (s *FirstSelector) DoInt(vs []int64) []int {
return s.selectFirst(len(vs))
func (s *FirstSelector) DoUInt(vs []uint64) []int {
return s.selectFirst(len(vs))
func (s *FirstSelector) DoFloat(vs []float64) []int {
return s.selectFirst(len(vs))
func (s *FirstSelector) DoString(vs []string) []int {
return s.selectFirst(len(vs))
@ -0,0 +1,132 @@
package functions_test
import (
// TestFirstOperation_Marshaling checks the JSON form of a first operation
// against the equivalent query.Operation value: the selector's Column appears
// under the "column" key of "spec".
func TestFirstOperation_Marshaling(t *testing.T) {
data := []byte(`{"id":"first","kind":"first","spec":{"column":"foo"}}`)
op := &query.Operation{
ID: "first",
Spec: &functions.FirstOpSpec{
SelectorConfig: execute.SelectorConfig{
Column: "foo",
querytest.OperationMarshalingTestHelper(t, data, op)
// TestFirst_Process feeds a ten-row block to the first selector and expects
// index 0 to be selected once, with nil for each of the remaining nine entries.
func TestFirst_Process(t *testing.T) {
testCases := []struct {
name string
data *executetest.Block
want [][]int
name: "first",
data: &executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(0), 0.0, "a", "y"},
{execute.Time(10), 5.0, "a", "x"},
{execute.Time(20), 9.0, "a", "y"},
{execute.Time(30), 4.0, "a", "x"},
{execute.Time(40), 6.0, "a", "y"},
{execute.Time(50), 8.0, "a", "x"},
{execute.Time(60), 1.0, "a", "y"},
{execute.Time(70), 2.0, "a", "x"},
{execute.Time(80), 3.0, "a", "y"},
{execute.Time(90), 7.0, "a", "x"},
// One entry per input row: the first selects index 0, the rest select nothing.
want: [][]int{{0}, nil, nil, nil, nil, nil, nil, nil, nil, nil},
// tc is re-bound so each subtest closure captures its own case.
for _, tc := range testCases {
tc := tc
t.Run(, func(t *testing.T) {
// BenchmarkFirst benchmarks a fresh FirstSelector over NormalBlock using the
// shared index-selector benchmark helper.
func BenchmarkFirst(b *testing.B) {
executetest.IndexSelectorFuncBenchmarkHelper(b, new(functions.FirstSelector), NormalBlock)
// TestFirst_PushDown_Match checks the Match predicate of first's push-down
// rule against a from spec with and without an aggregate set.
func TestFirst_PushDown_Match(t *testing.T) {
spec := new(functions.FirstProcedureSpec)
from := new(functions.FromProcedureSpec)
// Should not match when an aggregate is set
from.AggregateSet = true
plantest.PhysicalPlan_PushDown_Match_TestHelper(t, spec, from, []bool{false})
// Should match when no aggregate is set
from.AggregateSet = false
plantest.PhysicalPlan_PushDown_Match_TestHelper(t, spec, from, []bool{true})
// TestFirst_PushDown verifies that pushing first onto an untouched from
// procedure sets full-range bounds, a point limit of one and an explicit
// non-descending order on the from spec.
func TestFirst_PushDown(t *testing.T) {
spec := new(functions.FirstProcedureSpec)
// Root has none of bounds, limit or direction set.
root := &plan.Procedure{
Spec: new(functions.FromProcedureSpec),
want := &plan.Procedure{
Spec: &functions.FromProcedureSpec{
BoundsSet: true,
Bounds: plan.BoundsSpec{
Start: query.MinTime,
Stop: query.Now,
LimitSet: true,
PointsLimit: 1,
DescendingSet: true,
Descending: false,
plantest.PhysicalPlan_PushDown_TestHelper(t, spec, root, false, want)
// TestFirst_PushDown_Duplicate covers the case where the from procedure
// already has bounds, limit and direction set: first must work on a duplicate
// of the root, and that duplicate is expected to come back at zero values.
func TestFirst_PushDown_Duplicate(t *testing.T) {
spec := new(functions.FirstProcedureSpec)
// Root already carries every setting first would want to claim.
root := &plan.Procedure{
Spec: &functions.FromProcedureSpec{
BoundsSet: true,
Bounds: plan.BoundsSpec{
Start: query.MinTime,
Stop: query.Now,
LimitSet: true,
PointsLimit: 1,
DescendingSet: true,
Descending: false,
want := &plan.Procedure{
// Expect the duplicate has been reset to zero values
Spec: new(functions.FromProcedureSpec),
plantest.PhysicalPlan_PushDown_TestHelper(t, spec, root, true, want)
@ -0,0 +1,234 @@
package functions
import (
// FromKind is the name under which the from operation, procedure and source
// are registered.
const FromKind = "from"
// FromOpSpec is the operation spec for from. createFromOpSpec requires exactly
// one of Database and Bucket to be non-empty; Hosts is optional.
type FromOpSpec struct {
Database string `json:"db"`
Bucket string `json:"bucket"`
Hosts []string `json:"hosts"`
// fromSignature is the function signature registered for from; it returns a
// table object.
// NOTE(review): only "db" is declared here although createFromOpSpec also
// reads "bucket" and "hosts" — confirm whether they should be listed too.
var fromSignature = semantic.FunctionSignature{
Params: map[string]semantic.Type{
"db": semantic.String,
ReturnType: query.TableObjectType,
// init registers from with the query and plan packages and registers
// createFromSource as its execution source.
func init() {
query.RegisterFunction(FromKind, createFromOpSpec, fromSignature)
query.RegisterOpSpec(FromKind, newFromOp)
plan.RegisterProcedureSpec(FromKind, newFromProcedure, FromKind)
execute.RegisterSource(FromKind, createFromSource)
func createFromOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
spec := new(FromOpSpec)
if db, ok, err := args.GetString("db"); err != nil {
return nil, err
} else if ok {
spec.Database = db
if bucket, ok, err := args.GetString("bucket"); err != nil {
return nil, err
} else if ok {
spec.Bucket = bucket
if spec.Database == "" && spec.Bucket == "" {
return nil, errors.New("must specify one of db or bucket")
if spec.Database != "" && spec.Bucket != "" {
return nil, errors.New("must specify only one of db or bucket")
if array, ok, err := args.GetArray("hosts", semantic.String); err != nil {
return nil, err
} else if ok {
spec.Hosts, err = interpreter.ToStringArray(array)
if err != nil {
return nil, err
return spec, nil
func newFromOp() query.OperationSpec {
return new(FromOpSpec)
func (s *FromOpSpec) Kind() query.OperationKind {
return FromKind
// FromProcedureSpec is the plan-level spec for from. Besides the data
// location it accumulates settings that other procedures push down into the
// read; each group of settings is guarded by a *Set flag.
type FromProcedureSpec struct {
// Data location, copied from the operation spec by newFromProcedure.
Database string
Bucket string
Hosts []string
// Time bounds of the read.
BoundsSet bool
Bounds plan.BoundsSpec
// Predicate applied by the source.
FilterSet bool
Filter *semantic.FunctionExpression
// Sort direction.
DescendingSet bool
Descending bool
// Point and series limits.
LimitSet bool
PointsLimit int64
SeriesLimit int64
SeriesOffset int64
// Windowing.
WindowSet bool
Window plan.WindowSpec
// Grouping, set by GroupProcedureSpec.PushDown.
GroupingSet bool
OrderByTime bool
MergeAll bool
GroupKeys []string
GroupExcept []string
// Aggregate, set by AggregateGroupRewriteRule.
AggregateSet bool
AggregateMethod string
func newFromProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*FromOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
return &FromProcedureSpec{
Database: spec.Database,
Bucket: spec.Bucket,
Hosts: spec.Hosts,
}, nil
func (s *FromProcedureSpec) Kind() plan.ProcedureKind {
return FromKind
func (s *FromProcedureSpec) TimeBounds() plan.BoundsSpec {
return s.Bounds
func (s *FromProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(FromProcedureSpec)
ns.Database = s.Database
ns.Bucket = s.Bucket
if len(s.Hosts) > 0 {
ns.Hosts = make([]string, len(s.Hosts))
copy(ns.Hosts, s.Hosts)
ns.BoundsSet = s.BoundsSet
ns.Bounds = s.Bounds
ns.FilterSet = s.FilterSet
// TODO copy predicate
ns.Filter = s.Filter
ns.DescendingSet = s.DescendingSet
ns.Descending = s.Descending
ns.LimitSet = s.LimitSet
ns.PointsLimit = s.PointsLimit
ns.SeriesLimit = s.SeriesLimit
ns.SeriesOffset = s.SeriesOffset
ns.WindowSet = s.WindowSet
ns.Window = s.Window
ns.AggregateSet = s.AggregateSet
ns.AggregateMethod = s.AggregateMethod
return ns
// createFromSource builds the storage-backed execution source for a from
// procedure, translating the settings accumulated on the spec into the
// arguments of storage.NewSource.
//
// It returns an error when the spec names a bucket that cannot be found for
// the current organization.
func createFromSource(prSpec plan.ProcedureSpec, dsid execute.DatasetID, a execute.Administration) (execute.Source, error) {
// NOTE(review): unchecked assertion — panics if prSpec is not a *FromProcedureSpec.
spec := prSpec.(*FromProcedureSpec)
var w execute.Window
if spec.WindowSet {
// Use the pushed-down window, resolving its start time through a.
w = execute.Window{
Every: execute.Duration(spec.Window.Every),
Period: execute.Duration(spec.Window.Period),
Round: execute.Duration(spec.Window.Round),
Start: a.ResolveTime(spec.Window.Start),
} else {
// No window pushed down: use a single window spanning the resolved bounds.
duration := execute.Duration(a.ResolveTime(spec.Bounds.Stop)) - execute.Duration(a.ResolveTime(spec.Bounds.Start))
w = execute.Window{
Every: duration,
Period: duration,
Start: a.ResolveTime(spec.Bounds.Start),
// currentTime is the end of the first window.
currentTime := w.Start + execute.Time(w.Period)
bounds := execute.Bounds{
Start: a.ResolveTime(spec.Bounds.Start),
Stop: a.ResolveTime(spec.Bounds.Stop),
// NOTE(review): unchecked assertion — panics if no storage.Dependencies were
// stored under FromKind (see InjectFromDependencies).
deps := a.Dependencies()[FromKind].(storage.Dependencies)
orgID := a.OrganizationID()
var bucketID id.ID
if spec.Database == "" {
// Bucket given by name: resolve it within the organization.
b, ok := deps.BucketLookup.Lookup(orgID, spec.Bucket)
if !ok {
return nil, fmt.Errorf("could not find bucket %q", spec.Bucket)
bucketID = b
} else {
// Database given: its name is converted directly into the bucket ID.
bucketID = id.ID(spec.Database)
return storage.NewSource(
OrganizationID: orgID,
BucketID: bucketID,
Hosts: spec.Hosts,
Predicate: spec.Filter,
PointsLimit: spec.PointsLimit,
SeriesLimit: spec.SeriesLimit,
SeriesOffset: spec.SeriesOffset,
Descending: spec.Descending,
OrderByTime: spec.OrderByTime,
MergeAll: spec.MergeAll,
GroupKeys: spec.GroupKeys,
GroupExcept: spec.GroupExcept,
AggregateMethod: spec.AggregateMethod,
), nil
func InjectFromDependencies(depsMap execute.Dependencies, deps storage.Dependencies) error {
if err := deps.Validate(); err != nil {
return err
depsMap[FromKind] = deps
return nil
@ -0,0 +1,91 @@
package functions_test
import (
// TestFrom_NewQuery runs from() query strings through the new-query test
// helper, covering invalid argument combinations and one full pipeline.
func TestFrom_NewQuery(t *testing.T) {
tests := []querytest.NewQueryTestCase{
// Neither db nor bucket given.
Name: "from no args",
Raw: `from()`,
WantErr: true,
// Both db and bucket given.
Name: "from conflicting args",
Raw: `from(db:"d", bucket:"b")`,
WantErr: true,
// Same argument supplied twice.
Name: "from repeat arg",
Raw: `from(db:"telegraf", db:"oops")`,
WantErr: true,
// Argument name that from does not read.
Name: "from",
Raw: `from(db:"telegraf", chicken:"what is this?")`,
WantErr: true,
// Valid pipeline: from -> range -> sum, with IDs suffixed in order.
Name: "from with database",
Raw: `from(db:"mydb") |> range(start:-4h, stop:-2h) |> sum()`,
Want: &query.Spec{
Operations: []*query.Operation{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
ID: "range1",
Spec: &functions.RangeOpSpec{
Start: query.Time{
Relative: -4 * time.Hour,
IsRelative: true,
Stop: query.Time{
Relative: -2 * time.Hour,
IsRelative: true,
ID: "sum2",
Spec: &functions.SumOpSpec{
AggregateConfig: execute.DefaultAggregateConfig,
Edges: []query.Edge{
{Parent: "from0", Child: "range1"},
{Parent: "range1", Child: "sum2"},
// tc is re-bound so each subtest closure captures its own case.
for _, tc := range tests {
tc := tc
t.Run(tc.Name, func(t *testing.T) {
querytest.NewQueryTestHelper(t, tc)
// TestFromOperation_Marshaling checks the JSON form of a from operation
// against the equivalent query.Operation value: Database appears under the
// "db" key of "spec".
func TestFromOperation_Marshaling(t *testing.T) {
data := []byte(`{"id":"from","kind":"from","spec":{"db":"mydb"}}`)
op := &query.Operation{
ID: "from",
Spec: &functions.FromOpSpec{
Database: "mydb",
querytest.OperationMarshalingTestHelper(t, data, op)
@ -0,0 +1,273 @@
package functions
import (
// GroupKind is the name under which the group operation, procedure and
// transformation are registered.
const GroupKind = "group"
// GroupOpSpec is the operation spec for group. createGroupOpSpec rejects specs
// in which both By and Except are non-empty.
type GroupOpSpec struct {
By []string `json:"by"`
Except []string `json:"except"`
// groupSignature starts from the default function signature; init adds the
// parameters specific to group.
var groupSignature = query.DefaultFunctionSignature()
// init declares "by" and "except" as string-array parameters and registers
// group with the query, plan and execute packages.
func init() {
groupSignature.Params["by"] = semantic.NewArrayType(semantic.String)
groupSignature.Params["except"] = semantic.NewArrayType(semantic.String)
query.RegisterFunction(GroupKind, createGroupOpSpec, groupSignature)
query.RegisterOpSpec(GroupKind, newGroupOp)
plan.RegisterProcedureSpec(GroupKind, newGroupProcedure, GroupKind)
execute.RegisterTransformation(GroupKind, createGroupTransformation)
func createGroupOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
spec := new(GroupOpSpec)
if array, ok, err := args.GetArray("by", semantic.String); err != nil {
return nil, err
} else if ok {
spec.By, err = interpreter.ToStringArray(array)
if err != nil {
return nil, err
if array, ok, err := args.GetArray("except", semantic.String); err != nil {
return nil, err
} else if ok {
spec.Except, err = interpreter.ToStringArray(array)
if err != nil {
return nil, err
if len(spec.By) > 0 && len(spec.Except) > 0 {
return nil, errors.New(`cannot specify both "by" and "except" keyword arguments`)
return spec, nil
func newGroupOp() query.OperationSpec {
return new(GroupOpSpec)
func (s *GroupOpSpec) Kind() query.OperationKind {
return GroupKind
// GroupProcedureSpec is the plan-level spec for group; newGroupProcedure
// copies both lists from the operation spec.
type GroupProcedureSpec struct {
// By lists the column labels to group on.
By []string
// Except lists the column labels to leave out of the grouping; it is only
// consulted when By is empty (see groupTransformation.Process).
Except []string
func newGroupProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*GroupOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
p := &GroupProcedureSpec{
By: spec.By,
Except: spec.Except,
return p, nil
func (s *GroupProcedureSpec) Kind() plan.ProcedureKind {
return GroupKind
func (s *GroupProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(GroupProcedureSpec)
ns.By = make([]string, len(s.By))
copy(ns.By, s.By)
ns.Except = make([]string, len(s.Except))
copy(ns.Except, s.Except)
return ns
func (s *GroupProcedureSpec) PushDownRules() []plan.PushDownRule {
return []plan.PushDownRule{{
Root: FromKind,
Through: []plan.ProcedureKind{LimitKind, RangeKind, FilterKind},
Match: func(spec plan.ProcedureSpec) bool {
selectSpec := spec.(*FromProcedureSpec)
return !selectSpec.AggregateSet
// PushDown folds group into the from procedure at root by recording the
// grouping on the from spec.
//
// dup is invoked to obtain a duplicate of root when root already has a
// grouping set.
func (s *GroupProcedureSpec) PushDown(root *plan.Procedure, dup func() *plan.Procedure) {
// Unchecked assertion: root is expected to be the from procedure.
selectSpec := root.Spec.(*FromProcedureSpec)
// A grouping is already claimed on this from: switch to a duplicate and clear
// the grouping settings on it.
if selectSpec.GroupingSet {
root = dup()
selectSpec = root.Spec.(*FromProcedureSpec)
selectSpec.OrderByTime = false
selectSpec.GroupingSet = false
selectSpec.MergeAll = false
selectSpec.GroupKeys = nil
selectSpec.GroupExcept = nil
// NOTE(review): TestGroup_PushDown_Duplicate expects the duplicate to be left
// at zero values, so the duplicate branch presumably ends here — confirm.
selectSpec.GroupingSet = true
// TODO implement OrderByTime
//selectSpec.OrderByTime = true
// Merge all series into a single group if we have no specific grouping dimensions.
selectSpec.MergeAll = len(s.By) == 0 && len(s.Except) == 0
// The slices are shared with s, not cloned.
selectSpec.GroupKeys = s.By
selectSpec.GroupExcept = s.Except
// AggregateGroupRewriteRule is a plan rewrite rule rooted at from procedures.
// It moves an aggregate child into the from spec (see Rewrite).
type AggregateGroupRewriteRule struct {
// Root reports the procedure kind this rule applies to, FromKind.
func (r AggregateGroupRewriteRule) Root() plan.ProcedureKind {
return FromKind
// Rewrite looks for an aggregate procedure among the children of the from
// procedure pr and, if one exists and the from spec has no aggregate yet,
// isolates the path from pr to that child and applies the rewrite to it.
//
// It is a no-op (nil error) when pr has no aggregate child or already has an
// aggregate set.
func (r AggregateGroupRewriteRule) Rewrite(pr *plan.Procedure, planner plan.PlanRewriter) error {
var agg *plan.Procedure
// Every child is visited, so the last aggregate child encountered wins.
pr.DoChildren(func(child *plan.Procedure) {
if _, ok := child.Spec.(plan.AggregateProcedureSpec); ok {
agg = child
if agg == nil {
return nil
// Unchecked assertion: pr is expected to be a from procedure (see Root).
fromSpec := pr.Spec.(*FromProcedureSpec)
if fromSpec.AggregateSet {
return nil
// Rewrite
isoFrom, err := planner.IsolatePath(pr, agg)
if err != nil {
return err
return r.rewrite(isoFrom, planner)
func (r AggregateGroupRewriteRule) rewrite(fromPr *plan.Procedure, planner plan.PlanRewriter) error {
fromSpec := fromPr.Spec.(*FromProcedureSpec)
aggPr := fromPr.Child(0)
aggSpec := aggPr.Spec.(plan.AggregateProcedureSpec)
fromSpec.AggregateSet = true
fromSpec.AggregateMethod = aggSpec.AggregateMethod()
if err := planner.RemoveBranch(aggPr); err != nil {
return err
planner.AddChild(fromPr, aggSpec.ReAggregateSpec())
return nil
func createGroupTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*GroupProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
cache := execute.NewBlockBuilderCache(a.Allocator())
d := execute.NewDataset(id, mode, cache)
t := NewGroupTransformation(d, cache, s)
return t, d, nil
// groupTransformation regroups incoming rows into blocks keyed by a chosen
// set of columns.
type groupTransformation struct {
// d is the downstream dataset that receives watermark and processing-time updates.
d execute.Dataset
// cache supplies one block builder per output partition key.
cache execute.BlockBuilderCache
// keys are the column labels to group by (GroupProcedureSpec.By).
keys []string
// except are the column labels to leave out of the key (GroupProcedureSpec.Except).
except []string
// ignoring is true if len(keys) == 0 && len(except) > 0
ignoring bool
func NewGroupTransformation(d execute.Dataset, cache execute.BlockBuilderCache, spec *GroupProcedureSpec) *groupTransformation {
t := &groupTransformation{
d: d,
cache: cache,
keys: spec.By,
except: spec.Except,
ignoring: len(spec.By) == 0 && len(spec.Except) > 0,
return t
// RetractBlock is not implemented for group and panics unconditionally. The
// commented-out code below is an earlier sketch kept for reference.
func (t *groupTransformation) RetractBlock(id execute.DatasetID, key execute.PartitionKey) (err error) {
//TODO(nathanielc): Investigate if this can be smarter and not retract all blocks with the same time bounds.
panic("not implemented")
//t.cache.ForEachBuilder(func(bk execute.BlockKey, builder execute.BlockBuilder) {
// if err != nil {
// return
// }
// if meta.Bounds().Equal(builder.Bounds()) {
// err = t.d.RetractBlock(bk)
// }
// Process redistributes the rows of block b into output blocks keyed by the
// grouping columns, appending each row to the builder for its key.
func (t *groupTransformation) Process(id execute.DatasetID, b execute.Block) error {
cols := b.Cols()
// on is the set of column labels that form the output partition key.
on := make(map[string]bool, len(cols))
if len(t.keys) > 0 {
// Group by exactly the requested labels.
for _, k := range t.keys {
on[k] = true
} else if len(t.except) > 0 {
// Group by every column of the block that is not listed in except.
for _, c := range cols {
for _, label := range t.except {
if c.Label == label {
// Excluded column: move on to the next column of the outer loop.
continue COLS
on[c.Label] = true
// With neither keys nor except given, on stays empty.
return b.Do(func(cr execute.ColReader) error {
l := cr.Len()
for i := 0; i < l; i++ {
key := execute.PartitionKeyForRowOn(i, cr, on)
builder, created := t.cache.BlockBuilder(key)
// A builder seen for the first time gets the input block's columns.
if created {
execute.AddBlockCols(b, builder)
execute.AppendRecord(i, cr, builder)
return nil
func (t *groupTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
return t.d.UpdateWatermark(mark)
func (t *groupTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
return t.d.UpdateProcessingTime(pt)
// Finish receives the dataset id and the terminating error, if any.
// NOTE(review): presumably forwards err to the downstream dataset t.d, like
// the other update methods — confirm against the full source.
func (t *groupTransformation) Finish(id execute.DatasetID, err error) {
@ -0,0 +1,344 @@
package functions_test
import (
// TestGroupOperation_Marshaling checks the JSON form of a group operation
// against the equivalent query.Operation value: By appears under the "by" key
// of "spec".
func TestGroupOperation_Marshaling(t *testing.T) {
data := []byte(`{"id":"group","kind":"group","spec":{"by":["t1","t2"]}}`)
op := &query.Operation{
ID: "group",
Spec: &functions.GroupOpSpec{
By: []string{"t1", "t2"},
querytest.OperationMarshalingTestHelper(t, data, op)
// TestGroup_Process runs the group transformation over fixed input blocks and
// compares the regrouped output blocks. "Fan in" cases merge several input
// blocks into fewer output blocks; "fan out" cases split one input block into
// several. The "ignoring" variants group via Except instead of By.
func TestGroup_Process(t *testing.T) {
testCases := []struct {
name string
spec *functions.GroupProcedureSpec
data []execute.Block
want []*executetest.Block
// Four blocks keyed by (t1, t2) collapse into two blocks keyed by t1.
name: "fan in",
spec: &functions.GroupProcedureSpec{
By: []string{"t1"},
data: []execute.Block{
KeyCols: []string{"t1", "t2"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(1), 2.0, "a", "x"},
KeyCols: []string{"t1", "t2"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(2), 1.0, "a", "y"},
KeyCols: []string{"t1", "t2"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(1), 4.0, "b", "x"},
KeyCols: []string{"t1", "t2"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(2), 7.0, "b", "y"},
want: []*executetest.Block{
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(1), 2.0, "a", "x"},
{execute.Time(2), 1.0, "a", "y"},
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(1), 4.0, "b", "x"},
{execute.Time(2), 7.0, "b", "y"},
// Excluding _time, _value and t2 leaves (t1, t3) as the key: four blocks
// keyed by (t1, t2, t3) collapse into two.
name: "fan in ignoring",
spec: &functions.GroupProcedureSpec{
Except: []string{"_time", "_value", "t2"},
data: []execute.Block{
KeyCols: []string{"t1", "t2", "t3"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
{Label: "t3", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(1), 2.0, "a", "m", "x"},
KeyCols: []string{"t1", "t2", "t3"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
{Label: "t3", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(2), 1.0, "a", "n", "x"},
KeyCols: []string{"t1", "t2", "t3"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
{Label: "t3", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(1), 4.0, "b", "m", "x"},
KeyCols: []string{"t1", "t2", "t3"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
{Label: "t3", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(2), 7.0, "b", "n", "x"},
want: []*executetest.Block{
KeyCols: []string{"t1", "t3"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
{Label: "t3", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(1), 2.0, "a", "m", "x"},
{execute.Time(2), 1.0, "a", "n", "x"},
KeyCols: []string{"t1", "t3"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
{Label: "t3", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(1), 4.0, "b", "m", "x"},
{execute.Time(2), 7.0, "b", "n", "x"},
// One input block with two distinct t1 values splits into two blocks.
name: "fan out",
spec: &functions.GroupProcedureSpec{
By: []string{"t1"},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(1), 2.0, "a"},
{execute.Time(2), 1.0, "b"},
want: []*executetest.Block{
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(1), 2.0, "a"},
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(2), 1.0, "b"},
// One input block whose rows differ in t3 splits into two blocks keyed by
// (t1, t3).
name: "fan out ignoring",
spec: &functions.GroupProcedureSpec{
Except: []string{"_time", "_value", "t2"},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
{Label: "t3", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(1), 2.0, "a", "m", "x"},
{execute.Time(2), 1.0, "a", "n", "y"},
want: []*executetest.Block{
KeyCols: []string{"t1", "t3"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
{Label: "t3", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(1), 2.0, "a", "m", "x"},
KeyCols: []string{"t1", "t3"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
{Label: "t3", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(2), 1.0, "a", "n", "y"},
// tc is re-bound so each subtest closure captures its own case.
for _, tc := range testCases {
tc := tc
t.Run(, func(t *testing.T) {
// Factory handing the helper a group transformation built from the case's spec.
func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
return functions.NewGroupTransformation(d, c, tc.spec)
// TestGroup_PushDown verifies that pushing a group by (t1, t2) onto an
// ungrouped from procedure records the grouping keys on the from spec and
// leaves MergeAll false.
func TestGroup_PushDown(t *testing.T) {
spec := &functions.GroupProcedureSpec{
By: []string{"t1", "t2"},
// Root has no grouping set.
root := &plan.Procedure{
Spec: new(functions.FromProcedureSpec),
want := &plan.Procedure{
Spec: &functions.FromProcedureSpec{
GroupingSet: true,
MergeAll: false,
GroupKeys: []string{"t1", "t2"},
plantest.PhysicalPlan_PushDown_TestHelper(t, spec, root, false, want)
// TestGroup_PushDown_Duplicate covers the case where the from procedure is
// already grouped: group must work on a duplicate of the root, and that
// duplicate is expected to come back at zero values.
func TestGroup_PushDown_Duplicate(t *testing.T) {
spec := &functions.GroupProcedureSpec{
By: []string{"t1", "t2"},
// Root already has a grouping claimed.
root := &plan.Procedure{
Spec: &functions.FromProcedureSpec{
GroupingSet: true,
MergeAll: true,
want := &plan.Procedure{
// Expect the duplicate has been reset to zero values
Spec: new(functions.FromProcedureSpec),
plantest.PhysicalPlan_PushDown_TestHelper(t, spec, root, true, want)
@ -0,0 +1,224 @@
package functions
import (
// IntegralKind is the name under which the integral operation, procedure and
// transformation are registered.
const IntegralKind = "integral"
// IntegralOpSpec is the operation spec for integral.
type IntegralOpSpec struct {
// Unit is the time unit of the integral; createIntegralOpSpec defaults it to
// one second.
Unit query.Duration `json:"unit"`
// integralSignature starts from the default function signature; init adds the
// "unit" parameter.
var integralSignature = query.DefaultFunctionSignature()
// init declares the duration parameter "unit" and registers integral with the
// query, plan and execute packages.
func init() {
integralSignature.Params["unit"] = semantic.Duration
query.RegisterFunction(IntegralKind, createIntegralOpSpec, integralSignature)
query.RegisterOpSpec(IntegralKind, newIntegralOp)
plan.RegisterProcedureSpec(IntegralKind, newIntegralProcedure, IntegralKind)
execute.RegisterTransformation(IntegralKind, createIntegralTransformation)
func createIntegralOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
spec := new(IntegralOpSpec)
if unit, ok, err := args.GetDuration("unit"); err != nil {
return nil, err
} else if ok {
spec.Unit = unit
} else {
//Default is 1s
spec.Unit = query.Duration(time.Second)
if err := spec.AggregateConfig.ReadArgs(args); err != nil {
return nil, err
return spec, nil
func newIntegralOp() query.OperationSpec {
return new(IntegralOpSpec)
func (s *IntegralOpSpec) Kind() query.OperationKind {
return IntegralKind
// IntegralProcedureSpec is the plan-level spec for integral.
// newIntegralProcedure fills it from the operation spec, including the
// AggregateConfig.
type IntegralProcedureSpec struct {
// Unit is the time unit of the integral, copied from the operation spec.
Unit query.Duration `json:"unit"`
func newIntegralProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*IntegralOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
return &IntegralProcedureSpec{
Unit: spec.Unit,
AggregateConfig: spec.AggregateConfig,
}, nil
func (s *IntegralProcedureSpec) Kind() plan.ProcedureKind {
return IntegralKind
func (s *IntegralProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(IntegralProcedureSpec)
*ns = *s
ns.AggregateConfig = s.AggregateConfig.Copy()
return ns
func createIntegralTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*IntegralProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
cache := execute.NewBlockBuilderCache(a.Allocator())
d := execute.NewDataset(id, mode, cache)
t := NewIntegralTransformation(d, cache, s)
return t, d, nil
// integralTransformation computes integrals over the blocks it receives.
type integralTransformation struct {
// d receives retractions, watermark and processing-time updates.
d execute.Dataset
// cache supplies the builder for each output block.
cache execute.BlockBuilderCache
// spec is a by-value copy of the procedure spec given to the constructor.
spec IntegralProcedureSpec
func NewIntegralTransformation(d execute.Dataset, cache execute.BlockBuilderCache, spec *IntegralProcedureSpec) *integralTransformation {
return &integralTransformation{
d: d,
cache: cache,
spec: *spec,
func (t *integralTransformation) RetractBlock(id execute.DatasetID, key execute.PartitionKey) error {
return t.d.RetractBlock(key)
func (t *integralTransformation) Process(id execute.DatasetID, b execute.Block) error {
builder, created := t.cache.BlockBuilder(b.Key())
if !created {
return fmt.Errorf("integral found duplicate block with key: %v", b.Key())
execute.AddBlockKeyCols(b.Key(), builder)
Label: t.spec.TimeDst,
Type: execute.TTime,
cols := b.Cols()
integrals := make([]*integral, len(cols))
colMap := make([]int, len(cols))
for j, c := range cols {
if execute.ContainsStr(t.spec.Columns, c.Label) {
integrals[j] = newIntegral(time.Duration(t.spec.Unit))
colMap[j] = builder.AddCol(execute.ColMeta{
Label: c.Label,
Type: execute.TFloat,
if err := execute.AppendAggregateTime(t.spec.TimeSrc, t.spec.TimeDst, b.Key(), builder); err != nil {
return err
timeIdx := execute.ColIdx(t.spec.TimeDst, cols)
if timeIdx < 0 {
return fmt.Errorf("no column %q exists", t.spec.TimeSrc)
err := b.Do(func(cr execute.ColReader) error {
for j, in := range integrals {
if in == nil {
l := cr.Len()
for i := 0; i < l; i++ {
tm := cr.Times(timeIdx)[i]
in.updateFloat(tm, cr.Floats(j)[i])
return nil
if err != nil {
return err
execute.AppendKeyValues(b.Key(), builder)
for j, in := range integrals {
if in == nil {
builder.AppendFloat(colMap[j], in.value())
return nil
func (t *integralTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
return t.d.UpdateWatermark(mark)
func (t *integralTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
return t.d.UpdateProcessingTime(pt)
// Finish receives the completion signal (and optional error) from the parent
// dataset id.
// NOTE(review): no body is visible in this copy; presumably completion is
// forwarded to t.d — confirm.
func (t *integralTransformation) Finish(id execute.DatasetID, err error) {
func newIntegral(unit time.Duration) *integral {
return &integral{
first: true,
unit: float64(unit),
// integral accumulates a running trapezoidal sum over (time, value) samples.
type integral struct {
// first is true until the first sample has been recorded.
first bool
// unit is the divisor applied to the elapsed time between samples.
unit float64
// pFloatValue and pTime hold the previous sample.
pFloatValue float64
pTime execute.Time
// sum is the accumulated integral returned by value.
sum float64
func (in *integral) value() float64 {
return in.sum
func (in *integral) updateFloat(t execute.Time, v float64) {
if in.first {
in.pTime = t
in.pFloatValue = v
in.first = false
elapsed := float64(t-in.pTime) / in.unit
in.sum += 0.5 * (v + in.pFloatValue) * elapsed
in.pTime = t
in.pFloatValue = v
@ -0,0 +1,194 @@
package functions_test
import (
// TestIntegralOperation_Marshaling checks the JSON form of an integral
// operation with a one-minute unit against the equivalent query.Operation.
func TestIntegralOperation_Marshaling(t *testing.T) {
data := []byte(`{"id":"integral","kind":"integral","spec":{"unit":"1m"}}`)
op := &query.Operation{
ID: "integral",
Spec: &functions.IntegralOpSpec{
Unit: query.Duration(time.Minute),
querytest.OperationMarshalingTestHelper(t, data, op)
// TestIntegral_PassThrough runs the generic pass-through helper against an
// integral transformation.
// NOTE(review): the arguments to NewIntegralTransformation are not visible in
// this copy.
func TestIntegral_PassThrough(t *testing.T) {
executetest.TransformationPassThroughTestHelper(t, func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
s := functions.NewIntegralTransformation(
return s
// TestIntegral_Process is a table-driven test: each case feeds the input
// blocks through an integral transformation built from spec and compares the
// produced blocks with want.
// NOTE(review): several structural lines (closing braces, the t.Run name
// argument, the helper call) are not visible in this copy.
func TestIntegral_Process(t *testing.T) {
testCases := []struct {
name string
spec *functions.IntegralProcedureSpec
data []execute.Block
want []*executetest.Block
// Two samples one time unit apart: 0.5*(2+1)*1 = 1.5.
name: "float",
spec: &functions.IntegralProcedureSpec{
Unit: 1,
AggregateConfig: execute.DefaultAggregateConfig,
data: []execute.Block{&executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(1), execute.Time(3), execute.Time(1), 2.0},
{execute.Time(1), execute.Time(3), execute.Time(2), 1.0},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(1), execute.Time(3), execute.Time(3), 1.5},
// Unit of one second, samples two seconds apart: 0.5*(1+2)*2 = 3.0.
name: "float with units",
spec: &functions.IntegralProcedureSpec{
Unit: query.Duration(time.Second),
AggregateConfig: execute.DefaultAggregateConfig,
data: []execute.Block{&executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(1 * time.Second), execute.Time(4 * time.Second), execute.Time(1 * time.Second), 2.0},
{execute.Time(1 * time.Second), execute.Time(4 * time.Second), execute.Time(3 * time.Second), 1.0},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(1 * time.Second), execute.Time(4 * time.Second), execute.Time(4 * time.Second), 3.0},
// The extra string column "t" is absent from the expected output.
name: "float with tags",
spec: &functions.IntegralProcedureSpec{
Unit: 1,
AggregateConfig: execute.DefaultAggregateConfig,
data: []execute.Block{&executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(1), execute.Time(3), execute.Time(1), 2.0, "a"},
{execute.Time(1), execute.Time(3), execute.Time(2), 1.0, "b"},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(1), execute.Time(3), execute.Time(3), 1.5},
// Two columns integrated at once over four samples: x = 4.5, y = 45.0.
name: "float with multiple values",
spec: &functions.IntegralProcedureSpec{
Unit: 1,
AggregateConfig: execute.AggregateConfig{
TimeDst: execute.DefaultTimeColLabel,
TimeSrc: execute.DefaultStopColLabel,
Columns: []string{"x", "y"},
data: []execute.Block{&executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(1), execute.Time(5), execute.Time(1), 2.0, 20.0},
{execute.Time(1), execute.Time(5), execute.Time(2), 1.0, 10.0},
{execute.Time(1), execute.Time(5), execute.Time(3), 2.0, 20.0},
{execute.Time(1), execute.Time(5), execute.Time(4), 1.0, 10.0},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(1), execute.Time(5), execute.Time(5), 4.5, 45.0},
for _, tc := range testCases {
// Rebind tc so the closure below captures this iteration's case.
tc := tc
t.Run(, func(t *testing.T) {
func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
return functions.NewIntegralTransformation(d, c, tc.spec)
@ -0,0 +1,750 @@
package functions
import (
// JoinKind is the name of the join function and its operation spec.
const JoinKind = "join"
// MergeJoinKind is the procedure/transformation kind that init registers as
// the implementation of JoinKind.
const MergeJoinKind = "merge-join"
// JoinOpSpec is the operation spec for the join function.
type JoinOpSpec struct {
// On is a list of tags on which to join.
On []string `json:"on"`
// Fn is a function accepting a single parameter.
// The parameter is a map of records for each of the parent operations.
Fn *semantic.FunctionExpression `json:"fn"`
// TableNames are the names to give to each parent when populating the parameter for the function.
// The first parent is referenced by the first name and so forth.
// TODO(nathanielc): Change this to a map of parent operation IDs to names.
// Then make it possible for the transformation to map operation IDs to parent IDs.
TableNames map[query.OperationID]string `json:"table_names"`
// joinSignature declares join's parameters: "tables" (an object, also the
// pipe argument), "fn" (a function) and "on" (an array of strings). The
// function returns a table object.
var joinSignature = semantic.FunctionSignature{
Params: map[string]semantic.Type{
"tables": semantic.Object,
"fn": semantic.Function,
"on": semantic.NewArrayType(semantic.String),
ReturnType: query.TableObjectType,
PipeArgument: "tables",
// init registers the join function and operation spec under JoinKind, and
// the merge-join procedure and transformation under MergeJoinKind.
func init() {
query.RegisterFunction(JoinKind, createJoinOpSpec, joinSignature)
query.RegisterOpSpec(JoinKind, newJoinOp)
//TODO(nathanielc): Allow for other types of join implementations
plan.RegisterProcedureSpec(MergeJoinKind, newMergeJoinProcedure, JoinKind)
execute.RegisterTransformation(MergeJoinKind, createMergeJoinTransformation)
// createJoinOpSpec builds a JoinOpSpec from the call arguments: the required
// "fn" function, the optional "on" string array, and the optional "tables"
// object whose keys become the table names.
// NOTE(review): early returns and closing braces inside the Range callback
// are not visible in this copy, so its exact control flow cannot be confirmed
// from here.
func createJoinOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
f, err := args.GetRequiredFunction("fn")
if err != nil {
return nil, err
fn, err := interpreter.ResolveFunction(f)
if err != nil {
return nil, err
spec := &JoinOpSpec{
Fn: fn,
TableNames: make(map[query.OperationID]string),
if array, ok, err := args.GetArray("on", semantic.String); err != nil {
return nil, err
} else if ok {
spec.On, err = interpreter.ToStringArray(array)
if err != nil {
return nil, err
if m, ok, err := args.GetObject("tables"); err != nil {
return nil, err
} else if ok {
// err is redeclared here so the callback below can record a failure.
var err error
m.Range(func(k string, t values.Value) {
if err != nil {
// Each value must be an object, and specifically a table object.
if t.Type().Kind() != semantic.Object {
err = fmt.Errorf("value for key %q in tables must be an object: got %v", k, t.Type().Kind())
if t.Type() != query.TableObjectType {
err = fmt.Errorf("value for key %q in tables must be an table object: got %v", k, t.Type())
// Record the table name under the table object's ID.
p := t.(query.TableObject)
spec.TableNames[p.ID] = k
if err != nil {
return nil, err
return spec, nil
func newJoinOp() query.OperationSpec {
return new(JoinOpSpec)
func (s *JoinOpSpec) Kind() query.OperationKind {
return JoinKind
// MergeJoinProcedureSpec is the planner-level spec for a merge join.
type MergeJoinProcedureSpec struct {
// On holds the join columns; it is serialized as "keys".
On []string `json:"keys"`
// Fn is the join function; it is serialized as "f".
Fn *semantic.FunctionExpression `json:"f"`
// TableNames maps each parent procedure ID to its table name.
TableNames map[plan.ProcedureID]string `json:"table_names"`
func newMergeJoinProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*JoinOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
tableNames := make(map[plan.ProcedureID]string, len(spec.TableNames))
for qid, name := range spec.TableNames {
pid := pa.ConvertID(qid)
tableNames[pid] = name
p := &MergeJoinProcedureSpec{
On: spec.On,
Fn: spec.Fn,
TableNames: tableNames,
return p, nil
func (s *MergeJoinProcedureSpec) Kind() plan.ProcedureKind {
return MergeJoinKind
func (s *MergeJoinProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(MergeJoinProcedureSpec)
ns.On = make([]string, len(s.On))
copy(ns.On, s.On)
ns.Fn = s.Fn.Copy().(*semantic.FunctionExpression)
return ns
func (s *MergeJoinProcedureSpec) ParentChanged(old, new plan.ProcedureID) {
if v, ok := s.TableNames[old]; ok {
delete(s.TableNames, old)
s.TableNames[new] = v
func createMergeJoinTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*MergeJoinProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
parents := a.Parents()
if len(parents) != 2 {
//TODO(nathanielc): Support n-way joins
return nil, nil, errors.New("joins currently must only have two parents")
tableNames := make(map[execute.DatasetID]string, len(s.TableNames))
for pid, name := range s.TableNames {
id := a.ConvertID(pid)
tableNames[id] = name
leftName := tableNames[parents[0]]
rightName := tableNames[parents[1]]
joinFn, err := NewRowJoinFunction(s.Fn, parents, tableNames)
if err != nil {
return nil, nil, errors.Wrap(err, "invalid expression")
cache := NewMergeJoinCache(joinFn, a.Allocator(), leftName, rightName, s.On)
d := execute.NewDataset(id, mode, cache)
t := NewMergeJoinTransformation(d, cache, s, parents, tableNames)
return t, d, nil
// mergeJoinTransformation collects blocks from two parents into a
// MergeJoinCache and tracks per-parent progress.
type mergeJoinTransformation struct {
// NOTE(review): parents is not assigned by the constructor shown in this
// file — confirm whether it is used.
parents []execute.DatasetID
// NOTE(review): no use of mu is visible in this copy; presumably it guards
// parentState and the cache — confirm.
mu sync.Mutex
d execute.Dataset
cache MergeJoinCache
// leftID/rightID and leftName/rightName identify the first and second parent.
leftID, rightID execute.DatasetID
leftName, rightName string
// parentState holds watermark, processing time and finished flag per parent.
parentState map[execute.DatasetID]*mergeJoinParentState
// keys are the columns being joined on.
keys []string
func NewMergeJoinTransformation(d execute.Dataset, cache MergeJoinCache, spec *MergeJoinProcedureSpec, parents []execute.DatasetID, tableNames map[execute.DatasetID]string) *mergeJoinTransformation {
t := &mergeJoinTransformation{
d: d,
cache: cache,
keys: spec.On,
leftID: parents[0],
rightID: parents[1],
leftName: tableNames[parents[0]],
rightName: tableNames[parents[1]],
t.parentState = make(map[execute.DatasetID]*mergeJoinParentState)
for _, id := range parents {
t.parentState[id] = new(mergeJoinParentState)
return t
// mergeJoinParentState is the progress recorded for one parent dataset.
type mergeJoinParentState struct {
// mark is the last watermark reported by the parent.
mark execute.Time
// processing is the last processing time reported by the parent.
processing execute.Time
// finished records whether the parent has signalled completion.
finished bool
// RetractBlock is not supported by the merge join and always panics.
// The commented-out lines below are an earlier sketch, kept for reference.
func (t *mergeJoinTransformation) RetractBlock(id execute.DatasetID, key execute.PartitionKey) error {
panic("not implemented")
//bm := blockMetadata{
// tags: meta.Tags().IntersectingSubset(t.keys),
// bounds: meta.Bounds(),
//return t.d.RetractBlock(execute.ToBlockKey(bm))
func (t *mergeJoinTransformation) Process(id execute.DatasetID, b execute.Block) error {
tables := t.cache.Tables(b.Key())
var references []string
var table execute.BlockBuilder
switch id {
case t.leftID:
table = tables.left
references = tables.joinFn.references[t.leftName]
case t.rightID:
table = tables.right
references = tables.joinFn.references[t.rightName]
// Add columns to table
labels := unionStrs(t.keys, references)
colMap := make([]int, len(labels))
for _, label := range labels {
blockIdx := execute.ColIdx(label, b.Cols())
if blockIdx < 0 {
return fmt.Errorf("no column %q exists", label)
// Only add the column if it does not already exist
builderIdx := execute.ColIdx(label, table.Cols())
if builderIdx < 0 {
c := b.Cols()[blockIdx]
builderIdx = table.AddCol(c)
colMap[builderIdx] = blockIdx
execute.AppendBlock(b, table, colMap)
return nil
// unionStrs returns every element of bs, in order, followed by each element
// of as that does not occur in bs. Repeats within as that are absent from bs
// are kept. The result is always a non-nil slice.
func unionStrs(as, bs []string) []string {
	inBs := func(s string) bool {
		for _, b := range bs {
			if s == b {
				return true
			}
		}
		return false
	}

	u := make([]string, 0, len(as)+len(bs))
	u = append(u, bs...)
	for _, a := range as {
		if !inBs(a) {
			u = append(u, a)
		}
	}
	return u
}
func (t *mergeJoinTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
t.parentState[id].mark = mark
min := execute.Time(math.MaxInt64)
for _, state := range t.parentState {
if state.mark < min {
min = state.mark
return t.d.UpdateWatermark(min)
func (t *mergeJoinTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
t.parentState[id].processing = pt
min := execute.Time(math.MaxInt64)
for _, state := range t.parentState {
if state.processing < min {
min = state.processing
return t.d.UpdateProcessingTime(min)
// Finish marks parent id as finished and checks whether every parent has
// finished.
// NOTE(review): the statements inside `if err != nil` and `if finished` are
// not visible in this copy; presumably they forward the error or the final
// completion to t.d — confirm.
func (t *mergeJoinTransformation) Finish(id execute.DatasetID, err error) {
if err != nil {
t.parentState[id].finished = true
// finished stays true only if all parents have reported completion.
finished := true
for _, state := range t.parentState {
finished = finished && state.finished
if finished {
// MergeJoinCache gives the merge-join transformation access to the pair of
// join tables kept for a partition key.
type MergeJoinCache interface {
// Tables returns the join tables for the given partition key.
Tables(execute.PartitionKey) *joinTables
// mergeJoinCache stores one joinTables value per partition key.
type mergeJoinCache struct {
// data maps partition keys to *joinTables values.
data *execute.PartitionLookup
alloc *execute.Allocator
// keys are the join columns; on is the same set as a lookup map.
keys []string
on map[string]bool
leftName, rightName string
// triggerSpec is used to create the trigger of each new joinTables.
triggerSpec query.TriggerSpec
joinFn *joinFunc
func NewMergeJoinCache(joinFn *joinFunc, a *execute.Allocator, leftName, rightName string, keys []string) *mergeJoinCache {
on := make(map[string]bool, len(keys))
for _, k := range keys {
on[k] = true
return &mergeJoinCache{
data: execute.NewPartitionLookup(),
keys: keys,
on: on,
joinFn: joinFn,
alloc: a,
leftName: leftName,
rightName: rightName,
func (c *mergeJoinCache) Block(key execute.PartitionKey) (execute.Block, error) {
t, ok := c.lookup(key)
if !ok {
return nil, errors.New("block not found")
return t.Join()
// ForEach calls f for the partition keys held by the cache.
// NOTE(review): the iteration call on the next line is garbled in this copy
// ("||||") and the callback body is not visible.
func (c *mergeJoinCache) ForEach(f func(execute.PartitionKey)) {
|||| execute.PartitionKey, value interface{}) {
// ForEachWithContext calls f with each key, the trigger of its join tables,
// and a BlockContext whose Count is the tables' combined row count.
// NOTE(review): the iteration call on the next line is garbled in this copy
// ("||||").
func (c *mergeJoinCache) ForEachWithContext(f func(execute.PartitionKey, execute.Trigger, execute.BlockContext)) {
|||| execute.PartitionKey, value interface{}) {
tables := value.(*joinTables)
bc := execute.BlockContext{
Key: key,
Count: tables.Size(),
f(key, tables.trigger, bc)
// DiscardBlock looks up the join tables for key.
// NOTE(review): the action taken when the tables exist is not visible in
// this copy; presumably their data is cleared — confirm.
func (c *mergeJoinCache) DiscardBlock(key execute.PartitionKey) {
t, ok := c.lookup(key)
if ok {
// ExpireBlock handles expiry of the block for key.
// NOTE(review): the right-hand side of the assignment and the body of the
// `if` are not visible in this copy.
func (c *mergeJoinCache) ExpireBlock(key execute.PartitionKey) {
v, ok :=
if ok {
func (c *mergeJoinCache) SetTriggerSpec(spec query.TriggerSpec) {
c.triggerSpec = spec
// lookup returns the join tables stored for key and whether they exist.
// NOTE(review): the right-hand side of the first assignment is not visible
// in this copy; presumably it reads from c.data — confirm.
func (c *mergeJoinCache) lookup(key execute.PartitionKey) (*joinTables, bool) {
v, ok :=
if !ok {
return nil, false
return v.(*joinTables), true
// Tables returns the join tables for key, creating them with empty left and
// right builders and a trigger from c.triggerSpec when none exist yet.
// NOTE(review): the statement that stores the new tables is garbled in this
// copy ("||||"); presumably it saves them in c.data under key — confirm.
func (c *mergeJoinCache) Tables(key execute.PartitionKey) *joinTables {
tables, ok := c.lookup(key)
if !ok {
tables = &joinTables{
keys: c.keys,
key: key,
on: c.on,
alloc: c.alloc,
left: execute.NewColListBlockBuilder(key, c.alloc),
right: execute.NewColListBlockBuilder(key, c.alloc),
leftName: c.leftName,
rightName: c.rightName,
trigger: execute.NewTriggerFromSpec(c.triggerSpec),
joinFn: c.joinFn,
||||, tables)
return tables
// joinTables holds the left and right input rows collected for one partition
// key, together with what is needed to join them.
type joinTables struct {
// keys are the join columns; on is the same set as a lookup map.
keys []string
on map[string]bool
// key is the partition key the builders are created with.
key execute.PartitionKey
alloc *execute.Allocator
// left and right accumulate the rows from each parent.
left, right *execute.ColListBlockBuilder
leftName, rightName string
trigger execute.Trigger
joinFn *joinFunc
func (t *joinTables) Size() int {
return t.left.NRows() + t.right.NRows()
func (t *joinTables) ClearData() {
t.left = execute.NewColListBlockBuilder(t.key, t.alloc)
t.right = execute.NewColListBlockBuilder(t.key, t.alloc)
// Join performs a sort-merge join
// of the left and right tables on t.keys and returns the joined block. For
// every pair of rows with equal join keys the join function is evaluated and
// its result appended as one output row (inner join).
// NOTE(review): closing braces and some statements are not visible in this
// copy, so the exact nesting below cannot be confirmed from here.
func (t *joinTables) Join() (execute.Block, error) {
// First prepare the join function
left := t.left.RawBlock()
right := t.right.RawBlock()
err := t.joinFn.Prepare(map[string]*execute.ColListBlock{
t.leftName: left,
t.rightName: right,
if err != nil {
return nil, errors.Wrap(err, "failed to prepare join function")
// Create a builder for the result of the join
builder := execute.NewColListBlockBuilder(t.key, t.alloc)
// Add columns from function in sorted order
// NOTE(review): no sort of keys is visible in this copy; without one the
// column order would follow map iteration order — confirm.
properties := t.joinFn.Type().Properties()
keys := make([]string, 0, len(properties))
for k := range properties {
keys = append(keys, k)
for _, k := range keys {
// NOTE(review): the call that receives this column metadata is not visible.
Label: k,
Type: execute.ConvertFromKind(properties[k].Kind()),
// Now that all columns have been added, keep a reference.
bCols := builder.Cols()
// Determine sort order for the joining tables
sortOrder := make([]string, len(t.keys))
for i, label := range t.keys {
sortOrder[i] = label
// Sort input tables
// NOTE(review): left/right were obtained via RawBlock before this sort;
// presumably they share storage with the builders — confirm.
t.left.Sort(sortOrder, false)
t.right.Sort(sortOrder, false)
var (
leftSet, rightSet subset
leftKey, rightKey execute.PartitionKey
// rows carries the current row index of each table into the join function.
rows := map[string]int{
t.leftName: -1,
t.rightName: -1,
// Position both cursors on their first run of rows with equal join keys.
leftSet, leftKey = t.advance(leftSet.Stop, left)
rightSet, rightKey = t.advance(rightSet.Stop, right)
for !leftSet.Empty() && !rightSet.Empty() {
if leftKey.Equal(rightKey) {
// Inner join
for l := leftSet.Start; l < leftSet.Stop; l++ {
for r := rightSet.Start; r < rightSet.Stop; r++ {
// Evaluate expression and add to block
rows[t.leftName] = l
rows[t.rightName] = r
m, err := t.joinFn.Eval(rows)
if err != nil {
return nil, errors.Wrap(err, "failed to evaluate join function")
for j, c := range bCols {
v, _ := m.Get(c.Label)
execute.AppendValue(builder, j, v)
leftSet, leftKey = t.advance(leftSet.Stop, left)
rightSet, rightKey = t.advance(rightSet.Stop, right)
} else if leftKey.Less(rightKey) {
// Left keys are behind: skip forward on the left only.
leftSet, leftKey = t.advance(leftSet.Stop, left)
} else {
// Right keys are behind: skip forward on the right only.
rightSet, rightKey = t.advance(rightSet.Stop, right)
return builder.Block()
// advance returns the run of consecutive rows in table, starting at offset,
// that share the same values in the join columns, together with the
// partition key built from those columns for the first row of the run.
// When offset equals the row count it returns an empty subset and a nil key.
// NOTE(review): the statements that increment offset are not visible in this
// copy.
func (t *joinTables) advance(offset int, table *execute.ColListBlock) (subset, execute.PartitionKey) {
if n := table.NRows(); n == offset {
return subset{Start: n, Stop: n}, nil
start := offset
key := execute.PartitionKeyForRowOn(start, table, t.on)
s := subset{Start: start}
for offset < table.NRows() && equalRowKeys(start, offset, table, t.on) {
s.Stop = offset
return s, key
// subset is a range of row indexes. Join iterates it from Start up to, but
// not including, Stop.
type subset struct {
	Start, Stop int
}

// Empty reports whether the range contains no rows, i.e. Start equals Stop.
func (s subset) Empty() bool {
	return s.Stop == s.Start
}
// equalRowKeys compares rows x and y of table column by column, using the
// typed accessor that matches each column's type, and reports false at the
// first difference.
// NOTE(review): what follows `if !on[c.Label] {` is not visible in this copy;
// presumably columns outside the join set are skipped — confirm.
func equalRowKeys(x, y int, table *execute.ColListBlock, on map[string]bool) bool {
for j, c := range table.Cols() {
if !on[c.Label] {
switch c.Type {
case execute.TBool:
if xv, yv := table.Bools(j)[x], table.Bools(j)[y]; xv != yv {
return false
case execute.TInt:
if xv, yv := table.Ints(j)[x], table.Ints(j)[y]; xv != yv {
return false
case execute.TUInt:
if xv, yv := table.UInts(j)[x], table.UInts(j)[y]; xv != yv {
return false
case execute.TFloat:
// Plain != on floats: a NaN never compares equal, even to itself.
if xv, yv := table.Floats(j)[x], table.Floats(j)[y]; xv != yv {
return false
case execute.TString:
if xv, yv := table.Strings(j)[x], table.Strings(j)[y]; xv != yv {
return false
case execute.TTime:
if xv, yv := table.Times(j)[x], table.Times(j)[y]; xv != yv {
return false
return true
// joinFunc evaluates the user's join function against one row from each
// joined table.
type joinFunc struct {
// NOTE(review): fn is not assigned by NewRowJoinFunction in this copy.
fn *semantic.FunctionExpression
// compilationCache compiles the function for the record type built in Prepare.
compilationCache *compiler.CompilationCache
// scope is the scope passed to preparedFn; it holds the record under recordName.
scope compiler.Scope
preparedFn compiler.Func
// recordName is the name of the function's single parameter.
recordName string
// record holds one nested record per table; Eval fills them per row.
record *execute.Record
// recordCols maps (table, column label) to the column index in that table.
recordCols map[tableCol]int
// references maps a table name to the column labels the function reads.
references map[string][]string
// isWrap is true when the function's result is not an object; the result
// is then stored in wrapObj under the default value column label.
isWrap bool
wrapObj *execute.Record
// tableData holds the blocks given to Prepare, by table name.
tableData map[string]*execute.ColListBlock
// tableCol identifies a column by table name and column label; it is the key
// type of joinFunc.recordCols.
type tableCol struct {
table, col string
func NewRowJoinFunction(fn *semantic.FunctionExpression, parentIDs []execute.DatasetID, tableNames map[execute.DatasetID]string) (*joinFunc, error) {
if len(fn.Params) != 1 {
return nil, errors.New("join function should only have one parameter for the map of tables")
scope, decls := query.BuiltIns()
return &joinFunc{
compilationCache: compiler.NewCompilationCache(fn, scope, decls),
scope: make(compiler.Scope, 1),
references: findTableReferences(fn),
recordCols: make(map[tableCol]int),
recordName: fn.Params[0].Key.Name,
}, nil
// Prepare binds the join function to the given tables: it resolves every
// referenced column to its index and type, builds the nested record that is
// passed as the function argument, compiles the function for that record
// type, and sets up the wrapper object when the result is not an object.
// It returns an error when a referenced column is missing from its table or
// when compilation fails.
func (f *joinFunc) Prepare(tables map[string]*execute.ColListBlock) error {
f.tableData = tables
propertyTypes := make(map[string]semantic.Type, len(f.references))
// Prepare types and recordcols
for tbl, b := range tables {
cols := b.Cols()
tblPropertyTypes := make(map[string]semantic.Type, len(f.references[tbl]))
for _, r := range f.references[tbl] {
j := execute.ColIdx(r, cols)
if j < 0 {
return fmt.Errorf("function references unknown column %q of table %q", r, tbl)
c := cols[j]
f.recordCols[tableCol{table: tbl, col: c.Label}] = j
tblPropertyTypes[r] = execute.ConvertToKind(c.Type)
propertyTypes[tbl] = semantic.NewObjectType(tblPropertyTypes)
// The outer record has one property per table, each itself a record.
f.record = execute.NewRecord(semantic.NewObjectType(propertyTypes))
for tbl := range tables {
f.record.Set(tbl, execute.NewRecord(propertyTypes[tbl]))
// Compile fn for given types
fn, err := f.compilationCache.Compile(map[string]semantic.Type{
f.recordName: f.record.Type(),
if err != nil {
return err
f.preparedFn = fn
// A non-object result is wrapped in a record with a single value column.
k := f.preparedFn.Type().Kind()
f.isWrap = k != semantic.Object
if f.isWrap {
f.wrapObj = execute.NewRecord(semantic.NewObjectType(map[string]semantic.Type{
execute.DefaultValueColLabel: f.preparedFn.Type(),
return nil
func (f *joinFunc) Type() semantic.Type {
if f.isWrap {
return f.wrapObj.Type()
return f.preparedFn.Type()
// Eval evaluates the prepared join function for one row of each table; rows
// maps table name to row index. It returns the function's object result, or
// the wrapper object holding the result when the function does not return an
// object.
// NOTE(review): closing braces are not visible in this copy, so whether the
// scope assignment sits inside or after the table loop cannot be confirmed.
func (f *joinFunc) Eval(rows map[string]int) (values.Object, error) {
for tbl, references := range f.references {
row := rows[tbl]
data := f.tableData[tbl]
// Copy the referenced column values of this row into the table's record.
obj, _ := f.record.Get(tbl)
o := obj.(*execute.Record)
for _, r := range references {
o.Set(r, execute.ValueForRow(row, f.recordCols[tableCol{table: tbl, col: r}], data))
f.scope[f.recordName] = f.record
v, err := f.preparedFn.Eval(f.scope)
if err != nil {
return nil, err
if f.isWrap {
f.wrapObj.Set(execute.DefaultValueColLabel, v)
return f.wrapObj, nil
return v.Object(), nil
func findTableReferences(fn *semantic.FunctionExpression) map[string][]string {
v := &tableReferenceVisitor{
record: fn.Params[0].Key.Name,
refs: make(map[string][]string),
semantic.Walk(v, fn)
return v.refs
// tableReferenceVisitor collects column references made through a named
// record parameter while walking a semantic graph.
type tableReferenceVisitor struct {
// record is the parameter name that references must go through.
record string
// refs maps table name to the referenced column labels, in visit order.
refs map[string][]string
func (c *tableReferenceVisitor) Visit(node semantic.Node) semantic.Visitor {
if col, ok := node.(*semantic.MemberExpression); ok {
if table, ok := col.Object.(*semantic.MemberExpression); ok {
if record, ok := table.Object.(*semantic.IdentifierExpression); ok && record.Name == c.record {
c.refs[table.Property] = append(c.refs[table.Property], col.Property)
return nil
return c
// Done is a no-op; the visitor keeps no per-node state to finalize.
func (c *tableReferenceVisitor) Done() {
}
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,210 @@
package functions
import (
// KeysKind is the name under which the keys function, operation spec,
// procedure spec and transformation are registered in init.
const KeysKind = "keys"
var (
// keysExceptDefaultValue is used for Except when the "except" argument is
// not supplied.
keysExceptDefaultValue = []string{"_time", "_value"}
// KeysOpSpec is the operation spec for the keys function.
type KeysOpSpec struct {
// Except lists column labels to leave out; it is serialized as "except".
Except []string `json:"except"`
// keysSignature starts from the default function signature; init adds the
// "except" parameter to it.
var keysSignature = query.DefaultFunctionSignature()
// init declares the "except" string-array parameter and registers the keys
// function, operation spec, procedure spec and transformation under KeysKind.
func init() {
keysSignature.Params["except"] = semantic.NewArrayType(semantic.String)
query.RegisterFunction(KeysKind, createKeysOpSpec, keysSignature)
query.RegisterOpSpec(KeysKind, newKeysOp)
plan.RegisterProcedureSpec(KeysKind, newKeysProcedure, KeysKind)
execute.RegisterTransformation(KeysKind, createKeysTransformation)
func createKeysOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
spec := new(KeysOpSpec)
if array, ok, err := args.GetArray("except", semantic.String); err != nil {
return nil, err
} else if ok {
spec.Except, err = interpreter.ToStringArray(array)
if err != nil {
return nil, err
} else {
spec.Except = keysExceptDefaultValue
return spec, nil
func newKeysOp() query.OperationSpec {
return new(KeysOpSpec)
func (s *KeysOpSpec) Kind() query.OperationKind {
return KeysKind
// KeysProcedureSpec is the planner-level spec for keys.
type KeysProcedureSpec struct {
// Except lists column labels to leave out of the result.
Except []string
func newKeysProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*KeysOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
return &KeysProcedureSpec{
Except: spec.Except,
}, nil
func (s *KeysProcedureSpec) Kind() plan.ProcedureKind {
return KeysKind
func (s *KeysProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(KeysProcedureSpec)
*ns = *s
return ns
// KeysPointLimitRewriteRule is a rewrite rule rooted at from procedures; its
// Rewrite method sets a points limit on a from procedure that has a keys
// child and no limit yet.
type KeysPointLimitRewriteRule struct {
func (r KeysPointLimitRewriteRule) Root() plan.ProcedureKind {
return FromKind
func (r KeysPointLimitRewriteRule) Rewrite(pr *plan.Procedure, planner plan.PlanRewriter) error {
fromSpec, ok := pr.Spec.(*FromProcedureSpec)
if !ok {
return nil
var keys *KeysProcedureSpec
pr.DoChildren(func(child *plan.Procedure) {
if d, ok := child.Spec.(*KeysProcedureSpec); ok {
keys = d
if keys == nil {
return nil
if !fromSpec.LimitSet {
fromSpec.LimitSet = true
fromSpec.PointsLimit = -1
return nil
func createKeysTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*KeysProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
cache := execute.NewBlockBuilderCache(a.Allocator())
d := execute.NewDataset(id, mode, cache)
t := NewKeysTransformation(d, cache, s)
return t, d, nil
// keysTransformation emits the column labels of each block it receives.
type keysTransformation struct {
// d receives retractions, watermark and processing-time updates.
d execute.Dataset
// cache supplies the builder for each output block.
cache execute.BlockBuilderCache
// except holds the labels to leave out; Process compares it in order
// against the sorted column labels.
except []string
func NewKeysTransformation(d execute.Dataset, cache execute.BlockBuilderCache, spec *KeysProcedureSpec) *keysTransformation {
var except []string
if len(spec.Except) > 0 {
except = append([]string{}, spec.Except...)
return &keysTransformation{
d: d,
cache: cache,
except: except,
func (t *keysTransformation) RetractBlock(id execute.DatasetID, key execute.PartitionKey) error {
return t.d.RetractBlock(key)
// Process writes one output row per column label of b, in sorted label
// order, leaving out labels found in t.except. Each row holds the block's key
// values plus the label in a string column named by
// execute.DefaultValueColLabel. It returns an error when a block with the
// same key has already been processed.
func (t *keysTransformation) Process(id execute.DatasetID, b execute.Block) error {
builder, created := t.cache.BlockBuilder(b.Key())
if !created {
return fmt.Errorf("keys found duplicate block with key: %v", b.Key())
execute.AddBlockKeyCols(b.Key(), builder)
colIdx := builder.AddCol(execute.ColMeta{Label: execute.DefaultValueColLabel, Type: execute.TString})
cols := b.Cols()
// NOTE(review): this sorts the slice returned by b.Cols() in place; if that
// slice is shared with the block, its column order changes too — confirm.
sort.Slice(cols, func(i, j int) bool {
return cols[i].Label < cols[j].Label
var i int
if len(t.except) > 0 {
var j int
// Lock-step walk over the sorted labels and t.except; this relies on
// t.except being sorted in the same order.
// NOTE(review): the index increments in each branch are not visible in
// this copy.
for i < len(cols) && j < len(t.except) {
c := strings.Compare(cols[i].Label, t.except[j])
if c < 0 {
execute.AppendKeyValues(b.Key(), builder)
builder.AppendString(colIdx, cols[i].Label)
} else if c > 0 {
} else {
// add remaining
for ; i < len(cols); i++ {
execute.AppendKeyValues(b.Key(), builder)
builder.AppendString(colIdx, cols[i].Label)
// TODO: this is a hack
// The callback ignores the data; b.Do is called only for its effect.
return b.Do(func(execute.ColReader) error {
return nil
func (t *keysTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
return t.d.UpdateWatermark(mark)
func (t *keysTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
return t.d.UpdateProcessingTime(pt)
// Finish receives the completion signal (and optional error) from the parent
// dataset id.
// NOTE(review): no body is visible in this copy; presumably completion is
// forwarded to t.d — confirm.
func (t *keysTransformation) Finish(id execute.DatasetID, err error) {
@ -0,0 +1,146 @@
package functions_test
import (
// TestKeys_Process is a table-driven test: each case feeds the input blocks
// through a keys transformation built from spec and compares the produced
// blocks with want.
// NOTE(review): several structural lines (block literals, expected rows,
// closing braces, the t.Run name argument, the helper call) are not visible
// in this copy.
func TestKeys_Process(t *testing.T) {
testCases := []struct {
name string
spec *functions.KeysProcedureSpec
data []execute.Block
want []*executetest.Block
// No exclusions configured.
name: "one block",
spec: &functions.KeysProcedureSpec{},
data: []execute.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "tag0", Type: execute.TString},
{Label: "tag1", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(1), 2.0},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_value", Type: execute.TString},
Data: [][]interface{}{
// The exclusion list is given in non-sorted order.
name: "one block except",
spec: &functions.KeysProcedureSpec{Except: []string{"_value", "_time"}},
data: []execute.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "tag0", Type: execute.TString},
{Label: "tag1", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(1), 2.0},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_value", Type: execute.TString},
Data: [][]interface{}{
// Two blocks with different key columns; each yields its own label rows.
name: "two blocks",
spec: &functions.KeysProcedureSpec{},
data: []execute.Block{
KeyCols: []string{"tag0", "tag1"},
ColMeta: []execute.ColMeta{
{Label: "tag0", Type: execute.TString},
{Label: "tag1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{"tag0-0", "tag1-0", execute.Time(1), 2.0},
KeyCols: []string{"tag0", "tag2"},
ColMeta: []execute.ColMeta{
{Label: "tag0", Type: execute.TString},
{Label: "tag2", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{"tag0-0", "tag2-0", execute.Time(1), 2.0},
want: []*executetest.Block{
KeyCols: []string{"tag0", "tag1"},
ColMeta: []execute.ColMeta{
{Label: "tag0", Type: execute.TString},
{Label: "tag1", Type: execute.TString},
{Label: "_value", Type: execute.TString},
Data: [][]interface{}{
{"tag0-0", "tag1-0", "_time"},
{"tag0-0", "tag1-0", "_value"},
{"tag0-0", "tag1-0", "tag0"},
{"tag0-0", "tag1-0", "tag1"},
KeyCols: []string{"tag0", "tag2"},
ColMeta: []execute.ColMeta{
{Label: "tag0", Type: execute.TString},
{Label: "tag2", Type: execute.TString},
{Label: "_value", Type: execute.TString},
Data: [][]interface{}{
{"tag0-0", "tag2-0", "_time"},
{"tag0-0", "tag2-0", "_value"},
{"tag0-0", "tag2-0", "tag0"},
{"tag0-0", "tag2-0", "tag2"},
for _, tc := range testCases {
// Rebind tc so the closure below captures this iteration's case.
tc := tc
t.Run(, func(t *testing.T) {
func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
return functions.NewKeysTransformation(d, c, tc.spec)
@ -0,0 +1,176 @@
package functions
import (
// LastKind is the name under which the last function, operation spec,
// procedure spec and transformation are registered in init.
const LastKind = "last"
// LastOpSpec is the operation spec for the last function.
// NOTE(review): other code in this file reads spec.SelectorConfig, so a
// selector configuration member presumably belongs here but is not visible
// in this copy — confirm.
type LastOpSpec struct {
// lastSignature starts from the default function signature; init adds the
// "column" and "useRowTime" parameters to it.
var lastSignature = query.DefaultFunctionSignature()
// init declares the "column" (string) and "useRowTime" (bool) parameters and
// registers the last function, operation spec, procedure spec and
// transformation under LastKind.
func init() {
lastSignature.Params["column"] = semantic.String
lastSignature.Params["useRowTime"] = semantic.Bool
query.RegisterFunction(LastKind, createLastOpSpec, lastSignature)
query.RegisterOpSpec(LastKind, newLastOp)
plan.RegisterProcedureSpec(LastKind, newLastProcedure, LastKind)
execute.RegisterTransformation(LastKind, createLastTransformation)
func createLastOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
spec := new(LastOpSpec)
if err := spec.SelectorConfig.ReadArgs(args); err != nil {
return nil, err
return spec, nil
func newLastOp() query.OperationSpec {
return new(LastOpSpec)
func (s *LastOpSpec) Kind() query.OperationKind {
return LastKind
// LastProcedureSpec is the procedure spec built by newLastProcedure.
// NOTE(review): the struct body is missing from this extract; it is constructed with
// a SelectorConfig field, presumably an embedded execute.SelectorConfig — confirm.
type LastProcedureSpec struct {
func newLastProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*LastOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
return &LastProcedureSpec{
SelectorConfig: spec.SelectorConfig,
}, nil
func (s *LastProcedureSpec) Kind() plan.ProcedureKind {
return LastKind
func (s *LastProcedureSpec) PushDownRules() []plan.PushDownRule {
return []plan.PushDownRule{{
Root: FromKind,
Through: []plan.ProcedureKind{GroupKind, LimitKind, FilterKind},
Match: func(spec plan.ProcedureSpec) bool {
selectSpec := spec.(*FromProcedureSpec)
return !selectSpec.AggregateSet
// PushDown applies last to the from procedure at root.
// If the from spec already has bounds, a limit or a descending order set, root is
// replaced with dup() and those settings are cleared on the duplicate. Otherwise the
// spec is given bounds of [query.MinTime, query.Now], a points limit of 1 and
// descending order.
// NOTE(review): this extract has lost its closing braces and possibly an early
// return after the reset; TestLast_PushDown_Duplicate expects the duplicate to keep
// zero values, which suggests the reset branch returns — confirm against the
// original file.
func (s *LastProcedureSpec) PushDown(root *plan.Procedure, dup func() *plan.Procedure) {
selectSpec := root.Spec.(*FromProcedureSpec)
if selectSpec.BoundsSet || selectSpec.LimitSet || selectSpec.DescendingSet {
// Conflicting settings already present: work on a duplicate and clear them.
root = dup()
selectSpec = root.Spec.(*FromProcedureSpec)
selectSpec.BoundsSet = false
selectSpec.Bounds = plan.BoundsSpec{}
selectSpec.LimitSet = false
selectSpec.PointsLimit = 0
selectSpec.SeriesLimit = 0
selectSpec.SeriesOffset = 0
selectSpec.DescendingSet = false
selectSpec.Descending = false
// Configure the from spec to read a single point in descending order.
selectSpec.BoundsSet = true
selectSpec.Bounds = plan.BoundsSpec{
Start: query.MinTime,
Stop: query.Now,
selectSpec.LimitSet = true
selectSpec.PointsLimit = 1
selectSpec.DescendingSet = true
selectSpec.Descending = true
func (s *LastProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(LastProcedureSpec)
ns.SelectorConfig = s.SelectorConfig
return ns
type LastSelector struct {
rows []execute.Row
func createLastTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
ps, ok := spec.(*LastProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", ps)
t, d := execute.NewRowSelectorTransformationAndDataset(id, mode, new(LastSelector), ps.SelectorConfig, a.Allocator())
return t, d, nil
func (s *LastSelector) reset() {
s.rows = nil
func (s *LastSelector) NewBoolSelector() execute.DoBoolRowSelector {
return s
func (s *LastSelector) NewIntSelector() execute.DoIntRowSelector {
return s
func (s *LastSelector) NewUIntSelector() execute.DoUIntRowSelector {
return s
func (s *LastSelector) NewFloatSelector() execute.DoFloatRowSelector {
return s
func (s *LastSelector) NewStringSelector() execute.DoStringRowSelector {
return s
func (s *LastSelector) Rows() []execute.Row {
return s.rows
func (s *LastSelector) selectLast(l int, cr execute.ColReader) {
if l > 0 {
s.rows = []execute.Row{execute.ReadRow(l-1, cr)}
func (s *LastSelector) DoBool(vs []bool, cr execute.ColReader) {
s.selectLast(len(vs), cr)
func (s *LastSelector) DoInt(vs []int64, cr execute.ColReader) {
s.selectLast(len(vs), cr)
func (s *LastSelector) DoUInt(vs []uint64, cr execute.ColReader) {
s.selectLast(len(vs), cr)
func (s *LastSelector) DoFloat(vs []float64, cr execute.ColReader) {
s.selectLast(len(vs), cr)
func (s *LastSelector) DoString(vs []string, cr execute.ColReader) {
s.selectLast(len(vs), cr)
@ -0,0 +1,135 @@
package functions_test
import (
func TestLastOperation_Marshaling(t *testing.T) {
data := []byte(`{"id":"last","kind":"last","spec":{"column":"bar"}}`)
op := &query.Operation{
ID: "last",
Spec: &functions.LastOpSpec{
SelectorConfig: execute.SelectorConfig{
Column: "bar",
querytest.OperationMarshalingTestHelper(t, data, op)
// TestLast_Process is a table-driven test: each case pairs an input block with the
// rows expected from the last selector. The single case here expects the row at
// time 90 from a ten-row block.
// NOTE(review): this extract is missing lines — closing braces, the body of the
// t.Run closure, and the first argument to t.Run (presumably the case name).
// Restore them from the original file before compiling.
func TestLast_Process(t *testing.T) {
testCases := []struct {
name string
data *executetest.Block
want []execute.Row
name: "last",
data: &executetest.Block{
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
Data: [][]interface{}{
{execute.Time(0), 0.0, "a", "y"},
{execute.Time(10), 5.0, "a", "x"},
{execute.Time(20), 9.0, "a", "y"},
{execute.Time(30), 4.0, "a", "x"},
{execute.Time(40), 6.0, "a", "y"},
{execute.Time(50), 8.0, "a", "x"},
{execute.Time(60), 1.0, "a", "y"},
{execute.Time(70), 2.0, "a", "x"},
{execute.Time(80), 3.0, "a", "y"},
{execute.Time(90), 7.0, "a", "x"},
want: []execute.Row{{
Values: []interface{}{execute.Time(90), 7.0, "a", "x"},
for _, tc := range testCases {
tc := tc
t.Run(, func(t *testing.T) {
func BenchmarkLast(b *testing.B) {
executetest.RowSelectorFuncBenchmarkHelper(b, new(functions.LastSelector), NormalBlock)
func TestLast_PushDown_Match(t *testing.T) {
spec := new(functions.LastProcedureSpec)
from := new(functions.FromProcedureSpec)
// Should not match when an aggregate is set
from.AggregateSet = true
plantest.PhysicalPlan_PushDown_Match_TestHelper(t, spec, from, []bool{false})
// Should match when no aggregate is set
from.AggregateSet = false
plantest.PhysicalPlan_PushDown_Match_TestHelper(t, spec, from, []bool{true})
func TestLast_PushDown(t *testing.T) {
spec := new(functions.LastProcedureSpec)
root := &plan.Procedure{
Spec: new(functions.FromProcedureSpec),
want := &plan.Procedure{
Spec: &functions.FromProcedureSpec{
BoundsSet: true,
Bounds: plan.BoundsSpec{
Start: query.MinTime,
Stop: query.Now,
LimitSet: true,
PointsLimit: 1,
DescendingSet: true,
Descending: true,
plantest.PhysicalPlan_PushDown_TestHelper(t, spec, root, false, want)
func TestLast_PushDown_Duplicate(t *testing.T) {
spec := new(functions.LastProcedureSpec)
root := &plan.Procedure{
Spec: &functions.FromProcedureSpec{
BoundsSet: true,
Bounds: plan.BoundsSpec{
Start: query.MinTime,
Stop: query.Now,
LimitSet: true,
PointsLimit: 1,
DescendingSet: true,
Descending: true,
want := &plan.Procedure{
// Expect the duplicate has been reset to zero values
Spec: new(functions.FromProcedureSpec),
plantest.PhysicalPlan_PushDown_TestHelper(t, spec, root, true, want)
@ -0,0 +1,219 @@
package functions
import (
// LimitKind is the kind name used to register the limit function and its specs.
const LimitKind = "limit"

// LimitOpSpec caps how many rows each block may return.
// An offset is not supported yet; the field is kept commented out.
type LimitOpSpec struct {
	// N is the row limit, encoded as "n" in JSON.
	N int64 `json:"n"`
	//Offset int64 `json:"offset"`
}
// limitSignature is the function signature registered for limit. It starts from
// query.DefaultFunctionSignature() and is extended in init.
var limitSignature = query.DefaultFunctionSignature()
// init adds the "n" (int) parameter to limitSignature, then registers the function,
// op spec, procedure spec and transformation constructors under LimitKind.
func init() {
limitSignature.Params["n"] = semantic.Int
query.RegisterFunction(LimitKind, createLimitOpSpec, limitSignature)
query.RegisterOpSpec(LimitKind, newLimitOp)
plan.RegisterProcedureSpec(LimitKind, newLimitProcedure, LimitKind)
// TODO register a range transformation. Currently range is only supported if it is pushed down into a select procedure.
// NOTE(review): the TODO above talks about "range" although this file registers the
// limit transformation on the next line; it looks copied from elsewhere — confirm
// whether it still applies.
execute.RegisterTransformation(LimitKind, createLimitTransformation)
func createLimitOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
spec := new(LimitOpSpec)
n, err := args.GetRequiredInt("n")
if err != nil {
return nil, err
spec.N = n
return spec, nil
func newLimitOp() query.OperationSpec {
return new(LimitOpSpec)
func (s *LimitOpSpec) Kind() query.OperationKind {
return LimitKind
// LimitProcedureSpec is the procedure-level form of LimitOpSpec.
type LimitProcedureSpec struct {
	// N is the row limit, encoded as "n" in JSON.
	N int64 `json:"n"`
	//Offset int64 `json:"offset"`
}
func newLimitProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*LimitOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
return &LimitProcedureSpec{
N: spec.N,
//Offset: spec.Offset,
}, nil
func (s *LimitProcedureSpec) Kind() plan.ProcedureKind {
return LimitKind
func (s *LimitProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(LimitProcedureSpec)
ns.N = s.N
//ns.Offset = s.Offset
return ns
func (s *LimitProcedureSpec) PushDownRules() []plan.PushDownRule {
return []plan.PushDownRule{{
Root: FromKind,
Through: []plan.ProcedureKind{GroupKind, RangeKind, FilterKind},
// PushDown applies this limit to the from procedure at root.
// If the from spec already has a limit set, root is replaced with dup() and the
// limit fields are cleared on the duplicate. Otherwise the spec gets a points limit
// of s.N with series limit and offset set to 0.
// NOTE(review): this extract has lost its closing braces and possibly an early
// return after the reset; TestLimit_PushDown_Duplicate expects the duplicate to
// keep zero values, which suggests the reset branch returns — confirm against the
// original file.
func (s *LimitProcedureSpec) PushDown(root *plan.Procedure, dup func() *plan.Procedure) {
selectSpec := root.Spec.(*FromProcedureSpec)
if selectSpec.LimitSet {
// A limit is already present: work on a duplicate and clear it.
root = dup()
selectSpec = root.Spec.(*FromProcedureSpec)
selectSpec.LimitSet = false
selectSpec.PointsLimit = 0
selectSpec.SeriesLimit = 0
selectSpec.SeriesOffset = 0
// Record this limit on the from spec.
selectSpec.LimitSet = true
selectSpec.PointsLimit = s.N
selectSpec.SeriesLimit = 0
selectSpec.SeriesOffset = 0
func createLimitTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*LimitProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
cache := execute.NewBlockBuilderCache(a.Allocator())
d := execute.NewDataset(id, mode, cache)
t := NewLimitTransformation(d, cache, s)
return t, d, nil
type limitTransformation struct {
d execute.Dataset
cache execute.BlockBuilderCache
n int
colMap []int
func NewLimitTransformation(d execute.Dataset, cache execute.BlockBuilderCache, spec *LimitProcedureSpec) *limitTransformation {
return &limitTransformation{
d: d,
cache: cache,
n: int(spec.N),
func (t *limitTransformation) RetractBlock(id execute.DatasetID, key execute.PartitionKey) error {
return t.d.RetractBlock(key)
func (t *limitTransformation) Process(id execute.DatasetID, b execute.Block) error {
builder, created := t.cache.BlockBuilder(b.Key())
if !created {
return fmt.Errorf("limit found duplicate block with key: %v", b.Key())
execute.AddBlockCols(b, builder)
ncols := builder.NCols()
if cap(t.colMap) < ncols {
t.colMap = make([]int, ncols)
for j := range t.colMap {
t.colMap[j] = j
} else {
t.colMap = t.colMap[:ncols]
// AppendBlock with limit
n := t.n
b.Do(func(cr execute.ColReader) error {
if n <= 0 {
// Returning an error terminates iteration
return errors.New("finished")
l := cr.Len()
if l > n {
l = n
n -= l
lcr := limitColReader{
ColReader: cr,
n: l,
execute.AppendCols(lcr, builder, t.colMap)
return nil
return nil
// limitColReader exposes only the first n rows of a column reader.
// NOTE(review): the accessors call cr.ColReader and Process sets a ColReader field,
// so an embedded execute.ColReader is presumably declared on a line missing from
// this extract — confirm.
type limitColReader struct {
// n is the number of leading rows exposed by Len and the typed accessors.
n int
func (cr limitColReader) Len() int {
return cr.n
func (cr limitColReader) Bools(j int) []bool {
return cr.ColReader.Bools(j)[:cr.n]
func (cr limitColReader) Ints(j int) []int64 {
return cr.ColReader.Ints(j)[:cr.n]
func (cr limitColReader) UInts(j int) []uint64 {
return cr.ColReader.UInts(j)[:cr.n]
func (cr limitColReader) Floats(j int) []float64 {
return cr.ColReader.Floats(j)[:cr.n]
func (cr limitColReader) Strings(j int) []string {
return cr.ColReader.Strings(j)[:cr.n]
func (cr limitColReader) Times(j int) []execute.Time {
return cr.ColReader.Times(j)[:cr.n]
func (t *limitTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
return t.d.UpdateWatermark(mark)
func (t *limitTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
return t.d.UpdateProcessingTime(pt)
// Finish is called with the upstream dataset id and its terminal error.
// NOTE(review): the method body is missing from this extract; presumably it forwards
// err to the downstream dataset — confirm against the original file.
func (t *limitTransformation) Finish(id execute.DatasetID, err error) {
@ -0,0 +1,167 @@
package functions_test
import (
func TestLimitOperation_Marshaling(t *testing.T) {
data := []byte(`{"id":"limit","kind":"limit","spec":{"n":10}}`)
op := &query.Operation{
ID: "limit",
Spec: &functions.LimitOpSpec{
N: 10,
querytest.OperationMarshalingTestHelper(t, data, op)
// TestLimit_Process is a table-driven test of the limit transformation. The
// "one block" case keeps the first of two rows with N=1; the "multiple blocks"
// case keeps the first two rows of each of two three-row blocks with N=2.
// NOTE(review): this extract is missing lines — closing braces, the block literal
// openers in the second case, the helper call around the transformation factory,
// and the first argument to t.Run (presumably the case name). Restore them from
// the original file before compiling.
func TestLimit_Process(t *testing.T) {
testCases := []struct {
name string
spec *functions.LimitProcedureSpec
data []execute.Block
want []*executetest.Block
name: "one block",
spec: &functions.LimitProcedureSpec{
N: 1,
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(1), 2.0},
{execute.Time(2), 1.0},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(1), 2.0},
name: "multiple blocks",
spec: &functions.LimitProcedureSpec{
N: 2,
data: []execute.Block{
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{"a", execute.Time(1), 3.0},
{"a", execute.Time(2), 2.0},
{"a", execute.Time(2), 1.0},
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{"b", execute.Time(3), 3.0},
{"b", execute.Time(3), 2.0},
{"b", execute.Time(4), 1.0},
want: []*executetest.Block{
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{"a", execute.Time(1), 3.0},
{"a", execute.Time(2), 2.0},
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{"b", execute.Time(3), 3.0},
{"b", execute.Time(3), 2.0},
for _, tc := range testCases {
tc := tc
t.Run(, func(t *testing.T) {
func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
return functions.NewLimitTransformation(d, c, tc.spec)
func TestLimit_PushDown(t *testing.T) {
spec := &functions.LimitProcedureSpec{
N: 42,
root := &plan.Procedure{
Spec: new(functions.FromProcedureSpec),
want := &plan.Procedure{
Spec: &functions.FromProcedureSpec{
LimitSet: true,
PointsLimit: 42,
plantest.PhysicalPlan_PushDown_TestHelper(t, spec, root, false, want)
func TestLimit_PushDown_Duplicate(t *testing.T) {
spec := &functions.LimitProcedureSpec{
N: 9,
root := &plan.Procedure{
Spec: &functions.FromProcedureSpec{
LimitSet: true,
PointsLimit: 42,
want := &plan.Procedure{
// Expect the duplicate has been reset to zero values
Spec: new(functions.FromProcedureSpec),
plantest.PhysicalPlan_PushDown_TestHelper(t, spec, root, true, want)
@ -0,0 +1,171 @@
package functions
import (
const MapKind = "map"
type MapOpSpec struct {
Fn *semantic.FunctionExpression `json:"fn"`
// mapSignature is the function signature registered for map. It starts from
// query.DefaultFunctionSignature() and is extended in init.
var mapSignature = query.DefaultFunctionSignature()
// init adds the "fn" (function) parameter to mapSignature, then registers the
// function, op spec, procedure spec and transformation constructors under MapKind.
func init() {
mapSignature.Params["fn"] = semantic.Function
query.RegisterFunction(MapKind, createMapOpSpec, mapSignature)
query.RegisterOpSpec(MapKind, newMapOp)
plan.RegisterProcedureSpec(MapKind, newMapProcedure, MapKind)
execute.RegisterTransformation(MapKind, createMapTransformation)
func createMapOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
f, err := args.GetRequiredFunction("fn")
if err != nil {
return nil, err
fn, err := interpreter.ResolveFunction(f)
if err != nil {
return nil, err
return &MapOpSpec{
Fn: fn,
}, nil
func newMapOp() query.OperationSpec {
return new(MapOpSpec)
func (s *MapOpSpec) Kind() query.OperationKind {
return MapKind
type MapProcedureSpec struct {
Fn *semantic.FunctionExpression
func newMapProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*MapOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
return &MapProcedureSpec{
Fn: spec.Fn,
}, nil
func (s *MapProcedureSpec) Kind() plan.ProcedureKind {
return MapKind
func (s *MapProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(MapProcedureSpec)
ns.Fn = s.Fn.Copy().(*semantic.FunctionExpression)
return ns
func createMapTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*MapProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
cache := execute.NewBlockBuilderCache(a.Allocator())
d := execute.NewDataset(id, mode, cache)
t, err := NewMapTransformation(d, cache, s)
if err != nil {
return nil, nil, err
return t, d, nil
type mapTransformation struct {
d execute.Dataset
cache execute.BlockBuilderCache
fn *execute.RowMapFn
func NewMapTransformation(d execute.Dataset, cache execute.BlockBuilderCache, spec *MapProcedureSpec) (*mapTransformation, error) {
fn, err := execute.NewRowMapFn(spec.Fn)
if err != nil {
return nil, err
return &mapTransformation{
d: d,
cache: cache,
fn: fn,
}, nil
func (t *mapTransformation) RetractBlock(id execute.DatasetID, key execute.PartitionKey) error {
return t.d.RetractBlock(key)
// Process prepares t.fn for the column types of b, evaluates it on every row, and
// appends the resulting values to the block builder chosen by that row's partition
// key. A preparation error is returned; an evaluation error is logged.
// NOTE(review): this extract is missing lines — closing braces, whatever follows the
// log call (presumably the row is skipped), a step between collecting and iterating
// the keys (the comment says "sorted order"), and the call that wraps the
// Label/Type fields (presumably adding a column to the builder). Confirm against
// the original file.
func (t *mapTransformation) Process(id execute.DatasetID, b execute.Block) error {
// Prepare the functions for the column types.
cols := b.Cols()
err := t.fn.Prepare(cols)
if err != nil {
// TODO(nathanielc): Should we not fail the query for failed compilation?
return err
return b.Do(func(cr execute.ColReader) error {
l := cr.Len()
for i := 0; i < l; i++ {
m, err := t.fn.Eval(i, cr)
if err != nil {
log.Printf("failed to evaluate map expression: %v", err)
// Output rows are grouped by the partition key computed for input row i.
key := execute.PartitionKeyForRow(i, cr)
builder, created := t.cache.BlockBuilder(key)
if created {
// Add columns from function in sorted order
properties := t.fn.Type().Properties()
keys := make([]string, 0, len(properties))
for k := range properties {
keys = append(keys, k)
for _, k := range keys {
Label: k,
Type: execute.ConvertFromKind(properties[k].Kind()),
// Append the function result for each builder column, looked up by column label.
for j, c := range builder.Cols() {
v, _ := m.Get(c.Label)
execute.AppendValue(builder, j, v)
return nil
func (t *mapTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
return t.d.UpdateWatermark(mark)
func (t *mapTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
return t.d.UpdateProcessingTime(pt)
// Finish is called with the upstream dataset id and its terminal error.
// NOTE(review): the method body is missing from this extract; presumably it forwards
// err to the downstream dataset — confirm against the original file.
func (t *mapTransformation) Finish(id execute.DatasetID, err error) {
@ -0,0 +1,400 @@
package functions_test
import (
// TestMap_NewQuery checks the query spec built from two scripts: a map whose body
// adds the literal 1 to r._value, and a map that references a variable x = 2, which
// is expected to appear in the spec as the integer literal 2.
// NOTE(review): this extract is missing lines (closing braces and the openers of
// several nested literals); restore them from the original file before compiling.
func TestMap_NewQuery(t *testing.T) {
tests := []querytest.NewQueryTestCase{
Name: "simple static map",
Raw: `from(db:"mydb") |> map(fn: (r) => r._value + 1)`,
Want: &query.Spec{
Operations: []*query.Operation{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
ID: "map1",
Spec: &functions.MapOpSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.AdditionOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "r",
Property: "_value",
Right: &semantic.IntegerLiteral{Value: 1},
Edges: []query.Edge{
{Parent: "from0", Child: "map1"},
Name: "resolve map",
Raw: `x = 2 from(db:"mydb") |> map(fn: (r) => r._value + x)`,
Want: &query.Spec{
Operations: []*query.Operation{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
ID: "map1",
Spec: &functions.MapOpSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.AdditionOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "r",
Property: "_value",
Right: &semantic.IntegerLiteral{Value: 2},
Edges: []query.Edge{
{Parent: "from0", Child: "map1"},
for _, tc := range tests {
tc := tc
t.Run(tc.Name, func(t *testing.T) {
querytest.NewQueryTestHelper(t, tc)
// TestMapOperation_Marshaling checks a map operation whose function subtracts 5.6
// from r._value against its JSON encoding using the shared marshaling helper.
// NOTE(review): this extract is missing lines — most of the JSON document, including
// the end of the raw string literal, and the closing braces of the Go literal.
// Restore them from the original file before compiling.
func TestMapOperation_Marshaling(t *testing.T) {
data := []byte(`{
"type": "ArrowFunctionExpression",
"params": [{"type":"FunctionParam","key":{"type":"Identifier","name":"r"}}],
"operator": "-",
"object": {
"type": "IdentifierExpression",
"property": "_value"
"value": 5.6
op := &query.Operation{
ID: "map",
Spec: &functions.MapOpSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.SubtractionOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "r",
Property: "_value",
Right: &semantic.FloatLiteral{Value: 5.6},
querytest.OperationMarshalingTestHelper(t, data, op)
// TestMap_Process is a table-driven test of the map transformation. Each case maps
// rows to an object with _time and _value properties: adding 5 to _value, squaring
// _value, and converting int and uint values with float(v: r._value).
// NOTE(review): this extract is missing lines — closing braces, the openers of
// several nested literals, the helper call around the transformation factory, the
// statement inside the err check, and the first argument to t.Run (presumably the
// case name). Restore them from the original file before compiling.
func TestMap_Process(t *testing.T) {
testCases := []struct {
name string
spec *functions.MapProcedureSpec
data []execute.Block
want []*executetest.Block
name: `_value+5`,
spec: &functions.MapProcedureSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.ObjectExpression{
Properties: []*semantic.Property{
Key: &semantic.Identifier{Name: "_time"},
Value: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "r",
Property: "_time",
Key: &semantic.Identifier{Name: "_value"},
Value: &semantic.BinaryExpression{
Operator: ast.AdditionOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "r",
Property: "_value",
Right: &semantic.FloatLiteral{
Value: 5,
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(1), 1.0},
{execute.Time(2), 6.0},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(1), 6.0},
{execute.Time(2), 11.0},
name: `_value*_value`,
spec: &functions.MapProcedureSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.ObjectExpression{
Properties: []*semantic.Property{
Key: &semantic.Identifier{Name: "_time"},
Value: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "r",
Property: "_time",
Key: &semantic.Identifier{Name: "_value"},
Value: &semantic.BinaryExpression{
Operator: ast.MultiplicationOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "r",
Property: "_value",
Right: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "r",
Property: "_value",
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(1), 1.0},
{execute.Time(2), 6.0},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(1), 1.0},
{execute.Time(2), 36.0},
name: "float(r._value) int",
spec: &functions.MapProcedureSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.ObjectExpression{
Properties: []*semantic.Property{
Key: &semantic.Identifier{Name: "_time"},
Value: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "r",
Property: "_time",
Key: &semantic.Identifier{Name: "_value"},
Value: &semantic.CallExpression{
Callee: &semantic.IdentifierExpression{Name: "float"},
Arguments: &semantic.ObjectExpression{
Properties: []*semantic.Property{{
Key: &semantic.Identifier{Name: "v"},
Value: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "r",
Property: "_value",
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
Data: [][]interface{}{
{execute.Time(1), int64(1)},
{execute.Time(2), int64(6)},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(1), 1.0},
{execute.Time(2), 6.0},
name: "float(r._value) uint",
spec: &functions.MapProcedureSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.ObjectExpression{
Properties: []*semantic.Property{
Key: &semantic.Identifier{Name: "_time"},
Value: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "r",
Property: "_time",
Key: &semantic.Identifier{Name: "_value"},
Value: &semantic.CallExpression{
Callee: &semantic.IdentifierExpression{Name: "float"},
Arguments: &semantic.ObjectExpression{
Properties: []*semantic.Property{{
Key: &semantic.Identifier{Name: "v"},
Value: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "r",
Property: "_value",
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TUInt},
Data: [][]interface{}{
{execute.Time(1), uint64(1)},
{execute.Time(2), uint64(6)},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
Data: [][]interface{}{
{execute.Time(1), 1.0},
{execute.Time(2), 6.0},
for _, tc := range testCases {
tc := tc
t.Run(, func(t *testing.T) {
func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
f, err := functions.NewMapTransformation(d, c, tc.spec)
if err != nil {
return f
@ -0,0 +1,167 @@
package functions
import (
// MaxKind is the kind name under which the max function, operation spec,
// procedure spec and transformation are registered in init.
const MaxKind = "max"
// MaxOpSpec is the operation spec produced by createMaxOpSpec and newMaxOp.
// NOTE(review): the struct body is missing from this extract; other code reads
// spec.SelectorConfig, so it presumably embeds execute.SelectorConfig — confirm.
type MaxOpSpec struct {
// maxSignature is the function signature registered for max. It starts from
// query.DefaultFunctionSignature() and is extended in init.
var maxSignature = query.DefaultFunctionSignature()
// init adds the "column" (string) and "useRowTime" (bool) parameters to
// maxSignature, then registers the function, op spec, procedure spec and
// transformation constructors under MaxKind.
func init() {
maxSignature.Params["column"] = semantic.String
maxSignature.Params["useRowTime"] = semantic.Bool
query.RegisterFunction(MaxKind, createMaxOpSpec, maxSignature)
query.RegisterOpSpec(MaxKind, newMaxOp)
plan.RegisterProcedureSpec(MaxKind, newMaxProcedure, MaxKind)
execute.RegisterTransformation(MaxKind, createMaxTransformation)
func createMaxOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
spec := new(MaxOpSpec)
if err := spec.SelectorConfig.ReadArgs(args); err != nil {
return nil, err
return spec, nil
func newMaxOp() query.OperationSpec {
return new(MaxOpSpec)
func (s *MaxOpSpec) Kind() query.OperationKind {
return MaxKind
// MaxProcedureSpec is the procedure spec built by newMaxProcedure.
// NOTE(review): the struct body is missing from this extract; it is constructed with
// a SelectorConfig field, presumably an embedded execute.SelectorConfig — confirm.
type MaxProcedureSpec struct {
func newMaxProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*MaxOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
return &MaxProcedureSpec{
SelectorConfig: spec.SelectorConfig,
}, nil
func (s *MaxProcedureSpec) Kind() plan.ProcedureKind {
return MaxKind
func (s *MaxProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(MaxProcedureSpec)
ns.SelectorConfig = s.SelectorConfig
return ns
type MaxSelector struct {
set bool
rows []execute.Row
func createMaxTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
ps, ok := spec.(*MaxProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", ps)
t, d := execute.NewRowSelectorTransformationAndDataset(id, mode, new(MaxSelector), ps.SelectorConfig, a.Allocator())
return t, d, nil
// MaxIntSelector tracks the largest int64 value seen by DoInt.
// NOTE(review): DoInt also uses s.set and s.selectRow, so this type (and the two
// below) presumably embeds MaxSelector on a line missing from this extract — confirm.
type MaxIntSelector struct {
max int64
// MaxUIntSelector tracks the largest uint64 value seen by DoUInt.
type MaxUIntSelector struct {
max uint64
// MaxFloatSelector tracks the largest float64 value seen by DoFloat.
type MaxFloatSelector struct {
max float64
func (s *MaxSelector) NewBoolSelector() execute.DoBoolRowSelector {
return nil
func (s *MaxSelector) NewIntSelector() execute.DoIntRowSelector {
return new(MaxIntSelector)
func (s *MaxSelector) NewUIntSelector() execute.DoUIntRowSelector {
return new(MaxUIntSelector)
func (s *MaxSelector) NewFloatSelector() execute.DoFloatRowSelector {
return new(MaxFloatSelector)
func (s *MaxSelector) NewStringSelector() execute.DoStringRowSelector {
return nil
func (s *MaxSelector) Rows() []execute.Row {
if !s.set {
return nil
return s.rows
func (s *MaxSelector) selectRow(idx int, cr execute.ColReader) {
// Capture row
if idx >= 0 {
s.rows = []execute.Row{execute.ReadRow(idx, cr)}
func (s *MaxIntSelector) DoInt(vs []int64, cr execute.ColReader) {
maxIdx := -1
for i, v := range vs {
if !s.set || v > s.max {
s.set = true
s.max = v
maxIdx = i
s.selectRow(maxIdx, cr)
func (s *MaxUIntSelector) DoUInt(vs []uint64, cr execute.ColReader) {
maxIdx := -1
for i, v := range vs {
if !s.set || v > s.max {
s.set = true
s.max = v
maxIdx = i
s.selectRow(maxIdx, cr)
func (s *MaxFloatSelector) DoFloat(vs []float64, cr execute.ColReader) {
maxIdx := -1
for i, v := range vs {
if !s.set || v > s.max {
s.set = true
s.max = v
maxIdx = i
s.selectRow(maxIdx, cr)
Some files were not shown because too many files have changed in this diff Show More
