initial copy of ifql repo

pull/10616/head
Nathaniel Cook 2018-05-21 15:13:54 -06:00
parent 0eb0f1548c
commit 51b8aebebf
199 changed files with 63941 additions and 0 deletions

query/Makefile Normal file

@@ -0,0 +1,66 @@
VERSION ?= $(shell git describe --always --tags)

SUBDIRS := ast parser promql

GO_ARGS=-tags '$(GO_TAGS)'

export GO_BUILD=go build $(GO_ARGS)
export GO_TEST=go test $(GO_ARGS)
export GO_GENERATE=go generate $(GO_ARGS)

SOURCES := $(shell find . -name '*.go' -not -name '*_test.go')
SOURCES_NO_VENDOR := $(shell find . -path ./vendor -prune -o -name "*.go" -not -name '*_test.go' -print)

all: Gopkg.lock $(SUBDIRS) bin/ifql bin/ifqld

$(SUBDIRS): bin/pigeon bin/cmpgen
	$(MAKE) -C $@ $(MAKECMDGOALS)

bin/ifql: $(SOURCES) bin/pigeon bin/cmpgen
	$(GO_BUILD) -i -o bin/ifql ./cmd/ifql

bin/ifqld: $(SOURCES) bin/pigeon bin/cmpgen
	$(GO_BUILD) -i -o bin/ifqld ./cmd/ifqld

bin/pigeon: ./vendor/github.com/mna/pigeon/main.go
	go build -i -o bin/pigeon ./vendor/github.com/mna/pigeon

bin/cmpgen: ./ast/asttest/cmpgen/main.go
	go build -i -o bin/cmpgen ./ast/asttest/cmpgen

Gopkg.lock: Gopkg.toml
	dep ensure -v

vendor/github.com/mna/pigeon/main.go: Gopkg.lock
	dep ensure -v

fmt: $(SOURCES_NO_VENDOR)
	goimports -w $^

update:
	dep ensure -v -update

test: Gopkg.lock bin/ifql
	$(GO_TEST) ./...

test-race: Gopkg.lock bin/ifql
	$(GO_TEST) -race ./...

bench: Gopkg.lock bin/ifql
	$(GO_TEST) -bench=. -run=^$$ ./...

bin/goreleaser:
	go build -i -o bin/goreleaser ./vendor/github.com/goreleaser/goreleaser

dist: bin/goreleaser
	PATH=./bin:${PATH} goreleaser --rm-dist --release-notes CHANGELOG.md

release: dist release-docker

release-docker:
	docker build -t quay.io/influxdb/ifqld:latest .
	docker tag quay.io/influxdb/ifqld:latest quay.io/influxdb/ifqld:${VERSION}
	docker push quay.io/influxdb/ifqld:latest
	docker push quay.io/influxdb/ifqld:${VERSION}

clean: $(SUBDIRS)
	rm -rf bin dist

.PHONY: all clean $(SUBDIRS) update test test-race bench release docker dist fmt

query/README.md Normal file

@@ -0,0 +1,626 @@
# IFQL (Influx Query Language)
`ifqld` is an HTTP server for running **IFQL** queries to one or more InfluxDB
servers.
`ifqld` runs on port `8093` by default.
### Specification
The rough design specification is available here until we get full documentation up: http://bit.ly/ifql-spec
### INSTALLATION
1. Upgrade to InfluxDB >= 1.4.1
https://portal.influxdata.com/downloads
2. Update the InfluxDB configuration file to enable **IFQL** processing; restart
the InfluxDB server. InfluxDB will open port `8082` to accept **IFQL** queries.
> **This port has no authentication.**
```
[ifql]
enabled = true
log-enabled = true
bind-address = ":8082"
```
3. Download `ifqld` and install from https://github.com/influxdata/ifql/releases
4. Start `ifqld` with the InfluxDB host and port of `8082`. To run in federated
mode (see below), add the `--host` option for each InfluxDB host.
```sh
ifqld --verbose --host localhost:8082
```
5. To run a query, POST an **IFQL** query string to `/query` as the `q` parameter:
```sh
curl -XPOST --data-urlencode \
'q=from(db:"telegraf")
|> filter(fn: (r) => r["_measurement"] == "cpu" AND r["_field"] == "usage_user")
|> range(start:-170h)
|> sum()' \
http://localhost:8093/query
```
#### docker compose
To spin up a testing environment, run the following inside the root directory:
```
docker-compose up
```
This spins up an `influxdb` and an `ifqld` daemon ready to be used; `influxd` is exposed on ports `8086` and `8082`.
### Prometheus metrics
Metrics are exposed on `/metrics`.
`ifqld` records the number of queries and the number of different functions used within **IFQL** queries.
### Federated Mode
By passing the `--host` option multiple times `ifqld` will query multiple
InfluxDB servers.
For example:
```sh
ifqld --host influxdb1:8082 --host influxdb2:8082
```
The results from the multiple InfluxDB servers are merged together as if they came from
one server.
### Basic Syntax
IFQL constructs a query by starting with a table of data and passing the table through transformation steps to describe the desired query operations.
Transformations are represented as functions which take a table of data as an input argument and return a new table that has been transformed.
There is a special function `from` which is a source function, meaning it does not accept a table as input, but rather produces a table.
All other transformation functions accept at least one table and return a table as a result.
For example to get the last point for each series in a database you start by creating a table using `from` and then pass that table into the `limit` function.
```
// Select the last point per series in the telegraf database.
limit(table:from(db:"telegraf"), n:1)
```
Since it is common to chain long lists of transformations together, the pipe forward operator `|>` can be used to make reading the code easier.
These two expressions are equivalent:
```
// Select the last point per series in the telegraf database.
limit(table:from(db:"telegraf"), n:1)
// Same as above, but uses the pipe forward operator to indicate the flow of data.
from(db:"telegraf") |> limit(n:1)
```
Long lists of functions can thus be chained together:
```
// Get the first point per host from the last minute of data.
from(db:"telegraf") |> range(start:-1m) |> group(by:["host"]) |> first()
```
### Supported Functions
Below is a list of supported functions.
#### from
Starting point for all queries. Gets data from the specified database.
Example: `from(db:"telegraf")`
##### options
* `db` string
`from(db:"telegraf")`
* `hosts` array of strings
`from(db:"telegraf", hosts:["host1", "host2"])`
#### count
Counts the number of results
##### options
* `useStartTime` boolean
Use the start time as the timestamp of the resulting aggregate.
Example: `from(db:"telegraf") |> count()`
#### filter
Filters the results using an expression
Example:
```
from(db:"foo")
|> filter(fn: (r) => r["_measurement"]=="cpu" AND
r["_field"] == "usage_system" AND
r["service"] == "app-server")
|> range(start:-12h)
|> max()
```
##### options
* `fn` function(record) bool
Function to use when filtering the records.
The function must accept a single parameter, which will be the record, and return a boolean value.
Records which evaluate to true will be included in the results.
#### first
Returns the first result of the query
##### options
* `useStartTime` boolean
Use the start time as the timestamp of the resulting aggregate.
Example: `from(db:"telegraf") |> first()`
#### group
Groups results by a user-specified set of tags
##### options
* `by` array of strings
Group by these specific tag names
Cannot be used with `except` option
Example: `from(db: "telegraf") |> range(start: -30m) |> group(by: ["tag_a", "tag_b"])`
* `keep` array of strings
Keep specific tag keys that were not in `by` in the results
Example: `from(db: "telegraf") |> range(start: -30m) |> group(by: ["tag_a", "tag_b"], keep:["tag_c"])`
* `except` array of strings
Group by all but these tag keys
Cannot be used with `by` option
Example: `from(db: "telegraf") |> range(start: -30m) |> group(except: ["tag_a"], keep:["tag_b", "tag_c"])`
#### join
Join two time series together on time and the list of `on` keys.
Example:
```
cpu = from(db: "telegraf") |> filter(fn: (r) => r["_measurement"] == "cpu" and r["_field"] == "usage_user") |> range(start: -30m)
mem = from(db: "telegraf") |> filter(fn: (r) => r["_measurement"] == "mem" and r["_field"] == "used_percent") |> range(start: -30m)
join(tables:{cpu:cpu, mem:mem}, on:["host"], fn: (tables) => tables.cpu["_value"] + tables.mem["_value"])
```
##### options
* `tables` map of tables
Map of tables to join. Currently only two tables are allowed.
* `on` array of strings
List of tag keys that, when equal, produce a result set.
* `fn`
Defines the function that merges the values of the tables.
The function must be defined to accept a single parameter.
The parameter is a map, which uses the same keys found in the `tables` map.
The function is called for each joined set of records from the tables.
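The `fn` may also return a map (as shown in the Custom Functions section below), producing a column for each key; a sketch building on the `cpu`/`mem` tables above:
```
// Sketch: return a map from fn so the result has one column per key.
join(tables:{cpu:cpu, mem:mem}, on:["host"], fn: (t) => ({
    usage_user: t.cpu["_value"],
    used_percent: t.mem["_value"],
}))
```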
#### last
Returns the last result of the query
##### options
* `useStartTime` boolean
Use the start time as the timestamp of the resulting aggregate.
Example: `from(db: "telegraf") |> last()`
#### limit
Restricts the number of rows returned in the results.
Example: `from(db: "telegraf") |> limit(n: 10)`
#### map
Applies a function to each row of the table.
##### options
* `fn` function
Function to apply to each row. The return value of the function may be a single value or an object.
Example:
```
from(db:"foo")
|> filter(fn: (r) => r["_measurement"]=="cpu" AND
r["_field"] == "usage_system" AND
r["service"] == "app-server")
|> range(start:-12h)
// Square the value
|> map(fn: (r) => r._value * r._value)
```
Example:
```
from(db:"foo")
|> filter(fn: (r) => r["_measurement"]=="cpu" AND
r["_field"] == "usage_system" AND
r["service"] == "app-server")
|> range(start:-12h)
// Square the value and keep the original value
|> map(fn: (r) => ({value: r._value, value2:r._value * r._value}))
```
#### max
Returns the max value within the results
##### options
* `useStartTime` boolean
Use the start time as the timestamp of the resulting aggregate.
Example:
```
from(db:"foo")
|> filter(fn: (r) => r["_measurement"]=="cpu" AND
r["_field"] == "usage_system" AND
r["service"] == "app-server")
|> range(start:-12h)
|> window(every:10m)
|> max()
```
#### mean
Returns the mean of the values within the results
##### options
* `useStartTime` boolean
Use the start time as the timestamp of the resulting aggregate.
Example:
```
from(db:"foo")
|> filter(fn: (r) => r["_measurement"] == "mem" AND
r["_field"] == "used_percent")
|> range(start:-12h)
|> window(every:10m)
|> mean()
```
#### min
Returns the min value within the results
##### options
* `useStartTime` boolean
Use the start time as the timestamp of the resulting aggregate.
Example:
```
from(db:"foo")
|> filter(fn: (r) => r[ "_measurement"] == "cpu" AND
r["_field" ]== "usage_system")
|> range(start:-12h)
|> window(every:10m, period: 5m)
|> min()
```
#### range
Filters the results by time boundaries
Example:
```
from(db:"foo")
|> filter(fn: (r) => r["_measurement"] == "cpu" AND
r["_field"] == "usage_system")
|> range(start:-12h, stop: -15m)
```
##### options
* `start` duration
Specifies the oldest time to be included in the results
* `stop` duration or timestamp
Specifies the exclusive upper time bound
Defaults to "now"
#### sample
Sample values from a table.
##### options
* `useStartTime` boolean
Use the start time as the timestamp of the resulting aggregate.
* `n`
Sample every Nth element
* `pos`
Position offset from start of results to begin sampling
`pos` must be less than `n`
If `pos` is less than 0, a random offset is used.
Default is -1 (random offset)
Example to sample every fifth point starting from the second element:
```
from(db:"foo")
|> filter(fn: (r) => r["_measurement"] == "cpu" AND
r["_field"] == "usage_system")
|> range(start:-1d)
|> sample(n: 5, pos: 1)
```
#### set
Adds a tag with the given key and value to the results
Example: `from(db: "telegraf") |> set(key: "mykey", value: "myvalue")`
##### options
* `key` string
* `value` string
#### skew
Returns the skew of the results
##### options
* `useStartTime` boolean
Use the start time as the timestamp of the resulting aggregate.
Example: `from(db: "telegraf") |> range(start: -30m, stop: -15m) |> skew()`
#### sort
Sorts the results by the specified columns
Default sort is ascending
Example:
```
from(db:"telegraf")
|> filter(fn: (r) => r["_measurement"] == "system" AND
r["_field"] == "uptime")
|> range(start:-12h)
|> sort(cols:["region", "host", "value"])
```
##### options
* `cols` array of strings
List of columns used to sort; precedence from left to right.
Default is `["value"]`
For example, this sorts by uptime descending to find the longest
running instances.
```
from(db:"telegraf")
|> filter(fn: (r) => r["_measurement"] == "system" AND
r["_field"] == "uptime")
|> range(start:-12h)
|> sort(desc: true)
```
* `desc` bool
Sort results descending
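Both options may be combined; for example, a sketch sorting by host in descending order:
```
from(db:"telegraf")
|> range(start:-12h)
|> sort(cols:["host"], desc:true)
```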
#### spread
Returns the difference between the min and max values
##### options
* `useStartTime` boolean
Use the start time as the timestamp of the resulting aggregate.
Example: `from(db: "telegraf") |> range(start: -30m) |> spread()`
#### stddev
Returns the standard deviation of the results
##### options
* `useStartTime` boolean
Use the start time as the timestamp of the resulting aggregate.
Example: `from(db: "telegraf") |> range(start: -30m, stop: -15m) |> stddev()`
#### sum
Returns the sum of the results
##### options
* `useStartTime` boolean
Use the start time as the timestamp of the resulting aggregate.
Example: `from(db: "telegraf") |> range(start: -30m, stop: -15m) |> sum()`
#### toBool
Convert a value to a bool.
Example: `from(db: "telegraf") |> filter(fn:(r) => r._measurement == "mem" and r._field == "used") |> toBool()`
The function `toBool` is defined as `toBool = (table=<-) => table |> map(fn:(r) => bool(v:r._value))`.
If you need to convert other columns use the `map` function directly with the `bool` function.
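For example, a sketch converting a hypothetical `error` column rather than `_value`:
```
// Sketch: convert the hypothetical "error" column directly with map and bool.
from(db: "telegraf")
|> filter(fn:(r) => r._measurement == "http")
|> map(fn:(r) => ({error: bool(v: r.error)}))
```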
#### toInt
Convert a value to an int.
Example: `from(db: "telegraf") |> filter(fn:(r) => r._measurement == "mem" and r._field == "used") |> toInt()`
The function `toInt` is defined as `toInt = (table=<-) => table |> map(fn:(r) => int(v:r._value))`.
If you need to convert other columns use the `map` function directly with the `int` function.
#### toFloat
Convert a value to a float.
Example: `from(db: "telegraf") |> filter(fn:(r) => r._measurement == "mem" and r._field == "used") |> toFloat()`
The function `toFloat` is defined as `toFloat = (table=<-) => table |> map(fn:(r) => float(v:r._value))`.
If you need to convert other columns use the `map` function directly with the `float` function.
#### toDuration
Convert a value to a duration.
Example: `from(db: "telegraf") |> filter(fn:(r) => r._measurement == "mem" and r._field == "used") |> toDuration()`
The function `toDuration` is defined as `toDuration = (table=<-) => table |> map(fn:(r) => duration(v:r._value))`.
If you need to convert other columns use the `map` function directly with the `duration` function.
#### toString
Convert a value to a string.
Example: `from(db: "telegraf") |> filter(fn:(r) => r._measurement == "mem" and r._field == "used") |> toString()`
The function `toString` is defined as `toString = (table=<-) => table |> map(fn:(r) => string(v:r._value))`.
If you need to convert other columns use the `map` function directly with the `string` function.
#### toTime
Convert a value to a time.
Example: `from(db: "telegraf") |> filter(fn:(r) => r._measurement == "mem" and r._field == "used") |> toTime()`
The function `toTime` is defined as `toTime = (table=<-) => table |> map(fn:(r) => time(v:r._value))`.
If you need to convert other columns use the `map` function directly with the `time` function.
#### toUInt
Convert a value to a uint.
Example: `from(db: "telegraf") |> filter(fn:(r) => r._measurement == "mem" and r._field == "used") |> toUInt()`
The function `toUInt` is defined as `toUInt = (table=<-) => table |> map(fn:(r) => uint(v:r._value))`.
If you need to convert other columns use the `map` function directly with the `uint` function.
#### window
Partitions the results by a given time range
##### options
* `every` duration
Duration of time between windows
Defaults to `period`'s value
```
from(db:"foo")
|> range(start:-12h)
|> window(every:10m)
|> max()
```
* `period` duration
Duration of the windowed partition
Defaults to `every`'s value
```
from(db:"foo")
|> range(start:-12h)
|> window(every:10m, period:5m)
|> max()
```
* `start` time
The time of the initial window partition.
* `round` duration
Rounds a window's bounds to the nearest duration
Example:
```
from(db:"foo")
|> range(start:-12h)
|> window(every:10m, round:1m)
|> max()
```
### Custom Functions
IFQL also allows users to define their own functions.
The function syntax is:
```
(parameter list) => <function body>
```
The list of parameters is simply a list of identifiers with optional default values.
The function body is either a single expression which is returned or a block of statements.
Functions may be assigned to identifiers to give them a name.
Examples:
```
// Define a simple addition function
add = (a,b) => a + b
// Define a helper function to get data from a telegraf measurement.
// By default the database is expected to be named "telegraf".
telegrafM = (measurement, db="telegraf") =>
from(db:db)
|> filter(fn: (r) => r._measurement == measurement)
// Define a helper function for a common join operation
// Use block syntax since we have more than a single expression
abJoin = (measurementA, measurementB, on) => {
a = telegrafM(measurement:measurementA)
b = telegrafM(measurement:measurementB)
return join(
tables:{a:a, b:b},
on:on,
// Return a map from the join fn,
// this creates a table with a column for each key in the map.
// Note the () around the map to indicate a single map expression instead of function block.
fn: (t) => ({
a: t.a._value,
b: t.b._value,
}),
)
}
```
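These functions can then be called like any built-in function; for example:
```
// Use the functions defined above.
add(a:1, b:2) // 3
abJoin(measurementA:"cpu", measurementB:"mem", on:["host"])
```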
#### Pipe Arguments
Functions may also declare that an argument can receive values from the pipe forward operator by specifying a special default value:
```
// Define add function which accepts `a` as the piped argument.
add = (a=<-, b) => a + b
// Call add using the pipe forward syntax.
1 |> add(b:3) // 4
// Define measurement function which accepts table as the piped argument.
measurement = (m, table=<-) => table |> filter(fn: (r) => r._measurement == m)
// Define field function which accepts table as the piped argument
field = (field, table=<-) => table |> filter(fn: (r) => r._field == field)
// Query usage_idle from the cpu measurement and the telegraf database.
// Using the measurement and field functions.
from(db:"telegraf")
|> measurement(m:"cpu")
|> field(field:"usage_idle")
```
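Any function whose table parameter defaults to `<-` can be dropped into a pipeline this way; as another sketch:
```
// A pipe-able helper that counts points from the last hour.
lastHourCount = (table=<-) => table |> range(start:-1h) |> count()
from(db:"telegraf") |> lastHourCount()
```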

query/ast/Makefile Normal file

@@ -0,0 +1,9 @@
SUBDIRS := asttest

$(SUBDIRS):
	$(MAKE) -C $@ $(MAKECMDGOALS)

all: $(SUBDIRS)

.PHONY: $(SUBDIRS) clean

query/ast/ast.go Normal file

@@ -0,0 +1,850 @@
package ast
import (
"encoding/json"
"fmt"
"regexp"
"time"
)
// Position represents a specific location in the source
type Position struct {
Line int `json:"line"` // Line is the line in the source marked by this position
Column int `json:"column"` // Column is the column in the source marked by this position
}
// SourceLocation represents the location of a node in the AST
type SourceLocation struct {
Start Position `json:"start"` // Start is the location in the source the node starts
End Position `json:"end"` // End is the location in the source the node ends
Source *string `json:"source,omitempty"` // Source is optional raw source
}
// Node represents a node in the InfluxDB abstract syntax tree.
type Node interface {
node()
Type() string // Type property is a string that contains the variant type of the node
Location() *SourceLocation
Copy() Node
// All nodes must support JSON marshalling
json.Marshaler
}
func (*Program) node() {}
func (*BlockStatement) node() {}
func (*ExpressionStatement) node() {}
func (*ReturnStatement) node() {}
func (*VariableDeclaration) node() {}
func (*VariableDeclarator) node() {}
func (*ArrayExpression) node() {}
func (*ArrowFunctionExpression) node() {}
func (*BinaryExpression) node() {}
func (*CallExpression) node() {}
func (*ConditionalExpression) node() {}
func (*LogicalExpression) node() {}
func (*MemberExpression) node() {}
func (*PipeExpression) node() {}
func (*ObjectExpression) node() {}
func (*UnaryExpression) node() {}
func (*Property) node() {}
func (*Identifier) node() {}
func (*BooleanLiteral) node() {}
func (*DateTimeLiteral) node() {}
func (*DurationLiteral) node() {}
func (*FloatLiteral) node() {}
func (*IntegerLiteral) node() {}
func (*PipeLiteral) node() {}
func (*RegexpLiteral) node() {}
func (*StringLiteral) node() {}
func (*UnsignedIntegerLiteral) node() {}
// BaseNode holds the attributes every expression or statement should have
type BaseNode struct {
Loc *SourceLocation `json:"location,omitempty"`
}
// Location is the source location of the Node
func (b *BaseNode) Location() *SourceLocation { return b.Loc }
// Program represents a complete program source tree
type Program struct {
*BaseNode
Body []Statement `json:"body"`
}
// Type is the abstract type
func (*Program) Type() string { return "Program" }
func (p *Program) Copy() Node {
np := new(Program)
*np = *p
if len(p.Body) > 0 {
np.Body = make([]Statement, len(p.Body))
for i, s := range p.Body {
np.Body[i] = s.Copy().(Statement)
}
}
return np
}
// Statement represents a statement in the AST. Perhaps we don't even want statements nor expression statements.
type Statement interface {
Node
stmt()
}
func (*BlockStatement) stmt() {}
func (*ExpressionStatement) stmt() {}
func (*ReturnStatement) stmt() {}
func (*VariableDeclaration) stmt() {}
// BlockStatement is a set of statements
type BlockStatement struct {
*BaseNode
Body []Statement `json:"body"`
}
// Type is the abstract type
func (*BlockStatement) Type() string { return "BlockStatement" }
func (s *BlockStatement) Copy() Node {
ns := new(BlockStatement)
*ns = *s
if len(s.Body) > 0 {
ns.Body = make([]Statement, len(s.Body))
for i, stmt := range s.Body {
ns.Body[i] = stmt.Copy().(Statement)
}
}
return ns
}
// ExpressionStatement may consist of an expression that does not return a value and is executed solely for its side-effects.
type ExpressionStatement struct {
*BaseNode
Expression Expression `json:"expression"`
}
// Type is the abstract type
func (*ExpressionStatement) Type() string { return "ExpressionStatement" }
func (s *ExpressionStatement) Copy() Node {
if s == nil {
return s
}
ns := new(ExpressionStatement)
*ns = *s
ns.Expression = s.Expression.Copy().(Expression)
return ns
}
// ReturnStatement defines an Expression to return
type ReturnStatement struct {
*BaseNode
Argument Expression `json:"argument"`
}
// Type is the abstract type
func (*ReturnStatement) Type() string { return "ReturnStatement" }
func (s *ReturnStatement) Copy() Node {
if s == nil {
return s
}
ns := new(ReturnStatement)
*ns = *s
ns.Argument = s.Argument.Copy().(Expression)
return ns
}
// VariableDeclaration declares one or more variables using assignment
type VariableDeclaration struct {
*BaseNode
Declarations []*VariableDeclarator `json:"declarations"`
}
// Type is the abstract type
func (*VariableDeclaration) Type() string { return "VariableDeclaration" }
func (d *VariableDeclaration) Copy() Node {
if d == nil {
return d
}
nd := new(VariableDeclaration)
*nd = *d
if len(d.Declarations) > 0 {
nd.Declarations = make([]*VariableDeclarator, len(d.Declarations))
for i, decl := range d.Declarations {
nd.Declarations[i] = decl.Copy().(*VariableDeclarator)
}
}
return nd
}
// VariableDeclarator represents the declaration of a variable
type VariableDeclarator struct {
*BaseNode
ID *Identifier `json:"id"`
Init Expression `json:"init"`
}
// Type is the abstract type
func (*VariableDeclarator) Type() string { return "VariableDeclarator" }
func (d *VariableDeclarator) Copy() Node {
if d == nil {
return d
}
nd := new(VariableDeclarator)
*nd = *d
nd.Init = d.Init.Copy().(Expression)
return nd
}
// Expression represents an action that can be performed by InfluxDB that can be evaluated to a value.
type Expression interface {
Node
expression()
}
func (*ArrayExpression) expression() {}
func (*ArrowFunctionExpression) expression() {}
func (*BinaryExpression) expression() {}
func (*BooleanLiteral) expression() {}
func (*CallExpression) expression() {}
func (*ConditionalExpression) expression() {}
func (*DateTimeLiteral) expression() {}
func (*DurationLiteral) expression() {}
func (*FloatLiteral) expression() {}
func (*Identifier) expression() {}
func (*IntegerLiteral) expression() {}
func (*LogicalExpression) expression() {}
func (*MemberExpression) expression() {}
func (*ObjectExpression) expression() {}
func (*PipeExpression) expression() {}
func (*PipeLiteral) expression() {}
func (*RegexpLiteral) expression() {}
func (*StringLiteral) expression() {}
func (*UnaryExpression) expression() {}
func (*UnsignedIntegerLiteral) expression() {}
// CallExpression represents a function call whose callee may be an Identifier or MemberExpression
type CallExpression struct {
*BaseNode
Callee Expression `json:"callee"`
Arguments []Expression `json:"arguments,omitempty"`
}
// Type is the abstract type
func (*CallExpression) Type() string { return "CallExpression" }
func (e *CallExpression) Copy() Node {
if e == nil {
return e
}
ne := new(CallExpression)
*ne = *e
ne.Callee = e.Callee.Copy().(Expression)
if len(e.Arguments) > 0 {
ne.Arguments = make([]Expression, len(e.Arguments))
for i, arg := range e.Arguments {
ne.Arguments[i] = arg.Copy().(Expression)
}
}
return ne
}
type PipeExpression struct {
*BaseNode
Argument Expression `json:"argument"`
Call *CallExpression `json:"call"`
}
// Type is the abstract type
func (*PipeExpression) Type() string { return "PipeExpression" }
func (e *PipeExpression) Copy() Node {
if e == nil {
return e
}
ne := new(PipeExpression)
*ne = *e
ne.Argument = e.Argument.Copy().(Expression)
ne.Call = e.Call.Copy().(*CallExpression)
return ne
}
// MemberExpression represents accessing a property of an expression
type MemberExpression struct {
*BaseNode
Object Expression `json:"object"`
Property Expression `json:"property"`
}
// Type is the abstract type
func (*MemberExpression) Type() string { return "MemberExpression" }
func (e *MemberExpression) Copy() Node {
if e == nil {
return e
}
ne := new(MemberExpression)
*ne = *e
ne.Object = e.Object.Copy().(Expression)
ne.Property = e.Property.Copy().(Expression)
return ne
}
type ArrowFunctionExpression struct {
*BaseNode
Params []*Property `json:"params"`
Body Node `json:"body"`
}
// Type is the abstract type
func (*ArrowFunctionExpression) Type() string { return "ArrowFunctionExpression" }
func (e *ArrowFunctionExpression) Copy() Node {
if e == nil {
return e
}
ne := new(ArrowFunctionExpression)
*ne = *e
if len(e.Params) > 0 {
ne.Params = make([]*Property, len(e.Params))
for i, param := range e.Params {
ne.Params[i] = param.Copy().(*Property)
}
}
ne.Body = e.Body.Copy()
return ne
}
// OperatorKind are the equality and arithmetic operators.
// The result of evaluating an equality operator is always of type Boolean, based on whether the
// comparison is true.
// Arithmetic operators take numerical values (either literals or variables) as their operands
// and return a single numerical value.
type OperatorKind int
const (
opBegin OperatorKind = iota
MultiplicationOperator
DivisionOperator
AdditionOperator
SubtractionOperator
LessThanEqualOperator
LessThanOperator
GreaterThanEqualOperator
GreaterThanOperator
StartsWithOperator
InOperator
NotOperator
NotEmptyOperator
EmptyOperator
EqualOperator
NotEqualOperator
RegexpMatchOperator
NotRegexpMatchOperator
opEnd
)
func (o OperatorKind) String() string {
return OperatorTokens[o]
}
// OperatorLookup converts the operators to OperatorKind
func OperatorLookup(op string) OperatorKind {
return operators[op]
}
func (o OperatorKind) MarshalText() ([]byte, error) {
text, ok := OperatorTokens[o]
if !ok {
return nil, fmt.Errorf("unknown operator %d", int(o))
}
return []byte(text), nil
}
func (o *OperatorKind) UnmarshalText(data []byte) error {
var ok bool
*o, ok = operators[string(data)]
if !ok {
return fmt.Errorf("unknown operator %q", string(data))
}
return nil
}
// BinaryExpression uses binary operators to act on two operands in an expression.
// BinaryExpression includes relational and arithmetic operators
type BinaryExpression struct {
*BaseNode
Operator OperatorKind `json:"operator"`
Left Expression `json:"left"`
Right Expression `json:"right"`
}
// Type is the abstract type
func (*BinaryExpression) Type() string { return "BinaryExpression" }
func (e *BinaryExpression) Copy() Node {
if e == nil {
return e
}
ne := new(BinaryExpression)
*ne = *e
ne.Left = e.Left.Copy().(Expression)
ne.Right = e.Right.Copy().(Expression)
return ne
}
// UnaryExpression uses an operator to act on a single operand in an expression.
type UnaryExpression struct {
*BaseNode
Operator OperatorKind `json:"operator"`
Argument Expression `json:"argument"`
}
// Type is the abstract type
func (*UnaryExpression) Type() string { return "UnaryExpression" }
func (e *UnaryExpression) Copy() Node {
if e == nil {
return e
}
ne := new(UnaryExpression)
*ne = *e
ne.Argument = e.Argument.Copy().(Expression)
return ne
}
// LogicalOperatorKind are used with boolean (logical) values
type LogicalOperatorKind int
const (
logOpBegin LogicalOperatorKind = iota
AndOperator
OrOperator
logOpEnd
)
func (o LogicalOperatorKind) String() string {
return LogicalOperatorTokens[o]
}
// LogicalOperatorLookup converts the operators to LogicalOperatorKind
func LogicalOperatorLookup(op string) LogicalOperatorKind {
return logOperators[op]
}
func (o LogicalOperatorKind) MarshalText() ([]byte, error) {
text, ok := LogicalOperatorTokens[o]
if !ok {
return nil, fmt.Errorf("unknown logical operator %d", int(o))
}
return []byte(text), nil
}
func (o *LogicalOperatorKind) UnmarshalText(data []byte) error {
var ok bool
*o, ok = logOperators[string(data)]
if !ok {
return fmt.Errorf("unknown logical operator %q", string(data))
}
return nil
}
// LogicalExpression represent the rule conditions that collectively evaluate to either true or false.
// `or` expressions compute the disjunction of two boolean expressions and return boolean values.
// `and` expressions compute the conjunction of two boolean expressions and return boolean values.
type LogicalExpression struct {
*BaseNode
Operator LogicalOperatorKind `json:"operator"`
Left Expression `json:"left"`
Right Expression `json:"right"`
}
// Type is the abstract type
func (*LogicalExpression) Type() string { return "LogicalExpression" }
func (e *LogicalExpression) Copy() Node {
if e == nil {
return e
}
ne := new(LogicalExpression)
*ne = *e
ne.Left = e.Left.Copy().(Expression)
ne.Right = e.Right.Copy().(Expression)
return ne
}
// ArrayExpression is used to create and directly specify the elements of an array object
type ArrayExpression struct {
*BaseNode
Elements []Expression `json:"elements"`
}
// Type is the abstract type
func (*ArrayExpression) Type() string { return "ArrayExpression" }
func (e *ArrayExpression) Copy() Node {
if e == nil {
return e
}
ne := new(ArrayExpression)
*ne = *e
if len(e.Elements) > 0 {
ne.Elements = make([]Expression, len(e.Elements))
for i, el := range e.Elements {
ne.Elements[i] = el.Copy().(Expression)
}
}
return ne
}
// ObjectExpression allows the declaration of an anonymous object within a declaration.
type ObjectExpression struct {
*BaseNode
Properties []*Property `json:"properties"`
}
// Type is the abstract type
func (*ObjectExpression) Type() string { return "ObjectExpression" }
func (e *ObjectExpression) Copy() Node {
if e == nil {
return e
}
ne := new(ObjectExpression)
*ne = *e
if len(e.Properties) > 0 {
ne.Properties = make([]*Property, len(e.Properties))
for i, p := range e.Properties {
ne.Properties[i] = p.Copy().(*Property)
}
}
return ne
}
// ConditionalExpression selects one of two expressions, `Alternate` or `Consequent`
// depending on a third, boolean, expression, `Test`.
type ConditionalExpression struct {
*BaseNode
Test Expression `json:"test"`
Alternate Expression `json:"alternate"`
Consequent Expression `json:"consequent"`
}
// Type is the abstract type
func (*ConditionalExpression) Type() string { return "ConditionalExpression" }
func (e *ConditionalExpression) Copy() Node {
if e == nil {
return e
}
ne := new(ConditionalExpression)
*ne = *e
ne.Test = e.Test.Copy().(Expression)
ne.Alternate = e.Alternate.Copy().(Expression)
ne.Consequent = e.Consequent.Copy().(Expression)
return ne
}
// Property is the value associated with a key
type Property struct {
*BaseNode
Key *Identifier `json:"key"`
Value Expression `json:"value"`
}
func (p *Property) Copy() Node {
if p == nil {
return p
}
np := new(Property)
*np = *p
if p.Value != nil {
np.Value = p.Value.Copy().(Expression)
}
return np
}
// Type is the abstract type
func (*Property) Type() string { return "Property" }
// Identifier represents a name that identifies a unique Node
type Identifier struct {
*BaseNode
Name string `json:"name"`
}
// Type is the abstract type
func (*Identifier) Type() string { return "Identifier" }
func (i *Identifier) Copy() Node {
if i == nil {
return i
}
ni := new(Identifier)
*ni = *i
return ni
}
// Literal is the lexical form for literal expressions, which define
// boolean, string, integer, number, duration, datetime and field values.
// Literals must be coerced explicitly.
type Literal interface {
Expression
literal()
}
func (*BooleanLiteral) literal() {}
func (*DateTimeLiteral) literal() {}
func (*DurationLiteral) literal() {}
func (*FloatLiteral) literal() {}
func (*IntegerLiteral) literal() {}
func (*PipeLiteral) literal() {}
func (*RegexpLiteral) literal() {}
func (*StringLiteral) literal() {}
func (*UnsignedIntegerLiteral) literal() {}
// PipeLiteral represents a specialized literal value, indicating the left hand value of a pipe expression.
type PipeLiteral struct {
*BaseNode
}
// Type is the abstract type
func (*PipeLiteral) Type() string { return "PipeLiteral" }
func (i *PipeLiteral) Copy() Node {
if i == nil {
return i
}
ni := new(PipeLiteral)
*ni = *i
return ni
}
// StringLiteral expressions begin and end with double quote marks.
type StringLiteral struct {
*BaseNode
Value string `json:"value"`
}
func (*StringLiteral) Type() string { return "StringLiteral" }
func (l *StringLiteral) Copy() Node {
if l == nil {
return l
}
nl := new(StringLiteral)
*nl = *l
return nl
}
// BooleanLiteral represents boolean values
type BooleanLiteral struct {
*BaseNode
Value bool `json:"value"`
}
// Type is the abstract type
func (*BooleanLiteral) Type() string { return "BooleanLiteral" }
func (l *BooleanLiteral) Copy() Node {
if l == nil {
return l
}
nl := new(BooleanLiteral)
*nl = *l
return nl
}
// FloatLiteral represents floating point numbers according to the double representation defined by IEEE 754-1985
type FloatLiteral struct {
*BaseNode
Value float64 `json:"value"`
}
// Type is the abstract type
func (*FloatLiteral) Type() string { return "FloatLiteral" }
func (l *FloatLiteral) Copy() Node {
if l == nil {
return l
}
nl := new(FloatLiteral)
*nl = *l
return nl
}
// IntegerLiteral represents integer numbers.
type IntegerLiteral struct {
*BaseNode
Value int64 `json:"value"`
}
// Type is the abstract type
func (*IntegerLiteral) Type() string { return "IntegerLiteral" }
func (l *IntegerLiteral) Copy() Node {
if l == nil {
return l
}
nl := new(IntegerLiteral)
*nl = *l
return nl
}
// UnsignedIntegerLiteral represents unsigned integer numbers.
type UnsignedIntegerLiteral struct {
*BaseNode
Value uint64 `json:"value"`
}
// Type is the abstract type
func (*UnsignedIntegerLiteral) Type() string { return "UnsignedIntegerLiteral" }
func (l *UnsignedIntegerLiteral) Copy() Node {
if l == nil {
return l
}
nl := new(UnsignedIntegerLiteral)
*nl = *l
return nl
}
// RegexpLiteral expressions begin and end with `/` and are regular expressions with syntax accepted by RE2
type RegexpLiteral struct {
*BaseNode
Value *regexp.Regexp `json:"value"`
}
// Type is the abstract type
func (*RegexpLiteral) Type() string { return "RegexpLiteral" }
func (l *RegexpLiteral) Copy() Node {
if l == nil {
return l
}
nl := new(RegexpLiteral)
*nl = *l
return nl
}
// DurationLiteral represents the elapsed time between two instants as an
// int64 nanosecond count with syntax of golang's time.Duration
// TODO: this may be better as a class initialization
type DurationLiteral struct {
*BaseNode
Value time.Duration `json:"value"`
}
// Type is the abstract type
func (*DurationLiteral) Type() string { return "DurationLiteral" }
func (l *DurationLiteral) Copy() Node {
if l == nil {
return l
}
nl := new(DurationLiteral)
*nl = *l
return nl
}
// DateTimeLiteral represents an instant in time with nanosecond precision using
// the syntax of golang's RFC3339 Nanosecond variant
// TODO: this may be better as a class initialization
type DateTimeLiteral struct {
*BaseNode
Value time.Time `json:"value"`
}
// Type is the abstract type
func (*DateTimeLiteral) Type() string { return "DateTimeLiteral" }
func (l *DateTimeLiteral) Copy() Node {
if l == nil {
return l
}
nl := new(DateTimeLiteral)
*nl = *l
return nl
}
// OperatorTokens converts OperatorKind to string
var OperatorTokens = map[OperatorKind]string{
MultiplicationOperator: "*",
DivisionOperator: "/",
AdditionOperator: "+",
SubtractionOperator: "-",
LessThanEqualOperator: "<=",
LessThanOperator: "<",
GreaterThanOperator: ">",
GreaterThanEqualOperator: ">=",
InOperator: "in",
NotOperator: "not",
NotEmptyOperator: "not empty",
EmptyOperator: "empty",
StartsWithOperator: "startswith",
EqualOperator: "==",
NotEqualOperator: "!=",
RegexpMatchOperator: "=~",
NotRegexpMatchOperator: "!~",
}
// LogicalOperatorTokens converts LogicalOperatorKind to string
var LogicalOperatorTokens = map[LogicalOperatorKind]string{
AndOperator: "and",
OrOperator: "or",
}
var operators map[string]OperatorKind
var logOperators map[string]LogicalOperatorKind
func init() {
operators = make(map[string]OperatorKind)
for op := opBegin + 1; op < opEnd; op++ {
operators[OperatorTokens[op]] = op
}
logOperators = make(map[string]LogicalOperatorKind)
for op := logOpBegin + 1; op < logOpEnd; op++ {
logOperators[LogicalOperatorTokens[op]] = op
}
}
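A usage sketch (not part of this diff) of the `Copy` and operator helpers above, assuming the package is imported as `github.com/influxdata/ifql/ast`:

```go
package main

import (
	"fmt"

	"github.com/influxdata/ifql/ast"
)

func main() {
	expr := &ast.BinaryExpression{
		Operator: ast.AdditionOperator,
		Left:     &ast.IntegerLiteral{Value: 1},
		Right:    &ast.IntegerLiteral{Value: 2},
	}
	// Copy is deep: mutating the copy leaves the original untouched.
	dup := expr.Copy().(*ast.BinaryExpression)
	dup.Left = &ast.IntegerLiteral{Value: 10}

	fmt.Println(expr.Operator)                                   // "+"
	fmt.Println(ast.OperatorLookup("+") == ast.AdditionOperator) // true
}
```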

query/ast/asttest/Makefile Normal file

@@ -0,0 +1,9 @@
all: cmpopts.go

cmpopts.go: ../ast.go gen.go ../../bin/cmpgen
	PATH=../../bin:${PATH} $(GO_GENERATE) -x ./...

clean:
	rm -f cmpopts.go

.PHONY: all clean

query/ast/asttest/cmpgen/main.go Normal file

@@ -0,0 +1,53 @@
package main
import (
"fmt"
"go/importer"
"go/types"
"log"
"os"
)
func main() {
if len(os.Args) != 2 {
log.Println(os.Args)
fmt.Println("Usage: cmpgen <path to output file>")
os.Exit(1)
}
f, err := os.Create(os.Args[1])
if err != nil {
log.Fatal(err)
}
defer f.Close()
pkg, err := importer.For("source", nil).Import("github.com/influxdata/ifql/ast")
if err != nil {
log.Fatal(err)
}
scope := pkg.Scope()
fmt.Fprintln(f, "package asttest")
fmt.Fprintln(f)
fmt.Fprintln(f, "// DO NOT EDIT: This file is autogenerated via the cmpgen command.")
fmt.Fprintln(f)
fmt.Fprintln(f, `import (`)
fmt.Fprintln(f, ` "github.com/google/go-cmp/cmp"`)
fmt.Fprintln(f, ` "github.com/google/go-cmp/cmp/cmpopts"`)
fmt.Fprintln(f, ` "github.com/influxdata/ifql/ast"`)
fmt.Fprintln(f, `)`)
fmt.Fprintln(f)
fmt.Fprintln(f, `var IgnoreBaseNodeOptions = []cmp.Option{`)
for _, name := range scope.Names() {
obj := scope.Lookup(name)
if strct, ok := obj.Type().Underlying().(*types.Struct); obj.Exported() && ok {
for i := 0; i < strct.NumFields(); i++ {
field := strct.Field(i)
if field.Name() == "BaseNode" {
fmt.Fprintf(f, "\tcmpopts.IgnoreFields(ast.%s{}, \"BaseNode\"),\n", obj.Name())
}
}
}
}
fmt.Fprintln(f, `}`)
}

query/ast/asttest/cmpopts.go Normal file

@@ -0,0 +1,39 @@
package asttest
// DO NOT EDIT: This file is autogenerated via the cmpgen command.
import (
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/influxdata/ifql/ast"
)
var IgnoreBaseNodeOptions = []cmp.Option{
cmpopts.IgnoreFields(ast.ArrayExpression{}, "BaseNode"),
cmpopts.IgnoreFields(ast.ArrowFunctionExpression{}, "BaseNode"),
cmpopts.IgnoreFields(ast.BinaryExpression{}, "BaseNode"),
cmpopts.IgnoreFields(ast.BlockStatement{}, "BaseNode"),
cmpopts.IgnoreFields(ast.BooleanLiteral{}, "BaseNode"),
cmpopts.IgnoreFields(ast.CallExpression{}, "BaseNode"),
cmpopts.IgnoreFields(ast.ConditionalExpression{}, "BaseNode"),
cmpopts.IgnoreFields(ast.DateTimeLiteral{}, "BaseNode"),
cmpopts.IgnoreFields(ast.DurationLiteral{}, "BaseNode"),
cmpopts.IgnoreFields(ast.ExpressionStatement{}, "BaseNode"),
cmpopts.IgnoreFields(ast.FloatLiteral{}, "BaseNode"),
cmpopts.IgnoreFields(ast.Identifier{}, "BaseNode"),
cmpopts.IgnoreFields(ast.IntegerLiteral{}, "BaseNode"),
cmpopts.IgnoreFields(ast.LogicalExpression{}, "BaseNode"),
cmpopts.IgnoreFields(ast.MemberExpression{}, "BaseNode"),
cmpopts.IgnoreFields(ast.ObjectExpression{}, "BaseNode"),
cmpopts.IgnoreFields(ast.PipeExpression{}, "BaseNode"),
cmpopts.IgnoreFields(ast.PipeLiteral{}, "BaseNode"),
cmpopts.IgnoreFields(ast.Program{}, "BaseNode"),
cmpopts.IgnoreFields(ast.Property{}, "BaseNode"),
cmpopts.IgnoreFields(ast.RegexpLiteral{}, "BaseNode"),
cmpopts.IgnoreFields(ast.ReturnStatement{}, "BaseNode"),
cmpopts.IgnoreFields(ast.StringLiteral{}, "BaseNode"),
cmpopts.IgnoreFields(ast.UnaryExpression{}, "BaseNode"),
cmpopts.IgnoreFields(ast.UnsignedIntegerLiteral{}, "BaseNode"),
cmpopts.IgnoreFields(ast.VariableDeclaration{}, "BaseNode"),
cmpopts.IgnoreFields(ast.VariableDeclarator{}, "BaseNode"),
}

query/ast/asttest/gen.go Normal file

@@ -0,0 +1,13 @@
package asttest
import (
"regexp"
"github.com/google/go-cmp/cmp"
)
//go:generate cmpgen cmpopts.go
var CompareOptions = append(IgnoreBaseNodeOptions,
cmp.Comparer(func(x, y *regexp.Regexp) bool { return x.String() == y.String() }),
)
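A sketch (not part of this diff) of how `CompareOptions` is intended to be used in a test, comparing ASTs while ignoring BaseNode source locations:

```go
package asttest_test

import (
	"testing"

	"github.com/google/go-cmp/cmp"
	"github.com/influxdata/ifql/ast"
	"github.com/influxdata/ifql/ast/asttest"
)

// TestCompareSketch compares two nodes; BaseNode locations are ignored and
// regexps are compared by their source text via CompareOptions.
func TestCompareSketch(t *testing.T) {
	want := ast.Node(&ast.StringLiteral{Value: "hello"})
	got := ast.Node(&ast.StringLiteral{Value: "hello"})
	if !cmp.Equal(want, got, asttest.CompareOptions...) {
		t.Errorf("unexpected AST: %s", cmp.Diff(want, got, asttest.CompareOptions...))
	}
}
```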

query/ast/json.go Normal file

@@ -0,0 +1,894 @@
package ast
import (
"encoding/json"
"fmt"
"regexp"
"strconv"
"time"
)
func (p *Program) MarshalJSON() ([]byte, error) {
type Alias Program
raw := struct {
Type string `json:"type"`
*Alias
}{
Type: p.Type(),
Alias: (*Alias)(p),
}
return json.Marshal(raw)
}
func (p *Program) UnmarshalJSON(data []byte) error {
type Alias Program
raw := struct {
*Alias
Body []json.RawMessage `json:"body"`
}{}
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
if raw.Alias != nil {
*p = *(*Program)(raw.Alias)
}
p.Body = make([]Statement, len(raw.Body))
for i, r := range raw.Body {
s, err := unmarshalStatement(r)
if err != nil {
return err
}
p.Body[i] = s
}
return nil
}
func (s *BlockStatement) MarshalJSON() ([]byte, error) {
type Alias BlockStatement
raw := struct {
Type string `json:"type"`
*Alias
}{
Type: s.Type(),
Alias: (*Alias)(s),
}
return json.Marshal(raw)
}
func (s *BlockStatement) UnmarshalJSON(data []byte) error {
type Alias BlockStatement
raw := struct {
*Alias
Body []json.RawMessage `json:"body"`
}{}
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
if raw.Alias != nil {
*s = *(*BlockStatement)(raw.Alias)
}
s.Body = make([]Statement, len(raw.Body))
for i, r := range raw.Body {
stmt, err := unmarshalStatement(r)
if err != nil {
return err
}
s.Body[i] = stmt
}
return nil
}
func (s *ExpressionStatement) MarshalJSON() ([]byte, error) {
type Alias ExpressionStatement
raw := struct {
Type string `json:"type"`
*Alias
}{
Type: s.Type(),
Alias: (*Alias)(s),
}
return json.Marshal(raw)
}
func (s *ExpressionStatement) UnmarshalJSON(data []byte) error {
type Alias ExpressionStatement
raw := struct {
*Alias
Expression json.RawMessage `json:"expression"`
}{}
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
if raw.Alias != nil {
*s = *(*ExpressionStatement)(raw.Alias)
}
e, err := unmarshalExpression(raw.Expression)
if err != nil {
return err
}
s.Expression = e
return nil
}
func (s *ReturnStatement) MarshalJSON() ([]byte, error) {
type Alias ReturnStatement
raw := struct {
Type string `json:"type"`
*Alias
}{
Type: s.Type(),
Alias: (*Alias)(s),
}
return json.Marshal(raw)
}
func (s *ReturnStatement) UnmarshalJSON(data []byte) error {
type Alias ReturnStatement
raw := struct {
*Alias
Argument json.RawMessage `json:"argument"`
}{}
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
if raw.Alias != nil {
*s = *(*ReturnStatement)(raw.Alias)
}
e, err := unmarshalExpression(raw.Argument)
if err != nil {
return err
}
s.Argument = e
return nil
}
func (d *VariableDeclaration) MarshalJSON() ([]byte, error) {
type Alias VariableDeclaration
raw := struct {
Type string `json:"type"`
*Alias
}{
Type: d.Type(),
Alias: (*Alias)(d),
}
return json.Marshal(raw)
}
func (d *VariableDeclarator) MarshalJSON() ([]byte, error) {
type Alias VariableDeclarator
raw := struct {
Type string `json:"type"`
*Alias
}{
Type: d.Type(),
Alias: (*Alias)(d),
}
return json.Marshal(raw)
}
func (d *VariableDeclarator) UnmarshalJSON(data []byte) error {
type Alias VariableDeclarator
raw := struct {
*Alias
Init json.RawMessage `json:"init"`
}{}
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
if raw.Alias != nil {
*d = *(*VariableDeclarator)(raw.Alias)
}
e, err := unmarshalExpression(raw.Init)
if err != nil {
return err
}
d.Init = e
return nil
}
func (e *CallExpression) MarshalJSON() ([]byte, error) {
type Alias CallExpression
raw := struct {
Type string `json:"type"`
*Alias
}{
Type: e.Type(),
Alias: (*Alias)(e),
}
return json.Marshal(raw)
}
func (e *CallExpression) UnmarshalJSON(data []byte) error {
type Alias CallExpression
raw := struct {
*Alias
Callee json.RawMessage `json:"callee"`
Arguments []json.RawMessage `json:"arguments"`
}{}
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
if raw.Alias != nil {
*e = *(*CallExpression)(raw.Alias)
}
callee, err := unmarshalExpression(raw.Callee)
if err != nil {
return err
}
e.Callee = callee
e.Arguments = make([]Expression, len(raw.Arguments))
for i, r := range raw.Arguments {
expr, err := unmarshalExpression(r)
if err != nil {
return err
}
e.Arguments[i] = expr
}
return nil
}
func (e *PipeExpression) MarshalJSON() ([]byte, error) {
type Alias PipeExpression
raw := struct {
Type string `json:"type"`
*Alias
}{
Type: e.Type(),
Alias: (*Alias)(e),
}
return json.Marshal(raw)
}
func (e *PipeExpression) UnmarshalJSON(data []byte) error {
type Alias PipeExpression
raw := struct {
*Alias
Argument json.RawMessage `json:"argument"`
}{}
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
if raw.Alias != nil {
*e = *(*PipeExpression)(raw.Alias)
}
arg, err := unmarshalExpression(raw.Argument)
if err != nil {
return err
}
e.Argument = arg
return nil
}
func (e *MemberExpression) MarshalJSON() ([]byte, error) {
type Alias MemberExpression
raw := struct {
Type string `json:"type"`
*Alias
}{
Type: e.Type(),
Alias: (*Alias)(e),
}
return json.Marshal(raw)
}
func (e *MemberExpression) UnmarshalJSON(data []byte) error {
type Alias MemberExpression
raw := struct {
*Alias
Object json.RawMessage `json:"object"`
Property json.RawMessage `json:"property"`
}{}
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
if raw.Alias != nil {
*e = *(*MemberExpression)(raw.Alias)
}
object, err := unmarshalExpression(raw.Object)
if err != nil {
return err
}
e.Object = object
property, err := unmarshalExpression(raw.Property)
if err != nil {
return err
}
e.Property = property
return nil
}
func (e *ArrowFunctionExpression) MarshalJSON() ([]byte, error) {
type Alias ArrowFunctionExpression
raw := struct {
Type string `json:"type"`
*Alias
}{
Type: e.Type(),
Alias: (*Alias)(e),
}
return json.Marshal(raw)
}
func (e *ArrowFunctionExpression) UnmarshalJSON(data []byte) error {
type Alias ArrowFunctionExpression
raw := struct {
*Alias
Body json.RawMessage `json:"body"`
}{}
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
if raw.Alias != nil {
*e = *(*ArrowFunctionExpression)(raw.Alias)
}
body, err := unmarshalNode(raw.Body)
if err != nil {
return err
}
e.Body = body
return nil
}
func (e *BinaryExpression) MarshalJSON() ([]byte, error) {
type Alias BinaryExpression
raw := struct {
Type string `json:"type"`
*Alias
}{
Type: e.Type(),
Alias: (*Alias)(e),
}
return json.Marshal(raw)
}
func (e *BinaryExpression) UnmarshalJSON(data []byte) error {
type Alias BinaryExpression
raw := struct {
*Alias
Left json.RawMessage `json:"left"`
Right json.RawMessage `json:"right"`
}{}
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
if raw.Alias != nil {
*e = *(*BinaryExpression)(raw.Alias)
}
l, err := unmarshalExpression(raw.Left)
if err != nil {
return err
}
e.Left = l
r, err := unmarshalExpression(raw.Right)
if err != nil {
return err
}
e.Right = r
return nil
}
func (e *UnaryExpression) MarshalJSON() ([]byte, error) {
type Alias UnaryExpression
raw := struct {
Type string `json:"type"`
*Alias
}{
Type: e.Type(),
Alias: (*Alias)(e),
}
return json.Marshal(raw)
}
func (e *UnaryExpression) UnmarshalJSON(data []byte) error {
type Alias UnaryExpression
raw := struct {
*Alias
Argument json.RawMessage `json:"argument"`
}{}
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
if raw.Alias != nil {
*e = *(*UnaryExpression)(raw.Alias)
}
argument, err := unmarshalExpression(raw.Argument)
if err != nil {
return err
}
e.Argument = argument
return nil
}
func (e *LogicalExpression) MarshalJSON() ([]byte, error) {
type Alias LogicalExpression
raw := struct {
Type string `json:"type"`
*Alias
}{
Type: e.Type(),
Alias: (*Alias)(e),
}
return json.Marshal(raw)
}
func (e *LogicalExpression) UnmarshalJSON(data []byte) error {
type Alias LogicalExpression
raw := struct {
*Alias
Left json.RawMessage `json:"left"`
Right json.RawMessage `json:"right"`
}{}
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
if raw.Alias != nil {
*e = *(*LogicalExpression)(raw.Alias)
}
l, err := unmarshalExpression(raw.Left)
if err != nil {
return err
}
e.Left = l
r, err := unmarshalExpression(raw.Right)
if err != nil {
return err
}
e.Right = r
return nil
}
func (e *ArrayExpression) MarshalJSON() ([]byte, error) {
type Alias ArrayExpression
raw := struct {
Type string `json:"type"`
*Alias
}{
Type: e.Type(),
Alias: (*Alias)(e),
}
return json.Marshal(raw)
}
func (e *ArrayExpression) UnmarshalJSON(data []byte) error {
type Alias ArrayExpression
raw := struct {
*Alias
Elements []json.RawMessage `json:"elements"`
}{}
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
if raw.Alias != nil {
*e = *(*ArrayExpression)(raw.Alias)
}
e.Elements = make([]Expression, len(raw.Elements))
for i, r := range raw.Elements {
expr, err := unmarshalExpression(r)
if err != nil {
return err
}
e.Elements[i] = expr
}
return nil
}
func (e *ObjectExpression) MarshalJSON() ([]byte, error) {
type Alias ObjectExpression
raw := struct {
Type string `json:"type"`
*Alias
}{
Type: e.Type(),
Alias: (*Alias)(e),
}
return json.Marshal(raw)
}
func (e *ConditionalExpression) MarshalJSON() ([]byte, error) {
type Alias ConditionalExpression
raw := struct {
Type string `json:"type"`
*Alias
}{
Type: e.Type(),
Alias: (*Alias)(e),
}
return json.Marshal(raw)
}
func (e *ConditionalExpression) UnmarshalJSON(data []byte) error {
type Alias ConditionalExpression
raw := struct {
*Alias
Test json.RawMessage `json:"test"`
Alternate json.RawMessage `json:"alternate"`
Consequent json.RawMessage `json:"consequent"`
}{}
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
if raw.Alias != nil {
*e = *(*ConditionalExpression)(raw.Alias)
}
test, err := unmarshalExpression(raw.Test)
if err != nil {
return err
}
e.Test = test
alternate, err := unmarshalExpression(raw.Alternate)
if err != nil {
return err
}
e.Alternate = alternate
consequent, err := unmarshalExpression(raw.Consequent)
if err != nil {
return err
}
e.Consequent = consequent
return nil
}
func (p *Property) MarshalJSON() ([]byte, error) {
type Alias Property
raw := struct {
Type string `json:"type"`
*Alias
}{
Type: p.Type(),
Alias: (*Alias)(p),
}
return json.Marshal(raw)
}
func (p *Property) UnmarshalJSON(data []byte) error {
type Alias Property
raw := struct {
*Alias
Value json.RawMessage `json:"value"`
}{}
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
if raw.Alias != nil {
*p = *(*Property)(raw.Alias)
}
if raw.Value != nil {
value, err := unmarshalExpression(raw.Value)
if err != nil {
return err
}
p.Value = value
}
return nil
}
func (i *Identifier) MarshalJSON() ([]byte, error) {
type Alias Identifier
raw := struct {
Type string `json:"type"`
*Alias
}{
Type: i.Type(),
Alias: (*Alias)(i),
}
return json.Marshal(raw)
}
func (l *PipeLiteral) MarshalJSON() ([]byte, error) {
type Alias PipeLiteral
raw := struct {
Type string `json:"type"`
*Alias
}{
Type: l.Type(),
Alias: (*Alias)(l),
}
return json.Marshal(raw)
}
func (l *StringLiteral) MarshalJSON() ([]byte, error) {
type Alias StringLiteral
raw := struct {
Type string `json:"type"`
*Alias
}{
Type: l.Type(),
Alias: (*Alias)(l),
}
return json.Marshal(raw)
}
func (l *BooleanLiteral) MarshalJSON() ([]byte, error) {
type Alias BooleanLiteral
raw := struct {
Type string `json:"type"`
*Alias
}{
Type: l.Type(),
Alias: (*Alias)(l),
}
return json.Marshal(raw)
}
func (l *FloatLiteral) MarshalJSON() ([]byte, error) {
type Alias FloatLiteral
raw := struct {
Type string `json:"type"`
*Alias
}{
Type: l.Type(),
Alias: (*Alias)(l),
}
return json.Marshal(raw)
}
func (l *IntegerLiteral) MarshalJSON() ([]byte, error) {
type Alias IntegerLiteral
raw := struct {
Type string `json:"type"`
*Alias
Value string `json:"value"`
}{
Type: l.Type(),
Alias: (*Alias)(l),
Value: strconv.FormatInt(l.Value, 10),
}
return json.Marshal(raw)
}
func (l *IntegerLiteral) UnmarshalJSON(data []byte) error {
type Alias IntegerLiteral
raw := struct {
*Alias
Value string `json:"value"`
}{}
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
if raw.Alias != nil {
*l = *(*IntegerLiteral)(raw.Alias)
}
value, err := strconv.ParseInt(raw.Value, 10, 64)
if err != nil {
return err
}
l.Value = value
return nil
}
func (l *UnsignedIntegerLiteral) MarshalJSON() ([]byte, error) {
type Alias UnsignedIntegerLiteral
raw := struct {
Type string `json:"type"`
*Alias
Value string `json:"value"`
}{
Type: l.Type(),
Alias: (*Alias)(l),
Value: strconv.FormatUint(l.Value, 10),
}
return json.Marshal(raw)
}
func (l *UnsignedIntegerLiteral) UnmarshalJSON(data []byte) error {
type Alias UnsignedIntegerLiteral
raw := struct {
*Alias
Value string `json:"value"`
}{}
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
if raw.Alias != nil {
*l = *(*UnsignedIntegerLiteral)(raw.Alias)
}
value, err := strconv.ParseUint(raw.Value, 10, 64)
if err != nil {
return err
}
l.Value = value
return nil
}
func (l *RegexpLiteral) MarshalJSON() ([]byte, error) {
type Alias RegexpLiteral
raw := struct {
Type string `json:"type"`
*Alias
Value string `json:"value"`
}{
Type: l.Type(),
Alias: (*Alias)(l),
Value: l.Value.String(),
}
return json.Marshal(raw)
}
func (l *RegexpLiteral) UnmarshalJSON(data []byte) error {
type Alias RegexpLiteral
raw := struct {
*Alias
Value string `json:"value"`
}{}
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
if raw.Alias != nil {
*l = *(*RegexpLiteral)(raw.Alias)
}
value, err := regexp.Compile(raw.Value)
if err != nil {
return err
}
l.Value = value
return nil
}
func (l *DurationLiteral) MarshalJSON() ([]byte, error) {
type Alias DurationLiteral
raw := struct {
Type string `json:"type"`
*Alias
Value string `json:"value"`
}{
Type: l.Type(),
Alias: (*Alias)(l),
Value: l.Value.String(),
}
return json.Marshal(raw)
}
func (l *DurationLiteral) UnmarshalJSON(data []byte) error {
type Alias DurationLiteral
raw := struct {
*Alias
Value string `json:"value"`
}{}
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
if raw.Alias != nil {
*l = *(*DurationLiteral)(raw.Alias)
}
value, err := time.ParseDuration(raw.Value)
if err != nil {
return err
}
l.Value = value
return nil
}
func (l *DateTimeLiteral) MarshalJSON() ([]byte, error) {
type Alias DateTimeLiteral
raw := struct {
Type string `json:"type"`
*Alias
}{
Type: l.Type(),
Alias: (*Alias)(l),
}
return json.Marshal(raw)
}
func checkNullMsg(msg json.RawMessage) bool {
switch len(msg) {
case 0:
return true
case 4:
return string(msg) == "null"
default:
return false
}
}
func unmarshalStatement(msg json.RawMessage) (Statement, error) {
if checkNullMsg(msg) {
return nil, nil
}
n, err := unmarshalNode(msg)
if err != nil {
return nil, err
}
s, ok := n.(Statement)
if !ok {
return nil, fmt.Errorf("node %q is not a statement", n.Type())
}
return s, nil
}
func unmarshalExpression(msg json.RawMessage) (Expression, error) {
if checkNullMsg(msg) {
return nil, nil
}
n, err := unmarshalNode(msg)
if err != nil {
return nil, err
}
e, ok := n.(Expression)
if !ok {
return nil, fmt.Errorf("node %q is not an expression", n.Type())
}
return e, nil
}
func unmarshalLiteral(msg json.RawMessage) (Literal, error) {
if checkNullMsg(msg) {
return nil, nil
}
n, err := unmarshalNode(msg)
if err != nil {
return nil, err
}
e, ok := n.(Literal)
if !ok {
return nil, fmt.Errorf("node %q is not a literal", n.Type())
}
return e, nil
}
func unmarshalNode(msg json.RawMessage) (Node, error) {
if checkNullMsg(msg) {
return nil, nil
}
type typeRawMessage struct {
Type string `json:"type"`
}
typ := typeRawMessage{}
if err := json.Unmarshal(msg, &typ); err != nil {
return nil, err
}
var node Node
switch typ.Type {
case "Program":
node = new(Program)
case "BlockStatement":
node = new(BlockStatement)
case "ExpressionStatement":
node = new(ExpressionStatement)
case "ReturnStatement":
node = new(ReturnStatement)
case "VariableDeclaration":
node = new(VariableDeclaration)
case "VariableDeclarator":
node = new(VariableDeclarator)
case "CallExpression":
node = new(CallExpression)
case "PipeExpression":
node = new(PipeExpression)
case "MemberExpression":
node = new(MemberExpression)
case "BinaryExpression":
node = new(BinaryExpression)
case "UnaryExpression":
node = new(UnaryExpression)
case "LogicalExpression":
node = new(LogicalExpression)
case "ObjectExpression":
node = new(ObjectExpression)
case "ConditionalExpression":
node = new(ConditionalExpression)
case "ArrayExpression":
node = new(ArrayExpression)
case "Identifier":
node = new(Identifier)
case "PipeLiteral":
node = new(PipeLiteral)
case "StringLiteral":
node = new(StringLiteral)
case "BooleanLiteral":
node = new(BooleanLiteral)
case "FloatLiteral":
node = new(FloatLiteral)
case "IntegerLiteral":
node = new(IntegerLiteral)
case "UnsignedIntegerLiteral":
node = new(UnsignedIntegerLiteral)
case "RegexpLiteral":
node = new(RegexpLiteral)
case "DurationLiteral":
node = new(DurationLiteral)
case "DateTimeLiteral":
node = new(DateTimeLiteral)
case "ArrowFunctionExpression":
node = new(ArrowFunctionExpression)
case "Property":
node = new(Property)
default:
return nil, fmt.Errorf("unknown type %q", typ.Type)
}
if err := json.Unmarshal(msg, node); err != nil {
return nil, err
}
return node, nil
}
func UnmarshalNode(data []byte) (Node, error) {
return unmarshalNode((json.RawMessage)(data))
}
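For reference, a minimal round-trip sketch through this JSON layer (the test file below exercises the same path exhaustively):

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/influxdata/ifql/ast"
)

func main() {
	// MarshalJSON adds the "type" discriminator alongside the node's fields.
	data, err := json.Marshal(&ast.StringLiteral{Value: "hello"})
	if err != nil {
		panic(err)
	}
	fmt.Println(string(data)) // {"type":"StringLiteral","value":"hello"}

	// UnmarshalNode dispatches on the "type" field to rebuild the concrete node.
	node, err := ast.UnmarshalNode(data)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%T\n", node) // *ast.StringLiteral
}
```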

255
query/ast/json_test.go Normal file
View File

@ -0,0 +1,255 @@
package ast_test
import (
"encoding/json"
"math"
"regexp"
"testing"
"time"
"github.com/google/go-cmp/cmp"
"github.com/influxdata/ifql/ast"
"github.com/influxdata/ifql/ast/asttest"
)
func TestJSONMarshal(t *testing.T) {
testCases := []struct {
name string
node ast.Node
want string
}{
{
name: "simple program",
node: &ast.Program{
Body: []ast.Statement{
&ast.ExpressionStatement{
Expression: &ast.StringLiteral{Value: "hello"},
},
},
},
want: `{"type":"Program","body":[{"type":"ExpressionStatement","expression":{"type":"StringLiteral","value":"hello"}}]}`,
},
{
name: "block statement",
node: &ast.BlockStatement{
Body: []ast.Statement{
&ast.ExpressionStatement{
Expression: &ast.StringLiteral{Value: "hello"},
},
},
},
want: `{"type":"BlockStatement","body":[{"type":"ExpressionStatement","expression":{"type":"StringLiteral","value":"hello"}}]}`,
},
{
name: "expression statement",
node: &ast.ExpressionStatement{
Expression: &ast.StringLiteral{Value: "hello"},
},
want: `{"type":"ExpressionStatement","expression":{"type":"StringLiteral","value":"hello"}}`,
},
{
name: "return statement",
node: &ast.ReturnStatement{
Argument: &ast.StringLiteral{Value: "hello"},
},
want: `{"type":"ReturnStatement","argument":{"type":"StringLiteral","value":"hello"}}`,
},
{
name: "variable declaration",
node: &ast.VariableDeclaration{
Declarations: []*ast.VariableDeclarator{
{
ID: &ast.Identifier{Name: "a"},
Init: &ast.StringLiteral{Value: "hello"},
},
},
},
want: `{"type":"VariableDeclaration","declarations":[{"type":"VariableDeclarator","id":{"type":"Identifier","name":"a"},"init":{"type":"StringLiteral","value":"hello"}}]}`,
},
{
name: "variable declarator",
node: &ast.VariableDeclarator{
ID: &ast.Identifier{Name: "a"},
Init: &ast.StringLiteral{Value: "hello"},
},
want: `{"type":"VariableDeclarator","id":{"type":"Identifier","name":"a"},"init":{"type":"StringLiteral","value":"hello"}}`,
},
{
name: "call expression",
node: &ast.CallExpression{
Callee: &ast.Identifier{Name: "a"},
Arguments: []ast.Expression{&ast.StringLiteral{Value: "hello"}},
},
want: `{"type":"CallExpression","callee":{"type":"Identifier","name":"a"},"arguments":[{"type":"StringLiteral","value":"hello"}]}`,
},
{
name: "pipe expression",
node: &ast.PipeExpression{
Argument: &ast.Identifier{Name: "a"},
Call: &ast.CallExpression{
Callee: &ast.Identifier{Name: "a"},
Arguments: []ast.Expression{&ast.StringLiteral{Value: "hello"}},
},
},
want: `{"type":"PipeExpression","argument":{"type":"Identifier","name":"a"},"call":{"type":"CallExpression","callee":{"type":"Identifier","name":"a"},"arguments":[{"type":"StringLiteral","value":"hello"}]}}`,
},
{
name: "member expression",
node: &ast.MemberExpression{
Object: &ast.Identifier{Name: "a"},
Property: &ast.StringLiteral{Value: "hello"},
},
want: `{"type":"MemberExpression","object":{"type":"Identifier","name":"a"},"property":{"type":"StringLiteral","value":"hello"}}`,
},
{
name: "arrow function expression",
node: &ast.ArrowFunctionExpression{
Params: []*ast.Property{{Key: &ast.Identifier{Name: "a"}}},
Body: &ast.StringLiteral{Value: "hello"},
},
want: `{"type":"ArrowFunctionExpression","params":[{"type":"Property","key":{"type":"Identifier","name":"a"},"value":null}],"body":{"type":"StringLiteral","value":"hello"}}`,
},
{
name: "binary expression",
node: &ast.BinaryExpression{
Operator: ast.AdditionOperator,
Left: &ast.StringLiteral{Value: "hello"},
Right: &ast.StringLiteral{Value: "world"},
},
want: `{"type":"BinaryExpression","operator":"+","left":{"type":"StringLiteral","value":"hello"},"right":{"type":"StringLiteral","value":"world"}}`,
},
{
name: "unary expression",
node: &ast.UnaryExpression{
Operator: ast.NotOperator,
Argument: &ast.BooleanLiteral{Value: true},
},
want: `{"type":"UnaryExpression","operator":"not","argument":{"type":"BooleanLiteral","value":true}}`,
},
{
name: "logical expression",
node: &ast.LogicalExpression{
Operator: ast.OrOperator,
Left: &ast.BooleanLiteral{Value: false},
Right: &ast.BooleanLiteral{Value: true},
},
want: `{"type":"LogicalExpression","operator":"or","left":{"type":"BooleanLiteral","value":false},"right":{"type":"BooleanLiteral","value":true}}`,
},
{
name: "array expression",
node: &ast.ArrayExpression{
Elements: []ast.Expression{&ast.StringLiteral{Value: "hello"}},
},
want: `{"type":"ArrayExpression","elements":[{"type":"StringLiteral","value":"hello"}]}`,
},
{
name: "object expression",
node: &ast.ObjectExpression{
Properties: []*ast.Property{{
Key: &ast.Identifier{Name: "a"},
Value: &ast.StringLiteral{Value: "hello"},
}},
},
want: `{"type":"ObjectExpression","properties":[{"type":"Property","key":{"type":"Identifier","name":"a"},"value":{"type":"StringLiteral","value":"hello"}}]}`,
},
{
name: "conditional expression",
node: &ast.ConditionalExpression{
Test: &ast.BooleanLiteral{Value: true},
Alternate: &ast.StringLiteral{Value: "false"},
Consequent: &ast.StringLiteral{Value: "true"},
},
want: `{"type":"ConditionalExpression","test":{"type":"BooleanLiteral","value":true},"alternate":{"type":"StringLiteral","value":"false"},"consequent":{"type":"StringLiteral","value":"true"}}`,
},
{
name: "property",
node: &ast.Property{
Key: &ast.Identifier{Name: "a"},
Value: &ast.StringLiteral{Value: "hello"},
},
want: `{"type":"Property","key":{"type":"Identifier","name":"a"},"value":{"type":"StringLiteral","value":"hello"}}`,
},
{
name: "identifier",
node: &ast.Identifier{
Name: "a",
},
want: `{"type":"Identifier","name":"a"}`,
},
{
name: "string literal",
node: &ast.StringLiteral{
Value: "hello",
},
want: `{"type":"StringLiteral","value":"hello"}`,
},
{
name: "boolean literal",
node: &ast.BooleanLiteral{
Value: true,
},
want: `{"type":"BooleanLiteral","value":true}`,
},
{
name: "float literal",
node: &ast.FloatLiteral{
Value: 42.1,
},
want: `{"type":"FloatLiteral","value":42.1}`,
},
{
name: "integer literal",
node: &ast.IntegerLiteral{
Value: math.MaxInt64,
},
want: `{"type":"IntegerLiteral","value":"9223372036854775807"}`,
},
{
name: "unsigned integer literal",
node: &ast.UnsignedIntegerLiteral{
Value: math.MaxUint64,
},
want: `{"type":"UnsignedIntegerLiteral","value":"18446744073709551615"}`,
},
{
name: "regexp literal",
node: &ast.RegexpLiteral{
Value: regexp.MustCompile(`.*`),
},
want: `{"type":"RegexpLiteral","value":".*"}`,
},
{
name: "duration literal",
node: &ast.DurationLiteral{
Value: time.Hour + time.Minute,
},
want: `{"type":"DurationLiteral","value":"1h1m0s"}`,
},
{
name: "datetime literal",
node: &ast.DateTimeLiteral{
Value: time.Date(2017, 8, 8, 8, 8, 8, 8, time.UTC),
},
want: `{"type":"DateTimeLiteral","value":"2017-08-08T08:08:08.000000008Z"}`,
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
data, err := json.Marshal(tc.node)
if err != nil {
t.Fatal(err)
}
if got := string(data); got != tc.want {
t.Errorf("unexpected json data:\nwant:%s\ngot: %s\n", tc.want, got)
}
node, err := ast.UnmarshalNode(data)
if err != nil {
t.Fatal(err)
}
if !cmp.Equal(tc.node, node, asttest.CompareOptions...) {
t.Errorf("unexpected node after unmarshalling: -want/+got:\n%s", cmp.Diff(tc.node, node, asttest.CompareOptions...))
}
})
}
}

View File

@ -0,0 +1,200 @@
## Benchmarks
The output generated by `curl` uses `curl-format.txt`, found alongside this README.
Telegraf is configured with the supplied `telegraf.conf` to capture metrics from `ifqld` and `influxd` via their
Prometheus `/metrics` HTTP endpoints, along with machine metrics including CPU usage and disk I/O. Note that `influxd` is running
on port `8186`, allowing a separate `influxd` on the default port to receive metrics from Telegraf.
## Dataset #1
| | |
| ----- | ----- |
| series | 100,000 |
| pps | 3,000 |
| shards | 12 |
| pps / shard | 250 |
| total points | 300,000,000 |
**pps**: points per series
### Hardware
| | |
| ----- | ----- |
| AWS instance type | c3.4xlarge |
### Generate dataset
1. Use [ingen][ingen] to populate a database with data.
```sh
$ ingen -p=250 -t=1000,100 -shards=12 -start-time="2017-11-01T00:00:00Z" -data-path=~/.influxdb/data -meta-path=~/.influxdb/meta
```
The previous command will
* populate a database named `db` (the default),
* create 100,000 series (1000×100) from 2 tag keys (`tag0` and `tag1`) with 1000 and 100 tag values respectively,
* write 250 points per series per shard, for a total of 3,000 points per series, and
* span 12 shards, with points starting from `2017-11-01 00:00:00 UTC`.
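As a quick check, these parameters are consistent with the table above: 250 points per series per shard × 12 shards = 3,000 points per series, and 100,000 series × 3,000 points = 300,000,000 total points.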
### ifql queries
Query #1
```sh
HOST=localhost:8093; curl -w "@curl-format.txt" -H 'Accept: text/plain' -o /dev/null -s http://${HOST}/query \
--data-urlencode 'q=from(db:"db").range(start:2017-11-01T00:00:00Z, stop:2017-11-02T00:00:00Z).filter(exp:{"_measurement" == "m0" and "_field" == "v0" and $ > 0}).sum()'
time_starttransfer: 0.138
size_download: 5800000
time_total: 7.578
```
Query #2
```sh
HOST=localhost:8093; curl -w "@curl-format.txt" -H 'Accept: text/plain' -o /dev/null -s http://${HOST}/query \
--data-urlencode 'q=from(db:"db").range(start:2017-11-01T00:00:00Z, stop:2017-11-05T00:00:00Z).filter(exp:{"_measurement" == "m0" and "_field" == "v0" and $ > 0}).sum()'
time_starttransfer: 0.305
size_download: 5900000
time_total: 17.909
```
Query #3
```sh
HOST=localhost:8093; curl -w "@curl-format.txt" -H 'Accept: text/plain' -o /dev/null -s http://${HOST}/query \
--data-urlencode 'q=from(db:"db").range(start:2017-11-01T00:00:00Z, stop:2017-11-05T00:00:00Z).filter(exp:{"_measurement" == "m0" and "_field" == "v0" and $ > 0}).group(by:["tag0"]).sum()'
time_starttransfer: 22.727
size_download: 60000
time_total: 22.730
```
Query #4
```sh
HOST=localhost:8093; curl -w "@curl-format.txt" -H 'Accept: text/plain' -o /dev/null -s http://${HOST}/query \
--data-urlencode 'q=from(db:"db").range(start:2017-11-01T00:00:00Z, stop:2017-11-13T00:00:00Z).filter(exp:{"_measurement" == "m0" and "_field" == "v0" and $ > 0}).sum()'
time_starttransfer: 0.713
size_download: 5900000
time_total: 44.159
```
Query #5
```sh
HOST=localhost:8093; curl -w "@curl-format.txt" -H 'Accept: text/plain' -o /dev/null -s http://${HOST}/query \
--data-urlencode 'q=from(db:"db").range(start:2017-11-01T00:00:00Z, stop:2017-11-13T00:00:00Z).filter(exp:{"_measurement" == "m0" and "_field" == "v0" and $ > 0}).group(by:["tag0"]).sum()'
time_starttransfer: 56.257
size_download: 60000
time_total: 56.261
```
## Dataset #2
| | |
| ----- | ----- |
| series | 10,000,000 |
| pps | 1,000 |
| shards | 4 |
| pps / shard | 250 |
| total points | 10,000,000,000 |
**pps**: points per series
### Hardware
| | |
| ----- | ----- |
| AWS instance type | c5.4xlarge |
### Generate dataset
1. Use [ingen][ingen] to populate a database with data.
```sh
$ ingen -p=250 -t=10000,100,10 -shards=4 -start-time="2017-11-01T00:00:00Z" -data-path=~/.influxdb/data -meta-path=~/.influxdb/meta
```
The previous command will
* populate a database named `db` (the default),
* create 10,000,000 series (10000×100×10) from 3 tag keys (`tag0`, `tag1` and `tag2`) with 10000, 100 and 10 tag values respectively,
* write 250 points per series per shard, for a total of 1,000 points per series, and
* span 4 shards, with points starting from `2017-11-01 00:00:00 UTC`.
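As a quick check: 250 points per series per shard × 4 shards = 1,000 points per series, and 10,000,000 series × 1,000 points = 10,000,000,000 total points, matching the table above.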
### ifql queries
Query #1
```sh
HOST=localhost:8093; curl -w "@curl-format.txt" -H 'Accept: text/plain' -o /dev/null -s http://${HOST}/query \
--data-urlencode 'q=from(db:"db").range(start:2017-11-01T00:00:00Z, stop:2017-11-05T00:00:00Z).filter(exp:{"_measurement" == "m0" and "_field" == "v0" and "tag1" == "value00"}).group(by:["tag0"]).sum()'
time_starttransfer: 0.325
size_download: 7200000
time_total: 11.437
```
Query #2
```sh
HOST=localhost:8093; curl -w "@curl-format.txt" -H 'Accept: text/plain' -o /dev/null -s http://${HOST}/query \
--data-urlencode 'q=from(db:"db").range(start:2017-11-01T00:00:00Z, stop:2017-11-05T00:00:00Z).filter(exp:{"_measurement" == "m0" and "_field" == "v0" and "tag1" == "value00"}).group(by:["tag0"]).sum()'
time_starttransfer: 13.174
size_download: 600000
time_total: 13.215
```
Query #3
```sh
HOST=localhost:8093; curl -w "@curl-format.txt" -H 'Accept: text/plain' -o /dev/null -s http://${HOST}/query \
--data-urlencode 'q=from(db:"db").range(start:2017-11-01T00:00:00Z, stop:2017-11-05T00:00:00Z).filter(exp:{"_measurement" == "m0" and "_field" == "v0"}).group(by:["tag0"]).sum()'
time_starttransfer: 1190.204
size_download: 620000
time_total: 1190.244
```
Query #4
```sh
HOST=localhost:8093; curl -w "@curl-format.txt" -H 'Accept: text/plain' -o /dev/null -s http://${HOST}/query \
--data-urlencode 'q=from(db:"db").range(start:2017-11-01T00:00:00Z, stop:2017-11-05T00:00:00Z).filter(exp:{"_measurement" == "m0" and "_field" == "v0"}).sum()'
time_starttransfer: 23.975
size_download: 720000000
time_total: 803.254
```
[ingen]: https://github.com/influxdata/ingen

View File

@ -0,0 +1,132 @@
reporting-disabled = false
bind-address = ":8188"
[meta]
dir = "/home/ubuntu/.influxdb/meta"
retention-autocreate = true
logging-enabled = true
[data]
dir = "/home/ubuntu/.influxdb/data"
index-version = "inmem"
wal-dir = "/home/ubuntu/.influxdb/wal"
wal-fsync-delay = "0s"
query-log-enabled = true
cache-max-memory-size = 1073741824
cache-snapshot-memory-size = 26214400
cache-snapshot-write-cold-duration = "10m0s"
compact-full-write-cold-duration = "4h0m0s"
max-series-per-database = 1000000
max-values-per-tag = 100000
max-concurrent-compactions = 0
trace-logging-enabled = false
[coordinator]
write-timeout = "30s"
max-concurrent-queries = 0
#query-timeout = "5s"
log-queries-after = "0s"
max-select-point = 0
max-select-series = 0
max-select-buckets = 0
[retention]
enabled = true
check-interval = "30m0s"
[shard-precreation]
enabled = true
check-interval = "10m0s"
advance-period = "30m0s"
[monitor]
store-enabled = false
store-database = "_internal"
store-interval = "10s"
[subscriber]
enabled = true
http-timeout = "30s"
insecure-skip-verify = false
ca-certs = ""
write-concurrency = 40
write-buffer-size = 1000
[http]
enabled = true
bind-address = ":8186"
auth-enabled = false
log-enabled = false
write-tracing = false
pprof-enabled = true
https-enabled = false
https-certificate = "/etc/ssl/influxdb.pem"
https-private-key = ""
max-row-limit = 0
max-connection-limit = 0
shared-secret = ""
realm = "InfluxDB"
unix-socket-enabled = false
bind-socket = "/var/run/influxdb.sock"
#max-body-size = 5
[ifql]
enabled = true
log-enabled = true
bind-address = ":8082"
[[graphite]]
enabled = false
bind-address = ":2003"
database = "graphite"
retention-policy = ""
protocol = "tcp"
batch-size = 5000
batch-pending = 10
batch-timeout = "1s"
consistency-level = "one"
separator = "."
udp-read-buffer = 0
[[collectd]]
enabled = false
bind-address = ":25826"
database = "collectd"
retention-policy = ""
batch-size = 5000
batch-pending = 10
batch-timeout = "10s"
read-buffer = 0
typesdb = "/usr/share/collectd/types.db"
security-level = "none"
auth-file = "/etc/collectd/auth_file"
[[opentsdb]]
enabled = false
bind-address = ":4242"
database = "opentsdb"
retention-policy = ""
consistency-level = "one"
tls-enabled = false
certificate = "/etc/ssl/influxdb.pem"
batch-size = 1000
batch-pending = 5
batch-timeout = "1s"
log-point-errors = true
[[udp]]
enabled = false
bind-address = ":8089"
database = "udp"
retention-policy = ""
batch-size = 5000
batch-pending = 10
read-buffer = 0
batch-timeout = "1s"
precision = ""
[continuous_queries]
log-enabled = true
enabled = true
run-interval = "1s"

View File

@ -0,0 +1,5 @@
\n
time_starttransfer: %{time_starttransfer}\n
size_download: %{size_download}\n
time_total: %{time_total}\n
\n

View File

@ -0,0 +1,52 @@
[global_tags]
[agent]
interval = "2s"
round_interval = true
metric_batch_size = 1000
metric_buffer_limit = 10000
collection_jitter = "10ms"
flush_interval = "10s"
flush_jitter = "1s"
precision = ""
debug = false
quiet = false
hostname = "stuart-bench-oss-0"
omit_hostname = false
[[inputs.cpu]]
percpu = true
totalcpu = true
fielddrop = ["time_*"]
[[inputs.disk]]
ignore_fs = ["tmpfs", "devtmpfs"]
# Read metrics about disk IO by device
[[inputs.diskio]]
## By default, telegraf will gather stats for all devices including
## disk partitions.
## Setting devices will restrict the stats to the specified devices.
# devices = ["sda", "sdb"]
## Uncomment the following line if you need disk serial numbers.
# skip_serial_number = false
[[inputs.mem]]
[[inputs.procstat]]
pattern = "influx"
prefix = ""
fielddrop = ["cpu_time_*"]
[[inputs.system]]
# Read metrics from one or many prometheus clients
[[inputs.prometheus]]
## An array of urls to scrape metrics from.
urls = ["http://localhost:8093/metrics","http://localhost:8186/metrics"]

536
query/compile.go Normal file
View File

@ -0,0 +1,536 @@
package query
import (
"context"
"fmt"
"log"
"regexp"
"time"
"github.com/influxdata/ifql/interpreter"
"github.com/influxdata/ifql/parser"
"github.com/influxdata/ifql/semantic"
"github.com/influxdata/ifql/values"
opentracing "github.com/opentracing/opentracing-go"
"github.com/pkg/errors"
)
const (
TableParameter = "table"
tableIDKey = "id"
tableKindKey = "kind"
tableParentsKey = "parents"
//tableSpecKey = "spec"
)
type Option func(*options)
func Verbose(v bool) Option {
return func(o *options) {
o.verbose = v
}
}
type options struct {
verbose bool
}
// Compile evaluates an IFQL script producing a query Spec.
func Compile(ctx context.Context, q string, opts ...Option) (*Spec, error) {
o := new(options)
for _, opt := range opts {
opt(o)
}
s, _ := opentracing.StartSpanFromContext(ctx, "parse")
astProg, err := parser.NewAST(q)
if err != nil {
return nil, err
}
s.Finish()
s, _ = opentracing.StartSpanFromContext(ctx, "compile")
defer s.Finish()
qd := new(queryDomain)
scope, decls := builtIns(qd)
interpScope := interpreter.NewScopeWithValues(scope)
// Convert AST program to a semantic program
semProg, err := semantic.New(astProg, decls)
if err != nil {
return nil, err
}
if err := interpreter.Eval(semProg, interpScope); err != nil {
return nil, err
}
spec := qd.ToSpec()
if o.verbose {
log.Println("Query Spec: ", Formatted(spec, FmtJSON))
}
return spec, nil
}
type CreateOperationSpec func(args Arguments, a *Administration) (OperationSpec, error)
var builtinScope = make(map[string]values.Value)
var builtinDeclarations = make(semantic.DeclarationScope)
// list of builtin scripts
var builtins = make(map[string]string)
var finalized bool
// RegisterBuiltIn adds any variable declarations in the script to the builtin scope.
func RegisterBuiltIn(name, script string) {
if finalized {
panic(errors.New("already finalized, cannot register builtin"))
}
builtins[name] = script
}
// RegisterFunction adds a new builtin top level function.
func RegisterFunction(name string, c CreateOperationSpec, sig semantic.FunctionSignature) {
f := function{
t: semantic.NewFunctionType(sig),
name: name,
createOpSpec: c,
}
RegisterBuiltInValue(name, f)
}
// RegisterBuiltInValue adds the value to the builtin scope.
func RegisterBuiltInValue(name string, v values.Value) {
if finalized {
panic(errors.New("already finalized, cannot register builtin"))
}
if _, ok := builtinScope[name]; ok {
panic(fmt.Errorf("duplicate registration for builtin %q", name))
}
builtinDeclarations[name] = semantic.NewExternalVariableDeclaration(name, v.Type())
builtinScope[name] = v
}
// FinalizeRegistration must be called to complete registration.
// Future calls to RegisterFunction, RegisterBuiltIn or RegisterBuiltInValue will panic.
func FinalizeRegistration() {
if finalized {
panic("already finalized")
}
finalized = true
//for name, script := range builtins {
// astProg, err := parser.NewAST(script)
// if err != nil {
// panic(errors.Wrapf(err, "failed to parse builtin %q", name))
// }
// semProg, err := semantic.New(astProg, builtinDeclarations)
// if err != nil {
// panic(errors.Wrapf(err, "failed to create semantic graph for builtin %q", name))
// }
// if err := interpreter.Eval(semProg, builtinScope); err != nil {
// panic(errors.Wrapf(err, "failed to evaluate builtin %q", name))
// }
//}
//// free builtins list
//builtins = nil
}
var TableObjectType = semantic.NewObjectType(map[string]semantic.Type{
tableIDKey: semantic.String,
tableKindKey: semantic.String,
// TODO(nathanielc): The spec types vary significantly making type comparisons impossible, for now the solution is to state the type as an empty object.
//tableSpecKey: semantic.EmptyObject,
// TODO(nathanielc): Support recursive types, for now we state that the array has empty objects.
tableParentsKey: semantic.NewArrayType(semantic.EmptyObject),
})
type TableObject struct {
ID OperationID
Kind OperationKind
Spec OperationSpec
Parents values.Array
}
func (t TableObject) Operation() *Operation {
return &Operation{
ID: t.ID,
Spec: t.Spec,
}
}
func (t TableObject) String() string {
return fmt.Sprintf("{id: %q, kind: %q}", t.ID, t.Kind)
}
func (t TableObject) ToSpec() *Spec {
visited := make(map[OperationID]bool)
spec := new(Spec)
t.buildSpec(spec, visited)
return spec
}
func (t TableObject) buildSpec(spec *Spec, visited map[OperationID]bool) {
id := t.ID
t.Parents.Range(func(i int, v values.Value) {
p := v.(TableObject)
if !visited[p.ID] {
// recurse up parents
p.buildSpec(spec, visited)
}
spec.Edges = append(spec.Edges, Edge{
Parent: p.ID,
Child: id,
})
})
visited[id] = true
spec.Operations = append(spec.Operations, t.Operation())
}
func (t TableObject) Type() semantic.Type {
return TableObjectType
}
func (t TableObject) Str() string {
panic(values.UnexpectedKind(semantic.Object, semantic.String))
}
func (t TableObject) Int() int64 {
panic(values.UnexpectedKind(semantic.Object, semantic.Int))
}
func (t TableObject) UInt() uint64 {
panic(values.UnexpectedKind(semantic.Object, semantic.UInt))
}
func (t TableObject) Float() float64 {
panic(values.UnexpectedKind(semantic.Object, semantic.Float))
}
func (t TableObject) Bool() bool {
panic(values.UnexpectedKind(semantic.Object, semantic.Bool))
}
func (t TableObject) Time() values.Time {
panic(values.UnexpectedKind(semantic.Object, semantic.Time))
}
func (t TableObject) Duration() values.Duration {
panic(values.UnexpectedKind(semantic.Object, semantic.Duration))
}
func (t TableObject) Regexp() *regexp.Regexp {
panic(values.UnexpectedKind(semantic.Object, semantic.Regexp))
}
func (t TableObject) Array() values.Array {
panic(values.UnexpectedKind(semantic.Object, semantic.Array))
}
func (t TableObject) Object() values.Object {
return t
}
func (t TableObject) Function() values.Function {
panic(values.UnexpectedKind(semantic.Object, semantic.Function))
}
func (t TableObject) Get(name string) (values.Value, bool) {
switch name {
case tableIDKey:
return values.NewStringValue(string(t.ID)), true
case tableKindKey:
return values.NewStringValue(string(t.Kind)), true
case tableParentsKey:
return t.Parents, true
default:
return nil, false
}
}
func (t TableObject) Set(name string, v values.Value) {
//TableObject is immutable
}
func (t TableObject) Len() int {
return 3
}
func (t TableObject) Range(f func(name string, v values.Value)) {
f(tableIDKey, values.NewStringValue(string(t.ID)))
f(tableKindKey, values.NewStringValue(string(t.Kind)))
f(tableParentsKey, t.Parents)
}
// DefaultFunctionSignature returns a FunctionSignature for standard functions which accept a table piped argument.
// It is safe to modify the returned signature.
func DefaultFunctionSignature() semantic.FunctionSignature {
return semantic.FunctionSignature{
Params: map[string]semantic.Type{
TableParameter: TableObjectType,
},
ReturnType: TableObjectType,
PipeArgument: TableParameter,
}
}
func BuiltIns() (map[string]values.Value, semantic.DeclarationScope) {
qd := new(queryDomain)
return builtIns(qd)
}
func builtIns(qd *queryDomain) (map[string]values.Value, semantic.DeclarationScope) {
decls := builtinDeclarations.Copy()
scope := make(map[string]values.Value, len(builtinScope))
for k, v := range builtinScope {
if v.Type().Kind() == semantic.Function {
if f, ok := v.Function().(function); ok {
f.qd = qd
v = f
}
}
scope[k] = v
}
interpScope := interpreter.NewScopeWithValues(scope)
for name, script := range builtins {
astProg, err := parser.NewAST(script)
if err != nil {
panic(errors.Wrapf(err, "failed to parse builtin %q", name))
}
semProg, err := semantic.New(astProg, decls)
if err != nil {
panic(errors.Wrapf(err, "failed to create semantic graph for builtin %q", name))
}
if err := interpreter.Eval(semProg, interpScope); err != nil {
panic(errors.Wrapf(err, "failed to evaluate builtin %q", name))
}
}
return scope, decls
}
type Administration struct {
id OperationID
parents values.Array
}
func newAdministration(id OperationID) *Administration {
return &Administration{
id: id,
// TODO(nathanielc): Once we can support recursive types change this to,
// interpreter.NewArray(TableObjectType)
parents: values.NewArray(semantic.EmptyObject),
}
}
// AddParentFromArgs reads the args for the `table` argument and adds the value as a parent.
func (a *Administration) AddParentFromArgs(args Arguments) error {
parent, err := args.GetRequiredObject(TableParameter)
if err != nil {
return err
}
p, ok := parent.(TableObject)
if !ok {
return fmt.Errorf("argument is not a table object: got %T", parent)
}
a.AddParent(p)
return nil
}
// AddParent instructs the evaluation Context that a new edge should be created from the parent to the current operation.
// Duplicate parents are ignored, so the caller need not concern itself with which parents have already been added.
func (a *Administration) AddParent(np TableObject) {
// Check for duplicates
found := false
a.parents.Range(func(i int, p values.Value) {
if p.(TableObject).ID == np.ID {
found = true
}
})
if !found {
a.parents.Append(np)
}
}
type Domain interface {
ToSpec() *Spec
}
func NewDomain() Domain {
return new(queryDomain)
}
type queryDomain struct {
id int
operations []TableObject
}
func (d *queryDomain) NewID(name string) OperationID {
return OperationID(fmt.Sprintf("%s%d", name, d.nextID()))
}
func (d *queryDomain) nextID() int {
id := d.id
d.id++
return id
}
func (d *queryDomain) ToSpec() *Spec {
spec := new(Spec)
visited := make(map[OperationID]bool)
for _, t := range d.operations {
t.buildSpec(spec, visited)
}
return spec
}
type function struct {
name string
t semantic.Type
createOpSpec CreateOperationSpec
qd *queryDomain
}
func (f function) Type() semantic.Type {
return f.t
}
func (f function) Str() string {
panic(values.UnexpectedKind(semantic.Function, semantic.String))
}
func (f function) Int() int64 {
panic(values.UnexpectedKind(semantic.Function, semantic.Int))
}
func (f function) UInt() uint64 {
panic(values.UnexpectedKind(semantic.Function, semantic.UInt))
}
func (f function) Float() float64 {
panic(values.UnexpectedKind(semantic.Function, semantic.Float))
}
func (f function) Bool() bool {
panic(values.UnexpectedKind(semantic.Function, semantic.Bool))
}
func (f function) Time() values.Time {
panic(values.UnexpectedKind(semantic.Function, semantic.Time))
}
func (f function) Duration() values.Duration {
panic(values.UnexpectedKind(semantic.Function, semantic.Duration))
}
func (f function) Regexp() *regexp.Regexp {
panic(values.UnexpectedKind(semantic.Function, semantic.Regexp))
}
func (f function) Array() values.Array {
panic(values.UnexpectedKind(semantic.Function, semantic.Array))
}
func (f function) Object() values.Object {
panic(values.UnexpectedKind(semantic.Function, semantic.Object))
}
func (f function) Function() values.Function {
return f
}
func (f function) Call(argsObj values.Object) (values.Value, error) {
return interpreter.DoFunctionCall(f.call, argsObj)
}
func (f function) call(args interpreter.Arguments) (values.Value, error) {
id := f.qd.NewID(f.name)
a := newAdministration(id)
spec, err := f.createOpSpec(Arguments{Arguments: args}, a)
if err != nil {
return nil, err
}
if a.parents.Len() > 1 {
// Always add parents in a consistent order
a.parents.Sort(func(i, j values.Value) bool {
return i.(TableObject).ID < j.(TableObject).ID
})
}
t := TableObject{
ID: id,
Kind: spec.Kind(),
Spec: spec,
Parents: a.parents,
}
f.qd.operations = append(f.qd.operations, t)
return t, nil
}
type specValue struct {
spec OperationSpec
}
func (v specValue) Type() semantic.Type {
return semantic.EmptyObject
}
func (v specValue) Value() interface{} {
return v.spec
}
func (v specValue) Property(name string) (interpreter.Value, error) {
return nil, errors.New("spec does not have properties")
}
type Arguments struct {
interpreter.Arguments
}
func (a Arguments) GetTime(name string) (Time, bool, error) {
v, ok := a.Get(name)
if !ok {
return Time{}, false, nil
}
qt, err := ToQueryTime(v)
if err != nil {
return Time{}, ok, err
}
return qt, ok, nil
}
func (a Arguments) GetRequiredTime(name string) (Time, error) {
qt, ok, err := a.GetTime(name)
if err != nil {
return Time{}, err
}
if !ok {
return Time{}, fmt.Errorf("missing required keyword argument %q", name)
}
return qt, nil
}
func (a Arguments) GetDuration(name string) (Duration, bool, error) {
v, ok := a.Get(name)
if !ok {
return 0, false, nil
}
return Duration(v.Duration()), true, nil
}
func (a Arguments) GetRequiredDuration(name string) (Duration, error) {
d, ok, err := a.GetDuration(name)
if err != nil {
return 0, err
}
if !ok {
return 0, fmt.Errorf("missing required keyword argument %q", name)
}
return d, nil
}
func ToQueryTime(value values.Value) (Time, error) {
switch value.Type().Kind() {
case semantic.Time:
return Time{
Absolute: value.Time().Time(),
}, nil
case semantic.Duration:
return Time{
Relative: value.Duration().Duration(),
IsRelative: true,
}, nil
case semantic.Int:
return Time{
Absolute: time.Unix(value.Int(), 0),
}, nil
default:
return Time{}, fmt.Errorf("value is not a time, got %v", value.Type())
}
}
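As a usage sketch (assuming builtins are registered by blank-importing the `functions` package, as the completion tests below do), compiling a script into a `Spec` looks roughly like:

```go
package main

import (
	"context"
	"fmt"

	_ "github.com/influxdata/ifql/functions" // registers the builtin functions
	"github.com/influxdata/ifql/query"
)

func main() {
	// FinalizeRegistration locks the builtin scope; call it once after all
	// builtins have been registered.
	query.FinalizeRegistration()

	spec, err := query.Compile(context.Background(),
		`from(db:"db") |> range(start:-1h) |> sum()`)
	if err != nil {
		panic(err)
	}
	// The Spec holds one Operation per table object plus the Edges between them.
	for _, op := range spec.Operations {
		fmt.Println(op.ID)
	}
}
```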

290
query/compiler/compiler.go Normal file
View File

@ -0,0 +1,290 @@
package compiler
import (
"errors"
"fmt"
"github.com/influxdata/ifql/semantic"
"github.com/influxdata/ifql/values"
)
func Compile(f *semantic.FunctionExpression, inTypes map[string]semantic.Type, builtinScope Scope, builtinDeclarations semantic.DeclarationScope) (Func, error) {
if builtinDeclarations == nil {
builtinDeclarations = make(semantic.DeclarationScope)
}
for k, t := range inTypes {
builtinDeclarations[k] = semantic.NewExternalVariableDeclaration(k, t)
}
semantic.SolveTypes(f, builtinDeclarations)
declarations := make(map[string]semantic.VariableDeclaration, len(inTypes))
for k, t := range inTypes {
declarations[k] = semantic.NewExternalVariableDeclaration(k, t)
}
f = f.Copy().(*semantic.FunctionExpression)
semantic.ApplyNewDeclarations(f, declarations)
root, err := compile(f.Body, builtinScope)
if err != nil {
return nil, err
}
cpy := make(map[string]semantic.Type)
for k, v := range inTypes {
cpy[k] = v
}
return compiledFn{
root: root,
inTypes: cpy,
}, nil
}
func compile(n semantic.Node, builtIns Scope) (Evaluator, error) {
switch n := n.(type) {
case *semantic.BlockStatement:
body := make([]Evaluator, len(n.Body))
for i, s := range n.Body {
node, err := compile(s, builtIns)
if err != nil {
return nil, err
}
body[i] = node
}
return &blockEvaluator{
t: n.ReturnStatement().Argument.Type(),
body: body,
}, nil
case *semantic.ExpressionStatement:
return nil, errors.New("statement does nothing, sideffects are not supported by the compiler")
case *semantic.ReturnStatement:
node, err := compile(n.Argument, builtIns)
if err != nil {
return nil, err
}
return returnEvaluator{
Evaluator: node,
}, nil
case *semantic.NativeVariableDeclaration:
node, err := compile(n.Init, builtIns)
if err != nil {
return nil, err
}
return &declarationEvaluator{
t: n.Init.Type(),
id: n.Identifier.Name,
init: node,
}, nil
case *semantic.ObjectExpression:
properties := make(map[string]Evaluator, len(n.Properties))
propertyTypes := make(map[string]semantic.Type, len(n.Properties))
for _, p := range n.Properties {
node, err := compile(p.Value, builtIns)
if err != nil {
return nil, err
}
properties[p.Key.Name] = node
propertyTypes[p.Key.Name] = node.Type()
}
return &objEvaluator{
t: semantic.NewObjectType(propertyTypes),
properties: properties,
}, nil
case *semantic.IdentifierExpression:
if v, ok := builtIns[n.Name]; ok {
//Resolve any built in identifiers now
return &valueEvaluator{
value: v,
}, nil
}
return &identifierEvaluator{
t: n.Type(),
name: n.Name,
}, nil
case *semantic.MemberExpression:
object, err := compile(n.Object, builtIns)
if err != nil {
return nil, err
}
return &memberEvaluator{
t: n.Type(),
object: object,
property: n.Property,
}, nil
case *semantic.BooleanLiteral:
return &booleanEvaluator{
t: n.Type(),
b: n.Value,
}, nil
case *semantic.IntegerLiteral:
return &integerEvaluator{
t: n.Type(),
i: n.Value,
}, nil
case *semantic.FloatLiteral:
return &floatEvaluator{
t: n.Type(),
f: n.Value,
}, nil
case *semantic.StringLiteral:
return &stringEvaluator{
t: n.Type(),
s: n.Value,
}, nil
case *semantic.RegexpLiteral:
return &regexpEvaluator{
t: n.Type(),
r: n.Value,
}, nil
case *semantic.DateTimeLiteral:
return &timeEvaluator{
t: n.Type(),
time: values.ConvertTime(n.Value),
}, nil
case *semantic.UnaryExpression:
node, err := compile(n.Argument, builtIns)
if err != nil {
return nil, err
}
return &unaryEvaluator{
t: n.Type(),
node: node,
}, nil
case *semantic.LogicalExpression:
l, err := compile(n.Left, builtIns)
if err != nil {
return nil, err
}
r, err := compile(n.Right, builtIns)
if err != nil {
return nil, err
}
return &logicalEvaluator{
t: n.Type(),
operator: n.Operator,
left: l,
right: r,
}, nil
case *semantic.BinaryExpression:
l, err := compile(n.Left, builtIns)
if err != nil {
return nil, err
}
lt := l.Type()
r, err := compile(n.Right, builtIns)
if err != nil {
return nil, err
}
rt := r.Type()
f, err := values.LookupBinaryFunction(values.BinaryFuncSignature{
Operator: n.Operator,
Left: lt,
Right: rt,
})
if err != nil {
return nil, err
}
return &binaryEvaluator{
t: n.Type(),
left: l,
right: r,
f: f,
}, nil
case *semantic.CallExpression:
callee, err := compile(n.Callee, builtIns)
if err != nil {
return nil, err
}
args, err := compile(n.Arguments, builtIns)
if err != nil {
return nil, err
}
return &callEvaluator{
t: n.Type(),
callee: callee,
args: args,
}, nil
case *semantic.FunctionExpression:
body, err := compile(n.Body, builtIns)
if err != nil {
return nil, err
}
params := make([]functionParam, len(n.Params))
for i, param := range n.Params {
params[i] = functionParam{
Key: param.Key.Name,
Type: param.Type(),
}
if param.Default != nil {
d, err := compile(param.Default, builtIns)
if err != nil {
return nil, err
}
params[i].Default = d
}
}
return &functionEvaluator{
t: n.Type(),
params: params,
body: body,
}, nil
default:
return nil, fmt.Errorf("unknown semantic node of type %T", n)
}
}
// CompilationCache caches compilation results based on the types of the input parameters.
type CompilationCache struct {
fn *semantic.FunctionExpression
root *compilationCacheNode
}
func NewCompilationCache(fn *semantic.FunctionExpression, scope Scope, decls semantic.DeclarationScope) *CompilationCache {
return &CompilationCache{
fn: fn,
root: &compilationCacheNode{
scope: scope,
decls: decls,
},
}
}
// Compile returns a compiled function based on the provided types.
// The result will be cached for subsequent calls.
func (c *CompilationCache) Compile(types map[string]semantic.Type) (Func, error) {
return c.root.compile(c.fn, 0, types)
}
type compilationCacheNode struct {
scope Scope
decls semantic.DeclarationScope
children map[semantic.Type]*compilationCacheNode
fn Func
err error
}
// compile recursively searches for a matching child node that has compiled the function.
// If the compilation has not been performed previously, its result is cached and returned.
func (c *compilationCacheNode) compile(fn *semantic.FunctionExpression, idx int, types map[string]semantic.Type) (Func, error) {
if idx == len(fn.Params) {
// We are the matching child, return the cached result or do the compilation.
if c.fn == nil && c.err == nil {
c.fn, c.err = Compile(fn, types, c.scope, c.decls)
}
return c.fn, c.err
}
// Find the matching child based on the order.
next := fn.Params[idx].Key.Name
t := types[next]
child := c.children[t]
if child == nil {
child = &compilationCacheNode{
scope: c.scope,
decls: c.decls,
}
if c.children == nil {
c.children = make(map[semantic.Type]*compilationCacheNode)
}
c.children[t] = child
}
return child.compile(fn, idx+1, types)
}

View File

@ -0,0 +1,263 @@
package compiler_test
import (
"reflect"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/influxdata/ifql/ast"
"github.com/influxdata/ifql/compiler"
"github.com/influxdata/ifql/semantic"
"github.com/influxdata/ifql/semantic/semantictest"
"github.com/influxdata/ifql/values"
)
var CmpOptions []cmp.Option
func init() {
CmpOptions = append(semantictest.CmpOptions, cmp.Comparer(ValueEqual))
}
func ValueEqual(x, y values.Value) bool {
if x.Type() != y.Type() {
return false
}
switch k := x.Type().Kind(); k {
case semantic.Bool:
return x.Bool() == y.Bool()
case semantic.UInt:
return x.UInt() == y.UInt()
case semantic.Int:
return x.Int() == y.Int()
case semantic.Float:
return x.Float() == y.Float()
case semantic.String:
return x.Str() == y.Str()
case semantic.Time:
return x.Time() == y.Time()
case semantic.Object:
return cmp.Equal(x.Object(), y.Object(), CmpOptions...)
default:
return false
}
}
func TestCompilationCache(t *testing.T) {
add := &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{
{Key: &semantic.Identifier{Name: "a"}},
{Key: &semantic.Identifier{Name: "b"}},
},
Body: &semantic.BinaryExpression{
Operator: ast.AdditionOperator,
Left: &semantic.IdentifierExpression{Name: "a"},
Right: &semantic.IdentifierExpression{Name: "b"},
},
}
testCases := []struct {
name string
types map[string]semantic.Type
scope map[string]values.Value
want values.Value
}{
{
name: "floats",
types: map[string]semantic.Type{
"a": semantic.Float,
"b": semantic.Float,
},
scope: map[string]values.Value{
"a": values.NewFloatValue(5),
"b": values.NewFloatValue(4),
},
want: values.NewFloatValue(9),
},
{
name: "ints",
types: map[string]semantic.Type{
"a": semantic.Int,
"b": semantic.Int,
},
scope: map[string]values.Value{
"a": values.NewIntValue(5),
"b": values.NewIntValue(4),
},
want: values.NewIntValue(9),
},
{
name: "uints",
types: map[string]semantic.Type{
"a": semantic.UInt,
"b": semantic.UInt,
},
scope: map[string]values.Value{
"a": values.NewUIntValue(5),
"b": values.NewUIntValue(4),
},
want: values.NewUIntValue(9),
},
}
//Reuse the same cache for all test cases
cache := compiler.NewCompilationCache(add, nil, nil)
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
f0, err := cache.Compile(tc.types)
if err != nil {
t.Fatal(err)
}
f1, err := cache.Compile(tc.types)
if err != nil {
t.Fatal(err)
}
if !reflect.DeepEqual(f0, f1) {
t.Errorf("unexpected new compilation result")
}
got0, err := f0.Eval(tc.scope)
if err != nil {
t.Fatal(err)
}
got1, err := f1.Eval(tc.scope)
if err != nil {
t.Fatal(err)
}
if !cmp.Equal(got0, tc.want, CmpOptions...) {
t.Errorf("unexpected eval result -want/+got\n%s", cmp.Diff(tc.want, got0, CmpOptions...))
}
if !cmp.Equal(got0, got1, CmpOptions...) {
t.Errorf("unexpected differing results -got0/+got1\n%s", cmp.Diff(got0, got1, CmpOptions...))
}
})
}
}
func TestCompileAndEval(t *testing.T) {
testCases := []struct {
name string
fn *semantic.FunctionExpression
types map[string]semantic.Type
scope map[string]values.Value
want values.Value
wantErr bool
}{
{
name: "simple ident return",
fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{
{Key: &semantic.Identifier{Name: "r"}},
},
Body: &semantic.IdentifierExpression{Name: "r"},
},
types: map[string]semantic.Type{
"r": semantic.Int,
},
scope: map[string]values.Value{
"r": values.NewIntValue(4),
},
want: values.NewIntValue(4),
wantErr: false,
},
{
name: "call function",
fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{
{Key: &semantic.Identifier{Name: "r"}},
},
Body: &semantic.CallExpression{
Callee: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{
{Key: &semantic.Identifier{Name: "a"}, Default: &semantic.IntegerLiteral{Value: 1}},
{Key: &semantic.Identifier{Name: "b"}, Default: &semantic.IntegerLiteral{Value: 1}},
},
Body: &semantic.BinaryExpression{
Operator: ast.AdditionOperator,
Left: &semantic.IdentifierExpression{Name: "a"},
Right: &semantic.IdentifierExpression{Name: "b"},
},
},
Arguments: &semantic.ObjectExpression{
Properties: []*semantic.Property{
{Key: &semantic.Identifier{Name: "a"}, Value: &semantic.IntegerLiteral{Value: 1}},
{Key: &semantic.Identifier{Name: "b"}, Value: &semantic.IdentifierExpression{Name: "r"}},
},
},
},
},
types: map[string]semantic.Type{
"r": semantic.Int,
},
scope: map[string]values.Value{
"r": values.NewIntValue(4),
},
want: values.NewIntValue(5),
wantErr: false,
},
{
name: "call function via identifier",
fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{
{Key: &semantic.Identifier{Name: "r"}},
},
Body: &semantic.BlockStatement{
Body: []semantic.Statement{
&semantic.NativeVariableDeclaration{
Identifier: &semantic.Identifier{Name: "f"}, Init: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{
{Key: &semantic.Identifier{Name: "a"}, Default: &semantic.IntegerLiteral{Value: 1}},
{Key: &semantic.Identifier{Name: "b"}, Default: &semantic.IntegerLiteral{Value: 1}},
},
Body: &semantic.BinaryExpression{
Operator: ast.AdditionOperator,
Left: &semantic.IdentifierExpression{Name: "a"},
Right: &semantic.IdentifierExpression{Name: "b"},
},
},
},
&semantic.ReturnStatement{
Argument: &semantic.CallExpression{
Callee: &semantic.IdentifierExpression{Name: "f"},
Arguments: &semantic.ObjectExpression{
Properties: []*semantic.Property{
{Key: &semantic.Identifier{Name: "a"}, Value: &semantic.IntegerLiteral{Value: 1}},
{Key: &semantic.Identifier{Name: "b"}, Value: &semantic.IdentifierExpression{Name: "r"}},
},
},
},
},
},
},
},
types: map[string]semantic.Type{
"r": semantic.Int,
},
scope: map[string]values.Value{
"r": values.NewIntValue(4),
},
want: values.NewIntValue(5),
wantErr: false,
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
f, err := compiler.Compile(tc.fn, tc.types, nil, nil)
if tc.wantErr != (err != nil) {
t.Fatalf("unexpected error %s", err)
}
got, err := f.Eval(tc.scope)
if tc.wantErr != (err != nil) {
t.Errorf("unexpected error %s", err)
}
if !cmp.Equal(tc.want, got, CmpOptions...) {
t.Errorf("unexpected value -want/+got\n%s", cmp.Diff(tc.want, got, CmpOptions...))
}
})
}
}

9
query/compiler/doc.go Normal file
View File

@ -0,0 +1,9 @@
// Package compiler provides a compiler and Go runtime for a subset of the IFQL language.
// Only pure functions are supported by the compiler.
// A function is compiled and then may be called repeatedly with different arguments.
// The function must be pure, meaning it has no side effects. Other language features are not supported.
//
// This runtime is not portable by design. The runtime consists of Go types that have been constructed based on the IFQL function being compiled.
// Those types are not serializable and cannot be transported to other systems or environments.
// This design is intended to limit the scope under which compilation must be supported.
package compiler

1137
query/compiler/runtime.go Normal file

File diff suppressed because it is too large

View File

@ -0,0 +1,97 @@
package complete
import (
"errors"
"fmt"
"sort"
"github.com/influxdata/ifql/interpreter"
"github.com/influxdata/ifql/semantic"
)
type functionType interface {
Params() map[string]semantic.Type
}
// FunctionSuggestion provides information about a function
type FunctionSuggestion struct {
Params map[string]string
}
// Completer provides methods for suggestions in IFQL queries
type Completer struct {
scope *interpreter.Scope
declarations semantic.DeclarationScope
}
// NewCompleter creates a new completer from scope and declarations
func NewCompleter(scope *interpreter.Scope, declarations semantic.DeclarationScope) Completer {
return Completer{scope: scope, declarations: declarations}
}
// Names returns the sorted names of the variables in scope
func (c Completer) Names() []string {
names := c.scope.Names()
sort.Strings(names)
return names
}
// Declaration returns a declaration based on the expression name, if one exists
func (c Completer) Declaration(name string) (semantic.VariableDeclaration, error) {
d, ok := c.declarations[name]
if !ok {
return d, errors.New("could not find declaration")
}
return d, nil
}
// FunctionNames returns all declaration names of the Function Kind
func (c Completer) FunctionNames() []string {
funcs := []string{}
for name, d := range c.declarations {
if isFunction(d) {
funcs = append(funcs, name)
}
}
sort.Strings(funcs)
return funcs
}
// FunctionSuggestion returns information needed for autocomplete suggestions for a function
func (c Completer) FunctionSuggestion(name string) (FunctionSuggestion, error) {
var s FunctionSuggestion
d, err := c.Declaration(name)
if err != nil {
return s, err
}
if !isFunction(d) {
return s, fmt.Errorf("name ( %s ) is not a function", name)
}
funcType, ok := d.InitType().(functionType)
if !ok {
return s, errors.New("could not cast function type")
}
params := map[string]string{}
for k, v := range funcType.Params() {
params[k] = v.Kind().String()
}
s = FunctionSuggestion{
Params: params,
}
return s, nil
}
func isFunction(d semantic.VariableDeclaration) bool {
return d.InitType().Kind() == semantic.Function
}
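A minimal wiring sketch (this mirrors the `init` of the test file below; the `complete` import path is assumed):

```go
package main

import (
	"fmt"

	"github.com/influxdata/ifql/complete" // assumed import path for this package
	_ "github.com/influxdata/ifql/functions" // registers builtins
	"github.com/influxdata/ifql/interpreter"
	"github.com/influxdata/ifql/query"
)

func main() {
	query.FinalizeRegistration()
	s, d := query.BuiltIns()
	c := complete.NewCompleter(interpreter.NewScopeWithValues(s), d)

	fmt.Println(c.FunctionNames()) // sorted names of function declarations
	if sug, err := c.FunctionSuggestion("range"); err == nil {
		fmt.Println(sug.Params) // parameter name -> type kind, e.g. "start" -> "time"
	}
}
```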

View File

@ -0,0 +1,90 @@
package complete
import (
"testing"
"github.com/google/go-cmp/cmp"
_ "github.com/influxdata/ifql/functions"
"github.com/influxdata/ifql/interpreter"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/semantic"
"github.com/influxdata/ifql/values"
)
var scope *interpreter.Scope
var declarations semantic.DeclarationScope
func init() {
query.FinalizeRegistration()
s, d := query.BuiltIns()
scope = interpreter.NewScopeWithValues(s)
declarations = d
}
func TestNames(t *testing.T) {
s := interpreter.NewScope()
var v values.Value
s.Set("boom", v)
s.Set("tick", v)
c := NewCompleter(s, semantic.DeclarationScope{})
results := c.Names()
expected := []string{
"boom",
"tick",
}
if !cmp.Equal(results, expected) {
t.Error(cmp.Diff(results, expected), "unexpected names from declarations")
}
}
func TestDeclaration(t *testing.T) {
name := "range"
expected := declarations[name].ID()
declaration, _ := NewCompleter(scope, declarations).Declaration(name)
result := declaration.ID()
if !cmp.Equal(result, expected) {
t.Error(cmp.Diff(result, expected), "unexpected declaration for name")
}
}
func TestFunctionNames(t *testing.T) {
d := make(semantic.DeclarationScope)
d["boom"] = semantic.NewExternalVariableDeclaration(
"boom", semantic.NewFunctionType(semantic.FunctionSignature{}))
d["noBoom"] = semantic.NewExternalVariableDeclaration("noBoom", semantic.String)
s := interpreter.NewScope()
c := NewCompleter(s, d)
results := c.FunctionNames()
expected := []string{
"boom",
}
if !cmp.Equal(results, expected) {
t.Error(cmp.Diff(results, expected), "unexpected function names")
}
}
func TestFunctionSuggestion(t *testing.T) {
name := "range"
result, _ := NewCompleter(scope, declarations).FunctionSuggestion(name)
expected := FunctionSuggestion{
Params: map[string]string{
"start": semantic.Time.String(),
"stop": semantic.Time.String(),
"table": query.TableObjectType.Kind().String(),
},
}
if !cmp.Equal(result, expected) {
t.Error(cmp.Diff(result, expected), "does not match expected suggestion")
}
}

600
query/control/controller.go Normal file
View File

@ -0,0 +1,600 @@
package control
import (
"context"
"log"
"math"
"sync"
"time"
"github.com/influxdata/ifql/id"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/plan"
opentracing "github.com/opentracing/opentracing-go"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
)
// Controller provides a central location to manage all incoming queries.
// The controller is responsible for queueing, planning, and executing queries.
type Controller struct {
newQueries chan *Query
lastID QueryID
queriesMu sync.RWMutex
queries map[QueryID]*Query
queryDone chan *Query
cancelRequest chan QueryID
verbose bool
lplanner plan.LogicalPlanner
pplanner plan.Planner
executor execute.Executor
maxConcurrency int
availableConcurrency int
availableMemory int64
}
type Config struct {
ConcurrencyQuota int
MemoryBytesQuota int64
ExecutorDependencies execute.Dependencies
Verbose bool
}
type QueryID uint64
func New(c Config) *Controller {
ctrl := &Controller{
newQueries: make(chan *Query),
queries: make(map[QueryID]*Query),
queryDone: make(chan *Query),
cancelRequest: make(chan QueryID),
maxConcurrency: c.ConcurrencyQuota,
availableConcurrency: c.ConcurrencyQuota,
availableMemory: c.MemoryBytesQuota,
lplanner: plan.NewLogicalPlanner(),
pplanner: plan.NewPlanner(),
executor: execute.NewExecutor(c.ExecutorDependencies),
verbose: c.Verbose,
}
go ctrl.run()
return ctrl
}
// QueryWithCompile submits a query for execution returning immediately.
// The query will first be compiled before submitting for execution.
// Done must be called on any returned Query objects.
func (c *Controller) QueryWithCompile(ctx context.Context, orgID id.ID, queryStr string) (*Query, error) {
q := c.createQuery(ctx, orgID)
err := c.compileQuery(q, queryStr)
if err != nil {
return nil, err
}
err = c.enqueueQuery(q)
return q, err
}
// Query submits a query for execution returning immediately.
// The spec must not be modified while the query is still active.
// Done must be called on any returned Query objects.
func (c *Controller) Query(ctx context.Context, orgID id.ID, qSpec *query.Spec) (*Query, error) {
q := c.createQuery(ctx, orgID)
q.spec = *qSpec
err := c.enqueueQuery(q)
return q, err
}
func (c *Controller) createQuery(ctx context.Context, orgID id.ID) *Query {
id := c.nextID()
cctx, cancel := context.WithCancel(ctx)
ready := make(chan map[string]execute.Result, 1)
return &Query{
id: id,
orgID: orgID,
labelValues: []string{
orgID.String(),
},
state: Created,
c: c,
now: time.Now().UTC(),
ready: ready,
parentCtx: cctx,
cancel: cancel,
}
}
func (c *Controller) compileQuery(q *Query, queryStr string) error {
if !q.tryCompile() {
return errors.New("failed to transition query to compiling state")
}
spec, err := query.Compile(q.compilingCtx, queryStr, query.Verbose(c.verbose))
if err != nil {
return errors.Wrap(err, "failed to compile query")
}
q.spec = *spec
return nil
}
func (c *Controller) enqueueQuery(q *Query) error {
if c.verbose {
log.Println("query", query.Formatted(&q.spec, query.FmtJSON))
}
if !q.tryQueue() {
return errors.New("failed to transition query to queueing state")
}
if err := q.spec.Validate(); err != nil {
return errors.Wrap(err, "invalid query")
}
// Add query to the queue
c.newQueries <- q
return nil
}
func (c *Controller) nextID() QueryID {
c.queriesMu.RLock()
defer c.queriesMu.RUnlock()
ok := true
for ok {
c.lastID++
_, ok = c.queries[c.lastID]
}
return c.lastID
}
// Queries reports the active queries.
func (c *Controller) Queries() []*Query {
c.queriesMu.RLock()
defer c.queriesMu.RUnlock()
queries := make([]*Query, 0, len(c.queries))
for _, q := range c.queries {
queries = append(queries, q)
}
return queries
}
func (c *Controller) run() {
pq := newPriorityQueue()
for {
select {
// Wait for resources to free
case q := <-c.queryDone:
c.free(q)
c.queriesMu.Lock()
delete(c.queries, q.id)
c.queriesMu.Unlock()
// Wait for new queries
case q := <-c.newQueries:
pq.Push(q)
c.queriesMu.Lock()
c.queries[q.id] = q
c.queriesMu.Unlock()
// Wait for cancel query requests
case id := <-c.cancelRequest:
c.queriesMu.RLock()
q, ok := c.queries[id]
c.queriesMu.RUnlock()
// Ignore cancel requests for queries that are unknown or already removed.
if ok {
q.Cancel()
}
}
// Peek at head of priority queue
q := pq.Peek()
if q != nil {
err := c.processQuery(pq, q)
if err != nil {
go q.setErr(err)
}
}
}
}
func (c *Controller) processQuery(pq *PriorityQueue, q *Query) error {
if q.tryPlan() {
// Plan query to determine needed resources
lp, err := c.lplanner.Plan(&q.spec)
if err != nil {
return errors.Wrap(err, "failed to create logical plan")
}
if c.verbose {
log.Println("logical plan", plan.Formatted(lp))
}
p, err := c.pplanner.Plan(lp, nil, q.now)
if err != nil {
return errors.Wrap(err, "failed to create physical plan")
}
q.plan = p
q.concurrency = p.Resources.ConcurrencyQuota
if q.concurrency > c.maxConcurrency {
q.concurrency = c.maxConcurrency
}
q.memory = p.Resources.MemoryBytesQuota
if c.verbose {
log.Println("physical plan", plan.Formatted(q.plan))
}
}
// Check if we have enough resources
if c.check(q) {
// Update resource gauges
c.consume(q)
// Remove the query from the queue
pq.Pop()
// Execute query
if !q.tryExec() {
return errors.New("failed to transition query into executing state")
}
r, err := c.executor.Execute(q.executeCtx, q.orgID, q.plan)
if err != nil {
return errors.Wrap(err, "failed to execute query")
}
q.setResults(r)
} else {
// update state to queueing
if !q.tryRequeue() {
return errors.New("failed to transition query into requeueing state")
}
}
return nil
}
func (c *Controller) check(q *Query) bool {
return c.availableConcurrency >= q.concurrency && (q.memory == math.MaxInt64 || c.availableMemory >= q.memory)
}
func (c *Controller) consume(q *Query) {
c.availableConcurrency -= q.concurrency
if q.memory != math.MaxInt64 {
c.availableMemory -= q.memory
}
}
func (c *Controller) free(q *Query) {
c.availableConcurrency += q.concurrency
if q.memory != math.MaxInt64 {
c.availableMemory += q.memory
}
}
// Query represents a single request.
type Query struct {
id QueryID
orgID id.ID
labelValues []string
c *Controller
spec query.Spec
now time.Time
err error
ready chan map[string]execute.Result
mu sync.Mutex
state State
cancel func()
parentCtx,
compilingCtx,
queueCtx,
planCtx,
requeueCtx,
executeCtx context.Context
compileSpan,
queueSpan,
planSpan,
requeueSpan,
executeSpan *span
plan *plan.PlanSpec
concurrency int
memory int64
}
// ID reports an ephemeral unique ID for the query.
func (q *Query) ID() QueryID {
return q.id
}
func (q *Query) OrganizationID() id.ID {
return q.orgID
}
func (q *Query) Spec() *query.Spec {
return &q.spec
}
// Cancel will stop the query execution.
func (q *Query) Cancel() {
q.mu.Lock()
defer q.mu.Unlock()
// call cancel func
q.cancel()
// Finish the query immediately.
// This allows for receiving from the Ready channel in the same goroutine
// that has called defer q.Done()
q.finish()
if q.state != Errored {
q.state = Canceled
}
}
// Ready returns a channel that will deliver the query results.
// It's possible that the channel is closed before any results arrive, in which case the query should be
// inspected for an error using Err().
func (q *Query) Ready() <-chan map[string]execute.Result {
return q.ready
}
// finish informs the controller and the Ready channel that the query is finished.
func (q *Query) finish() {
switch q.state {
case Compiling:
q.compileSpan.Finish()
case Queueing:
q.queueSpan.Finish()
case Planning:
q.planSpan.Finish()
case Requeueing:
q.requeueSpan.Finish()
case Executing:
q.executeSpan.Finish()
case Errored:
// The query has already been finished in the call to setErr.
return
case Canceled:
// The query has already been finished in the call to Cancel.
return
case Finished:
// The query has already finished
return
default:
panic("unreachable, all states have been accounted for")
}
q.c.queryDone <- q
close(q.ready)
}
// Done must always be called to free resources.
func (q *Query) Done() {
q.mu.Lock()
defer q.mu.Unlock()
q.finish()
q.state = Finished
}
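// Typical lifecycle (sketch): submit a query, wait on Ready, and always call
// Done to release the query's resources:
//
//	q, err := ctrl.QueryWithCompile(ctx, orgID, queryStr)
//	if err != nil { /* handle error */ }
//	defer q.Done()
//	results, ok := <-q.Ready()
//	if !ok {
//		err = q.Err() // channel closed without delivering results
//	}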
// State reports the current state of the query.
func (q *Query) State() State {
q.mu.Lock()
s := q.state
q.mu.Unlock()
return s
}
func (q *Query) isOK() bool {
q.mu.Lock()
ok := q.state != Canceled && q.state != Errored
q.mu.Unlock()
return ok
}
// Err reports any error the query may have encountered.
func (q *Query) Err() error {
q.mu.Lock()
err := q.err
q.mu.Unlock()
return err
}
func (q *Query) setErr(err error) {
q.mu.Lock()
defer q.mu.Unlock()
q.err = err
// Finish the query immediately.
// This allows for receiving from the Ready channel in the same goroutine
// that has called defer q.Done()
q.finish()
q.state = Errored
}
func (q *Query) setResults(r map[string]execute.Result) {
q.mu.Lock()
if q.state == Executing {
q.ready <- r
}
q.mu.Unlock()
}
// tryCompile attempts to transition the query into the Compiling state.
func (q *Query) tryCompile() bool {
q.mu.Lock()
defer q.mu.Unlock()
if q.state == Created {
q.compileSpan, q.compilingCtx = StartSpanFromContext(
q.parentCtx,
"compiling",
compilingHist.WithLabelValues(q.labelValues...),
compilingGauge.WithLabelValues(q.labelValues...),
)
q.state = Compiling
return true
}
return false
}
// tryQueue attempts to transition the query into the Queueing state.
func (q *Query) tryQueue() bool {
q.mu.Lock()
defer q.mu.Unlock()
if q.state == Compiling || q.state == Created {
if q.state == Compiling {
q.compileSpan.Finish()
}
q.queueSpan, q.queueCtx = StartSpanFromContext(
q.parentCtx,
"queueing",
queueingHist.WithLabelValues(q.labelValues...),
queueingGauge.WithLabelValues(q.labelValues...),
)
q.state = Queueing
return true
}
return false
}
// tryRequeue attempts to transition the query into the Requeueing state.
func (q *Query) tryRequeue() bool {
q.mu.Lock()
defer q.mu.Unlock()
if q.state == Planning {
q.planSpan.Finish()
q.requeueSpan, q.requeueCtx = StartSpanFromContext(
q.parentCtx,
"requeueing",
requeueingHist.WithLabelValues(q.labelValues...),
requeueingGauge.WithLabelValues(q.labelValues...),
)
q.state = Requeueing
return true
}
return false
}
// tryPlan attempts to transition the query into the Planning state.
func (q *Query) tryPlan() bool {
q.mu.Lock()
defer q.mu.Unlock()
if q.state == Queueing {
q.queueSpan.Finish()
q.planSpan, q.planCtx = StartSpanFromContext(
q.parentCtx,
"planning",
planningHist.WithLabelValues(q.labelValues...),
planningGauge.WithLabelValues(q.labelValues...),
)
q.state = Planning
return true
}
return false
}
// tryExec attempts to transition the query into the Executing state.
func (q *Query) tryExec() bool {
q.mu.Lock()
defer q.mu.Unlock()
if q.state == Requeueing || q.state == Planning {
switch q.state {
case Requeueing:
q.requeueSpan.Finish()
case Planning:
q.planSpan.Finish()
}
q.executeSpan, q.executeCtx = StartSpanFromContext(
q.parentCtx,
"executing",
executingHist.WithLabelValues(q.labelValues...),
executingGauge.WithLabelValues(q.labelValues...),
)
q.state = Executing
return true
}
return false
}
// State is the query state.
type State int
const (
Created State = iota
Compiling
Queueing
Planning
Requeueing
Executing
Errored
Finished
Canceled
)
func (s State) String() string {
switch s {
case Created:
return "created"
case Compiling:
return "compiling"
case Queueing:
return "queueing"
case Planning:
return "planning"
case Requeueing:
return "requeueing"
case Executing:
return "executing"
case Errored:
return "errored"
case Finished:
return "finished"
case Canceled:
return "canceled"
default:
return "unknown"
}
}
// span is a simple wrapper around opentracing.Span in order to
// get access to the duration of the span for metrics reporting.
type span struct {
s opentracing.Span
start time.Time
Duration time.Duration
hist prometheus.Observer
gauge prometheus.Gauge
}
func StartSpanFromContext(ctx context.Context, operationName string, hist prometheus.Observer, gauge prometheus.Gauge) (*span, context.Context) {
start := time.Now()
s, sctx := opentracing.StartSpanFromContext(ctx, operationName, opentracing.StartTime(start))
gauge.Inc()
return &span{
s: s,
start: start,
hist: hist,
gauge: gauge,
}, sctx
}
func (s *span) Finish() {
finish := time.Now()
s.Duration = finish.Sub(s.start)
s.s.FinishWithOptions(opentracing.FinishOptions{
FinishTime: finish,
})
s.hist.Observe(s.Duration.Seconds())
s.gauge.Dec()
}

103
query/control/metrics.go Normal file

@ -0,0 +1,103 @@
package control
import "github.com/prometheus/client_golang/prometheus"
const (
namespace = "query"
subsystem = "control"
)
var (
labels = []string{"org"}
)
var (
compilingGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "compiling_active",
Help: "Number of queries actively compiling",
}, labels)
queueingGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "queueing_active",
Help: "Number of queries actively queueing",
}, labels)
requeueingGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "requeueing_active",
Help: "Number of queries actively requeueing",
}, labels)
planningGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "planning_active",
Help: "Number of queries actively planning",
}, labels)
executingGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "executing_active",
Help: "Number of queries actively executing",
}, labels)
compilingHist = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "compiling_duration_seconds",
Help: "Histogram of times spent compiling queries",
Buckets: prometheus.ExponentialBuckets(1e-3, 5, 7),
}, labels)
queueingHist = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "queueing_duration_seconds",
Help: "Histogram of times spent queueing queries",
Buckets: prometheus.ExponentialBuckets(1e-3, 5, 7),
}, labels)
requeueingHist = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "requeueing_duration_seconds",
Help: "Histogram of times spent requeueing queries",
Buckets: prometheus.ExponentialBuckets(1e-3, 5, 7),
}, labels)
planningHist = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "planning_duration_seconds",
Help: "Histogram of times spent planning queries",
Buckets: prometheus.ExponentialBuckets(1e-5, 5, 7),
}, labels)
executingHist = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "executing_duration_seconds",
Help: "Histogram of times spent executing queries",
Buckets: prometheus.ExponentialBuckets(1e-3, 5, 7),
}, labels)
)
func init() {
prometheus.MustRegister(compilingGauge)
prometheus.MustRegister(queueingGauge)
prometheus.MustRegister(requeueingGauge)
prometheus.MustRegister(planningGauge)
prometheus.MustRegister(executingGauge)
prometheus.MustRegister(compilingHist)
prometheus.MustRegister(queueingHist)
prometheus.MustRegister(requeueingHist)
prometheus.MustRegister(planningHist)
prometheus.MustRegister(executingHist)
}

67
query/control/queue.go Normal file

@ -0,0 +1,67 @@
package control
import "container/heap"
// priorityQueue implements heap.Interface and holds Query objects.
type priorityQueue []*Query
func (pq priorityQueue) Len() int { return len(pq) }
func (pq priorityQueue) Less(i, j int) bool {
return pq[i].spec.Resources.Priority < pq[j].spec.Resources.Priority
}
func (pq priorityQueue) Swap(i, j int) {
pq[i], pq[j] = pq[j], pq[i]
}
func (pq *priorityQueue) Push(x interface{}) {
q := x.(*Query)
*pq = append(*pq, q)
}
func (pq *priorityQueue) Pop() interface{} {
old := *pq
n := len(old)
q := old[n-1]
*pq = old[0 : n-1]
return q
}
type PriorityQueue struct {
queue priorityQueue
}
func newPriorityQueue() *PriorityQueue {
return &PriorityQueue{
queue: make(priorityQueue, 0, 100),
}
}
func (p *PriorityQueue) Push(q *Query) {
heap.Push(&p.queue, q)
}
func (p *PriorityQueue) Peek() *Query {
for {
if p.queue.Len() == 0 {
return nil
}
q := p.queue[0]
if q.isOK() {
return q
}
heap.Pop(&p.queue)
}
}
func (p *PriorityQueue) Pop() *Query {
for {
if p.queue.Len() == 0 {
return nil
}
q := heap.Pop(&p.queue).(*Query)
if q.isOK() {
return q
}
}
}

57
query/docs/Datasets.md Normal file

@ -0,0 +1,57 @@
# Datasets
Datasets are the containers for data between transformations of a query.
Datasets and transformations come in pairs.
A transformation receives data for processing and writes its results to its downstream dataset.
A dataset decides when data should be materialized and passed down to the next transformation dataset pair.
A dataset is made up of blocks.
A block is a time bounded set of data for a given grouping key.
Blocks are modeled as matrices where row labels are series keys and column labels are timestamps.
The query engine is capable of processing data out of order and still providing complete and correct results.
This is accomplished through a mechanism of watermarks and triggers.
A watermark gives an indication that no data older than the mark is likely to arrive.
A trigger defines when a block of data should be materialized for processing.
Then, if late data does arrive, the block can be updated and rematerialized.
This mechanism allows for a trade-off between three aspects of stream processing.
* Completeness - Did the query process all of the data?
* Latency - How quickly is a result produced once data is received?
* Cost - How much compute resources are used to process the pipeline?
Datasets cache active blocks, materialize them whenever they are triggered, and remove them once they are finished.
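To make this concrete, here is a minimal Go sketch of a watermark-based trigger decision. The names (`Bounds`, `shouldMaterialize`) are illustrative assumptions, not the engine's actual trigger API:

```go
package main

import "fmt"

// Time is a simplified timestamp; the real engine has its own Time type.
type Time int64

// Bounds is the time range covered by a block: [Start, Stop).
type Bounds struct {
	Start, Stop Time
}

// shouldMaterialize is a hypothetical watermark trigger: fire once the
// watermark passes the block's stop time, meaning no older data is likely
// to arrive for this block.
func shouldMaterialize(b Bounds, watermark Time) bool {
	return watermark >= b.Stop
}

func main() {
	b := Bounds{Start: 0, Stop: 100}
	fmt.Println(shouldMaterialize(b, 90))  // false: late data may still arrive
	fmt.Println(shouldMaterialize(b, 120)) // true: the block can be materialized
}
```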
## Resilience
The plan is to implement datasets as resilient data stores like Spark's RDD, so that if a given dataset is lost, a replacement can be rebuilt quickly.
## Performance
The Dataset and Block interfaces are designed to allow different implementations to make various performance trade-offs.
### Batching
Blocks represent time- and group-bounded data.
It is possible that data for a single block is too large to maintain in RAM.
Bounds on data indicate how aggregate transformations, etc. should behave.
Batching the data so that it can be processed with available resources is an orthogonal issue to the bounds of the data.
As such, it is not part of the Dataset or Block interfaces and is left up to the implementations of the interfaces as needed.
### Sparse vs. Dense
There will be three different implementations of the Block interface.
* Dense
* Sparse Row Optimized
* Sparse Column Optimized
A dense matrix implementation assumes that there is little to no missing data.
A dense matrix is typically "row-major", meaning it is optimized for row-based operations; at this point it doesn't seem helpful to have a column-major dense implementation.
A sparse matrix implementation assumes that there is a significant amount of missing data.
Sparse implementations can be optimized for either row or column operations.
Since different processes access data in different patterns, the planning step will be responsible for deciding which implementation is best at which steps in a query.
The planner will add transformation procedures for conversions between the different implementations.
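As an illustration only, and not the actual Block implementations, the difference between the dense and sparse layouts can be sketched in Go as:

```go
package block // hypothetical package; not the actual Block implementations

// denseBlock stores every cell row-major; suited to data with little or no
// missing data.
type denseBlock struct {
	rows, cols int
	values     []float64 // len == rows*cols, cell (r, c) at values[r*cols+c]
}

// sparseBlock stores only the present cells as (row, col, value) triplets;
// suited to data with many missing cells.
type sparseBlock struct {
	rowIdx, colIdx []int
	values         []float64 // parallel slices, one entry per present cell
}
```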

39
query/docs/Executor.md Normal file

@ -0,0 +1,39 @@
# Executor Design
This document lays out the design of the executor.
## Interface
The Executor interface is defined as:
```go
type Executor interface {
Execute(context.Context, Plan) ([]Result, ErrorState)
}
```
The executor is responsible for taking a specific plan from the Planner and executing it to produce the result, which is a list of Results that allows for streaming of the various results to a client.
## Concepts
The executor interacts with many different systems and has its own internal systems.
Below is a list of concepts within the executor.
| Concept | Description |
| ------- | ----------- |
| Bounded Data | Datasets that are finite, in other words `batch` data. |
| Unbounded Data | Datasets that have no known end, or are infinite, in other words `stream` data. |
| Event time | The time the event actually occurred. |
| Processing time | The time the event is processed. This time may be completely out of order with respect to its event time and the event time of other events with similar processing time. |
| Watermarks | Watermarks communicate the lag between event time and processing time. Watermarks define a bound on the event time of data that has been observed. |
| Triggers | Triggers communicate when data should be materialized. |
| Accumulation | Accumulation defines how different results from events of the same window can be combined into a single result. |
| Dataset | A set of data produced from a transformation. The dataset is resilient because its lineage and watermarks are known, therefore it can be recreated in the event of loss. |
| Block | A subset of a dataset. Rows represent series and columns represent time. |
| Transformation | Performs a transformation on data received from a parent dataset and writes results to a child dataset. |
| Execution State | Execution state tracks the state of an execution. |
## Execution State
While both queries and plans are specifications, the execution state encapsulates the implementation and state of executing a query.
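As a rough sketch only, with hypothetical field names rather than the actual implementation, the execution state could be modeled as:

```go
package execute // a hypothetical sketch, not the actual package contents

type (
	Plan    interface{} // the specific plan produced by the planner
	NodeID  string      // identifies a single plan node
	Dataset interface{} // the data produced by a transformation
)

// executionState is one possible shape for the state described above: the
// plan being executed, the dataset built for each plan node, and the time
// marks that drive watermark and trigger evaluation.
type executionState struct {
	plan           Plan
	datasets       map[NodeID]Dataset
	watermark      int64 // event-time bound on the data observed so far
	processingTime int64
}
```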

109
query/docs/Language.md Normal file

@ -0,0 +1,109 @@
# IFQL Language
This document details the design of the IFQL language.
If you are looking for usage information on the language, see the README.md.
# Overview
The IFQL language is used to construct query specifications.
# Syntax
The language syntax is defined by the ifql/ifql.peg grammar.
## Keyword Arguments
IFQL uses keyword arguments for ALL arguments to ALL functions.
Keyword arguments enable iterative improvements to the language while remaining backwards compatible.
Since IFQL is functional in style, it is important to note that the choice of keyword arguments means that many functional concepts that deal with positional arguments have to be mapped into a space where only keyword arguments exist.
### Default Arguments
Since all arguments are keyword arguments and there are no positional arguments, it is possible for any argument to have a default value.
If an argument is not specified at call time, its default value is used if one exists; otherwise an error occurs.
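A minimal Go sketch of how such default-argument resolution could work, assuming a hypothetical `callWithDefaults` helper and map-based argument passing (not the actual interpreter code):

```go
package main

import "fmt"

// callWithDefaults merges a call site's provided arguments over the
// function's declared defaults. A nil default marks a required argument.
func callWithDefaults(defaults, provided map[string]interface{}) (map[string]interface{}, error) {
	args := make(map[string]interface{}, len(defaults))
	for name, v := range defaults {
		args[name] = v
	}
	for name, v := range provided {
		args[name] = v
	}
	// Any argument that is still nil has neither a value nor a default.
	for name, v := range args {
		if v == nil {
			return nil, fmt.Errorf("missing required keyword argument %q", name)
		}
	}
	return args, nil
}

func main() {
	defaults := map[string]interface{}{"start": "-1h", "stop": "now", "db": nil} // db is required
	args, err := callWithDefaults(defaults, map[string]interface{}{"db": "telegraf"})
	fmt.Println(args, err) // map[db:telegraf start:-1h stop:now] <nil>
}
```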
## Abstract Syntax Tree
The abstract syntax tree (AST) of IFQL is closely modeled after the JavaScript AST.
Using the JavaScript AST provides a good foundation for the organization and structure of the syntax tree.
Since IFQL is so similar to JavaScript, this design works well.
# Semantics
The `semantic` package provides a graph structure that represents the meaning of an IFQL script.
An AST is converted into a semantic graph for use with other systems.
Using a semantic graph representation of IFQL enables high-level meaning to be specified programmatically.
For example, since IFQL uses the JavaScript AST structures, arguments to a function are represented as a single positional argument that is always an object expression.
The semantic graph validates that the AST correctly follows these semantics, and uses structures that are strongly typed for this expectation.
The semantic structures are designed to facilitate the interpretation and compilation of IFQL.
# Interpretation
IFQL is primarily an interpreted language.
The implementation of the IFQL interpreter can be found in the `interpreter` package.
# Compilation and Go Runtime
A subset of IFQL can be compiled into a runtime hosted in Go.
The subset consists of only pure functions.
This means a function definition in IFQL can be compiled and then called repeatedly with different arguments.
The function must be pure, meaning it has no side effects.
Other language features, like imports, are not supported.
This runtime is not portable.
The runtime consists of Go types that have been constructed based on the IFQL function being compiled.
Those types are not serializable and cannot be transported to other systems or environments.
This design is intended to limit the scope under which compilation must be supported.
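A minimal sketch, in Go, of what hosting a compiled pure function could look like; `CompiledFn` and `compile` are hypothetical names standing in for the real compiler API:

```go
package main

import "fmt"

// CompiledFn is a hypothetical handle to a compiled pure IFQL function.
// Because the function has no side effects, the same handle can be called
// repeatedly with different arguments.
type CompiledFn func(args map[string]int64) int64

// compile is a stand-in for the real compilation step, which builds Go
// values from the IFQL function being compiled.
// It corresponds to the IFQL function: add = (a, b) => a + b
func compile() CompiledFn {
	return func(args map[string]int64) int64 {
		return args["a"] + args["b"]
	}
}

func main() {
	add := compile()
	fmt.Println(add(map[string]int64{"a": 1, "b": 2})) // 3
	fmt.Println(add(map[string]int64{"a": 5, "b": 7})) // 12
}
```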
# Features
This sections details various features of the language.
## Functions
IFQL supports defining functions.
Example:
```
add = (a,b) => a + b
add(a:1, b:2) // 3
```
Functions can be assigned to identifiers and can call other functions.
Functions are first class types within IFQL.
## Scoping
IFQL uses lexical scoping.
Scoping boundaries occur at functions.
Example:
```
x = 5
addX = (a) => a + x
addX(a:1) // 6
```
The `x` referred to in the `addX` function is the same as the one defined in the top-level scope.
Identifiers can be shadowed in more specific scopes.
Example:
```
x = 5
add = (x,y) => x + y
add(x:1,y:2) // 3
```
In this example the `x = 5` definition is unused, as the `add` function defines its own local identifier `x` as a parameter.

131
query/docs/Overview.md Normal file

@ -0,0 +1,131 @@
# Design Overview
This document provides an overview of the design of the query engine.
## Concepts
There are several different concepts that make up the complete query engine.
* Query - A query defines work to be performed on time series data to produce a result.
A query is represented as a directed acyclic graph (DAG).
* IFQL - Functional Language for defining a query to execute.
* Parser - Parses an IFQL script and produces a query.
* Data Frame - A data frame is a matrix of time series data where one dimension is time and the other is series.
* Query Node - A query node represents a single step in the DAG of a query.
* Planner - The planner creates a plan of execution from a query.
* Plan - A plan is also a DAG of nodes that explicitly states how a query will be performed.
* Plan Node - A plan node represents a single step in the DAG of a plan.
* Executor - The executor is responsible for processing a query plan.
The executor processes data via data frames.
* Storage - The Storage interface provides a mechanism for accessing the underlying data as data frames.
* Capabilities - The Storage interface exposes its capabilities.
The planner uses the available capabilities to determine the best plan.
* Hints - The Storage interface exposes hints about the data.
The planner uses the hints to determine the best plan.
* Query Engine - Query Engine is the name given to the entire system being described in this document.
Both a query and a plan are represented by a DAG and describe an operation that needs to be performed.
The difference is that a plan, in addition to describing what the operation is, also describes how that operation will be performed.
In short, a query describes what the operation is and a plan describes how that operation will be carried out.
## Relations
Below is a high level description, using the Go language, of the relations between the different components and concepts of the query engine.
```go
type Parser interface {
Parse(ifql string) Query
}
// Query defines work to be performed on time series data and a result.
// A query is represented as a directed acyclic graph (DAG).
type Query interface {
Nodes() []QueryNode
Edges() []Edge
}
// QueryNode is a single step in the DAG of a query
type QueryNode interface {
ID() NodeID
// More details about what the node does
}
// NodeID uniquely identifies a node.
type NodeID string
// Edge establishes a parent child relationship between two nodes.
type Edge interface {
Parent() NodeID
Child() NodeID
}
// Planner computes a plan from a query and available storage interfaces
type Planner interface {
Plan(Query, []Storage) Plan
}
// Plan is a DAG of the specific steps to execute.
type Plan interface {
Nodes() []PlanNode
Edges() []Edge
}
// PlanNode is a single step in the plan DAG.
type PlanNode interface {
ID() NodeID
Predicates() []Predicate
}
// Predicate filters data.
type Predicate interface {}
// Storage provides an interface to the storage layer.
type Storage interface {
// Read gets data from the underlying storage system and returns a data frame or error state.
Read(context.Context, []Predicate, TimeRange, Grouping) (DataFrame, ErrorState)
// Capabilities exposes the capabilities of the storage interface.
Capabilities() []Capability
// Hints provides hints about the characteristics of the data.
Hints(context.Context, []Predicate, TimeRange, Grouping) Hints
}
// TimeRange is the beginning time and ending time
type TimeRange interface {
Begin() int64
End() int64
}
// Grouping are key groups
type Grouping interface {
Keys() []string
}
// Hints provide insight into the size and shape of the data that would likely be returned
// from a storage read operation.
type Hints interface {
Cardinality() int64 // Count of tag values
ByteSize() int64
Blocks() int64
}
// Capability represents a single capability of a storage interface.
type Capability interface{
Name() string
}
// Executor processes a plan and returns the resulting data frames or an error state.
type Executor interface{
Execute(context.Context, Plan) ([]DataFrame, ErrorState)
}
// ErrorState describes precisely the state of an errored operation such that appropriate recovery may be attempted.
type ErrorState interface {
Error() error
OtherInformation()
// Retryable() bool ?
}
```

13
query/docs/Plan.md Normal file

@ -0,0 +1,13 @@
# Plan DAG
The plan is represented as a DAG, where each node performs an operation and produces a result.
The plan DAG is separate and distinct from the query DAG.
The plan DAG specifies details about how the query will be executed, while the query DAG only specifies what the query is.
There may be multiple roots to the DAG where each root represents a source of data.
A root or source node may retrieve data from multiple different systems.
Primarily, data will be read from the storage interface, but it may be streamed from the write ingest system or potentially from external systems as well.
The leaves of the DAG represent the results of the operation.
The results are collected and returned.

63
query/docs/Planner.md Normal file

@ -0,0 +1,63 @@
# Planner Design
This document lays out the design of the planner.
## Interface
The Planner interface is defined as:
```go
type Planner interface {
Plan(Query, []Storage) Plan
}
```
The planner is responsible for taking a query DAG and a set of available storage interfaces and producing a plan DAG.
## Plans
Plans are created via a two-step process:
1. Create a general plan from the query.
2. Create a specific plan from the general plan and the available storage interfaces.
The general plan specifies all the data frames, and their lineage, needed to produce the final query result.
The specific plan specifies how the general plan will be executed, which storage interfaces will be consumed and how.
The general plan does not leave the scope of the Planner and is not part of the API of the planner.
Hence the Plan type above is the specific plan.
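A minimal sketch of the two-step process, with hypothetical helper names standing in for the planner internals:

```go
package planner // hypothetical sketch of the two-step process

type (
	Query       interface{} // the query DAG
	Storage     interface{} // an available storage interface
	GeneralPlan interface{} // internal to the planner; never leaves its scope
	Plan        interface{} // the specific plan, the planner's output
)

// createGeneralPlan and createSpecificPlan are stand-ins for the planner
// internals; their real signatures and logic are not specified here.
func createGeneralPlan(q Query) GeneralPlan              { return q }
func createSpecificPlan(g GeneralPlan, _ []Storage) Plan { return g }

// PlanQuery runs the two steps: first decide what data frames and lineage
// are needed, then decide how to produce them with the available storage.
func PlanQuery(q Query, storage []Storage) Plan {
	return createSpecificPlan(createGeneralPlan(q), storage)
}
```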
## Plan DAG
Both the general and specific plans are represented as DAGs.
The nodes of the DAG represent data frames to be produced, while the edges of the DAG represent the operations needed to construct the data frames.
This is inverted from the Query DAG, where the nodes are operations and the edges represent data sets.
The leaves of the plan DAG represent sources of data and the data flows from bottom up through the tree.
Again this is inverted from the Query DAG where data flows top down.
## Data Frames
Data frames are sets of data whose lineage is known.
That is, it is known which parent data frames and operations are needed to construct the data frame.
Using this concept of lineage allows a data frame to be reconstructed if it is lost due to node failure or if its parent data frames are modified.
### Windowing
Data frames will specify their windowing properties. ????
## Operations
Operations define a transformation to be applied to one data frame, resulting in another.
### Narrow vs Wide
Operations are classified as either narrow or wide:
* Narrow operations map each parent data frame to exactly one child data frame.
Specifically a narrow operation is a one-to-one mapping of parent to child data frames.
* Wide operations map multiple parent data frames to multiple child data frames.
Specifically a wide operation is a many-to-many mapping of parent to child data frames.
This distinction is necessary to precisely define the lineage of a data frame.
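A minimal sketch, with hypothetical names, of how this classification might be recorded for lineage bookkeeping:

```go
package plan // hypothetical sketch; not the actual planner types

// OpClass distinguishes narrow from wide operations when recording the
// lineage of a data frame.
type OpClass int

const (
	// Narrow: each parent data frame maps to exactly one child data frame,
	// so a lost child can be rebuilt from a single parent.
	Narrow OpClass = iota
	// Wide: many parent data frames map to many child data frames, so
	// rebuilding a lost child may require many parents.
	Wide
)
```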

22
query/docs/Query.md Normal file

@ -0,0 +1,22 @@
# Query DAG
The query is represented as a DAG, where each node represents an operation to be performed.
There may be multiple roots to the DAG where each root represents a source of data.
Root nodes will specify whether they are selecting data from the database or consuming data as a stream.
The leaves of the DAG represent the results of the operation.
A result node may be added as a child to any node to make that intermediate representation a finalized result.
## Specification
A query DAG consists of a set of nodes and a set of edges that form a directed acyclic graph (DAG).
Each node has the following properties:
* ID - A unique identifier for the node within the graph.
* Kind - The kind of operation the node performs.
* Spec - The spec specifies the parameters provided to the node, detailing the specifics of the operation.
The parameters vary by the kind of node; one possible serialized shape for a node is sketched below.
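A minimal Go sketch of a serialized node and edge, with illustrative field names following the properties above (not the actual query package types):

```go
package query // hypothetical sketch of a serialized query DAG node

import "encoding/json"

// Node carries the three properties listed above. Spec is kept raw because
// its parameters vary by the kind of node.
type Node struct {
	ID   string          `json:"id"`
	Kind string          `json:"kind"`
	Spec json.RawMessage `json:"spec"`
}

// Edge connects a parent node to a child node by ID.
type Edge struct {
	Parent string `json:"parent"`
	Child  string `json:"child"`
}
```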

8
query/docs/Release.md Normal file

@ -0,0 +1,8 @@
### Creating a release tag
We are using semantic versioning with the format "vMajor.Minor.Patch".
```sh
git tag -s v0.0.1
make release
```

39
query/docs/Resources.md Normal file

@ -0,0 +1,39 @@
# Learning Resources
This document contains a list of papers, articles, etc., that are useful for understanding the design of IFQL.
## Stream Processing
* Set of articles by Tyler Akidau on building stream processing engines with correctness guarantees.
https://www.oreilly.com/ideas/the-world-beyond-batch-streaming-101
https://www.oreilly.com/ideas/the-world-beyond-batch-streaming-102
* Published paper by Tyler Akidau on building stream processing engines with correctness guarantees.
http://www.vldb.org/pvldb/vol8/p1792-Akidau.pdf
* Paper from UC Berkeley introducing Spark and RDDs
https://www.usenix.org/system/files/conference/nsdi12/nsdi12-final138.pdf
* A summary of the different data APIs in Spark
https://databricks.com/blog/2016/07/14/a-tale-of-three-apache-spark-apis-rdds-dataframes-and-datasets.html
## Map Reduce
* Google research paper on Dremel
https://research.google.com/pubs/pub36632.html
## DataFrames
* Good overview on various sparse matrix implementations. https://en.wikipedia.org/wiki/Sparse_matrix
## Query Optimization
* Volcano Optimizer Generator
https://pdfs.semanticscholar.org/a817/a3e74d1663d9eb35b4baf3161ab16f57df85.pdf
* The Cascades Framework for Query Optimization
http://db.informatik.uni-mannheim.de/downloads/hauptstudium/seminare/papers/Cascades_01.PDF
* Chapter 7: Query Optimization
From Readings in Database Systems, 5th Edition (2015)
http://www.redbook.io/pdf/ch7-queryoptimization.pdf
This chapter references various other valuable readings.
* Cost-based Optimization in Parallel Data Frameworks
https://www.cse.iitb.ac.in/~pararth09/btp/report.pdf

1641
query/docs/SPEC.md Normal file

File diff suppressed because it is too large

59
query/docs/Transpiler.md Normal file

@ -0,0 +1,59 @@
# Transpiler Design
IFQL will support transpiling various other languages into query specifications that can be executed.
Executing a transpiled query involves two steps outside the normal execution process.
1. Transpile the query to a query spec.
2. Write the result in the desired format.
The following interfaces found in the `crossexecute` package represent these concepts.
```go
type QueryTranspiler interface {
Transpile(ctx context.Context, txt string) (*query.Spec, error)
}
type ResultWriter interface {
WriteTo(w io.Writer, results map[string]execute.Result) error
}
```
Each different language/system need only implement a query transpiler and result writer.
## Producing IFQL txt via transpilation
The various transpilers only define the `somelang txt -> spec` transformation.
In general the reverse process will be possible, `spec -> ifql txt`.
Once any transpiler has been implemented, IFQL txt can be produced from that source language.
## InfluxQL
Specific to writing the InfluxQL transpiler, there is a major problem to overcome:
### How can the transpiler disambiguate fields and tags?
The transpiler will need a service that can report whether an identifier is a field or a tag for a given measurement.
The service will also need to be able to report which measurements exist for a given regexp pattern.
With this extra information the transpiler should be able to process any InfluxQL query.
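One possible shape for such a service, sketched as a hypothetical Go interface (not part of the current codebase):

```go
package crossexecute // hypothetical sketch, not part of the current codebase

import "regexp"

// SchemaService answers the two questions the InfluxQL transpiler needs:
// whether an identifier is a field or a tag on a measurement, and which
// measurements match a regexp pattern.
type SchemaService interface {
	// IsTag reports whether ident is a tag (as opposed to a field) on the
	// given measurement.
	IsTag(measurement, ident string) (bool, error)
	// Measurements returns the measurements matching pattern.
	Measurements(pattern *regexp.Regexp) ([]string, error)
}
```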
#### Open Questions
* What to do about measurements that have different schemas and are queried as if they have the same schema?
```
select sum("value") from /(a|b)/ where "c" == 'foo'
```
`c` is a tag on measurement `a`.
`c` is a field on measurement `b`.
For 1.x does this error or return some result?
* Does a query spec contain enough information to convert 2.0 results into InfluxQL 1.x result JSON?
The final table will contain the correct column names, etc., to produce the correct JSON output.
IFQL Table -> InfluxQL JSON


@ -0,0 +1,48 @@
The following benchmarks measure reading 10,000,000 points via TCP, yamux and gRPC.
These were performed using a VM and host machine on the same physical hardware.
`iperf` reported about 390 MB/s as the maximum throughput between the machines.
`influxd` running on VM, client on host.
All protocols used the same protobuf message (to provide framing) with an embedded `[]byte`
array to serialize batches of points; large arrays of structures for points are simply too slow.
The underlying storage engine cursor can read 10,000,000 points in about 230ms; therefore,
the overhead for each protocol is as follows:
```
TCP → 470ms
yamux → 620ms
gRPC → 970ms
```
Maximum transfer rates are therefore:
```
TCP → 340 MB/s or ~21e6 points / sec
yamux → 258 MB/s or ~16e6 points / sec
gRPC → 164 MB/s or ~10e6 points / sec
```
It is worth noting that I have not tested Go's network libraries to determine maximum throughput;
however, I suspect it may be close to the TCP maximum. Whilst we will benchmark using independent
machines in AWS, these tests helped me understand the relative performance of the various transports
and the impact different serialization mechanisms have on our throughput. Protobuf is ok as long
as we keep the graph small, meaning we customize the serialization of the points.
---
As a comparison, I also tested client and server on localhost, to compare the protocols without the
network stack overhead. gRPC was very inconsistent, varying anywhere from 463ms to 793ms, so the result
represents the average of a number of runs.
Overhead
```
TCP → 95ms
yamux → 108ms
gRPC → 441ms
```
These numbers bring TCP and yamux within about 10% of each other. The majority of the difference
between TCP and yamux is due to the additional frames sent by yamux to manage flow control,
which add latency. If that overhead is a concern, we may need to tune the flow control algorithm.


@ -0,0 +1,449 @@
package execute_test
import (
"sort"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/influxdata/ifql/functions"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/execute/executetest"
)
func TestAggregate_Process(t *testing.T) {
sumAgg := new(functions.SumAgg)
countAgg := new(functions.CountAgg)
testCases := []struct {
name string
agg execute.Aggregate
config execute.AggregateConfig
data []*executetest.Block
want []*executetest.Block
}{
{
name: "single",
config: execute.DefaultAggregateConfig,
agg: sumAgg,
data: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(0), 0.0},
{execute.Time(0), execute.Time(100), execute.Time(10), 1.0},
{execute.Time(0), execute.Time(100), execute.Time(20), 2.0},
{execute.Time(0), execute.Time(100), execute.Time(30), 3.0},
{execute.Time(0), execute.Time(100), execute.Time(40), 4.0},
{execute.Time(0), execute.Time(100), execute.Time(50), 5.0},
{execute.Time(0), execute.Time(100), execute.Time(60), 6.0},
{execute.Time(0), execute.Time(100), execute.Time(70), 7.0},
{execute.Time(0), execute.Time(100), execute.Time(80), 8.0},
{execute.Time(0), execute.Time(100), execute.Time(90), 9.0},
},
}},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(100), 45.0},
},
}},
},
{
name: "single use start time",
config: execute.AggregateConfig{
Columns: []string{execute.DefaultValueColLabel},
TimeSrc: execute.DefaultStartColLabel,
TimeDst: execute.DefaultTimeColLabel,
},
agg: sumAgg,
data: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(0), 0.0},
{execute.Time(0), execute.Time(100), execute.Time(10), 1.0},
{execute.Time(0), execute.Time(100), execute.Time(20), 2.0},
{execute.Time(0), execute.Time(100), execute.Time(30), 3.0},
{execute.Time(0), execute.Time(100), execute.Time(40), 4.0},
{execute.Time(0), execute.Time(100), execute.Time(50), 5.0},
{execute.Time(0), execute.Time(100), execute.Time(60), 6.0},
{execute.Time(0), execute.Time(100), execute.Time(70), 7.0},
{execute.Time(0), execute.Time(100), execute.Time(80), 8.0},
{execute.Time(0), execute.Time(100), execute.Time(90), 9.0},
},
}},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(0), 45.0},
},
}},
},
{
name: "multiple blocks",
config: execute.DefaultAggregateConfig,
agg: sumAgg,
data: []*executetest.Block{
{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(0), 0.0},
{execute.Time(0), execute.Time(100), execute.Time(10), 1.0},
{execute.Time(0), execute.Time(100), execute.Time(20), 2.0},
{execute.Time(0), execute.Time(100), execute.Time(30), 3.0},
{execute.Time(0), execute.Time(100), execute.Time(40), 4.0},
{execute.Time(0), execute.Time(100), execute.Time(50), 5.0},
{execute.Time(0), execute.Time(100), execute.Time(60), 6.0},
{execute.Time(0), execute.Time(100), execute.Time(70), 7.0},
{execute.Time(0), execute.Time(100), execute.Time(80), 8.0},
{execute.Time(0), execute.Time(100), execute.Time(90), 9.0},
},
},
{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(100), execute.Time(200), execute.Time(100), 10.0},
{execute.Time(100), execute.Time(200), execute.Time(110), 11.0},
{execute.Time(100), execute.Time(200), execute.Time(120), 12.0},
{execute.Time(100), execute.Time(200), execute.Time(130), 13.0},
{execute.Time(100), execute.Time(200), execute.Time(140), 14.0},
{execute.Time(100), execute.Time(200), execute.Time(150), 15.0},
{execute.Time(100), execute.Time(200), execute.Time(160), 16.0},
{execute.Time(100), execute.Time(200), execute.Time(170), 17.0},
{execute.Time(100), execute.Time(200), execute.Time(180), 18.0},
{execute.Time(100), execute.Time(200), execute.Time(190), 19.0},
},
},
},
want: []*executetest.Block{
{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(100), 45.0},
},
},
{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(100), execute.Time(200), execute.Time(200), 145.0},
},
},
},
},
{
name: "multiple blocks with keyed columns",
config: execute.DefaultAggregateConfig,
agg: sumAgg,
data: []*executetest.Block{
{
KeyCols: []string{"_start", "_stop", "t1"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), "a", execute.Time(0), 0.0},
{execute.Time(0), execute.Time(100), "a", execute.Time(10), 1.0},
{execute.Time(0), execute.Time(100), "a", execute.Time(20), 2.0},
{execute.Time(0), execute.Time(100), "a", execute.Time(30), 3.0},
{execute.Time(0), execute.Time(100), "a", execute.Time(40), 4.0},
{execute.Time(0), execute.Time(100), "a", execute.Time(50), 5.0},
{execute.Time(0), execute.Time(100), "a", execute.Time(60), 6.0},
{execute.Time(0), execute.Time(100), "a", execute.Time(70), 7.0},
{execute.Time(0), execute.Time(100), "a", execute.Time(80), 8.0},
{execute.Time(0), execute.Time(100), "a", execute.Time(90), 9.0},
},
},
{
KeyCols: []string{"_start", "_stop", "t1"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), "b", execute.Time(0), 0.3},
{execute.Time(0), execute.Time(100), "b", execute.Time(10), 1.3},
{execute.Time(0), execute.Time(100), "b", execute.Time(20), 2.3},
{execute.Time(0), execute.Time(100), "b", execute.Time(30), 3.3},
{execute.Time(0), execute.Time(100), "b", execute.Time(40), 4.3},
{execute.Time(0), execute.Time(100), "b", execute.Time(50), 5.3},
{execute.Time(0), execute.Time(100), "b", execute.Time(60), 6.3},
{execute.Time(0), execute.Time(100), "b", execute.Time(70), 7.3},
{execute.Time(0), execute.Time(100), "b", execute.Time(80), 8.3},
{execute.Time(0), execute.Time(100), "b", execute.Time(90), 9.3},
},
},
{
KeyCols: []string{"_start", "_stop", "t1"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(100), execute.Time(200), "a", execute.Time(100), 10.0},
{execute.Time(100), execute.Time(200), "a", execute.Time(110), 11.0},
{execute.Time(100), execute.Time(200), "a", execute.Time(120), 12.0},
{execute.Time(100), execute.Time(200), "a", execute.Time(130), 13.0},
{execute.Time(100), execute.Time(200), "a", execute.Time(140), 14.0},
{execute.Time(100), execute.Time(200), "a", execute.Time(150), 15.0},
{execute.Time(100), execute.Time(200), "a", execute.Time(160), 16.0},
{execute.Time(100), execute.Time(200), "a", execute.Time(170), 17.0},
{execute.Time(100), execute.Time(200), "a", execute.Time(180), 18.0},
{execute.Time(100), execute.Time(200), "a", execute.Time(190), 19.0},
},
},
{
KeyCols: []string{"_start", "_stop", "t1"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(100), execute.Time(200), "b", execute.Time(100), 10.3},
{execute.Time(100), execute.Time(200), "b", execute.Time(110), 11.3},
{execute.Time(100), execute.Time(200), "b", execute.Time(120), 12.3},
{execute.Time(100), execute.Time(200), "b", execute.Time(130), 13.3},
{execute.Time(100), execute.Time(200), "b", execute.Time(140), 14.3},
{execute.Time(100), execute.Time(200), "b", execute.Time(150), 15.3},
{execute.Time(100), execute.Time(200), "b", execute.Time(160), 16.3},
{execute.Time(100), execute.Time(200), "b", execute.Time(170), 17.3},
{execute.Time(100), execute.Time(200), "b", execute.Time(180), 18.3},
{execute.Time(100), execute.Time(200), "b", execute.Time(190), 19.3},
},
},
},
want: []*executetest.Block{
{
KeyCols: []string{"_start", "_stop", "t1"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), "a", execute.Time(100), 45.0},
},
},
{
KeyCols: []string{"_start", "_stop", "t1"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(100), execute.Time(200), "a", execute.Time(200), 145.0},
},
},
{
KeyCols: []string{"_start", "_stop", "t1"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), "b", execute.Time(100), 48.0},
},
},
{
KeyCols: []string{"_start", "_stop", "t1"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(100), execute.Time(200), "b", execute.Time(200), 148.0},
},
},
},
},
{
name: "multiple values",
config: execute.AggregateConfig{
Columns: []string{"x", "y"},
TimeSrc: execute.DefaultStopColLabel,
TimeDst: execute.DefaultTimeColLabel,
},
agg: sumAgg,
data: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(0), 0.0, 0.0},
{execute.Time(0), execute.Time(100), execute.Time(10), 1.0, -1.0},
{execute.Time(0), execute.Time(100), execute.Time(20), 2.0, -2.0},
{execute.Time(0), execute.Time(100), execute.Time(30), 3.0, -3.0},
{execute.Time(0), execute.Time(100), execute.Time(40), 4.0, -4.0},
{execute.Time(0), execute.Time(100), execute.Time(50), 5.0, -5.0},
{execute.Time(0), execute.Time(100), execute.Time(60), 6.0, -6.0},
{execute.Time(0), execute.Time(100), execute.Time(70), 7.0, -7.0},
{execute.Time(0), execute.Time(100), execute.Time(80), 8.0, -8.0},
{execute.Time(0), execute.Time(100), execute.Time(90), 9.0, -9.0},
},
}},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(100), 45.0, -45.0},
},
}},
},
{
name: "multiple values changing types",
config: execute.AggregateConfig{
Columns: []string{"x", "y"},
TimeSrc: execute.DefaultStopColLabel,
TimeDst: execute.DefaultTimeColLabel,
},
agg: countAgg,
data: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(0), 0.0, 0.0},
{execute.Time(0), execute.Time(100), execute.Time(10), 1.0, -1.0},
{execute.Time(0), execute.Time(100), execute.Time(20), 2.0, -2.0},
{execute.Time(0), execute.Time(100), execute.Time(30), 3.0, -3.0},
{execute.Time(0), execute.Time(100), execute.Time(40), 4.0, -4.0},
{execute.Time(0), execute.Time(100), execute.Time(50), 5.0, -5.0},
{execute.Time(0), execute.Time(100), execute.Time(60), 6.0, -6.0},
{execute.Time(0), execute.Time(100), execute.Time(70), 7.0, -7.0},
{execute.Time(0), execute.Time(100), execute.Time(80), 8.0, -8.0},
{execute.Time(0), execute.Time(100), execute.Time(90), 9.0, -9.0},
},
}},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TInt},
{Label: "y", Type: execute.TInt},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(100), int64(10), int64(10)},
},
}},
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
d := executetest.NewDataset(executetest.RandomDatasetID())
c := execute.NewBlockBuilderCache(executetest.UnlimitedAllocator)
c.SetTriggerSpec(execute.DefaultTriggerSpec)
agg := execute.NewAggregateTransformation(d, c, tc.agg, tc.config)
parentID := executetest.RandomDatasetID()
for _, b := range tc.data {
if err := agg.Process(parentID, b); err != nil {
t.Fatal(err)
}
}
got, err := executetest.BlocksFromCache(c)
if err != nil {
t.Fatal(err)
}
executetest.NormalizeBlocks(got)
executetest.NormalizeBlocks(tc.want)
sort.Sort(executetest.SortedBlocks(got))
sort.Sort(executetest.SortedBlocks(tc.want))
if !cmp.Equal(tc.want, got, cmpopts.EquateNaNs()) {
t.Errorf("unexpected blocks -want/+got\n%s", cmp.Diff(tc.want, got))
}
})
}
}

275
query/execute/aggregate.go Normal file

@ -0,0 +1,275 @@
package execute
import (
"fmt"
"github.com/influxdata/ifql/interpreter"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/semantic"
"github.com/pkg/errors"
)
type aggregateTransformation struct {
d Dataset
cache BlockBuilderCache
agg Aggregate
config AggregateConfig
}
type AggregateConfig struct {
Columns []string `json:"columns"`
TimeSrc string `json:"time_src"`
TimeDst string `json:"time_dst"`
}
var DefaultAggregateConfig = AggregateConfig{
Columns: []string{DefaultValueColLabel},
TimeSrc: DefaultStopColLabel,
TimeDst: DefaultTimeColLabel,
}
func (c AggregateConfig) Copy() AggregateConfig {
nc := c
if c.Columns != nil {
nc.Columns = make([]string, len(c.Columns))
copy(nc.Columns, c.Columns)
}
return nc
}
func (c *AggregateConfig) ReadArgs(args query.Arguments) error {
if label, ok, err := args.GetString("timeDst"); err != nil {
return err
} else if ok {
c.TimeDst = label
} else {
c.TimeDst = DefaultAggregateConfig.TimeDst
}
if timeValue, ok, err := args.GetString("timeSrc"); err != nil {
return err
} else if ok {
c.TimeSrc = timeValue
} else {
c.TimeSrc = DefaultAggregateConfig.TimeSrc
}
if cols, ok, err := args.GetArray("columns", semantic.String); err != nil {
return err
} else if ok {
columns, err := interpreter.ToStringArray(cols)
if err != nil {
return err
}
c.Columns = columns
} else {
c.Columns = DefaultAggregateConfig.Columns
}
return nil
}
func NewAggregateTransformation(d Dataset, c BlockBuilderCache, agg Aggregate, config AggregateConfig) *aggregateTransformation {
return &aggregateTransformation{
d: d,
cache: c,
agg: agg,
config: config,
}
}
func NewAggregateTransformationAndDataset(id DatasetID, mode AccumulationMode, agg Aggregate, config AggregateConfig, a *Allocator) (*aggregateTransformation, Dataset) {
cache := NewBlockBuilderCache(a)
d := NewDataset(id, mode, cache)
return NewAggregateTransformation(d, cache, agg, config), d
}
func (t *aggregateTransformation) RetractBlock(id DatasetID, key PartitionKey) error {
//TODO(nathanielc): Store intermediate state for retractions
return t.d.RetractBlock(key)
}
func (t *aggregateTransformation) Process(id DatasetID, b Block) error {
builder, new := t.cache.BlockBuilder(b.Key())
if !new {
return fmt.Errorf("aggregate found duplicate block with key: %v", b.Key())
}
AddBlockKeyCols(b.Key(), builder)
builder.AddCol(ColMeta{
Label: t.config.TimeDst,
Type: TTime,
})
builderColMap := make([]int, len(t.config.Columns))
blockColMap := make([]int, len(t.config.Columns))
aggregates := make([]ValueFunc, len(t.config.Columns))
cols := b.Cols()
for j, label := range t.config.Columns {
idx := -1
for bj, bc := range cols {
if bc.Label == label {
idx = bj
break
}
}
if idx < 0 {
return fmt.Errorf("column %q does not exist", label)
}
c := cols[idx]
if b.Key().HasCol(c.Label) {
return errors.New("cannot aggregate columns that are part of the partition key")
}
var vf ValueFunc
switch c.Type {
case TBool:
vf = t.agg.NewBoolAgg()
case TInt:
vf = t.agg.NewIntAgg()
case TUInt:
vf = t.agg.NewUIntAgg()
case TFloat:
vf = t.agg.NewFloatAgg()
case TString:
vf = t.agg.NewStringAgg()
default:
return fmt.Errorf("unsupported aggregate column type %v", c.Type)
}
aggregates[j] = vf
builderColMap[j] = builder.AddCol(ColMeta{
Label: c.Label,
Type: vf.Type(),
})
blockColMap[j] = idx
}
if err := AppendAggregateTime(t.config.TimeSrc, t.config.TimeDst, b.Key(), builder); err != nil {
return err
}
if err := b.Do(func(cr ColReader) error {
for j := range t.config.Columns {
vf := aggregates[j]
tj := blockColMap[j]
c := b.Cols()[tj]
switch c.Type {
case TBool:
vf.(DoBoolAgg).DoBool(cr.Bools(tj))
case TInt:
vf.(DoIntAgg).DoInt(cr.Ints(tj))
case TUInt:
vf.(DoUIntAgg).DoUInt(cr.UInts(tj))
case TFloat:
vf.(DoFloatAgg).DoFloat(cr.Floats(tj))
case TString:
vf.(DoStringAgg).DoString(cr.Strings(tj))
default:
return fmt.Errorf("unsupported aggregate type %v", c.Type)
}
}
return nil
}); err != nil {
return err
}
for j, vf := range aggregates {
bj := builderColMap[j]
// Append aggregated value
switch vf.Type() {
case TBool:
builder.AppendBool(bj, vf.(BoolValueFunc).ValueBool())
case TInt:
builder.AppendInt(bj, vf.(IntValueFunc).ValueInt())
case TUInt:
builder.AppendUInt(bj, vf.(UIntValueFunc).ValueUInt())
case TFloat:
builder.AppendFloat(bj, vf.(FloatValueFunc).ValueFloat())
case TString:
builder.AppendString(bj, vf.(StringValueFunc).ValueString())
}
}
AppendKeyValues(b.Key(), builder)
return nil
}
func (t *aggregateTransformation) UpdateWatermark(id DatasetID, mark Time) error {
return t.d.UpdateWatermark(mark)
}
func (t *aggregateTransformation) UpdateProcessingTime(id DatasetID, pt Time) error {
return t.d.UpdateProcessingTime(pt)
}
func (t *aggregateTransformation) Finish(id DatasetID, err error) {
t.d.Finish(err)
}
func AppendAggregateTime(srcTime, dstTime string, key PartitionKey, builder BlockBuilder) error {
srcTimeIdx := ColIdx(srcTime, key.Cols())
if srcTimeIdx < 0 {
return fmt.Errorf("timeValue column %q does not exist", srcTime)
}
srcTimeCol := key.Cols()[srcTimeIdx]
if srcTimeCol.Type != TTime {
return fmt.Errorf("timeValue column %q does not have type time", srcTime)
}
dstTimeIdx := ColIdx(dstTime, builder.Cols())
if dstTimeIdx < 0 {
return fmt.Errorf("timeValue column %q does not exist", dstTime)
}
dstTimeCol := builder.Cols()[dstTimeIdx]
if dstTimeCol.Type != TTime {
return fmt.Errorf("timeValue column %q does not have type time", dstTime)
}
builder.AppendTime(dstTimeIdx, key.ValueTime(srcTimeIdx))
return nil
}
type Aggregate interface {
NewBoolAgg() DoBoolAgg
NewIntAgg() DoIntAgg
NewUIntAgg() DoUIntAgg
NewFloatAgg() DoFloatAgg
NewStringAgg() DoStringAgg
}
type ValueFunc interface {
Type() DataType
}
type DoBoolAgg interface {
ValueFunc
DoBool([]bool)
}
type DoFloatAgg interface {
ValueFunc
DoFloat([]float64)
}
type DoIntAgg interface {
ValueFunc
DoInt([]int64)
}
type DoUIntAgg interface {
ValueFunc
DoUInt([]uint64)
}
type DoStringAgg interface {
ValueFunc
DoString([]string)
}
type BoolValueFunc interface {
ValueBool() bool
}
type FloatValueFunc interface {
ValueFloat() float64
}
type IntValueFunc interface {
ValueInt() int64
}
type UIntValueFunc interface {
ValueUInt() uint64
}
type StringValueFunc interface {
ValueString() string
}

170
query/execute/allocator.go Normal file

@ -0,0 +1,170 @@
package execute
import (
"fmt"
"sync/atomic"
)
const (
boolSize = 1
int64Size = 8
uint64Size = 8
float64Size = 8
stringSize = 16
timeSize = 8
)
// Allocator tracks the amount of memory being consumed by a query.
// The allocator provides methods similar to make and append, to allocate large slices of data.
// The allocator also provides a Free method to account for when memory will be freed.
type Allocator struct {
Limit int64
bytesAllocated int64
maxAllocated int64
}
func (a *Allocator) count(n, size int) (c int64) {
c = atomic.AddInt64(&a.bytesAllocated, int64(n*size))
for max := atomic.LoadInt64(&a.maxAllocated); c > max; max = atomic.LoadInt64(&a.maxAllocated) {
if atomic.CompareAndSwapInt64(&a.maxAllocated, max, c) {
return
}
}
return
}
// Free informs the allocator that memory has been freed.
func (a *Allocator) Free(n, size int) {
a.count(-n, size)
}
// Max reports the maximum amount of allocated memory at any point in the query.
func (a *Allocator) Max() int64 {
return atomic.LoadInt64(&a.maxAllocated)
}
func (a *Allocator) account(n, size int) {
if want := a.count(n, size); want > a.Limit {
allocated := a.count(-n, size)
panic(AllocError{
Limit: a.Limit,
Allocated: allocated,
Wanted: want - allocated,
})
}
}
// Bools makes a slice of bool values.
func (a *Allocator) Bools(l, c int) []bool {
a.account(c, boolSize)
return make([]bool, l, c)
}
// AppendBools appends bools to a slice
func (a *Allocator) AppendBools(slice []bool, vs ...bool) []bool {
if cap(slice)-len(slice) > len(vs) {
return append(slice, vs...)
}
s := append(slice, vs...)
diff := cap(s) - cap(slice)
a.account(diff, boolSize)
return s
}
// Ints makes a slice of int64 values.
func (a *Allocator) Ints(l, c int) []int64 {
a.account(c, int64Size)
return make([]int64, l, c)
}
// AppendInts appends int64s to a slice
func (a *Allocator) AppendInts(slice []int64, vs ...int64) []int64 {
if cap(slice)-len(slice) > len(vs) {
return append(slice, vs...)
}
s := append(slice, vs...)
diff := cap(s) - cap(slice)
a.account(diff, int64Size)
return s
}
// UInts makes a slice of uint64 values.
func (a *Allocator) UInts(l, c int) []uint64 {
a.account(c, uint64Size)
return make([]uint64, l, c)
}
// AppendUInts appends uint64s to a slice
func (a *Allocator) AppendUInts(slice []uint64, vs ...uint64) []uint64 {
if cap(slice)-len(slice) > len(vs) {
return append(slice, vs...)
}
s := append(slice, vs...)
diff := cap(s) - cap(slice)
a.account(diff, uint64Size)
return s
}
// Floats makes a slice of float64 values.
func (a *Allocator) Floats(l, c int) []float64 {
a.account(c, float64Size)
return make([]float64, l, c)
}
// AppendFloats appends float64s to a slice
func (a *Allocator) AppendFloats(slice []float64, vs ...float64) []float64 {
if cap(slice)-len(slice) > len(vs) {
return append(slice, vs...)
}
s := append(slice, vs...)
diff := cap(s) - cap(slice)
a.account(diff, float64Size)
return s
}
// Strings makes a slice of string values.
// Only the string headers are accounted for.
func (a *Allocator) Strings(l, c int) []string {
a.account(c, stringSize)
return make([]string, l, c)
}
// AppendStrings appends strings to a slice.
// Only the string headers are accounted for.
func (a *Allocator) AppendStrings(slice []string, vs ...string) []string {
//TODO(nathanielc): Account for actual size of strings
if cap(slice)-len(slice) > len(vs) {
return append(slice, vs...)
}
s := append(slice, vs...)
diff := cap(s) - cap(slice)
a.account(diff, stringSize)
return s
}
// Times makes a slice of Time values.
func (a *Allocator) Times(l, c int) []Time {
a.account(c, timeSize)
return make([]Time, l, c)
}
// AppendTimes appends Times to a slice
func (a *Allocator) AppendTimes(slice []Time, vs ...Time) []Time {
if cap(slice)-len(slice) > len(vs) {
return append(slice, vs...)
}
s := append(slice, vs...)
diff := cap(s) - cap(slice)
a.account(diff, timeSize)
return s
}
type AllocError struct {
Limit int64
Allocated int64
Wanted int64
}
func (a AllocError) Error() string {
return fmt.Sprintf("allocation limit reached: limit %d, allocated: %d, wanted: %d", a.Limit, a.Allocated, a.Wanted)
}

1273
query/execute/block.go Normal file

File diff suppressed because it is too large

51
query/execute/bounds.go Normal file

@ -0,0 +1,51 @@
package execute
import (
"fmt"
"math"
"time"
"github.com/influxdata/ifql/values"
)
type Time = values.Time
type Duration = values.Duration
const (
MaxTime = math.MaxInt64
MinTime = math.MinInt64
)
type Bounds struct {
Start Time
Stop Time
}
var AllTime = Bounds{
Start: MinTime,
Stop: MaxTime,
}
func (b Bounds) String() string {
return fmt.Sprintf("[%v, %v)", b.Start, b.Stop)
}
func (b Bounds) Contains(t Time) bool {
return t >= b.Start && t < b.Stop
}
func (b Bounds) Overlaps(o Bounds) bool {
// Two half-open intervals overlap exactly when each starts before the other stops.
return b.Start < o.Stop && o.Start < b.Stop
}
func (b Bounds) Equal(o Bounds) bool {
return b == o
}
func (b Bounds) Shift(d Duration) Bounds {
return Bounds{Start: b.Start.Add(d), Stop: b.Stop.Add(d)}
}
func Now() Time {
return values.ConvertTime(time.Now())
}
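
As the String format suggests, bounds are half-open: Start is inclusive and Stop is exclusive. A short sketch of the semantics:

```go
b := execute.Bounds{Start: 0, Stop: 10}

fmt.Println(b.Contains(0))  // true: the start is inclusive
fmt.Println(b.Contains(10)) // false: the stop is exclusive

b = b.Shift(5) // moves both endpoints: now [5, 15)

// Two half-open intervals overlap exactly when each starts before the other stops.
fmt.Println(b.Overlaps(execute.Bounds{Start: 10, Stop: 20})) // true
fmt.Println(b.Overlaps(execute.Bounds{Start: 15, Stop: 20})) // false: they only touch
```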

188
query/execute/dataset.go Normal file
View File

@ -0,0 +1,188 @@
package execute
import (
"github.com/influxdata/ifql/query"
uuid "github.com/satori/go.uuid"
)
// Dataset represents the set of data produced by a transformation.
type Dataset interface {
Node
RetractBlock(key PartitionKey) error
UpdateProcessingTime(t Time) error
UpdateWatermark(mark Time) error
Finish(error)
SetTriggerSpec(t query.TriggerSpec)
}
// DataCache holds all working data for a transformation.
type DataCache interface {
Block(PartitionKey) (Block, error)
ForEach(func(PartitionKey))
ForEachWithContext(func(PartitionKey, Trigger, BlockContext))
DiscardBlock(PartitionKey)
ExpireBlock(PartitionKey)
SetTriggerSpec(t query.TriggerSpec)
}
type AccumulationMode int
const (
DiscardingMode AccumulationMode = iota
AccumulatingMode
AccumulatingRetractingMode
)
type DatasetID uuid.UUID
func (id DatasetID) String() string {
return uuid.UUID(id).String()
}
var ZeroDatasetID DatasetID
func (id DatasetID) IsZero() bool {
return id == ZeroDatasetID
}
type dataset struct {
id DatasetID
ts []Transformation
accMode AccumulationMode
watermark Time
processingTime Time
cache DataCache
}
func NewDataset(id DatasetID, accMode AccumulationMode, cache DataCache) *dataset {
return &dataset{
id: id,
accMode: accMode,
cache: cache,
}
}
func (d *dataset) AddTransformation(t Transformation) {
d.ts = append(d.ts, t)
}
func (d *dataset) SetTriggerSpec(spec query.TriggerSpec) {
d.cache.SetTriggerSpec(spec)
}
func (d *dataset) UpdateWatermark(mark Time) error {
d.watermark = mark
if err := d.evalTriggers(); err != nil {
return err
}
for _, t := range d.ts {
if err := t.UpdateWatermark(d.id, mark); err != nil {
return err
}
}
return nil
}
func (d *dataset) UpdateProcessingTime(time Time) error {
d.processingTime = time
if err := d.evalTriggers(); err != nil {
return err
}
for _, t := range d.ts {
if err := t.UpdateProcessingTime(d.id, time); err != nil {
return err
}
}
return nil
}
func (d *dataset) evalTriggers() (err error) {
d.cache.ForEachWithContext(func(key PartitionKey, trigger Trigger, bc BlockContext) {
if err != nil {
// Skip the rest once we have encountered an error
return
}
c := TriggerContext{
Block: bc,
Watermark: d.watermark,
CurrentProcessingTime: d.processingTime,
}
if trigger.Triggered(c) {
err = d.triggerBlock(key)
}
if trigger.Finished() {
d.expireBlock(key)
}
})
return err
}
func (d *dataset) triggerBlock(key PartitionKey) error {
b, err := d.cache.Block(key)
if err != nil {
return err
}
b.RefCount(len(d.ts))
switch d.accMode {
case DiscardingMode:
for _, t := range d.ts {
if err := t.Process(d.id, b); err != nil {
return err
}
}
d.cache.DiscardBlock(key)
case AccumulatingRetractingMode:
for _, t := range d.ts {
if err := t.RetractBlock(d.id, b.Key()); err != nil {
return err
}
}
fallthrough
case AccumulatingMode:
for _, t := range d.ts {
if err := t.Process(d.id, b); err != nil {
return err
}
}
}
return nil
}
func (d *dataset) expireBlock(key PartitionKey) {
d.cache.ExpireBlock(key)
}
func (d *dataset) RetractBlock(key PartitionKey) error {
d.cache.DiscardBlock(key)
for _, t := range d.ts {
if err := t.RetractBlock(d.id, key); err != nil {
return err
}
}
return nil
}
func (d *dataset) Finish(err error) {
if err == nil {
// Only trigger blocks if we are not finishing because of an error.
d.cache.ForEach(func(bk PartitionKey) {
if err != nil {
return
}
err = d.triggerBlock(bk)
d.cache.ExpireBlock(bk)
})
}
for _, t := range d.ts {
t.Finish(d.id, err)
}
}
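
In short, a dataset fans watermark and processing-time updates out to its transformations, and every update re-evaluates the trigger of each cached block; triggered blocks are pushed downstream according to the accumulation mode. A sketch of driving a dataset, assuming a cache and transformation obtained elsewhere:

```go
var (
	id    execute.DatasetID
	cache execute.DataCache      // some DataCache implementation
	t     execute.Transformation // the downstream transformation
)

ds := execute.NewDataset(id, execute.DiscardingMode, cache)
ds.AddTransformation(t)
ds.SetTriggerSpec(execute.DefaultTriggerSpec)

// Advancing the watermark re-evaluates triggers; in DiscardingMode each
// triggered block is processed by t and then discarded from the cache.
if err := ds.UpdateWatermark(execute.Now()); err != nil {
	// handle the error
}
ds.Finish(nil)
```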

119
query/execute/dispatcher.go Normal file
View File

@ -0,0 +1,119 @@
package execute
import (
"context"
"fmt"
"runtime/debug"
"sync"
)
// Dispatcher schedules work for a query.
// Each transformation submits work to be done to the dispatcher.
// Then the dispatcher schedules the work based on the available resources.
type Dispatcher interface {
// Schedule fn to be executed
Schedule(fn ScheduleFunc)
}
// ScheduleFunc is a function that represents work to do.
// The throughput is the maximum number of messages to process for this scheduling.
type ScheduleFunc func(throughput int)
// poolDispatcher implements Dispatcher using a pool of goroutines.
type poolDispatcher struct {
work chan ScheduleFunc
throughput int
mu sync.Mutex
closed bool
closing chan struct{}
wg sync.WaitGroup
err error
errC chan error
}
func newPoolDispatcher(throughput int) *poolDispatcher {
return &poolDispatcher{
throughput: throughput,
work: make(chan ScheduleFunc, 100),
closing: make(chan struct{}),
errC: make(chan error, 1),
}
}
func (d *poolDispatcher) Schedule(fn ScheduleFunc) {
select {
case d.work <- fn:
case <-d.closing:
}
}
func (d *poolDispatcher) Start(n int, ctx context.Context) {
d.wg.Add(n)
for i := 0; i < n; i++ {
go func() {
defer d.wg.Done()
// Setup panic handling on the worker goroutines
defer func() {
if e := recover(); e != nil {
var err error
switch e := e.(type) {
case error:
err = e
default:
err = fmt.Errorf("%v", e)
}
d.setErr(fmt.Errorf("panic: %v\n%s", err, debug.Stack()))
}
}()
d.run(ctx)
}()
}
}
// Err returns a channel that will produce an error if one is encountered.
func (d *poolDispatcher) Err() <-chan error {
d.mu.Lock()
defer d.mu.Unlock()
return d.errC
}
func (d *poolDispatcher) setErr(err error) {
d.mu.Lock()
defer d.mu.Unlock()
// TODO(nathanielc): Collect all error information.
if d.err == nil {
d.err = err
d.errC <- err
}
}
// Stop stops the dispatcher.
func (d *poolDispatcher) Stop() error {
d.mu.Lock()
defer d.mu.Unlock()
if d.closed {
return d.err
}
d.closed = true
close(d.closing)
d.wg.Wait()
return d.err
}
// run is the logic executed by each worker goroutine in the pool.
func (d *poolDispatcher) run(ctx context.Context) {
for {
select {
case <-ctx.Done():
// Immediately return, do not process any more work
return
case <-d.closing:
// We are done, nothing left to do.
return
case fn := <-d.work:
fn(d.throughput)
}
}
}
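
Within the package, the intended lifecycle is: start a pool of workers, submit work, then stop and collect any error. A sketch:

```go
d := newPoolDispatcher(10) // each worker handles up to 10 messages per scheduling
d.Start(4, context.Background())

d.Schedule(func(throughput int) {
	// Process up to throughput messages, then return;
	// submit any remaining work with another Schedule call.
})

if err := d.Stop(); err != nil {
	// A worker panicked or otherwise reported an error.
}
```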

View File

@ -0,0 +1,67 @@
package executetest
import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/influxdata/ifql/query/execute"
)
// AggFuncTestHelper splits the data in half, runs DoFloat over each half,
// and compares the resulting value to want.
func AggFuncTestHelper(t *testing.T, agg execute.Aggregate, data []float64, want interface{}) {
t.Helper()
// Call DoFloat twice, since multiple calls are allowed by the interface.
h := len(data) / 2
vf := agg.NewFloatAgg()
vf.DoFloat(data[:h])
if h < len(data) {
vf.DoFloat(data[h:])
}
var got interface{}
switch vf.Type() {
case execute.TBool:
got = vf.(execute.BoolValueFunc).ValueBool()
case execute.TInt:
got = vf.(execute.IntValueFunc).ValueInt()
case execute.TUInt:
got = vf.(execute.UIntValueFunc).ValueUInt()
case execute.TFloat:
got = vf.(execute.FloatValueFunc).ValueFloat()
case execute.TString:
got = vf.(execute.StringValueFunc).ValueString()
}
if !cmp.Equal(want, got, cmpopts.EquateNaNs()) {
t.Errorf("unexpected value -want/+got\n%s", cmp.Diff(want, got))
}
}
// AggFuncBenchmarkHelper benchmarks the aggregate function over data and compares the result to want.
func AggFuncBenchmarkHelper(b *testing.B, agg execute.Aggregate, data []float64, want interface{}) {
b.Helper()
b.ResetTimer()
for n := 0; n < b.N; n++ {
vf := agg.NewFloatAgg()
vf.DoFloat(data)
var got interface{}
switch vf.Type() {
case execute.TBool:
got = vf.(execute.BoolValueFunc).ValueBool()
case execute.TInt:
got = vf.(execute.IntValueFunc).ValueInt()
case execute.TUInt:
got = vf.(execute.UIntValueFunc).ValueUInt()
case execute.TFloat:
got = vf.(execute.FloatValueFunc).ValueFloat()
case execute.TString:
got = vf.(execute.StringValueFunc).ValueString()
}
if !cmp.Equal(want, got) {
b.Errorf("unexpected value -want/+got\n%s", cmp.Diff(want, got))
}
}
}
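
A typical use from an aggregate's test, assuming a SumAgg type in the functions package that implements execute.Aggregate:

```go
package functions_test

import (
	"testing"

	"github.com/influxdata/ifql/functions"
	"github.com/influxdata/ifql/query/execute/executetest"
)

func TestSumAgg(t *testing.T) {
	executetest.AggFuncTestHelper(t,
		new(functions.SumAgg), // assumed execute.Aggregate implementation
		[]float64{1, 2, 3, 4},
		float64(10),
	)
}
```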

View File

@ -0,0 +1,11 @@
package executetest
import (
"math"
"github.com/influxdata/ifql/query/execute"
)
var UnlimitedAllocator = &execute.Allocator{
Limit: math.MaxInt64,
}

View File

@ -0,0 +1,205 @@
package executetest
import (
"fmt"
"github.com/influxdata/ifql/query/execute"
)
// Block is an implementation of execute.Block
// It is designed to make it easy to statically declare the data within the block.
// Not all fields need to be set. See comments on each field.
// Use Normalize to ensure that all fields are set before equality comparisons.
type Block struct {
// PartitionKey of the block. Does not need to be set explicitly.
PartitionKey execute.PartitionKey
// KeyCols is a list of columns that are part of the partition key.
// The column type is deduced from the ColMeta slice.
KeyCols []string
// KeyValues is a list of values for the partition key columns.
// Only needs to be set when no data is present on the Block.
KeyValues []interface{}
// ColMeta is a list of columns of the block.
ColMeta []execute.ColMeta
// Data is a list of rows, i.e. Data[row][col]
// Each row must be a list with length equal to len(ColMeta)
Data [][]interface{}
}
// Normalize ensures all fields of the Block are set correctly.
func (b *Block) Normalize() {
if b.PartitionKey == nil {
cols := make([]execute.ColMeta, len(b.KeyCols))
if len(b.KeyValues) != len(b.KeyCols) {
b.KeyValues = make([]interface{}, len(b.KeyCols))
}
for j, label := range b.KeyCols {
idx := execute.ColIdx(label, b.ColMeta)
if idx < 0 {
panic(fmt.Errorf("block invalid: missing partition column %q", label))
}
cols[j] = b.ColMeta[idx]
if len(b.Data) > 0 {
b.KeyValues[j] = b.Data[0][idx]
}
}
b.PartitionKey = execute.NewPartitionKey(cols, b.KeyValues)
}
}
func (b *Block) RefCount(n int) {}
func (b *Block) Cols() []execute.ColMeta {
return b.ColMeta
}
func (b *Block) Key() execute.PartitionKey {
b.Normalize()
return b.PartitionKey
}
func (b *Block) Do(f func(execute.ColReader) error) error {
for _, r := range b.Data {
if err := f(ColReader{
key: b.Key(),
cols: b.ColMeta,
row: r,
}); err != nil {
return err
}
}
return nil
}
type ColReader struct {
key execute.PartitionKey
cols []execute.ColMeta
row []interface{}
}
func (cr ColReader) Cols() []execute.ColMeta {
return cr.cols
}
func (cr ColReader) Key() execute.PartitionKey {
return cr.key
}
func (cr ColReader) Len() int {
return 1
}
func (cr ColReader) Bools(j int) []bool {
return []bool{cr.row[j].(bool)}
}
func (cr ColReader) Ints(j int) []int64 {
return []int64{cr.row[j].(int64)}
}
func (cr ColReader) UInts(j int) []uint64 {
return []uint64{cr.row[j].(uint64)}
}
func (cr ColReader) Floats(j int) []float64 {
return []float64{cr.row[j].(float64)}
}
func (cr ColReader) Strings(j int) []string {
return []string{cr.row[j].(string)}
}
func (cr ColReader) Times(j int) []execute.Time {
return []execute.Time{cr.row[j].(execute.Time)}
}
func BlocksFromCache(c execute.DataCache) (blocks []*Block, err error) {
c.ForEach(func(key execute.PartitionKey) {
if err != nil {
return
}
var b execute.Block
b, err = c.Block(key)
if err != nil {
return
}
var cb *Block
cb, err = ConvertBlock(b)
if err != nil {
return
}
blocks = append(blocks, cb)
})
return blocks, err
}
func ConvertBlock(b execute.Block) (*Block, error) {
key := b.Key()
blk := &Block{
PartitionKey: key,
ColMeta: b.Cols(),
}
keyCols := key.Cols()
if len(keyCols) > 0 {
blk.KeyCols = make([]string, len(keyCols))
blk.KeyValues = make([]interface{}, len(keyCols))
for j, c := range keyCols {
blk.KeyCols[j] = c.Label
blk.KeyValues[j] = key.Value(j)
}
}
err := b.Do(func(cr execute.ColReader) error {
l := cr.Len()
for i := 0; i < l; i++ {
row := make([]interface{}, len(blk.ColMeta))
for j, c := range blk.ColMeta {
var v interface{}
switch c.Type {
case execute.TBool:
v = cr.Bools(j)[i]
case execute.TInt:
v = cr.Ints(j)[i]
case execute.TUInt:
v = cr.UInts(j)[i]
case execute.TFloat:
v = cr.Floats(j)[i]
case execute.TString:
v = cr.Strings(j)[i]
case execute.TTime:
v = cr.Times(j)[i]
default:
panic(fmt.Errorf("unknown column type %s", c.Type))
}
row[j] = v
}
blk.Data = append(blk.Data, row)
}
return nil
})
if err != nil {
return nil, err
}
return blk, nil
}
type SortedBlocks []*Block
func (b SortedBlocks) Len() int {
return len(b)
}
func (b SortedBlocks) Less(i int, j int) bool {
return b[i].Key().Less(b[j].Key())
}
func (b SortedBlocks) Swap(i int, j int) {
b[i], b[j] = b[j], b[i]
}
// NormalizeBlocks ensures that each block is normalized
func NormalizeBlocks(bs []*Block) {
for _, b := range bs {
b.Key() // Key() normalizes the block as a side effect.
}
}
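
Because only the columns and row data need to be set, a test block can be declared statically; for example, mirroring the executor tests later in this commit:

```go
b := &executetest.Block{
	KeyCols: []string{"_start", "_stop"},
	ColMeta: []execute.ColMeta{
		{Label: "_start", Type: execute.TTime},
		{Label: "_stop", Type: execute.TTime},
		{Label: "_time", Type: execute.TTime},
		{Label: "_value", Type: execute.TFloat},
	},
	Data: [][]interface{}{
		{execute.Time(0), execute.Time(5), execute.Time(0), 1.0},
	},
}
b.Normalize() // derives PartitionKey and KeyValues from KeyCols and the first row
```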

View File

@ -0,0 +1,92 @@
package executetest
import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
uuid "github.com/satori/go.uuid"
)
func RandomDatasetID() execute.DatasetID {
return execute.DatasetID(uuid.NewV4())
}
type Dataset struct {
ID execute.DatasetID
Retractions []execute.PartitionKey
ProcessingTimeUpdates []execute.Time
WatermarkUpdates []execute.Time
Finished bool
FinishedErr error
}
func NewDataset(id execute.DatasetID) *Dataset {
return &Dataset{
ID: id,
}
}
func (d *Dataset) AddTransformation(t execute.Transformation) {
panic("not implemented")
}
func (d *Dataset) RetractBlock(key execute.PartitionKey) error {
d.Retractions = append(d.Retractions, key)
return nil
}
func (d *Dataset) UpdateProcessingTime(t execute.Time) error {
d.ProcessingTimeUpdates = append(d.ProcessingTimeUpdates, t)
return nil
}
func (d *Dataset) UpdateWatermark(mark execute.Time) error {
d.WatermarkUpdates = append(d.WatermarkUpdates, mark)
return nil
}
func (d *Dataset) Finish(err error) {
if d.Finished {
panic("finish has already been called")
}
d.Finished = true
d.FinishedErr = err
}
func (d *Dataset) SetTriggerSpec(t query.TriggerSpec) {
panic("not implemented")
}
type NewTransformation func(execute.Dataset, execute.BlockBuilderCache) execute.Transformation
// TransformationPassThroughTestHelper ensures that a transformation forwards
// watermark and processing-time updates, and the finish call, to its dataset.
func TransformationPassThroughTestHelper(t *testing.T, newTr NewTransformation) {
t.Helper()
now := execute.Now()
d := NewDataset(RandomDatasetID())
c := execute.NewBlockBuilderCache(UnlimitedAllocator)
c.SetTriggerSpec(execute.DefaultTriggerSpec)
parentID := RandomDatasetID()
tr := newTr(d, c)
if err := tr.UpdateWatermark(parentID, now); err != nil {
t.Fatal(err)
}
if err := tr.UpdateProcessingTime(parentID, now); err != nil {
t.Fatal(err)
}
tr.Finish(parentID, nil)
exp := &Dataset{
ID: d.ID,
ProcessingTimeUpdates: []execute.Time{now},
WatermarkUpdates: []execute.Time{now},
Finished: true,
FinishedErr: nil,
}
if !cmp.Equal(d, exp) {
t.Errorf("unexpected dataset -want/+got\n%s", cmp.Diff(exp, d))
}
}

View File

@ -0,0 +1,34 @@
package executetest
import "github.com/influxdata/ifql/query/execute"
type Result struct {
Blks []*Block
}
func NewResult(blocks []*Block) *Result {
return &Result{Blks: blocks}
}
func (r *Result) Blocks() execute.BlockIterator {
return &BlockIterator{
r.Blks,
}
}
func (r *Result) Normalize() {
NormalizeBlocks(r.Blks)
}
type BlockIterator struct {
blocks []*Block
}
func (bi *BlockIterator) Do(f func(execute.Block) error) error {
for _, b := range bi.blocks {
if err := f(b); err != nil {
return err
}
}
return nil
}

View File

@ -0,0 +1,93 @@
package executetest
import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/influxdata/ifql/query/execute"
)
func RowSelectorFuncTestHelper(t *testing.T, selector execute.RowSelector, data execute.Block, want []execute.Row) {
t.Helper()
s := selector.NewFloatSelector()
valueIdx := execute.ColIdx(execute.DefaultValueColLabel, data.Cols())
if valueIdx < 0 {
t.Fatal("no _value column found")
}
data.Do(func(cr execute.ColReader) error {
s.DoFloat(cr.Floats(valueIdx), cr)
return nil
})
got := s.Rows()
if !cmp.Equal(want, got) {
t.Errorf("unexpected value -want/+got\n%s", cmp.Diff(want, got))
}
}
var rows []execute.Row
func RowSelectorFuncBenchmarkHelper(b *testing.B, selector execute.RowSelector, data execute.Block) {
b.Helper()
valueIdx := execute.ColIdx(execute.DefaultValueColLabel, data.Cols())
if valueIdx < 0 {
b.Fatal("no _value column found")
}
b.ResetTimer()
for n := 0; n < b.N; n++ {
s := selector.NewFloatSelector()
data.Do(func(cr execute.ColReader) error {
s.DoFloat(cr.Floats(valueIdx), cr)
return nil
})
rows = s.Rows()
}
}
func IndexSelectorFuncTestHelper(t *testing.T, selector execute.IndexSelector, data execute.Block, want [][]int) {
t.Helper()
var got [][]int
s := selector.NewFloatSelector()
valueIdx := execute.ColIdx(execute.DefaultValueColLabel, data.Cols())
if valueIdx < 0 {
t.Fatal("no _value column found")
}
data.Do(func(cr execute.ColReader) error {
var cpy []int
selected := s.DoFloat(cr.Floats(valueIdx))
if len(selected) > 0 {
cpy = make([]int, len(selected))
copy(cpy, selected)
}
got = append(got, cpy)
return nil
})
if !cmp.Equal(want, got) {
t.Errorf("unexpected value -want/+got\n%s", cmp.Diff(want, got))
}
}
func IndexSelectorFuncBenchmarkHelper(b *testing.B, selector execute.IndexSelector, data execute.Block) {
b.Helper()
valueIdx := execute.ColIdx(execute.DefaultValueColLabel, data.Cols())
if valueIdx < 0 {
b.Fatal("no _value column found")
}
b.ResetTimer()
var got [][]int
for n := 0; n < b.N; n++ {
s := selector.NewFloatSelector()
data.Do(func(cr execute.ColReader) error {
got = append(got, s.DoFloat(cr.Floats(valueIdx)))
return nil
})
}
}

View File

@ -0,0 +1,47 @@
package executetest
import (
"sort"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/influxdata/ifql/query/execute"
)
// ProcessTestHelper runs the created transformation over the input blocks and
// compares the blocks cached by the transformation to want.
func ProcessTestHelper(
t *testing.T,
data []execute.Block,
want []*Block,
create func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation,
) {
t.Helper()
d := NewDataset(RandomDatasetID())
c := execute.NewBlockBuilderCache(UnlimitedAllocator)
c.SetTriggerSpec(execute.DefaultTriggerSpec)
tx := create(d, c)
parentID := RandomDatasetID()
for _, b := range data {
if err := tx.Process(parentID, b); err != nil {
t.Fatal(err)
}
}
got, err := BlocksFromCache(c)
if err != nil {
t.Fatal(err)
}
NormalizeBlocks(got)
NormalizeBlocks(want)
sort.Sort(SortedBlocks(got))
sort.Sort(SortedBlocks(want))
if !cmp.Equal(want, got, cmpopts.EquateNaNs()) {
t.Errorf("unexpected blocks -want/+got\n%s", cmp.Diff(want, got))
}
}
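
A transformation test then reduces to declaring input and expected blocks; in this sketch, inBlock, wantBlock, and newMyTransformation are hypothetical placeholders:

```go
executetest.ProcessTestHelper(t,
	[]execute.Block{inBlock},        // input blocks fed to Process
	[]*executetest.Block{wantBlock}, // expected cached output
	func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
		return newMyTransformation(d, c) // hypothetical constructor under test
	},
)
```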

244
query/execute/executor.go Normal file
View File

@ -0,0 +1,244 @@
package execute
import (
"context"
"fmt"
"runtime/debug"
"github.com/influxdata/ifql/id"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/plan"
"github.com/pkg/errors"
)
type Executor interface {
Execute(ctx context.Context, orgID id.ID, p *plan.PlanSpec) (map[string]Result, error)
}
type executor struct {
deps Dependencies
}
func NewExecutor(deps Dependencies) Executor {
e := &executor{
deps: deps,
}
return e
}
type executionState struct {
p *plan.PlanSpec
deps Dependencies
orgID id.ID
alloc *Allocator
resources query.ResourceManagement
bounds Bounds
results map[string]Result
sources []Source
transports []Transport
dispatcher *poolDispatcher
}
func (e *executor) Execute(ctx context.Context, orgID id.ID, p *plan.PlanSpec) (map[string]Result, error) {
es, err := e.createExecutionState(ctx, orgID, p)
if err != nil {
return nil, errors.Wrap(err, "failed to initialize execute state")
}
es.do(ctx)
return es.results, nil
}
func validatePlan(p *plan.PlanSpec) error {
if p.Resources.ConcurrencyQuota == 0 {
return errors.New("plan must have a non-zero concurrency quota")
}
return nil
}
func (e *executor) createExecutionState(ctx context.Context, orgID id.ID, p *plan.PlanSpec) (*executionState, error) {
if err := validatePlan(p); err != nil {
return nil, errors.Wrap(err, "invalid plan")
}
es := &executionState{
orgID: orgID,
p: p,
deps: e.deps,
alloc: &Allocator{
Limit: p.Resources.MemoryBytesQuota,
},
resources: p.Resources,
results: make(map[string]Result, len(p.Results)),
// TODO(nathanielc): Have the planner specify the dispatcher throughput
dispatcher: newPoolDispatcher(10),
bounds: Bounds{
Start: Time(p.Bounds.Start.Time(p.Now).UnixNano()),
Stop: Time(p.Bounds.Stop.Time(p.Now).UnixNano()),
},
}
nodes := make(map[plan.ProcedureID]Node, len(p.Procedures))
for name, yield := range p.Results {
ds, err := es.createNode(ctx, p.Procedures[yield.ID], nodes)
if err != nil {
return nil, err
}
r := newResult(yield)
ds.AddTransformation(r)
es.results[name] = r
}
return es, nil
}
// DefaultTriggerSpec defines the triggering that should be used for datasets
// whose parent transformation is not a windowing transformation.
var DefaultTriggerSpec = query.AfterWatermarkTriggerSpec{}
type triggeringSpec interface {
TriggerSpec() query.TriggerSpec
}
func (es *executionState) createNode(ctx context.Context, pr *plan.Procedure, nodes map[plan.ProcedureID]Node) (Node, error) {
// Check if we already created this node
if n, ok := nodes[pr.ID]; ok {
return n, nil
}
// Build execution context
ec := executionContext{
es: es,
}
if len(pr.Parents) > 0 {
ec.parents = make([]DatasetID, len(pr.Parents))
for i, parentID := range pr.Parents {
ec.parents[i] = DatasetID(parentID)
}
}
// If the procedure is a source, create a source node.
if createS, ok := procedureToSource[pr.Spec.Kind()]; ok {
s, err := createS(pr.Spec, DatasetID(pr.ID), ec)
if err != nil {
return nil, err
}
es.sources = append(es.sources, s)
nodes[pr.ID] = s
return s, nil
}
createT, ok := procedureToTransformation[pr.Spec.Kind()]
if !ok {
return nil, fmt.Errorf("unsupported procedure %v", pr.Spec.Kind())
}
// Create the transformation
t, ds, err := createT(DatasetID(pr.ID), AccumulatingMode, pr.Spec, ec)
if err != nil {
return nil, err
}
nodes[pr.ID] = ds
// Setup triggering
var ts query.TriggerSpec = DefaultTriggerSpec
if t, ok := pr.Spec.(triggeringSpec); ok {
ts = t.TriggerSpec()
}
ds.SetTriggerSpec(ts)
// Recurse creating parents
for _, parentID := range pr.Parents {
parent, err := es.createNode(ctx, es.p.Procedures[parentID], nodes)
if err != nil {
return nil, err
}
transport := newConescutiveTransport(es.dispatcher, t)
es.transports = append(es.transports, transport)
parent.AddTransformation(transport)
}
return ds, nil
}
func (es *executionState) abort(err error) {
for _, r := range es.results {
r.(*result).abort(err)
}
}
func (es *executionState) do(ctx context.Context) {
for _, src := range es.sources {
go func(src Source) {
// Setup panic handling on the source goroutines
defer func() {
if e := recover(); e != nil {
// We had a panic, abort the entire execution.
var err error
switch e := e.(type) {
case error:
err = e
default:
err = fmt.Errorf("%v", e)
}
es.abort(fmt.Errorf("panic: %v\n%s", err, debug.Stack()))
}
}()
src.Run(ctx)
}(src)
}
es.dispatcher.Start(es.resources.ConcurrencyQuota, ctx)
go func() {
// Wait for all transports to finish
for _, t := range es.transports {
select {
case <-t.Finished():
case <-ctx.Done():
es.abort(errors.New("context done"))
case err := <-es.dispatcher.Err():
if err != nil {
es.abort(err)
}
}
}
// Check for any errors on the dispatcher
err := es.dispatcher.Stop()
if err != nil {
es.abort(err)
}
}()
}
type executionContext struct {
es *executionState
parents []DatasetID
}
// Satisfy the ExecutionContext interface
func (ec executionContext) OrganizationID() id.ID {
return ec.es.orgID
}
func (ec executionContext) ResolveTime(qt query.Time) Time {
return Time(qt.Time(ec.es.p.Now).UnixNano())
}
func (ec executionContext) Bounds() Bounds {
return ec.es.bounds
}
func (ec executionContext) Allocator() *Allocator {
return ec.es.alloc
}
func (ec executionContext) Parents() []DatasetID {
return ec.parents
}
func (ec executionContext) ConvertID(id plan.ProcedureID) DatasetID {
return DatasetID(id)
}
func (ec executionContext) Dependencies() Dependencies {
return ec.es.deps
}
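
End to end, executing a plan amounts to building the node graph, running the sources, and reading the per-yield results, as the tests below demonstrate. A condensed sketch, where deps, orgID, and spec are assumed to come from the caller:

```go
exe := execute.NewExecutor(deps) // deps: an execute.Dependencies value
results, err := exe.Execute(ctx, orgID, spec)
if err != nil {
	return err
}
for name, r := range results {
	fmt.Println("result:", name)
	if err := r.Blocks().Do(func(b execute.Block) error {
		return nil // consume each block of this result
	}); err != nil {
		return err
	}
}
```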

View File

@ -0,0 +1,429 @@
package execute_test
import (
"context"
"math"
"testing"
"time"
"github.com/google/go-cmp/cmp"
"github.com/influxdata/ifql/ast"
"github.com/influxdata/ifql/functions"
"github.com/influxdata/ifql/id"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/execute/executetest"
"github.com/influxdata/ifql/query/plan"
"github.com/influxdata/ifql/semantic"
uuid "github.com/satori/go.uuid"
)
var epoch = time.Unix(0, 0)
var orgID id.ID
func init() {
orgID.DecodeFromString("aaaa")
}
func TestExecutor_Execute(t *testing.T) {
testCases := []struct {
name string
plan *plan.PlanSpec
want map[string][]*executetest.Block
}{
{
name: "simple aggregate",
plan: &plan.PlanSpec{
Now: epoch.Add(5),
Resources: query.ResourceManagement{
ConcurrencyQuota: 1,
MemoryBytesQuota: math.MaxInt64,
},
Bounds: plan.BoundsSpec{
Start: query.Time{Absolute: time.Unix(0, 1)},
Stop: query.Time{Absolute: time.Unix(0, 5)},
},
Procedures: map[plan.ProcedureID]*plan.Procedure{
plan.ProcedureIDFromOperationID("from"): {
ID: plan.ProcedureIDFromOperationID("from"),
Spec: &testFromProcedureSource{
data: []execute.Block{&executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(0), 1.0},
{execute.Time(0), execute.Time(5), execute.Time(1), 2.0},
{execute.Time(0), execute.Time(5), execute.Time(2), 3.0},
{execute.Time(0), execute.Time(5), execute.Time(3), 4.0},
{execute.Time(0), execute.Time(5), execute.Time(4), 5.0},
},
}},
},
Parents: nil,
Children: []plan.ProcedureID{plan.ProcedureIDFromOperationID("sum")},
},
plan.ProcedureIDFromOperationID("sum"): {
ID: plan.ProcedureIDFromOperationID("sum"),
Spec: &functions.SumProcedureSpec{
AggregateConfig: execute.DefaultAggregateConfig,
},
Parents: []plan.ProcedureID{
plan.ProcedureIDFromOperationID("from"),
},
Children: nil,
},
},
Results: map[string]plan.YieldSpec{
plan.DefaultYieldName: {ID: plan.ProcedureIDFromOperationID("sum")},
},
},
want: map[string][]*executetest.Block{
plan.DefaultYieldName: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(5), 15.0},
},
}},
},
},
{
name: "simple join",
plan: &plan.PlanSpec{
Now: epoch.Add(5),
Resources: query.ResourceManagement{
ConcurrencyQuota: 1,
MemoryBytesQuota: math.MaxInt64,
},
Bounds: plan.BoundsSpec{
Start: query.Time{Absolute: time.Unix(0, 1)},
Stop: query.Time{Absolute: time.Unix(0, 5)},
},
Procedures: map[plan.ProcedureID]*plan.Procedure{
plan.ProcedureIDFromOperationID("from"): {
ID: plan.ProcedureIDFromOperationID("from"),
Spec: &testFromProcedureSource{
data: []execute.Block{&executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(0), int64(1)},
{execute.Time(0), execute.Time(5), execute.Time(1), int64(2)},
{execute.Time(0), execute.Time(5), execute.Time(2), int64(3)},
{execute.Time(0), execute.Time(5), execute.Time(3), int64(4)},
{execute.Time(0), execute.Time(5), execute.Time(4), int64(5)},
},
}},
},
Parents: nil,
Children: []plan.ProcedureID{plan.ProcedureIDFromOperationID("sum")},
},
plan.ProcedureIDFromOperationID("sum"): {
ID: plan.ProcedureIDFromOperationID("sum"),
Spec: &functions.SumProcedureSpec{
AggregateConfig: execute.DefaultAggregateConfig,
},
Parents: []plan.ProcedureID{
plan.ProcedureIDFromOperationID("from"),
},
Children: []plan.ProcedureID{plan.ProcedureIDFromOperationID("join")},
},
plan.ProcedureIDFromOperationID("count"): {
ID: plan.ProcedureIDFromOperationID("count"),
Spec: &functions.CountProcedureSpec{
AggregateConfig: execute.DefaultAggregateConfig,
},
Parents: []plan.ProcedureID{
plan.ProcedureIDFromOperationID("from"),
},
Children: []plan.ProcedureID{plan.ProcedureIDFromOperationID("join")},
},
plan.ProcedureIDFromOperationID("join"): {
ID: plan.ProcedureIDFromOperationID("join"),
Spec: &functions.MergeJoinProcedureSpec{
TableNames: map[plan.ProcedureID]string{
plan.ProcedureIDFromOperationID("sum"): "sum",
plan.ProcedureIDFromOperationID("count"): "count",
},
On: []string{"_time", "_start", "_stop"},
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "t"}}},
Body: &semantic.ObjectExpression{
Properties: []*semantic.Property{
{
Key: &semantic.Identifier{Name: "_time"},
Value: &semantic.MemberExpression{
Object: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "t",
},
Property: "sum",
},
Property: "_time",
},
},
{
Key: &semantic.Identifier{Name: "_start"},
Value: &semantic.MemberExpression{
Object: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "t",
},
Property: "sum",
},
Property: "_start",
},
},
{
Key: &semantic.Identifier{Name: "_stop"},
Value: &semantic.MemberExpression{
Object: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "t",
},
Property: "sum",
},
Property: "_stop",
},
},
{
Key: &semantic.Identifier{Name: "_value"},
Value: &semantic.BinaryExpression{
Operator: ast.DivisionOperator,
Left: &semantic.MemberExpression{
Object: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "t",
},
Property: "sum",
},
Property: "_value",
},
Right: &semantic.MemberExpression{
Object: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "t",
},
Property: "count",
},
Property: "_value",
},
},
},
},
},
},
},
Parents: []plan.ProcedureID{
plan.ProcedureIDFromOperationID("sum"),
plan.ProcedureIDFromOperationID("count"),
},
Children: nil,
},
},
Results: map[string]plan.YieldSpec{
plan.DefaultYieldName: {ID: plan.ProcedureIDFromOperationID("join")},
},
},
want: map[string][]*executetest.Block{
plan.DefaultYieldName: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(5), int64(3)},
},
}},
},
},
{
name: "multiple aggregates",
plan: &plan.PlanSpec{
Now: epoch.Add(5),
Resources: query.ResourceManagement{
ConcurrencyQuota: 1,
MemoryBytesQuota: math.MaxInt64,
},
Bounds: plan.BoundsSpec{
Start: query.Time{Absolute: time.Unix(0, 1)},
Stop: query.Time{Absolute: time.Unix(0, 5)},
},
Procedures: map[plan.ProcedureID]*plan.Procedure{
plan.ProcedureIDFromOperationID("from"): {
ID: plan.ProcedureIDFromOperationID("from"),
Spec: &testFromProcedureSource{
data: []execute.Block{&executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(0), 1.0},
{execute.Time(0), execute.Time(5), execute.Time(1), 2.0},
{execute.Time(0), execute.Time(5), execute.Time(2), 3.0},
{execute.Time(0), execute.Time(5), execute.Time(3), 4.0},
{execute.Time(0), execute.Time(5), execute.Time(4), 5.0},
},
}},
},
Parents: nil,
Children: []plan.ProcedureID{
plan.ProcedureIDFromOperationID("sum"),
plan.ProcedureIDFromOperationID("mean"),
},
},
plan.ProcedureIDFromOperationID("sum"): {
ID: plan.ProcedureIDFromOperationID("sum"),
Spec: &functions.SumProcedureSpec{
AggregateConfig: execute.DefaultAggregateConfig,
},
Parents: []plan.ProcedureID{
plan.ProcedureIDFromOperationID("from"),
},
Children: nil,
},
plan.ProcedureIDFromOperationID("mean"): {
ID: plan.ProcedureIDFromOperationID("mean"),
Spec: &functions.MeanProcedureSpec{
AggregateConfig: execute.DefaultAggregateConfig,
},
Parents: []plan.ProcedureID{
plan.ProcedureIDFromOperationID("from"),
},
Children: nil,
},
},
Results: map[string]plan.YieldSpec{
"sum": {ID: plan.ProcedureIDFromOperationID("sum")},
"mean": {ID: plan.ProcedureIDFromOperationID("mean")},
},
},
want: map[string][]*executetest.Block{
"sum": []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(5), 15.0},
},
}},
"mean": []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(5), 3.0},
},
}},
},
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
exe := execute.NewExecutor(nil)
results, err := exe.Execute(context.Background(), orgID, tc.plan)
if err != nil {
t.Fatal(err)
}
got := make(map[string][]*executetest.Block, len(results))
for name, r := range results {
if err := r.Blocks().Do(func(b execute.Block) error {
cb, err := executetest.ConvertBlock(b)
if err != nil {
return err
}
got[name] = append(got[name], cb)
return nil
}); err != nil {
t.Fatal(err)
}
}
for _, g := range got {
executetest.NormalizeBlocks(g)
}
for _, w := range tc.want {
executetest.NormalizeBlocks(w)
}
if !cmp.Equal(got, tc.want) {
t.Errorf("unexpected results -want/+got\n%s", cmp.Diff(tc.want, got))
}
})
}
}
type testFromProcedureSource struct {
data []execute.Block
ts []execute.Transformation
}
func (p *testFromProcedureSource) Kind() plan.ProcedureKind {
return "from-test"
}
func (p *testFromProcedureSource) Copy() plan.ProcedureSpec {
return p
}
func (p *testFromProcedureSource) AddTransformation(t execute.Transformation) {
p.ts = append(p.ts, t)
}
func (p *testFromProcedureSource) Run(ctx context.Context) {
id := execute.DatasetID(uuid.NewV4())
for _, t := range p.ts {
var max execute.Time
for _, b := range p.data {
t.Process(id, b)
stopIdx := execute.ColIdx(execute.DefaultStopColLabel, b.Cols())
if stopIdx >= 0 {
if s := b.Key().ValueTime(stopIdx); s > max {
max = s
}
}
}
t.UpdateWatermark(id, max)
t.Finish(id, nil)
}
}
func init() {
execute.RegisterSource("from-test", createTestFromSource)
}
func createTestFromSource(prSpec plan.ProcedureSpec, id execute.DatasetID, a execute.Administration) (execute.Source, error) {
return prSpec.(*testFromProcedureSource), nil
}

View File

@ -0,0 +1,285 @@
package execute
//func TestBinaryFuncs(t *testing.T) {
// testCases := []struct {
// op expression.Operator
// l, r interface{}
// want interface{}
// noFunc bool
// }{
// {op: expression.AdditionOperator, l: int64(6), r: int64(7), want: int64(13)},
// {op: expression.AdditionOperator, l: int64(6), r: uint64(7), noFunc: true},
// {op: expression.AdditionOperator, l: int64(6), r: float64(7), noFunc: true},
// {op: expression.AdditionOperator, l: int64(6), r: bool(false), noFunc: true},
// {op: expression.AdditionOperator, l: uint64(6), r: int64(7), noFunc: true},
// {op: expression.AdditionOperator, l: uint64(6), r: uint64(7), want: uint64(13)},
// {op: expression.AdditionOperator, l: uint64(6), r: float64(7), noFunc: true},
// {op: expression.AdditionOperator, l: uint64(6), r: bool(false), noFunc: true},
// {op: expression.AdditionOperator, l: float64(6), r: int64(7), noFunc: true},
// {op: expression.AdditionOperator, l: float64(6), r: uint64(7), noFunc: true},
// {op: expression.AdditionOperator, l: float64(6), r: float64(7), want: float64(13)},
// {op: expression.AdditionOperator, l: float64(6), r: bool(false), noFunc: true},
// {op: expression.AdditionOperator, l: bool(true), r: int64(7), noFunc: true},
// {op: expression.AdditionOperator, l: bool(true), r: uint64(7), noFunc: true},
// {op: expression.AdditionOperator, l: bool(true), r: float64(7), noFunc: true},
// {op: expression.AdditionOperator, l: bool(true), r: bool(false), noFunc: true},
// {op: expression.SubtractionOperator, l: int64(6), r: int64(7), want: int64(-1)},
// {op: expression.SubtractionOperator, l: int64(6), r: uint64(7), noFunc: true},
// {op: expression.SubtractionOperator, l: int64(6), r: float64(7), noFunc: true},
// {op: expression.SubtractionOperator, l: int64(6), r: bool(false), noFunc: true},
// {op: expression.SubtractionOperator, l: uint64(6), r: int64(7), noFunc: true},
// {op: expression.SubtractionOperator, l: uint64(7), r: uint64(6), want: uint64(1)},
// {op: expression.SubtractionOperator, l: uint64(6), r: float64(7), noFunc: true},
// {op: expression.SubtractionOperator, l: uint64(6), r: bool(false), noFunc: true},
// {op: expression.SubtractionOperator, l: float64(6), r: int64(7), noFunc: true},
// {op: expression.SubtractionOperator, l: float64(6), r: uint64(7), noFunc: true},
// {op: expression.SubtractionOperator, l: float64(6), r: float64(7), want: float64(-1)},
// {op: expression.SubtractionOperator, l: float64(6), r: bool(false), noFunc: true},
// {op: expression.SubtractionOperator, l: bool(true), r: int64(7), noFunc: true},
// {op: expression.SubtractionOperator, l: bool(true), r: uint64(7), noFunc: true},
// {op: expression.SubtractionOperator, l: bool(true), r: float64(7), noFunc: true},
// {op: expression.SubtractionOperator, l: bool(true), r: bool(false), noFunc: true},
// {op: expression.MultiplicationOperator, l: int64(6), r: int64(7), want: int64(42)},
// {op: expression.MultiplicationOperator, l: int64(6), r: uint64(7), noFunc: true},
// {op: expression.MultiplicationOperator, l: int64(6), r: float64(7), noFunc: true},
// {op: expression.MultiplicationOperator, l: int64(6), r: bool(false), noFunc: true},
// {op: expression.MultiplicationOperator, l: uint64(6), r: int64(7), noFunc: true},
// {op: expression.MultiplicationOperator, l: uint64(6), r: uint64(7), want: uint64(42)},
// {op: expression.MultiplicationOperator, l: uint64(6), r: float64(7), noFunc: true},
// {op: expression.MultiplicationOperator, l: uint64(6), r: bool(false), noFunc: true},
// {op: expression.MultiplicationOperator, l: float64(6), r: int64(7), noFunc: true},
// {op: expression.MultiplicationOperator, l: float64(6), r: uint64(7), noFunc: true},
// {op: expression.MultiplicationOperator, l: float64(6), r: float64(7), want: float64(42)},
// {op: expression.MultiplicationOperator, l: float64(6), r: bool(false), noFunc: true},
// {op: expression.MultiplicationOperator, l: bool(true), r: int64(7), noFunc: true},
// {op: expression.MultiplicationOperator, l: bool(true), r: uint64(7), noFunc: true},
// {op: expression.MultiplicationOperator, l: bool(true), r: float64(7), noFunc: true},
// {op: expression.MultiplicationOperator, l: bool(true), r: bool(false), noFunc: true},
// {op: expression.DivisionOperator, l: int64(6), r: int64(3), want: int64(2)},
// {op: expression.DivisionOperator, l: int64(6), r: uint64(7), noFunc: true},
// {op: expression.DivisionOperator, l: int64(6), r: float64(7), noFunc: true},
// {op: expression.DivisionOperator, l: int64(6), r: bool(false), noFunc: true},
// {op: expression.DivisionOperator, l: uint64(6), r: int64(7), noFunc: true},
// {op: expression.DivisionOperator, l: uint64(6), r: uint64(2), want: uint64(3)},
// {op: expression.DivisionOperator, l: uint64(6), r: float64(7), noFunc: true},
// {op: expression.DivisionOperator, l: uint64(6), r: bool(false), noFunc: true},
// {op: expression.DivisionOperator, l: float64(6), r: int64(7), noFunc: true},
// {op: expression.DivisionOperator, l: float64(6), r: uint64(7), noFunc: true},
// {op: expression.DivisionOperator, l: float64(6), r: float64(7), want: float64(6.0 / 7.0)},
// {op: expression.DivisionOperator, l: float64(6), r: bool(false), noFunc: true},
// {op: expression.DivisionOperator, l: bool(true), r: int64(7), noFunc: true},
// {op: expression.DivisionOperator, l: bool(true), r: uint64(7), noFunc: true},
// {op: expression.DivisionOperator, l: bool(true), r: float64(7), noFunc: true},
// {op: expression.DivisionOperator, l: bool(true), r: bool(false), noFunc: true},
// {op: expression.LessThanEqualOperator, l: int64(6), r: int64(7), want: true},
// {op: expression.LessThanEqualOperator, l: int64(6), r: uint64(7), want: true},
// {op: expression.LessThanEqualOperator, l: int64(6), r: float64(7), want: true},
// {op: expression.LessThanEqualOperator, l: int64(6), r: bool(false), noFunc: true},
// {op: expression.LessThanEqualOperator, l: uint64(6), r: int64(7), want: true},
// {op: expression.LessThanEqualOperator, l: uint64(6), r: uint64(7), want: true},
// {op: expression.LessThanEqualOperator, l: uint64(6), r: float64(7), want: true},
// {op: expression.LessThanEqualOperator, l: uint64(6), r: bool(false), noFunc: true},
// {op: expression.LessThanEqualOperator, l: float64(6), r: int64(7), want: true},
// {op: expression.LessThanEqualOperator, l: float64(6), r: uint64(7), want: true},
// {op: expression.LessThanEqualOperator, l: float64(6), r: float64(7), want: true},
// {op: expression.LessThanEqualOperator, l: float64(6), r: bool(false), noFunc: true},
// {op: expression.LessThanEqualOperator, l: bool(true), r: int64(7), noFunc: true},
// {op: expression.LessThanEqualOperator, l: bool(true), r: uint64(7), noFunc: true},
// {op: expression.LessThanEqualOperator, l: bool(true), r: float64(7), noFunc: true},
// {op: expression.LessThanEqualOperator, l: bool(true), r: bool(false), noFunc: true},
// {op: expression.LessThanOperator, l: int64(6), r: int64(7), want: true},
// {op: expression.LessThanOperator, l: int64(6), r: uint64(7), want: true},
// {op: expression.LessThanOperator, l: int64(6), r: float64(7), want: true},
// {op: expression.LessThanOperator, l: int64(6), r: bool(false), noFunc: true},
// {op: expression.LessThanOperator, l: uint64(6), r: int64(7), want: true},
// {op: expression.LessThanOperator, l: uint64(6), r: uint64(7), want: true},
// {op: expression.LessThanOperator, l: uint64(6), r: float64(7), want: true},
// {op: expression.LessThanOperator, l: uint64(6), r: bool(false), noFunc: true},
// {op: expression.LessThanOperator, l: float64(6), r: int64(7), want: true},
// {op: expression.LessThanOperator, l: float64(6), r: uint64(7), want: true},
// {op: expression.LessThanOperator, l: float64(6), r: float64(7), want: true},
// {op: expression.LessThanOperator, l: float64(6), r: bool(false), noFunc: true},
// {op: expression.LessThanOperator, l: bool(true), r: int64(7), noFunc: true},
// {op: expression.LessThanOperator, l: bool(true), r: uint64(7), noFunc: true},
// {op: expression.LessThanOperator, l: bool(true), r: float64(7), noFunc: true},
// {op: expression.LessThanOperator, l: bool(true), r: bool(false), noFunc: true},
// {op: expression.GreaterThanEqualOperator, l: int64(6), r: int64(7), want: false},
// {op: expression.GreaterThanEqualOperator, l: int64(6), r: uint64(7), want: false},
// {op: expression.GreaterThanEqualOperator, l: int64(6), r: float64(7), want: false},
// {op: expression.GreaterThanEqualOperator, l: int64(6), r: bool(false), noFunc: true},
// {op: expression.GreaterThanEqualOperator, l: uint64(6), r: int64(7), want: false},
// {op: expression.GreaterThanEqualOperator, l: uint64(6), r: uint64(7), want: false},
// {op: expression.GreaterThanEqualOperator, l: uint64(6), r: float64(7), want: false},
// {op: expression.GreaterThanEqualOperator, l: uint64(6), r: bool(false), noFunc: true},
// {op: expression.GreaterThanEqualOperator, l: float64(6), r: int64(7), want: false},
// {op: expression.GreaterThanEqualOperator, l: float64(6), r: uint64(7), want: false},
// {op: expression.GreaterThanEqualOperator, l: float64(6), r: float64(7), want: false},
// {op: expression.GreaterThanEqualOperator, l: float64(6), r: bool(false), noFunc: true},
// {op: expression.GreaterThanEqualOperator, l: bool(true), r: int64(7), noFunc: true},
// {op: expression.GreaterThanEqualOperator, l: bool(true), r: uint64(7), noFunc: true},
// {op: expression.GreaterThanEqualOperator, l: bool(true), r: float64(7), noFunc: true},
// {op: expression.GreaterThanEqualOperator, l: bool(true), r: bool(false), noFunc: true},
// {op: expression.GreaterThanOperator, l: int64(6), r: int64(7), want: false},
// {op: expression.GreaterThanOperator, l: int64(6), r: uint64(7), want: false},
// {op: expression.GreaterThanOperator, l: int64(6), r: float64(7), want: false},
// {op: expression.GreaterThanOperator, l: int64(6), r: bool(false), noFunc: true},
// {op: expression.GreaterThanOperator, l: uint64(6), r: int64(7), want: false},
// {op: expression.GreaterThanOperator, l: uint64(6), r: uint64(7), want: false},
// {op: expression.GreaterThanOperator, l: uint64(6), r: float64(7), want: false},
// {op: expression.GreaterThanOperator, l: uint64(6), r: bool(false), noFunc: true},
// {op: expression.GreaterThanOperator, l: float64(6), r: int64(7), want: false},
// {op: expression.GreaterThanOperator, l: float64(6), r: uint64(7), want: false},
// {op: expression.GreaterThanOperator, l: float64(6), r: float64(7), want: false},
// {op: expression.GreaterThanOperator, l: float64(6), r: bool(false), noFunc: true},
// {op: expression.GreaterThanOperator, l: bool(true), r: int64(7), noFunc: true},
// {op: expression.GreaterThanOperator, l: bool(true), r: uint64(7), noFunc: true},
// {op: expression.GreaterThanOperator, l: bool(true), r: float64(7), noFunc: true},
// {op: expression.GreaterThanOperator, l: bool(true), r: bool(false), noFunc: true},
// {op: expression.EqualOperator, l: int64(6), r: int64(7), want: false},
// {op: expression.EqualOperator, l: int64(6), r: uint64(7), want: false},
// {op: expression.EqualOperator, l: int64(6), r: float64(7), want: false},
// {op: expression.EqualOperator, l: int64(6), r: bool(false), noFunc: true},
// {op: expression.EqualOperator, l: uint64(6), r: int64(7), want: false},
// {op: expression.EqualOperator, l: uint64(6), r: uint64(7), want: false},
// {op: expression.EqualOperator, l: uint64(6), r: float64(7), want: false},
// {op: expression.EqualOperator, l: uint64(6), r: bool(false), noFunc: true},
// {op: expression.EqualOperator, l: float64(6), r: int64(7), want: false},
// {op: expression.EqualOperator, l: float64(6), r: uint64(7), want: false},
// {op: expression.EqualOperator, l: float64(6), r: float64(7), want: false},
// {op: expression.EqualOperator, l: float64(6), r: bool(false), noFunc: true},
// {op: expression.EqualOperator, l: bool(true), r: int64(7), noFunc: true},
// {op: expression.EqualOperator, l: bool(true), r: uint64(7), noFunc: true},
// {op: expression.EqualOperator, l: bool(true), r: float64(7), noFunc: true},
// {op: expression.EqualOperator, l: bool(true), r: bool(false), noFunc: true},
// {op: expression.NotEqualOperator, l: int64(6), r: int64(7), want: true},
// {op: expression.NotEqualOperator, l: int64(6), r: uint64(7), want: true},
// {op: expression.NotEqualOperator, l: int64(6), r: float64(7), want: true},
// {op: expression.NotEqualOperator, l: int64(6), r: bool(true), noFunc: true},
// {op: expression.NotEqualOperator, l: uint64(6), r: int64(7), want: true},
// {op: expression.NotEqualOperator, l: uint64(6), r: uint64(7), want: true},
// {op: expression.NotEqualOperator, l: uint64(6), r: float64(7), want: true},
// {op: expression.NotEqualOperator, l: uint64(6), r: bool(true), noFunc: true},
// {op: expression.NotEqualOperator, l: float64(6), r: int64(7), want: true},
// {op: expression.NotEqualOperator, l: float64(6), r: uint64(7), want: true},
// {op: expression.NotEqualOperator, l: float64(6), r: float64(7), want: true},
// {op: expression.NotEqualOperator, l: float64(6), r: bool(false), noFunc: true},
// {op: expression.NotEqualOperator, l: bool(true), r: int64(7), noFunc: true},
// {op: expression.NotEqualOperator, l: bool(true), r: uint64(7), noFunc: true},
// {op: expression.NotEqualOperator, l: bool(true), r: float64(7), noFunc: true},
// {op: expression.NotEqualOperator, l: bool(true), r: bool(false), noFunc: true},
// {op: expression.AndOperator, l: int64(6), r: int64(7), noFunc: true},
// {op: expression.AndOperator, l: int64(6), r: uint64(7), noFunc: true},
// {op: expression.AndOperator, l: int64(6), r: float64(7), noFunc: true},
// {op: expression.AndOperator, l: int64(6), r: bool(false), noFunc: true},
// {op: expression.AndOperator, l: uint64(6), r: int64(7), noFunc: true},
// {op: expression.AndOperator, l: uint64(6), r: uint64(7), noFunc: true},
// {op: expression.AndOperator, l: uint64(6), r: float64(7), noFunc: true},
// {op: expression.AndOperator, l: uint64(6), r: bool(false), noFunc: true},
// {op: expression.AndOperator, l: float64(6), r: int64(7), noFunc: true},
// {op: expression.AndOperator, l: float64(6), r: uint64(7), noFunc: true},
// {op: expression.AndOperator, l: float64(6), r: float64(7), noFunc: true},
// {op: expression.AndOperator, l: float64(6), r: bool(false), noFunc: true},
// {op: expression.AndOperator, l: bool(true), r: int64(7), noFunc: true},
// {op: expression.AndOperator, l: bool(true), r: uint64(7), noFunc: true},
// {op: expression.AndOperator, l: bool(true), r: float64(7), noFunc: true},
// {op: expression.AndOperator, l: bool(true), r: bool(false), want: false},
// {op: expression.OrOperator, l: int64(6), r: int64(7), noFunc: true},
// {op: expression.OrOperator, l: int64(6), r: uint64(7), noFunc: true},
// {op: expression.OrOperator, l: int64(6), r: float64(7), noFunc: true},
// {op: expression.OrOperator, l: int64(6), r: bool(false), noFunc: true},
// {op: expression.OrOperator, l: uint64(6), r: int64(7), noFunc: true},
// {op: expression.OrOperator, l: uint64(6), r: uint64(7), noFunc: true},
// {op: expression.OrOperator, l: uint64(6), r: float64(7), noFunc: true},
// {op: expression.OrOperator, l: uint64(6), r: bool(false), noFunc: true},
// {op: expression.OrOperator, l: float64(6), r: int64(7), noFunc: true},
// {op: expression.OrOperator, l: float64(6), r: uint64(7), noFunc: true},
// {op: expression.OrOperator, l: float64(6), r: float64(7), noFunc: true},
// {op: expression.OrOperator, l: float64(6), r: bool(false), noFunc: true},
// {op: expression.OrOperator, l: bool(true), r: int64(7), noFunc: true},
// {op: expression.OrOperator, l: bool(true), r: uint64(7), noFunc: true},
// {op: expression.OrOperator, l: bool(true), r: float64(7), noFunc: true},
// {op: expression.OrOperator, l: bool(true), r: bool(false), want: true},
// }
// for i, tc := range testCases {
// tc := tc
// t.Run(fmt.Sprintf("%d: %v %v %v", i, tc.l, tc.op, tc.r), func(t *testing.T) {
// lt := typeOf(tc.l)
// rt := typeOf(tc.r)
// sig := binarySignature{
// Operator: tc.op,
// Left: lt,
// Right: rt,
// }
// f, ok := binaryFuncs[sig]
// if !ok {
// if !tc.noFunc {
// t.Fatal("could not find matching function")
// }
// return
// } else if tc.noFunc {
// t.Fatal("expected to not find function")
// }
// left := evaluator{
// Value: tc.l,
// }
// right := evaluator{
// Value: tc.r,
// }
//
// got := f.Func(nil, left, right)
// want := Value{
// Type: typeOf(tc.want),
// Value: tc.want,
// }
//
// if !cmp.Equal(got, want) {
// t.Errorf("unexpected value: -want/+got\n%s", cmp.Diff(want, got))
// }
// })
// }
//}
//
//func typeOf(v interface{}) DataType {
// switch v.(type) {
// case bool:
// return TBool
// case int64:
// return TInt
// case uint64:
// return TUInt
// case float64:
// return TFloat
// case string:
// return TString
// case Time:
// return TTime
// default:
// return TInvalid
// }
//}
//
//type evaluator struct {
// Value interface{}
//}
//
//func (v evaluator) Type() DataType {
// return typeOf(v.Value)
//}
//func (v evaluator) EvalBool(Scope) bool {
// return v.Value.(bool)
//}
//func (v evaluator) EvalInt(Scope) int64 {
// return v.Value.(int64)
//}
//func (v evaluator) EvalUInt(Scope) uint64 {
// return v.Value.(uint64)
//}
//func (v evaluator) EvalFloat(Scope) float64 {
// return v.Value.(float64)
//}
//func (v evaluator) EvalString(Scope) string {
// return v.Value.(string)
//}
//func (v evaluator) EvalTime(Scope) Time {
// return v.Value.(Time)
//}

View File

@ -0,0 +1,266 @@
package execute_test
//func TestCompileExpression(t *testing.T) {
// testCases := []struct {
// name string
// expr expression.Expression
// types map[string]execute.DataType
// wantErr bool
// }{
// {
// name: "integer literal",
// expr: expression.Expression{
// Root: &expression.IntegerLiteralNode{
// Value: 42,
// },
// },
// wantErr: false,
// },
// {
// name: "negate string",
// expr: expression.Expression{
// Root: &expression.UnaryNode{
// Operator: expression.SubtractionOperator,
// Node: &expression.StringLiteralNode{
// Value: "hello",
// },
// },
// },
// wantErr: true,
// },
// {
// name: "missing type info",
// expr: expression.Expression{
// Root: &expression.ReferenceNode{
// Name: "a",
// },
// },
// wantErr: true,
// },
// }
// for _, tc := range testCases {
// tc := tc
// t.Run(tc.name, func(t *testing.T) {
// _, err := execute.CompileExpression(tc.expr, tc.types)
// if err != nil {
// if !tc.wantErr {
// t.Errorf("unexpected compliation error: %s", err)
// }
// } else if tc.wantErr {
// t.Error("expected compliation error")
// }
// })
// }
//}
//func TestEvaluateCompiledExpression(t *testing.T) {
// testCases := []struct {
// name string
// expr expression.Expression
// types map[string]execute.DataType
// scope execute.Scope
// want execute.Value
// wantErr bool
// }{
// {
// name: "integer literal",
// expr: expression.Expression{
// Root: &expression.IntegerLiteralNode{
// Value: 42,
// },
// },
// want: execute.Value{
// Type: execute.TInt,
// Value: int64(42),
// },
// },
// {
// name: "integer addition",
// expr: expression.Expression{
// Root: &expression.BinaryNode{
// Operator: expression.AdditionOperator,
// Left: &expression.IntegerLiteralNode{
// Value: 18,
// },
// Right: &expression.IntegerLiteralNode{
// Value: 24,
// },
// },
// },
// want: execute.Value{
// Type: execute.TInt,
// Value: int64(42),
// },
// },
// {
// name: "integer addition using scope",
// expr: expression.Expression{
// Root: &expression.BinaryNode{
// Operator: expression.AdditionOperator,
// Left: &expression.ReferenceNode{
// Name: "a",
// },
// Right: &expression.ReferenceNode{
// Name: "b",
// },
// },
// },
// types: map[string]execute.DataType{
// "a": execute.TInt,
// "b": execute.TInt,
// },
// scope: map[string]execute.Value{
// "a": {Type: execute.TInt, Value: int64(18)},
// "b": {Type: execute.TInt, Value: int64(24)},
// },
// want: execute.Value{
// Type: execute.TInt,
// Value: int64(42),
// },
// },
// {
// name: "integer addition missing scope",
// expr: expression.Expression{
// Root: &expression.BinaryNode{
// Operator: expression.AdditionOperator,
// Left: &expression.ReferenceNode{
// Name: "a",
// },
// Right: &expression.ReferenceNode{
// Name: "b",
// },
// },
// },
// types: map[string]execute.DataType{
// "a": execute.TInt,
// "b": execute.TInt,
// },
// scope: map[string]execute.Value{
// "a": {Type: execute.TInt, Value: int64(18)},
// },
// wantErr: true,
// },
// {
// name: "integer addition incorrect scope",
// expr: expression.Expression{
// Root: &expression.BinaryNode{
// Operator: expression.AdditionOperator,
// Left: &expression.ReferenceNode{
// Name: "a",
// },
// Right: &expression.ReferenceNode{
// Name: "b",
// },
// },
// },
// types: map[string]execute.DataType{
// "a": execute.TInt,
// "b": execute.TInt,
// },
// scope: map[string]execute.Value{
// "a": {Type: execute.TInt, Value: int64(18)},
// "b": {Type: execute.TFloat, Value: float64(18)},
// },
// wantErr: true,
// },
// {
// name: "unsigned integer addition",
// expr: expression.Expression{
// Root: &expression.BinaryNode{
// Operator: expression.AdditionOperator,
// Left: &expression.ReferenceNode{
// Name: "a",
// },
// Right: &expression.ReferenceNode{
// Name: "b",
// },
// },
// },
// types: map[string]execute.DataType{
// "a": execute.TUInt,
// "b": execute.TUInt,
// },
// scope: map[string]execute.Value{
// "a": {Type: execute.TUInt, Value: uint64(18)},
// "b": {Type: execute.TUInt, Value: uint64(24)},
// },
// want: execute.Value{
// Type: execute.TUInt,
// Value: uint64(42),
// },
// },
// {
// name: "float addition",
// expr: expression.Expression{
// Root: &expression.BinaryNode{
// Operator: expression.AdditionOperator,
// Left: &expression.FloatLiteralNode{
// Value: 18,
// },
// Right: &expression.FloatLiteralNode{
// Value: 24,
// },
// },
// },
// want: execute.Value{
// Type: execute.TFloat,
// Value: float64(42),
// },
// },
// {
// name: "boolean and",
// expr: expression.Expression{
// Root: &expression.BinaryNode{
// Operator: expression.AndOperator,
// Left: &expression.BooleanLiteralNode{
// Value: true,
// },
// Right: &expression.BooleanLiteralNode{
// Value: true,
// },
// },
// },
// want: execute.Value{
// Type: execute.TBool,
// Value: true,
// },
// },
// {
// name: "boolean or",
// expr: expression.Expression{
// Root: &expression.BinaryNode{
// Operator: expression.OrOperator,
// Left: &expression.BooleanLiteralNode{
// Value: false,
// },
// Right: &expression.BooleanLiteralNode{
// Value: true,
// },
// },
// },
// want: execute.Value{
// Type: execute.TBool,
// Value: true,
// },
// },
// }
// for _, tc := range testCases {
// tc := tc
// t.Run(tc.name, func(t *testing.T) {
// ce, err := execute.CompileExpression(tc.expr, tc.types)
// if err != nil {
// t.Fatal(err)
// }
// got, err := ce.Eval(tc.scope)
// if err != nil {
// if !tc.wantErr {
// t.Fatal(err)
// }
// } else if tc.wantErr {
// t.Fatal("expected evaluation error")
// }
// if !cmp.Equal(got, tc.want) {
// t.Errorf("unexpected value: -want/+got\n%s", cmp.Diff(tc.want, got))
// }
// })
// }
//}

286
query/execute/format.go Normal file
View File

@ -0,0 +1,286 @@
package execute
import (
"io"
"sort"
"strconv"
"strings"
)
const fixedWidthTimeFmt = "2006-01-02T15:04:05.000000000Z"
// Formatter writes a block to a Writer.
type Formatter struct {
b Block
widths []int
maxWidth int
newWidths []int
pad []byte
dash []byte
// fmtBuf is used to format values
fmtBuf [64]byte
opts FormatOptions
cols orderedCols
}
type FormatOptions struct {
// RepeatHeaderCount is the number of rows to print before printing the header again.
// If zero then the headers are not repeated.
RepeatHeaderCount int
}
func DefaultFormatOptions() *FormatOptions {
return &FormatOptions{}
}
var eol = []byte{'\n'}
// NewFormatter creates a Formatter for a given block.
// If opts is nil, the DefaultFormatOptions are used.
func NewFormatter(b Block, opts *FormatOptions) *Formatter {
if opts == nil {
opts = DefaultFormatOptions()
}
return &Formatter{
b: b,
opts: *opts,
}
}
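// Example usage (a minimal sketch; assumes a Block value b and an io.Writer w
// are already in hand):
//
//	f := NewFormatter(b, nil) // nil selects the default options
//	if _, err := f.WriteTo(w); err != nil {
//		// handle the write error
//	}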
type writeToHelper struct {
w io.Writer
n int64
err error
}
func (w *writeToHelper) write(data []byte) {
if w.err != nil {
return
}
n, err := w.w.Write(data)
w.n += int64(n)
w.err = err
}
var minWidthsByType = map[DataType]int{
TBool: 12,
TInt: 26,
TUInt: 27,
TFloat: 28,
TString: 22,
TTime: len(fixedWidthTimeFmt),
TInvalid: 10,
}
// WriteTo writes the formatted block data to out.
func (f *Formatter) WriteTo(out io.Writer) (int64, error) {
w := &writeToHelper{w: out}
// Sort cols
cols := f.b.Cols()
f.cols = newOrderedCols(cols)
sort.Sort(f.cols)
// Compute header widths
f.widths = make([]int, len(cols))
for j, c := range cols {
l := len(c.Label)
min := minWidthsByType[c.Type]
if min > l {
l = min
}
if l > f.widths[j] {
f.widths[j] = l
}
if l > f.maxWidth {
f.maxWidth = l
}
}
// Write Block header
w.write([]byte("Block: keys: ["))
labels := make([]string, len(f.b.Key().Cols()))
for i, c := range f.b.Key().Cols() {
labels[i] = c.Label
}
w.write([]byte(strings.Join(labels, ", ")))
w.write([]byte("]"))
w.write(eol)
// Check err and return early
if w.err != nil {
return w.n, w.err
}
// Write rows
r := 0
f.b.Do(func(cr ColReader) error {
if r == 0 {
l := cr.Len()
for i := 0; i < l; i++ {
for oj, c := range f.cols.cols {
j := f.cols.Idx(oj)
buf := f.valueBuf(i, j, c.Type, cr)
l := len(buf)
if l > f.widths[j] {
f.widths[j] = l
}
if l > f.maxWidth {
f.maxWidth = l
}
}
}
f.makePaddingBuffers()
f.writeHeader(w)
f.writeHeaderSeparator(w)
f.newWidths = make([]int, len(f.widths))
copy(f.newWidths, f.widths)
}
l := cr.Len()
for i := 0; i < l; i++ {
for oj, c := range f.cols.cols {
j := f.cols.Idx(oj)
buf := f.valueBuf(i, j, c.Type, cr)
l := len(buf)
padding := f.widths[j] - l
if padding >= 0 {
w.write(f.pad[:padding])
w.write(buf)
} else {
//TODO make unicode friendly
w.write(buf[:f.widths[j]-3])
w.write([]byte{'.', '.', '.'})
}
w.write(f.pad[:2])
if l > f.newWidths[j] {
f.newWidths[j] = l
}
if l > f.maxWidth {
f.maxWidth = l
}
}
w.write(eol)
r++
if f.opts.RepeatHeaderCount > 0 && r%f.opts.RepeatHeaderCount == 0 {
copy(f.widths, f.newWidths)
f.makePaddingBuffers()
f.writeHeaderSeparator(w)
f.writeHeader(w)
f.writeHeaderSeparator(w)
}
}
return w.err
})
return w.n, w.err
}
func (f *Formatter) makePaddingBuffers() {
if len(f.pad) != f.maxWidth {
f.pad = make([]byte, f.maxWidth)
for i := range f.pad {
f.pad[i] = ' '
}
}
if len(f.dash) != f.maxWidth {
f.dash = make([]byte, f.maxWidth)
for i := range f.dash {
f.dash[i] = '-'
}
}
}
func (f *Formatter) writeHeader(w *writeToHelper) {
for oj, c := range f.cols.cols {
j := f.cols.Idx(oj)
buf := append(append([]byte(c.Label), ':'), []byte(c.Type.String())...)
// Guard against headers wider than the computed column width.
if padding := f.widths[j] - len(buf); padding > 0 {
w.write(f.pad[:padding])
}
w.write(buf)
w.write(f.pad[:2])
}
w.write(eol)
}
func (f *Formatter) writeHeaderSeparator(w *writeToHelper) {
for oj := range f.cols.cols {
j := f.cols.Idx(oj)
w.write(f.dash[:f.widths[j]])
w.write(f.pad[:2])
}
w.write(eol)
}
func (f *Formatter) valueBuf(i, j int, typ DataType, cr ColReader) (buf []byte) {
switch typ {
case TBool:
buf = strconv.AppendBool(f.fmtBuf[0:0], cr.Bools(j)[i])
case TInt:
buf = strconv.AppendInt(f.fmtBuf[0:0], cr.Ints(j)[i], 10)
case TUInt:
buf = strconv.AppendUint(f.fmtBuf[0:0], cr.UInts(j)[i], 10)
case TFloat:
// TODO allow specifying format and precision
buf = strconv.AppendFloat(f.fmtBuf[0:0], cr.Floats(j)[i], 'f', -1, 64)
case TString:
buf = []byte(cr.Strings(j)[i])
case TTime:
buf = []byte(cr.Times(j)[i].String())
}
return
}
// orderedCols sorts a list of columns:
//
// * time
// * common tags sorted by label
// * other tags sorted by label
// * value
//
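// For example, with the default labels, columns ordered
// [_value, host, _time, region] sort to [_time, host, region, _value].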
type orderedCols struct {
indexMap []int
cols []ColMeta
}
func newOrderedCols(cols []ColMeta) orderedCols {
indexMap := make([]int, len(cols))
for i := range indexMap {
indexMap[i] = i
}
cpy := make([]ColMeta, len(cols))
copy(cpy, cols)
return orderedCols{
indexMap: indexMap,
cols: cpy,
}
}
func (o orderedCols) Idx(oj int) int {
return o.indexMap[oj]
}
func (o orderedCols) Len() int { return len(o.cols) }
func (o orderedCols) Swap(i int, j int) {
o.cols[i], o.cols[j] = o.cols[j], o.cols[i]
o.indexMap[i], o.indexMap[j] = o.indexMap[j], o.indexMap[i]
}
func (o orderedCols) Less(i int, j int) bool {
// Time column is always first
if o.cols[i].Label == DefaultTimeColLabel {
return true
}
if o.cols[j].Label == DefaultTimeColLabel {
return false
}
// Value column is always last
if o.cols[i].Label == DefaultValueColLabel {
return false
}
if o.cols[j].Label == DefaultValueColLabel {
return true
}
// within a class sort by label
return o.cols[i].Label < o.cols[j].Label
}

View File

@ -0,0 +1,102 @@
package execute
import (
"encoding/binary"
"hash/fnv"
"math"
)
type PartitionLookup struct {
partitions map[uint64][]partitionEntry
}
type partitionEntry struct {
key PartitionKey
value interface{}
}
func NewPartitionLookup() *PartitionLookup {
return &PartitionLookup{
partitions: make(map[uint64][]partitionEntry),
}
}
func (l *PartitionLookup) Lookup(key PartitionKey) (interface{}, bool) {
if key == nil {
return nil, false
}
h := key.Hash()
entries := l.partitions[h]
if len(entries) == 1 {
return entries[0].value, true
}
for _, entry := range entries {
if entry.key.Equal(key) {
return entry.value, true
}
}
return nil, false
}
func (l *PartitionLookup) Set(key PartitionKey, value interface{}) {
h := key.Hash()
entries := l.partitions[h]
l.partitions[h] = append(entries, partitionEntry{
key: key,
value: value,
})
}
func (l *PartitionLookup) Delete(key PartitionKey) (interface{}, bool) {
if key == nil {
return nil, false
}
h := key.Hash()
entries := l.partitions[h]
if len(entries) == 1 {
delete(l.partitions, h)
return entries[0].value, true
}
for i, entry := range entries {
if entry.key.Equal(key) {
l.partitions[h] = append(entries[:i], entries[i+1:]...)
return entry.value, true
}
}
return nil, false
}
func (l *PartitionLookup) Range(f func(key PartitionKey, value interface{})) {
for _, entries := range l.partitions {
for _, entry := range entries {
f(entry.key, entry.value)
}
}
}
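// Example usage (a minimal sketch; key is any PartitionKey implementation):
//
//	l := NewPartitionLookup()
//	l.Set(key, state)
//	if v, ok := l.Lookup(key); ok {
//		// v is the interface{} value stored for key
//	}
//	l.Delete(key)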
func computeKeyHash(key PartitionKey) uint64 {
h := fnv.New64()
for j, c := range key.Cols() {
h.Write([]byte(c.Label))
switch c.Type {
case TBool:
if key.ValueBool(j) {
h.Write([]byte{1})
} else {
h.Write([]byte{0})
}
case TInt:
binary.Write(h, binary.BigEndian, key.ValueInt(j))
case TUInt:
binary.Write(h, binary.BigEndian, key.ValueUInt(j))
case TFloat:
binary.Write(h, binary.BigEndian, math.Float64bits(key.ValueFloat(j)))
case TString:
h.Write([]byte(key.ValueString(j)))
case TTime:
binary.Write(h, binary.BigEndian, uint64(key.ValueTime(j)))
}
}
return h.Sum64()
}

64
query/execute/queue.go Normal file
View File

@ -0,0 +1,64 @@
package execute
import (
"sync"
"sync/atomic"
)
// MessageQueue provides a concurrency safe queue for messages.
// The queue must have a single consumer calling Pop.
type MessageQueue interface {
Push(Message)
Pop() Message
}
type unboundedMessageQueue struct {
buf []Message
head int
tail int
mu sync.Mutex
len int32
}
func newMessageQueue(n int) *unboundedMessageQueue {
return &unboundedMessageQueue{
buf: make([]Message, n),
}
}
func (q *unboundedMessageQueue) Push(m Message) {
q.mu.Lock()
size := len(q.buf)
q.tail = (q.tail + 1) % size
if q.tail == q.head {
// Resize
buf := make([]Message, size*2)
copy(buf, q.buf[q.head:])
copy(buf[size-q.head:], q.buf[:q.head])
q.head = 0
q.tail = size
q.buf = buf
}
atomic.AddInt32(&q.len, 1)
q.buf[q.tail] = m
q.mu.Unlock()
}
func (q *unboundedMessageQueue) Len() int {
return int(atomic.LoadInt32(&q.len))
}
func (q *unboundedMessageQueue) Pop() Message {
if q.Len() == 0 {
return nil
}
q.mu.Lock()
size := len(q.buf)
q.head = (q.head + 1) % size
m := q.buf[q.head]
q.buf[q.head] = nil
atomic.AddInt32(&q.len, -1)
q.mu.Unlock()
return m
}
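// Example usage (a minimal sketch): any number of goroutines may Push, but
// only a single consumer may call Pop.
//
//	q := newMessageQueue(64)
//	q.Push(m) // m satisfies Message
//	for msg := q.Pop(); msg != nil; msg = q.Pop() {
//		// process msg
//	}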

119
query/execute/result.go Normal file
View File

@ -0,0 +1,119 @@
package execute
import (
"sync"
"github.com/influxdata/ifql/query/plan"
)
type Result interface {
// Blocks returns a BlockIterator for iterating through results
Blocks() BlockIterator
}
// result implements both the Transformation and Result interfaces,
// mapping the push-based Transformation API to the pull-based Result interface.
type result struct {
mu sync.Mutex
blocks chan resultMessage
abortErr chan error
aborted chan struct{}
}
type resultMessage struct {
block Block
err error
}
func newResult(plan.YieldSpec) *result {
return &result{
// TODO(nathanielc): Currently this buffer needs to be big enough to hold all result blocks :(
blocks: make(chan resultMessage, 1000),
abortErr: make(chan error, 1),
aborted: make(chan struct{}),
}
}
func (s *result) RetractBlock(DatasetID, PartitionKey) error {
//TODO implement
return nil
}
func (s *result) Process(id DatasetID, b Block) error {
select {
case s.blocks <- resultMessage{
block: b,
}:
case <-s.aborted:
}
return nil
}
func (s *result) Blocks() BlockIterator {
return s
}
func (s *result) Do(f func(Block) error) error {
for {
select {
case err := <-s.abortErr:
return err
case msg, more := <-s.blocks:
if !more {
return nil
}
if msg.err != nil {
return msg.err
}
if err := f(msg.block); err != nil {
return err
}
}
}
}
func (s *result) UpdateWatermark(id DatasetID, mark Time) error {
//Nothing to do
return nil
}
func (s *result) UpdateProcessingTime(id DatasetID, t Time) error {
//Nothing to do
return nil
}
func (s *result) setTrigger(Trigger) {
//TODO: Change interfaces so that resultSink does not need to implement this method.
}
func (s *result) Finish(id DatasetID, err error) {
if err != nil {
select {
case s.blocks <- resultMessage{
err: err,
}:
case <-s.aborted:
}
}
close(s.blocks)
}
// Abort the result with the given error
func (s *result) abort(err error) {
s.mu.Lock()
defer s.mu.Unlock()
// Check if we have already aborted
aborted := false
select {
case <-s.aborted:
aborted = true
default:
}
if aborted {
return // already aborted
}
s.abortErr <- err
close(s.aborted)
}
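// Example consumption (a minimal sketch; r is a *result wired into the
// execution graph as a Transformation):
//
//	err := r.Blocks().Do(func(b Block) error {
//		// read or format b
//		return nil
//	})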

335
query/execute/row_fn.go Normal file
View File

@ -0,0 +1,335 @@
package execute
import (
"fmt"
"regexp"
"github.com/influxdata/ifql/compiler"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/semantic"
"github.com/influxdata/ifql/values"
"github.com/pkg/errors"
)
type rowFn struct {
fn *semantic.FunctionExpression
compilationCache *compiler.CompilationCache
scope compiler.Scope
preparedFn compiler.Func
recordName string
record *Record
recordCols map[string]int
references []string
}
func newRowFn(fn *semantic.FunctionExpression) (rowFn, error) {
if len(fn.Params) != 1 {
return rowFn{}, fmt.Errorf("function should only have a single parameter, got %d", len(fn.Params))
}
scope, decls := query.BuiltIns()
return rowFn{
compilationCache: compiler.NewCompilationCache(fn, scope, decls),
scope: make(compiler.Scope, 1),
recordName: fn.Params[0].Key.Name,
references: findColReferences(fn),
recordCols: make(map[string]int),
}, nil
}
func (f *rowFn) prepare(cols []ColMeta) error {
// Prepare types and recordCols
propertyTypes := make(map[string]semantic.Type, len(f.references))
for _, r := range f.references {
found := false
for j, c := range cols {
if r == c.Label {
f.recordCols[r] = j
found = true
propertyTypes[r] = ConvertToKind(c.Type)
break
}
}
if !found {
return fmt.Errorf("function references unknown column %q", r)
}
}
f.record = NewRecord(semantic.NewObjectType(propertyTypes))
// Compile fn for given types
fn, err := f.compilationCache.Compile(map[string]semantic.Type{
f.recordName: f.record.Type(),
})
if err != nil {
return err
}
f.preparedFn = fn
return nil
}
func ConvertToKind(t DataType) semantic.Kind {
// TODO make this an array lookup.
switch t {
case TInvalid:
return semantic.Invalid
case TBool:
return semantic.Bool
case TInt:
return semantic.Int
case TUInt:
return semantic.UInt
case TFloat:
return semantic.Float
case TString:
return semantic.String
case TTime:
return semantic.Time
default:
return semantic.Invalid
}
}
func ConvertFromKind(k semantic.Kind) DataType {
// TODO make this an array lookup.
switch k {
case semantic.Invalid:
return TInvalid
case semantic.Bool:
return TBool
case semantic.Int:
return TInt
case semantic.UInt:
return TUInt
case semantic.Float:
return TFloat
case semantic.String:
return TString
case semantic.Time:
return TTime
default:
return TInvalid
}
}
func (f *rowFn) eval(row int, cr ColReader) (values.Value, error) {
for _, r := range f.references {
f.record.Set(r, ValueForRow(row, f.recordCols[r], cr))
}
f.scope[f.recordName] = f.record
return f.preparedFn.Eval(f.scope)
}
type RowPredicateFn struct {
rowFn
}
func NewRowPredicateFn(fn *semantic.FunctionExpression) (*RowPredicateFn, error) {
r, err := newRowFn(fn)
if err != nil {
return nil, err
}
return &RowPredicateFn{
rowFn: r,
}, nil
}
func (f *RowPredicateFn) Prepare(cols []ColMeta) error {
err := f.rowFn.prepare(cols)
if err != nil {
return err
}
if f.preparedFn.Type() != semantic.Bool {
return errors.New("row predicate function does not evaluate to a boolean")
}
return nil
}
func (f *RowPredicateFn) Eval(row int, cr ColReader) (bool, error) {
v, err := f.rowFn.eval(row, cr)
if err != nil {
return false, err
}
return v.Bool(), nil
}
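// Example usage (a minimal sketch; fn is the *semantic.FunctionExpression for
// a predicate such as (r) => r._value > 5.0, and cols/cr come from the block):
//
//	p, err := NewRowPredicateFn(fn)
//	if err != nil {
//		return err
//	}
//	if err := p.Prepare(cols); err != nil {
//		return err
//	}
//	keep, err := p.Eval(i, cr)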
type RowMapFn struct {
rowFn
isWrap bool
wrapObj *Record
}
func NewRowMapFn(fn *semantic.FunctionExpression) (*RowMapFn, error) {
r, err := newRowFn(fn)
if err != nil {
return nil, err
}
return &RowMapFn{
rowFn: r,
}, nil
}
func (f *RowMapFn) Prepare(cols []ColMeta) error {
err := f.rowFn.prepare(cols)
if err != nil {
return err
}
k := f.preparedFn.Type().Kind()
f.isWrap = k != semantic.Object
if f.isWrap {
f.wrapObj = NewRecord(semantic.NewObjectType(map[string]semantic.Type{
DefaultValueColLabel: f.preparedFn.Type(),
}))
}
return nil
}
func (f *RowMapFn) Type() semantic.Type {
if f.isWrap {
return f.wrapObj.Type()
}
return f.preparedFn.Type()
}
func (f *RowMapFn) Eval(row int, cr ColReader) (values.Object, error) {
v, err := f.rowFn.eval(row, cr)
if err != nil {
return nil, err
}
if f.isWrap {
f.wrapObj.Set(DefaultValueColLabel, v)
return f.wrapObj, nil
}
return v.Object(), nil
}
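// For example, a map fn returning a bare float, such as (r) => r._value * 2.0,
// is wrapped into an object with the single property _value, while a fn that
// returns an object literal passes through unchanged.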
func ValueForRow(i, j int, cr ColReader) values.Value {
t := cr.Cols()[j].Type
switch t {
case TString:
return values.NewStringValue(cr.Strings(j)[i])
case TInt:
return values.NewIntValue(cr.Ints(j)[i])
case TUInt:
return values.NewUIntValue(cr.UInts(j)[i])
case TFloat:
return values.NewFloatValue(cr.Floats(j)[i])
case TBool:
return values.NewBoolValue(cr.Bools(j)[i])
case TTime:
return values.NewTimeValue(cr.Times(j)[i])
default:
PanicUnknownType(t)
return nil
}
}
func AppendValue(builder BlockBuilder, j int, v values.Value) {
switch k := v.Type().Kind(); k {
case semantic.Bool:
builder.AppendBool(j, v.Bool())
case semantic.Int:
builder.AppendInt(j, v.Int())
case semantic.UInt:
builder.AppendUInt(j, v.UInt())
case semantic.Float:
builder.AppendFloat(j, v.Float())
case semantic.String:
builder.AppendString(j, v.Str())
case semantic.Time:
builder.AppendTime(j, v.Time())
default:
PanicUnknownType(ConvertFromKind(k))
}
}
func findColReferences(fn *semantic.FunctionExpression) []string {
v := &colReferenceVisitor{
recordName: fn.Params[0].Key.Name,
}
semantic.Walk(v, fn)
return v.refs
}
type colReferenceVisitor struct {
recordName string
refs []string
}
func (c *colReferenceVisitor) Visit(node semantic.Node) semantic.Visitor {
if me, ok := node.(*semantic.MemberExpression); ok {
if obj, ok := me.Object.(*semantic.IdentifierExpression); ok && obj.Name == c.recordName {
c.refs = append(c.refs, me.Property)
}
}
return c
}
func (c *colReferenceVisitor) Done() {}
type Record struct {
t semantic.Type
values map[string]values.Value
}
func NewRecord(t semantic.Type) *Record {
return &Record{
t: t,
values: make(map[string]values.Value),
}
}
func (r *Record) Type() semantic.Type {
return r.t
}
func (r *Record) Str() string {
panic(values.UnexpectedKind(semantic.Object, semantic.String))
}
func (r *Record) Int() int64 {
panic(values.UnexpectedKind(semantic.Object, semantic.Int))
}
func (r *Record) UInt() uint64 {
panic(values.UnexpectedKind(semantic.Object, semantic.UInt))
}
func (r *Record) Float() float64 {
panic(values.UnexpectedKind(semantic.Object, semantic.Float))
}
func (r *Record) Bool() bool {
panic(values.UnexpectedKind(semantic.Object, semantic.Bool))
}
func (r *Record) Time() values.Time {
panic(values.UnexpectedKind(semantic.Object, semantic.Time))
}
func (r *Record) Duration() values.Duration {
panic(values.UnexpectedKind(semantic.Object, semantic.Duration))
}
func (r *Record) Regexp() *regexp.Regexp {
panic(values.UnexpectedKind(semantic.Object, semantic.Regexp))
}
func (r *Record) Array() values.Array {
panic(values.UnexpectedKind(semantic.Object, semantic.Array))
}
func (r *Record) Object() values.Object {
return r
}
func (r *Record) Function() values.Function {
panic(values.UnexpectedKind(semantic.Object, semantic.Function))
}
func (r *Record) Set(name string, v values.Value) {
r.values[name] = v
}
func (r *Record) Get(name string) (values.Value, bool) {
v, ok := r.values[name]
return v, ok
}
func (r *Record) Len() int {
return len(r.values)
}
func (r *Record) Range(f func(name string, v values.Value)) {
for k, v := range r.values {
f(k, v)
}
}

326
query/execute/selector.go Normal file
View File

@ -0,0 +1,326 @@
package execute
import (
"fmt"
"github.com/influxdata/ifql/query"
)
type selectorTransformation struct {
d Dataset
cache BlockBuilderCache
config SelectorConfig
}
type SelectorConfig struct {
Column string `json:"column"`
}
func (c *SelectorConfig) ReadArgs(args query.Arguments) error {
if col, ok, err := args.GetString("column"); err != nil {
return err
} else if ok {
c.Column = col
}
return nil
}
type rowSelectorTransformation struct {
selectorTransformation
selector RowSelector
}
type indexSelectorTransformation struct {
selectorTransformation
selector IndexSelector
}
func NewRowSelectorTransformationAndDataset(id DatasetID, mode AccumulationMode, selector RowSelector, config SelectorConfig, a *Allocator) (*rowSelectorTransformation, Dataset) {
cache := NewBlockBuilderCache(a)
d := NewDataset(id, mode, cache)
return NewRowSelectorTransformation(d, cache, selector, config), d
}
func NewRowSelectorTransformation(d Dataset, c BlockBuilderCache, selector RowSelector, config SelectorConfig) *rowSelectorTransformation {
return &rowSelectorTransformation{
selectorTransformation: newSelectorTransformation(d, c, config),
selector: selector,
}
}
func NewIndexSelectorTransformationAndDataset(id DatasetID, mode AccumulationMode, selector IndexSelector, config SelectorConfig, a *Allocator) (*indexSelectorTransformation, Dataset) {
cache := NewBlockBuilderCache(a)
d := NewDataset(id, mode, cache)
return NewIndexSelectorTransformation(d, cache, selector, config), d
}
func NewIndexSelectorTransformation(d Dataset, c BlockBuilderCache, selector IndexSelector, config SelectorConfig) *indexSelectorTransformation {
return &indexSelectorTransformation{
selectorTransformation: newSelectorTransformation(d, c, config),
selector: selector,
}
}
func newSelectorTransformation(d Dataset, c BlockBuilderCache, config SelectorConfig) selectorTransformation {
if config.Column == "" {
config.Column = DefaultValueColLabel
}
return selectorTransformation{
d: d,
cache: c,
config: config,
}
}
func (t *selectorTransformation) RetractBlock(id DatasetID, key PartitionKey) error {
//TODO(nathanielc): Store intermediate state for retractions
return t.d.RetractBlock(key)
}
func (t *selectorTransformation) UpdateWatermark(id DatasetID, mark Time) error {
return t.d.UpdateWatermark(mark)
}
func (t *selectorTransformation) UpdateProcessingTime(id DatasetID, pt Time) error {
return t.d.UpdateProcessingTime(pt)
}
func (t *selectorTransformation) Finish(id DatasetID, err error) {
t.d.Finish(err)
}
func (t *selectorTransformation) setupBuilder(b Block) (BlockBuilder, int, error) {
builder, created := t.cache.BlockBuilder(b.Key())
if !created {
return nil, 0, fmt.Errorf("found duplicate block with key: %v", b.Key())
}
AddBlockCols(b, builder)
cols := builder.Cols()
valueIdx := ColIdx(t.config.Column, cols)
if valueIdx < 0 {
return nil, 0, fmt.Errorf("no column %q exists", t.config.Column)
}
return builder, valueIdx, nil
}
func (t *indexSelectorTransformation) Process(id DatasetID, b Block) error {
builder, valueIdx, err := t.setupBuilder(b)
if err != nil {
return err
}
valueCol := builder.Cols()[valueIdx]
var s interface{}
switch valueCol.Type {
case TBool:
s = t.selector.NewBoolSelector()
case TInt:
s = t.selector.NewIntSelector()
case TUInt:
s = t.selector.NewUIntSelector()
case TFloat:
s = t.selector.NewFloatSelector()
case TString:
s = t.selector.NewStringSelector()
default:
return fmt.Errorf("unsupported selector type %v", valueCol.Type)
}
return b.Do(func(cr ColReader) error {
switch valueCol.Type {
case TBool:
selected := s.(DoBoolIndexSelector).DoBool(cr.Bools(valueIdx))
t.appendSelected(selected, builder, cr)
case TInt:
selected := s.(DoIntIndexSelector).DoInt(cr.Ints(valueIdx))
t.appendSelected(selected, builder, cr)
case TUInt:
selected := s.(DoUIntIndexSelector).DoUInt(cr.UInts(valueIdx))
t.appendSelected(selected, builder, cr)
case TFloat:
selected := s.(DoFloatIndexSelector).DoFloat(cr.Floats(valueIdx))
t.appendSelected(selected, builder, cr)
case TString:
selected := s.(DoStringIndexSelector).DoString(cr.Strings(valueIdx))
t.appendSelected(selected, builder, cr)
default:
return fmt.Errorf("unsupported selector type %v", valueCol.Type)
}
return nil
})
}
func (t *rowSelectorTransformation) Process(id DatasetID, b Block) error {
builder, valueIdx, err := t.setupBuilder(b)
if err != nil {
return err
}
valueCol := builder.Cols()[valueIdx]
var rower Rower
switch valueCol.Type {
case TBool:
rower = t.selector.NewBoolSelector()
case TInt:
rower = t.selector.NewIntSelector()
case TUInt:
rower = t.selector.NewUIntSelector()
case TFloat:
rower = t.selector.NewFloatSelector()
case TString:
rower = t.selector.NewStringSelector()
default:
return fmt.Errorf("unsupported selector type %v", valueCol.Type)
}
err = b.Do(func(cr ColReader) error {
switch valueCol.Type {
case TBool:
rower.(DoBoolRowSelector).DoBool(cr.Bools(valueIdx), cr)
case TInt:
rower.(DoIntRowSelector).DoInt(cr.Ints(valueIdx), cr)
case TUInt:
rower.(DoUIntRowSelector).DoUInt(cr.UInts(valueIdx), cr)
case TFloat:
rower.(DoFloatRowSelector).DoFloat(cr.Floats(valueIdx), cr)
case TString:
rower.(DoStringRowSelector).DoString(cr.Strings(valueIdx), cr)
default:
return fmt.Errorf("unsupported selector type %v", valueCol.Type)
}
return nil
})
if err != nil {
return err
}
rows := rower.Rows()
t.appendRows(builder, rows)
return nil
}
func (t *indexSelectorTransformation) appendSelected(selected []int, builder BlockBuilder, cr ColReader) {
if len(selected) == 0 {
return
}
cols := builder.Cols()
for j, c := range cols {
for _, i := range selected {
switch c.Type {
case TBool:
builder.AppendBool(j, cr.Bools(j)[i])
case TInt:
builder.AppendInt(j, cr.Ints(j)[i])
case TUInt:
builder.AppendUInt(j, cr.UInts(j)[i])
case TFloat:
builder.AppendFloat(j, cr.Floats(j)[i])
case TString:
builder.AppendString(j, cr.Strings(j)[i])
case TTime:
builder.AppendTime(j, cr.Times(j)[i])
default:
PanicUnknownType(c.Type)
}
}
}
}
func (t *rowSelectorTransformation) appendRows(builder BlockBuilder, rows []Row) {
cols := builder.Cols()
for j, c := range cols {
for _, row := range rows {
v := row.Values[j]
switch c.Type {
case TBool:
builder.AppendBool(j, v.(bool))
case TInt:
builder.AppendInt(j, v.(int64))
case TUInt:
builder.AppendUInt(j, v.(uint64))
case TFloat:
builder.AppendFloat(j, v.(float64))
case TString:
builder.AppendString(j, v.(string))
case TTime:
builder.AppendTime(j, v.(Time))
default:
PanicUnknownType(c.Type)
}
}
}
}
type IndexSelector interface {
NewBoolSelector() DoBoolIndexSelector
NewIntSelector() DoIntIndexSelector
NewUIntSelector() DoUIntIndexSelector
NewFloatSelector() DoFloatIndexSelector
NewStringSelector() DoStringIndexSelector
}
type DoBoolIndexSelector interface {
DoBool([]bool) []int
}
type DoIntIndexSelector interface {
DoInt([]int64) []int
}
type DoUIntIndexSelector interface {
DoUInt([]uint64) []int
}
type DoFloatIndexSelector interface {
DoFloat([]float64) []int
}
type DoStringIndexSelector interface {
DoString([]string) []int
}
type RowSelector interface {
NewBoolSelector() DoBoolRowSelector
NewIntSelector() DoIntRowSelector
NewUIntSelector() DoUIntRowSelector
NewFloatSelector() DoFloatRowSelector
NewStringSelector() DoStringRowSelector
}
type Rower interface {
Rows() []Row
}
type DoBoolRowSelector interface {
Rower
DoBool(vs []bool, cr ColReader)
}
type DoIntRowSelector interface {
Rower
DoInt(vs []int64, cr ColReader)
}
type DoUIntRowSelector interface {
Rower
DoUInt(vs []uint64, cr ColReader)
}
type DoFloatRowSelector interface {
Rower
DoFloat(vs []float64, cr ColReader)
}
type DoStringRowSelector interface {
Rower
DoString(vs []string, cr ColReader)
}
type Row struct {
Values []interface{}
}
func ReadRow(i int, cr ColReader) (row Row) {
cols := cr.Cols()
row.Values = make([]interface{}, len(cols))
for j, c := range cols {
switch c.Type {
case TBool:
row.Values[j] = cr.Bools(j)[i]
case TInt:
row.Values[j] = cr.Ints(j)[i]
case TUInt:
row.Values[j] = cr.UInts(j)[i]
case TFloat:
row.Values[j] = cr.Floats(j)[i]
case TString:
row.Values[j] = cr.Strings(j)[i]
case TTime:
row.Values[j] = cr.Times(j)[i]
}
}
return
}
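// Example implementation (a minimal sketch of a float row selector that keeps
// the row holding the minimum value; the real selectors live in the functions
// package):
//
//	type minRowSelector struct {
//		set  bool
//		min  float64
//		rows []Row
//	}
//
//	func (s *minRowSelector) DoFloat(vs []float64, cr ColReader) {
//		for i, v := range vs {
//			if !s.set || v < s.min {
//				s.set, s.min = true, v
//				s.rows = []Row{ReadRow(i, cr)}
//			}
//		}
//	}
//
//	func (s *minRowSelector) Rows() []Row { return s.rows }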

View File

@ -0,0 +1,367 @@
package execute_test
import (
"sort"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/influxdata/ifql/functions"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/execute/executetest"
)
func TestRowSelector_Process(t *testing.T) {
// All test cases use a simple MinSelector
testCases := []struct {
name string
config execute.SelectorConfig
data []*executetest.Block
want []*executetest.Block
}{
{
name: "single",
config: execute.SelectorConfig{
Column: "_value",
},
data: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(1), 0.0},
{execute.Time(0), execute.Time(100), execute.Time(10), 1.0},
{execute.Time(0), execute.Time(100), execute.Time(20), 2.0},
{execute.Time(0), execute.Time(100), execute.Time(30), 3.0},
{execute.Time(0), execute.Time(100), execute.Time(40), 4.0},
{execute.Time(0), execute.Time(100), execute.Time(50), 5.0},
{execute.Time(0), execute.Time(100), execute.Time(60), 6.0},
{execute.Time(0), execute.Time(100), execute.Time(70), 7.0},
{execute.Time(0), execute.Time(100), execute.Time(80), 8.0},
{execute.Time(0), execute.Time(100), execute.Time(90), 9.0},
},
}},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(1), 0.0},
},
}},
},
{
name: "single custom column",
config: execute.SelectorConfig{
Column: "x",
},
data: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(1), 0.0},
{execute.Time(0), execute.Time(100), execute.Time(10), 1.0},
{execute.Time(0), execute.Time(100), execute.Time(20), 2.0},
{execute.Time(0), execute.Time(100), execute.Time(30), 3.0},
{execute.Time(0), execute.Time(100), execute.Time(40), 4.0},
{execute.Time(0), execute.Time(100), execute.Time(50), 5.0},
{execute.Time(0), execute.Time(100), execute.Time(60), 6.0},
{execute.Time(0), execute.Time(100), execute.Time(70), 7.0},
{execute.Time(0), execute.Time(100), execute.Time(80), 8.0},
{execute.Time(0), execute.Time(100), execute.Time(90), 9.0},
},
}},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(1), 0.0},
},
}},
},
{
name: "multiple blocks",
config: execute.SelectorConfig{
Column: "_value",
},
data: []*executetest.Block{
{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(1), 0.0},
{execute.Time(0), execute.Time(100), execute.Time(10), 1.0},
{execute.Time(0), execute.Time(100), execute.Time(20), 2.0},
{execute.Time(0), execute.Time(100), execute.Time(30), 3.0},
{execute.Time(0), execute.Time(100), execute.Time(40), 4.0},
{execute.Time(0), execute.Time(100), execute.Time(50), 5.0},
{execute.Time(0), execute.Time(100), execute.Time(60), 6.0},
{execute.Time(0), execute.Time(100), execute.Time(70), 7.0},
{execute.Time(0), execute.Time(100), execute.Time(80), 8.0},
{execute.Time(0), execute.Time(100), execute.Time(90), 9.0},
},
},
{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(100), execute.Time(200), execute.Time(101), 10.0},
{execute.Time(100), execute.Time(200), execute.Time(110), 11.0},
{execute.Time(100), execute.Time(200), execute.Time(120), 12.0},
{execute.Time(100), execute.Time(200), execute.Time(130), 13.0},
{execute.Time(100), execute.Time(200), execute.Time(140), 14.0},
{execute.Time(100), execute.Time(200), execute.Time(150), 15.0},
{execute.Time(100), execute.Time(200), execute.Time(160), 16.0},
{execute.Time(100), execute.Time(200), execute.Time(170), 17.0},
{execute.Time(100), execute.Time(200), execute.Time(180), 18.0},
{execute.Time(100), execute.Time(200), execute.Time(190), 19.0},
},
},
},
want: []*executetest.Block{
{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(1), 0.0},
},
},
{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(100), execute.Time(200), execute.Time(101), 10.0},
},
},
},
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
d := executetest.NewDataset(executetest.RandomDatasetID())
c := execute.NewBlockBuilderCache(executetest.UnlimitedAllocator)
c.SetTriggerSpec(execute.DefaultTriggerSpec)
selector := execute.NewRowSelectorTransformation(d, c, new(functions.MinSelector), tc.config)
parentID := executetest.RandomDatasetID()
for _, b := range tc.data {
if err := selector.Process(parentID, b); err != nil {
t.Fatal(err)
}
}
got, err := executetest.BlocksFromCache(c)
if err != nil {
t.Fatal(err)
}
executetest.NormalizeBlocks(got)
executetest.NormalizeBlocks(tc.want)
sort.Sort(executetest.SortedBlocks(got))
sort.Sort(executetest.SortedBlocks(tc.want))
if !cmp.Equal(tc.want, got, cmpopts.EquateNaNs()) {
t.Errorf("unexpected blocks -want/+got\n%s", cmp.Diff(tc.want, got))
}
})
}
}
func TestIndexSelector_Process(t *testing.T) {
// All test cases use a simple FirstSelector
testCases := []struct {
name string
config execute.SelectorConfig
data []*executetest.Block
want []*executetest.Block
}{
{
name: "single",
config: execute.SelectorConfig{
Column: "_value",
},
data: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(1), 0.0},
{execute.Time(0), execute.Time(100), execute.Time(10), 1.0},
{execute.Time(0), execute.Time(100), execute.Time(20), 2.0},
{execute.Time(0), execute.Time(100), execute.Time(30), 3.0},
{execute.Time(0), execute.Time(100), execute.Time(40), 4.0},
{execute.Time(0), execute.Time(100), execute.Time(50), 5.0},
{execute.Time(0), execute.Time(100), execute.Time(60), 6.0},
{execute.Time(0), execute.Time(100), execute.Time(70), 7.0},
{execute.Time(0), execute.Time(100), execute.Time(80), 8.0},
{execute.Time(0), execute.Time(100), execute.Time(90), 9.0},
},
}},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(1), 0.0},
},
}},
},
{
name: "multiple blocks",
config: execute.SelectorConfig{
Column: "_value",
},
data: []*executetest.Block{
{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(1), 0.0},
{execute.Time(0), execute.Time(100), execute.Time(10), 1.0},
{execute.Time(0), execute.Time(100), execute.Time(20), 2.0},
{execute.Time(0), execute.Time(100), execute.Time(30), 3.0},
{execute.Time(0), execute.Time(100), execute.Time(40), 4.0},
{execute.Time(0), execute.Time(100), execute.Time(50), 5.0},
{execute.Time(0), execute.Time(100), execute.Time(60), 6.0},
{execute.Time(0), execute.Time(100), execute.Time(70), 7.0},
{execute.Time(0), execute.Time(100), execute.Time(80), 8.0},
{execute.Time(0), execute.Time(100), execute.Time(90), 9.0},
},
},
{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(100), execute.Time(200), execute.Time(101), 10.0},
{execute.Time(100), execute.Time(200), execute.Time(110), 11.0},
{execute.Time(100), execute.Time(200), execute.Time(120), 12.0},
{execute.Time(100), execute.Time(200), execute.Time(130), 13.0},
{execute.Time(100), execute.Time(200), execute.Time(140), 14.0},
{execute.Time(100), execute.Time(200), execute.Time(150), 15.0},
{execute.Time(100), execute.Time(200), execute.Time(160), 16.0},
{execute.Time(100), execute.Time(200), execute.Time(170), 17.0},
{execute.Time(100), execute.Time(200), execute.Time(180), 18.0},
{execute.Time(100), execute.Time(200), execute.Time(190), 19.0},
},
},
},
want: []*executetest.Block{
{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(100), execute.Time(1), 0.0},
},
},
{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(100), execute.Time(200), execute.Time(101), 10.0},
},
},
},
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
d := executetest.NewDataset(executetest.RandomDatasetID())
c := execute.NewBlockBuilderCache(executetest.UnlimitedAllocator)
c.SetTriggerSpec(execute.DefaultTriggerSpec)
selector := execute.NewIndexSelectorTransformation(d, c, new(functions.FirstSelector), tc.config)
parentID := executetest.RandomDatasetID()
for _, b := range tc.data {
if err := selector.Process(parentID, b); err != nil {
t.Fatal(err)
}
}
got, err := executetest.BlocksFromCache(c)
if err != nil {
t.Fatal(err)
}
executetest.NormalizeBlocks(got)
executetest.NormalizeBlocks(tc.want)
sort.Sort(executetest.SortedBlocks(got))
sort.Sort(executetest.SortedBlocks(tc.want))
if !cmp.Equal(tc.want, got, cmpopts.EquateNaNs()) {
t.Errorf("unexpected blocks -want/+got\n%s", cmp.Diff(tc.want, got))
}
})
}
}

28
query/execute/source.go Normal file
View File

@ -0,0 +1,28 @@
package execute
import (
"context"
"fmt"
"github.com/influxdata/ifql/query/plan"
)
type Node interface {
AddTransformation(t Transformation)
}
type Source interface {
Node
Run(ctx context.Context)
}
type CreateSource func(spec plan.ProcedureSpec, id DatasetID, ctx Administration) (Source, error)
var procedureToSource = make(map[plan.ProcedureKind]CreateSource)
func RegisterSource(k plan.ProcedureKind, c CreateSource) {
if procedureToSource[k] != nil {
panic(fmt.Errorf("duplicate registration for source with procedure kind %v", k))
}
procedureToSource[k] = c
}
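// Example registration (a minimal sketch; the kind string and constructor are
// hypothetical):
//
//	func createMySource(spec plan.ProcedureSpec, id DatasetID, a Administration) (Source, error) {
//		// construct a Source that pushes blocks to its transformations
//		return nil, fmt.Errorf("not implemented")
//	}
//
//	func init() {
//		RegisterSource("mySource", createMySource)
//	}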

View File

@ -0,0 +1,44 @@
package execute
import (
"fmt"
"github.com/influxdata/ifql/id"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/plan"
)
type Transformation interface {
RetractBlock(id DatasetID, key PartitionKey) error
Process(id DatasetID, b Block) error
UpdateWatermark(id DatasetID, t Time) error
UpdateProcessingTime(id DatasetID, t Time) error
Finish(id DatasetID, err error)
}
type Administration interface {
OrganizationID() id.ID
ResolveTime(qt query.Time) Time
Bounds() Bounds
Allocator() *Allocator
Parents() []DatasetID
ConvertID(plan.ProcedureID) DatasetID
Dependencies() Dependencies
}
// Dependencies represents the dependencies provided to the execution environment.
// The dependencies are opaque to the engine.
type Dependencies map[string]interface{}
type CreateTransformation func(id DatasetID, mode AccumulationMode, spec plan.ProcedureSpec, a Administration) (Transformation, Dataset, error)
var procedureToTransformation = make(map[plan.ProcedureKind]CreateTransformation)
func RegisterTransformation(k plan.ProcedureKind, c CreateTransformation) {
if procedureToTransformation[k] != nil {
panic(fmt.Errorf("duplicate registration for transformation with procedure kind %v", k))
}
procedureToTransformation[k] = c
}
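// Example registration (a minimal sketch; operations register their
// transformations from other packages, as the count function does below):
//
//	func init() {
//		execute.RegisterTransformation(CountKind, createCountTransformation)
//	}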

314
query/execute/transport.go Normal file
View File

@ -0,0 +1,314 @@
package execute
import (
"sync"
"sync/atomic"
)
type Transport interface {
Transformation
// Finished reports when the Transport has completed and there is no more work to do.
Finished() <-chan struct{}
}
// consecutiveTransport implements Transport by transporting data consecutively to the downstream Transformation.
type consecutiveTransport struct {
dispatcher Dispatcher
t Transformation
messages MessageQueue
finished chan struct{}
errMu sync.Mutex
errValue error
schedulerState int32
inflight int32
}
func newConescutiveTransport(dispatcher Dispatcher, t Transformation) *consecutiveTransport {
return &consecutiveTransport{
dispatcher: dispatcher,
t: t,
// TODO(nathanielc): Have planner specify message queue initial buffer size.
messages: newMessageQueue(64),
finished: make(chan struct{}),
}
}
func (t *consecutiveTransport) setErr(err error) {
t.errMu.Lock()
t.errValue = err
t.errMu.Unlock()
}
func (t *consecutiveTransport) err() error {
t.errMu.Lock()
err := t.errValue
t.errMu.Unlock()
return err
}
func (t *consecutiveTransport) Finished() <-chan struct{} {
return t.finished
}
func (t *consecutiveTransport) RetractBlock(id DatasetID, key PartitionKey) error {
select {
case <-t.finished:
return t.err()
default:
}
t.pushMsg(&retractBlockMsg{
srcMessage: srcMessage(id),
key: key,
})
return nil
}
func (t *consecutiveTransport) Process(id DatasetID, b Block) error {
select {
case <-t.finished:
return t.err()
default:
}
t.pushMsg(&processMsg{
srcMessage: srcMessage(id),
block: b,
})
return nil
}
func (t *consecutiveTransport) UpdateWatermark(id DatasetID, time Time) error {
select {
case <-t.finished:
return t.err()
default:
}
t.pushMsg(&updateWatermarkMsg{
srcMessage: srcMessage(id),
time: time,
})
return nil
}
func (t *consecutiveTransport) UpdateProcessingTime(id DatasetID, time Time) error {
select {
case <-t.finished:
return t.err()
default:
}
t.pushMsg(&updateProcessingTimeMsg{
srcMessage: srcMessage(id),
time: time,
})
return nil
}
func (t *consecutiveTransport) Finish(id DatasetID, err error) {
select {
case <-t.finished:
return
default:
}
t.pushMsg(&finishMsg{
srcMessage: srcMessage(id),
err: err,
})
}
func (t *consecutiveTransport) pushMsg(m Message) {
t.messages.Push(m)
atomic.AddInt32(&t.inflight, 1)
t.schedule()
}
const (
// consecutiveTransport schedule states
idle int32 = iota
running
finished
)
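// The scheduler moves idle -> running when schedule wins the CAS in
// tryTransition, running -> idle when the throughput budget is spent or the
// queue drains, and running -> finished (terminal) once a FinishMsg or an
// error is processed.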
// schedule indicates that there is work available to schedule.
func (t *consecutiveTransport) schedule() {
if t.tryTransition(idle, running) {
t.dispatcher.Schedule(t.processMessages)
}
}
// tryTransition attempts to transition into the new state and returns true on success.
func (t *consecutiveTransport) tryTransition(old, new int32) bool {
return atomic.CompareAndSwapInt32(&t.schedulerState, old, new)
}
// transition sets the new state.
func (t *consecutiveTransport) transition(new int32) {
atomic.StoreInt32(&t.schedulerState, new)
}
func (t *consecutiveTransport) processMessages(throughput int) {
PROCESS:
i := 0
for m := t.messages.Pop(); m != nil; m = t.messages.Pop() {
atomic.AddInt32(&t.inflight, -1)
if f, err := processMessage(t.t, m); err != nil || f {
// Set the error if there was any
t.setErr(err)
// Transition to the finished state.
if t.tryTransition(running, finished) {
// Call Finish if we have not already
if !f {
t.t.Finish(m.SrcDatasetID(), err)
}
// We are finished
close(t.finished)
return
}
}
i++
if i >= throughput {
// We have done enough work.
// Transition to the idle state and reschedule for later.
t.transition(idle)
t.schedule()
return
}
}
t.transition(idle)
// Check if more messages arrived after the above loop finished.
// This check must happen in the idle state.
if atomic.LoadInt32(&t.inflight) > 0 {
if t.tryTransition(idle, running) {
goto PROCESS
} // else we have already been scheduled again, we can return
}
}
// processMessage processes the message on t.
// The return value is true if the message was a FinishMsg.
func processMessage(t Transformation, m Message) (finished bool, err error) {
switch m := m.(type) {
case RetractBlockMsg:
err = t.RetractBlock(m.SrcDatasetID(), m.Key())
case ProcessMsg:
b := m.Block()
err = t.Process(m.SrcDatasetID(), b)
b.RefCount(-1)
case UpdateWatermarkMsg:
err = t.UpdateWatermark(m.SrcDatasetID(), m.WatermarkTime())
case UpdateProcessingTimeMsg:
err = t.UpdateProcessingTime(m.SrcDatasetID(), m.ProcessingTime())
case FinishMsg:
t.Finish(m.SrcDatasetID(), m.Error())
finished = true
}
return
}
type Message interface {
Type() MessageType
SrcDatasetID() DatasetID
}
type MessageType int
const (
RetractBlockType MessageType = iota
ProcessType
UpdateWatermarkType
UpdateProcessingTimeType
FinishType
)
type srcMessage DatasetID
func (m srcMessage) SrcDatasetID() DatasetID {
return DatasetID(m)
}
type RetractBlockMsg interface {
Message
Key() PartitionKey
}
type retractBlockMsg struct {
srcMessage
key PartitionKey
}
func (m *retractBlockMsg) Type() MessageType {
return RetractBlockType
}
func (m *retractBlockMsg) Key() PartitionKey {
return m.key
}
type ProcessMsg interface {
Message
Block() Block
}
type processMsg struct {
srcMessage
block Block
}
func (m *processMsg) Type() MessageType {
return ProcessType
}
func (m *processMsg) Block() Block {
return m.block
}
type UpdateWatermarkMsg interface {
Message
WatermarkTime() Time
}
type updateWatermarkMsg struct {
srcMessage
time Time
}
func (m *updateWatermarkMsg) Type() MessageType {
return UpdateWatermarkType
}
func (m *updateWatermarkMsg) WatermarkTime() Time {
return m.time
}
type UpdateProcessingTimeMsg interface {
Message
ProcessingTime() Time
}
type updateProcessingTimeMsg struct {
srcMessage
time Time
}
func (m *updateProcessingTimeMsg) Type() MessageType {
return UpdateProcessingTimeType
}
func (m *updateProcessingTimeMsg) ProcessingTime() Time {
return m.time
}
type FinishMsg interface {
Message
Error() error
}
type finishMsg struct {
srcMessage
err error
}
func (m *finishMsg) Type() MessageType {
return FinishType
}
func (m *finishMsg) Error() error {
return m.err
}

153
query/execute/trigger.go Normal file
View File

@ -0,0 +1,153 @@
package execute
import (
"fmt"
"github.com/influxdata/ifql/query"
)
type Trigger interface {
Triggered(TriggerContext) bool
Finished() bool
Reset()
}
type TriggerContext struct {
Block BlockContext
Watermark Time
CurrentProcessingTime Time
}
type BlockContext struct {
Key PartitionKey
Count int
}
func NewTriggerFromSpec(spec query.TriggerSpec) Trigger {
switch s := spec.(type) {
case query.AfterWatermarkTriggerSpec:
return &afterWatermarkTrigger{
allowedLateness: Duration(s.AllowedLateness),
}
case query.RepeatedTriggerSpec:
return &repeatedlyForever{
t: NewTriggerFromSpec(s.Trigger),
}
case query.AfterProcessingTimeTriggerSpec:
return &afterProcessingTimeTrigger{
duration: Duration(s.Duration),
}
case query.AfterAtLeastCountTriggerSpec:
return &afterAtLeastCount{
atLeast: s.Count,
}
case query.OrFinallyTriggerSpec:
return &orFinally{
main: NewTriggerFromSpec(s.Main),
finally: NewTriggerFromSpec(s.Finally),
}
default:
//TODO(nathanielc): Add proper error handling here.
// Maybe separate validation of a spec and creation of a spec so we know we cannot error during creation?
panic(fmt.Sprintf("unsupported trigger spec provided %T", spec))
}
}
// afterWatermarkTrigger triggers once the watermark passes the stop time of the block's bounds.
type afterWatermarkTrigger struct {
allowedLateness Duration
finished bool
}
func (t *afterWatermarkTrigger) Triggered(c TriggerContext) bool {
stopIdx := ColIdx(DefaultStopColLabel, c.Block.Key.Cols())
if stopIdx < 0 {
return false
}
stop := c.Block.Key.ValueTime(stopIdx)
if c.Watermark >= stop+Time(t.allowedLateness) {
t.finished = true
}
return c.Watermark >= stop
}
func (t *afterWatermarkTrigger) Finished() bool {
return t.finished
}
func (t *afterWatermarkTrigger) Reset() {
t.finished = false
}
type repeatedlyForever struct {
t Trigger
}
func (t *repeatedlyForever) Triggered(c TriggerContext) bool {
return t.t.Triggered(c)
}
func (t *repeatedlyForever) Finished() bool {
if t.t.Finished() {
t.Reset()
}
return false
}
func (t *repeatedlyForever) Reset() {
t.t.Reset()
}
type afterProcessingTimeTrigger struct {
duration Duration
triggerTimeSet bool
triggerTime Time
current Time
}
func (t *afterProcessingTimeTrigger) Triggered(c TriggerContext) bool {
if !t.triggerTimeSet {
t.triggerTimeSet = true
t.triggerTime = c.CurrentProcessingTime + Time(t.duration)
}
t.current = c.CurrentProcessingTime
return t.current >= t.triggerTime
}
func (t *afterProcessingTimeTrigger) Finished() bool {
return t.triggerTimeSet && t.current >= t.triggerTime
}
func (t *afterProcessingTimeTrigger) Reset() {
t.triggerTimeSet = false
}
type afterAtLeastCount struct {
n, atLeast int
}
func (t *afterAtLeastCount) Triggered(c TriggerContext) bool {
t.n = c.Block.Count
return t.n >= t.atLeast
}
func (t *afterAtLeastCount) Finished() bool {
return t.n >= t.atLeast
}
func (t *afterAtLeastCount) Reset() {
t.n = 0
}
type orFinally struct {
main Trigger
finally Trigger
finished bool
}
func (t *orFinally) Triggered(c TriggerContext) bool {
if t.finally.Triggered(c) {
t.finished = true
return true
}
return t.main.Triggered(c)
}
func (t *orFinally) Finished() bool {
return t.finished
}
func (t *orFinally) Reset() {
t.finished = false
}
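// Example construction (a minimal sketch): a trigger that fires repeatedly
// every ten seconds of processing time.
//
//	spec := query.RepeatedTriggerSpec{
//		Trigger: query.AfterProcessingTimeTriggerSpec{
//			Duration: 10 * time.Second,
//		},
//	}
//	t := NewTriggerFromSpec(spec)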

8
query/execute/window.go Normal file
View File

@ -0,0 +1,8 @@
package execute
type Window struct {
Every Duration
Period Duration
Round Duration
Start Time
}
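// For example, a Window with Every of one minute and Period of five minutes
// describes overlapping five-minute windows that start every minute; when
// Every equals Period the windows tile without overlap.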

55
query/format.go Normal file
View File

@ -0,0 +1,55 @@
package query
import (
"encoding/json"
"fmt"
)
// TODO(nathanielc): Add better options for formatting plans as Graphviz dot format.
type FormatOption func(*formatter)
func Formatted(q *Spec, opts ...FormatOption) fmt.Formatter {
f := formatter{
q: q,
}
for _, o := range opts {
o(&f)
}
return f
}
func FmtJSON(f *formatter) { f.json = true }
type formatter struct {
q *Spec
json bool
}
func (f formatter) Format(fs fmt.State, c rune) {
if c == 'v' && fs.Flag('#') {
fmt.Fprintf(fs, "%#v", f.q)
return
}
if f.json {
f.formatJSON(fs)
} else {
f.formatDAG(fs)
}
}
func (f formatter) formatJSON(fs fmt.State) {
e := json.NewEncoder(fs)
e.SetIndent("", " ")
e.Encode(f.q)
}
func (f formatter) formatDAG(fs fmt.State) {
fmt.Fprint(fs, "digraph QuerySpec {\n")
_ = f.q.Walk(func(o *Operation) error {
fmt.Fprintf(fs, "%s[kind=%q];\n", o.ID, o.Spec.Kind())
for _, child := range f.q.Children(o.ID) {
fmt.Fprintf(fs, "%s->%s;\n", o.ID, child.ID)
}
return nil
})
fmt.Fprintln(fs, "}")
}
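// Example usage (a minimal sketch; q is a *Spec):
//
//	fmt.Printf("%v", Formatted(q))          // Graphviz dot representation
//	fmt.Printf("%v", Formatted(q, FmtJSON)) // JSON representation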

151
query/functions/count.go Normal file
View File

@ -0,0 +1,151 @@
package functions
import (
"fmt"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/plan"
)
const CountKind = "count"
type CountOpSpec struct {
execute.AggregateConfig
}
var countSignature = query.DefaultFunctionSignature()
func init() {
query.RegisterFunction(CountKind, createCountOpSpec, countSignature)
query.RegisterOpSpec(CountKind, newCountOp)
plan.RegisterProcedureSpec(CountKind, newCountProcedure, CountKind)
execute.RegisterTransformation(CountKind, createCountTransformation)
}
func createCountOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
}
s := new(CountOpSpec)
if err := s.AggregateConfig.ReadArgs(args); err != nil {
return nil, err
}
return s, nil
}
func newCountOp() query.OperationSpec {
return new(CountOpSpec)
}
func (s *CountOpSpec) Kind() query.OperationKind {
return CountKind
}
type CountProcedureSpec struct {
execute.AggregateConfig
}
func newCountProcedure(qs query.OperationSpec, a plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*CountOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
}
return &CountProcedureSpec{
AggregateConfig: spec.AggregateConfig,
}, nil
}
func (s *CountProcedureSpec) Kind() plan.ProcedureKind {
return CountKind
}
func (s *CountProcedureSpec) Copy() plan.ProcedureSpec {
return &CountProcedureSpec{
AggregateConfig: s.AggregateConfig,
}
}
func (s *CountProcedureSpec) AggregateMethod() string {
return CountKind
}
func (s *CountProcedureSpec) ReAggregateSpec() plan.ProcedureSpec {
return new(SumProcedureSpec)
}
func (s *CountProcedureSpec) PushDownRules() []plan.PushDownRule {
return []plan.PushDownRule{{
Root: FromKind,
Through: nil,
Match: func(spec plan.ProcedureSpec) bool {
selectSpec := spec.(*FromProcedureSpec)
return !selectSpec.GroupingSet
},
}}
}
func (s *CountProcedureSpec) PushDown(root *plan.Procedure, dup func() *plan.Procedure) {
selectSpec := root.Spec.(*FromProcedureSpec)
if selectSpec.AggregateSet {
root = dup()
selectSpec = root.Spec.(*FromProcedureSpec)
selectSpec.AggregateSet = false
selectSpec.AggregateMethod = ""
return
}
selectSpec.AggregateSet = true
selectSpec.AggregateMethod = s.AggregateMethod()
}
type CountAgg struct {
count int64
}
func createCountTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*CountProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
}
t, d := execute.NewAggregateTransformationAndDataset(id, mode, new(CountAgg), s.AggregateConfig, a.Allocator())
return t, d, nil
}
func (a *CountAgg) NewBoolAgg() execute.DoBoolAgg {
return new(CountAgg)
}
func (a *CountAgg) NewIntAgg() execute.DoIntAgg {
return new(CountAgg)
}
func (a *CountAgg) NewUIntAgg() execute.DoUIntAgg {
return new(CountAgg)
}
func (a *CountAgg) NewFloatAgg() execute.DoFloatAgg {
return new(CountAgg)
}
func (a *CountAgg) NewStringAgg() execute.DoStringAgg {
return new(CountAgg)
}
func (a *CountAgg) DoBool(vs []bool) {
a.count += int64(len(vs))
}
func (a *CountAgg) DoUInt(vs []uint64) {
a.count += int64(len(vs))
}
func (a *CountAgg) DoInt(vs []int64) {
a.count += int64(len(vs))
}
func (a *CountAgg) DoFloat(vs []float64) {
a.count += int64(len(vs))
}
func (a *CountAgg) DoString(vs []string) {
a.count += int64(len(vs))
}
func (a *CountAgg) Type() execute.DataType {
return execute.TInt
}
func (a *CountAgg) ValueInt() int64 {
return a.count
}
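// Example (a minimal sketch of the aggregate in isolation):
//
//	agg := new(CountAgg)
//	agg.DoFloat([]float64{1, 2, 3})
//	agg.DoInt([]int64{4, 5})
//	// agg.ValueInt() is now 5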

View File

@ -0,0 +1,134 @@
package functions_test
import (
"testing"
"time"
"github.com/influxdata/ifql/functions"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/execute/executetest"
"github.com/influxdata/ifql/query/plan"
"github.com/influxdata/ifql/query/plan/plantest"
"github.com/influxdata/ifql/query/querytest"
)
func TestCount_NewQuery(t *testing.T) {
tests := []querytest.NewQueryTestCase{
{
Name: "from with range and count",
Raw: `from(db:"mydb") |> range(start:-4h, stop:-2h) |> count()`,
Want: &query.Spec{
Operations: []*query.Operation{
{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
},
},
{
ID: "range1",
Spec: &functions.RangeOpSpec{
Start: query.Time{
Relative: -4 * time.Hour,
IsRelative: true,
},
Stop: query.Time{
Relative: -2 * time.Hour,
IsRelative: true,
},
},
},
{
ID: "count2",
Spec: &functions.CountOpSpec{
AggregateConfig: execute.DefaultAggregateConfig,
},
},
},
Edges: []query.Edge{
{Parent: "from0", Child: "range1"},
{Parent: "range1", Child: "count2"},
},
},
},
}
for _, tc := range tests {
tc := tc
t.Run(tc.Name, func(t *testing.T) {
t.Parallel()
querytest.NewQueryTestHelper(t, tc)
})
}
}
func TestCountOperation_Marshaling(t *testing.T) {
data := []byte(`{"id":"count","kind":"count"}`)
op := &query.Operation{
ID: "count",
Spec: &functions.CountOpSpec{},
}
querytest.OperationMarshalingTestHelper(t, data, op)
}
func TestCount_Process(t *testing.T) {
executetest.AggFuncTestHelper(
t,
new(functions.CountAgg),
[]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
int64(10),
)
}
func BenchmarkCount(b *testing.B) {
executetest.AggFuncBenchmarkHelper(
b,
new(functions.CountAgg),
NormalData,
int64(len(NormalData)),
)
}
func TestCount_PushDown_Match(t *testing.T) {
spec := new(functions.CountProcedureSpec)
from := new(functions.FromProcedureSpec)
// Should not match when grouping is set
from.GroupingSet = true
plantest.PhysicalPlan_PushDown_Match_TestHelper(t, spec, from, []bool{false})
// Should match when no grouping is set
from.GroupingSet = false
plantest.PhysicalPlan_PushDown_Match_TestHelper(t, spec, from, []bool{true})
}
func TestCount_PushDown(t *testing.T) {
spec := new(functions.CountProcedureSpec)
root := &plan.Procedure{
Spec: new(functions.FromProcedureSpec),
}
want := &plan.Procedure{
Spec: &functions.FromProcedureSpec{
AggregateSet: true,
AggregateMethod: functions.CountKind,
},
}
plantest.PhysicalPlan_PushDown_TestHelper(t, spec, root, false, want)
}
func TestCount_PushDown_Duplicate(t *testing.T) {
spec := new(functions.CountProcedureSpec)
root := &plan.Procedure{
Spec: &functions.FromProcedureSpec{
AggregateSet: true,
AggregateMethod: functions.CountKind,
},
}
want := &plan.Procedure{
// Expect the duplicate has been reset to zero values
Spec: new(functions.FromProcedureSpec),
}
plantest.PhysicalPlan_PushDown_TestHelper(t, spec, root, true, want)
}

View File

@ -0,0 +1,251 @@
package functions
import (
"fmt"
"math"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/plan"
"github.com/influxdata/ifql/semantic"
"github.com/pkg/errors"
)
const CovarianceKind = "covariance"
type CovarianceOpSpec struct {
PearsonCorrelation bool `json:"pearsonr"`
ValueDst string `json:"value_dst"`
execute.AggregateConfig
}
var covarianceSignature = query.DefaultFunctionSignature()
func init() {
covarianceSignature.Params["pearsonr"] = semantic.Bool
covarianceSignature.Params["columns"] = semantic.Array
query.RegisterBuiltIn("covariance", covarianceBuiltIn)
query.RegisterFunction(CovarianceKind, createCovarianceOpSpec, covarianceSignature)
query.RegisterOpSpec(CovarianceKind, newCovarianceOp)
plan.RegisterProcedureSpec(CovarianceKind, newCovarianceProcedure, CovarianceKind)
execute.RegisterTransformation(CovarianceKind, createCovarianceTransformation)
}
// covarianceBuiltIn defines a `cov` function with an automatic join.
var covarianceBuiltIn = `
cov = (x,y,on,pearsonr=false) =>
join(
tables:{x:x, y:y},
on:on,
fn: (t) => ({x:t.x._value, y:t.y._value}),
)
|> covariance(pearsonr:pearsonr, columns:["x","y"])
pearsonr = (x,y,on) => cov(x:x, y:y, on:on, pearsonr:true)
`
func createCovarianceOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
}
spec := new(CovarianceOpSpec)
pearsonr, ok, err := args.GetBool("pearsonr")
if err != nil {
return nil, err
} else if ok {
spec.PearsonCorrelation = pearsonr
}
label, ok, err := args.GetString("valueDst")
if err != nil {
return nil, err
} else if ok {
spec.ValueDst = label
} else {
spec.ValueDst = execute.DefaultValueColLabel
}
if err := spec.AggregateConfig.ReadArgs(args); err != nil {
return nil, err
}
if len(spec.Columns) != 2 {
return nil, errors.New("must provide exactly two columns")
}
return spec, nil
}
func newCovarianceOp() query.OperationSpec {
return new(CovarianceOpSpec)
}
func (s *CovarianceOpSpec) Kind() query.OperationKind {
return CovarianceKind
}
type CovarianceProcedureSpec struct {
PearsonCorrelation bool
ValueLabel string
execute.AggregateConfig
}
func newCovarianceProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*CovarianceOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
}
return &CovarianceProcedureSpec{
PearsonCorrelation: spec.PearsonCorrelation,
ValueLabel: spec.ValueDst,
AggregateConfig: spec.AggregateConfig,
}, nil
}
func (s *CovarianceProcedureSpec) Kind() plan.ProcedureKind {
return CovarianceKind
}
func (s *CovarianceProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(CovarianceProcedureSpec)
*ns = *s
ns.AggregateConfig = s.AggregateConfig.Copy()
return ns
}
type CovarianceTransformation struct {
d execute.Dataset
cache execute.BlockBuilderCache
bounds execute.Bounds
spec CovarianceProcedureSpec
yIdx int
n, xm1, ym1, xm2, ym2, xym2 float64
}
func createCovarianceTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*CovarianceProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
}
cache := execute.NewBlockBuilderCache(a.Allocator())
d := execute.NewDataset(id, mode, cache)
t := NewCovarianceTransformation(d, cache, s)
return t, d, nil
}
func NewCovarianceTransformation(d execute.Dataset, cache execute.BlockBuilderCache, spec *CovarianceProcedureSpec) *CovarianceTransformation {
return &CovarianceTransformation{
d: d,
cache: cache,
spec: *spec,
}
}
func (t *CovarianceTransformation) RetractBlock(id execute.DatasetID, key execute.PartitionKey) error {
return t.d.RetractBlock(key)
}
func (t *CovarianceTransformation) Process(id execute.DatasetID, b execute.Block) error {
cols := b.Cols()
builder, created := t.cache.BlockBuilder(b.Key())
if !created {
return fmt.Errorf("covariance found duplicate block with key: %v", b.Key())
}
execute.AddBlockKeyCols(b.Key(), builder)
builder.AddCol(execute.ColMeta{
Label: t.spec.TimeDst,
Type: execute.TTime,
})
valueIdx := builder.AddCol(execute.ColMeta{
Label: t.spec.ValueLabel,
Type: execute.TFloat,
})
xIdx := execute.ColIdx(t.spec.Columns[0], cols)
yIdx := execute.ColIdx(t.spec.Columns[1], cols)
if cols[xIdx].Type != cols[yIdx].Type {
return errors.New("cannot compute the covariance between different types")
}
if err := execute.AppendAggregateTime(t.spec.TimeSrc, t.spec.TimeDst, b.Key(), builder); err != nil {
return err
}
t.reset()
if err := b.Do(func(cr execute.ColReader) error {
switch typ := cols[xIdx].Type; typ {
case execute.TFloat:
t.DoFloat(cr.Floats(xIdx), cr.Floats(yIdx))
default:
return fmt.Errorf("covariance does not support %v", typ)
}
return nil
}); err != nil {
return err
}
execute.AppendKeyValues(b.Key(), builder)
builder.AppendFloat(valueIdx, t.value())
return nil
}
func (t *CovarianceTransformation) reset() {
t.n = 0
t.xm1 = 0
t.ym1 = 0
t.xm2 = 0
t.ym2 = 0
t.xym2 = 0
}
func (t *CovarianceTransformation) DoFloat(xs, ys []float64) {
var xdelta, ydelta, xdelta2, ydelta2 float64
for i, x := range xs {
y := ys[i]
t.n++
// Update means
xdelta = x - t.xm1
ydelta = y - t.ym1
t.xm1 += xdelta / t.n
t.ym1 += ydelta / t.n
// Update variance sums
xdelta2 = x - t.xm1
ydelta2 = y - t.ym1
t.xm2 += xdelta * xdelta2
t.ym2 += ydelta * ydelta2
// Update covariance sum
// Covariance is symmetric, so we do not need to compute the yxm2 value.
t.xym2 += xdelta * ydelta2
}
}
func (t *CovarianceTransformation) value() float64 {
if t.n < 2 {
return math.NaN()
}
if t.spec.PearsonCorrelation {
return (t.xym2) / math.Sqrt(t.xm2*t.ym2)
}
return t.xym2 / (t.n - 1)
}
func (t *CovarianceTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
return t.d.UpdateWatermark(mark)
}
func (t *CovarianceTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
return t.d.UpdateProcessingTime(pt)
}
func (t *CovarianceTransformation) Finish(id execute.DatasetID, err error) {
t.d.Finish(err)
}
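
DoFloat above is a single-pass, Welford-style update of the means and co-moments. Below is a standalone sketch (standard library only, illustrative rather than part of this package) that applies the same updates to a short series and prints both outputs: the sample covariance xym2/(n-1) and the Pearson correlation xym2/sqrt(xm2*ym2).

package main

import (
    "fmt"
    "math"
)

func main() {
    xs := []float64{1, 2, 3, 4, 5}
    ys := []float64{5, 4, 3, 2, 1}

    var n, xm1, ym1, xm2, ym2, xym2 float64
    for i, x := range xs {
        y := ys[i]
        n++
        // Deltas against the old means, then update the means.
        xdelta := x - xm1
        ydelta := y - ym1
        xm1 += xdelta / n
        ym1 += ydelta / n
        // Variance sums use the delta against the new mean.
        xm2 += xdelta * (x - xm1)
        ym2 += ydelta * (y - ym1)
        // Covariance is symmetric, so one cross moment suffices.
        xym2 += xdelta * (y - ym1)
    }
    fmt.Println(xym2/(n-1), xym2/math.Sqrt(xm2*ym2)) // -2.5 -1
}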

View File

@ -0,0 +1,399 @@
package functions_test
import (
"testing"
"github.com/influxdata/ifql/functions"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/execute/executetest"
"github.com/influxdata/ifql/query/querytest"
"github.com/influxdata/ifql/semantic"
)
func TestCovariance_NewQuery(t *testing.T) {
tests := []querytest.NewQueryTestCase{
{
Name: "simple covariance",
Raw: `from(db:"mydb") |> covariance(columns:["a","b"],)`,
Want: &query.Spec{
Operations: []*query.Operation{
{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
},
},
{
ID: "covariance1",
Spec: &functions.CovarianceOpSpec{
ValueDst: execute.DefaultValueColLabel,
AggregateConfig: execute.AggregateConfig{
TimeSrc: execute.DefaultStopColLabel,
TimeDst: execute.DefaultTimeColLabel,
Columns: []string{"a", "b"},
},
},
},
},
Edges: []query.Edge{
{Parent: "from0", Child: "covariance1"},
},
},
},
{
Name: "pearsonr",
Raw: `from(db:"mydb")|>covariance(columns:["a","b"],pearsonr:true)`,
Want: &query.Spec{
Operations: []*query.Operation{
{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
},
},
{
ID: "covariance1",
Spec: &functions.CovarianceOpSpec{
ValueDst: execute.DefaultValueColLabel,
PearsonCorrelation: true,
AggregateConfig: execute.AggregateConfig{
TimeSrc: execute.DefaultStopColLabel,
TimeDst: execute.DefaultTimeColLabel,
Columns: []string{"a", "b"},
},
},
},
},
Edges: []query.Edge{
{Parent: "from0", Child: "covariance1"},
},
},
},
{
Name: "global covariance",
Raw: `cov(x: from(db:"mydb"), y:from(db:"mydb"), on:["host"], pearsonr:true)`,
Want: &query.Spec{
Operations: []*query.Operation{
{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
},
},
{
ID: "from1",
Spec: &functions.FromOpSpec{
Database: "mydb",
},
},
{
ID: "join2",
Spec: &functions.JoinOpSpec{
On: []string{"host"},
TableNames: map[query.OperationID]string{
"from0": "x",
"from1": "y",
},
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{
{Key: &semantic.Identifier{Name: "t"}},
},
Body: &semantic.ObjectExpression{
Properties: []*semantic.Property{
{
Key: &semantic.Identifier{Name: "x"},
Value: &semantic.MemberExpression{
Object: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "t"},
Property: "x",
},
Property: "_value",
},
},
{
Key: &semantic.Identifier{Name: "y"},
Value: &semantic.MemberExpression{
Object: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "t"},
Property: "y",
},
Property: "_value",
},
},
},
},
},
},
},
{
ID: "covariance3",
Spec: &functions.CovarianceOpSpec{
ValueDst: execute.DefaultValueColLabel,
PearsonCorrelation: true,
AggregateConfig: execute.AggregateConfig{
TimeSrc: execute.DefaultStopColLabel,
TimeDst: execute.DefaultTimeColLabel,
Columns: []string{"x", "y"},
},
},
},
},
Edges: []query.Edge{
{Parent: "from0", Child: "join2"},
{Parent: "from1", Child: "join2"},
{Parent: "join2", Child: "covariance3"},
},
},
},
}
for _, tc := range tests {
tc := tc
t.Run(tc.Name, func(t *testing.T) {
t.Parallel()
querytest.NewQueryTestHelper(t, tc)
})
}
}
func TestCovarianceOperation_Marshaling(t *testing.T) {
data := []byte(`{
"id":"covariance",
"kind":"covariance",
"spec":{
"pearsonr":true
}
}`)
op := &query.Operation{
ID: "covariance",
Spec: &functions.CovarianceOpSpec{
PearsonCorrelation: true,
},
}
querytest.OperationMarshalingTestHelper(t, data, op)
}
func TestCovariance_Process(t *testing.T) {
testCases := []struct {
name string
spec *functions.CovarianceProcedureSpec
data []execute.Block
want []*executetest.Block
}{
{
name: "variance",
spec: &functions.CovarianceProcedureSpec{
ValueLabel: execute.DefaultValueColLabel,
AggregateConfig: execute.AggregateConfig{
TimeSrc: execute.DefaultStopColLabel,
TimeDst: execute.DefaultTimeColLabel,
Columns: []string{"x", "y"},
},
},
data: []execute.Block{&executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(0), 1.0, 1.0},
{execute.Time(0), execute.Time(5), execute.Time(1), 2.0, 2.0},
{execute.Time(0), execute.Time(5), execute.Time(2), 3.0, 3.0},
{execute.Time(0), execute.Time(5), execute.Time(3), 4.0, 4.0},
{execute.Time(0), execute.Time(5), execute.Time(4), 5.0, 5.0},
},
}},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(5), 2.5},
},
}},
},
{
name: "negative covariance",
spec: &functions.CovarianceProcedureSpec{
ValueLabel: execute.DefaultValueColLabel,
AggregateConfig: execute.AggregateConfig{
TimeSrc: execute.DefaultStopColLabel,
TimeDst: execute.DefaultTimeColLabel,
Columns: []string{"x", "y"},
},
},
data: []execute.Block{&executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(0), 1.0, 5.0},
{execute.Time(0), execute.Time(5), execute.Time(1), 2.0, 4.0},
{execute.Time(0), execute.Time(5), execute.Time(2), 3.0, 3.0},
{execute.Time(0), execute.Time(5), execute.Time(3), 4.0, 2.0},
{execute.Time(0), execute.Time(5), execute.Time(4), 5.0, 1.0},
},
}},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(5), -2.5},
},
}},
},
{
name: "small covariance",
spec: &functions.CovarianceProcedureSpec{
ValueLabel: execute.DefaultValueColLabel,
AggregateConfig: execute.AggregateConfig{
TimeSrc: execute.DefaultStopColLabel,
TimeDst: execute.DefaultTimeColLabel,
Columns: []string{"x", "y"},
},
},
data: []execute.Block{&executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(0), 1.0, 1.0},
{execute.Time(0), execute.Time(5), execute.Time(1), 2.0, 1.0},
{execute.Time(0), execute.Time(5), execute.Time(2), 3.0, 1.0},
{execute.Time(0), execute.Time(5), execute.Time(3), 4.0, 1.0},
{execute.Time(0), execute.Time(5), execute.Time(4), 5.0, 2.0},
},
}},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(5), 0.5},
},
}},
},
{
name: "pearson correlation",
spec: &functions.CovarianceProcedureSpec{
ValueLabel: execute.DefaultValueColLabel,
PearsonCorrelation: true,
AggregateConfig: execute.AggregateConfig{
TimeSrc: execute.DefaultStopColLabel,
TimeDst: execute.DefaultTimeColLabel,
Columns: []string{"x", "y"},
},
},
data: []execute.Block{&executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(0), 1.0, 1.0},
{execute.Time(0), execute.Time(5), execute.Time(1), 2.0, 2.0},
{execute.Time(0), execute.Time(5), execute.Time(2), 3.0, 3.0},
{execute.Time(0), execute.Time(5), execute.Time(3), 4.0, 4.0},
{execute.Time(0), execute.Time(5), execute.Time(4), 5.0, 5.0},
},
}},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(5), 1.0},
},
}},
},
{
name: "pearson correlation opposite",
spec: &functions.CovarianceProcedureSpec{
ValueLabel: execute.DefaultValueColLabel,
PearsonCorrelation: true,
AggregateConfig: execute.AggregateConfig{
TimeSrc: execute.DefaultStopColLabel,
TimeDst: execute.DefaultTimeColLabel,
Columns: []string{"x", "y"},
},
},
data: []execute.Block{&executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(0), 1.0, 5.0},
{execute.Time(0), execute.Time(5), execute.Time(1), 2.0, 4.0},
{execute.Time(0), execute.Time(5), execute.Time(2), 3.0, 3.0},
{execute.Time(0), execute.Time(5), execute.Time(3), 4.0, 2.0},
{execute.Time(0), execute.Time(5), execute.Time(4), 5.0, 1.0},
},
}},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(5), execute.Time(5), -1.0},
},
}},
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
executetest.ProcessTestHelper(
t,
tc.data,
tc.want,
func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
return functions.NewCovarianceTransformation(d, c, tc.spec)
},
)
})
}
}

View File

@ -0,0 +1,199 @@
package functions
import (
"fmt"
"github.com/influxdata/ifql/interpreter"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/plan"
"github.com/influxdata/ifql/semantic"
)
const CumulativeSumKind = "cumulativeSum"
type CumulativeSumOpSpec struct {
Columns []string `json:"columns"`
}
var cumulativeSumSignature = query.DefaultFunctionSignature()
func init() {
query.RegisterFunction(CumulativeSumKind, createCumulativeSumOpSpec, cumulativeSumSignature)
query.RegisterOpSpec(CumulativeSumKind, newCumulativeSumOp)
plan.RegisterProcedureSpec(CumulativeSumKind, newCumulativeSumProcedure, CumulativeSumKind)
execute.RegisterTransformation(CumulativeSumKind, createCumulativeSumTransformation)
}
func createCumulativeSumOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
}
spec := new(CumulativeSumOpSpec)
if cols, ok, err := args.GetArray("columns", semantic.String); err != nil {
return nil, err
} else if ok {
columns, err := interpreter.ToStringArray(cols)
if err != nil {
return nil, err
}
spec.Columns = columns
} else {
spec.Columns = []string{execute.DefaultValueColLabel}
}
return spec, nil
}
func newCumulativeSumOp() query.OperationSpec {
return new(CumulativeSumOpSpec)
}
func (s *CumulativeSumOpSpec) Kind() query.OperationKind {
return CumulativeSumKind
}
type CumulativeSumProcedureSpec struct {
Columns []string
}
func newCumulativeSumProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*CumulativeSumOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
}
return &CumulativeSumProcedureSpec{
Columns: spec.Columns,
}, nil
}
func (s *CumulativeSumProcedureSpec) Kind() plan.ProcedureKind {
return CumulativeSumKind
}
func (s *CumulativeSumProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(CumulativeSumProcedureSpec)
*ns = *s
if s.Columns != nil {
ns.Columns = make([]string, len(s.Columns))
copy(ns.Columns, s.Columns)
}
return ns
}
func createCumulativeSumTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*CumulativeSumProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
}
cache := execute.NewBlockBuilderCache(a.Allocator())
d := execute.NewDataset(id, mode, cache)
t := NewCumulativeSumTransformation(d, cache, s)
return t, d, nil
}
type cumulativeSumTransformation struct {
d execute.Dataset
cache execute.BlockBuilderCache
spec CumulativeSumProcedureSpec
}
func NewCumulativeSumTransformation(d execute.Dataset, cache execute.BlockBuilderCache, spec *CumulativeSumProcedureSpec) *cumulativeSumTransformation {
return &cumulativeSumTransformation{
d: d,
cache: cache,
spec: *spec,
}
}
func (t *cumulativeSumTransformation) RetractBlock(id execute.DatasetID, key execute.PartitionKey) error {
return t.d.RetractBlock(key)
}
func (t *cumulativeSumTransformation) Process(id execute.DatasetID, b execute.Block) error {
builder, created := t.cache.BlockBuilder(b.Key())
if !created {
return fmt.Errorf("cumulative sum found duplicate block with key: %v", b.Key())
}
execute.AddBlockCols(b, builder)
cols := b.Cols()
sumers := make([]*cumulativeSum, len(cols))
for j, c := range cols {
for _, label := range t.spec.Columns {
if c.Label == label {
sumers[j] = &cumulativeSum{}
break
}
}
}
return b.Do(func(cr execute.ColReader) error {
l := cr.Len()
for j, c := range cols {
switch c.Type {
case execute.TBool:
builder.AppendBools(j, cr.Bools(j))
case execute.TInt:
if sumers[j] != nil {
for i := 0; i < l; i++ {
builder.AppendInt(j, sumers[j].sumInt(cr.Ints(j)[i]))
}
} else {
builder.AppendInts(j, cr.Ints(j))
}
case execute.TUInt:
if sumers[j] != nil {
for i := 0; i < l; i++ {
builder.AppendUInt(j, sumers[j].sumUInt(cr.UInts(j)[i]))
}
} else {
builder.AppendUInts(j, cr.UInts(j))
}
case execute.TFloat:
if sumers[j] != nil {
for i := 0; i < l; i++ {
builder.AppendFloat(j, sumers[j].sumFloat(cr.Floats(j)[i]))
}
} else {
builder.AppendFloats(j, cr.Floats(j))
}
case execute.TString:
builder.AppendStrings(j, cr.Strings(j))
case execute.TTime:
builder.AppendTimes(j, cr.Times(j))
}
}
return nil
})
}
func (t *cumulativeSumTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
return t.d.UpdateWatermark(mark)
}
func (t *cumulativeSumTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
return t.d.UpdateProcessingTime(pt)
}
func (t *cumulativeSumTransformation) Finish(id execute.DatasetID, err error) {
t.d.Finish(err)
}
type cumulativeSum struct {
intVal int64
uintVal uint64
floatVal float64
}
func (s *cumulativeSum) sumInt(val int64) int64 {
s.intVal += val
return s.intVal
}
func (s *cumulativeSum) sumUInt(val uint64) uint64 {
s.uintVal += val
return s.uintVal
}
func (s *cumulativeSum) sumFloat(val float64) float64 {
s.floatVal += val
return s.floatVal
}
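
The three sum methods above are the whole algorithm: one running total per selected column, folded in row order. A standalone sketch of that fold follows (runningSum is a hypothetical stand-in for the unexported cumulativeSum type), reproducing the first rows of the "float" test case below.

package main

import "fmt"

// runningSum mirrors cumulativeSum for a single float column:
// each call adds the value to the total and returns the total so far.
type runningSum struct{ total float64 }

func (s *runningSum) sum(v float64) float64 {
    s.total += v
    return s.total
}

func main() {
    s := new(runningSum)
    for _, v := range []float64{2, 1, 3, 4} {
        fmt.Print(s.sum(v), " ")
    }
    fmt.Println() // 2 3 6 10
}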

View File

@ -0,0 +1,232 @@
package functions_test
import (
"testing"
"github.com/influxdata/ifql/functions"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/execute/executetest"
"github.com/influxdata/ifql/query/querytest"
)
func TestCumulativeSumOperation_Marshaling(t *testing.T) {
data := []byte(`{"id":"cumulativeSum","kind":"cumulativeSum","spec":{}}`)
op := &query.Operation{
ID: "cumulativeSum",
Spec: &functions.CumulativeSumOpSpec{},
}
querytest.OperationMarshalingTestHelper(t, data, op)
}
func TestCumulativeSum_PassThrough(t *testing.T) {
executetest.TransformationPassThroughTestHelper(t, func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
s := functions.NewCumulativeSumTransformation(
d,
c,
&functions.CumulativeSumProcedureSpec{},
)
return s
})
}
func TestCumulativeSum_Process(t *testing.T) {
testCases := []struct {
name string
spec *functions.CumulativeSumProcedureSpec
data []execute.Block
want []*executetest.Block
}{
{
name: "float",
spec: &functions.CumulativeSumProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), 2.0},
{execute.Time(1), 1.0},
{execute.Time(2), 3.0},
{execute.Time(3), 4.0},
{execute.Time(4), 2.0},
{execute.Time(5), 6.0},
{execute.Time(6), 2.0},
{execute.Time(7), 7.0},
{execute.Time(8), 3.0},
{execute.Time(9), 8.0},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), 2.0},
{execute.Time(1), 3.0},
{execute.Time(2), 6.0},
{execute.Time(3), 10.0},
{execute.Time(4), 12.0},
{execute.Time(5), 18.0},
{execute.Time(6), 20.0},
{execute.Time(7), 27.0},
{execute.Time(8), 30.0},
{execute.Time(9), 38.0},
},
}},
},
{
name: "multiple value columns",
spec: &functions.CumulativeSumProcedureSpec{
Columns: []string{"int", "uint", "float"},
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "int", Type: execute.TInt},
{Label: "uint", Type: execute.TUInt},
{Label: "float", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), int64(2), uint64(1), 1.0},
{execute.Time(1), int64(1), uint64(2), 1.0},
{execute.Time(2), int64(3), uint64(3), 2.0},
{execute.Time(3), int64(4), uint64(4), 13.0},
{execute.Time(4), int64(2), uint64(5), 4.0},
{execute.Time(5), int64(6), uint64(6), 5.0},
{execute.Time(6), int64(2), uint64(7), -7.0},
{execute.Time(7), int64(-7), uint64(8), 2.0},
{execute.Time(8), int64(3), uint64(9), -6.0},
{execute.Time(9), int64(8), uint64(11), 3.0},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "int", Type: execute.TInt},
{Label: "uint", Type: execute.TUInt},
{Label: "float", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), int64(2), uint64(1), 1.0},
{execute.Time(1), int64(3), uint64(3), 2.0},
{execute.Time(2), int64(6), uint64(6), 4.0},
{execute.Time(3), int64(10), uint64(10), 17.0},
{execute.Time(4), int64(12), uint64(15), 21.0},
{execute.Time(5), int64(18), uint64(21), 26.0},
{execute.Time(6), int64(20), uint64(28), 19.0},
{execute.Time(7), int64(13), uint64(36), 21.0},
{execute.Time(8), int64(16), uint64(45), 15.0},
{execute.Time(9), int64(24), uint64(56), 18.0},
},
}},
},
{
name: "multiple time columns",
spec: &functions.CumulativeSumProcedureSpec{
Columns: []string{"int", "float"},
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "time2", Type: execute.TTime},
{Label: "int", Type: execute.TInt},
{Label: "float", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(0), int64(2), 1.0},
{execute.Time(1), execute.Time(1), int64(1), 1.0},
{execute.Time(2), execute.Time(2), int64(3), 2.0},
{execute.Time(3), execute.Time(3), int64(4), 13.0},
{execute.Time(4), execute.Time(4), int64(2), 4.0},
{execute.Time(5), execute.Time(5), int64(6), 5.0},
{execute.Time(6), execute.Time(6), int64(2), -7.0},
{execute.Time(7), execute.Time(7), int64(7), 2.0},
{execute.Time(8), execute.Time(8), int64(3), -6.0},
{execute.Time(9), execute.Time(9), int64(8), 3.0},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "time2", Type: execute.TTime},
{Label: "int", Type: execute.TInt},
{Label: "float", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(0), execute.Time(0), int64(2), 1.0},
{execute.Time(1), execute.Time(1), int64(3), 2.0},
{execute.Time(2), execute.Time(2), int64(6), 4.0},
{execute.Time(3), execute.Time(3), int64(10), 17.0},
{execute.Time(4), execute.Time(4), int64(12), 21.0},
{execute.Time(5), execute.Time(5), int64(18), 26.0},
{execute.Time(6), execute.Time(6), int64(20), 19.0},
{execute.Time(7), execute.Time(7), int64(27), 21.0},
{execute.Time(8), execute.Time(8), int64(30), 15.0},
{execute.Time(9), execute.Time(9), int64(38), 18.0},
},
}},
},
{
name: "tag columns",
spec: &functions.CumulativeSumProcedureSpec{
Columns: []string{"int"},
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "int", Type: execute.TInt},
{Label: "t", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(0), int64(2), "tag0"},
{execute.Time(1), int64(1), "tag0"},
{execute.Time(2), int64(3), "tag1"},
{execute.Time(3), int64(4), "tag1"},
{execute.Time(4), int64(2), "tag0"},
{execute.Time(5), int64(6), "tag0"},
{execute.Time(6), int64(2), "tag1"},
{execute.Time(7), int64(7), "tag1"},
{execute.Time(8), int64(3), "tag0"},
{execute.Time(9), int64(8), "tag0"},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "int", Type: execute.TInt},
{Label: "t", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(0), int64(2), "tag0"},
{execute.Time(1), int64(3), "tag0"},
{execute.Time(2), int64(6), "tag1"},
{execute.Time(3), int64(10), "tag1"},
{execute.Time(4), int64(12), "tag0"},
{execute.Time(5), int64(18), "tag0"},
{execute.Time(6), int64(20), "tag1"},
{execute.Time(7), int64(27), "tag1"},
{execute.Time(8), int64(30), "tag0"},
{execute.Time(9), int64(38), "tag0"},
},
}},
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
executetest.ProcessTestHelper(
t,
tc.data,
tc.want,
func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
return functions.NewCumulativeSumTransformation(d, c, tc.spec)
},
)
})
}
}

View File

@ -0,0 +1,97 @@
package functions_test
import (
"math/rand"
"time"
"github.com/gonum/stat/distuv"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/execute/executetest"
)
const (
N = 1e6
Mu = 10
Sigma = 3
seed = 42
)
func init() {
query.FinalizeRegistration()
}
// NormalData is a slice of N random values that are normally distributed with mean Mu and standard deviation Sigma.
var NormalData []float64
// NormalBlock is a block of data whose value col is NormalData.
var NormalBlock execute.Block
func init() {
dist := distuv.Normal{
Mu: Mu,
Sigma: Sigma,
Source: rand.New(rand.NewSource(seed)),
}
NormalData = make([]float64, N)
for i := range NormalData {
NormalData[i] = dist.Rand()
}
start := execute.Time(time.Date(2016, 10, 10, 0, 0, 0, 0, time.UTC).UnixNano())
stop := execute.Time(time.Date(2017, 10, 10, 0, 0, 0, 0, time.UTC).UnixNano())
t1Value := "a"
key := execute.NewPartitionKey(
[]execute.ColMeta{
{Label: execute.DefaultStartColLabel, Type: execute.TTime},
{Label: execute.DefaultStopColLabel, Type: execute.TTime},
{Label: "t1", Type: execute.TString},
},
[]interface{}{
start,
stop,
t1Value,
},
)
normalBlockBuilder := execute.NewColListBlockBuilder(key, executetest.UnlimitedAllocator)
normalBlockBuilder.AddCol(execute.ColMeta{Label: execute.DefaultTimeColLabel, Type: execute.TTime})
normalBlockBuilder.AddCol(execute.ColMeta{Label: execute.DefaultStartColLabel, Type: execute.TTime})
normalBlockBuilder.AddCol(execute.ColMeta{Label: execute.DefaultStopColLabel, Type: execute.TTime})
normalBlockBuilder.AddCol(execute.ColMeta{Label: execute.DefaultValueColLabel, Type: execute.TFloat})
normalBlockBuilder.AddCol(execute.ColMeta{Label: "t1", Type: execute.TString})
normalBlockBuilder.AddCol(execute.ColMeta{Label: "t2", Type: execute.TString})
times := make([]execute.Time, N)
startTimes := make([]execute.Time, N)
stopTimes := make([]execute.Time, N)
values := NormalData
t1 := make([]string, N)
t2 := make([]string, N)
for i, v := range values {
startTimes[i] = start
stopTimes[i] = stop
t1[i] = t1Value
// A year is about 31.5 million seconds, so it holds roughly 1 million 31-second intervals.
times[i] = start + execute.Time(time.Duration(i*31)*time.Second)
// Pick t2 based off the value
switch int(v) % 3 {
case 0:
t2[i] = "x"
case 1:
t2[i] = "y"
case 2:
t2[i] = "z"
}
}
normalBlockBuilder.AppendTimes(0, times)
normalBlockBuilder.AppendTimes(1, startTimes)
normalBlockBuilder.AppendTimes(2, stopTimes)
normalBlockBuilder.AppendFloats(3, values)
normalBlockBuilder.AppendStrings(4, t1)
normalBlockBuilder.AppendStrings(5, t2)
NormalBlock, _ = normalBlockBuilder.Block()
}

View File

@ -0,0 +1,355 @@
package functions
import (
"fmt"
"math"
"time"
"github.com/influxdata/ifql/interpreter"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/plan"
"github.com/influxdata/ifql/semantic"
)
const DerivativeKind = "derivative"
type DerivativeOpSpec struct {
Unit query.Duration `json:"unit"`
NonNegative bool `json:"non_negative"`
Columns []string `json:"columns"`
TimeSrc string `json:"time_src"`
}
var derivativeSignature = query.DefaultFunctionSignature()
func init() {
derivativeSignature.Params["unit"] = semantic.Duration
derivativeSignature.Params["nonNegative"] = semantic.Bool
derivativeSignature.Params["columns"] = semantic.NewArrayType(semantic.String)
derivativeSignature.Params["timeSrc"] = semantic.String
query.RegisterFunction(DerivativeKind, createDerivativeOpSpec, derivativeSignature)
query.RegisterOpSpec(DerivativeKind, newDerivativeOp)
plan.RegisterProcedureSpec(DerivativeKind, newDerivativeProcedure, DerivativeKind)
execute.RegisterTransformation(DerivativeKind, createDerivativeTransformation)
}
func createDerivativeOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
}
spec := new(DerivativeOpSpec)
if unit, ok, err := args.GetDuration("unit"); err != nil {
return nil, err
} else if ok {
spec.Unit = unit
} else {
// Default is 1s
spec.Unit = query.Duration(time.Second)
}
if nn, ok, err := args.GetBool("nonNegative"); err != nil {
return nil, err
} else if ok {
spec.NonNegative = nn
}
if timeCol, ok, err := args.GetString("timeSrc"); err != nil {
return nil, err
} else if ok {
spec.TimeSrc = timeCol
} else {
spec.TimeSrc = execute.DefaultTimeColLabel
}
if cols, ok, err := args.GetArray("columns", semantic.String); err != nil {
return nil, err
} else if ok {
columns, err := interpreter.ToStringArray(cols)
if err != nil {
return nil, err
}
spec.Columns = columns
} else {
spec.Columns = []string{execute.DefaultValueColLabel}
}
return spec, nil
}
func newDerivativeOp() query.OperationSpec {
return new(DerivativeOpSpec)
}
func (s *DerivativeOpSpec) Kind() query.OperationKind {
return DerivativeKind
}
type DerivativeProcedureSpec struct {
Unit query.Duration `json:"unit"`
NonNegative bool `json:"non_negative"`
Columns []string `json:"columns"`
TimeCol string `json:"time_col"`
}
func newDerivativeProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*DerivativeOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
}
return &DerivativeProcedureSpec{
Unit: spec.Unit,
NonNegative: spec.NonNegative,
Columns: spec.Columns,
TimeCol: spec.TimeSrc,
}, nil
}
func (s *DerivativeProcedureSpec) Kind() plan.ProcedureKind {
return DerivativeKind
}
func (s *DerivativeProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(DerivativeProcedureSpec)
*ns = *s
if s.Columns != nil {
ns.Columns = make([]string, len(s.Columns))
copy(ns.Columns, s.Columns)
}
return ns
}
func createDerivativeTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*DerivativeProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
}
cache := execute.NewBlockBuilderCache(a.Allocator())
d := execute.NewDataset(id, mode, cache)
t := NewDerivativeTransformation(d, cache, s)
return t, d, nil
}
type derivativeTransformation struct {
d execute.Dataset
cache execute.BlockBuilderCache
unit time.Duration
nonNegative bool
columns []string
timeCol string
}
func NewDerivativeTransformation(d execute.Dataset, cache execute.BlockBuilderCache, spec *DerivativeProcedureSpec) *derivativeTransformation {
return &derivativeTransformation{
d: d,
cache: cache,
unit: time.Duration(spec.Unit),
nonNegative: spec.NonNegative,
columns: spec.Columns,
timeCol: spec.TimeCol,
}
}
func (t *derivativeTransformation) RetractBlock(id execute.DatasetID, key execute.PartitionKey) error {
return t.d.RetractBlock(key)
}
func (t *derivativeTransformation) Process(id execute.DatasetID, b execute.Block) error {
builder, created := t.cache.BlockBuilder(b.Key())
if !created {
return fmt.Errorf("derivative found duplicate block with key: %v", b.Key())
}
cols := b.Cols()
derivatives := make([]*derivative, len(cols))
timeIdx := -1
for j, c := range cols {
found := false
for _, label := range t.columns {
if c.Label == label {
found = true
break
}
}
if c.Label == t.timeCol {
timeIdx = j
}
if found {
dc := c
// Derivative always results in a float
dc.Type = execute.TFloat
builder.AddCol(dc)
derivatives[j] = newDerivative(j, t.unit, t.nonNegative)
} else {
builder.AddCol(c)
}
}
if timeIdx < 0 {
return fmt.Errorf("no column %q exists", t.timeCol)
}
// We need to drop the first row since its derivative is undefined
firstIdx := 1
return b.Do(func(cr execute.ColReader) error {
l := cr.Len()
for j, c := range cols {
d := derivatives[j]
switch c.Type {
case execute.TBool:
builder.AppendBools(j, cr.Bools(j)[firstIdx:])
case execute.TInt:
if d != nil {
for i := 0; i < l; i++ {
time := cr.Times(timeIdx)[i]
v := d.updateInt(time, cr.Ints(j)[i])
if i != 0 || firstIdx == 0 {
builder.AppendFloat(j, v)
}
}
} else {
builder.AppendInts(j, cr.Ints(j)[firstIdx:])
}
case execute.TUInt:
if d != nil {
for i := 0; i < l; i++ {
time := cr.Times(timeIdx)[i]
v := d.updateUInt(time, cr.UInts(j)[i])
if i != 0 || firstIdx == 0 {
builder.AppendFloat(j, v)
}
}
} else {
builder.AppendUInts(j, cr.UInts(j)[firstIdx:])
}
case execute.TFloat:
if d != nil {
for i := 0; i < l; i++ {
time := cr.Times(timeIdx)[i]
v := d.updateFloat(time, cr.Floats(j)[i])
if i != 0 || firstIdx == 0 {
builder.AppendFloat(j, v)
}
}
} else {
builder.AppendFloats(j, cr.Floats(j)[firstIdx:])
}
case execute.TString:
builder.AppendStrings(j, cr.Strings(j)[firstIdx:])
case execute.TTime:
builder.AppendTimes(j, cr.Times(j)[firstIdx:])
}
}
// Now that we skipped the first row, start at 0 for the rest of the batches
firstIdx = 0
return nil
})
}
func (t *derivativeTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
return t.d.UpdateWatermark(mark)
}
func (t *derivativeTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
return t.d.UpdateProcessingTime(pt)
}
func (t *derivativeTransformation) Finish(id execute.DatasetID, err error) {
t.d.Finish(err)
}
func newDerivative(col int, unit time.Duration, nonNegative bool) *derivative {
return &derivative{
col: col,
first: true,
unit: float64(unit),
nonNegative: nonNegative,
}
}
type derivative struct {
col int
first bool
unit float64
nonNegative bool
pIntValue int64
pUIntValue uint64
pFloatValue float64
pTime execute.Time
}
func (d *derivative) updateInt(t execute.Time, v int64) float64 {
if d.first {
d.pTime = t
d.pIntValue = v
d.first = false
return math.NaN()
}
diff := float64(v - d.pIntValue)
if d.nonNegative && diff < 0 {
//TODO(nathanielc): Should we return null when we have null support
// Or should we assume the previous is 0?
diff = float64(v)
}
elapsed := float64(time.Duration(t-d.pTime)) / d.unit
d.pTime = t
d.pIntValue = v
return diff / elapsed
}
func (d *derivative) updateUInt(t execute.Time, v uint64) float64 {
if d.first {
d.pTime = t
d.pUIntValue = v
d.first = false
return math.NaN()
}
var diff float64
if d.pUIntValue > v {
// Prevent uint64 overflow by applying the negative sign after the conversion to a float64.
diff = float64(d.pUIntValue-v) * -1
} else {
diff = float64(v - d.pUIntValue)
}
if d.nonNegative && diff < 0 {
//TODO(nathanielc): Should we return null when we have null support
// Or should we assume the previous is 0?
diff = float64(v)
}
elapsed := float64(time.Duration(t-d.pTime)) / d.unit
d.pTime = t
d.pUIntValue = v
return diff / elapsed
}
func (d *derivative) updateFloat(t execute.Time, v float64) float64 {
if d.first {
d.pTime = t
d.pFloatValue = v
d.first = false
return math.NaN()
}
diff := v - d.pFloatValue
if d.nonNegative && diff < 0 {
//TODO(nathanielc): Should we return null when we have null support
// Or should we assume the previous is 0?
diff = v
}
elapsed := float64(time.Duration(t-d.pTime)) / d.unit
d.pTime = t
d.pFloatValue = v
return diff / elapsed
}
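
All three update methods share one rate formula: the value delta divided by the elapsed time expressed in units. A standalone sketch follows (the rate helper is illustrative, not part of this package), reproducing the "with units" test cases below, where a drop of 10 over two seconds with unit = 1s yields -5.

package main

import (
    "fmt"
    "time"
)

// rate computes (v - prevV) / ((t - prevT) / unit), the same arithmetic
// performed at the end of each derivative update method.
func rate(prevT, t time.Duration, prevV, v, unit float64) float64 {
    elapsed := float64(t-prevT) / unit
    return (v - prevV) / elapsed
}

func main() {
    unit := float64(time.Second)
    fmt.Println(rate(1*time.Second, 3*time.Second, 20, 10, unit)) // -5
}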

View File

@ -0,0 +1,427 @@
package functions_test
import (
"testing"
"time"
"github.com/influxdata/ifql/functions"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/execute/executetest"
"github.com/influxdata/ifql/query/querytest"
)
func TestDerivativeOperation_Marshaling(t *testing.T) {
data := []byte(`{"id":"derivative","kind":"derivative","spec":{"unit":"1m","non_negative":true}}`)
op := &query.Operation{
ID: "derivative",
Spec: &functions.DerivativeOpSpec{
Unit: query.Duration(time.Minute),
NonNegative: true,
},
}
querytest.OperationMarshalingTestHelper(t, data, op)
}
func TestDerivative_PassThrough(t *testing.T) {
executetest.TransformationPassThroughTestHelper(t, func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
s := functions.NewDerivativeTransformation(
d,
c,
&functions.DerivativeProcedureSpec{},
)
return s
})
}
func TestDerivative_Process(t *testing.T) {
testCases := []struct {
name string
spec *functions.DerivativeProcedureSpec
data []execute.Block
want []*executetest.Block
}{
{
name: "float",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
TimeCol: execute.DefaultTimeColLabel,
Unit: 1,
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(1), 2.0},
{execute.Time(2), 1.0},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(2), -1.0},
},
}},
},
{
name: "float with units",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
TimeCol: execute.DefaultTimeColLabel,
Unit: query.Duration(time.Second),
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(1 * time.Second), 2.0},
{execute.Time(3 * time.Second), 1.0},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(3 * time.Second), -0.5},
},
}},
},
{
name: "int",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
TimeCol: execute.DefaultTimeColLabel,
Unit: 1,
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
},
Data: [][]interface{}{
{execute.Time(1), int64(20)},
{execute.Time(2), int64(10)},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(2), -10.0},
},
}},
},
{
name: "int with units",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
TimeCol: execute.DefaultTimeColLabel,
Unit: query.Duration(time.Second),
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
},
Data: [][]interface{}{
{execute.Time(1 * time.Second), int64(20)},
{execute.Time(3 * time.Second), int64(10)},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(3 * time.Second), -5.0},
},
}},
},
{
name: "int non negative",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
TimeCol: execute.DefaultTimeColLabel,
Unit: 1,
NonNegative: true,
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
},
Data: [][]interface{}{
{execute.Time(1), int64(20)},
{execute.Time(2), int64(10)},
{execute.Time(3), int64(20)},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(2), 10.0},
{execute.Time(3), 10.0},
},
}},
},
{
name: "uint",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
TimeCol: execute.DefaultTimeColLabel,
Unit: 1,
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TUInt},
},
Data: [][]interface{}{
{execute.Time(1), uint64(10)},
{execute.Time(2), uint64(20)},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(2), 10.0},
},
}},
},
{
name: "uint with negative result",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
TimeCol: execute.DefaultTimeColLabel,
Unit: 1,
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TUInt},
},
Data: [][]interface{}{
{execute.Time(1), uint64(20)},
{execute.Time(2), uint64(10)},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(2), -10.0},
},
}},
},
{
name: "uint with non negative",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
TimeCol: execute.DefaultTimeColLabel,
Unit: 1,
NonNegative: true,
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TUInt},
},
Data: [][]interface{}{
{execute.Time(1), uint64(20)},
{execute.Time(2), uint64(10)},
{execute.Time(3), uint64(20)},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(2), 10.0},
{execute.Time(3), 10.0},
},
}},
},
{
name: "uint with units",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
TimeCol: execute.DefaultTimeColLabel,
Unit: query.Duration(time.Second),
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TUInt},
},
Data: [][]interface{}{
{execute.Time(1 * time.Second), uint64(20)},
{execute.Time(3 * time.Second), uint64(10)},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(3 * time.Second), -5.0},
},
}},
},
{
name: "non negative one block",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
TimeCol: execute.DefaultTimeColLabel,
Unit: 1,
NonNegative: true,
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(1), 2.0},
{execute.Time(2), 1.0},
{execute.Time(3), 2.0},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(2), 1.0},
{execute.Time(3), 1.0},
},
}},
},
{
name: "float with tags",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
TimeCol: execute.DefaultTimeColLabel,
Unit: 1,
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(1), 2.0, "a"},
{execute.Time(2), 1.0, "b"},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(2), -1.0, "b"},
},
}},
},
{
name: "float with multiple values",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{"x", "y"},
TimeCol: execute.DefaultTimeColLabel,
Unit: 1,
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(1), 2.0, 20.0},
{execute.Time(2), 1.0, 10.0},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(2), -1.0, -10.0},
},
}},
},
{
name: "float non negative with multiple values",
spec: &functions.DerivativeProcedureSpec{
Columns: []string{"x", "y"},
TimeCol: execute.DefaultTimeColLabel,
Unit: 1,
NonNegative: true,
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(1), 2.0, 20.0},
{execute.Time(2), 1.0, 10.0},
{execute.Time(3), 2.0, 0.0},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(2), 1.0, 10.0},
{execute.Time(3), 1.0, 0.0},
},
}},
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
executetest.ProcessTestHelper(
t,
tc.data,
tc.want,
func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
return functions.NewDerivativeTransformation(d, c, tc.spec)
},
)
})
}
}

View File

@ -0,0 +1,313 @@
package functions
import (
"fmt"
"math"
"github.com/influxdata/ifql/interpreter"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/plan"
"github.com/influxdata/ifql/semantic"
)
const DifferenceKind = "difference"
type DifferenceOpSpec struct {
NonNegative bool `json:"non_negative"`
Columns []string `json:"columns"`
}
var differenceSignature = query.DefaultFunctionSignature()
func init() {
differenceSignature.Params["nonNegative"] = semantic.Bool
differenceSignature.Params["columns"] = semantic.NewArrayType(semantic.String)
query.RegisterFunction(DifferenceKind, createDifferenceOpSpec, differenceSignature)
query.RegisterOpSpec(DifferenceKind, newDifferenceOp)
plan.RegisterProcedureSpec(DifferenceKind, newDifferenceProcedure, DifferenceKind)
execute.RegisterTransformation(DifferenceKind, createDifferenceTransformation)
}
func createDifferenceOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
}
spec := new(DifferenceOpSpec)
if nn, ok, err := args.GetBool("nonNegative"); err != nil {
return nil, err
} else if ok {
spec.NonNegative = nn
}
if cols, ok, err := args.GetArray("columns", semantic.String); err != nil {
return nil, err
} else if ok {
columns, err := interpreter.ToStringArray(cols)
if err != nil {
return nil, err
}
spec.Columns = columns
} else {
spec.Columns = []string{execute.DefaultValueColLabel}
}
return spec, nil
}
func newDifferenceOp() query.OperationSpec {
return new(DifferenceOpSpec)
}
func (s *DifferenceOpSpec) Kind() query.OperationKind {
return DifferenceKind
}
type DifferenceProcedureSpec struct {
NonNegative bool `json:"non_negative"`
Columns []string `json:"columns"`
}
func newDifferenceProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*DifferenceOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
}
return &DifferenceProcedureSpec{
NonNegative: spec.NonNegative,
Columns: spec.Columns,
}, nil
}
func (s *DifferenceProcedureSpec) Kind() plan.ProcedureKind {
return DifferenceKind
}
func (s *DifferenceProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(DifferenceProcedureSpec)
*ns = *s
if s.Columns != nil {
ns.Columns = make([]string, len(s.Columns))
copy(ns.Columns, s.Columns)
}
return ns
}
func createDifferenceTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*DifferenceProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
}
cache := execute.NewBlockBuilderCache(a.Allocator())
d := execute.NewDataset(id, mode, cache)
t := NewDifferenceTransformation(d, cache, s)
return t, d, nil
}
type differenceTransformation struct {
d execute.Dataset
cache execute.BlockBuilderCache
nonNegative bool
columns []string
}
func NewDifferenceTransformation(d execute.Dataset, cache execute.BlockBuilderCache, spec *DifferenceProcedureSpec) *differenceTransformation {
return &differenceTransformation{
d: d,
cache: cache,
nonNegative: spec.NonNegative,
columns: spec.Columns,
}
}
func (t *differenceTransformation) RetractBlock(id execute.DatasetID, key execute.PartitionKey) error {
return t.d.RetractBlock(key)
}
func (t *differenceTransformation) Process(id execute.DatasetID, b execute.Block) error {
builder, created := t.cache.BlockBuilder(b.Key())
if !created {
return fmt.Errorf("difference found duplicate block with key: %v", b.Key())
}
cols := b.Cols()
differences := make([]*difference, len(cols))
for j, c := range cols {
found := false
for _, label := range t.columns {
if c.Label == label {
found = true
break
}
}
if found {
var typ execute.DataType
switch c.Type {
case execute.TInt, execute.TUInt:
typ = execute.TInt
case execute.TFloat:
typ = execute.TFloat
}
builder.AddCol(execute.ColMeta{
Label: c.Label,
Type: typ,
})
differences[j] = newDifference(j, t.nonNegative)
} else {
builder.AddCol(c)
}
}
// We need to drop the first row since its difference is undefined
firstIdx := 1
return b.Do(func(cr execute.ColReader) error {
l := cr.Len()
for j, c := range cols {
d := differences[j]
switch c.Type {
case execute.TBool:
builder.AppendBools(j, cr.Bools(j)[firstIdx:])
case execute.TInt:
if d != nil {
for i := 0; i < l; i++ {
v := d.updateInt(cr.Ints(j)[i])
if i != 0 || firstIdx == 0 {
builder.AppendInt(j, v)
}
}
} else {
builder.AppendInts(j, cr.Ints(j)[firstIdx:])
}
case execute.TUInt:
if d != nil {
for i := 0; i < l; i++ {
v := d.updateUInt(cr.UInts(j)[i])
if i != 0 || firstIdx == 0 {
builder.AppendInt(j, v)
}
}
} else {
builder.AppendUInts(j, cr.UInts(j)[firstIdx:])
}
case execute.TFloat:
if d != nil {
for i := 0; i < l; i++ {
v := d.updateFloat(cr.Floats(j)[i])
if i != 0 || firstIdx == 0 {
builder.AppendFloat(j, v)
}
}
} else {
builder.AppendFloats(j, cr.Floats(j)[firstIdx:])
}
case execute.TString:
builder.AppendStrings(j, cr.Strings(j)[firstIdx:])
case execute.TTime:
builder.AppendTimes(j, cr.Times(j)[firstIdx:])
}
}
// Now that we skipped the first row, start at 0 for the rest of the batches
firstIdx = 0
return nil
})
}
func (t *differenceTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
return t.d.UpdateWatermark(mark)
}
func (t *differenceTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
return t.d.UpdateProcessingTime(pt)
}
func (t *differenceTransformation) Finish(id execute.DatasetID, err error) {
t.d.Finish(err)
}
func newDifference(col int, nonNegative bool) *difference {
return &difference{
col: col,
first: true,
nonNegative: nonNegative,
}
}
type difference struct {
col int
first bool
nonNegative bool
pIntValue int64
pUIntValue uint64
pFloatValue float64
}
func (d *difference) updateInt(v int64) int64 {
if d.first {
d.pIntValue = v
d.first = false
return 0
}
diff := v - d.pIntValue
d.pIntValue = v
if d.nonNegative && diff < 0 {
//TODO(nathanielc): Return null when we have null support
// Also see https://github.com/influxdata/ifql/issues/217
return v
}
return diff
}
func (d *difference) updateUInt(v uint64) int64 {
if d.first {
d.pUIntValue = v
d.first = false
return 0
}
var diff int64
if d.pUIntValue > v {
// Prevent uint64 overflow by applying the negative sign after the conversion to an int64.
diff = int64(d.pUIntValue-v) * -1
} else {
diff = int64(v - d.pUIntValue)
}
d.pUIntValue = v
if d.nonNegative && diff < 0 {
//TODO(nathanielc): Return null when we have null support
// Also see https://github.com/influxdata/ifql/issues/217
return int64(v)
}
return diff
}
func (d *difference) updateFloat(v float64) float64 {
if d.first {
d.pFloatValue = v
d.first = false
return math.NaN()
}
diff := v - d.pFloatValue
d.pFloatValue = v
if d.nonNegative && diff < 0 {
//TODO(nathanielc): Return null when we have null support
// Also see https://github.com/influxdata/ifql/issues/217
return v
}
return diff
}
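
updateUInt above must not compute cur - prev directly when the series decreases, since uint64 subtraction wraps around instead of going negative. A standalone sketch of that rule follows (safeDiff is an illustrative helper, not part of this package).

package main

import "fmt"

// safeDiff subtracts the smaller operand first and applies the sign
// only after converting to int64, avoiding uint64 underflow.
func safeDiff(prev, cur uint64) int64 {
    if prev > cur {
        return int64(prev-cur) * -1
    }
    return int64(cur - prev)
}

func main() {
    fmt.Println(safeDiff(20, 10)) // -10
    fmt.Println(safeDiff(10, 20)) // 10
}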

View File

@ -0,0 +1,324 @@
package functions_test
import (
"testing"
"github.com/influxdata/ifql/functions"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/execute/executetest"
"github.com/influxdata/ifql/query/querytest"
)
func TestDifferenceOperation_Marshaling(t *testing.T) {
data := []byte(`{"id":"difference","kind":"difference","spec":{"non_negative":true}}`)
op := &query.Operation{
ID: "difference",
Spec: &functions.DifferenceOpSpec{
NonNegative: true,
},
}
querytest.OperationMarshalingTestHelper(t, data, op)
}
func TestDifference_PassThrough(t *testing.T) {
executetest.TransformationPassThroughTestHelper(t, func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
s := functions.NewDifferenceTransformation(
d,
c,
&functions.DifferenceProcedureSpec{},
)
return s
})
}
func TestDifference_Process(t *testing.T) {
testCases := []struct {
name string
spec *functions.DifferenceProcedureSpec
data []execute.Block
want []*executetest.Block
}{
{
name: "float",
spec: &functions.DifferenceProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(1), 2.0},
{execute.Time(2), 1.0},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(2), -1.0},
},
}},
},
{
name: "int",
spec: &functions.DifferenceProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
},
Data: [][]interface{}{
{execute.Time(1), int64(20)},
{execute.Time(2), int64(10)},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
},
Data: [][]interface{}{
{execute.Time(2), int64(-10)},
},
}},
},
{
name: "int non negative",
spec: &functions.DifferenceProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
NonNegative: true,
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
},
Data: [][]interface{}{
{execute.Time(1), int64(20)},
{execute.Time(2), int64(10)},
{execute.Time(3), int64(20)},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
},
Data: [][]interface{}{
{execute.Time(2), int64(10)},
{execute.Time(3), int64(10)},
},
}},
},
{
name: "uint",
spec: &functions.DifferenceProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TUInt},
},
Data: [][]interface{}{
{execute.Time(1), uint64(10)},
{execute.Time(2), uint64(20)},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
},
Data: [][]interface{}{
{execute.Time(2), int64(10)},
},
}},
},
{
name: "uint with negative result",
spec: &functions.DifferenceProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TUInt},
},
Data: [][]interface{}{
{execute.Time(1), uint64(20)},
{execute.Time(2), uint64(10)},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
},
Data: [][]interface{}{
{execute.Time(2), int64(-10)},
},
}},
},
{
name: "uint with non negative",
spec: &functions.DifferenceProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
NonNegative: true,
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TUInt},
},
Data: [][]interface{}{
{execute.Time(1), uint64(20)},
{execute.Time(2), uint64(10)},
{execute.Time(3), uint64(20)},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
},
Data: [][]interface{}{
{execute.Time(2), int64(10)},
{execute.Time(3), int64(10)},
},
}},
},
{
name: "non negative one block",
spec: &functions.DifferenceProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
NonNegative: true,
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(1), 2.0},
{execute.Time(2), 1.0},
{execute.Time(3), 2.0},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(2), 1.0},
{execute.Time(3), 1.0},
},
}},
},
{
name: "float with tags",
spec: &functions.DifferenceProcedureSpec{
Columns: []string{execute.DefaultValueColLabel},
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(1), 2.0, "a"},
{execute.Time(2), 1.0, "b"},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(2), -1.0, "b"},
},
}},
},
{
name: "float with multiple values",
spec: &functions.DifferenceProcedureSpec{
Columns: []string{"x", "y"},
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(1), 2.0, 20.0},
{execute.Time(2), 1.0, 10.0},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(2), -1.0, -10.0},
},
}},
},
{
name: "float non negative with multiple values",
spec: &functions.DifferenceProcedureSpec{
Columns: []string{"x", "y"},
NonNegative: true,
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(1), 2.0, 20.0},
{execute.Time(2), 1.0, 10.0},
{execute.Time(3), 2.0, 0.0},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(2), 1.0, 10.0},
{execute.Time(3), 1.0, 0.0},
},
}},
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
executetest.ProcessTestHelper(
t,
tc.data,
tc.want,
func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
return functions.NewDifferenceTransformation(d, c, tc.spec)
},
)
})
}
}

273
query/functions/distinct.go Normal file

@ -0,0 +1,273 @@
package functions
import (
"fmt"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/plan"
"github.com/influxdata/ifql/semantic"
)
const DistinctKind = "distinct"
type DistinctOpSpec struct {
Column string `json:"column"`
}
var distinctSignature = query.DefaultFunctionSignature()
func init() {
distinctSignature.Params["column"] = semantic.String
query.RegisterFunction(DistinctKind, createDistinctOpSpec, distinctSignature)
query.RegisterOpSpec(DistinctKind, newDistinctOp)
plan.RegisterProcedureSpec(DistinctKind, newDistinctProcedure, DistinctKind)
plan.RegisterRewriteRule(DistinctPointLimitRewriteRule{})
execute.RegisterTransformation(DistinctKind, createDistinctTransformation)
}
func createDistinctOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
}
spec := new(DistinctOpSpec)
if col, ok, err := args.GetString("column"); err != nil {
return nil, err
} else if ok {
spec.Column = col
} else {
spec.Column = execute.DefaultValueColLabel
}
return spec, nil
}
func newDistinctOp() query.OperationSpec {
return new(DistinctOpSpec)
}
func (s *DistinctOpSpec) Kind() query.OperationKind {
return DistinctKind
}
type DistinctProcedureSpec struct {
Column string
}
func newDistinctProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*DistinctOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
}
return &DistinctProcedureSpec{
Column: spec.Column,
}, nil
}
func (s *DistinctProcedureSpec) Kind() plan.ProcedureKind {
return DistinctKind
}
func (s *DistinctProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(DistinctProcedureSpec)
*ns = *s
return ns
}
type DistinctPointLimitRewriteRule struct {
}
func (r DistinctPointLimitRewriteRule) Root() plan.ProcedureKind {
return FromKind
}
func (r DistinctPointLimitRewriteRule) Rewrite(pr *plan.Procedure, planner plan.PlanRewriter) error {
fromSpec, ok := pr.Spec.(*FromProcedureSpec)
if !ok {
return nil
}
var distinct *DistinctProcedureSpec
pr.DoChildren(func(child *plan.Procedure) {
if d, ok := child.Spec.(*DistinctProcedureSpec); ok {
distinct = d
}
})
if distinct == nil {
return nil
}
groupStar := !fromSpec.GroupingSet && distinct.Column != execute.DefaultValueColLabel
groupByColumn := fromSpec.GroupingSet && ((len(fromSpec.GroupKeys) > 0 && execute.ContainsStr(fromSpec.GroupKeys, distinct.Column)) || (len(fromSpec.GroupExcept) > 0 && !execute.ContainsStr(fromSpec.GroupExcept, distinct.Column)))
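// Both conditions identify cases where the rule assumes the distinct column
// is constant per storage series: either no grouping is set and the column
// is not _value (i.e. a tag), or the column is part of the group key. A
// single point per series is then sufficient, which the limit below encodes.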
if groupStar || groupByColumn {
fromSpec.LimitSet = true
fromSpec.PointsLimit = -1
return nil
}
return nil
}
func createDistinctTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*DistinctProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
}
cache := execute.NewBlockBuilderCache(a.Allocator())
d := execute.NewDataset(id, mode, cache)
t := NewDistinctTransformation(d, cache, s)
return t, d, nil
}
type distinctTransformation struct {
d execute.Dataset
cache execute.BlockBuilderCache
column string
}
func NewDistinctTransformation(d execute.Dataset, cache execute.BlockBuilderCache, spec *DistinctProcedureSpec) *distinctTransformation {
return &distinctTransformation{
d: d,
cache: cache,
column: spec.Column,
}
}
func (t *distinctTransformation) RetractBlock(id execute.DatasetID, key execute.PartitionKey) error {
return t.d.RetractBlock(key)
}
func (t *distinctTransformation) Process(id execute.DatasetID, b execute.Block) error {
builder, created := t.cache.BlockBuilder(b.Key())
if !created {
return fmt.Errorf("distinct found duplicate block with key: %v", b.Key())
}
colIdx := execute.ColIdx(t.column, b.Cols())
if colIdx < 0 {
return fmt.Errorf("no column %q exists", t.column)
}
col := b.Cols()[colIdx]
execute.AddBlockKeyCols(b.Key(), builder)
// colIdx indexes the input block; outIdx indexes the builder, whose
// columns are the partition key columns followed by the value column.
outIdx := builder.AddCol(execute.ColMeta{
Label: execute.DefaultValueColLabel,
Type: col.Type,
})
if b.Key().HasCol(t.column) {
j := execute.ColIdx(t.column, b.Key().Cols())
switch col.Type {
case execute.TBool:
builder.AppendBool(outIdx, b.Key().ValueBool(j))
case execute.TInt:
builder.AppendInt(outIdx, b.Key().ValueInt(j))
case execute.TUInt:
builder.AppendUInt(outIdx, b.Key().ValueUInt(j))
case execute.TFloat:
builder.AppendFloat(outIdx, b.Key().ValueFloat(j))
case execute.TString:
builder.AppendString(outIdx, b.Key().ValueString(j))
case execute.TTime:
builder.AppendTime(outIdx, b.Key().ValueTime(j))
}
execute.AppendKeyValues(b.Key(), builder)
// TODO: this is a hack
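// The block is still iterated, presumably so it is consumed and released,
// even though its rows add nothing beyond the key value appended above.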
return b.Do(func(execute.ColReader) error {
return nil
})
}
var (
boolDistinct map[bool]bool
intDistinct map[int64]bool
uintDistinct map[uint64]bool
floatDistinct map[float64]bool
stringDistinct map[string]bool
timeDistinct map[execute.Time]bool
)
switch col.Type {
case execute.TBool:
boolDistinct = make(map[bool]bool)
case execute.TInt:
intDistinct = make(map[int64]bool)
case execute.TUInt:
uintDistinct = make(map[uint64]bool)
case execute.TFloat:
floatDistinct = make(map[float64]bool)
case execute.TString:
stringDistinct = make(map[string]bool)
case execute.TTime:
timeDistinct = make(map[execute.Time]bool)
}
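// Only the map matching the column's type is allocated; it serves as a set
// of values already emitted for this block.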
return b.Do(func(cr execute.ColReader) error {
l := cr.Len()
for i := 0; i < l; i++ {
// Check distinct
switch col.Type {
case execute.TBool:
v := cr.Bools(colIdx)[i]
if boolDistinct[v] {
continue
}
boolDistinct[v] = true
builder.AppendBool(outIdx, v)
case execute.TInt:
v := cr.Ints(colIdx)[i]
if intDistinct[v] {
continue
}
intDistinct[v] = true
builder.AppendInt(outIdx, v)
case execute.TUInt:
v := cr.UInts(colIdx)[i]
if uintDistinct[v] {
continue
}
uintDistinct[v] = true
builder.AppendUInt(outIdx, v)
case execute.TFloat:
v := cr.Floats(colIdx)[i]
if floatDistinct[v] {
continue
}
floatDistinct[v] = true
builder.AppendFloat(outIdx, v)
case execute.TString:
v := cr.Strings(colIdx)[i]
if stringDistinct[v] {
continue
}
stringDistinct[v] = true
builder.AppendString(outIdx, v)
case execute.TTime:
v := cr.Times(colIdx)[i]
if timeDistinct[v] {
continue
}
timeDistinct[v] = true
builder.AppendTime(outIdx, v)
}
execute.AppendKeyValues(b.Key(), builder)
}
return nil
})
}
func (t *distinctTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
return t.d.UpdateWatermark(mark)
}
func (t *distinctTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
return t.d.UpdateProcessingTime(pt)
}
func (t *distinctTransformation) Finish(id execute.DatasetID, err error) {
t.d.Finish(err)
}

263
query/functions/filter.go Normal file

@ -0,0 +1,263 @@
package functions
import (
"fmt"
"log"
"github.com/influxdata/ifql/ast"
"github.com/influxdata/ifql/interpreter"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/plan"
"github.com/influxdata/ifql/semantic"
)
const FilterKind = "filter"
type FilterOpSpec struct {
Fn *semantic.FunctionExpression `json:"fn"`
}
var filterSignature = query.DefaultFunctionSignature()
func init() {
//TODO(nathanielc): Use complete function signature here, or formalize soft kind validation instead of complete function validation.
filterSignature.Params["fn"] = semantic.Function
query.RegisterFunction(FilterKind, createFilterOpSpec, filterSignature)
query.RegisterOpSpec(FilterKind, newFilterOp)
plan.RegisterProcedureSpec(FilterKind, newFilterProcedure, FilterKind)
execute.RegisterTransformation(FilterKind, createFilterTransformation)
}
func createFilterOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
}
f, err := args.GetRequiredFunction("fn")
if err != nil {
return nil, err
}
fn, err := interpreter.ResolveFunction(f)
if err != nil {
return nil, err
}
return &FilterOpSpec{
Fn: fn,
}, nil
}
func newFilterOp() query.OperationSpec {
return new(FilterOpSpec)
}
func (s *FilterOpSpec) Kind() query.OperationKind {
return FilterKind
}
type FilterProcedureSpec struct {
Fn *semantic.FunctionExpression
}
func newFilterProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*FilterOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
}
return &FilterProcedureSpec{
Fn: spec.Fn,
}, nil
}
func (s *FilterProcedureSpec) Kind() plan.ProcedureKind {
return FilterKind
}
func (s *FilterProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(FilterProcedureSpec)
ns.Fn = s.Fn.Copy().(*semantic.FunctionExpression)
return ns
}
func (s *FilterProcedureSpec) PushDownRules() []plan.PushDownRule {
return []plan.PushDownRule{
{
Root: FromKind,
Through: []plan.ProcedureKind{GroupKind, LimitKind, RangeKind},
Match: func(spec plan.ProcedureSpec) bool {
// TODO(nathanielc): Remove once row functions support calling functions
if _, ok := s.Fn.Body.(semantic.Expression); !ok {
return false
}
fs := spec.(*FromProcedureSpec)
if fs.Filter != nil {
if _, ok := fs.Filter.Body.(semantic.Expression); !ok {
return false
}
}
return true
},
},
{
Root: FilterKind,
Through: []plan.ProcedureKind{GroupKind, LimitKind, RangeKind},
Match: func(spec plan.ProcedureSpec) bool {
// TODO(nathanielc): Remove once row functions support calling functions
if _, ok := s.Fn.Body.(semantic.Expression); !ok {
return false
}
fs := spec.(*FilterProcedureSpec)
if _, ok := fs.Fn.Body.(semantic.Expression); !ok {
return false
}
return true
},
},
}
}
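// PushDown merges this filter's predicate into the root procedure: into the
// storage read when the root is from(), or AND-ed into an existing filter.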
func (s *FilterProcedureSpec) PushDown(root *plan.Procedure, dup func() *plan.Procedure) {
switch spec := root.Spec.(type) {
case *FromProcedureSpec:
if spec.FilterSet {
spec.Filter = mergeArrowFunction(spec.Filter, s.Fn)
return
}
spec.FilterSet = true
spec.Filter = s.Fn
case *FilterProcedureSpec:
spec.Fn = mergeArrowFunction(spec.Fn, s.Fn)
}
}
func mergeArrowFunction(a, b *semantic.FunctionExpression) *semantic.FunctionExpression {
fn := a.Copy().(*semantic.FunctionExpression)
aExp, aOK := a.Body.(semantic.Expression)
bExp, bOK := b.Body.(semantic.Expression)
if aOK && bOK {
fn.Body = &semantic.LogicalExpression{
Operator: ast.AndOperator,
Left: aExp,
Right: bExp,
}
return fn
}
// TODO(nathanielc): This code is unreachable while the current PushDownRule Match function is in place.
and := &semantic.LogicalExpression{
Operator: ast.AndOperator,
Left: aExp,
Right: bExp,
}
// Create a pass-through arguments expression
passThroughArgs := &semantic.ObjectExpression{
Properties: make([]*semantic.Property, len(a.Params)),
}
for i, p := range a.Params {
passThroughArgs.Properties[i] = &semantic.Property{
Key: p.Key,
//TODO(nathanielc): Construct valid IdentifierExpression with Declaration for the value.
//Value: p.Key,
}
}
if !aOK {
// Rewrite left expression as a function call.
and.Left = &semantic.CallExpression{
Callee: a.Copy().(*semantic.FunctionExpression),
Arguments: passThroughArgs.Copy().(*semantic.ObjectExpression),
}
}
if !bOK {
// Rewrite right expression as a function call.
and.Right = &semantic.CallExpression{
Callee: b.Copy().(*semantic.FunctionExpression),
Arguments: passThroughArgs.Copy().(*semantic.ObjectExpression),
}
}
fn.Body = and
return fn
}
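// For example, merging (r) => r._measurement != "mem" with
// (r) => r._measurement != "cpu" produces
// (r) => r._measurement != "mem" and r._measurement != "cpu",
// the behavior exercised by TestFilter_PushDown_MergeExpressions in
// filter_test.go.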
func createFilterTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*FilterProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
}
cache := execute.NewBlockBuilderCache(a.Allocator())
d := execute.NewDataset(id, mode, cache)
t, err := NewFilterTransformation(d, cache, s)
if err != nil {
return nil, nil, err
}
return t, d, nil
}
type filterTransformation struct {
d execute.Dataset
cache execute.BlockBuilderCache
fn *execute.RowPredicateFn
}
func NewFilterTransformation(d execute.Dataset, cache execute.BlockBuilderCache, spec *FilterProcedureSpec) (*filterTransformation, error) {
fn, err := execute.NewRowPredicateFn(spec.Fn)
if err != nil {
return nil, err
}
return &filterTransformation{
d: d,
cache: cache,
fn: fn,
}, nil
}
func (t *filterTransformation) RetractBlock(id execute.DatasetID, key execute.PartitionKey) error {
return t.d.RetractBlock(key)
}
func (t *filterTransformation) Process(id execute.DatasetID, b execute.Block) error {
builder, created := t.cache.BlockBuilder(b.Key())
if !created {
return fmt.Errorf("filter found duplicate block with key: %v", b.Key())
}
execute.AddBlockCols(b, builder)
// Prepare the function for the column types.
cols := b.Cols()
if err := t.fn.Prepare(cols); err != nil {
// TODO(nathanielc): Should we not fail the query for failed compilation?
return err
}
// Append only matching rows to block
return b.Do(func(cr execute.ColReader) error {
l := cr.Len()
for i := 0; i < l; i++ {
if pass, err := t.fn.Eval(i, cr); err != nil {
log.Printf("failed to evaluate filter expression: %v", err)
continue
} else if !pass {
// No match, skipping
continue
}
execute.AppendRecord(i, cr, builder)
}
return nil
})
}
func (t *filterTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
return t.d.UpdateWatermark(mark)
}
func (t *filterTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
return t.d.UpdateProcessingTime(pt)
}
func (t *filterTransformation) Finish(id execute.DatasetID, err error) {
t.d.Finish(err)
}

897
query/functions/filter_test.go Normal file

@ -0,0 +1,897 @@
package functions_test
import (
"regexp"
"testing"
"time"
"github.com/influxdata/ifql/ast"
"github.com/influxdata/ifql/functions"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/execute/executetest"
"github.com/influxdata/ifql/query/plan"
"github.com/influxdata/ifql/query/plan/plantest"
"github.com/influxdata/ifql/query/querytest"
"github.com/influxdata/ifql/semantic"
)
func TestFilter_NewQuery(t *testing.T) {
tests := []querytest.NewQueryTestCase{
{
Name: "from with database filter and range",
Raw: `from(db:"mydb") |> filter(fn: (r) => r["t1"]=="val1" and r["t2"]=="val2") |> range(start:-4h, stop:-2h) |> count()`,
Want: &query.Spec{
Operations: []*query.Operation{
{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
},
},
{
ID: "filter1",
Spec: &functions.FilterOpSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.LogicalExpression{
Operator: ast.AndOperator,
Left: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t1",
},
Right: &semantic.StringLiteral{Value: "val1"},
},
Right: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t2",
},
Right: &semantic.StringLiteral{Value: "val2"},
},
},
},
},
},
{
ID: "range2",
Spec: &functions.RangeOpSpec{
Start: query.Time{
Relative: -4 * time.Hour,
IsRelative: true,
},
Stop: query.Time{
Relative: -2 * time.Hour,
IsRelative: true,
},
},
},
{
ID: "count3",
Spec: &functions.CountOpSpec{
AggregateConfig: execute.DefaultAggregateConfig,
},
},
},
Edges: []query.Edge{
{Parent: "from0", Child: "filter1"},
{Parent: "filter1", Child: "range2"},
{Parent: "range2", Child: "count3"},
},
},
},
{
Name: "from with database filter (and with or) and range",
Raw: `from(db:"mydb")
|> filter(fn: (r) =>
(
(r["t1"]=="val1")
and
(r["t2"]=="val2")
)
or
(r["t3"]=="val3")
)
|> range(start:-4h, stop:-2h)
|> count()`,
Want: &query.Spec{
Operations: []*query.Operation{
{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
},
},
{
ID: "filter1",
Spec: &functions.FilterOpSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.LogicalExpression{
Operator: ast.OrOperator,
Left: &semantic.LogicalExpression{
Operator: ast.AndOperator,
Left: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t1",
},
Right: &semantic.StringLiteral{Value: "val1"},
},
Right: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t2",
},
Right: &semantic.StringLiteral{Value: "val2"},
},
},
Right: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t3",
},
Right: &semantic.StringLiteral{Value: "val3"},
},
},
},
},
},
{
ID: "range2",
Spec: &functions.RangeOpSpec{
Start: query.Time{
Relative: -4 * time.Hour,
IsRelative: true,
},
Stop: query.Time{
Relative: -2 * time.Hour,
IsRelative: true,
},
},
},
{
ID: "count3",
Spec: &functions.CountOpSpec{
AggregateConfig: execute.DefaultAggregateConfig,
},
},
},
Edges: []query.Edge{
{Parent: "from0", Child: "filter1"},
{Parent: "filter1", Child: "range2"},
{Parent: "range2", Child: "count3"},
},
},
},
{
Name: "from with database filter including fields",
Raw: `from(db:"mydb")
|> filter(fn: (r) =>
(r["t1"] =="val1")
and
(r["_field"] == 10)
)
|> range(start:-4h, stop:-2h)
|> count()`,
Want: &query.Spec{
Operations: []*query.Operation{
{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
},
},
{
ID: "filter1",
Spec: &functions.FilterOpSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.LogicalExpression{
Operator: ast.AndOperator,
Left: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t1",
},
Right: &semantic.StringLiteral{Value: "val1"},
},
Right: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_field",
},
Right: &semantic.IntegerLiteral{Value: 10},
},
},
},
},
},
{
ID: "range2",
Spec: &functions.RangeOpSpec{
Start: query.Time{
Relative: -4 * time.Hour,
IsRelative: true,
},
Stop: query.Time{
Relative: -2 * time.Hour,
IsRelative: true,
},
},
},
{
ID: "count3",
Spec: &functions.CountOpSpec{
AggregateConfig: execute.DefaultAggregateConfig,
},
},
},
Edges: []query.Edge{
{Parent: "from0", Child: "filter1"},
{Parent: "filter1", Child: "range2"},
{Parent: "range2", Child: "count3"},
},
},
},
{
Name: "from with database filter with no parens including fields",
Raw: `from(db:"mydb")
|> filter(fn: (r) =>
r["t1"]=="val1"
and
r["_field"] == 10
)
|> range(start:-4h, stop:-2h)
|> count()`,
Want: &query.Spec{
Operations: []*query.Operation{
{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
},
},
{
ID: "filter1",
Spec: &functions.FilterOpSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.LogicalExpression{
Operator: ast.AndOperator,
Left: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t1",
},
Right: &semantic.StringLiteral{Value: "val1"},
},
Right: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_field",
},
Right: &semantic.IntegerLiteral{Value: 10},
},
},
},
},
},
{
ID: "range2",
Spec: &functions.RangeOpSpec{
Start: query.Time{
Relative: -4 * time.Hour,
IsRelative: true,
},
Stop: query.Time{
Relative: -2 * time.Hour,
IsRelative: true,
},
},
},
{
ID: "count3",
Spec: &functions.CountOpSpec{
AggregateConfig: execute.DefaultAggregateConfig,
},
},
},
Edges: []query.Edge{
{Parent: "from0", Child: "filter1"},
{Parent: "filter1", Child: "range2"},
{Parent: "range2", Child: "count3"},
},
},
},
{
Name: "from with database filter with no parens including regex and field",
Raw: `from(db:"mydb")
|> filter(fn: (r) =>
r["t1"]==/val1/
and
r["_field"] == 10.5
)
|> range(start:-4h, stop:-2h)
|> count()`,
Want: &query.Spec{
Operations: []*query.Operation{
{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
},
},
{
ID: "filter1",
Spec: &functions.FilterOpSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.LogicalExpression{
Operator: ast.AndOperator,
Left: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t1",
},
Right: &semantic.RegexpLiteral{Value: regexp.MustCompile("val1")},
},
Right: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_field",
},
Right: &semantic.FloatLiteral{Value: 10.5},
},
},
},
},
},
{
ID: "range2",
Spec: &functions.RangeOpSpec{
Start: query.Time{
Relative: -4 * time.Hour,
IsRelative: true,
},
Stop: query.Time{
Relative: -2 * time.Hour,
IsRelative: true,
},
},
},
{
ID: "count3",
Spec: &functions.CountOpSpec{
AggregateConfig: execute.DefaultAggregateConfig,
},
},
},
Edges: []query.Edge{
{Parent: "from0", Child: "filter1"},
{Parent: "filter1", Child: "range2"},
{Parent: "range2", Child: "count3"},
},
},
},
{
Name: "from with database regex with escape",
Raw: `from(db:"mydb")
|> filter(fn: (r) =>
r["t1"]==/va\/l1/
)`,
Want: &query.Spec{
Operations: []*query.Operation{
{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
},
},
{
ID: "filter1",
Spec: &functions.FilterOpSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t1",
},
Right: &semantic.RegexpLiteral{Value: regexp.MustCompile(`va/l1`)},
},
},
},
},
},
Edges: []query.Edge{
{Parent: "from0", Child: "filter1"},
},
},
},
{
Name: "from with database with two regex",
Raw: `from(db:"mydb")
|> filter(fn: (r) =>
r["t1"]==/va\/l1/
and
r["t2"] != /val2/
)`,
Want: &query.Spec{
Operations: []*query.Operation{
{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
},
},
{
ID: "filter1",
Spec: &functions.FilterOpSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.LogicalExpression{
Operator: ast.AndOperator,
Left: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t1",
},
Right: &semantic.RegexpLiteral{Value: regexp.MustCompile(`va/l1`)},
},
Right: &semantic.BinaryExpression{
Operator: ast.NotEqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t2",
},
Right: &semantic.RegexpLiteral{Value: regexp.MustCompile(`val2`)},
},
},
},
},
},
},
Edges: []query.Edge{
{Parent: "from0", Child: "filter1"},
},
},
},
}
for _, tc := range tests {
tc := tc
t.Run(tc.Name, func(t *testing.T) {
t.Parallel()
querytest.NewQueryTestHelper(t, tc)
})
}
}
func TestFilterOperation_Marshaling(t *testing.T) {
data := []byte(`{
"id":"filter",
"kind":"filter",
"spec":{
"fn":{
"type": "ArrowFunctionExpression",
"params": [{"type":"FunctionParam","key":{"type":"Identifier","name":"r"}}],
"body":{
"type":"BinaryExpression",
"operator": "!=",
"left":{
"type":"MemberExpression",
"object": {
"type": "IdentifierExpression",
"name":"r"
},
"property": "_measurement"
},
"right":{
"type":"StringLiteral",
"value":"mem"
}
}
}
}
}`)
op := &query.Operation{
ID: "filter",
Spec: &functions.FilterOpSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.NotEqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_measurement",
},
Right: &semantic.StringLiteral{Value: "mem"},
},
},
},
}
querytest.OperationMarshalingTestHelper(t, data, op)
}
func TestFilter_Process(t *testing.T) {
testCases := []struct {
name string
spec *functions.FilterProcedureSpec
data []execute.Block
want []*executetest.Block
}{
{
name: `_value>5`,
spec: &functions.FilterProcedureSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.GreaterThanOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_value",
},
Right: &semantic.FloatLiteral{Value: 5},
},
},
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(1), 1.0},
{execute.Time(2), 6.0},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(2), 6.0},
},
}},
},
{
name: "_value>5 multiple blocks",
spec: &functions.FilterProcedureSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.GreaterThanOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_value",
},
Right: &semantic.FloatLiteral{
Value: 5,
},
},
},
},
data: []execute.Block{
&executetest.Block{
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{"a", execute.Time(1), 3.0},
{"a", execute.Time(2), 6.0},
{"a", execute.Time(2), 1.0},
},
},
&executetest.Block{
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{"b", execute.Time(3), 3.0},
{"b", execute.Time(3), 2.0},
{"b", execute.Time(4), 8.0},
},
},
},
want: []*executetest.Block{
{
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{"a", execute.Time(2), 6.0},
},
},
{
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{"b", execute.Time(4), 8.0},
},
},
},
},
{
name: "_value>5 and t1 = a and t2 = y",
spec: &functions.FilterProcedureSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.LogicalExpression{
Operator: ast.AndOperator,
Left: &semantic.BinaryExpression{
Operator: ast.GreaterThanOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_value",
},
Right: &semantic.FloatLiteral{
Value: 5,
},
},
Right: &semantic.LogicalExpression{
Operator: ast.AndOperator,
Left: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t1",
},
Right: &semantic.StringLiteral{
Value: "a",
},
},
Right: &semantic.BinaryExpression{
Operator: ast.EqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "t2",
},
Right: &semantic.StringLiteral{
Value: "y",
},
},
},
},
},
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(1), 1.0, "a", "x"},
{execute.Time(2), 6.0, "a", "x"},
{execute.Time(3), 8.0, "a", "y"},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(3), 8.0, "a", "y"},
},
}},
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
executetest.ProcessTestHelper(
t,
tc.data,
tc.want,
func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
f, err := functions.NewFilterTransformation(d, c, tc.spec)
if err != nil {
t.Fatal(err)
}
return f
},
)
})
}
}
func TestFilter_PushDown(t *testing.T) {
spec := &functions.FilterProcedureSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.NotEqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_measurement",
},
Right: &semantic.StringLiteral{Value: "mem"},
},
},
}
root := &plan.Procedure{
Spec: new(functions.FromProcedureSpec),
}
want := &plan.Procedure{
Spec: &functions.FromProcedureSpec{
FilterSet: true,
Filter: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.NotEqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_measurement",
},
Right: &semantic.StringLiteral{Value: "mem"},
},
},
},
}
plantest.PhysicalPlan_PushDown_TestHelper(t, spec, root, false, want)
}
func TestFilter_PushDown_MergeExpressions(t *testing.T) {
testCases := []struct {
name string
spec *functions.FilterProcedureSpec
root *plan.Procedure
want *plan.Procedure
}{
{
name: "merge with from",
spec: &functions.FilterProcedureSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.NotEqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_measurement",
},
Right: &semantic.StringLiteral{Value: "cpu"},
},
},
},
root: &plan.Procedure{
Spec: &functions.FromProcedureSpec{
FilterSet: true,
Filter: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.NotEqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_measurement",
},
Right: &semantic.StringLiteral{Value: "mem"},
},
},
},
},
want: &plan.Procedure{
Spec: &functions.FromProcedureSpec{
FilterSet: true,
Filter: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.LogicalExpression{
Operator: ast.AndOperator,
Left: &semantic.BinaryExpression{
Operator: ast.NotEqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_measurement",
},
Right: &semantic.StringLiteral{Value: "mem"},
},
Right: &semantic.BinaryExpression{
Operator: ast.NotEqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_measurement",
},
Right: &semantic.StringLiteral{Value: "cpu"},
},
},
},
},
},
},
{
name: "merge with filter",
spec: &functions.FilterProcedureSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.NotEqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_measurement",
},
Right: &semantic.StringLiteral{Value: "cpu"},
},
},
},
root: &plan.Procedure{
Spec: &functions.FilterProcedureSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.NotEqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_measurement",
},
Right: &semantic.StringLiteral{Value: "mem"},
},
},
},
},
want: &plan.Procedure{
Spec: &functions.FilterProcedureSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.LogicalExpression{
Operator: ast.AndOperator,
Left: &semantic.BinaryExpression{
Operator: ast.NotEqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_measurement",
},
Right: &semantic.StringLiteral{Value: "mem"},
},
Right: &semantic.BinaryExpression{
Operator: ast.NotEqualOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{Name: "r"},
Property: "_measurement",
},
Right: &semantic.StringLiteral{Value: "cpu"},
},
},
},
},
},
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
plantest.PhysicalPlan_PushDown_TestHelper(t, tc.spec, tc.root, false, tc.want)
})
}
}

170
query/functions/first.go Normal file

@ -0,0 +1,170 @@
package functions
import (
"fmt"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/plan"
"github.com/influxdata/ifql/semantic"
)
const FirstKind = "first"
type FirstOpSpec struct {
execute.SelectorConfig
}
var firstSignature = query.DefaultFunctionSignature()
func init() {
firstSignature.Params["column"] = semantic.String
firstSignature.Params["useRowTime"] = semantic.Bool
query.RegisterFunction(FirstKind, createFirstOpSpec, firstSignature)
query.RegisterOpSpec(FirstKind, newFirstOp)
plan.RegisterProcedureSpec(FirstKind, newFirstProcedure, FirstKind)
execute.RegisterTransformation(FirstKind, createFirstTransformation)
}
func createFirstOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
}
spec := new(FirstOpSpec)
if err := spec.SelectorConfig.ReadArgs(args); err != nil {
return nil, err
}
return spec, nil
}
func newFirstOp() query.OperationSpec {
return new(FirstOpSpec)
}
func (s *FirstOpSpec) Kind() query.OperationKind {
return FirstKind
}
type FirstProcedureSpec struct {
execute.SelectorConfig
}
func newFirstProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*FirstOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
}
return &FirstProcedureSpec{
SelectorConfig: spec.SelectorConfig,
}, nil
}
func (s *FirstProcedureSpec) Kind() plan.ProcedureKind {
return FirstKind
}
func (s *FirstProcedureSpec) PushDownRules() []plan.PushDownRule {
return []plan.PushDownRule{{
Root: FromKind,
Through: []plan.ProcedureKind{GroupKind, LimitKind, FilterKind},
Match: func(spec plan.ProcedureSpec) bool {
selectSpec := spec.(*FromProcedureSpec)
return !selectSpec.AggregateSet
},
}}
}
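// PushDown rewrites the from procedure so storage returns only the first
// point: bounds span all time, PointsLimit is 1, and ascending time order
// is forced. If conflicting options are already set on from, the procedure
// is duplicated and the duplicate's conflicting fields are cleared instead.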
func (s *FirstProcedureSpec) PushDown(root *plan.Procedure, dup func() *plan.Procedure) {
selectSpec := root.Spec.(*FromProcedureSpec)
if selectSpec.BoundsSet || selectSpec.LimitSet || selectSpec.DescendingSet {
root = dup()
selectSpec = root.Spec.(*FromProcedureSpec)
selectSpec.BoundsSet = false
selectSpec.Bounds = plan.BoundsSpec{}
selectSpec.LimitSet = false
selectSpec.PointsLimit = 0
selectSpec.SeriesLimit = 0
selectSpec.SeriesOffset = 0
selectSpec.DescendingSet = false
selectSpec.Descending = false
return
}
selectSpec.BoundsSet = true
selectSpec.Bounds = plan.BoundsSpec{
Start: query.MinTime,
Stop: query.Now,
}
selectSpec.LimitSet = true
selectSpec.PointsLimit = 1
selectSpec.DescendingSet = true
selectSpec.Descending = false
}
func (s *FirstProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(FirstProcedureSpec)
*ns = *s
return ns
}
type FirstSelector struct {
selected bool
}
func createFirstTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
ps, ok := spec.(*FirstProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
}
t, d := execute.NewIndexSelectorTransformationAndDataset(id, mode, new(FirstSelector), ps.SelectorConfig, a.Allocator())
return t, d, nil
}
func (s *FirstSelector) reset() {
s.selected = false
}
func (s *FirstSelector) NewBoolSelector() execute.DoBoolIndexSelector {
s.reset()
return s
}
func (s *FirstSelector) NewIntSelector() execute.DoIntIndexSelector {
s.reset()
return s
}
func (s *FirstSelector) NewUIntSelector() execute.DoUIntIndexSelector {
s.reset()
return s
}
func (s *FirstSelector) NewFloatSelector() execute.DoFloatIndexSelector {
s.reset()
return s
}
func (s *FirstSelector) NewStringSelector() execute.DoStringIndexSelector {
s.reset()
return s
}
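// selectFirst reports the index of the first row of the first non-empty
// batch and nil afterwards; the reset calls above re-arm the selector
// whenever a new typed selector is requested.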
func (s *FirstSelector) selectFirst(l int) []int {
if !s.selected && l > 0 {
s.selected = true
return []int{0}
}
return nil
}
func (s *FirstSelector) DoBool(vs []bool) []int {
return s.selectFirst(len(vs))
}
func (s *FirstSelector) DoInt(vs []int64) []int {
return s.selectFirst(len(vs))
}
func (s *FirstSelector) DoUInt(vs []uint64) []int {
return s.selectFirst(len(vs))
}
func (s *FirstSelector) DoFloat(vs []float64) []int {
return s.selectFirst(len(vs))
}
func (s *FirstSelector) DoString(vs []string) []int {
return s.selectFirst(len(vs))
}

132
query/functions/first_test.go Normal file

@ -0,0 +1,132 @@
package functions_test
import (
"testing"
"github.com/influxdata/ifql/functions"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/execute/executetest"
"github.com/influxdata/ifql/query/plan"
"github.com/influxdata/ifql/query/plan/plantest"
"github.com/influxdata/ifql/query/querytest"
)
func TestFirstOperation_Marshaling(t *testing.T) {
data := []byte(`{"id":"first","kind":"first","spec":{"column":"foo"}}`)
op := &query.Operation{
ID: "first",
Spec: &functions.FirstOpSpec{
SelectorConfig: execute.SelectorConfig{
Column: "foo",
},
},
}
querytest.OperationMarshalingTestHelper(t, data, op)
}
func TestFirst_Process(t *testing.T) {
testCases := []struct {
name string
data *executetest.Block
want [][]int
}{
{
name: "first",
data: &executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(0), 0.0, "a", "y"},
{execute.Time(10), 5.0, "a", "x"},
{execute.Time(20), 9.0, "a", "y"},
{execute.Time(30), 4.0, "a", "x"},
{execute.Time(40), 6.0, "a", "y"},
{execute.Time(50), 8.0, "a", "x"},
{execute.Time(60), 1.0, "a", "y"},
{execute.Time(70), 2.0, "a", "x"},
{execute.Time(80), 3.0, "a", "y"},
{execute.Time(90), 7.0, "a", "x"},
},
},
want: [][]int{{0}, nil, nil, nil, nil, nil, nil, nil, nil, nil},
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
executetest.IndexSelectorFuncTestHelper(
t,
new(functions.FirstSelector),
tc.data,
tc.want,
)
})
}
}
func BenchmarkFirst(b *testing.B) {
executetest.IndexSelectorFuncBenchmarkHelper(b, new(functions.FirstSelector), NormalBlock)
}
func TestFirst_PushDown_Match(t *testing.T) {
spec := new(functions.FirstProcedureSpec)
from := new(functions.FromProcedureSpec)
// Should not match when an aggregate is set
from.AggregateSet = true
plantest.PhysicalPlan_PushDown_Match_TestHelper(t, spec, from, []bool{false})
// Should match when no aggregate is set
from.AggregateSet = false
plantest.PhysicalPlan_PushDown_Match_TestHelper(t, spec, from, []bool{true})
}
func TestFirst_PushDown(t *testing.T) {
spec := new(functions.FirstProcedureSpec)
root := &plan.Procedure{
Spec: new(functions.FromProcedureSpec),
}
want := &plan.Procedure{
Spec: &functions.FromProcedureSpec{
BoundsSet: true,
Bounds: plan.BoundsSpec{
Start: query.MinTime,
Stop: query.Now,
},
LimitSet: true,
PointsLimit: 1,
DescendingSet: true,
Descending: false,
},
}
plantest.PhysicalPlan_PushDown_TestHelper(t, spec, root, false, want)
}
func TestFirst_PushDown_Duplicate(t *testing.T) {
spec := new(functions.FirstProcedureSpec)
root := &plan.Procedure{
Spec: &functions.FromProcedureSpec{
BoundsSet: true,
Bounds: plan.BoundsSpec{
Start: query.MinTime,
Stop: query.Now,
},
LimitSet: true,
PointsLimit: 1,
DescendingSet: true,
Descending: false,
},
}
want := &plan.Procedure{
// Expect the duplicate has been reset to zero values
Spec: new(functions.FromProcedureSpec),
}
plantest.PhysicalPlan_PushDown_TestHelper(t, spec, root, true, want)
}

234
query/functions/from.go Normal file

@ -0,0 +1,234 @@
package functions
import (
"fmt"
"github.com/influxdata/ifql/functions/storage"
"github.com/influxdata/ifql/id"
"github.com/influxdata/ifql/interpreter"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/plan"
"github.com/influxdata/ifql/semantic"
"github.com/pkg/errors"
)
const FromKind = "from"
type FromOpSpec struct {
Database string `json:"db"`
Bucket string `json:"bucket"`
Hosts []string `json:"hosts"`
}
var fromSignature = semantic.FunctionSignature{
Params: map[string]semantic.Type{
"db": semantic.String,
},
ReturnType: query.TableObjectType,
}
func init() {
query.RegisterFunction(FromKind, createFromOpSpec, fromSignature)
query.RegisterOpSpec(FromKind, newFromOp)
plan.RegisterProcedureSpec(FromKind, newFromProcedure, FromKind)
execute.RegisterSource(FromKind, createFromSource)
}
func createFromOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
spec := new(FromOpSpec)
if db, ok, err := args.GetString("db"); err != nil {
return nil, err
} else if ok {
spec.Database = db
}
if bucket, ok, err := args.GetString("bucket"); err != nil {
return nil, err
} else if ok {
spec.Bucket = bucket
}
if spec.Database == "" && spec.Bucket == "" {
return nil, errors.New("must specify one of db or bucket")
}
if spec.Database != "" && spec.Bucket != "" {
return nil, errors.New("must specify only one of db or bucket")
}
if array, ok, err := args.GetArray("hosts", semantic.String); err != nil {
return nil, err
} else if ok {
spec.Hosts, err = interpreter.ToStringArray(array)
if err != nil {
return nil, err
}
}
return spec, nil
}
func newFromOp() query.OperationSpec {
return new(FromOpSpec)
}
func (s *FromOpSpec) Kind() query.OperationKind {
return FromKind
}
type FromProcedureSpec struct {
Database string
Bucket string
Hosts []string
BoundsSet bool
Bounds plan.BoundsSpec
FilterSet bool
Filter *semantic.FunctionExpression
DescendingSet bool
Descending bool
LimitSet bool
PointsLimit int64
SeriesLimit int64
SeriesOffset int64
WindowSet bool
Window plan.WindowSpec
GroupingSet bool
OrderByTime bool
MergeAll bool
GroupKeys []string
GroupExcept []string
AggregateSet bool
AggregateMethod string
}
func newFromProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*FromOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
}
return &FromProcedureSpec{
Database: spec.Database,
Bucket: spec.Bucket,
Hosts: spec.Hosts,
}, nil
}
func (s *FromProcedureSpec) Kind() plan.ProcedureKind {
return FromKind
}
func (s *FromProcedureSpec) TimeBounds() plan.BoundsSpec {
return s.Bounds
}
func (s *FromProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(FromProcedureSpec)
ns.Database = s.Database
ns.Bucket = s.Bucket
if len(s.Hosts) > 0 {
ns.Hosts = make([]string, len(s.Hosts))
copy(ns.Hosts, s.Hosts)
}
ns.BoundsSet = s.BoundsSet
ns.Bounds = s.Bounds
ns.FilterSet = s.FilterSet
// TODO copy predicate
ns.Filter = s.Filter
ns.DescendingSet = s.DescendingSet
ns.Descending = s.Descending
ns.LimitSet = s.LimitSet
ns.PointsLimit = s.PointsLimit
ns.SeriesLimit = s.SeriesLimit
ns.SeriesOffset = s.SeriesOffset
ns.WindowSet = s.WindowSet
ns.Window = s.Window
ns.GroupingSet = s.GroupingSet
ns.OrderByTime = s.OrderByTime
ns.MergeAll = s.MergeAll
ns.GroupKeys = make([]string, len(s.GroupKeys))
copy(ns.GroupKeys, s.GroupKeys)
ns.GroupExcept = make([]string, len(s.GroupExcept))
copy(ns.GroupExcept, s.GroupExcept)
ns.AggregateSet = s.AggregateSet
ns.AggregateMethod = s.AggregateMethod
return ns
}
func createFromSource(prSpec plan.ProcedureSpec, dsid execute.DatasetID, a execute.Administration) (execute.Source, error) {
spec := prSpec.(*FromProcedureSpec)
var w execute.Window
if spec.WindowSet {
w = execute.Window{
Every: execute.Duration(spec.Window.Every),
Period: execute.Duration(spec.Window.Period),
Round: execute.Duration(spec.Window.Round),
Start: a.ResolveTime(spec.Window.Start),
}
} else {
duration := execute.Duration(a.ResolveTime(spec.Bounds.Stop)) - execute.Duration(a.ResolveTime(spec.Bounds.Start))
w = execute.Window{
Every: duration,
Period: duration,
Start: a.ResolveTime(spec.Bounds.Start),
}
}
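// Without an explicit window the entire bounded range acts as one window:
// Every and Period both equal the span of the bounds, so currentTime below
// is the end of that single window.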
currentTime := w.Start + execute.Time(w.Period)
bounds := execute.Bounds{
Start: a.ResolveTime(spec.Bounds.Start),
Stop: a.ResolveTime(spec.Bounds.Stop),
}
deps := a.Dependencies()[FromKind].(storage.Dependencies)
orgID := a.OrganizationID()
var bucketID id.ID
if spec.Database == "" {
b, ok := deps.BucketLookup.Lookup(orgID, spec.Bucket)
if !ok {
return nil, fmt.Errorf("could not find bucket %q", spec.Bucket)
}
bucketID = b
} else {
bucketID = id.ID(spec.Database)
}
return storage.NewSource(
dsid,
deps.Reader,
storage.ReadSpec{
OrganizationID: orgID,
BucketID: bucketID,
Hosts: spec.Hosts,
Predicate: spec.Filter,
PointsLimit: spec.PointsLimit,
SeriesLimit: spec.SeriesLimit,
SeriesOffset: spec.SeriesOffset,
Descending: spec.Descending,
OrderByTime: spec.OrderByTime,
MergeAll: spec.MergeAll,
GroupKeys: spec.GroupKeys,
GroupExcept: spec.GroupExcept,
AggregateMethod: spec.AggregateMethod,
},
bounds,
w,
currentTime,
), nil
}
func InjectFromDependencies(depsMap execute.Dependencies, deps storage.Dependencies) error {
if err := deps.Validate(); err != nil {
return err
}
depsMap[FromKind] = deps
return nil
}

91
query/functions/from_test.go Normal file

@ -0,0 +1,91 @@
package functions_test
import (
"testing"
"time"
"github.com/influxdata/ifql/functions"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/querytest"
)
func TestFrom_NewQuery(t *testing.T) {
tests := []querytest.NewQueryTestCase{
{
Name: "from no args",
Raw: `from()`,
WantErr: true,
},
{
Name: "from conflicting args",
Raw: `from(db:"d", bucket:"b")`,
WantErr: true,
},
{
Name: "from repeat arg",
Raw: `from(db:"telegraf", db:"oops")`,
WantErr: true,
},
{
Name: "from",
Raw: `from(db:"telegraf", chicken:"what is this?")`,
WantErr: true,
},
{
Name: "from with database",
Raw: `from(db:"mydb") |> range(start:-4h, stop:-2h) |> sum()`,
Want: &query.Spec{
Operations: []*query.Operation{
{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
},
},
{
ID: "range1",
Spec: &functions.RangeOpSpec{
Start: query.Time{
Relative: -4 * time.Hour,
IsRelative: true,
},
Stop: query.Time{
Relative: -2 * time.Hour,
IsRelative: true,
},
},
},
{
ID: "sum2",
Spec: &functions.SumOpSpec{
AggregateConfig: execute.DefaultAggregateConfig,
},
},
},
Edges: []query.Edge{
{Parent: "from0", Child: "range1"},
{Parent: "range1", Child: "sum2"},
},
},
},
}
for _, tc := range tests {
tc := tc
t.Run(tc.Name, func(t *testing.T) {
t.Parallel()
querytest.NewQueryTestHelper(t, tc)
})
}
}
func TestFromOperation_Marshaling(t *testing.T) {
data := []byte(`{"id":"from","kind":"from","spec":{"db":"mydb"}}`)
op := &query.Operation{
ID: "from",
Spec: &functions.FromOpSpec{
Database: "mydb",
},
}
querytest.OperationMarshalingTestHelper(t, data, op)
}

273
query/functions/group.go Normal file

@ -0,0 +1,273 @@
package functions
import (
"errors"
"fmt"
"sort"
"github.com/influxdata/ifql/interpreter"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/plan"
"github.com/influxdata/ifql/semantic"
)
const GroupKind = "group"
type GroupOpSpec struct {
By []string `json:"by"`
Except []string `json:"except"`
}
var groupSignature = query.DefaultFunctionSignature()
func init() {
groupSignature.Params["by"] = semantic.NewArrayType(semantic.String)
groupSignature.Params["except"] = semantic.NewArrayType(semantic.String)
query.RegisterFunction(GroupKind, createGroupOpSpec, groupSignature)
query.RegisterOpSpec(GroupKind, newGroupOp)
plan.RegisterProcedureSpec(GroupKind, newGroupProcedure, GroupKind)
plan.RegisterRewriteRule(AggregateGroupRewriteRule{})
execute.RegisterTransformation(GroupKind, createGroupTransformation)
}
func createGroupOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
}
spec := new(GroupOpSpec)
if array, ok, err := args.GetArray("by", semantic.String); err != nil {
return nil, err
} else if ok {
spec.By, err = interpreter.ToStringArray(array)
if err != nil {
return nil, err
}
}
if array, ok, err := args.GetArray("except", semantic.String); err != nil {
return nil, err
} else if ok {
spec.Except, err = interpreter.ToStringArray(array)
if err != nil {
return nil, err
}
}
if len(spec.By) > 0 && len(spec.Except) > 0 {
return nil, errors.New(`cannot specify both "by" and "except" keyword arguments`)
}
return spec, nil
}
func newGroupOp() query.OperationSpec {
return new(GroupOpSpec)
}
func (s *GroupOpSpec) Kind() query.OperationKind {
return GroupKind
}
type GroupProcedureSpec struct {
By []string
Except []string
}
func newGroupProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*GroupOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
}
p := &GroupProcedureSpec{
By: spec.By,
Except: spec.Except,
}
return p, nil
}
func (s *GroupProcedureSpec) Kind() plan.ProcedureKind {
return GroupKind
}
func (s *GroupProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(GroupProcedureSpec)
ns.By = make([]string, len(s.By))
copy(ns.By, s.By)
ns.Except = make([]string, len(s.Except))
copy(ns.Except, s.Except)
return ns
}
func (s *GroupProcedureSpec) PushDownRules() []plan.PushDownRule {
return []plan.PushDownRule{{
Root: FromKind,
Through: []plan.ProcedureKind{LimitKind, RangeKind, FilterKind},
Match: func(spec plan.ProcedureSpec) bool {
selectSpec := spec.(*FromProcedureSpec)
return !selectSpec.AggregateSet
},
}}
}
func (s *GroupProcedureSpec) PushDown(root *plan.Procedure, dup func() *plan.Procedure) {
selectSpec := root.Spec.(*FromProcedureSpec)
if selectSpec.GroupingSet {
root = dup()
selectSpec = root.Spec.(*FromProcedureSpec)
selectSpec.OrderByTime = false
selectSpec.GroupingSet = false
selectSpec.MergeAll = false
selectSpec.GroupKeys = nil
selectSpec.GroupExcept = nil
return
}
selectSpec.GroupingSet = true
// TODO implement OrderByTime
//selectSpec.OrderByTime = true
// Merge all series into a single group if we have no specific grouping dimensions.
selectSpec.MergeAll = len(s.By) == 0 && len(s.Except) == 0
selectSpec.GroupKeys = s.By
selectSpec.GroupExcept = s.Except
}
type AggregateGroupRewriteRule struct {
}
func (r AggregateGroupRewriteRule) Root() plan.ProcedureKind {
return FromKind
}
func (r AggregateGroupRewriteRule) Rewrite(pr *plan.Procedure, planner plan.PlanRewriter) error {
var agg *plan.Procedure
pr.DoChildren(func(child *plan.Procedure) {
if _, ok := child.Spec.(plan.AggregateProcedureSpec); ok {
agg = child
}
})
if agg == nil {
return nil
}
fromSpec := pr.Spec.(*FromProcedureSpec)
if fromSpec.AggregateSet {
return nil
}
// Rewrite
isoFrom, err := planner.IsolatePath(pr, agg)
if err != nil {
return err
}
return r.rewrite(isoFrom, planner)
}
func (r AggregateGroupRewriteRule) rewrite(fromPr *plan.Procedure, planner plan.PlanRewriter) error {
fromSpec := fromPr.Spec.(*FromProcedureSpec)
aggPr := fromPr.Child(0)
aggSpec := aggPr.Spec.(plan.AggregateProcedureSpec)
fromSpec.AggregateSet = true
fromSpec.AggregateMethod = aggSpec.AggregateMethod()
if err := planner.RemoveBranch(aggPr); err != nil {
return err
}
planner.AddChild(fromPr, aggSpec.ReAggregateSpec())
return nil
}
func createGroupTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*GroupProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
}
cache := execute.NewBlockBuilderCache(a.Allocator())
d := execute.NewDataset(id, mode, cache)
t := NewGroupTransformation(d, cache, s)
return t, d, nil
}
type groupTransformation struct {
d execute.Dataset
cache execute.BlockBuilderCache
keys []string
except []string
// ignoring is true if len(keys) == 0 && len(except) > 0.
ignoring bool
}
func NewGroupTransformation(d execute.Dataset, cache execute.BlockBuilderCache, spec *GroupProcedureSpec) *groupTransformation {
t := &groupTransformation{
d: d,
cache: cache,
keys: spec.By,
except: spec.Except,
ignoring: len(spec.By) == 0 && len(spec.Except) > 0,
}
sort.Strings(t.keys)
sort.Strings(t.except)
return t
}
func (t *groupTransformation) RetractBlock(id execute.DatasetID, key execute.PartitionKey) (err error) {
//TODO(nathanielc): Investigate if this can be smarter and not retract all blocks with the same time bounds.
panic("not implemented")
//t.cache.ForEachBuilder(func(bk execute.BlockKey, builder execute.BlockBuilder) {
// if err != nil {
// return
// }
// if meta.Bounds().Equal(builder.Bounds()) {
// err = t.d.RetractBlock(bk)
// }
//})
//return
}
func (t *groupTransformation) Process(id execute.DatasetID, b execute.Block) error {
cols := b.Cols()
on := make(map[string]bool, len(cols))
if len(t.keys) > 0 {
for _, k := range t.keys {
on[k] = true
}
} else if len(t.except) > 0 {
COLS:
for _, c := range cols {
for _, label := range t.except {
if c.Label == label {
continue COLS
}
}
on[c.Label] = true
}
}
return b.Do(func(cr execute.ColReader) error {
l := cr.Len()
for i := 0; i < l; i++ {
key := execute.PartitionKeyForRowOn(i, cr, on)
builder, created := t.cache.BlockBuilder(key)
if created {
execute.AddBlockCols(b, builder)
}
execute.AppendRecord(i, cr, builder)
}
return nil
})
}
func (t *groupTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
return t.d.UpdateWatermark(mark)
}
func (t *groupTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
return t.d.UpdateProcessingTime(pt)
}
func (t *groupTransformation) Finish(id execute.DatasetID, err error) {
t.d.Finish(err)
}

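The `by`/`except` handling above is the heart of `groupTransformation.Process`: `by` selects the grouping columns directly, while `except` selects every column not listed. A minimal standalone sketch of that selection, assuming a hypothetical `groupOn` helper that is not part of this package:

```go
package main

import "fmt"

// groupOn mirrors the column-selection logic in groupTransformation.Process:
// with "by" keys the row key uses exactly those columns; with "except" it
// uses every column not listed; with neither, the set stays empty and all
// rows merge into a single group.
func groupOn(cols, by, except []string) map[string]bool {
	on := make(map[string]bool, len(cols))
	if len(by) > 0 {
		for _, k := range by {
			on[k] = true
		}
		return on
	}
	if len(except) == 0 {
		return on // empty set: merge everything
	}
	for _, c := range cols {
		excluded := false
		for _, e := range except {
			if c == e {
				excluded = true
				break
			}
		}
		if !excluded {
			on[c] = true
		}
	}
	return on
}

func main() {
	cols := []string{"_time", "_value", "t1", "t2"}
	fmt.Println(groupOn(cols, []string{"t1"}, nil))                    // map[t1:true]
	fmt.Println(groupOn(cols, nil, []string{"_time", "_value", "t2"})) // map[t1:true]
}
```

`PartitionKeyForRowOn` then builds one partition key per row from exactly this set, which is how the test cases in group_test.go below fan rows into and out of blocks.
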
344
query/functions/group_test.go Normal file
View File

@ -0,0 +1,344 @@
package functions_test
import (
"testing"
"github.com/influxdata/ifql/functions"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/execute/executetest"
"github.com/influxdata/ifql/query/plan"
"github.com/influxdata/ifql/query/plan/plantest"
"github.com/influxdata/ifql/query/querytest"
)
func TestGroupOperation_Marshaling(t *testing.T) {
data := []byte(`{"id":"group","kind":"group","spec":{"by":["t1","t2"]}}`)
op := &query.Operation{
ID: "group",
Spec: &functions.GroupOpSpec{
By: []string{"t1", "t2"},
},
}
querytest.OperationMarshalingTestHelper(t, data, op)
}
func TestGroup_Process(t *testing.T) {
testCases := []struct {
name string
spec *functions.GroupProcedureSpec
data []execute.Block
want []*executetest.Block
}{
{
name: "fan in",
spec: &functions.GroupProcedureSpec{
By: []string{"t1"},
},
data: []execute.Block{
&executetest.Block{
KeyCols: []string{"t1", "t2"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(1), 2.0, "a", "x"},
},
},
&executetest.Block{
KeyCols: []string{"t1", "t2"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(2), 1.0, "a", "y"},
},
},
&executetest.Block{
KeyCols: []string{"t1", "t2"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(1), 4.0, "b", "x"},
},
},
&executetest.Block{
KeyCols: []string{"t1", "t2"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(2), 7.0, "b", "y"},
},
},
},
want: []*executetest.Block{
{
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(1), 2.0, "a", "x"},
{execute.Time(2), 1.0, "a", "y"},
},
},
{
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(1), 4.0, "b", "x"},
{execute.Time(2), 7.0, "b", "y"},
},
},
},
},
{
name: "fan in ignoring",
spec: &functions.GroupProcedureSpec{
Except: []string{"_time", "_value", "t2"},
},
data: []execute.Block{
&executetest.Block{
KeyCols: []string{"t1", "t2", "t3"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
{Label: "t3", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(1), 2.0, "a", "m", "x"},
},
},
&executetest.Block{
KeyCols: []string{"t1", "t2", "t3"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
{Label: "t3", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(2), 1.0, "a", "n", "x"},
},
},
&executetest.Block{
KeyCols: []string{"t1", "t2", "t3"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
{Label: "t3", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(1), 4.0, "b", "m", "x"},
},
},
&executetest.Block{
KeyCols: []string{"t1", "t2", "t3"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
{Label: "t3", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(2), 7.0, "b", "n", "x"},
},
},
},
want: []*executetest.Block{
{
KeyCols: []string{"t1", "t3"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
{Label: "t3", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(1), 2.0, "a", "m", "x"},
{execute.Time(2), 1.0, "a", "n", "x"},
},
},
{
KeyCols: []string{"t1", "t3"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
{Label: "t3", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(1), 4.0, "b", "m", "x"},
{execute.Time(2), 7.0, "b", "n", "x"},
},
},
},
},
{
name: "fan out",
spec: &functions.GroupProcedureSpec{
By: []string{"t1"},
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(1), 2.0, "a"},
{execute.Time(2), 1.0, "b"},
},
}},
want: []*executetest.Block{
{
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(1), 2.0, "a"},
},
},
{
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(2), 1.0, "b"},
},
},
},
},
{
name: "fan out ignoring",
spec: &functions.GroupProcedureSpec{
Except: []string{"_time", "_value", "t2"},
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
{Label: "t3", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(1), 2.0, "a", "m", "x"},
{execute.Time(2), 1.0, "a", "n", "y"},
},
}},
want: []*executetest.Block{
{
KeyCols: []string{"t1", "t3"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
{Label: "t3", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(1), 2.0, "a", "m", "x"},
},
},
{
KeyCols: []string{"t1", "t3"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
{Label: "t3", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(2), 1.0, "a", "n", "y"},
},
},
},
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
executetest.ProcessTestHelper(
t,
tc.data,
tc.want,
func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
return functions.NewGroupTransformation(d, c, tc.spec)
},
)
})
}
}
func TestGroup_PushDown(t *testing.T) {
spec := &functions.GroupProcedureSpec{
By: []string{"t1", "t2"},
}
root := &plan.Procedure{
Spec: new(functions.FromProcedureSpec),
}
want := &plan.Procedure{
Spec: &functions.FromProcedureSpec{
GroupingSet: true,
MergeAll: false,
GroupKeys: []string{"t1", "t2"},
},
}
plantest.PhysicalPlan_PushDown_TestHelper(t, spec, root, false, want)
}
func TestGroup_PushDown_Duplicate(t *testing.T) {
spec := &functions.GroupProcedureSpec{
By: []string{"t1", "t2"},
}
root := &plan.Procedure{
Spec: &functions.FromProcedureSpec{
GroupingSet: true,
MergeAll: true,
},
}
want := &plan.Procedure{
// Expect that the duplicate has been reset to zero values
Spec: new(functions.FromProcedureSpec),
}
plantest.PhysicalPlan_PushDown_TestHelper(t, spec, root, true, want)
}

224
query/functions/integral.go Normal file
View File

@ -0,0 +1,224 @@
package functions
import (
"fmt"
"time"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/plan"
"github.com/influxdata/ifql/semantic"
)
const IntegralKind = "integral"
type IntegralOpSpec struct {
Unit query.Duration `json:"unit"`
execute.AggregateConfig
}
var integralSignature = query.DefaultFunctionSignature()
func init() {
integralSignature.Params["unit"] = semantic.Duration
query.RegisterFunction(IntegralKind, createIntegralOpSpec, integralSignature)
query.RegisterOpSpec(IntegralKind, newIntegralOp)
plan.RegisterProcedureSpec(IntegralKind, newIntegralProcedure, IntegralKind)
execute.RegisterTransformation(IntegralKind, createIntegralTransformation)
}
func createIntegralOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
}
spec := new(IntegralOpSpec)
if unit, ok, err := args.GetDuration("unit"); err != nil {
return nil, err
} else if ok {
spec.Unit = unit
} else {
// Default is 1s.
spec.Unit = query.Duration(time.Second)
}
if err := spec.AggregateConfig.ReadArgs(args); err != nil {
return nil, err
}
return spec, nil
}
func newIntegralOp() query.OperationSpec {
return new(IntegralOpSpec)
}
func (s *IntegralOpSpec) Kind() query.OperationKind {
return IntegralKind
}
type IntegralProcedureSpec struct {
Unit query.Duration `json:"unit"`
execute.AggregateConfig
}
func newIntegralProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*IntegralOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
}
return &IntegralProcedureSpec{
Unit: spec.Unit,
AggregateConfig: spec.AggregateConfig,
}, nil
}
func (s *IntegralProcedureSpec) Kind() plan.ProcedureKind {
return IntegralKind
}
func (s *IntegralProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(IntegralProcedureSpec)
*ns = *s
ns.AggregateConfig = s.AggregateConfig.Copy()
return ns
}
func createIntegralTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*IntegralProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
}
cache := execute.NewBlockBuilderCache(a.Allocator())
d := execute.NewDataset(id, mode, cache)
t := NewIntegralTransformation(d, cache, s)
return t, d, nil
}
type integralTransformation struct {
d execute.Dataset
cache execute.BlockBuilderCache
spec IntegralProcedureSpec
}
func NewIntegralTransformation(d execute.Dataset, cache execute.BlockBuilderCache, spec *IntegralProcedureSpec) *integralTransformation {
return &integralTransformation{
d: d,
cache: cache,
spec: *spec,
}
}
func (t *integralTransformation) RetractBlock(id execute.DatasetID, key execute.PartitionKey) error {
return t.d.RetractBlock(key)
}
func (t *integralTransformation) Process(id execute.DatasetID, b execute.Block) error {
builder, created := t.cache.BlockBuilder(b.Key())
if !created {
return fmt.Errorf("integral found duplicate block with key: %v", b.Key())
}
execute.AddBlockKeyCols(b.Key(), builder)
builder.AddCol(execute.ColMeta{
Label: t.spec.TimeDst,
Type: execute.TTime,
})
cols := b.Cols()
integrals := make([]*integral, len(cols))
colMap := make([]int, len(cols))
for j, c := range cols {
if execute.ContainsStr(t.spec.Columns, c.Label) {
integrals[j] = newIntegral(time.Duration(t.spec.Unit))
colMap[j] = builder.AddCol(execute.ColMeta{
Label: c.Label,
Type: execute.TFloat,
})
}
}
if err := execute.AppendAggregateTime(t.spec.TimeSrc, t.spec.TimeDst, b.Key(), builder); err != nil {
return err
}
timeIdx := execute.ColIdx(t.spec.TimeDst, cols)
if timeIdx < 0 {
return fmt.Errorf("no column %q exists", t.spec.TimeDst)
}
err := b.Do(func(cr execute.ColReader) error {
for j, in := range integrals {
if in == nil {
continue
}
l := cr.Len()
for i := 0; i < l; i++ {
tm := cr.Times(timeIdx)[i]
in.updateFloat(tm, cr.Floats(j)[i])
}
}
return nil
})
if err != nil {
return err
}
execute.AppendKeyValues(b.Key(), builder)
for j, in := range integrals {
if in == nil {
continue
}
builder.AppendFloat(colMap[j], in.value())
}
return nil
}
func (t *integralTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
return t.d.UpdateWatermark(mark)
}
func (t *integralTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
return t.d.UpdateProcessingTime(pt)
}
func (t *integralTransformation) Finish(id execute.DatasetID, err error) {
t.d.Finish(err)
}
func newIntegral(unit time.Duration) *integral {
return &integral{
first: true,
unit: float64(unit),
}
}
type integral struct {
first bool
unit float64
pFloatValue float64
pTime execute.Time
sum float64
}
func (in *integral) value() float64 {
return in.sum
}
func (in *integral) updateFloat(t execute.Time, v float64) {
if in.first {
in.pTime = t
in.pFloatValue = v
in.first = false
return
}
elapsed := float64(t-in.pTime) / in.unit
in.sum += 0.5 * (v + in.pFloatValue) * elapsed
in.pTime = t
in.pFloatValue = v
}

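The `integral` helper above is the trapezoidal rule: each adjacent pair of points contributes `0.5 * (v + prev) * (t - prevT) / unit` to the sum. A standalone sketch of the same arithmetic (hypothetical `trapezoid` function, not part of the package), reproducing the values asserted in integral_test.go below:

```go
package main

import (
	"fmt"
	"time"
)

// trapezoid mirrors integral.updateFloat: sum the area of each trapezoid
// formed by adjacent (time, value) points, scaling time deltas by unit.
func trapezoid(times []int64, values []float64, unit time.Duration) float64 {
	var sum float64
	for i := 1; i < len(times); i++ {
		elapsed := float64(times[i]-times[i-1]) / float64(unit)
		sum += 0.5 * (values[i] + values[i-1]) * elapsed
	}
	return sum
}

func main() {
	// "float" case: unit 1, points (1, 2.0) and (2, 1.0) => 0.5*(2+1)*1 = 1.5
	fmt.Println(trapezoid([]int64{1, 2}, []float64{2.0, 1.0}, 1))
	// "float with units" case: unit 1s, points at 1s and 3s => 0.5*(2+1)*2 = 3.0
	s := int64(time.Second)
	fmt.Println(trapezoid([]int64{1 * s, 3 * s}, []float64{2.0, 1.0}, time.Second))
}
```
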
194
query/functions/integral_test.go Normal file
View File

@ -0,0 +1,194 @@
package functions_test
import (
"testing"
"time"
"github.com/influxdata/ifql/functions"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/execute/executetest"
"github.com/influxdata/ifql/query/querytest"
)
func TestIntegralOperation_Marshaling(t *testing.T) {
data := []byte(`{"id":"integral","kind":"integral","spec":{"unit":"1m"}}`)
op := &query.Operation{
ID: "integral",
Spec: &functions.IntegralOpSpec{
Unit: query.Duration(time.Minute),
},
}
querytest.OperationMarshalingTestHelper(t, data, op)
}
func TestIntegral_PassThrough(t *testing.T) {
executetest.TransformationPassThroughTestHelper(t, func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
s := functions.NewIntegralTransformation(
d,
c,
&functions.IntegralProcedureSpec{},
)
return s
})
}
func TestIntegral_Process(t *testing.T) {
testCases := []struct {
name string
spec *functions.IntegralProcedureSpec
data []execute.Block
want []*executetest.Block
}{
{
name: "float",
spec: &functions.IntegralProcedureSpec{
Unit: 1,
AggregateConfig: execute.DefaultAggregateConfig,
},
data: []execute.Block{&executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(1), execute.Time(3), execute.Time(1), 2.0},
{execute.Time(1), execute.Time(3), execute.Time(2), 1.0},
},
}},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(1), execute.Time(3), execute.Time(3), 1.5},
},
}},
},
{
name: "float with units",
spec: &functions.IntegralProcedureSpec{
Unit: query.Duration(time.Second),
AggregateConfig: execute.DefaultAggregateConfig,
},
data: []execute.Block{&executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(1 * time.Second), execute.Time(4 * time.Second), execute.Time(1 * time.Second), 2.0},
{execute.Time(1 * time.Second), execute.Time(4 * time.Second), execute.Time(3 * time.Second), 1.0},
},
}},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(1 * time.Second), execute.Time(4 * time.Second), execute.Time(4 * time.Second), 3.0},
},
}},
},
{
name: "float with tags",
spec: &functions.IntegralProcedureSpec{
Unit: 1,
AggregateConfig: execute.DefaultAggregateConfig,
},
data: []execute.Block{&executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(1), execute.Time(3), execute.Time(1), 2.0, "a"},
{execute.Time(1), execute.Time(3), execute.Time(2), 1.0, "b"},
},
}},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(1), execute.Time(3), execute.Time(3), 1.5},
},
}},
},
{
name: "float with multiple values",
spec: &functions.IntegralProcedureSpec{
Unit: 1,
AggregateConfig: execute.AggregateConfig{
TimeDst: execute.DefaultTimeColLabel,
TimeSrc: execute.DefaultStopColLabel,
Columns: []string{"x", "y"},
},
},
data: []execute.Block{&executetest.Block{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(1), execute.Time(5), execute.Time(1), 2.0, 20.0},
{execute.Time(1), execute.Time(5), execute.Time(2), 1.0, 10.0},
{execute.Time(1), execute.Time(5), execute.Time(3), 2.0, 20.0},
{execute.Time(1), execute.Time(5), execute.Time(4), 1.0, 10.0},
},
}},
want: []*executetest.Block{{
KeyCols: []string{"_start", "_stop"},
ColMeta: []execute.ColMeta{
{Label: "_start", Type: execute.TTime},
{Label: "_stop", Type: execute.TTime},
{Label: "_time", Type: execute.TTime},
{Label: "x", Type: execute.TFloat},
{Label: "y", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(1), execute.Time(5), execute.Time(5), 4.5, 45.0},
},
}},
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
executetest.ProcessTestHelper(
t,
tc.data,
tc.want,
func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
return functions.NewIntegralTransformation(d, c, tc.spec)
},
)
})
}
}

750
query/functions/join.go Normal file
View File

@ -0,0 +1,750 @@
package functions
import (
"fmt"
"math"
"sort"
"sync"
"github.com/influxdata/ifql/compiler"
"github.com/influxdata/ifql/interpreter"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/plan"
"github.com/influxdata/ifql/semantic"
"github.com/influxdata/ifql/values"
"github.com/pkg/errors"
)
const JoinKind = "join"
const MergeJoinKind = "merge-join"
type JoinOpSpec struct {
// On is a list of tags on which to join.
On []string `json:"on"`
// Fn is a function accepting a single parameter.
// The parameter is a map of records, one for each of the parent operations.
Fn *semantic.FunctionExpression `json:"fn"`
// TableNames are the names to give to each parent when populating the parameter for the function.
// The first parent is referenced by the first name and so forth.
// TODO(nathanielc): Change this to a map of parent operation IDs to names.
// Then make it possible for the transformation to map operation IDs to parent IDs.
TableNames map[query.OperationID]string `json:"table_names"`
}
var joinSignature = semantic.FunctionSignature{
Params: map[string]semantic.Type{
"tables": semantic.Object,
"fn": semantic.Function,
"on": semantic.NewArrayType(semantic.String),
},
ReturnType: query.TableObjectType,
PipeArgument: "tables",
}
func init() {
query.RegisterFunction(JoinKind, createJoinOpSpec, joinSignature)
query.RegisterOpSpec(JoinKind, newJoinOp)
//TODO(nathanielc): Allow for other types of join implementations
plan.RegisterProcedureSpec(MergeJoinKind, newMergeJoinProcedure, JoinKind)
execute.RegisterTransformation(MergeJoinKind, createMergeJoinTransformation)
}
func createJoinOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
f, err := args.GetRequiredFunction("fn")
if err != nil {
return nil, err
}
fn, err := interpreter.ResolveFunction(f)
if err != nil {
return nil, err
}
spec := &JoinOpSpec{
Fn: fn,
TableNames: make(map[query.OperationID]string),
}
if array, ok, err := args.GetArray("on", semantic.String); err != nil {
return nil, err
} else if ok {
spec.On, err = interpreter.ToStringArray(array)
if err != nil {
return nil, err
}
}
if m, ok, err := args.GetObject("tables"); err != nil {
return nil, err
} else if ok {
var err error
m.Range(func(k string, t values.Value) {
if err != nil {
return
}
if t.Type().Kind() != semantic.Object {
err = fmt.Errorf("value for key %q in tables must be an object: got %v", k, t.Type().Kind())
return
}
if t.Type() != query.TableObjectType {
err = fmt.Errorf("value for key %q in tables must be a table object: got %v", k, t.Type())
return
}
p := t.(query.TableObject)
a.AddParent(p)
spec.TableNames[p.ID] = k
})
if err != nil {
return nil, err
}
}
return spec, nil
}
func newJoinOp() query.OperationSpec {
return new(JoinOpSpec)
}
func (s *JoinOpSpec) Kind() query.OperationKind {
return JoinKind
}
type MergeJoinProcedureSpec struct {
On []string `json:"keys"`
Fn *semantic.FunctionExpression `json:"f"`
TableNames map[plan.ProcedureID]string `json:"table_names"`
}
func newMergeJoinProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*JoinOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
}
tableNames := make(map[plan.ProcedureID]string, len(spec.TableNames))
for qid, name := range spec.TableNames {
pid := pa.ConvertID(qid)
tableNames[pid] = name
}
p := &MergeJoinProcedureSpec{
On: spec.On,
Fn: spec.Fn,
TableNames: tableNames,
}
sort.Strings(p.On)
return p, nil
}
func (s *MergeJoinProcedureSpec) Kind() plan.ProcedureKind {
return MergeJoinKind
}
func (s *MergeJoinProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(MergeJoinProcedureSpec)
ns.On = make([]string, len(s.On))
copy(ns.On, s.On)
ns.Fn = s.Fn.Copy().(*semantic.FunctionExpression)
return ns
}
func (s *MergeJoinProcedureSpec) ParentChanged(old, new plan.ProcedureID) {
if v, ok := s.TableNames[old]; ok {
delete(s.TableNames, old)
s.TableNames[new] = v
}
}
func createMergeJoinTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*MergeJoinProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
}
parents := a.Parents()
if len(parents) != 2 {
//TODO(nathanielc): Support n-way joins
return nil, nil, errors.New("joins currently support exactly two parents")
}
tableNames := make(map[execute.DatasetID]string, len(s.TableNames))
for pid, name := range s.TableNames {
id := a.ConvertID(pid)
tableNames[id] = name
}
leftName := tableNames[parents[0]]
rightName := tableNames[parents[1]]
joinFn, err := NewRowJoinFunction(s.Fn, parents, tableNames)
if err != nil {
return nil, nil, errors.Wrap(err, "invalid expression")
}
cache := NewMergeJoinCache(joinFn, a.Allocator(), leftName, rightName, s.On)
d := execute.NewDataset(id, mode, cache)
t := NewMergeJoinTransformation(d, cache, s, parents, tableNames)
return t, d, nil
}
type mergeJoinTransformation struct {
parents []execute.DatasetID
mu sync.Mutex
d execute.Dataset
cache MergeJoinCache
leftID, rightID execute.DatasetID
leftName, rightName string
parentState map[execute.DatasetID]*mergeJoinParentState
keys []string
}
func NewMergeJoinTransformation(d execute.Dataset, cache MergeJoinCache, spec *MergeJoinProcedureSpec, parents []execute.DatasetID, tableNames map[execute.DatasetID]string) *mergeJoinTransformation {
t := &mergeJoinTransformation{
d: d,
cache: cache,
keys: spec.On,
leftID: parents[0],
rightID: parents[1],
leftName: tableNames[parents[0]],
rightName: tableNames[parents[1]],
}
t.parentState = make(map[execute.DatasetID]*mergeJoinParentState)
for _, id := range parents {
t.parentState[id] = new(mergeJoinParentState)
}
return t
}
type mergeJoinParentState struct {
mark execute.Time
processing execute.Time
finished bool
}
func (t *mergeJoinTransformation) RetractBlock(id execute.DatasetID, key execute.PartitionKey) error {
panic("not implemented")
//t.mu.Lock()
//defer t.mu.Unlock()
//bm := blockMetadata{
// tags: meta.Tags().IntersectingSubset(t.keys),
// bounds: meta.Bounds(),
//}
//return t.d.RetractBlock(execute.ToBlockKey(bm))
}
func (t *mergeJoinTransformation) Process(id execute.DatasetID, b execute.Block) error {
t.mu.Lock()
defer t.mu.Unlock()
tables := t.cache.Tables(b.Key())
var references []string
var table execute.BlockBuilder
switch id {
case t.leftID:
table = tables.left
references = tables.joinFn.references[t.leftName]
case t.rightID:
table = tables.right
references = tables.joinFn.references[t.rightName]
}
// Add columns to table
labels := unionStrs(t.keys, references)
colMap := make([]int, len(labels))
for _, label := range labels {
blockIdx := execute.ColIdx(label, b.Cols())
if blockIdx < 0 {
return fmt.Errorf("no column %q exists", label)
}
// Only add the column if it does not already exist
builderIdx := execute.ColIdx(label, table.Cols())
if builderIdx < 0 {
c := b.Cols()[blockIdx]
builderIdx = table.AddCol(c)
}
colMap[builderIdx] = blockIdx
}
execute.AppendBlock(b, table, colMap)
return nil
}
func unionStrs(as, bs []string) []string {
u := make([]string, len(bs), len(as)+len(bs))
copy(u, bs)
for _, a := range as {
found := false
for _, b := range bs {
if a == b {
found = true
break
}
}
if !found {
u = append(u, a)
}
}
return u
}
func (t *mergeJoinTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
t.mu.Lock()
defer t.mu.Unlock()
t.parentState[id].mark = mark
min := execute.Time(math.MaxInt64)
for _, state := range t.parentState {
if state.mark < min {
min = state.mark
}
}
return t.d.UpdateWatermark(min)
}
func (t *mergeJoinTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
t.mu.Lock()
defer t.mu.Unlock()
t.parentState[id].processing = pt
min := execute.Time(math.MaxInt64)
for _, state := range t.parentState {
if state.processing < min {
min = state.processing
}
}
return t.d.UpdateProcessingTime(min)
}
func (t *mergeJoinTransformation) Finish(id execute.DatasetID, err error) {
t.mu.Lock()
defer t.mu.Unlock()
if err != nil {
t.d.Finish(err)
}
t.parentState[id].finished = true
finished := true
for _, state := range t.parentState {
finished = finished && state.finished
}
if finished {
t.d.Finish(nil)
}
}
type MergeJoinCache interface {
Tables(execute.PartitionKey) *joinTables
}
type mergeJoinCache struct {
data *execute.PartitionLookup
alloc *execute.Allocator
keys []string
on map[string]bool
leftName, rightName string
triggerSpec query.TriggerSpec
joinFn *joinFunc
}
func NewMergeJoinCache(joinFn *joinFunc, a *execute.Allocator, leftName, rightName string, keys []string) *mergeJoinCache {
on := make(map[string]bool, len(keys))
for _, k := range keys {
on[k] = true
}
return &mergeJoinCache{
data: execute.NewPartitionLookup(),
keys: keys,
on: on,
joinFn: joinFn,
alloc: a,
leftName: leftName,
rightName: rightName,
}
}
func (c *mergeJoinCache) Block(key execute.PartitionKey) (execute.Block, error) {
t, ok := c.lookup(key)
if !ok {
return nil, errors.New("block not found")
}
return t.Join()
}
func (c *mergeJoinCache) ForEach(f func(execute.PartitionKey)) {
c.data.Range(func(key execute.PartitionKey, value interface{}) {
f(key)
})
}
func (c *mergeJoinCache) ForEachWithContext(f func(execute.PartitionKey, execute.Trigger, execute.BlockContext)) {
c.data.Range(func(key execute.PartitionKey, value interface{}) {
tables := value.(*joinTables)
bc := execute.BlockContext{
Key: key,
Count: tables.Size(),
}
f(key, tables.trigger, bc)
})
}
func (c *mergeJoinCache) DiscardBlock(key execute.PartitionKey) {
t, ok := c.lookup(key)
if ok {
t.ClearData()
}
}
func (c *mergeJoinCache) ExpireBlock(key execute.PartitionKey) {
v, ok := c.data.Delete(key)
if ok {
v.(*joinTables).ClearData()
}
}
func (c *mergeJoinCache) SetTriggerSpec(spec query.TriggerSpec) {
c.triggerSpec = spec
}
func (c *mergeJoinCache) lookup(key execute.PartitionKey) (*joinTables, bool) {
v, ok := c.data.Lookup(key)
if !ok {
return nil, false
}
return v.(*joinTables), true
}
func (c *mergeJoinCache) Tables(key execute.PartitionKey) *joinTables {
tables, ok := c.lookup(key)
if !ok {
tables = &joinTables{
keys: c.keys,
key: key,
on: c.on,
alloc: c.alloc,
left: execute.NewColListBlockBuilder(key, c.alloc),
right: execute.NewColListBlockBuilder(key, c.alloc),
leftName: c.leftName,
rightName: c.rightName,
trigger: execute.NewTriggerFromSpec(c.triggerSpec),
joinFn: c.joinFn,
}
c.data.Set(key, tables)
}
return tables
}
type joinTables struct {
keys []string
on map[string]bool
key execute.PartitionKey
alloc *execute.Allocator
left, right *execute.ColListBlockBuilder
leftName, rightName string
trigger execute.Trigger
joinFn *joinFunc
}
func (t *joinTables) Size() int {
return t.left.NRows() + t.right.NRows()
}
func (t *joinTables) ClearData() {
t.left = execute.NewColListBlockBuilder(t.key, t.alloc)
t.right = execute.NewColListBlockBuilder(t.key, t.alloc)
}
// Join performs a sort-merge join
func (t *joinTables) Join() (execute.Block, error) {
// First prepare the join function
left := t.left.RawBlock()
right := t.right.RawBlock()
err := t.joinFn.Prepare(map[string]*execute.ColListBlock{
t.leftName: left,
t.rightName: right,
})
if err != nil {
return nil, errors.Wrap(err, "failed to prepare join function")
}
// Create a builder for the result of the join
builder := execute.NewColListBlockBuilder(t.key, t.alloc)
// Add columns from function in sorted order
properties := t.joinFn.Type().Properties()
keys := make([]string, 0, len(properties))
for k := range properties {
keys = append(keys, k)
}
sort.Strings(keys)
for _, k := range keys {
builder.AddCol(execute.ColMeta{
Label: k,
Type: execute.ConvertFromKind(properties[k].Kind()),
})
}
// Now that all columns have been added, keep a reference.
bCols := builder.Cols()
// Determine sort order for the joining tables
sortOrder := make([]string, len(t.keys))
copy(sortOrder, t.keys)
// Sort input tables
t.left.Sort(sortOrder, false)
t.right.Sort(sortOrder, false)
var (
leftSet, rightSet subset
leftKey, rightKey execute.PartitionKey
)
rows := map[string]int{
t.leftName: -1,
t.rightName: -1,
}
leftSet, leftKey = t.advance(leftSet.Stop, left)
rightSet, rightKey = t.advance(rightSet.Stop, right)
for !leftSet.Empty() && !rightSet.Empty() {
if leftKey.Equal(rightKey) {
// Inner join
for l := leftSet.Start; l < leftSet.Stop; l++ {
for r := rightSet.Start; r < rightSet.Stop; r++ {
// Evaluate expression and add to block
rows[t.leftName] = l
rows[t.rightName] = r
m, err := t.joinFn.Eval(rows)
if err != nil {
return nil, errors.Wrap(err, "failed to evaluate join function")
}
for j, c := range bCols {
v, _ := m.Get(c.Label)
execute.AppendValue(builder, j, v)
}
}
}
leftSet, leftKey = t.advance(leftSet.Stop, left)
rightSet, rightKey = t.advance(rightSet.Stop, right)
} else if leftKey.Less(rightKey) {
leftSet, leftKey = t.advance(leftSet.Stop, left)
} else {
rightSet, rightKey = t.advance(rightSet.Stop, right)
}
}
return builder.Block()
}
func (t *joinTables) advance(offset int, table *execute.ColListBlock) (subset, execute.PartitionKey) {
if n := table.NRows(); n == offset {
return subset{Start: n, Stop: n}, nil
}
start := offset
key := execute.PartitionKeyForRowOn(start, table, t.on)
s := subset{Start: start}
offset++
for offset < table.NRows() && equalRowKeys(start, offset, table, t.on) {
offset++
}
s.Stop = offset
return s, key
}
type subset struct {
Start int
Stop int
}
func (s subset) Empty() bool {
return s.Start == s.Stop
}
func equalRowKeys(x, y int, table *execute.ColListBlock, on map[string]bool) bool {
for j, c := range table.Cols() {
if !on[c.Label] {
continue
}
switch c.Type {
case execute.TBool:
if xv, yv := table.Bools(j)[x], table.Bools(j)[y]; xv != yv {
return false
}
case execute.TInt:
if xv, yv := table.Ints(j)[x], table.Ints(j)[y]; xv != yv {
return false
}
case execute.TUInt:
if xv, yv := table.UInts(j)[x], table.UInts(j)[y]; xv != yv {
return false
}
case execute.TFloat:
if xv, yv := table.Floats(j)[x], table.Floats(j)[y]; xv != yv {
return false
}
case execute.TString:
if xv, yv := table.Strings(j)[x], table.Strings(j)[y]; xv != yv {
return false
}
case execute.TTime:
if xv, yv := table.Times(j)[x], table.Times(j)[y]; xv != yv {
return false
}
default:
execute.PanicUnknownType(c.Type)
}
}
return true
}
type joinFunc struct {
fn *semantic.FunctionExpression
compilationCache *compiler.CompilationCache
scope compiler.Scope
preparedFn compiler.Func
recordName string
record *execute.Record
recordCols map[tableCol]int
references map[string][]string
isWrap bool
wrapObj *execute.Record
tableData map[string]*execute.ColListBlock
}
type tableCol struct {
table, col string
}
func NewRowJoinFunction(fn *semantic.FunctionExpression, parentIDs []execute.DatasetID, tableNames map[execute.DatasetID]string) (*joinFunc, error) {
if len(fn.Params) != 1 {
return nil, errors.New("join function must have exactly one parameter, the map of tables")
}
scope, decls := query.BuiltIns()
return &joinFunc{
compilationCache: compiler.NewCompilationCache(fn, scope, decls),
scope: make(compiler.Scope, 1),
references: findTableReferences(fn),
recordCols: make(map[tableCol]int),
recordName: fn.Params[0].Key.Name,
}, nil
}
func (f *joinFunc) Prepare(tables map[string]*execute.ColListBlock) error {
f.tableData = tables
propertyTypes := make(map[string]semantic.Type, len(f.references))
// Prepare the property types and the record column mapping.
for tbl, b := range tables {
cols := b.Cols()
tblPropertyTypes := make(map[string]semantic.Type, len(f.references[tbl]))
for _, r := range f.references[tbl] {
j := execute.ColIdx(r, cols)
if j < 0 {
return fmt.Errorf("function references unknown column %q of table %q", r, tbl)
}
c := cols[j]
f.recordCols[tableCol{table: tbl, col: c.Label}] = j
tblPropertyTypes[r] = execute.ConvertToKind(c.Type)
}
propertyTypes[tbl] = semantic.NewObjectType(tblPropertyTypes)
}
f.record = execute.NewRecord(semantic.NewObjectType(propertyTypes))
for tbl := range tables {
f.record.Set(tbl, execute.NewRecord(propertyTypes[tbl]))
}
// Compile fn for given types
fn, err := f.compilationCache.Compile(map[string]semantic.Type{
f.recordName: f.record.Type(),
})
if err != nil {
return err
}
f.preparedFn = fn
k := f.preparedFn.Type().Kind()
f.isWrap = k != semantic.Object
if f.isWrap {
f.wrapObj = execute.NewRecord(semantic.NewObjectType(map[string]semantic.Type{
execute.DefaultValueColLabel: f.preparedFn.Type(),
}))
}
return nil
}
func (f *joinFunc) Type() semantic.Type {
if f.isWrap {
return f.wrapObj.Type()
}
return f.preparedFn.Type()
}
func (f *joinFunc) Eval(rows map[string]int) (values.Object, error) {
for tbl, references := range f.references {
row := rows[tbl]
data := f.tableData[tbl]
obj, _ := f.record.Get(tbl)
o := obj.(*execute.Record)
for _, r := range references {
o.Set(r, execute.ValueForRow(row, f.recordCols[tableCol{table: tbl, col: r}], data))
}
}
f.scope[f.recordName] = f.record
v, err := f.preparedFn.Eval(f.scope)
if err != nil {
return nil, err
}
if f.isWrap {
f.wrapObj.Set(execute.DefaultValueColLabel, v)
return f.wrapObj, nil
}
return v.Object(), nil
}
func findTableReferences(fn *semantic.FunctionExpression) map[string][]string {
v := &tableReferenceVisitor{
record: fn.Params[0].Key.Name,
refs: make(map[string][]string),
}
semantic.Walk(v, fn)
return v.refs
}
type tableReferenceVisitor struct {
record string
refs map[string][]string
}
func (c *tableReferenceVisitor) Visit(node semantic.Node) semantic.Visitor {
if col, ok := node.(*semantic.MemberExpression); ok {
if table, ok := col.Object.(*semantic.MemberExpression); ok {
if record, ok := table.Object.(*semantic.IdentifierExpression); ok && record.Name == c.record {
c.refs[table.Property] = append(c.refs[table.Property], col.Property)
return nil
}
}
}
return c
}
func (c *tableReferenceVisitor) Done() {}

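`joinTables.Join` above is a textbook sort-merge join: both sides are sorted on the `on` columns, `advance` finds the run (`subset`) of rows sharing the next key, and equal runs are joined as a cross product while the lesser side otherwise advances. A reduced sketch of that loop with plain ints standing in for partition keys (hypothetical `mergeJoin`, emitting row-index pairs):

```go
package main

import "fmt"

// mergeJoin sketches the control flow of joinTables.Join: both inputs are
// sorted by key, and runs of equal keys advance in lockstep.
func mergeJoin(left, right []int, emit func(l, r int)) {
	i, j := 0, 0
	for i < len(left) && j < len(right) {
		switch {
		case left[i] == right[j]:
			// Find the runs of equal keys on both sides.
			li, rj := i, j
			for li < len(left) && left[li] == left[i] {
				li++
			}
			for rj < len(right) && right[rj] == right[j] {
				rj++
			}
			// Inner join: emit the cross product of the two runs.
			for l := i; l < li; l++ {
				for r := j; r < rj; r++ {
					emit(l, r)
				}
			}
			i, j = li, rj
		case left[i] < right[j]:
			i++
		default:
			j++
		}
	}
}

func main() {
	mergeJoin([]int{1, 2, 2, 4}, []int{2, 2, 3, 4}, func(l, r int) {
		fmt.Printf("left[%d] joins right[%d]\n", l, r)
	})
}
```

The real implementation evaluates `joinFn.Eval` for every emitted pair, mapping each side's row into the `tables` record the user's function receives.
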
1100
query/functions/join_test.go Normal file

File diff suppressed because it is too large

210
query/functions/keys.go Normal file
View File

@ -0,0 +1,210 @@
package functions
import (
"fmt"
"sort"
"strings"
"github.com/influxdata/ifql/interpreter"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/plan"
"github.com/influxdata/ifql/semantic"
)
const KeysKind = "keys"
var (
keysExceptDefaultValue = []string{"_time", "_value"}
)
type KeysOpSpec struct {
Except []string `json:"except"`
}
var keysSignature = query.DefaultFunctionSignature()
func init() {
keysSignature.Params["except"] = semantic.NewArrayType(semantic.String)
query.RegisterFunction(KeysKind, createKeysOpSpec, keysSignature)
query.RegisterOpSpec(KeysKind, newKeysOp)
plan.RegisterProcedureSpec(KeysKind, newKeysProcedure, KeysKind)
plan.RegisterRewriteRule(KeysPointLimitRewriteRule{})
execute.RegisterTransformation(KeysKind, createKeysTransformation)
}
func createKeysOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
}
spec := new(KeysOpSpec)
if array, ok, err := args.GetArray("except", semantic.String); err != nil {
return nil, err
} else if ok {
spec.Except, err = interpreter.ToStringArray(array)
if err != nil {
return nil, err
}
} else {
spec.Except = keysExceptDefaultValue
}
return spec, nil
}
func newKeysOp() query.OperationSpec {
return new(KeysOpSpec)
}
func (s *KeysOpSpec) Kind() query.OperationKind {
return KeysKind
}
type KeysProcedureSpec struct {
Except []string
}
func newKeysProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*KeysOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
}
return &KeysProcedureSpec{
Except: spec.Except,
}, nil
}
func (s *KeysProcedureSpec) Kind() plan.ProcedureKind {
return KeysKind
}
func (s *KeysProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(KeysProcedureSpec)
*ns = *s
return ns
}
type KeysPointLimitRewriteRule struct {
}
func (r KeysPointLimitRewriteRule) Root() plan.ProcedureKind {
return FromKind
}
func (r KeysPointLimitRewriteRule) Rewrite(pr *plan.Procedure, planner plan.PlanRewriter) error {
fromSpec, ok := pr.Spec.(*FromProcedureSpec)
if !ok {
return nil
}
var keys *KeysProcedureSpec
pr.DoChildren(func(child *plan.Procedure) {
if d, ok := child.Spec.(*KeysProcedureSpec); ok {
keys = d
}
})
if keys == nil {
return nil
}
if !fromSpec.LimitSet {
fromSpec.LimitSet = true
fromSpec.PointsLimit = -1
}
return nil
}
func createKeysTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*KeysProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
}
cache := execute.NewBlockBuilderCache(a.Allocator())
d := execute.NewDataset(id, mode, cache)
t := NewKeysTransformation(d, cache, s)
return t, d, nil
}
type keysTransformation struct {
d execute.Dataset
cache execute.BlockBuilderCache
except []string
}
func NewKeysTransformation(d execute.Dataset, cache execute.BlockBuilderCache, spec *KeysProcedureSpec) *keysTransformation {
var except []string
if len(spec.Except) > 0 {
except = append([]string{}, spec.Except...)
sort.Strings(except)
}
return &keysTransformation{
d: d,
cache: cache,
except: except,
}
}
func (t *keysTransformation) RetractBlock(id execute.DatasetID, key execute.PartitionKey) error {
return t.d.RetractBlock(key)
}
func (t *keysTransformation) Process(id execute.DatasetID, b execute.Block) error {
builder, created := t.cache.BlockBuilder(b.Key())
if !created {
return fmt.Errorf("keys found duplicate block with key: %v", b.Key())
}
execute.AddBlockKeyCols(b.Key(), builder)
colIdx := builder.AddCol(execute.ColMeta{Label: execute.DefaultValueColLabel, Type: execute.TString})
cols := b.Cols()
sort.Slice(cols, func(i, j int) bool {
return cols[i].Label < cols[j].Label
})
var i int
if len(t.except) > 0 {
var j int
for i < len(cols) && j < len(t.except) {
c := strings.Compare(cols[i].Label, t.except[j])
if c < 0 {
execute.AppendKeyValues(b.Key(), builder)
builder.AppendString(colIdx, cols[i].Label)
i++
} else if c > 0 {
j++
} else {
i++
j++
}
}
}
// Add the remaining labels.
for ; i < len(cols); i++ {
execute.AppendKeyValues(b.Key(), builder)
builder.AppendString(colIdx, cols[i].Label)
}
// TODO: this is a hack
return b.Do(func(execute.ColReader) error {
return nil
})
}
func (t *keysTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
return t.d.UpdateWatermark(mark)
}
func (t *keysTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
return t.d.UpdateProcessingTime(pt)
}
func (t *keysTransformation) Finish(id execute.DatasetID, err error) {
t.d.Finish(err)
}

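The merge walk in `keysTransformation.Process` above is a single-pass set difference over two sorted string lists: the column labels on one side and the `except` list on the other. A standalone sketch (hypothetical `keysExcept` helper), reproducing the "one block except" case from keys_test.go below:

```go
package main

import (
	"fmt"
	"sort"
	"strings"
)

// keysExcept mirrors the two-pointer walk in keysTransformation.Process: both
// the labels and the except list are sorted, then advanced together so that
// excluded labels are skipped in a single pass.
func keysExcept(labels, except []string) []string {
	sort.Strings(labels)
	sort.Strings(except)
	var out []string
	i, j := 0, 0
	for i < len(labels) && j < len(except) {
		switch c := strings.Compare(labels[i], except[j]); {
		case c < 0:
			out = append(out, labels[i])
			i++
		case c > 0:
			j++
		default:
			i++
			j++
		}
	}
	return append(out, labels[i:]...)
}

func main() {
	fmt.Println(keysExcept(
		[]string{"_time", "_value", "tag0", "tag1"},
		[]string{"_value", "_time"},
	)) // [tag0 tag1]
}
```
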
146
query/functions/keys_test.go Normal file
View File

@ -0,0 +1,146 @@
package functions_test
import (
"testing"
"github.com/influxdata/ifql/functions"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/execute/executetest"
)
func TestKeys_Process(t *testing.T) {
testCases := []struct {
name string
spec *functions.KeysProcedureSpec
data []execute.Block
want []*executetest.Block
}{
{
name: "one block",
spec: &functions.KeysProcedureSpec{},
data: []execute.Block{
&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "tag0", Type: execute.TString},
{Label: "tag1", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(1), 2.0},
},
},
},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_value", Type: execute.TString},
},
Data: [][]interface{}{
{"_time"},
{"_value"},
{"tag0"},
{"tag1"},
},
}},
},
{
name: "one block except",
spec: &functions.KeysProcedureSpec{Except: []string{"_value", "_time"}},
data: []execute.Block{
&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "tag0", Type: execute.TString},
{Label: "tag1", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(1), 2.0},
},
},
},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_value", Type: execute.TString},
},
Data: [][]interface{}{
{"tag0"},
{"tag1"},
},
}},
},
{
name: "two blocks",
spec: &functions.KeysProcedureSpec{},
data: []execute.Block{
&executetest.Block{
KeyCols: []string{"tag0", "tag1"},
ColMeta: []execute.ColMeta{
{Label: "tag0", Type: execute.TString},
{Label: "tag1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{"tag0-0", "tag1-0", execute.Time(1), 2.0},
},
},
&executetest.Block{
KeyCols: []string{"tag0", "tag2"},
ColMeta: []execute.ColMeta{
{Label: "tag0", Type: execute.TString},
{Label: "tag2", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{"tag0-0", "tag2-0", execute.Time(1), 2.0},
},
},
},
want: []*executetest.Block{
{
KeyCols: []string{"tag0", "tag1"},
ColMeta: []execute.ColMeta{
{Label: "tag0", Type: execute.TString},
{Label: "tag1", Type: execute.TString},
{Label: "_value", Type: execute.TString},
},
Data: [][]interface{}{
{"tag0-0", "tag1-0", "_time"},
{"tag0-0", "tag1-0", "_value"},
{"tag0-0", "tag1-0", "tag0"},
{"tag0-0", "tag1-0", "tag1"},
},
},
{
KeyCols: []string{"tag0", "tag2"},
ColMeta: []execute.ColMeta{
{Label: "tag0", Type: execute.TString},
{Label: "tag2", Type: execute.TString},
{Label: "_value", Type: execute.TString},
},
Data: [][]interface{}{
{"tag0-0", "tag2-0", "_time"},
{"tag0-0", "tag2-0", "_value"},
{"tag0-0", "tag2-0", "tag0"},
{"tag0-0", "tag2-0", "tag2"},
},
},
},
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
executetest.ProcessTestHelper(
t,
tc.data,
tc.want,
func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
return functions.NewKeysTransformation(d, c, tc.spec)
},
)
})
}
}

176
query/functions/last.go Normal file
View File

@ -0,0 +1,176 @@
package functions
import (
"fmt"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/plan"
"github.com/influxdata/ifql/semantic"
)
const LastKind = "last"
type LastOpSpec struct {
execute.SelectorConfig
}
var lastSignature = query.DefaultFunctionSignature()
func init() {
lastSignature.Params["column"] = semantic.String
lastSignature.Params["useRowTime"] = semantic.Bool
query.RegisterFunction(LastKind, createLastOpSpec, lastSignature)
query.RegisterOpSpec(LastKind, newLastOp)
plan.RegisterProcedureSpec(LastKind, newLastProcedure, LastKind)
execute.RegisterTransformation(LastKind, createLastTransformation)
}
func createLastOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
}
spec := new(LastOpSpec)
if err := spec.SelectorConfig.ReadArgs(args); err != nil {
return nil, err
}
return spec, nil
}
func newLastOp() query.OperationSpec {
return new(LastOpSpec)
}
func (s *LastOpSpec) Kind() query.OperationKind {
return LastKind
}
type LastProcedureSpec struct {
execute.SelectorConfig
}
func newLastProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*LastOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
}
return &LastProcedureSpec{
SelectorConfig: spec.SelectorConfig,
}, nil
}
func (s *LastProcedureSpec) Kind() plan.ProcedureKind {
return LastKind
}
func (s *LastProcedureSpec) PushDownRules() []plan.PushDownRule {
return []plan.PushDownRule{{
Root: FromKind,
Through: []plan.ProcedureKind{GroupKind, LimitKind, FilterKind},
Match: func(spec plan.ProcedureSpec) bool {
selectSpec := spec.(*FromProcedureSpec)
return !selectSpec.AggregateSet
},
}}
}
func (s *LastProcedureSpec) PushDown(root *plan.Procedure, dup func() *plan.Procedure) {
selectSpec := root.Spec.(*FromProcedureSpec)
if selectSpec.BoundsSet || selectSpec.LimitSet || selectSpec.DescendingSet {
root = dup()
selectSpec = root.Spec.(*FromProcedureSpec)
selectSpec.BoundsSet = false
selectSpec.Bounds = plan.BoundsSpec{}
selectSpec.LimitSet = false
selectSpec.PointsLimit = 0
selectSpec.SeriesLimit = 0
selectSpec.SeriesOffset = 0
selectSpec.DescendingSet = false
selectSpec.Descending = false
return
}
selectSpec.BoundsSet = true
selectSpec.Bounds = plan.BoundsSpec{
Start: query.MinTime,
Stop: query.Now,
}
selectSpec.LimitSet = true
selectSpec.PointsLimit = 1
selectSpec.DescendingSet = true
selectSpec.Descending = true
}
func (s *LastProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(LastProcedureSpec)
ns.SelectorConfig = s.SelectorConfig
return ns
}
type LastSelector struct {
rows []execute.Row
}
func createLastTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
ps, ok := spec.(*LastProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", ps)
}
t, d := execute.NewRowSelectorTransformationAndDataset(id, mode, new(LastSelector), ps.SelectorConfig, a.Allocator())
return t, d, nil
}
func (s *LastSelector) reset() {
s.rows = nil
}
func (s *LastSelector) NewBoolSelector() execute.DoBoolRowSelector {
s.reset()
return s
}
func (s *LastSelector) NewIntSelector() execute.DoIntRowSelector {
s.reset()
return s
}
func (s *LastSelector) NewUIntSelector() execute.DoUIntRowSelector {
s.reset()
return s
}
func (s *LastSelector) NewFloatSelector() execute.DoFloatRowSelector {
s.reset()
return s
}
func (s *LastSelector) NewStringSelector() execute.DoStringRowSelector {
s.reset()
return s
}
func (s *LastSelector) Rows() []execute.Row {
return s.rows
}
func (s *LastSelector) selectLast(l int, cr execute.ColReader) {
if l > 0 {
s.rows = []execute.Row{execute.ReadRow(l-1, cr)}
}
}
func (s *LastSelector) DoBool(vs []bool, cr execute.ColReader) {
s.selectLast(len(vs), cr)
}
func (s *LastSelector) DoInt(vs []int64, cr execute.ColReader) {
s.selectLast(len(vs), cr)
}
func (s *LastSelector) DoUInt(vs []uint64, cr execute.ColReader) {
s.selectLast(len(vs), cr)
}
func (s *LastSelector) DoFloat(vs []float64, cr execute.ColReader) {
s.selectLast(len(vs), cr)
}
func (s *LastSelector) DoString(vs []string, cr execute.ColReader) {
s.selectLast(len(vs), cr)
}

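`LastSelector` never buffers a block: `selectLast` replaces the held row with the final row of each batch, so after the final batch only the overall last row remains (and when the push-down rule fires, the storage layer already returns at most one descending point per series). A minimal sketch of that pattern over float batches, using a hypothetical `lastRow` helper:

```go
package main

import "fmt"

// lastRow sketches the pattern behind LastSelector.selectLast: each batch of
// values replaces the held value with its final element, so after all batches
// only the overall last value remains.
func lastRow(batches [][]float64) (float64, bool) {
	var last float64
	seen := false
	for _, vs := range batches {
		if l := len(vs); l > 0 {
			last = vs[l-1]
			seen = true
		}
	}
	return last, seen
}

func main() {
	v, ok := lastRow([][]float64{{0.0, 5.0, 9.0}, {4.0, 6.0}, {8.0, 1.0, 7.0}})
	fmt.Println(v, ok) // 7 true
}
```
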
135
query/functions/last_test.go Normal file
View File

@ -0,0 +1,135 @@
package functions_test
import (
"testing"
"github.com/influxdata/ifql/functions"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/execute/executetest"
"github.com/influxdata/ifql/query/plan"
"github.com/influxdata/ifql/query/plan/plantest"
"github.com/influxdata/ifql/query/querytest"
)
func TestLastOperation_Marshaling(t *testing.T) {
data := []byte(`{"id":"last","kind":"last","spec":{"column":"bar"}}`)
op := &query.Operation{
ID: "last",
Spec: &functions.LastOpSpec{
SelectorConfig: execute.SelectorConfig{
Column: "bar",
},
},
}
querytest.OperationMarshalingTestHelper(t, data, op)
}
func TestLast_Process(t *testing.T) {
testCases := []struct {
name string
data *executetest.Block
want []execute.Row
}{
{
name: "last",
data: &executetest.Block{
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
{Label: "t1", Type: execute.TString},
{Label: "t2", Type: execute.TString},
},
Data: [][]interface{}{
{execute.Time(0), 0.0, "a", "y"},
{execute.Time(10), 5.0, "a", "x"},
{execute.Time(20), 9.0, "a", "y"},
{execute.Time(30), 4.0, "a", "x"},
{execute.Time(40), 6.0, "a", "y"},
{execute.Time(50), 8.0, "a", "x"},
{execute.Time(60), 1.0, "a", "y"},
{execute.Time(70), 2.0, "a", "x"},
{execute.Time(80), 3.0, "a", "y"},
{execute.Time(90), 7.0, "a", "x"},
},
},
want: []execute.Row{{
Values: []interface{}{execute.Time(90), 7.0, "a", "x"},
}},
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
executetest.RowSelectorFuncTestHelper(
t,
new(functions.LastSelector),
tc.data,
tc.want,
)
})
}
}
func BenchmarkLast(b *testing.B) {
executetest.RowSelectorFuncBenchmarkHelper(b, new(functions.LastSelector), NormalBlock)
}
func TestLast_PushDown_Match(t *testing.T) {
spec := new(functions.LastProcedureSpec)
from := new(functions.FromProcedureSpec)
// Should not match when an aggregate is set
from.AggregateSet = true
plantest.PhysicalPlan_PushDown_Match_TestHelper(t, spec, from, []bool{false})
// Should match when no aggregate is set
from.AggregateSet = false
plantest.PhysicalPlan_PushDown_Match_TestHelper(t, spec, from, []bool{true})
}
func TestLast_PushDown(t *testing.T) {
spec := new(functions.LastProcedureSpec)
root := &plan.Procedure{
Spec: new(functions.FromProcedureSpec),
}
want := &plan.Procedure{
Spec: &functions.FromProcedureSpec{
BoundsSet: true,
Bounds: plan.BoundsSpec{
Start: query.MinTime,
Stop: query.Now,
},
LimitSet: true,
PointsLimit: 1,
DescendingSet: true,
Descending: true,
},
}
plantest.PhysicalPlan_PushDown_TestHelper(t, spec, root, false, want)
}
func TestLast_PushDown_Duplicate(t *testing.T) {
spec := new(functions.LastProcedureSpec)
root := &plan.Procedure{
Spec: &functions.FromProcedureSpec{
BoundsSet: true,
Bounds: plan.BoundsSpec{
Start: query.MinTime,
Stop: query.Now,
},
LimitSet: true,
PointsLimit: 1,
DescendingSet: true,
Descending: true,
},
}
want := &plan.Procedure{
// Expect that the duplicate has been reset to zero values
Spec: new(functions.FromProcedureSpec),
}
plantest.PhysicalPlan_PushDown_TestHelper(t, spec, root, true, want)
}

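`TestLast_PushDown_Duplicate` above exercises a pattern shared by the group, last, and limit rules (see limit.go below): when the `from` procedure already carries the relevant flags, `PushDown` duplicates the root and resets those flags instead of merging into it. A reduced sketch of that decision with a hypothetical two-field spec:

```go
package main

import "fmt"

// fromSpec is a stand-in for the handful of FromProcedureSpec fields a
// push-down rule touches.
type fromSpec struct {
	LimitSet    bool
	PointsLimit int64
}

// pushDownLimit mirrors LimitProcedureSpec.PushDown: merge into an untouched
// root, otherwise duplicate it and clear the conflicting fields.
func pushDownLimit(root *fromSpec, n int64, dup func() *fromSpec) {
	if root.LimitSet {
		d := dup()
		d.LimitSet = false
		d.PointsLimit = 0
		return
	}
	root.LimitSet = true
	root.PointsLimit = n
}

func main() {
	root := &fromSpec{}
	pushDownLimit(root, 1, func() *fromSpec { c := *root; return &c })
	fmt.Printf("%+v\n", *root) // {LimitSet:true PointsLimit:1}

	// A second push-down against the now-set root resets the duplicate instead.
	dup := &fromSpec{}
	pushDownLimit(root, 1, func() *fromSpec { d := *root; dup = &d; return dup })
	fmt.Printf("%+v\n", *dup) // {LimitSet:false PointsLimit:0}
}
```
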
219
query/functions/limit.go Normal file
View File

@ -0,0 +1,219 @@
package functions
import (
"fmt"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/plan"
"github.com/influxdata/ifql/semantic"
"github.com/pkg/errors"
)
const LimitKind = "limit"
// LimitOpSpec limits the number of rows returned per block.
// Currently offset is not supported.
type LimitOpSpec struct {
N int64 `json:"n"`
//Offset int64 `json:"offset"`
}
var limitSignature = query.DefaultFunctionSignature()
func init() {
limitSignature.Params["n"] = semantic.Int
query.RegisterFunction(LimitKind, createLimitOpSpec, limitSignature)
query.RegisterOpSpec(LimitKind, newLimitOp)
plan.RegisterProcedureSpec(LimitKind, newLimitProcedure, LimitKind)
// Limit is registered both as a standalone transformation and as a push down into a select procedure (see PushDownRules below).
execute.RegisterTransformation(LimitKind, createLimitTransformation)
}
func createLimitOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
}
spec := new(LimitOpSpec)
n, err := args.GetRequiredInt("n")
if err != nil {
return nil, err
}
spec.N = n
return spec, nil
}
func newLimitOp() query.OperationSpec {
return new(LimitOpSpec)
}
func (s *LimitOpSpec) Kind() query.OperationKind {
return LimitKind
}
type LimitProcedureSpec struct {
N int64 `json:"n"`
//Offset int64 `json:"offset"`
}
func newLimitProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*LimitOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
}
return &LimitProcedureSpec{
N: spec.N,
//Offset: spec.Offset,
}, nil
}
func (s *LimitProcedureSpec) Kind() plan.ProcedureKind {
return LimitKind
}
func (s *LimitProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(LimitProcedureSpec)
ns.N = s.N
//ns.Offset = s.Offset
return ns
}
func (s *LimitProcedureSpec) PushDownRules() []plan.PushDownRule {
return []plan.PushDownRule{{
Root: FromKind,
Through: []plan.ProcedureKind{GroupKind, RangeKind, FilterKind},
}}
}
func (s *LimitProcedureSpec) PushDown(root *plan.Procedure, dup func() *plan.Procedure) {
selectSpec := root.Spec.(*FromProcedureSpec)
if selectSpec.LimitSet {
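// The from procedure already carries a pushed-down limit, so this
// limit cannot be merged into it. Duplicate the node and reset the
// limit fields on the copy.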
root = dup()
selectSpec = root.Spec.(*FromProcedureSpec)
selectSpec.LimitSet = false
selectSpec.PointsLimit = 0
selectSpec.SeriesLimit = 0
selectSpec.SeriesOffset = 0
return
}
selectSpec.LimitSet = true
selectSpec.PointsLimit = s.N
selectSpec.SeriesLimit = 0
selectSpec.SeriesOffset = 0
}
func createLimitTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*LimitProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
}
cache := execute.NewBlockBuilderCache(a.Allocator())
d := execute.NewDataset(id, mode, cache)
t := NewLimitTransformation(d, cache, s)
return t, d, nil
}
type limitTransformation struct {
d execute.Dataset
cache execute.BlockBuilderCache
n int
colMap []int
}
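// NewLimitTransformation returns a transformation that copies at most spec.N
// rows from each input block into an output block with the same key.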
func NewLimitTransformation(d execute.Dataset, cache execute.BlockBuilderCache, spec *LimitProcedureSpec) *limitTransformation {
return &limitTransformation{
d: d,
cache: cache,
n: int(spec.N),
}
}
func (t *limitTransformation) RetractBlock(id execute.DatasetID, key execute.PartitionKey) error {
return t.d.RetractBlock(key)
}
func (t *limitTransformation) Process(id execute.DatasetID, b execute.Block) error {
builder, created := t.cache.BlockBuilder(b.Key())
if !created {
return fmt.Errorf("limit found duplicate block with key: %v", b.Key())
}
execute.AddBlockCols(b, builder)
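// The builder mirrors the input block's columns one-to-one, so the
// column map for AppendCols is the identity mapping.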
ncols := builder.NCols()
if cap(t.colMap) < ncols {
t.colMap = make([]int, ncols)
for j := range t.colMap {
t.colMap[j] = j
}
} else {
t.colMap = t.colMap[:ncols]
}
// Append rows from the block to the builder, stopping once the limit is reached
n := t.n
b.Do(func(cr execute.ColReader) error {
if n <= 0 {
// Returning an error terminates iteration
return errors.New("finished")
}
l := cr.Len()
if l > n {
l = n
}
n -= l
lcr := limitColReader{
ColReader: cr,
n: l,
}
execute.AppendCols(lcr, builder, t.colMap)
return nil
})
return nil
}
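// limitColReader wraps a ColReader so that Len and every column accessor
// report at most n rows, which lets AppendCols copy only the rows within
// the limit.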
type limitColReader struct {
execute.ColReader
n int
}
func (cr limitColReader) Len() int {
return cr.n
}
func (cr limitColReader) Bools(j int) []bool {
return cr.ColReader.Bools(j)[:cr.n]
}
func (cr limitColReader) Ints(j int) []int64 {
return cr.ColReader.Ints(j)[:cr.n]
}
func (cr limitColReader) UInts(j int) []uint64 {
return cr.ColReader.UInts(j)[:cr.n]
}
func (cr limitColReader) Floats(j int) []float64 {
return cr.ColReader.Floats(j)[:cr.n]
}
func (cr limitColReader) Strings(j int) []string {
return cr.ColReader.Strings(j)[:cr.n]
}
func (cr limitColReader) Times(j int) []execute.Time {
return cr.ColReader.Times(j)[:cr.n]
}
func (t *limitTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
return t.d.UpdateWatermark(mark)
}
func (t *limitTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
return t.d.UpdateProcessingTime(pt)
}
func (t *limitTransformation) Finish(id execute.DatasetID, err error) {
t.d.Finish(err)
}

167
query/functions/limit_test.go Normal file
View File

@ -0,0 +1,167 @@
package functions_test
import (
"testing"
"github.com/influxdata/ifql/functions"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/execute/executetest"
"github.com/influxdata/ifql/query/plan"
"github.com/influxdata/ifql/query/plan/plantest"
"github.com/influxdata/ifql/query/querytest"
)
func TestLimitOperation_Marshaling(t *testing.T) {
data := []byte(`{"id":"limit","kind":"limit","spec":{"n":10}}`)
op := &query.Operation{
ID: "limit",
Spec: &functions.LimitOpSpec{
N: 10,
},
}
querytest.OperationMarshalingTestHelper(t, data, op)
}
func TestLimit_Process(t *testing.T) {
testCases := []struct {
name string
spec *functions.LimitProcedureSpec
data []execute.Block
want []*executetest.Block
}{
{
name: "one block",
spec: &functions.LimitProcedureSpec{
N: 1,
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(1), 2.0},
{execute.Time(2), 1.0},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(1), 2.0},
},
}},
},
{
name: "multiple blocks",
spec: &functions.LimitProcedureSpec{
N: 2,
},
data: []execute.Block{
&executetest.Block{
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{"a", execute.Time(1), 3.0},
{"a", execute.Time(2), 2.0},
{"a", execute.Time(2), 1.0},
},
},
&executetest.Block{
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{"b", execute.Time(3), 3.0},
{"b", execute.Time(3), 2.0},
{"b", execute.Time(4), 1.0},
},
},
},
want: []*executetest.Block{
{
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{"a", execute.Time(1), 3.0},
{"a", execute.Time(2), 2.0},
},
},
{
KeyCols: []string{"t1"},
ColMeta: []execute.ColMeta{
{Label: "t1", Type: execute.TString},
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{"b", execute.Time(3), 3.0},
{"b", execute.Time(3), 2.0},
},
},
},
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
executetest.ProcessTestHelper(
t,
tc.data,
tc.want,
func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
return functions.NewLimitTransformation(d, c, tc.spec)
},
)
})
}
}
func TestLimit_PushDown(t *testing.T) {
spec := &functions.LimitProcedureSpec{
N: 42,
}
root := &plan.Procedure{
Spec: new(functions.FromProcedureSpec),
}
want := &plan.Procedure{
Spec: &functions.FromProcedureSpec{
LimitSet: true,
PointsLimit: 42,
},
}
plantest.PhysicalPlan_PushDown_TestHelper(t, spec, root, false, want)
}
func TestLimit_PushDown_Duplicate(t *testing.T) {
spec := &functions.LimitProcedureSpec{
N: 9,
}
root := &plan.Procedure{
Spec: &functions.FromProcedureSpec{
LimitSet: true,
PointsLimit: 42,
},
}
want := &plan.Procedure{
// Expect the duplicate to have been reset to zero values
Spec: new(functions.FromProcedureSpec),
}
plantest.PhysicalPlan_PushDown_TestHelper(t, spec, root, true, want)
}

171
query/functions/map.go Normal file
View File

@ -0,0 +1,171 @@
package functions
import (
"fmt"
"log"
"sort"
"github.com/influxdata/ifql/interpreter"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/plan"
"github.com/influxdata/ifql/semantic"
)
const MapKind = "map"
type MapOpSpec struct {
Fn *semantic.FunctionExpression `json:"fn"`
}
var mapSignature = query.DefaultFunctionSignature()
func init() {
mapSignature.Params["fn"] = semantic.Function
query.RegisterFunction(MapKind, createMapOpSpec, mapSignature)
query.RegisterOpSpec(MapKind, newMapOp)
plan.RegisterProcedureSpec(MapKind, newMapProcedure, MapKind)
execute.RegisterTransformation(MapKind, createMapTransformation)
}
func createMapOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
}
f, err := args.GetRequiredFunction("fn")
if err != nil {
return nil, err
}
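// Resolve the function, substituting values captured from the surrounding
// scope (e.g. a top-level `x = 2` referenced inside the function body).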
fn, err := interpreter.ResolveFunction(f)
if err != nil {
return nil, err
}
return &MapOpSpec{
Fn: fn,
}, nil
}
func newMapOp() query.OperationSpec {
return new(MapOpSpec)
}
func (s *MapOpSpec) Kind() query.OperationKind {
return MapKind
}
type MapProcedureSpec struct {
Fn *semantic.FunctionExpression
}
func newMapProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*MapOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
}
return &MapProcedureSpec{
Fn: spec.Fn,
}, nil
}
func (s *MapProcedureSpec) Kind() plan.ProcedureKind {
return MapKind
}
func (s *MapProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(MapProcedureSpec)
ns.Fn = s.Fn.Copy().(*semantic.FunctionExpression)
return ns
}
func createMapTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
s, ok := spec.(*MapProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
}
cache := execute.NewBlockBuilderCache(a.Allocator())
d := execute.NewDataset(id, mode, cache)
t, err := NewMapTransformation(d, cache, s)
if err != nil {
return nil, nil, err
}
return t, d, nil
}
type mapTransformation struct {
d execute.Dataset
cache execute.BlockBuilderCache
fn *execute.RowMapFn
}
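// NewMapTransformation wraps the map function in a RowMapFn so it can be
// evaluated once per row; the function is prepared against the concrete
// column types in Process.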
func NewMapTransformation(d execute.Dataset, cache execute.BlockBuilderCache, spec *MapProcedureSpec) (*mapTransformation, error) {
fn, err := execute.NewRowMapFn(spec.Fn)
if err != nil {
return nil, err
}
return &mapTransformation{
d: d,
cache: cache,
fn: fn,
}, nil
}
func (t *mapTransformation) RetractBlock(id execute.DatasetID, key execute.PartitionKey) error {
return t.d.RetractBlock(key)
}
func (t *mapTransformation) Process(id execute.DatasetID, b execute.Block) error {
// Prepare the function for the block's column types.
cols := b.Cols()
err := t.fn.Prepare(cols)
if err != nil {
// TODO(nathanielc): Should we not fail the query for failed compilation?
return err
}
return b.Do(func(cr execute.ColReader) error {
l := cr.Len()
for i := 0; i < l; i++ {
m, err := t.fn.Eval(i, cr)
if err != nil {
log.Printf("failed to evaluate map expression: %v", err)
continue
}
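// Partition the mapped row by its key so that rows with the same
// partition key are appended to the same output block.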
key := execute.PartitionKeyForRow(i, cr)
builder, created := t.cache.BlockBuilder(key)
if created {
// Add the columns of the function's return type in sorted order so the output column order is deterministic
properties := t.fn.Type().Properties()
keys := make([]string, 0, len(properties))
for k := range properties {
keys = append(keys, k)
}
sort.Strings(keys)
for _, k := range keys {
builder.AddCol(execute.ColMeta{
Label: k,
Type: execute.ConvertFromKind(properties[k].Kind()),
})
}
}
for j, c := range builder.Cols() {
v, _ := m.Get(c.Label)
execute.AppendValue(builder, j, v)
}
}
return nil
})
}
func (t *mapTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
return t.d.UpdateWatermark(mark)
}
func (t *mapTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
return t.d.UpdateProcessingTime(pt)
}
func (t *mapTransformation) Finish(id execute.DatasetID, err error) {
t.d.Finish(err)
}

400
query/functions/map_test.go Normal file
View File

@ -0,0 +1,400 @@
package functions_test
import (
"testing"
"github.com/influxdata/ifql/ast"
"github.com/influxdata/ifql/functions"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/execute/executetest"
"github.com/influxdata/ifql/query/querytest"
"github.com/influxdata/ifql/semantic"
)
func TestMap_NewQuery(t *testing.T) {
tests := []querytest.NewQueryTestCase{
{
Name: "simple static map",
Raw: `from(db:"mydb") |> map(fn: (r) => r._value + 1)`,
Want: &query.Spec{
Operations: []*query.Operation{
{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
},
},
{
ID: "map1",
Spec: &functions.MapOpSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.AdditionOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "r",
},
Property: "_value",
},
Right: &semantic.IntegerLiteral{Value: 1},
},
},
},
},
},
Edges: []query.Edge{
{Parent: "from0", Child: "map1"},
},
},
},
{
Name: "resolve map",
Raw: `x = 2 from(db:"mydb") |> map(fn: (r) => r._value + x)`,
Want: &query.Spec{
Operations: []*query.Operation{
{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
},
},
{
ID: "map1",
Spec: &functions.MapOpSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.AdditionOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "r",
},
Property: "_value",
},
Right: &semantic.IntegerLiteral{Value: 2},
},
},
},
},
},
Edges: []query.Edge{
{Parent: "from0", Child: "map1"},
},
},
},
}
for _, tc := range tests {
tc := tc
t.Run(tc.Name, func(t *testing.T) {
t.Parallel()
querytest.NewQueryTestHelper(t, tc)
})
}
}
func TestMapOperation_Marshaling(t *testing.T) {
data := []byte(`{
"id":"map",
"kind":"map",
"spec":{
"fn":{
"type": "ArrowFunctionExpression",
"params": [{"type":"FunctionParam","key":{"type":"Identifier","name":"r"}}],
"body":{
"type":"BinaryExpression",
"operator": "-",
"left":{
"type":"MemberExpression",
"object": {
"type": "IdentifierExpression",
"name":"r"
},
"property": "_value"
},
"right":{
"type":"FloatLiteral",
"value": 5.6
}
}
}
}
}`)
op := &query.Operation{
ID: "map",
Spec: &functions.MapOpSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.BinaryExpression{
Operator: ast.SubtractionOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "r",
},
Property: "_value",
},
Right: &semantic.FloatLiteral{Value: 5.6},
},
},
},
}
querytest.OperationMarshalingTestHelper(t, data, op)
}
func TestMap_Process(t *testing.T) {
testCases := []struct {
name string
spec *functions.MapProcedureSpec
data []execute.Block
want []*executetest.Block
}{
{
name: `_value+5`,
spec: &functions.MapProcedureSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.ObjectExpression{
Properties: []*semantic.Property{
{
Key: &semantic.Identifier{Name: "_time"},
Value: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "r",
},
Property: "_time",
},
},
{
Key: &semantic.Identifier{Name: "_value"},
Value: &semantic.BinaryExpression{
Operator: ast.AdditionOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "r",
},
Property: "_value",
},
Right: &semantic.FloatLiteral{
Value: 5,
},
},
},
},
},
},
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(1), 1.0},
{execute.Time(2), 6.0},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(1), 6.0},
{execute.Time(2), 11.0},
},
}},
},
{
name: `_value*_value`,
spec: &functions.MapProcedureSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.ObjectExpression{
Properties: []*semantic.Property{
{
Key: &semantic.Identifier{Name: "_time"},
Value: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "r",
},
Property: "_time",
},
},
{
Key: &semantic.Identifier{Name: "_value"},
Value: &semantic.BinaryExpression{
Operator: ast.MultiplicationOperator,
Left: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "r",
},
Property: "_value",
},
Right: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "r",
},
Property: "_value",
},
},
},
},
},
},
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(1), 1.0},
{execute.Time(2), 6.0},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(1), 1.0},
{execute.Time(2), 36.0},
},
}},
},
{
name: "float(r._value) int",
spec: &functions.MapProcedureSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.ObjectExpression{
Properties: []*semantic.Property{
{
Key: &semantic.Identifier{Name: "_time"},
Value: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "r",
},
Property: "_time",
},
},
{
Key: &semantic.Identifier{Name: "_value"},
Value: &semantic.CallExpression{
Callee: &semantic.IdentifierExpression{Name: "float"},
Arguments: &semantic.ObjectExpression{
Properties: []*semantic.Property{{
Key: &semantic.Identifier{Name: "v"},
Value: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "r",
},
Property: "_value",
},
}},
},
},
},
},
},
},
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TInt},
},
Data: [][]interface{}{
{execute.Time(1), int64(1)},
{execute.Time(2), int64(6)},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(1), 1.0},
{execute.Time(2), 6.0},
},
}},
},
{
name: "float(r._value) uint",
spec: &functions.MapProcedureSpec{
Fn: &semantic.FunctionExpression{
Params: []*semantic.FunctionParam{{Key: &semantic.Identifier{Name: "r"}}},
Body: &semantic.ObjectExpression{
Properties: []*semantic.Property{
{
Key: &semantic.Identifier{Name: "_time"},
Value: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "r",
},
Property: "_time",
},
},
{
Key: &semantic.Identifier{Name: "_value"},
Value: &semantic.CallExpression{
Callee: &semantic.IdentifierExpression{Name: "float"},
Arguments: &semantic.ObjectExpression{
Properties: []*semantic.Property{{
Key: &semantic.Identifier{Name: "v"},
Value: &semantic.MemberExpression{
Object: &semantic.IdentifierExpression{
Name: "r",
},
Property: "_value",
},
}},
},
},
},
},
},
},
},
data: []execute.Block{&executetest.Block{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TUInt},
},
Data: [][]interface{}{
{execute.Time(1), uint64(1)},
{execute.Time(2), uint64(6)},
},
}},
want: []*executetest.Block{{
ColMeta: []execute.ColMeta{
{Label: "_time", Type: execute.TTime},
{Label: "_value", Type: execute.TFloat},
},
Data: [][]interface{}{
{execute.Time(1), 1.0},
{execute.Time(2), 6.0},
},
}},
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
executetest.ProcessTestHelper(
t,
tc.data,
tc.want,
func(d execute.Dataset, c execute.BlockBuilderCache) execute.Transformation {
f, err := functions.NewMapTransformation(d, c, tc.spec)
if err != nil {
t.Fatal(err)
}
return f
},
)
})
}
}

167
query/functions/max.go Normal file
View File

@ -0,0 +1,167 @@
package functions
import (
"fmt"
"github.com/influxdata/ifql/query"
"github.com/influxdata/ifql/query/execute"
"github.com/influxdata/ifql/query/plan"
"github.com/influxdata/ifql/semantic"
)
const MaxKind = "max"
type MaxOpSpec struct {
execute.SelectorConfig
}
var maxSignature = query.DefaultFunctionSignature()
func init() {
maxSignature.Params["column"] = semantic.String
maxSignature.Params["useRowTime"] = semantic.Bool
query.RegisterFunction(MaxKind, createMaxOpSpec, maxSignature)
query.RegisterOpSpec(MaxKind, newMaxOp)
plan.RegisterProcedureSpec(MaxKind, newMaxProcedure, MaxKind)
execute.RegisterTransformation(MaxKind, createMaxTransformation)
}
func createMaxOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
}
spec := new(MaxOpSpec)
if err := spec.SelectorConfig.ReadArgs(args); err != nil {
return nil, err
}
return spec, nil
}
func newMaxOp() query.OperationSpec {
return new(MaxOpSpec)
}
func (s *MaxOpSpec) Kind() query.OperationKind {
return MaxKind
}
type MaxProcedureSpec struct {
execute.SelectorConfig
}
func newMaxProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*MaxOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
}
return &MaxProcedureSpec{
SelectorConfig: spec.SelectorConfig,
}, nil
}
func (s *MaxProcedureSpec) Kind() plan.ProcedureKind {
return MaxKind
}
func (s *MaxProcedureSpec) Copy() plan.ProcedureSpec {
ns := new(MaxProcedureSpec)
ns.SelectorConfig = s.SelectorConfig
return ns
}
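// MaxSelector tracks the row holding the maximum value seen so far. Typed
// sub-selectors implement the comparison for int, uint, and float columns;
// bool and string columns are not supported (their constructors return nil).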
type MaxSelector struct {
set bool
rows []execute.Row
}
func createMaxTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
ps, ok := spec.(*MaxProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
}
t, d := execute.NewRowSelectorTransformationAndDataset(id, mode, new(MaxSelector), ps.SelectorConfig, a.Allocator())
return t, d, nil
}
type MaxIntSelector struct {
MaxSelector
max int64
}
type MaxUIntSelector struct {
MaxSelector
max uint64
}
type MaxFloatSelector struct {
MaxSelector
max float64
}
func (s *MaxSelector) NewBoolSelector() execute.DoBoolRowSelector {
return nil
}
func (s *MaxSelector) NewIntSelector() execute.DoIntRowSelector {
return new(MaxIntSelector)
}
func (s *MaxSelector) NewUIntSelector() execute.DoUIntRowSelector {
return new(MaxUIntSelector)
}
func (s *MaxSelector) NewFloatSelector() execute.DoFloatRowSelector {
return new(MaxFloatSelector)
}
func (s *MaxSelector) NewStringSelector() execute.DoStringRowSelector {
return nil
}
func (s *MaxSelector) Rows() []execute.Row {
if !s.set {
return nil
}
return s.rows
}
func (s *MaxSelector) selectRow(idx int, cr execute.ColReader) {
// Capture the selected row; a negative idx means no new maximum was found
// in this batch, so the previously captured row is kept.
if idx >= 0 {
s.rows = []execute.Row{execute.ReadRow(idx, cr)}
}
}
func (s *MaxIntSelector) DoInt(vs []int64, cr execute.ColReader) {
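// maxIdx records the last row in this batch that set a new maximum;
// -1 means the maximum did not change.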
maxIdx := -1
for i, v := range vs {
if !s.set || v > s.max {
s.set = true
s.max = v
maxIdx = i
}
}
s.selectRow(maxIdx, cr)
}
func (s *MaxUIntSelector) DoUInt(vs []uint64, cr execute.ColReader) {
maxIdx := -1
for i, v := range vs {
if !s.set || v > s.max {
s.set = true
s.max = v
maxIdx = i
}
}
s.selectRow(maxIdx, cr)
}
func (s *MaxFloatSelector) DoFloat(vs []float64, cr execute.ColReader) {
maxIdx := -1
for i, v := range vs {
if !s.set || v > s.max {
s.set = true
s.max = v
maxIdx = i
}
}
s.selectRow(maxIdx, cr)
}

Some files were not shown because too many files have changed in this diff.