2018-03-01 20:12:22 +00:00
|
|
|
package query
|
|
|
|
|
|
|
|
import (
|
2018-03-27 13:44:44 +00:00
|
|
|
"math"
|
2018-03-01 20:12:22 +00:00
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/influxdata/influxql"
|
|
|
|
)
|
|
|
|
|
2018-03-27 13:44:44 +00:00
|
|
|
// NullFloat is a typed nil: a nil *float64 stored in an interface{} value.
// scannerCursorBase.Scan substitutes it for float64 results that are NaN so
// that the value can be serialized correctly without being mistaken for a
// null value that needs to be filled.
var NullFloat interface{} = (*float64)(nil)
|
|
|
|
|
2018-03-01 20:12:22 +00:00
|
|
|
// Series represents the metadata about a series.
|
|
|
|
type Series struct {
|
|
|
|
// Name is the measurement name.
|
|
|
|
Name string
|
|
|
|
|
|
|
|
// Tags for the series.
|
|
|
|
Tags Tags
|
|
|
|
|
|
|
|
// This is an internal id used to easily compare if a series is the
|
|
|
|
// same as another series. Whenever the internal cursor changes
|
|
|
|
// to a new series, this id gets incremented. It is not exposed to
|
|
|
|
// the user so we can implement this in whatever way we want.
|
|
|
|
// If a series is not generated by a cursor, this id is zero and
|
|
|
|
// it will instead attempt to compare the name and tags.
|
|
|
|
id uint64
|
|
|
|
}
|
|
|
|
|
|
|
|
// SameSeries checks if this is the same series as another one.
|
|
|
|
// It does not necessarily check for equality so this is different from
|
|
|
|
// checking to see if the name and tags are the same. It checks whether
|
|
|
|
// the two are part of the same series in the response.
|
|
|
|
func (s Series) SameSeries(other Series) bool {
|
|
|
|
if s.id != 0 && other.id != 0 {
|
|
|
|
return s.id == other.id
|
|
|
|
}
|
|
|
|
return s.Name == other.Name && s.Tags.ID() == other.Tags.ID()
|
|
|
|
}
|
|
|
|
|
|
|
|
// Equal checks to see if the Series are identical.
|
|
|
|
func (s Series) Equal(other Series) bool {
|
|
|
|
if s.id != 0 && other.id != 0 {
|
|
|
|
// If the ids are the same, then we can short-circuit and assume they
|
|
|
|
// are the same. If they are not the same, do the long check since
|
|
|
|
// they may still be identical, but not necessarily generated from
|
|
|
|
// the same cursor.
|
|
|
|
if s.id == other.id {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return s.Name == other.Name && s.Tags.ID() == other.Tags.ID()
|
|
|
|
}
|
|
|
|
|
|
|
|
// Row represents a single row returned by the query engine.
|
|
|
|
type Row struct {
|
|
|
|
// Time returns the time for this row. If the cursor was created to
|
|
|
|
// return time as one of the values, the time will also be included as
|
|
|
|
// a time.Time in the appropriate column within Values.
|
|
|
|
// This ensures that time is always present in the Row structure
|
|
|
|
// even if it hasn't been requested in the output.
|
|
|
|
Time int64
|
|
|
|
|
|
|
|
// Series contains the series metadata for this row.
|
|
|
|
Series Series
|
|
|
|
|
|
|
|
// Values contains the values within the current row.
|
|
|
|
Values []interface{}
|
|
|
|
}
|
|
|
|
|
|
|
|
type Cursor interface {
|
|
|
|
// Scan will retrieve the next row and assign the result to
|
|
|
|
// the passed in Row. If the Row has not been initialized, the Cursor
|
|
|
|
// will initialize the Row.
|
|
|
|
// To increase speed and memory usage, the same Row can be used and
|
|
|
|
// the previous values will be overwritten while using the same memory.
|
|
|
|
Scan(row *Row) bool
|
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
// Stats returns the IteratorStats from the underlying iterators.
|
|
|
|
Stats() IteratorStats
|
|
|
|
|
2018-03-01 20:12:22 +00:00
|
|
|
// Err returns any errors that were encountered from scanning the rows.
|
|
|
|
Err() error
|
|
|
|
|
|
|
|
// Columns returns the column names and types.
|
|
|
|
Columns() []influxql.VarRef
|
|
|
|
|
|
|
|
// Close closes the underlying resources that the cursor is using.
|
|
|
|
Close() error
|
|
|
|
}
|
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
// RowCursor returns a Cursor that iterates over Rows.
|
|
|
|
func RowCursor(rows []Row, columns []influxql.VarRef) Cursor {
|
|
|
|
return &rowCursor{
|
|
|
|
rows: rows,
|
|
|
|
columns: columns,
|
|
|
|
}
|
|
|
|
}
|
2018-03-01 20:12:22 +00:00
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
type rowCursor struct {
|
|
|
|
rows []Row
|
|
|
|
columns []influxql.VarRef
|
2018-03-01 20:12:22 +00:00
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
series Series
|
2018-03-01 20:12:22 +00:00
|
|
|
}
|
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
func (cur *rowCursor) Scan(row *Row) bool {
|
|
|
|
if len(cur.rows) == 0 {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
*row = cur.rows[0]
|
|
|
|
if row.Series.Name != cur.series.Name || !row.Series.Tags.Equals(&cur.series.Tags) {
|
|
|
|
cur.series.Name = row.Series.Name
|
|
|
|
cur.series.Tags = row.Series.Tags
|
|
|
|
cur.series.id++
|
2018-03-01 20:12:22 +00:00
|
|
|
}
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
cur.rows = cur.rows[1:]
|
|
|
|
return true
|
2018-03-01 20:12:22 +00:00
|
|
|
}
|
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
func (cur *rowCursor) Stats() IteratorStats {
|
|
|
|
return IteratorStats{}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (cur *rowCursor) Err() error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (cur *rowCursor) Columns() []influxql.VarRef {
|
|
|
|
return cur.columns
|
|
|
|
}
|
|
|
|
|
|
|
|
func (cur *rowCursor) Close() error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// scannerFunc is the row source used by scannerCursorBase. Each call
// receives the cursor's value map and returns the timestamp, measurement
// name and tags of the next row. scannerCursorBase.Scan treats a returned
// timestamp equal to ZeroTime as the end of the data.
// NOTE(review): presumably the function fills m with the values for the
// returned row, since the cursor's valuer reads from the same map — confirm
// against the implementations.
type scannerFunc func(m map[string]interface{}) (int64, string, Tags)
|
|
|
|
|
|
|
|
type scannerCursorBase struct {
|
|
|
|
fields []influxql.Expr
|
|
|
|
m map[string]interface{}
|
|
|
|
|
|
|
|
series Series
|
|
|
|
columns []influxql.VarRef
|
|
|
|
loc *time.Location
|
|
|
|
|
reuse ValuerEval objects
Scanner objects and iterators often need a ValuerEval. This
object is created, often with a function call, and has at
least one interface in it, so it allocates storage. Then it's
dropped again right away. The only part of it that might be
subject to change is usually a map. While the map's contents
change over time, the actual map doesn't change for the
lifetime of the object.
So, in both iterators and scanners, stash the ValuerEval
and continue reusing it. On a query returning a fair number
of data points, this produces a small (<5% in practice)
improvement in observed performance, visible as a significant
reduction in time spent in runtime (mallocgc, newobject,
etcetera).
The performance improvement isn't big, but it's reasonably
easy to evaluate it and establish that it's a safe change
to make.
Signed-off-by: seebs <seebs@seebs.net>
2018-10-25 16:28:40 +00:00
|
|
|
scan scannerFunc
|
|
|
|
valuer influxql.ValuerEval
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func newScannerCursorBase(scan scannerFunc, fields []*influxql.Field, loc *time.Location) scannerCursorBase {
|
|
|
|
typmap := FunctionTypeMapper{}
|
|
|
|
exprs := make([]influxql.Expr, len(fields))
|
|
|
|
columns := make([]influxql.VarRef, len(fields))
|
|
|
|
for i, f := range fields {
|
|
|
|
exprs[i] = f.Expr
|
|
|
|
columns[i] = influxql.VarRef{
|
|
|
|
Val: f.Name(),
|
|
|
|
Type: influxql.EvalType(f.Expr, nil, typmap),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if loc == nil {
|
|
|
|
loc = time.UTC
|
2018-03-01 20:12:22 +00:00
|
|
|
}
|
|
|
|
|
reuse ValuerEval objects
Scanner objects and iterators often need a ValuerEval. This
object is created, often with a function call, and has at
least one interface in it, so it allocates storage. Then it's
dropped again right away. The only part of it that might be
subject to change is usually a map. While the map's contents
change over time, the actual map doesn't change for the
lifetime of the object.
So, in both iterators and scanners, stash the ValuerEval
and continue reusing it. On a query returning a fair number
of data points, this produces a small (<5% in practice)
improvement in observed performance, visible as a significant
reduction in time spent in runtime (mallocgc, newobject,
etcetera).
The performance improvement isn't big, but it's reasonably
easy to evaluate it and establish that it's a safe change
to make.
Signed-off-by: seebs <seebs@seebs.net>
2018-10-25 16:28:40 +00:00
|
|
|
m := make(map[string]interface{})
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
return scannerCursorBase{
|
|
|
|
fields: exprs,
|
reuse ValuerEval objects
Scanner objects and iterators often need a ValuerEval. This
object is created, often with a function call, and has at
least one interface in it, so it allocates storage. Then it's
dropped again right away. The only part of it that might be
subject to change is usually a map. While the map's contents
change over time, the actual map doesn't change for the
lifetime of the object.
So, in both iterators and scanners, stash the ValuerEval
and continue reusing it. On a query returning a fair number
of data points, this produces a small (<5% in practice)
improvement in observed performance, visible as a significant
reduction in time spent in runtime (mallocgc, newobject,
etcetera).
The performance improvement isn't big, but it's reasonably
easy to evaluate it and establish that it's a safe change
to make.
Signed-off-by: seebs <seebs@seebs.net>
2018-10-25 16:28:40 +00:00
|
|
|
m: m,
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
columns: columns,
|
|
|
|
loc: loc,
|
|
|
|
scan: scan,
|
reuse ValuerEval objects
Scanner objects and iterators often need a ValuerEval. This
object is created, often with a function call, and has at
least one interface in it, so it allocates storage. Then it's
dropped again right away. The only part of it that might be
subject to change is usually a map. While the map's contents
change over time, the actual map doesn't change for the
lifetime of the object.
So, in both iterators and scanners, stash the ValuerEval
and continue reusing it. On a query returning a fair number
of data points, this produces a small (<5% in practice)
improvement in observed performance, visible as a significant
reduction in time spent in runtime (mallocgc, newobject,
etcetera).
The performance improvement isn't big, but it's reasonably
easy to evaluate it and establish that it's a safe change
to make.
Signed-off-by: seebs <seebs@seebs.net>
2018-10-25 16:28:40 +00:00
|
|
|
valuer: influxql.ValuerEval{
|
|
|
|
Valuer: influxql.MultiValuer(
|
|
|
|
MathValuer{},
|
|
|
|
influxql.MapValuer(m),
|
|
|
|
),
|
|
|
|
IntegerFloatDivision: true,
|
|
|
|
},
|
2018-03-01 20:12:22 +00:00
|
|
|
}
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
}
|
2018-03-01 20:12:22 +00:00
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
func (cur *scannerCursorBase) Scan(row *Row) bool {
|
|
|
|
ts, name, tags := cur.scan(cur.m)
|
|
|
|
if ts == ZeroTime {
|
2018-03-01 20:12:22 +00:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
row.Time = ts
|
2018-03-01 20:12:22 +00:00
|
|
|
if name != cur.series.Name || tags.ID() != cur.series.Tags.ID() {
|
|
|
|
cur.series.Name = name
|
|
|
|
cur.series.Tags = tags
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
cur.series.id++
|
2018-03-01 20:12:22 +00:00
|
|
|
}
|
|
|
|
row.Series = cur.series
|
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
if len(cur.columns) > len(row.Values) {
|
2018-03-01 20:12:22 +00:00
|
|
|
row.Values = make([]interface{}, len(cur.columns))
|
|
|
|
}
|
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
for i, expr := range cur.fields {
|
|
|
|
// A special case if the field is time to reduce memory allocations.
|
|
|
|
if ref, ok := expr.(*influxql.VarRef); ok && ref.Val == "time" {
|
|
|
|
row.Values[i] = time.Unix(0, row.Time).In(cur.loc)
|
2018-03-01 20:12:22 +00:00
|
|
|
continue
|
|
|
|
}
|
reuse ValuerEval objects
Scanner objects and iterators often need a ValuerEval. This
object is created, often with a function call, and has at
least one interface in it, so it allocates storage. Then it's
dropped again right away. The only part of it that might be
subject to change is usually a map. While the map's contents
change over time, the actual map doesn't change for the
lifetime of the object.
So, in both iterators and scanners, stash the ValuerEval
and continue reusing it. On a query returning a fair number
of data points, this produces a small (<5% in practice)
improvement in observed performance, visible as a significant
reduction in time spent in runtime (mallocgc, newobject,
etcetera).
The performance improvement isn't big, but it's reasonably
easy to evaluate it and establish that it's a safe change
to make.
Signed-off-by: seebs <seebs@seebs.net>
2018-10-25 16:28:40 +00:00
|
|
|
v := cur.valuer.Eval(expr)
|
2018-03-27 13:44:44 +00:00
|
|
|
if fv, ok := v.(float64); ok && math.IsNaN(fv) {
|
|
|
|
// If the float value is NaN, convert it to a null float
|
|
|
|
// so this can be serialized correctly, but not mistaken for
|
|
|
|
// a null value that needs to be filled.
|
|
|
|
v = NullFloat
|
|
|
|
}
|
|
|
|
row.Values[i] = v
|
2018-03-01 20:12:22 +00:00
|
|
|
}
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
return true
|
2018-03-01 20:12:22 +00:00
|
|
|
}
|
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
func (cur *scannerCursorBase) Columns() []influxql.VarRef {
|
|
|
|
return cur.columns
|
|
|
|
}
|
2018-03-01 20:12:22 +00:00
|
|
|
|
2019-04-18 14:41:56 +00:00
|
|
|
func (cur *scannerCursorBase) clear(m map[string]interface{}) {
|
|
|
|
for k := range m {
|
|
|
|
delete(m, k)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
// Compile-time assertion that *scannerCursor satisfies the Cursor interface.
var _ Cursor = (*scannerCursor)(nil)
|
2018-03-01 20:12:22 +00:00
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
type scannerCursor struct {
|
|
|
|
scanner IteratorScanner
|
|
|
|
scannerCursorBase
|
|
|
|
}
|
2018-03-01 20:12:22 +00:00
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
func newScannerCursor(s IteratorScanner, fields []*influxql.Field, opt IteratorOptions) *scannerCursor {
|
|
|
|
cur := &scannerCursor{scanner: s}
|
|
|
|
cur.scannerCursorBase = newScannerCursorBase(cur.scan, fields, opt.Location)
|
|
|
|
return cur
|
2018-03-01 20:12:22 +00:00
|
|
|
}
|
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
func (s *scannerCursor) scan(m map[string]interface{}) (int64, string, Tags) {
|
|
|
|
ts, name, tags := s.scanner.Peek()
|
2019-04-18 14:41:56 +00:00
|
|
|
// if a new series, clear the map of previous values
|
|
|
|
if name != s.series.Name || tags.ID() != s.series.Tags.ID() {
|
|
|
|
s.clear(m)
|
|
|
|
}
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
if ts == ZeroTime {
|
|
|
|
return ts, name, tags
|
2018-03-01 20:12:22 +00:00
|
|
|
}
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
s.scanner.ScanAt(ts, name, tags, m)
|
|
|
|
return ts, name, tags
|
|
|
|
}
|
2018-03-01 20:12:22 +00:00
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
func (cur *scannerCursor) Stats() IteratorStats {
|
|
|
|
return cur.scanner.Stats()
|
2018-03-01 20:12:22 +00:00
|
|
|
}
|
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
func (cur *scannerCursor) Err() error {
|
|
|
|
return cur.scanner.Err()
|
2018-03-01 20:12:22 +00:00
|
|
|
}
|
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
func (cur *scannerCursor) Close() error {
|
|
|
|
return cur.scanner.Close()
|
2018-03-01 20:12:22 +00:00
|
|
|
}
|
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
// Compile-time assertion that *multiScannerCursor satisfies the Cursor interface.
var _ Cursor = (*multiScannerCursor)(nil)
|
|
|
|
|
|
|
|
type multiScannerCursor struct {
|
|
|
|
scanners []IteratorScanner
|
|
|
|
err error
|
|
|
|
ascending bool
|
|
|
|
scannerCursorBase
|
2018-03-01 20:12:22 +00:00
|
|
|
}
|
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
func newMultiScannerCursor(scanners []IteratorScanner, fields []*influxql.Field, opt IteratorOptions) *multiScannerCursor {
|
|
|
|
cur := &multiScannerCursor{
|
|
|
|
scanners: scanners,
|
|
|
|
ascending: opt.Ascending,
|
2018-03-01 20:12:22 +00:00
|
|
|
}
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
cur.scannerCursorBase = newScannerCursorBase(cur.scan, fields, opt.Location)
|
|
|
|
return cur
|
2018-03-01 20:12:22 +00:00
|
|
|
}
|
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
func (cur *multiScannerCursor) scan(m map[string]interface{}) (ts int64, name string, tags Tags) {
|
|
|
|
ts = ZeroTime
|
|
|
|
for _, s := range cur.scanners {
|
|
|
|
curTime, curName, curTags := s.Peek()
|
|
|
|
if curTime == ZeroTime {
|
|
|
|
if err := s.Err(); err != nil {
|
|
|
|
cur.err = err
|
|
|
|
return ZeroTime, "", Tags{}
|
|
|
|
}
|
|
|
|
continue
|
|
|
|
}
|
2018-03-01 20:12:22 +00:00
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
if ts == ZeroTime {
|
|
|
|
ts, name, tags = curTime, curName, curTags
|
|
|
|
continue
|
|
|
|
}
|
2018-03-01 20:12:22 +00:00
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
if cur.ascending {
|
|
|
|
if (curName < name) || (curName == name && curTags.ID() < tags.ID()) || (curName == name && curTags.ID() == tags.ID() && curTime < ts) {
|
|
|
|
ts, name, tags = curTime, curName, curTags
|
|
|
|
}
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if (curName > name) || (curName == name && curTags.ID() > tags.ID()) || (curName == name && curTags.ID() == tags.ID() && curTime > ts) {
|
|
|
|
ts, name, tags = curTime, curName, curTags
|
|
|
|
}
|
2018-03-01 20:12:22 +00:00
|
|
|
}
|
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
if ts == ZeroTime {
|
|
|
|
return ts, name, tags
|
2018-03-01 20:12:22 +00:00
|
|
|
}
|
2019-04-18 14:41:56 +00:00
|
|
|
// if a new series, clear the map of previous values
|
|
|
|
if name != cur.series.Name || tags.ID() != cur.series.Tags.ID() {
|
|
|
|
cur.clear(m)
|
|
|
|
}
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
for _, s := range cur.scanners {
|
|
|
|
s.ScanAt(ts, name, tags, m)
|
|
|
|
}
|
|
|
|
return ts, name, tags
|
2018-03-01 20:12:22 +00:00
|
|
|
}
|
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
func (cur *multiScannerCursor) Stats() IteratorStats {
|
|
|
|
var stats IteratorStats
|
|
|
|
for _, s := range cur.scanners {
|
|
|
|
stats.Add(s.Stats())
|
|
|
|
}
|
|
|
|
return stats
|
2018-03-01 20:12:22 +00:00
|
|
|
}
|
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
func (cur *multiScannerCursor) Err() error {
|
|
|
|
return cur.err
|
2018-03-01 20:12:22 +00:00
|
|
|
}
|
|
|
|
|
Refactor the math engine to compile the query and use eval
This change makes it so that we simplify the math engine so it doesn't
use a complicated set of nested iterators. That way, we have to change
math in one fewer place.
It also greatly simplifies the query engine as now we can create the
necessary iterators, join them by time, name, and tags, and then use the
cursor interface to read them and use eval to compute the result. It
makes it so the auxiliary iterators and all of their complexity can be
removed.
This also makes use of the new eval functionality that was recently
added to the influxql package.
No math functions have been added, but the scaffolding has been included
so things like trigonometry functions are just a single commit away.
This also introduces a small breaking change. Because of the call
optimization, it is now possible to use the same selector multiple times
as a selector. So if you do this:
SELECT max(value) * 2, max(value) / 2 FROM cpu
This will now return the timestamp of the max value rather than zero
since this query is considered to have only a single selector rather
than multiple separate selectors. If any aspect of the selector is
different, such as different selector functions or different arguments,
it will consider the selectors to be aggregates like the old behavior.
2018-03-19 17:05:55 +00:00
|
|
|
func (cur *multiScannerCursor) Close() error {
|
|
|
|
var err error
|
|
|
|
for _, s := range cur.scanners {
|
|
|
|
if e := s.Close(); e != nil && err == nil {
|
|
|
|
err = e
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return err
|
2018-03-01 20:12:22 +00:00
|
|
|
}
|
|
|
|
|
2018-03-28 18:13:46 +00:00
|
|
|
type filterCursor struct {
|
|
|
|
Cursor
|
|
|
|
// fields holds the mapping of field names to the index in the row
|
|
|
|
// based off of the column metadata. This only contains the fields
|
|
|
|
// we need and will exclude the ones we do not.
|
|
|
|
fields map[string]IteratorMap
|
|
|
|
filter influxql.Expr
|
|
|
|
m map[string]interface{}
|
reuse ValuerEval objects
Scanner objects and iterators often need a ValuerEval. This
object is created, often with a function call, and has at
least one interface in it, so it allocates storage. Then it's
dropped again right away. The only part of it that might be
subject to change is usually a map. While the map's contents
change over time, the actual map doesn't change for the
lifetime of the object.
So, in both iterators and scanners, stash the ValuerEval
and continue reusing it. On a query returning a fair number
of data points, this produces a small (<5% in practice)
improvement in observed performance, visible as a significant
reduction in time spent in runtime (mallocgc, newobject,
etcetera).
The performance improvement isn't big, but it's reasonably
easy to evaluate it and establish that it's a safe change
to make.
Signed-off-by: seebs <seebs@seebs.net>
2018-10-25 16:28:40 +00:00
|
|
|
valuer influxql.ValuerEval
|
2018-03-28 18:13:46 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func newFilterCursor(cur Cursor, filter influxql.Expr) *filterCursor {
|
|
|
|
fields := make(map[string]IteratorMap)
|
|
|
|
for _, name := range influxql.ExprNames(filter) {
|
|
|
|
for i, col := range cur.Columns() {
|
|
|
|
if name.Val == col.Val {
|
2018-03-30 21:58:37 +00:00
|
|
|
fields[name.Val] = FieldMap{
|
|
|
|
Index: i,
|
|
|
|
Type: name.Type,
|
|
|
|
}
|
2018-03-28 18:13:46 +00:00
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// If the field is not a column, assume it is a tag value.
|
|
|
|
// We do not know what the tag values will be, but there really
|
|
|
|
// isn't any different between NullMap and a TagMap that's pointed
|
|
|
|
// at the wrong location for the purposes described here.
|
|
|
|
if _, ok := fields[name.Val]; !ok {
|
|
|
|
fields[name.Val] = TagMap(name.Val)
|
|
|
|
}
|
|
|
|
}
|
reuse ValuerEval objects
Scanner objects and iterators often need a ValuerEval. This
object is created, often with a function call, and has at
least one interface in it, so it allocates storage. Then it's
dropped again right away. The only part of it that might be
subject to change is usually a map. While the map's contents
change over time, the actual map doesn't change for the
lifetime of the object.
So, in both iterators and scanners, stash the ValuerEval
and continue reusing it. On a query returning a fair number
of data points, this produces a small (<5% in practice)
improvement in observed performance, visible as a significant
reduction in time spent in runtime (mallocgc, newobject,
etcetera).
The performance improvement isn't big, but it's reasonably
easy to evaluate it and establish that it's a safe change
to make.
Signed-off-by: seebs <seebs@seebs.net>
2018-10-25 16:28:40 +00:00
|
|
|
m := make(map[string]interface{})
|
2018-03-28 18:13:46 +00:00
|
|
|
return &filterCursor{
|
|
|
|
Cursor: cur,
|
|
|
|
fields: fields,
|
|
|
|
filter: filter,
|
reuse ValuerEval objects
Scanner objects and iterators often need a ValuerEval. This
object is created, often with a function call, and has at
least one interface in it, so it allocates storage. Then it's
dropped again right away. The only part of it that might be
subject to change is usually a map. While the map's contents
change over time, the actual map doesn't change for the
lifetime of the object.
So, in both iterators and scanners, stash the ValuerEval
and continue reusing it. On a query returning a fair number
of data points, this produces a small (<5% in practice)
improvement in observed performance, visible as a significant
reduction in time spent in runtime (mallocgc, newobject,
etcetera).
The performance improvement isn't big, but it's reasonably
easy to evaluate it and establish that it's a safe change
to make.
Signed-off-by: seebs <seebs@seebs.net>
2018-10-25 16:28:40 +00:00
|
|
|
m: m,
|
|
|
|
valuer: influxql.ValuerEval{Valuer: influxql.MapValuer(m)},
|
2018-03-28 18:13:46 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (cur *filterCursor) Scan(row *Row) bool {
|
|
|
|
for cur.Cursor.Scan(row) {
|
|
|
|
// Use the field mappings to prepare the map for the valuer.
|
|
|
|
for name, f := range cur.fields {
|
|
|
|
cur.m[name] = f.Value(row)
|
|
|
|
}
|
|
|
|
|
reuse ValuerEval objects
Scanner objects and iterators often need a ValuerEval. This
object is created, often with a function call, and has at
least one interface in it, so it allocates storage. Then it's
dropped again right away. The only part of it that might be
subject to change is usually a map. While the map's contents
change over time, the actual map doesn't change for the
lifetime of the object.
So, in both iterators and scanners, stash the ValuerEval
and continue reusing it. On a query returning a fair number
of data points, this produces a small (<5% in practice)
improvement in observed performance, visible as a significant
reduction in time spent in runtime (mallocgc, newobject,
etcetera).
The performance improvement isn't big, but it's reasonably
easy to evaluate it and establish that it's a safe change
to make.
Signed-off-by: seebs <seebs@seebs.net>
2018-10-25 16:28:40 +00:00
|
|
|
if cur.valuer.EvalBool(cur.filter) {
|
2018-03-28 18:13:46 +00:00
|
|
|
// Passes the filter! Return true. We no longer need to
|
|
|
|
// search for a suitable value.
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2018-03-30 21:58:37 +00:00
|
|
|
type nullCursor struct {
|
|
|
|
columns []influxql.VarRef
|
|
|
|
}
|
|
|
|
|
|
|
|
func newNullCursor(fields []*influxql.Field) *nullCursor {
|
|
|
|
columns := make([]influxql.VarRef, len(fields))
|
|
|
|
for i, f := range fields {
|
|
|
|
columns[i].Val = f.Name()
|
|
|
|
}
|
|
|
|
return &nullCursor{columns: columns}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (cur *nullCursor) Scan(row *Row) bool {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
func (cur *nullCursor) Stats() IteratorStats {
|
|
|
|
return IteratorStats{}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (cur *nullCursor) Err() error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (cur *nullCursor) Columns() []influxql.VarRef {
|
|
|
|
return cur.columns
|
|
|
|
}
|
|
|
|
|
|
|
|
func (cur *nullCursor) Close() error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2018-03-01 20:12:22 +00:00
|
|
|
// DrainCursor will read and discard all values from a Cursor and return the error
|
|
|
|
// if one happens.
|
|
|
|
func DrainCursor(cur Cursor) error {
|
|
|
|
var row Row
|
|
|
|
for cur.Scan(&row) {
|
|
|
|
// Do nothing with the result.
|
|
|
|
}
|
|
|
|
return cur.Err()
|
|
|
|
}
|