feat: multi-measurement query optimization (#22301)

* feat: multi-measurement query optimization
pull/22461/head
William Baker 2021-09-13 13:00:08 -06:00 committed by GitHub
parent 6f39255fc8
commit 3e275a123d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 528 additions and 146 deletions

View File

@ -0,0 +1,261 @@
package influxdb_test
import "csv"
import "testing"
option now = () => 2030-01-01T00:00:00Z
// input supplies two storage buffers in annotated CSV. The first buffer has
// measurements system/sys/system/var/swap keyed by the "host" tag; the second
// has a "locale" measurement keyed by the "loc" tag (and no "host" tag at
// all), which exercises the `not exists` path in multi_measure_negation.
input = "
#datatype,string,long,dateTime:RFC3339,string,string,string,double
#group,false,false,false,true,true,true,false
#default,_result,,,,,,
,result,table,_time,_measurement,host,_field,_value
,,0,2018-05-22T19:53:26Z,system,host.local,load1,1.83
,,0,2018-05-22T19:53:36Z,system,host.local,load1,1.72
,,0,2018-05-22T19:53:46Z,system,host.local,load1,1.74
,,0,2018-05-22T19:53:56Z,system,host.local,load1,1.63
,,0,2018-05-22T19:54:06Z,system,host.local,load1,1.91
,,0,2018-05-22T19:54:16Z,system,host.local,load1,1.84
,,1,2018-05-22T19:53:26Z,sys,host.local,load3,1.98
,,1,2018-05-22T19:53:36Z,sys,host.local,load3,1.97
,,1,2018-05-22T19:53:46Z,sys,host.local,load3,1.97
,,1,2018-05-22T19:53:56Z,sys,host.local,load3,1.96
,,1,2018-05-22T19:54:06Z,sys,host.local,load3,1.98
,,1,2018-05-22T19:54:16Z,sys,host.local,load3,1.97
,,2,2018-05-22T19:53:26Z,system,host.local,load5,1.95
,,2,2018-05-22T19:53:36Z,system,host.local,load5,1.92
,,2,2018-05-22T19:53:46Z,system,host.local,load5,1.92
,,2,2018-05-22T19:53:56Z,system,host.local,load5,1.89
,,2,2018-05-22T19:54:06Z,system,host.local,load5,1.94
,,2,2018-05-22T19:54:16Z,system,host.local,load5,1.93
,,3,2018-05-22T19:53:26Z,var,host.local,load3,91.98
,,3,2018-05-22T19:53:36Z,var,host.local,load3,91.97
,,3,2018-05-22T19:53:46Z,var,host.local,load3,91.97
,,3,2018-05-22T19:53:56Z,var,host.local,load3,91.96
,,3,2018-05-22T19:54:06Z,var,host.local,load3,91.98
,,3,2018-05-22T19:54:16Z,var,host.local,load3,91.97
,,4,2018-05-22T19:53:26Z,swap,host.global,used_percent,82.98
,,4,2018-05-22T19:53:36Z,swap,host.global,used_percent,82.59
,,4,2018-05-22T19:53:46Z,swap,host.global,used_percent,82.59
,,4,2018-05-22T19:53:56Z,swap,host.global,used_percent,82.59
,,4,2018-05-22T19:54:06Z,swap,host.global,used_percent,82.59
,,4,2018-05-22T19:54:16Z,swap,host.global,used_percent,82.64
#datatype,string,long,dateTime:RFC3339,string,string,string,double
#group,false,false,false,true,true,true,false
#default,_result,,,,,,
,result,table,_time,_measurement,loc,_field,_value
,,0,2018-05-22T19:53:26Z,locale,en,lat,37.09
,,0,2018-05-22T19:53:36Z,locale,en,lat,37.10
,,0,2018-05-22T19:53:46Z,locale,en,lat,37.08
"
// multi_measure: two measurement names OR'd together ("system" or "sys")
// combined with an OR'd field filter; expects only the load1 table of
// "system" and the load3 table of "sys".
testcase multi_measure {
got = testing.loadStorage(csv: input)
|> range(start: 2018-01-01T00:00:00Z, stop: 2019-01-01T00:00:00Z)
|> filter(fn: (r) => r["_measurement"] == "system" or r["_measurement"] == "sys")
|> filter(fn: (r) => r["_field"] == "load1" or r["_field"] == "load3")
|> drop(columns: ["_start", "_stop"])
want = csv.from(csv: "#datatype,string,long,dateTime:RFC3339,string,string,string,double
#group,false,false,false,true,true,true,false
#default,_result,,,,,,
,result,table,_time,_measurement,host,_field,_value
,,0,2018-05-22T19:53:26Z,system,host.local,load1,1.83
,,0,2018-05-22T19:53:36Z,system,host.local,load1,1.72
,,0,2018-05-22T19:53:46Z,system,host.local,load1,1.74
,,0,2018-05-22T19:53:56Z,system,host.local,load1,1.63
,,0,2018-05-22T19:54:06Z,system,host.local,load1,1.91
,,0,2018-05-22T19:54:16Z,system,host.local,load1,1.84
,,1,2018-05-22T19:53:26Z,sys,host.local,load3,1.98
,,1,2018-05-22T19:53:36Z,sys,host.local,load3,1.97
,,1,2018-05-22T19:53:46Z,sys,host.local,load3,1.97
,,1,2018-05-22T19:53:56Z,sys,host.local,load3,1.96
,,1,2018-05-22T19:54:06Z,sys,host.local,load3,1.98
,,1,2018-05-22T19:54:16Z,sys,host.local,load3,1.97
")
testing.diff(got, want)
}
// multi_measure_match_all: the measurement and field filters name every
// series in the first input buffer, so the expected result is that entire
// buffer (tables 0-4).
testcase multi_measure_match_all {
got = testing.loadStorage(csv: input)
|> range(start: 2018-01-01T00:00:00Z, stop: 2019-01-01T00:00:00Z)
|> filter(fn: (r) => r["_measurement"] == "system" or r["_measurement"] == "sys" or r["_measurement"] == "var" or r["_measurement"] == "swap")
|> filter(fn: (r) => r["_field"] == "load1" or r["_field"] == "load3" or r["_field"] == "load5" or r["_field"] == "used_percent")
|> drop(columns: ["_start", "_stop"])
want = csv.from(csv: "#datatype,string,long,dateTime:RFC3339,string,string,string,double
#group,false,false,false,true,true,true,false
#default,_result,,,,,,
,result,table,_time,_measurement,host,_field,_value
,,0,2018-05-22T19:53:26Z,system,host.local,load1,1.83
,,0,2018-05-22T19:53:36Z,system,host.local,load1,1.72
,,0,2018-05-22T19:53:46Z,system,host.local,load1,1.74
,,0,2018-05-22T19:53:56Z,system,host.local,load1,1.63
,,0,2018-05-22T19:54:06Z,system,host.local,load1,1.91
,,0,2018-05-22T19:54:16Z,system,host.local,load1,1.84
,,1,2018-05-22T19:53:26Z,sys,host.local,load3,1.98
,,1,2018-05-22T19:53:36Z,sys,host.local,load3,1.97
,,1,2018-05-22T19:53:46Z,sys,host.local,load3,1.97
,,1,2018-05-22T19:53:56Z,sys,host.local,load3,1.96
,,1,2018-05-22T19:54:06Z,sys,host.local,load3,1.98
,,1,2018-05-22T19:54:16Z,sys,host.local,load3,1.97
,,2,2018-05-22T19:53:26Z,system,host.local,load5,1.95
,,2,2018-05-22T19:53:36Z,system,host.local,load5,1.92
,,2,2018-05-22T19:53:46Z,system,host.local,load5,1.92
,,2,2018-05-22T19:53:56Z,system,host.local,load5,1.89
,,2,2018-05-22T19:54:06Z,system,host.local,load5,1.94
,,2,2018-05-22T19:54:16Z,system,host.local,load5,1.93
,,3,2018-05-22T19:53:26Z,var,host.local,load3,91.98
,,3,2018-05-22T19:53:36Z,var,host.local,load3,91.97
,,3,2018-05-22T19:53:46Z,var,host.local,load3,91.97
,,3,2018-05-22T19:53:56Z,var,host.local,load3,91.96
,,3,2018-05-22T19:54:06Z,var,host.local,load3,91.98
,,3,2018-05-22T19:54:16Z,var,host.local,load3,91.97
,,4,2018-05-22T19:53:26Z,swap,host.global,used_percent,82.98
,,4,2018-05-22T19:53:36Z,swap,host.global,used_percent,82.59
,,4,2018-05-22T19:53:46Z,swap,host.global,used_percent,82.59
,,4,2018-05-22T19:53:56Z,swap,host.global,used_percent,82.59
,,4,2018-05-22T19:54:06Z,swap,host.global,used_percent,82.59
,,4,2018-05-22T19:54:16Z,swap,host.global,used_percent,82.64
")
testing.diff(got, want)
}
// multi_measure_tag_filter: OR'd measurements plus field and host-tag
// filters; only system/load1 (host.local) and swap/used_percent
// (host.global) satisfy all three predicates.
testcase multi_measure_tag_filter {
got = testing.loadStorage(csv: input)
|> range(start: 2018-01-01T00:00:00Z, stop: 2019-01-01T00:00:00Z)
|> filter(fn: (r) => r["_measurement"] == "system" or r["_measurement"] == "swap")
|> filter(fn: (r) => r["_field"] == "load1" or r["_field"] == "load3" or r["_field"] == "used_percent")
|> filter(fn: (r) => r["host"] == "host.local" or r["host"] == "host.global")
|> drop(columns: ["_start", "_stop"])
want = csv.from(csv: "#datatype,string,long,dateTime:RFC3339,string,string,string,double
#group,false,false,false,true,true,true,false
#default,_result,,,,,,
,result,table,_time,_measurement,host,_field,_value
,,0,2018-05-22T19:53:26Z,system,host.local,load1,1.83
,,0,2018-05-22T19:53:36Z,system,host.local,load1,1.72
,,0,2018-05-22T19:53:46Z,system,host.local,load1,1.74
,,0,2018-05-22T19:53:56Z,system,host.local,load1,1.63
,,0,2018-05-22T19:54:06Z,system,host.local,load1,1.91
,,0,2018-05-22T19:54:16Z,system,host.local,load1,1.84
,,4,2018-05-22T19:53:26Z,swap,host.global,used_percent,82.98
,,4,2018-05-22T19:53:36Z,swap,host.global,used_percent,82.59
,,4,2018-05-22T19:53:46Z,swap,host.global,used_percent,82.59
,,4,2018-05-22T19:53:56Z,swap,host.global,used_percent,82.59
,,4,2018-05-22T19:54:06Z,swap,host.global,used_percent,82.59
,,4,2018-05-22T19:54:16Z,swap,host.global,used_percent,82.64
")
testing.diff(got, want)
}
// multi_measure_complex_or: a complex OR mixing measurement equality with a
// (negated-measurement AND tag) branch; "var" is excluded by the second
// branch while system, swap, and sys all pass.
testcase multi_measure_complex_or {
got = testing.loadStorage(csv: input)
|> range(start: 2018-01-01T00:00:00Z, stop: 2019-01-01T00:00:00Z)
|> filter(fn: (r) => (r["_measurement"] == "system" or r["_measurement"] == "swap") or (r["_measurement"] != "var" and r["host"] == "host.local"))
|> drop(columns: ["_start", "_stop"])
want = csv.from(csv: "#datatype,string,long,dateTime:RFC3339,string,string,string,double
#group,false,false,false,true,true,true,false
#default,_result,,,,,,
,result,table,_time,_measurement,host,_field,_value
,,0,2018-05-22T19:53:26Z,system,host.local,load1,1.83
,,0,2018-05-22T19:53:36Z,system,host.local,load1,1.72
,,0,2018-05-22T19:53:46Z,system,host.local,load1,1.74
,,0,2018-05-22T19:53:56Z,system,host.local,load1,1.63
,,0,2018-05-22T19:54:06Z,system,host.local,load1,1.91
,,0,2018-05-22T19:54:16Z,system,host.local,load1,1.84
,,2,2018-05-22T19:53:26Z,system,host.local,load5,1.95
,,2,2018-05-22T19:53:36Z,system,host.local,load5,1.92
,,2,2018-05-22T19:53:46Z,system,host.local,load5,1.92
,,2,2018-05-22T19:53:56Z,system,host.local,load5,1.89
,,2,2018-05-22T19:54:06Z,system,host.local,load5,1.94
,,2,2018-05-22T19:54:16Z,system,host.local,load5,1.93
,,4,2018-05-22T19:53:26Z,swap,host.global,used_percent,82.98
,,4,2018-05-22T19:53:36Z,swap,host.global,used_percent,82.59
,,4,2018-05-22T19:53:46Z,swap,host.global,used_percent,82.59
,,4,2018-05-22T19:53:56Z,swap,host.global,used_percent,82.59
,,4,2018-05-22T19:54:06Z,swap,host.global,used_percent,82.59
,,4,2018-05-22T19:54:16Z,swap,host.global,used_percent,82.64
,,1,2018-05-22T19:53:26Z,sys,host.local,load3,1.98
,,1,2018-05-22T19:53:36Z,sys,host.local,load3,1.97
,,1,2018-05-22T19:53:46Z,sys,host.local,load3,1.97
,,1,2018-05-22T19:53:56Z,sys,host.local,load3,1.96
,,1,2018-05-22T19:54:06Z,sys,host.local,load3,1.98
,,1,2018-05-22T19:54:16Z,sys,host.local,load3,1.97
")
testing.diff(got, want)
}
// multi_measure_complex_and: two AND'd filters where the first mixes a
// negation with an equality; the conjunction reduces to measurements
// "swap" and "var" only.
testcase multi_measure_complex_and {
got = testing.loadStorage(csv: input)
|> range(start: 2018-01-01T00:00:00Z, stop: 2019-01-01T00:00:00Z)
|> filter(fn: (r) => r["_measurement"] != "system" or r["_measurement"] == "swap")
|> filter(fn: (r) => r["_measurement"] == "swap" or r["_measurement"] == "var")
|> drop(columns: ["_start", "_stop"])
want = csv.from(csv: "#datatype,string,long,dateTime:RFC3339,string,string,string,double
#group,false,false,false,true,true,true,false
#default,_result,,,,,,
,result,table,_time,_measurement,host,_field,_value
,,4,2018-05-22T19:53:26Z,swap,host.global,used_percent,82.98
,,4,2018-05-22T19:53:36Z,swap,host.global,used_percent,82.59
,,4,2018-05-22T19:53:46Z,swap,host.global,used_percent,82.59
,,4,2018-05-22T19:53:56Z,swap,host.global,used_percent,82.59
,,4,2018-05-22T19:54:06Z,swap,host.global,used_percent,82.59
,,4,2018-05-22T19:54:16Z,swap,host.global,used_percent,82.64
,,3,2018-05-22T19:53:26Z,var,host.local,load3,91.98
,,3,2018-05-22T19:53:36Z,var,host.local,load3,91.97
,,3,2018-05-22T19:53:46Z,var,host.local,load3,91.97
,,3,2018-05-22T19:53:56Z,var,host.local,load3,91.96
,,3,2018-05-22T19:54:06Z,var,host.local,load3,91.98
,,3,2018-05-22T19:54:16Z,var,host.local,load3,91.97
")
testing.diff(got, want)
}
// multi_measure_negation: excludes "system" by negation, then keeps rows
// where host is host.local OR the host tag does not exist at all — the
// latter admits the "locale" measurement from the second input buffer.
testcase multi_measure_negation {
got = testing.loadStorage(csv: input)
|> range(start: 2018-01-01T00:00:00Z, stop: 2019-01-01T00:00:00Z)
|> filter(fn: (r) => r["_measurement"] != "system")
|> filter(fn: (r) => r["host"] == "host.local" or not exists r["host"])
|> drop(columns: ["_start", "_stop"])
want = csv.from(csv: "#datatype,string,long,dateTime:RFC3339,string,string,string,double
#group,false,false,false,true,true,true,false
#default,_result,,,,,,
,result,table,_time,_measurement,host,_field,_value
,,1,2018-05-22T19:53:26Z,sys,host.local,load3,1.98
,,1,2018-05-22T19:53:36Z,sys,host.local,load3,1.97
,,1,2018-05-22T19:53:46Z,sys,host.local,load3,1.97
,,1,2018-05-22T19:53:56Z,sys,host.local,load3,1.96
,,1,2018-05-22T19:54:06Z,sys,host.local,load3,1.98
,,1,2018-05-22T19:54:16Z,sys,host.local,load3,1.97
,,3,2018-05-22T19:53:26Z,var,host.local,load3,91.98
,,3,2018-05-22T19:53:36Z,var,host.local,load3,91.97
,,3,2018-05-22T19:53:46Z,var,host.local,load3,91.97
,,3,2018-05-22T19:53:56Z,var,host.local,load3,91.96
,,3,2018-05-22T19:54:06Z,var,host.local,load3,91.98
,,3,2018-05-22T19:54:16Z,var,host.local,load3,91.97
#datatype,string,long,dateTime:RFC3339,string,string,string,double
#group,false,false,false,true,true,true,false
#default,_result,,,,,,
,result,table,_time,_measurement,loc,_field,_value
,,0,2018-05-22T19:53:26Z,locale,en,lat,37.09
,,0,2018-05-22T19:53:36Z,locale,en,lat,37.10
,,0,2018-05-22T19:53:46Z,locale,en,lat,37.08
")
testing.diff(got, want)
}

View File

@ -42,6 +42,7 @@ const (
statWritePointsOK = "writePointsOk"
statWriteBytes = "writeBytes"
statDiskBytes = "diskBytes"
measurementKey = "_name"
)
var (
@ -1290,9 +1291,12 @@ func (a Shards) MeasurementNamesByPredicate(expr influxql.Expr) ([][]byte, error
// FieldKeysByPredicate returns the field keys for series that match
// the given predicate.
func (a Shards) FieldKeysByPredicate(expr influxql.Expr) (map[string][]string, error) {
names, err := a.MeasurementNamesByPredicate(expr)
if err != nil {
return nil, err
names, ok := measurementOptimization(expr, measurementKey)
if !ok {
var err error
if names, err = a.MeasurementNamesByPredicate(expr); err != nil {
return nil, err
}
}
all := make(map[string][]string, len(names))
@ -1302,6 +1306,134 @@ func (a Shards) FieldKeysByPredicate(expr influxql.Expr) (map[string][]string, e
return all, nil
}
// consecutiveAndChildren finds all child nodes of consecutive
// influxql.BinaryExpr with AND operator nodes ("AND nodes") which are not
// themselves AND nodes. This may be the root of the tree if the root of the
// tree is not an AND node.
type consecutiveAndChildren struct {
	children []influxql.Node
}

// Visit descends through AND binary expressions and parenthesized
// expressions; any other node terminates its branch of the search and is
// recorded as a candidate subtree root.
func (v *consecutiveAndChildren) Visit(node influxql.Node) influxql.Visitor {
	// Keep walking through AND nodes — their operands may themselves be
	// candidate subtrees.
	if be, ok := node.(*influxql.BinaryExpr); ok && be.Op == influxql.AND {
		return v
	}
	// Parens are essentially a no-op and can be traversed through.
	if _, ok := node.(*influxql.ParenExpr); ok {
		return v
	}
	// Anything else ends the search on this branch: record it as a child.
	v.children = append(v.children, node)
	return nil
}
// orMeasurementTree determines if a tree (or subtree) represents a grouping of
// exclusively measurement names OR'd together with EQ operators for the
// measurements themselves. It collects the list of measurement names
// encountered and records the validity of the tree.
type orMeasurementTree struct {
	measurementKey   string   // tag key that identifies a measurement comparison
	measurementNames []string // names gathered from valid "key == name" nodes
	valid            bool     // cleared permanently once a non-conforming node is seen
}

func (v *orMeasurementTree) Visit(node influxql.Node) influxql.Visitor {
	// Return early if this tree has already been invalidated - no reason to
	// continue evaluating at that point.
	if !v.valid {
		return nil
	}

	switch n := node.(type) {
	case *influxql.BinaryExpr:
		// A BinaryExpr must have an operation of OR or EQ in a valid tree
		if n.Op == influxql.OR {
			return v
		} else if n.Op == influxql.EQ {
			// An EQ must be in the form of "v.measurementKey == measurementName" in a
			// valid tree
			if name, ok := measurementNameFromEqBinary(n, v.measurementKey); ok {
				v.measurementNames = append(v.measurementNames, name)
				// If a valid measurement key/value was found, there is no need to
				// continue evaluating the VarRef/StringLiteral child nodes of this
				// node.
				return nil
			}
		}
	case *influxql.ParenExpr:
		// Parens are essentially a no-op and can be traversed through.
		return v
	}

	// If the type switch didn't already return, this tree is invalid.
	v.valid = false
	return nil
}
// measurementOptimization reports whether expr allows the measurement
// pushdown optimization and, if so, returns the OR'd measurement names.
//
// The optimization is possible if the query contains a single group of one or
// more measurements (in the form of _measurement = measName, equality
// operator only) grouped together by OR operators, with the subtree
// containing the OR'd measurements accessible from the root of the tree
// either directly (tree contains nothing but OR'd measurements) or by
// traversing AND binary expression nodes.
func measurementOptimization(expr influxql.Expr, key string) ([][]byte, bool) {
	// Collect the "candidate" measurement subtrees: the non-AND children
	// reachable from the root through consecutive AND nodes.
	var finder consecutiveAndChildren
	influxql.Walk(&finder, expr)

	// Evaluate each candidate to see whether it is exclusively OR'd
	// measurement equality comparisons, gathering names along the way.
	var valid []orMeasurementTree
	for _, subtree := range finder.children {
		candidate := orMeasurementTree{
			measurementKey: key,
			valid:          true,
		}
		influxql.Walk(&candidate, subtree)
		if candidate.valid {
			valid = append(valid, candidate)
		}
	}

	// There must be exactly one valid measurement subtree for this
	// optimization to be applied. Note: It may also be possible to have
	// measurements in multiple subtrees, as long as there are no measurements
	// in invalid subtrees, by determining an intersection of the measurement
	// names across all valid subtrees - this is not currently implemented.
	if len(valid) != 1 {
		return nil, false
	}

	return slices.StringsToBytes(valid[0].measurementNames...), true
}
// measurementNameFromEqBinary returns the name of a measurement from a binary
// expression if possible, and a boolean status indicating if the binary
// expression contained a measurement name. A measurement name will only be
// returned if the operator for the binary is EQ, and the measurement key is on
// the LHS with the measurement name on the RHS.
func measurementNameFromEqBinary(be *influxql.BinaryExpr, key string) (string, bool) {
	// Enforce the documented contract directly rather than relying on the
	// caller to have checked the operator: only equality comparisons qualify.
	if be.Op != influxql.EQ {
		return "", false
	}

	// The measurement key must appear as a bare variable reference on the LHS.
	lhs, ok := be.LHS.(*influxql.VarRef)
	if !ok || lhs.Val != key {
		return "", false
	}

	// The measurement name must be a string literal on the RHS.
	rhs, ok := be.RHS.(*influxql.StringLiteral)
	if !ok {
		return "", false
	}

	return rhs.Val, true
}
func (a Shards) FieldDimensions(measurements []string) (fields map[string]influxql.DataType, dimensions map[string]struct{}, err error) {
fields = make(map[string]influxql.DataType)
dimensions = make(map[string]struct{})

View File

@ -16,6 +16,7 @@ import (
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxql"
"github.com/stretchr/testify/require"
"go.uber.org/zap/zaptest"
)
@ -204,6 +205,125 @@ mem,host=serverB value=50i,val3=t 10
}
}
// TestShard_MeasurementOptimization verifies which influxql predicate shapes
// qualify for the measurement pushdown optimization, and that the expected
// measurement names are extracted when they do.
func TestShard_MeasurementOptimization(t *testing.T) {
	t.Parallel()

	cases := []struct {
		expr  influxql.Expr
		name  string
		ok    bool
		names [][]byte
	}{
		{
			expr:  influxql.MustParseExpr(`_name = 'm0'`),
			name:  "single measurement",
			ok:    true,
			names: [][]byte{[]byte("m0")},
		},
		{
			expr:  influxql.MustParseExpr(`_something = 'f' AND _name = 'm0'`),
			name:  "single measurement with AND",
			ok:    true,
			names: [][]byte{[]byte("m0")},
		},
		{
			expr:  influxql.MustParseExpr(`_something = 'f' AND (a =~ /x0/ AND _name = 'm0')`),
			name:  "single measurement with multiple AND",
			ok:    true,
			names: [][]byte{[]byte("m0")},
		},
		{
			expr:  influxql.MustParseExpr(`_name = 'm0' OR _name = 'm1' OR _name = 'm2'`),
			name:  "multiple measurements alone",
			ok:    true,
			names: [][]byte{[]byte("m0"), []byte("m1"), []byte("m2")},
		},
		{
			expr:  influxql.MustParseExpr(`(_name = 'm0' OR _name = 'm1' OR _name = 'm2') AND (_field = 'foo' OR _field = 'bar' OR _field = 'qux')`),
			name:  "multiple measurements combined",
			ok:    true,
			names: [][]byte{[]byte("m0"), []byte("m1"), []byte("m2")},
		},
		{
			expr:  influxql.MustParseExpr(`(_name = 'm0' OR (_name = 'm1' OR _name = 'm2')) AND tag1 != 'foo'`),
			name:  "parens in expression",
			ok:    true,
			names: [][]byte{[]byte("m0"), []byte("m1"), []byte("m2")},
		},
		{
			expr:  influxql.MustParseExpr(`(tag1 != 'foo' OR tag2 = 'bar') AND (_name = 'm0' OR _name = 'm1' OR _name = 'm2') AND (_field = 'val1' OR _field = 'val2')`),
			name:  "multiple AND",
			ok:    true,
			names: [][]byte{[]byte("m0"), []byte("m1"), []byte("m2")},
		},
		{
			expr:  influxql.MustParseExpr(`(_name = 'm0' OR _name = 'm1' OR _name = 'm2') AND (tag1 != 'foo' OR _name = 'm1')`),
			name:  "measurements in multiple groups, only one valid group",
			ok:    true,
			names: [][]byte{[]byte("m0"), []byte("m1"), []byte("m2")},
		},
		{
			expr:  influxql.MustParseExpr(`_name = 'm0' OR tag1 != 'foo'`),
			name:  "single measurement with OR",
			ok:    false,
			names: nil,
		},
		{
			expr:  influxql.MustParseExpr(`_name = 'm0' OR true`),
			name:  "measurement with OR boolean literal",
			ok:    false,
			names: nil,
		},
		{
			expr:  influxql.MustParseExpr(`_name != 'm0' AND tag1 != 'foo'`),
			name:  "single measurement with non-equal",
			ok:    false,
			names: nil,
		},
		{
			expr:  influxql.MustParseExpr(`(_name = 'm0' OR _name != 'm1' OR _name = 'm2') AND (_field = 'foo' OR _field = 'bar' OR _field = 'qux')`),
			name:  "multiple measurements with non-equal",
			ok:    false,
			names: nil,
		},
		{
			expr:  influxql.MustParseExpr(`tag1 = 'foo' AND tag2 = 'bar'`),
			name:  "no measurements - multiple tags",
			ok:    false,
			names: nil,
		},
		{
			expr:  influxql.MustParseExpr(`_field = 'foo'`),
			name:  "no measurements - single field",
			ok:    false,
			names: nil,
		},
		{
			expr:  influxql.MustParseExpr(`(_name = 'm0' OR _name = 'm1' AND _name = 'm2') AND tag1 != 'foo'`),
			name:  "measurements with AND",
			ok:    false,
			names: nil,
		},
		{
			expr:  influxql.MustParseExpr(`(_name = 'm0' OR _name = 'm1' OR _name = 'm2') OR (tag1 != 'foo' OR _name = 'm1')`),
			name:  "top level is not AND",
			ok:    false,
			names: nil,
		},
	}

	for _, tc := range cases {
		tc := tc // capture range variable for the parallel subtest
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()
			names, ok := measurementOptimization(tc.expr, measurementKey)
			require.Equal(t, tc.names, names)
			require.Equal(t, tc.ok, ok)
		})
	}
}
// TempShard represents a test wrapper for Shard that uses temporary
// filesystem paths.
type TempShard struct {

View File

@ -25,56 +25,6 @@ func RewriteExprRemoveFieldKeyAndValue(expr influxql.Expr) influxql.Expr {
})
}
// HasSingleMeasurementNoOR determines if an index optimisation is available.
//
// Typically the read service will use the query engine to retrieve all field
// keys for all measurements that match the expression, which can be very
// inefficient if it can be proved that only one measurement matches the expression.
//
// This condition is determined when the following is true:
//
//   * there is only one occurrence of the tag key `_measurement`.
//   * there are no OR operators in the expression tree.
//   * the operator for the `_measurement` binary expression is ==.
//
func HasSingleMeasurementNoOR(expr influxql.Expr) (string, bool) {
	var lastMeasurement string
	// foundOnce starts true and flips to false only when a second
	// `_measurement` comparison is encountered during the walk.
	foundOnce := true
	// invalidOP flips to true on any OR operator or a non-EQ measurement
	// comparison, disqualifying the optimisation.
	var invalidOP bool

	influxql.WalkFunc(expr, func(node influxql.Node) {
		// Once disqualified, skip the remainder of the walk.
		if !foundOnce || invalidOP {
			return
		}

		if be, ok := node.(*influxql.BinaryExpr); ok {
			// Any OR anywhere in the tree invalidates the optimisation.
			if be.Op == influxql.OR {
				invalidOP = true
				return
			}

			if ref, ok := be.LHS.(*influxql.VarRef); ok {
				if ref.Val == measurementRemap[measurementKey] {
					// The measurement comparison must be an equality.
					if be.Op != influxql.EQ {
						invalidOP = true
						return
					}

					// A measurement was already recorded earlier in the walk;
					// a second occurrence disqualifies the optimisation.
					if lastMeasurement != "" {
						foundOnce = false
					}

					// Check that RHS is a literal string
					if ref, ok := be.RHS.(*influxql.StringLiteral); ok {
						lastMeasurement = ref.Val
					}
				}
			}
		}
	})
	return lastMeasurement, len(lastMeasurement) > 0 && foundOnce && !invalidOP
}
type hasRefs struct {
refs []string
found []bool

View File

@ -10,65 +10,6 @@ import (
"github.com/influxdata/influxql"
)
// TestHasSingleMeasurementNoOR checks which predicate shapes qualify for the
// single-measurement index optimisation and that the extracted measurement
// name matches when they do.
func TestHasSingleMeasurementNoOR(t *testing.T) {
	cases := []struct {
		expr influxql.Expr
		name string
		ok   bool
	}{
		{
			expr: influxql.MustParseExpr(`_name = 'm0'`),
			name: "m0",
			ok:   true,
		},
		{
			expr: influxql.MustParseExpr(`_something = 'f' AND _name = 'm0'`),
			name: "m0",
			ok:   true,
		},
		{
			expr: influxql.MustParseExpr(`_something = 'f' AND (a =~ /x0/ AND _name = 'm0')`),
			name: "m0",
			ok:   true,
		},
		{
			expr: influxql.MustParseExpr(`tag1 != 'foo'`),
			ok:   false,
		},
		{
			expr: influxql.MustParseExpr(`_name = 'm0' OR tag1 != 'foo'`),
			ok:   false,
		},
		{
			expr: influxql.MustParseExpr(`_name = 'm0' AND tag1 != 'foo' AND _name = 'other'`),
			ok:   false,
		},
		{
			expr: influxql.MustParseExpr(`_name = 'm0' AND tag1 != 'foo' OR _name = 'other'`),
			ok:   false,
		},
		{
			expr: influxql.MustParseExpr(`_name = 'm0' AND (tag1 != 'foo' OR tag2 = 'other')`),
			ok:   false,
		},
		{
			expr: influxql.MustParseExpr(`(tag1 != 'foo' OR tag2 = 'other') OR _name = 'm0'`),
			ok:   false,
		},
	}

	for _, tt := range cases {
		name, ok := storage.HasSingleMeasurementNoOR(tt.expr)
		// A case fails when the ok flag is wrong, or when the optimisation
		// applied but extracted the wrong measurement name.
		if ok != tt.ok || (ok && name != tt.name) {
			t.Fatalf("got %q, %v for expression %q, expected %q, %v", name, ok, tt.expr, tt.name, tt.ok)
		}
	}
}
func TestRewriteExprRemoveFieldKeyAndValue(t *testing.T) {
node := &datatypes.Node{
NodeType: datatypes.NodeTypeLogicalExpression,

View File

@ -2,9 +2,11 @@ package storage
import (
"context"
"sort"
"github.com/influxdata/influxdb/v2/influxql/query"
"github.com/influxdata/influxdb/v2/models"
"github.com/influxdata/influxdb/v2/pkg/slices"
"github.com/influxdata/influxdb/v2/storage/reads"
"github.com/influxdata/influxdb/v2/storage/reads/datatypes"
"github.com/influxdata/influxdb/v2/tsdb"
@ -93,51 +95,27 @@ func newIndexSeriesCursorInfluxQLPred(ctx context.Context, predicate influxql.Ex
}
}
var mitr tsdb.MeasurementIterator
name, singleMeasurement := HasSingleMeasurementNoOR(p.measurementCond)
if singleMeasurement {
mitr = tsdb.NewMeasurementSliceIterator([][]byte{[]byte(name)})
}
sg := tsdb.Shards(shards)
p.sqry, err = sg.CreateSeriesCursor(ctx, tsdb.SeriesCursorRequest{Measurements: mitr}, opt.Condition)
if p.sqry != nil && err == nil {
// Optimisation to check if request is only interested in results for a
// single measurement. In this case we can efficiently produce all known
// field keys from the collection of shards without having to go via
// the query engine.
if singleMeasurement {
fkeys := sg.FieldKeysByMeasurement([]byte(name))
if len(fkeys) == 0 {
goto CLEANUP
}
fields := make([]field, 0, len(fkeys))
for _, key := range fkeys {
fields = append(fields, field{n: key, nb: []byte(key)})
}
p.fields = map[string][]field{name: fields}
return p, nil
}
var mfkeys map[string][]string
mfkeys, err = sg.FieldKeysByPredicate(opt.Condition)
if err != nil {
goto CLEANUP
}
if mfkeys, err := sg.FieldKeysByPredicate(opt.Condition); err == nil {
p.fields = make(map[string][]field, len(mfkeys))
measurementNamesForFields := []string{}
for name, fkeys := range mfkeys {
fields := make([]field, 0, len(fkeys))
for _, key := range fkeys {
fields = append(fields, field{n: key, nb: []byte(key)})
}
p.fields[name] = fields
measurementNamesForFields = append(measurementNamesForFields, name)
}
sort.Strings(measurementNamesForFields)
mitr := tsdb.NewMeasurementSliceIterator(slices.StringsToBytes(measurementNamesForFields...))
p.sqry, err = sg.CreateSeriesCursor(ctx, tsdb.SeriesCursorRequest{Measurements: mitr}, opt.Condition)
if p.sqry != nil && err == nil {
return p, nil
}
return p, nil
}
CLEANUP:
p.Close()
return nil, err
}