// influxdb/query/influxql/group.go

package influxql

import (
	"fmt"
	"math"
	"strings"
	"time"

	"github.com/influxdata/flux"
	"github.com/influxdata/flux/execute"
	"github.com/influxdata/flux/functions/transformations"
	"github.com/influxdata/flux/semantic"
	"github.com/influxdata/influxql"
	"github.com/pkg/errors"
)

// groupInfo describes one group of selected expressions that is turned into a
// single cursor by createCursor.
type groupInfo struct {
	// call is the function call evaluated for this group. It is nil when the
	// group consists only of plain variable references.
	call     *influxql.Call
	// refs are the variable references selected alongside the call; a cursor
	// is created for each of them.
	refs     []*influxql.VarRef
	// selector is set when the group is treated as a selector: either the
	// group carries variable references or it is the only group and its call
	// is a selector function.
	selector bool
}

// groupVisitor walks the fields of a select statement and collects the
// function calls and variable references it finds.
type groupVisitor struct {
	// calls holds every function call that was parsed successfully.
	calls []*function
	// refs holds every variable reference other than "time".
	refs  []*influxql.VarRef
	// err is the first error encountered; once set, Visit stops descending.
	err   error
}

// Visit is the callback used when walking the select fields. It records
// function calls and variable references on the visitor and stores an error
// for constructs that are not yet supported. It returns nil to stop descending
// into the current node and the visitor itself to keep walking its children.
func (v *groupVisitor) Visit(n influxql.Node) influxql.Visitor {
	// A previously recorded error short-circuits the rest of the walk.
	if v.err != nil {
		return nil
	}

	// TODO(jsternberg): Identify duplicates so they are a single common instance.
	switch node := n.(type) {
	case *influxql.VarRef:
		// The time column is implicit and is never recorded as a reference.
		if node.Val != "time" {
			v.refs = append(v.refs, node)
		}
	case *influxql.Call:
		// TODO(jsternberg): Identify math functions so we visit their arguments instead of recording them.
		fn, err := parseFunction(node)
		if err != nil {
			v.err = err
		} else {
			v.calls = append(v.calls, fn)
		}
	case *influxql.Distinct:
		v.err = errors.New("unimplemented: distinct expression")
	case *influxql.Wildcard:
		v.err = errors.New("unimplemented: field wildcard")
	case *influxql.RegexLiteral:
		v.err = errors.New("unimplemented: field regex wildcard")
	default:
		// Any other node is a container; continue into its children.
		return v
	}
	return nil
}

// identifyGroups walks the statement's fields and partitions the function
// calls and variable references it finds into the groups used for creating
// data access cursors.
//
// When no plain variable references are selected, every call gets its own
// group. When references are present, they may be combined with at most one
// selector call, producing a single selector group; anything else is an error.
func identifyGroups(stmt *influxql.SelectStatement) ([]*groupInfo, error) {
	visitor := new(groupVisitor)
	influxql.Walk(visitor, stmt.Fields)
	if visitor.err != nil {
		return nil, visitor.err
	}

	// Without auxiliary fields each function call goes into its own group.
	if len(visitor.refs) == 0 {
		groups := make([]*groupInfo, len(visitor.calls))
		for i, fn := range visitor.calls {
			groups[i] = &groupInfo{call: fn.call}
		}

		// A lone selector call is flagged as a selector group.
		if len(groups) == 1 && influxql.IsSelector(groups[0].call) {
			groups[0].selector = true
		}
		return groups, nil
	}

	// Auxiliary fields are present: every call must be a selector.
	for _, fn := range visitor.calls {
		if !influxql.IsSelector(fn.call) {
			return nil, errors.New("mixing aggregate and non-aggregate queries is not supported")
		}
	}

	// Everything goes into a single group, which is always a selector here.
	single := &groupInfo{
		refs:     visitor.refs,
		selector: true,
	}
	switch len(visitor.calls) {
	case 0:
		// Only plain references; the call stays nil.
	case 1:
		single.call = visitor.calls[0].call
	default:
		return nil, errors.New("mixing multiple selector functions with tags or fields is not supported")
	}
	return []*groupInfo{single}, nil
}

// createCursor builds the cursor for this group. It creates a cursor for the
// call's first argument and for every auxiliary reference, adds cursors for
// any non-tag variables used in the statement's condition, joins them, applies
// the condition as a filter operation, groups the result, and finally
// evaluates the function call if one is present.
//
// It returns an error if any of the intermediate steps fail, if more than one
// cursor would need to be joined (not yet implemented), or if GROUP BY time()
// or fill(none)/fill(linear) is used without a function call.
func (gr *groupInfo) createCursor(t *transpilerState) (cursor, error) {
	// Create all of the cursors for every variable reference.
	// TODO(jsternberg): Determine which of these cursors are from fields and which are tags.
	var cursors []cursor
	if gr.call != nil {
		// Guard the index below so a call without arguments yields an error
		// instead of a panic.
		if len(gr.call.Args) == 0 {
			return nil, fmt.Errorf("first argument to %q must be a variable", gr.call.Name)
		}
		ref, ok := gr.call.Args[0].(*influxql.VarRef)
		if !ok {
			// TODO(jsternberg): This should be validated and figured out somewhere else.
			return nil, fmt.Errorf("first argument to %q must be a variable", gr.call.Name)
		}
		cur, err := createVarRefCursor(t, ref)
		if err != nil {
			return nil, err
		}
		cursors = append(cursors, cur)
	}

	for _, ref := range gr.refs {
		cur, err := createVarRefCursor(t, ref)
		if err != nil {
			return nil, err
		}
		cursors = append(cursors, cur)
	}

	// TODO(jsternberg): Establish which variables in the condition are tags and which are fields.
	// We need to create the references to fields here so they can be joined.
	var (
		tags map[influxql.VarRef]struct{}
		cond influxql.Expr
	)
	valuer := influxql.NowValuer{Now: t.spec.Now}
	if t.stmt.Condition != nil {
		var err error
		cond, _, err = influxql.ConditionExpr(t.stmt.Condition, &valuer)
		if err != nil {
			return nil, err
		}
		if cond != nil {
			tags = make(map[influxql.VarRef]struct{})

			// Walk through the condition for every variable reference. There will be no function
			// calls here.
			var condErr error
			influxql.WalkFunc(cond, func(node influxql.Node) {
				if condErr != nil {
					return
				}
				ref, ok := node.(*influxql.VarRef)
				if !ok {
					return
				}

				// If the variable reference is in any of the cursors, it is definitely
				// a field and we do not have to inspect it further.
				for _, cur := range cursors {
					if _, ok := cur.Value(ref); ok {
						return
					}
				}

				// This may be a field or a tag. If it is a field, we need to create the cursor
				// and add it to the listing of cursors so it can be joined before we evaluate the condition.
				switch typ := t.mapType(ref); typ {
				case influxql.Tag:
					// Add this variable name to the listing of tags.
					tags[*ref] = struct{}{}
				default:
					cur, err := createVarRefCursor(t, ref)
					if err != nil {
						condErr = err
						return
					}
					cursors = append(cursors, cur)
				}
			})

			// The walk callback cannot return an error itself, so surface
			// any failure it recorded instead of silently dropping it.
			if condErr != nil {
				return nil, condErr
			}
		}
	}

	// Join the cursors using an inner join.
	// TODO(jsternberg): We need to differentiate between various join types and this needs to be
	// except: ["_field"] rather than joining on the _measurement. This also needs to specify what the time
	// column should be.
	if len(cursors) > 1 {
		return nil, errors.New("unimplemented: joining fields within a cursor")
	}

	cur := Join(t, cursors, []string{"_measurement"})
	if len(tags) > 0 {
		cur = &tagsCursor{cursor: cur, tags: tags}
	}

	// Evaluate the conditional and insert a filter if a condition exists.
	if cond != nil {
		// Generate a filter expression by evaluating the condition and wrapping it in a filter op.
		expr, err := t.mapField(cond, cur)
		if err != nil {
			return nil, errors.Wrap(err, "unable to evaluate condition")
		}
		id := t.op("filter", &transformations.FilterOpSpec{
			Fn: &semantic.FunctionExpression{
				Block: &semantic.FunctionBlock{
					Parameters: &semantic.FunctionParameters{
						List: []*semantic.FunctionParameter{{
							Key: &semantic.Identifier{Name: "r"},
						}},
					},
					Body: expr,
				},
			},
		}, cur.ID())
		cur = &opCursor{id: id, cursor: cur}
	}

	// Group together the results.
	grouped, err := gr.group(t, cur)
	if err != nil {
		return nil, err
	}
	cur = grouped

	interval, err := t.stmt.GroupByInterval()
	if err != nil {
		return nil, err
	}

	// If a function call is present, evaluate the function call.
	if gr.call != nil {
		c, err := createFunctionCursor(t, gr.call, cur, !gr.selector)
		if err != nil {
			return nil, err
		}
		cur = c

		// If there was a window operation, we now need to undo that and sort by the start column
		// so they stay in the same table and are joined in the correct order.
		if interval > 0 {
			cur = &groupCursor{
				id: t.op("window", &transformations.WindowOpSpec{
					Every:       flux.Duration(math.MaxInt64),
					Period:      flux.Duration(math.MaxInt64),
					TimeColumn:  execute.DefaultTimeColLabel,
					StartColumn: execute.DefaultStartColLabel,
					StopColumn:  execute.DefaultStopColLabel,
				}, cur.ID()),
				cursor: cur,
			}
		}
	} else {
		// If we do not have a function, but we have a field option,
		// return the appropriate error message if there is something wrong with the flux.
		if interval > 0 {
			return nil, errors.New("using GROUP BY requires at least one aggregate function")
		}

		// TODO(jsternberg): Fill needs to be somewhere and it's probably here somewhere.
		// Move this to the correct location once we've figured it out.
		switch t.stmt.Fill {
		case influxql.NoFill:
			return nil, errors.New("fill(none) must be used with a function")
		case influxql.LinearFill:
			return nil, errors.New("fill(linear) must be used with a function")
		}
	}
	return cur, nil
}

// groupCursor wraps another cursor and reports the ID of a group or window
// operation instead of the wrapped cursor's own ID.
type groupCursor struct {
	// cursor is the wrapped cursor; all methods other than ID are delegated to it.
	cursor
	// id is the operation ID returned by ID.
	id flux.OperationID
}

// group adds the grouping operations for the statement's dimensions on top of
// the input cursor. It always emits a "group" operation over _measurement,
// _start and every distinct tag dimension, and follows it with a "window"
// operation when a positive time() interval is present. The returned cursor
// wraps in and reports the ID of the last operation that was added.
//
// An error is returned for invalid or unsupported dimensions (a bare time
// reference, calls other than time(), bad time() arguments, multiple time
// dimensions, wildcards and regex wildcards).
func (gr *groupInfo) group(t *transpilerState, in cursor) (cursor, error) {
	var windowEvery time.Duration
	var windowStart time.Time
	tags := []string{"_measurement", "_start"}
	if len(t.stmt.Dimensions) > 0 {
		// Maintain a set of the dimensions we have encountered.
		// This is so we don't duplicate groupings, but we still maintain the
		// listing of tags in the tags slice so it is deterministic.
		m := make(map[string]struct{})
		for _, d := range t.stmt.Dimensions {
			// Reduce the expression before attempting anything. Do not evaluate the call.
			expr := influxql.Reduce(d.Expr, nil)

			switch expr := expr.(type) {
			case *influxql.VarRef:
				if strings.ToLower(expr.Val) == "time" {
					return nil, errors.New("time() is a function and expects at least one argument")
				}
				if _, ok := m[expr.Val]; ok {
					continue
				}
				tags = append(tags, expr.Val)
				m[expr.Val] = struct{}{}
			case *influxql.Call:
				// Ensure the call is time() and it has one or two duration arguments.
				if expr.Name != "time" {
					return nil, errors.New("only time() calls allowed in dimensions")
				}
				if got := len(expr.Args); got < 1 || got > 2 {
					return nil, errors.New("time dimension expected 1 or 2 arguments")
				}
				lit, ok := expr.Args[0].(*influxql.DurationLiteral)
				if !ok {
					return nil, errors.New("time dimension must have duration argument")
				}
				if windowEvery != 0 {
					return nil, errors.New("multiple time dimensions not allowed")
				}
				windowEvery = lit.Val

				if len(expr.Args) == 2 {
					var windowOffset time.Duration
					switch arg := expr.Args[1].(type) {
					case *influxql.DurationLiteral:
						// A zero interval would make the modulo panic with a
						// division by zero. No window is created for a zero
						// interval, so the offset can simply stay zero.
						if windowEvery != 0 {
							windowOffset = arg.Val % windowEvery
						}
					case *influxql.TimeLiteral:
						windowOffset = arg.Val.Sub(arg.Val.Truncate(windowEvery))
					case *influxql.Call:
						if arg.Name != "now" {
							return nil, errors.New("time dimension offset function must be now()")
						} else if len(arg.Args) != 0 {
							return nil, errors.New("time dimension offset now() function requires no arguments")
						}
						now := t.spec.Now
						windowOffset = now.Sub(now.Truncate(windowEvery))

						// Use the evaluated offset to replace the argument. Ideally, we would
						// use the interval assigned above, but the query engine hasn't been changed
						// to use the compiler information yet.
						expr.Args[1] = &influxql.DurationLiteral{Val: windowOffset}
					case *influxql.StringLiteral:
						// If literal looks like a date time then parse it as a time literal.
						if !arg.IsTimeLiteral() {
							return nil, errors.New("time dimension offset must be duration or now()")
						}
						// Named tl so it does not shadow the transpiler state t.
						tl, err := arg.ToTimeLiteral(t.stmt.Location)
						if err != nil {
							return nil, err
						}
						windowOffset = tl.Val.Sub(tl.Val.Truncate(windowEvery))
					default:
						return nil, errors.New("time dimension offset must be duration or now()")
					}

					//TODO set windowStart
					windowStart = time.Unix(0, 0).Add(windowOffset)
				}
			case *influxql.Wildcard:
				return nil, errors.New("unimplemented: dimension wildcards")
			case *influxql.RegexLiteral:
				return nil, errors.New("unimplemented: dimension regex wildcards")
			default:
				return nil, errors.New("only time and tag dimensions allowed")
			}
		}
	}

	// Perform the grouping by the tags we found. There is always a group by because
	// there is always something to group in influxql.
	// TODO(jsternberg): A wildcard will skip this step.
	id := t.op("group", &transformations.GroupOpSpec{
		Columns: tags,
		Mode:    "by",
	}, in.ID())

	if windowEvery > 0 {
		windowOp := &transformations.WindowOpSpec{
			Every:       flux.Duration(windowEvery),
			Period:      flux.Duration(windowEvery),
			TimeColumn:  execute.DefaultTimeColLabel,
			StartColumn: execute.DefaultStartColLabel,
			StopColumn:  execute.DefaultStopColLabel,
		}

		// windowStart is only set when an offset argument was supplied.
		if !windowStart.IsZero() {
			windowOp.Start = flux.Time{Absolute: windowStart}
		}

		id = t.op("window", windowOp, id)
	}

	return &groupCursor{id: id, cursor: in}, nil
}

// ID returns the operation ID stored on the group cursor, overriding the ID
// of the embedded cursor.
func (c *groupCursor) ID() flux.OperationID {
	return c.id
}

// tagsCursor is a pseudo-cursor that can be used to access tags within the cursor.
// It wraps another cursor and additionally resolves the variable references
// listed in tags.
type tagsCursor struct {
	// cursor is the wrapped cursor that is consulted first by Value.
	cursor
	// tags is the set of variable references that are treated as tags.
	tags map[influxql.VarRef]struct{}
}

// Value resolves expr to a name. The wrapped cursor is asked first; if it does
// not know the expression and expr is a variable reference registered in the
// tag set, the reference's own name is returned. The boolean reports whether
// the expression could be resolved.
func (c *tagsCursor) Value(expr influxql.Expr) (string, bool) {
	// Prefer whatever the underlying cursor already provides.
	if name, found := c.cursor.Value(expr); found {
		return name, true
	}

	// Only variable references can name a tag.
	ref, isRef := expr.(*influxql.VarRef)
	if !isRef {
		return "", false
	}
	if _, isTag := c.tags[*ref]; !isTag {
		return "", false
	}
	return ref.Val, true
}