From cb3d099f6cde460dfd4d701ff323439fe3198b47 Mon Sep 17 00:00:00 2001 From: Adam Date: Thu, 30 Aug 2018 12:01:32 -0400 Subject: [PATCH] add query/functions/doc.go (#686) * finished with first draft of doc.go. Need to follow up on documenting related packages --- query/compile.go | 8 +++- query/functions/doc.go | 104 +++++++++++++++++++++++++++++++++++++++++ query/operation.go | 4 +- 3 files changed, 113 insertions(+), 3 deletions(-) create mode 100644 query/functions/doc.go diff --git a/query/compile.go b/query/compile.go index 35f28b56cf..11009a35ba 100644 --- a/query/compile.go +++ b/query/compile.go @@ -152,7 +152,7 @@ var builtinDeclarations = make(semantic.DeclarationScope) var builtinScripts = make(map[string]string) var finalized bool -// RegisterBuiltIn adds any variable declarations in the script to the builtin scope. +// RegisterBuiltIn adds any variable declarations written in Flux script to the builtin scope. func RegisterBuiltIn(name, script string) { if finalized { panic(errors.New("already finalized, cannot register builtin")) @@ -161,6 +161,9 @@ func RegisterBuiltIn(name, script string) { } // RegisterFunction adds a new builtin top level function. +// Name is the name of the function as it would be called. +// c is a function reference of type CreateOperationSpec +// sig is a function signature type that specifies the names and types of each argument for the function. func RegisterFunction(name string, c CreateOperationSpec, sig semantic.FunctionSignature) { f := function{ t: semantic.NewFunctionType(sig), @@ -173,6 +176,9 @@ func RegisterFunction(name string, c CreateOperationSpec, sig semantic.FunctionS // RegisterFunctionWithSideEffect adds a new builtin top level function that produces side effects. // For example, the builtin functions yield(), toKafka(), and toHTTP() all produce side effects. 
+// name is the name of the function as it would be called. +// c is a function reference of type CreateOperationSpec. +// sig is a function signature type that specifies the names and types of each argument for the function. func RegisterFunctionWithSideEffect(name string, c CreateOperationSpec, sig semantic.FunctionSignature) { f := function{ t: semantic.NewFunctionType(sig), diff --git a/query/functions/doc.go b/query/functions/doc.go new file mode 100644 index 0000000000..ab161fe1dc --- /dev/null +++ b/query/functions/doc.go @@ -0,0 +1,104 @@ +/* +Package functions is a collection of built-in functions that are callable in the flux query processor. While flux may +be extended at runtime by writing function expressions, there are some limitations for which a built-in function is +necessary, such as the need for custom data structures, stateful procedures, complex looping and branching, and +connection to external services. Another reason for implementing a built-in function is to provide a function that is +broadly applicable for many users (e.g., sum() or max()). + +The functions package is rarely accessed as a direct API. Rather, the query processing engine accepts named registrations +for various interfaces implemented within the functions package and executes them generically using an API that is common to all +functions. The registration process is executed by running the init() function in each function file, and is then finalized +by importing package builtin, which itself imports the functions package and runs a final setup routine that finalizes +installation of the builtin functions to the query processor. + +Because of this design, a built-in function implementation consists of a bundle of different interface implementations +that are required at various phases of query execution. These phases are query, plan and execute.
The query phase is +for identifying and initializing the parameter sets for each function, and initializing the internal representation of a query. +The plan phase is for creating a final execution plan, and the execution phase is for physically accessing the data +and computing a final result. + +The query phase takes each function call in a query and performs a match against the registry to see if +there are type definitions for a built-in operation. If matched, it will instantiate the correct query.OperationSpec +type for that function, given the runtime parameters. +If a builtin OperationSpec is not found, then it will check for functions defined at runtime, and otherwise return an error. +The following registrations are typically executed in the function's init() for the query phase to execute properly: + + query.RegisterFunction(name string, c query.CreateOperationSpec, sig semantic.FunctionSignature) + query.RegisterOpSpec(k query.OperationKind, c query.NewOperationSpec) + +In the plan phase, an operation spec must be converted to a plan.ProcedureSpec. A query plan must know what operations to +carry out, including the function names and parameters. In the trivial case, the OperationSpec +and ProcedureSpec have identical fields and the operation spec may be encapsulated as part of the procedure spec. The base +interface for a plan.ProcedureSpec requires a Kind() function, as well as a Copy() function which should perform a deep copy +of the object. Refer to the following interfaces for more information about designing a procedure spec: + plan.ProcedureSpec + plan.PushDownProcedureSpec + plan.BoundedProcedureSpec + plan.YieldProcedureSpec + plan.AggregateProcedureSpec + plan.ParentAwareProcedureSpec + +Once you have determined the interface(s) that must be implemented for your function, you register them with + plan.RegisterProcedureSpec(k ProcedureKind, c CreateProcedureSpec, qks ...query.OperationKind) + +The registration in this phase creates two lookups.
First, it creates a named lookup in a similar fashion as for OperationSpecs +in the query phase. Second, it creates a mapping from OperationSpec types to ProcedureSpec types so that the collection of +OperationSpecs for the query can be quickly converted to corresponding ProcedureSpecs. One feature to note is that the +registration takes a list of query.OperationKind values. This is because several user-facing query functions may map +to the same internal procedure. + +The primary function of the plan phase is to re-order, re-write and possibly combine the operations +described in the incoming query in order to improve the performance of the query execution. The planner has two primary +operations for doing this: pushdowns and rewrites. + +A pushdown operation is a planning technique for pushing the logic from one operation into another so that only a single +composite function needs to be called instead of two simpler function calls. +A pushdown is implemented by implementing the plan.PushDownProcedureSpec interface, which requires functions that define +the rules and methods for executing a pushdown operation. + +A rewrite rule is used to modify one or more ProcedureSpecs in cases where redundant or complementary operations can be +combined to get a simpler result. Similar to a pushdown operation, the rewrite is triggered whenever certain rules apply. +Rewrite rules are implemented differently and require a separate registration: + plan.RegisterRewriteRule(r RewriteRule) + +This in turn requires an implementation of plan.RewriteRule. + +Finally, the execute phase is tasked with executing the specific data processing algorithm for the function. A function +implementation registers an implementation of the execute.Transformation interface that implements functions that +control how the execution engine will take an input table, apply the function, and produce an output table.
A transformation +implementation is registered via: + execute.RegisterTransformation(k plan.ProcedureKind, c execute.CreateTransformation) + +The registration will record a mapping of the procedure's kind to the given transformation type. + +In addition to implementing the transformation type, a number of helper types and functions are provided that facilitate +the transformation process: + execute.Administration + execute.Dataset + execute.TableBuilderCache + execute.TableBuilder + execute.NewAggregateTransformationAndDataset + execute.NewRowSelectorTransformationAndDataset + query.Table + query.GroupKey + query.ColMeta + query.ColReader + +The most important part of a function implementation is the interface method execute.Transformation.Process(id execute.DatasetID, tbl query.Table). +While the full details of how to implement this function are out of the scope of this document, a typical implementation +will do the following: +1. Validate the incoming table schema if needed. +2. Construct the column and group key schema for the output table via the table builder. +3. Process the incoming table via query.Table.Do, and use the input data to determine the output rows for the table builder. +4. Add rows to the output table. + +Finally, there is a special class of functions that do not receive an input table from another function's output. +In other words, these transformations do not have a parent process that supplies them with table data. These transformation +functions are referred to as sources, and naturally implement a connection to a data source (e.g. influxdb, prometheus, csvFile, etc.). +They are registered using: + execute.RegisterSource(k plan.ProcedureKind, c execute.CreateSource) + +The substantial part of a source implementation is its Run method, which should connect to the data source, +collect its data into query.Table structures, and apply any transformations associated with the source.
+*/ +package functions diff --git a/query/operation.go b/query/operation.go index 5fabecf9ab..7b9526640b 100644 --- a/query/operation.go +++ b/query/operation.go @@ -80,8 +80,8 @@ type OperationKind string var kindToOp = make(map[OperationKind]NewOperationSpec) // RegisterOpSpec registers an operation spec with a given kind. -// If the kind has already been registered the call panics. -// +// k is a label that uniquely identifies this operation. If the kind has already been registered the call panics. +// c is a function reference that creates a new, default-initialized opSpec for the given kind. // TODO:(nathanielc) make this part of RegisterMethod/RegisterFunction func RegisterOpSpec(k OperationKind, c NewOperationSpec) { if kindToOp[k] != nil {