Procedural documentation for the join package (#4466)

* WIP task-based join docs

* WIP join task-based docs

* WIP prodedural join docs

* WIP new join docs

* WIP join docs, join diagrams

* WIP join docs

* WIP join docs

* wrap up join docs

* Apply suggestions from code review

Co-authored-by: kelseiv <47797004+kelseiv@users.noreply.github.com>

* apply updates from pr review

* minor updates

* fixed typo, updated update-flux-versions.js script

* Apply suggestions from code review

Co-authored-by: kelseiv <47797004+kelseiv@users.noreply.github.com>

* Apply suggestions from code review

* address PR feedback

* add flux stdlib related links for join

* fixed frontmatter injection

Co-authored-by: kelseiv <47797004+kelseiv@users.noreply.github.com>
pull/4480/head
Scott Anderson 2022-09-23 14:42:19 -06:00 committed by GitHub
parent 1d34362e3c
commit 9472a6c7fc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 1493 additions and 40 deletions

View File

@ -200,29 +200,16 @@ $('.tooltip').each( function(){
$(this).prepend($toolTipElement);
});
/////////////////// Style time columns in tables to not wrap ///////////////////
//////////////////// Style time cells in tables to not wrap ////////////////////
$('.article--content table').each(function() {
var table = $(this);
var timeColumns = ['_time', '*_time', '_start', '*_start', '_stop', '*_stop'];
let header = [];
let timeColumnIndexes = [];
// Return an array of column headers
table.find('th').each(function () {
header.push($(this)[0].innerHTML);
});
// Return indexes of time columns
header.forEach(function(value, i) {
if ( timeColumns.includes(value) ) { timeColumnIndexes.push(i) };
});
// Add the nowrap class to cells with time column indexes
table.find('td').each(function() {
if (timeColumnIndexes.includes( $(this)[0].cellIndex )) {
$(this).addClass('nowrap');
let cellContent = $(this)[0].innerText
if (/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.*Z/.test(cellContent)) {
$(this).addClass('nowrap')
}
})
})

View File

@ -7,19 +7,20 @@
}
.flex-container {
margin-right: 1rem;
margin-right: 1.5rem;
&.half { width: calc(50% - 1rem); }
&.third { width: calc(33.33% - 1rem); }
&.quarter { width: calc(25% - 1rem); }
&.half { width: calc(50% - 1.5rem); }
&.third { width: calc(33.33% - 1.5rem); }
&.quarter { width: calc(25% - 1.5rem); }
&.two-thirds { width: calc(66% - 2rem);}
&.half, &.third, &.quarter {
table:not(:last-child) {margin-right: 1rem;}
table:not(:last-child) {margin-right: 1.5rem;}
}
img { margin-bottom: 0;}
table { display: table; }
p:last-child {margin-bottom: 0.5rem;}
}
////////////////////////////////////////////////////////////////////////////////
@ -32,5 +33,6 @@
&.third,
&.two-thirds { width: calc(100% - 1rem); }
&.quarter { width: calc(50% - 1rem); }
p:last-child {margin-bottom: 1.5rem;}
}
}

View File

@ -14,6 +14,35 @@ svg {
.st1 {fill: $article-text;}
.st2 {font-family: $rubik; font-weight: $medium}
}
//////////////////////////////// Join Diagram ////////////////////////////////
&#join-diagram {
$fill-color: rgba($article-text, .35);
display: block;
max-width: 250px;
margin: 1rem 0 2rem;
&.center {margin: 0 auto 2rem auto;}
&.small {max-width: 125px; path{stroke-width: 3;} }
path {
stroke: $article-text;
stroke-width:2;
stroke-miterlimit:10;
fill: none;
}
&.inner {path { &#center {fill:$fill-color; }}}
&.left {path { &#left, &#center {fill:$fill-color; }}}
&.right {path { &#center, &#right {fill:$fill-color; }}}
&.full {path { &#left, &#center, &#right {fill:$fill-color; }}}
}
}
@include media(small) {
svg {
&#join-diagram {margin: 1rem auto 2rem; }
}
}
//////////////////////////// Styles for SVG legends ////////////////////////////

View File

@ -39,16 +39,22 @@ that contains one value for each [row](#row).
A **row** is a collection of associated [column](#column) values.
#### Group key
A **group key** defines which columns and specific column values to include in a table.
All rows in a table contain the same values in group key columns.
All tables in a stream of tables have a unique group key, but group key
modifications are applied to a stream of tables.
A **group key** defines which columns to use to group tables in a stream of tables.
Each table in a stream of tables represents a unique **group key instance**.
All rows in a table contain the same values for each group key column.
##### Example group keys
Group keys contain key-value pairs, where each key represents a column name and
each value represents the column value included in the table.
The following are examples of group keys in a stream of tables with three separate tables.
Each group key represents a table containing data for a unique location:
##### Example group key
A group key can be represented by an array of column labels.
```
[_measurement, facility, _field]
```
##### Example group key instances
Group key instances (unique to each table) include key-value pairs that identify
each column name in the table that has the same value.
The following are examples of group key instances in a stream of tables with three separate tables.
Each represents a table containing data for a unique location:
```
[_measurement: "production", facility: "us-midwest", _field: "apq"]

View File

@ -0,0 +1,147 @@
---
title: Join data
seotitle: Join data with Flux
description: >
Flux supports inner, full outer, left outer, and right outer joins.
Learn how to use the `join` package to join two data sets with common values.
menu:
flux_0_x:
name: Join data
weight: 8
related:
- /flux/v0.x/stdlib/join/
- /flux/v0.x/stdlib/join/inner/
- /flux/v0.x/stdlib/join/left/
- /flux/v0.x/stdlib/join/right/
- /flux/v0.x/stdlib/join/full/
- /flux/v0.x/stdlib/join/time/
---
Use the Flux [`join` package](/flux/v0.x/stdlib/join/) to join two data sets based on common values.
Learn how to join two data sets using the following join methods:
{{< flex >}}
{{< flex-content "quarter" >}}
<a href="#perform-an-inner-join">
<p style="text-align:center"><strong>Inner join</strong></p>
{{< svg svg="static/svgs/join-diagram.svg" class="inner small center" >}}
</a>
{{< /flex-content >}}
{{< flex-content "quarter" >}}
<a href="#perform-a-left-outer-join">
<p style="text-align:center"><strong>Left outer join</strong></p>
{{< svg svg="static/svgs/join-diagram.svg" class="left small center" >}}
</a>
{{< /flex-content >}}
{{< flex-content "quarter" >}}
<a href="#perform-a-right-outer-join">
<p style="text-align:center"><strong>Right outer join</strong></p>
{{< svg svg="static/svgs/join-diagram.svg" class="right small center" >}}
</a>
{{< /flex-content >}}
{{< flex-content "quarter" >}}
<a href="#perform-a-full-outer-join">
<p style="text-align:center"><strong>Full outer join</strong></p>
{{< svg svg="static/svgs/join-diagram.svg" class="full small center" >}}
</a>
{{< /flex-content >}}
{{< /flex >}}
{{% note %}}
#### When to use the join package
We recommend using the `join` package to join streams that have mostly different
schemas or that come from two separate data sources.
If you're joining data from the same data source with the same schema, using
[`union()`](/flux/v0.x/stdlib/universe/union/) and [`pivot()`](/flux/v0.x/stdlib/universe/pivot/)
to combine the data will likely be more performant.
{{% /note %}}
- [How join functions work](#how-join-functions-work)
- [Input streams](#input-streams)
- [Join predicate function (on)](#join-predicate-function-on)
- [Join output function (as)](#join-output-function-as)
- [Perform join operations](#perform-join-operations)
{{< children type="anchored-list" filterOut="Troubleshoot join operations" >}}
- [Troubleshoot join operations](#troubleshoot-join-operations)
## How join functions work
`join` functions join _two_ streams of tables together based
on common values in each input stream.
- [Input streams](#input-streams)
- [Join predicate function (on)](#join-predicate-function-on)
- [Join output function (as)](#join-output-function-as)
### Input streams
Each input stream is assigned to the `left` or `right` parameter.
Input streams can be defined from any valid data source.
For more information, see:
- [Query data sources](/flux/v0.x/query-data/)
- Define ad hoc tables with [`array.from()`](/flux/v0.x/stdlib/array/from/)
#### Data requirements
To join data, each input stream must have the following:
- **One or more columns with common values to join on**.
Columns do not need identical labels, but they do need to have comparable values.
- **Identical [group keys](/flux/v0.x/get-started/data-model/#group-key)**.
Functions in the `join` package use group keys to quickly determine what tables
from each input stream should be paired and evaluated for the join operation.
_Both input streams should have the same group key._
If they don't, your join operation may not find any matching tables and will
return unexpected output.
If the group keys of your input streams are not identical, use
[`group()`](/flux/v0.x/stdlib/universe/group/) to regroup each input
stream before joining them together.
{{% note %}}
Only tables with the same [group key instance](/flux/v0.x/get-started/data-model/#example-group-key-instances)
are joined.
{{% /note %}}
### Join predicate function (on)
`join` package functions require the `on` parameter to compare values from each input stream (represented by `l` (left) and `r` (right))
and return `true` or `false`.
Rows that return `true` are joined.
This parameter is a [predicate function](/flux/v0.x/get-started/syntax-basics/#predicate-functions).
```js
(l, r) => l.column == r.column
```
### Join output function (as)
`join` package functions _(except [`join.time()`](/flux/v0.x/stdlib/join/time/))_
require the `as` parameter to define the output schema of the join.
The `as` parameter returns a new record using values from
joined rows—left (`l`) and right (`r`).
```js
(l, r) => ({l with name: r.name, location: r.location})
```
{{% note %}}
#### Do not modify group key columns
Do not modify group key columns. The `as` function must return the same group key as both input streams to successfully perform a join.
{{% /note %}}
## Perform join operations
The `join` package supports the following join types and special use cases:
{{< children type="anchored-list" filterOut="Troubleshoot join operations" >}}
{{< children readmore=true filterOut="Troubleshoot join operations" >}}
## Troubleshoot join operations
For information about unexpected behaviors and errors when using the `join` package,
see [Troubleshoot join operations](/flux/v0.x/join-data/troubleshoot-joins/).

View File

@ -0,0 +1,252 @@
---
title: Perform a full outer join
description: >
Use [`join.full()`](/flux/v0.x/stdlib/join/full/) to perform a full outer join of two streams of data.
Full outer joins output a row for all rows in both the **left** and **right** input streams
and join rows that match according to the `on` predicate.
menu:
flux_0_x:
name: Full outer join
parent: Join data
weight: 103
related:
- /flux/v0.x/join-data/troubleshoot-joins/
- /flux/v0.x/stdlib/join/
- /flux/v0.x/stdlib/join/full/
list_code_example: |
```js
import "join"
left = from(bucket: "example-bucket-1") |> //...
right = from(bucket: "example-bucket-2") |> //...
join.full(
left: left,
right: right,
on: (l, r) => l.id == r.id,
as: (l, r) => {
id = if exists l.id then l.id else r.id
return {name: l.name, location: r.location, id: id}
},
)
```
---
Use [`join.full()`](/flux/v0.x/stdlib/join/full/) to perform a full outer join of two streams of data.
Full outer joins output a row for all rows in both the **left** and **right** input streams
and join rows that match according to the `on` predicate.
{{< svg svg="static/svgs/join-diagram.svg" class="full" >}}
{{< expand-wrapper >}}
{{% expand "View table illustration of a full outer join" %}}
{{< flex >}}
{{% flex-content "third" %}}
#### left
| | | |
| :-- | :----------------------------------- | :----------------------------------- |
| r1 | <span style="color:#9b2aff"></span> | <span style="color:#9b2aff"></span> |
| r2 | <span style="color:#9b2aff"></span> | <span style="color:#9b2aff"></span> |
{{% /flex-content %}}
{{% flex-content "third" %}}
#### right
| | | |
| :-- | :----------------------------------- | :----------------------------------- |
| r1 | <span style="color:#d30971"></span> | <span style="color:#d30971"></span> |
| r3 | <span style="color:#d30971"></span> | <span style="color:#d30971"></span> |
| r4 | <span style="color:#d30971"></span> | <span style="color:#d30971"></span> |
{{% /flex-content %}}
{{% flex-content "third" %}}
#### Full outer join result
| | | | | |
| :-- | :----------------------------------- | :----------------------------------- | :----------------------------------- | :----------------------------------- |
| r1 | <span style="color:#9b2aff"></span> | <span style="color:#9b2aff"></span> | <span style="color:#d30971"></span> | <span style="color:#d30971"></span> |
| r2 | <span style="color:#9b2aff"></span> | <span style="color:#9b2aff"></span> | | |
| r3 | | | <span style="color:#d30971"></span> | <span style="color:#d30971"></span> |
| r4 | | | <span style="color:#d30971"></span> | <span style="color:#d30971"></span> |
{{% /flex-content %}}
{{< /flex >}}
{{% /expand %}}
{{< /expand-wrapper >}}
## Use join.full to join your data
1. Import the `join` package.
2. Define the **left** and **right** data streams to join:
- Each stream must have one or more columns with common values.
Column labels do not need to match, but column values do.
- Each stream should have identical [group keys](/flux/v0.x/get-started/data-model/#group-key).
_For more information, see [join data requirements](/flux/v0.x/join-data/#data-requirements)._
3. Use `join.full()` to join the two streams together.
Provide the following required parameters:
- `left`: Stream of data representing the left side of the join.
- `right`: Stream of data representing the right side of the join.
- `on`: [Join predicate](/flux/v0.x/join-data/#join-predicate-function-on).
For example: `(l, r) => l.column == r.column`.
- `as`: [Join output function](/flux/v0.x/join-data/#join-output-function-as)
that returns a record with values from each input stream.
##### Account for missing, non-group-key values
In a full outer join, it's possible for either the left (`l`) or right (`r`)
to contain _null_ values for the columns used in the join operation
and default to a default record (group key columns are populated and
other columns are _null_).
`l` and `r` will never both use default records at the same time.
To ensure non-null values are included in the output for non-group-key columns,
check for the existence of a value in the `l` or `r` record, and return
the value that exists:
```js
(l, r) => {
id = if exists l.id then l.id else r.id
return {_time: l.time, location: r.location, id: id}
}
```
The following example uses a filtered selection from the
[**machineProduction** sample data set](/flux/v0.x/stdlib/influxdata/influxdb/sample/data/#set)
as the **left** data stream and an ad-hoc table created with [`array.from()`](/flux/v0.x/stdlib/array/from/)
as the **right** data stream.
{{% note %}}
#### Example data grouping
The example below ungroups the **left** stream to match the grouping of the **right** stream.
After the two streams are joined together, the joined data is grouped by `stationID`
and sorted by `_time`.
{{% /note %}}
```js
import "array"
import "influxdata/influxdb/sample"
import "join"
left =
sample.data(set: "machineProduction")
|> filter(fn: (r) => r.stationID == "g1" or r.stationID == "g2" or r.stationID == "g3")
|> filter(fn: (r) => r._field == "oil_temp")
|> limit(n: 5)
right =
array.from(
rows: [
{station: "g1", opType: "auto", last_maintained: 2021-07-15T00:00:00Z},
{station: "g2", opType: "manned", last_maintained: 2021-07-02T00:00:00Z},
{station: "g4", opType: "auto", last_maintained: 2021-08-04T00:00:00Z},
],
)
join.full(
left: left |> group(),
right: right,
on: (l, r) => l.stationID == r.station,
as: (l, r) => {
stationID = if exists l.stationID then l.stationID else r.station
return {
stationID: stationID,
_time: l._time,
_field: l._field,
_value: l._value,
opType: r.opType,
maintained: r.last_maintained,
}
},
)
|> group(columns: ["stationID"])
|> sort(columns: ["_time"])
```
{{< expand-wrapper >}}
{{% expand "View example input and output" %}}
### Input
#### left {#left-input}
{{% note %}}
_`_start` and `_stop` columns have been omitted._
{{% /note %}}
| _time | _measurement | stationID | _field | _value |
| :---------------------- | :----------- | :-------- | :------- | -----: |
| 2021-08-01T00:00:00Z | machinery | g1 | oil_temp | 39.1 |
| 2021-08-01T00:00:11.51Z | machinery | g1 | oil_temp | 40.3 |
| 2021-08-01T00:00:19.53Z | machinery | g1 | oil_temp | 40.6 |
| 2021-08-01T00:00:25.1Z | machinery | g1 | oil_temp | 40.72 |
| 2021-08-01T00:00:36.88Z | machinery | g1 | oil_temp | 40.8 |
| _time | _measurement | stationID | _field | _value |
| :---------------------- | :----------- | :-------- | :------- | -----: |
| 2021-08-01T00:00:00Z | machinery | g2 | oil_temp | 40.6 |
| 2021-08-01T00:00:27.93Z | machinery | g2 | oil_temp | 40.6 |
| 2021-08-01T00:00:54.96Z | machinery | g2 | oil_temp | 40.6 |
| 2021-08-01T00:01:17.27Z | machinery | g2 | oil_temp | 40.6 |
| 2021-08-01T00:01:41.84Z | machinery | g2 | oil_temp | 40.6 |
| _time | _measurement | stationID | _field | _value |
| :---------------------- | :----------- | :-------- | :------- | -----: |
| 2021-08-01T00:00:00Z | machinery | g3 | oil_temp | 41.4 |
| 2021-08-01T00:00:14.46Z | machinery | g3 | oil_temp | 41.36 |
| 2021-08-01T00:00:25.29Z | machinery | g3 | oil_temp | 41.4 |
| 2021-08-01T00:00:38.77Z | machinery | g3 | oil_temp | 41.4 |
| 2021-08-01T00:00:51.2Z | machinery | g3 | oil_temp | 41.4 |
#### right {#right-input}
| station | opType | last_maintained |
| :------ | :----- | -------------------: |
| g1 | auto | 2021-07-15T00:00:00Z |
| g2 | manned | 2021-07-02T00:00:00Z |
| g4 | auto | 2021-08-04T00:00:00Z |
### Output {#example-output}
| _time | stationID | _field | _value | maintained | opType |
| :---------------------- | :-------- | :------- | -----: | :------------------- | :----- |
| 2021-08-01T00:00:00Z | g1 | oil_temp | 39.1 | 2021-07-15T00:00:00Z | auto |
| 2021-08-01T00:00:11.51Z | g1 | oil_temp | 40.3 | 2021-07-15T00:00:00Z | auto |
| 2021-08-01T00:00:19.53Z | g1 | oil_temp | 40.6 | 2021-07-15T00:00:00Z | auto |
| 2021-08-01T00:00:25.1Z | g1 | oil_temp | 40.72 | 2021-07-15T00:00:00Z | auto |
| 2021-08-01T00:00:36.88Z | g1 | oil_temp | 40.8 | 2021-07-15T00:00:00Z | auto |
| _time | stationID | _field | _value | maintained | opType |
| :---------------------- | :-------- | :------- | -----: | :------------------- | :----- |
| 2021-08-01T00:00:00Z | g2 | oil_temp | 40.6 | 2021-07-02T00:00:00Z | manned |
| 2021-08-01T00:00:27.93Z | g2 | oil_temp | 40.6 | 2021-07-02T00:00:00Z | manned |
| 2021-08-01T00:00:54.96Z | g2 | oil_temp | 40.6 | 2021-07-02T00:00:00Z | manned |
| 2021-08-01T00:01:17.27Z | g2 | oil_temp | 40.6 | 2021-07-02T00:00:00Z | manned |
| 2021-08-01T00:01:41.84Z | g2 | oil_temp | 40.6 | 2021-07-02T00:00:00Z | manned |
| _time | stationID | _field | _value | maintained | opType |
| :---------------------- | :-------- | :------- | -----: | :--------- | :----- |
| 2021-08-01T00:00:00Z | g3 | oil_temp | 41.4 | | |
| 2021-08-01T00:00:14.46Z | g3 | oil_temp | 41.36 | | |
| 2021-08-01T00:00:25.29Z | g3 | oil_temp | 41.4 | | |
| 2021-08-01T00:00:38.77Z | g3 | oil_temp | 41.4 | | |
| 2021-08-01T00:00:51.2Z | g3 | oil_temp | 41.4 | | |
| _time | stationID | _field | _value | maintained | opType |
| :---- | :-------- | :----- | -----: | :------------------- | :----- |
| | g4 | | | 2021-08-04T00:00:00Z | auto |
#### Things to note about the join output
- Because the [right stream](#right-input) does not have rows with the `g3` stationID tag,
the joined output includes rows with the `g3` stationID tag from the [left stream](#left-input)
with _null_ values in columns populated from the **right** stream.
- Because the [left stream](#left-input) does not have rows with the `g4` stationID tag,
the joined output includes rows with the `g4` stationID tag from the [right stream](#right-input)
with _null_ values in columns populated from the **left** stream.
{{% /expand %}}
{{< /expand-wrapper >}}

View File

@ -0,0 +1,196 @@
---
title: Perform an inner join
description: >
Use [`join.inner()`](/flux/v0.x/stdlib/join/inner/) to perform an inner join of two streams of data.
Inner joins drop any rows from both input streams that do not have a matching
row in the other stream.
menu:
flux_0_x:
name: Inner join
parent: Join data
weight: 101
related:
- /flux/v0.x/join-data/troubleshoot-joins/
- /flux/v0.x/stdlib/join/
- /flux/v0.x/stdlib/join/inner/
list_code_example: |
```js
import "join"
left = from(bucket: "example-bucket-1") |> //...
right = from(bucket: "example-bucket-2") |> //...
join.inner(
left: left,
right: right,
on: (l, r) => l.column == r.column,
as: (l, r) => ({l with name: r.name, location: r.location}),
)
```
---
Use [`join.inner()`](/flux/v0.x/stdlib/join/inner/) to perform an inner join of two streams of data.
Inner joins drop any rows from both input streams that do not have a matching
row in the other stream.
{{< svg svg="static/svgs/join-diagram.svg" class="inner" >}}
{{< expand-wrapper >}}
{{% expand "View table illustration of an inner join" %}}
{{< flex >}}
{{% flex-content "third" %}}
#### left
| | | |
| :-- | :----------------------------------- | :----------------------------------- |
| r1 | <span style="color:#9b2aff"></span> | <span style="color:#9b2aff"></span> |
| r2 | <span style="color:#9b2aff"></span> | <span style="color:#9b2aff"></span> |
{{% /flex-content %}}
{{% flex-content "third" %}}
#### right
| | | |
| :-- | :----------------------------------- | :----------------------------------- |
| r1 | <span style="color:#d30971"></span> | <span style="color:#d30971"></span> |
| r3 | <span style="color:#d30971"></span> | <span style="color:#d30971"></span> |
| r4 | <span style="color:#d30971"></span> | <span style="color:#d30971"></span> |
{{% /flex-content %}}
{{% flex-content "third" %}}
#### Inner join result
| | | | | |
| :-- | :----------------------------------- | :----------------------------------- | :----------------------------------- | :----------------------------------- |
| r1 | <span style="color:#9b2aff"></span> | <span style="color:#9b2aff"></span> | <span style="color:#d30971"></span> | <span style="color:#d30971"></span> |
{{% /flex-content %}}
{{< /flex >}}
{{% /expand %}}
{{< /expand-wrapper >}}
## Use join.inner to join your data
1. Import the `join` package.
2. Define the **left** and **right** data streams to join:
- Each stream must have one or more columns with common values.
Column labels do not need to match, but column values do.
- Each stream should have identical [group keys](/flux/v0.x/get-started/data-model/#group-key).
_For more information, see [join data requirements](/flux/v0.x/join-data/#data-requirements)._
3. Use `join.inner()` to join the two streams together.
Provide the following required parameters:
- `left`: Stream of data representing the left side of the join.
- `right`: Stream of data representing the right side of the join.
- `on`: [Join predicate](/flux/v0.x/join-data/#join-predicate-function-on).
For example: `(l, r) => l.column == r.column`.
- `as`: [Join output function](/flux/v0.x/join-data/#join-output-function-as)
that returns a record with values from each input stream.
For example: `(l, r) => ({l with column1: r.column1, column2: r.column2})`.
The following example uses a filtered selection from the
[**machineProduction** sample data set](/flux/v0.x/stdlib/influxdata/influxdb/sample/data/#set)
as the **left** data stream and an ad-hoc table created with [`array.from()`](/flux/v0.x/stdlib/array/from/)
as the **right** data stream.
{{% note %}}
#### Example data grouping
The example below ungroups the **left** stream to match the grouping of the **right** stream.
After the two streams are joined together, the joined data is grouped by `stationID`.
{{% /note %}}
```js
import "array"
import "influxdata/influxdb/sample"
import "join"
left =
sample.data(set: "machineProduction")
|> filter(fn: (r) => r.stationID == "g1" or r.stationID == "g2" or r.stationID == "g3")
|> filter(fn: (r) => r._field == "oil_temp")
|> limit(n: 5)
right =
array.from(
rows: [
{station: "g1", opType: "auto", last_maintained: 2021-07-15T00:00:00Z},
{station: "g2", opType: "manned", last_maintained: 2021-07-02T00:00:00Z},
],
)
join.inner(
left: left |> group(),
right: right,
on: (l, r) => l.stationID == r.station,
as: (l, r) => ({l with opType: r.opType, maintained: r.last_maintained}),
)
|> group(columns: ["stationID"])
```
{{< expand-wrapper >}}
{{% expand "View example input and output" %}}
{{% note %}}
_`_start` and `_stop` columns have been omitted from example input and output._
{{% /note %}}
### Input
#### left {#left-input}
| _time | _measurement | stationID | _field | _value |
| :---------------------- | :----------- | :-------- | :------- | -----: |
| 2021-08-01T00:00:00Z | machinery | g1 | oil_temp | 39.1 |
| 2021-08-01T00:00:11.51Z | machinery | g1 | oil_temp | 40.3 |
| 2021-08-01T00:00:19.53Z | machinery | g1 | oil_temp | 40.6 |
| 2021-08-01T00:00:25.1Z | machinery | g1 | oil_temp | 40.72 |
| 2021-08-01T00:00:36.88Z | machinery | g1 | oil_temp | 40.8 |
| _time | _measurement | stationID | _field | _value |
| :---------------------- | :----------- | :-------- | :------- | -----: |
| 2021-08-01T00:00:00Z | machinery | g2 | oil_temp | 40.6 |
| 2021-08-01T00:00:27.93Z | machinery | g2 | oil_temp | 40.6 |
| 2021-08-01T00:00:54.96Z | machinery | g2 | oil_temp | 40.6 |
| 2021-08-01T00:01:17.27Z | machinery | g2 | oil_temp | 40.6 |
| 2021-08-01T00:01:41.84Z | machinery | g2 | oil_temp | 40.6 |
| _time | _measurement | stationID | _field | _value |
| :---------------------- | :----------- | :-------- | :------- | -----: |
| 2021-08-01T00:00:00Z | machinery | g3 | oil_temp | 41.4 |
| 2021-08-01T00:00:14.46Z | machinery | g3 | oil_temp | 41.36 |
| 2021-08-01T00:00:25.29Z | machinery | g3 | oil_temp | 41.4 |
| 2021-08-01T00:00:38.77Z | machinery | g3 | oil_temp | 41.4 |
| 2021-08-01T00:00:51.2Z | machinery | g3 | oil_temp | 41.4 |
#### right {#right-input}
| station | opType | last_maintained |
| :------ | :----- | -------------------: |
| g1 | auto | 2021-07-15T00:00:00Z |
| g2 | manned | 2021-07-02T00:00:00Z |
### Output {#example-output}
| _time | _measurement | stationID | _field | _value | opType | maintained |
| :---------------------- | :----------- | :-------- | :------- | -----: | :----- | :------------------- |
| 2021-08-01T00:00:00Z | machinery | g1 | oil_temp | 39.1 | auto | 2021-07-15T00:00:00Z |
| 2021-08-01T00:00:11.51Z | machinery | g1 | oil_temp | 40.3 | auto | 2021-07-15T00:00:00Z |
| 2021-08-01T00:00:19.53Z | machinery | g1 | oil_temp | 40.6 | auto | 2021-07-15T00:00:00Z |
| 2021-08-01T00:00:25.1Z | machinery | g1 | oil_temp | 40.72 | auto | 2021-07-15T00:00:00Z |
| 2021-08-01T00:00:36.88Z | machinery | g1 | oil_temp | 40.8 | auto | 2021-07-15T00:00:00Z |
| _time | _measurement | stationID | _field | _value | opType | maintained |
| :---------------------- | :----------- | :-------- | :------- | -----: | :----- | :------------------- |
| 2021-08-01T00:00:00Z | machinery | g2 | oil_temp | 40.6 | manned | 2021-07-02T00:00:00Z |
| 2021-08-01T00:00:27.93Z | machinery | g2 | oil_temp | 40.6 | manned | 2021-07-02T00:00:00Z |
| 2021-08-01T00:00:54.96Z | machinery | g2 | oil_temp | 40.6 | manned | 2021-07-02T00:00:00Z |
| 2021-08-01T00:01:17.27Z | machinery | g2 | oil_temp | 40.6 | manned | 2021-07-02T00:00:00Z |
| 2021-08-01T00:01:41.84Z | machinery | g2 | oil_temp | 40.6 | manned | 2021-07-02T00:00:00Z |
#### Things to note about the join output
- Because the [right stream](#right-input) does not have a row with the `g3` station tag,
the joined output drops all rows with the `g3` stationID tag from the [left stream](#left-input).
`join.inner()` drops any rows that do not have a matching row in the other
data stream.
{{% /expand %}}
{{< /expand-wrapper >}}

View File

@ -0,0 +1,207 @@
---
title: Perform a left outer join
description: >
Use [`join.left()`](/flux/v0.x/stdlib/join/left/) to perform a left outer join of two streams of data.
Left joins output a row for each row in the **left** data stream with data matching
from the **right** data stream. If there is no matching data in the **right**
data stream, non-group-key columns with values from the **right** data stream are _null_.
menu:
flux_0_x:
name: Left outer join
parent: Join data
weight: 102
related:
- /flux/v0.x/join-data/troubleshoot-joins/
- /flux/v0.x/stdlib/join/
- /flux/v0.x/stdlib/join/left/
list_code_example: |
```js
import "join"
left = from(bucket: "example-bucket-1") |> //...
right = from(bucket: "example-bucket-2") |> //...
join.left(
left: left,
right: right,
on: (l, r) => l.column == r.column,
as: (l, r) => ({l with name: r.name, location: r.location}),
)
```
---
Use [`join.left()`](/flux/v0.x/stdlib/join/left/) to perform a left outer join of two streams of data.
Left joins output a row for each row in the **left** data stream with data matching
from the **right** data stream. If there is no matching data in the **right**
data stream, non-group-key columns with values from the **right** data stream are _null_.
{{< svg svg="static/svgs/join-diagram.svg" class="left" >}}
{{< expand-wrapper >}}
{{% expand "View table illustration of a left outer join" %}}
{{< flex >}}
{{% flex-content "third" %}}
#### left
| | | |
| :-- | :----------------------------------- | :----------------------------------- |
| r1 | <span style="color:#9b2aff"></span> | <span style="color:#9b2aff"></span> |
| r2 | <span style="color:#9b2aff"></span> | <span style="color:#9b2aff"></span> |
{{% /flex-content %}}
{{% flex-content "third" %}}
#### right
| | | |
| :-- | :----------------------------------- | :----------------------------------- |
| r1 | <span style="color:#d30971"></span> | <span style="color:#d30971"></span> |
| r3 | <span style="color:#d30971"></span> | <span style="color:#d30971"></span> |
| r4 | <span style="color:#d30971"></span> | <span style="color:#d30971"></span> |
{{% /flex-content %}}
{{% flex-content "third" %}}
#### Left outer join result
| | | | | |
| :-- | :----------------------------------- | :----------------------------------- | :----------------------------------- | :----------------------------------- |
| r1 | <span style="color:#9b2aff"></span> | <span style="color:#9b2aff"></span> | <span style="color:#d30971"></span> | <span style="color:#d30971"></span> |
| r2 | <span style="color:#9b2aff"></span> | <span style="color:#9b2aff"></span> | | |
{{% /flex-content %}}
{{< /flex >}}
{{% /expand %}}
{{< /expand-wrapper >}}
## Use join.left to join your data
1. Import the `join` package.
2. Define the **left** and **right** data streams to join:
- Each stream must have one or more columns with common values.
Column labels do not need to match, but column values do.
- Each stream should have identical [group keys](/flux/v0.x/get-started/data-model/#group-key).
_For more information, see [join data requirements](/flux/v0.x/join-data/#data-requirements)._
3. Use `join.left()` to join the two streams together.
Provide the following parameters:
- `left`: Stream of data representing the left side of the join.
- `right`: Stream of data representing the right side of the join.
- `on`: [Join predicate](/flux/v0.x/join-data/#join-predicate-function-on).
For example: `(l, r) => l.column == r.column`.
- `as`: [Join output function](/flux/v0.x/join-data/#join-output-function-as)
that returns a record with values from each input stream.
For example: `(l, r) => ({l with column1: r.column1, column2: r.column2})`.
The following example uses a filtered selection from the
[**machineProduction** sample data set](/flux/v0.x/stdlib/influxdata/influxdb/sample/data/#set)
as the **left** data stream and an ad-hoc table created with [`array.from()`](/flux/v0.x/stdlib/array/from/)
as the **right** data stream.
{{% note %}}
#### Example data grouping
The example below ungroups the **left** stream to match the grouping of the **right** stream.
After the two streams are joined together, the joined data is grouped by `stationID`.
{{% /note %}}
```js
import "array"
import "influxdata/influxdb/sample"
import "join"
left =
sample.data(set: "machineProduction")
|> filter(fn: (r) => r.stationID == "g1" or r.stationID == "g2" or r.stationID == "g3")
|> filter(fn: (r) => r._field == "oil_temp")
|> limit(n: 5)
right =
array.from(
rows: [
{station: "g1", opType: "auto", last_maintained: 2021-07-15T00:00:00Z},
{station: "g2", opType: "manned", last_maintained: 2021-07-02T00:00:00Z},
],
)
join.left(
left: left |> group(),
right: right,
on: (l, r) => l.stationID == r.station,
as: (l, r) => ({l with opType: r.opType, maintained: r.last_maintained}),
)
|> group(columns: ["stationID"])
```
{{< expand-wrapper >}}
{{% expand "View example input and output" %}}
{{% note %}}
_`_start` and `_stop` columns have been omitted from example input and output._
{{% /note %}}
### Input
#### left {#left-input}
| _time | _measurement | stationID | _field | _value |
| :---------------------- | :----------- | :-------- | :------- | -----: |
| 2021-08-01T00:00:00Z | machinery | g1 | oil_temp | 39.1 |
| 2021-08-01T00:00:11.51Z | machinery | g1 | oil_temp | 40.3 |
| 2021-08-01T00:00:19.53Z | machinery | g1 | oil_temp | 40.6 |
| 2021-08-01T00:00:25.1Z | machinery | g1 | oil_temp | 40.72 |
| 2021-08-01T00:00:36.88Z | machinery | g1 | oil_temp | 40.8 |
| _time | _measurement | stationID | _field | _value |
| :---------------------- | :----------- | :-------- | :------- | -----: |
| 2021-08-01T00:00:00Z | machinery | g2 | oil_temp | 40.6 |
| 2021-08-01T00:00:27.93Z | machinery | g2 | oil_temp | 40.6 |
| 2021-08-01T00:00:54.96Z | machinery | g2 | oil_temp | 40.6 |
| 2021-08-01T00:01:17.27Z | machinery | g2 | oil_temp | 40.6 |
| 2021-08-01T00:01:41.84Z | machinery | g2 | oil_temp | 40.6 |
| _time | _measurement | stationID | _field | _value |
| :---------------------- | :----------- | :-------- | :------- | -----: |
| 2021-08-01T00:00:00Z | machinery | g3 | oil_temp | 41.4 |
| 2021-08-01T00:00:14.46Z | machinery | g3 | oil_temp | 41.36 |
| 2021-08-01T00:00:25.29Z | machinery | g3 | oil_temp | 41.4 |
| 2021-08-01T00:00:38.77Z | machinery | g3 | oil_temp | 41.4 |
| 2021-08-01T00:00:51.2Z | machinery | g3 | oil_temp | 41.4 |
#### right {#right-input}
| station | opType | last_maintained |
| :------ | :----- | -------------------: |
| g1 | auto | 2021-07-15T00:00:00Z |
| g2 | manned | 2021-07-02T00:00:00Z |
### Output {#example-output}
| _time | _measurement | stationID | _field | _value | opType | maintained |
| :---------------------- | :----------- | :-------- | :------- | -----: | :----- | :------------------- |
| 2021-08-01T00:00:00Z | machinery | g1 | oil_temp | 39.1 | auto | 2021-07-15T00:00:00Z |
| 2021-08-01T00:00:11.51Z | machinery | g1 | oil_temp | 40.3 | auto | 2021-07-15T00:00:00Z |
| 2021-08-01T00:00:19.53Z | machinery | g1 | oil_temp | 40.6 | auto | 2021-07-15T00:00:00Z |
| 2021-08-01T00:00:25.1Z | machinery | g1 | oil_temp | 40.72 | auto | 2021-07-15T00:00:00Z |
| 2021-08-01T00:00:36.88Z | machinery | g1 | oil_temp | 40.8 | auto | 2021-07-15T00:00:00Z |
| _time | _measurement | stationID | _field | _value | opType | maintained |
| :---------------------- | :----------- | :-------- | :------- | -----: | :----- | :------------------- |
| 2021-08-01T00:00:00Z | machinery | g2 | oil_temp | 40.6 | manned | 2021-07-02T00:00:00Z |
| 2021-08-01T00:00:27.93Z | machinery | g2 | oil_temp | 40.6 | manned | 2021-07-02T00:00:00Z |
| 2021-08-01T00:00:54.96Z | machinery | g2 | oil_temp | 40.6 | manned | 2021-07-02T00:00:00Z |
| 2021-08-01T00:01:17.27Z | machinery | g2 | oil_temp | 40.6 | manned | 2021-07-02T00:00:00Z |
| 2021-08-01T00:01:41.84Z | machinery | g2 | oil_temp | 40.6 | manned | 2021-07-02T00:00:00Z |
| _time | _measurement | stationID | _field | _value | opType | maintained |
| :---------------------- | :----------- | :-------- | :------- | -----: | :----- | :--------- |
| 2021-08-01T00:00:00Z | machinery | g3 | oil_temp | 41.4 | | |
| 2021-08-01T00:00:14.46Z | machinery | g3 | oil_temp | 41.36 | | |
| 2021-08-01T00:00:25.29Z | machinery | g3 | oil_temp | 41.4 | | |
| 2021-08-01T00:00:38.77Z | machinery | g3 | oil_temp | 41.4 | | |
| 2021-08-01T00:00:51.2Z | machinery | g3 | oil_temp | 41.4 | | |
#### Things to note about the join output
- Because the [right stream](#right-input) does not have a row with the `g3` station tag,
rows from the [left stream](#left-input) with the `g3` stationID tag include
_null_ values in columns that are populated from the right stream (`r`) in the
`as` parameter.
{{% /expand %}}
{{< /expand-wrapper >}}

View File

@ -0,0 +1,170 @@
---
title: Perform a right outer join
description: >
Use [`join.right()`](/flux/v0.x/stdlib/join/right/) to perform a right outer join of two streams of data.
Right joins output a row for each row in the **right** data stream with data matching
from the **left** data stream. If there is no matching data in the **left**
data stream, non-group-key columns with values from the **left** data stream are _null_.
menu:
flux_0_x:
name: Right outer join
parent: Join data
weight: 102
related:
- /flux/v0.x/join-data/troubleshoot-joins/
- /flux/v0.x/stdlib/join/
- /flux/v0.x/stdlib/join/right/
list_code_example: |
```js
import "join"
left = from(bucket: "example-bucket-1") |> //...
right = from(bucket: "example-bucket-2") |> //...
join.right(
left: left,
right: right,
on: (l, r) => l.column == r.column,
as: (l, r) => ({r with name: l.name, location: l.location}),
)
```
---
Use [`join.right()`](/flux/v0.x/stdlib/join/right/) to perform a right outer join of two streams of data.
Right joins output a row for each row in the **right** data stream with data matching
from the **left** data stream. If there is no matching data in the **left**
data stream, non-group-key columns with values from the **left** data stream are _null_.
{{< svg svg="static/svgs/join-diagram.svg" class="right" >}}
{{< expand-wrapper >}}
{{% expand "View table illustration of a right outer join" %}}
{{< flex >}}
{{% flex-content "third" %}}
#### left
| | | |
| :-- | :----------------------------------- | :----------------------------------- |
| r1 | <span style="color:#9b2aff"></span> | <span style="color:#9b2aff"></span> |
| r2 | <span style="color:#9b2aff"></span> | <span style="color:#9b2aff"></span> |
{{% /flex-content %}}
{{% flex-content "third" %}}
#### right
| | | |
| :-- | :----------------------------------- | :----------------------------------- |
| r1 | <span style="color:#d30971"></span> | <span style="color:#d30971"></span> |
| r3 | <span style="color:#d30971"></span> | <span style="color:#d30971"></span> |
| r4 | <span style="color:#d30971"></span> | <span style="color:#d30971"></span> |
{{% /flex-content %}}
{{% flex-content "third" %}}
#### Right outer join result
| | | | | |
| :-- | :----------------------------------- | :----------------------------------- | :----------------------------------- | :----------------------------------- |
| r1 | <span style="color:#9b2aff"></span> | <span style="color:#9b2aff"></span> | <span style="color:#d30971"></span> | <span style="color:#d30971"></span> |
| r3 | | | <span style="color:#d30971"></span> | <span style="color:#d30971"></span> |
| r4 | | | <span style="color:#d30971"></span> | <span style="color:#d30971"></span> |
{{% /flex-content %}}
{{< /flex >}}
{{% /expand %}}
{{< /expand-wrapper >}}
## Use join.right to join your data
1. Import the `join` package.
2. Define the **left** and **right** data streams to join:
- Each stream must have one or more columns with common values.
Column labels do not need to match, but column values do.
- Each stream should have identical [group keys](/flux/v0.x/get-started/data-model/#group-key).
_For more information, see [join data requirements](/flux/v0.x/join-data/#data-requirements)._
3. Use `join.right()` to join the two streams together.
Provide the following required parameters:
- `left`: Stream of data representing the left side of the join.
- `right`: Stream of data representing the right side of the join.
- `on`: [Join predicate](/flux/v0.x/join-data/#join-predicate-function-on).
For example: `(l, r) => l.column == r.column`.
- `as`: [Join output function](/flux/v0.x/join-data/#join-output-function-as)
that returns a record with values from each input stream.
For example: `(l, r) => ({r with column1: l.column1, column2: l.column2})`.
The following example uses a filtered selection from the
[**machineProduction** sample data set](/flux/v0.x/stdlib/influxdata/influxdb/sample/data/#set)
as the **left** data stream and an ad-hoc table created with [`array.from()`](/flux/v0.x/stdlib/array/from/)
as the **right** data stream.
{{% note %}}
#### Example data grouping
The example below ungroups the **left** stream to match the grouping of the **right** stream.
{{% /note %}}
```js
import "array"
import "influxdata/influxdb/sample"
import "join"
left =
sample.data(set: "machineProduction")
|> filter(fn: (r) => r.stationID == "g1" or r.stationID == "g2" or r.stationID == "g3")
|> filter(fn: (r) => r._field == "oil_temp")
|> last()
right =
array.from(
rows: [
{station: "g1", opType: "auto", last_maintained: 2021-07-15T00:00:00Z},
{station: "g2", opType: "manned", last_maintained: 2021-07-02T00:00:00Z},
],
)
join.right(
left: left |> group(),
right: right,
on: (l, r) => l.stationID == r.station,
as: (l, r) => ({r with last_reported_val: l._value, last_reported_time: l._time}),
)
```
{{< expand-wrapper >}}
{{% expand "View example input and output" %}}
### Input
#### left {#left-input}
{{% note %}}
_`_start` and `_stop` columns have been omitted._
{{% /note %}}
| _time | _measurement | stationID | _field | _value |
| :---------------------- | :----------- | :-------- | :------- | -----: |
| 2021-08-01T23:59:46.17Z | machinery | g1 | oil_temp | 40.6 |
| _time | _measurement | stationID | _field | _value |
| :---------------------- | :----------- | :-------- | :------- | -----: |
| 2021-08-01T23:59:34.57Z | machinery | g2 | oil_temp | 41.34 |
| _time | _measurement | stationID | _field | _value |
| :---------------------- | :----------- | :-------- | :------- | -----: |
| 2021-08-01T23:59:41.96Z | machinery | g3 | oil_temp | 41.26 |
#### right {#right-input}
| station | opType | last_maintained |
| :------ | :----- | -------------------: |
| g1 | auto | 2021-07-15T00:00:00Z |
| g2 | manned | 2021-07-02T00:00:00Z |
### Output {#example-output}
| station | opType | last_maintained | last_reported_time | last_reported_val |
| :------ | :----- | :------------------- | :---------------------- | ----------------: |
| g1 | auto | 2021-07-15T00:00:00Z | 2021-08-01T23:59:46.17Z | 40.6 |
| g2 | manned | 2021-07-02T00:00:00Z | 2021-08-01T23:59:34.57Z | 41.34 |
{{% /expand %}}
{{< /expand-wrapper >}}

View File

@ -0,0 +1,205 @@
---
title: Join on time
description: >
Use [`join.time()`](/flux/v0.x/stdlib/join/time/) to join two streams of data
based on time values in the `_time` column.
This type of join operation is common when joining two streams of
[time series data](/influxdb/latest/reference/glossary/#time-series-data).
menu:
flux_0_x:
parent: Join data
weight: 104
related:
- /flux/v0.x/join-data/troubleshoot-joins/
- /flux/v0.x/stdlib/join/
- /flux/v0.x/stdlib/join/time/
list_code_example: |
```js
import "join"
left = from(bucket: "example-bucket-1") |> //...
right = from(bucket: "example-bucket-2") |> //...
join.time(
left: left,
right: right,
as: (l, r) => ({l with field1: l._value, field2: r._value}),
)
```
---
Use [`join.time()`](/flux/v0.x/stdlib/join/time/) to join two streams of data
based on time values in the `_time` column.
This type of join operation is common when joining two streams of
[time series data](/influxdb/latest/reference/glossary/#time-series-data).
`join.time()` can use any of the available join methods.
Which method you use depends on your desired behavior:
- **inner** _(Default)_:
Drop any rows from both input streams that do not have a matching
row in the other stream.
- **left**:
Output a row for each row in the **left** data stream with data matching
from the **right** data stream. If there is no matching data in the **right**
data stream, non-group-key columns with values from the **right** data stream
are _null_.
- **right**:
Output a row for each row in the **right** data stream with data matching
from the **left** data stream. If there is no matching data in the **left**
data stream, non-group-key columns with values from the **left** data stream
are _null_.
- **full**:
Output a row for all rows in both the **left** and **right** input streams
and join rows that match based on their `_time` value.
## Use join.time to join your data
1. Import the `join` package.
2. Define the **left** and **right** data streams to join:
- Each stream must have a `_time` column.
- Each stream must have one or more columns with common values.
Column labels do not need to match, but column values do.
- Each stream should have identical [group keys](/flux/v0.x/get-started/data-model/#group-key).
_For more information, see [join data requirements](/flux/v0.x/join-data/#data-requirements)._
3. Use `join.time()` to join the two streams together.
Provide the following parameters:
- `left`: ({{< req >}}) Stream of data representing the left side of the join.
- `right`: ({{< req >}}) Stream of data representing the right side of the join.
- `as`: ({{< req >}}) [Join output function](/flux/v0.x/join-data/#join-output-function-as)
that returns a record with values from each input stream.
For example: `(l, r) => ({r with column1: l.column1, column2: l.column2})`.
- `method`: Join method to use. Default is `inner`.
The following example uses filtered selections from the
[**machineProduction** sample data set](/flux/v0.x/stdlib/influxdata/influxdb/sample/data/#set)
as the **left** and **right** data streams.
{{% note %}}
#### Example data grouping
The example below regroups both the left and right streams to remove the
`_field` column from the group key.
Because `join.time()` only compares tables with matching
[group key instances](/flux/v0.x/get-started/data-model/#example-group-key-instances),
to join streams with different `_field` column values, `_field` cannot be part
of the group key.
{{% /note %}}
```js
import "influxdata/influxdb/sample"
import "join"
left =
sample.data(set: "machineProduction")
|> filter(fn: (r) => r.stationID == "g1" or r.stationID == "g2" or r.stationID == "g3")
|> filter(fn: (r) => r._field == "pressure")
|> limit(n: 5)
|> group(columns: ["_time", "_value", "_field"], mode: "except")
right =
sample.data(set: "machineProduction")
|> filter(fn: (r) => r.stationID == "g1" or r.stationID == "g2" or r.stationID == "g3")
|> filter(fn: (r) => r._field == "pressure_target")
|> limit(n: 5)
|> group(columns: ["_time", "_value", "_field"], mode: "except")
join.time(method: "left", left: left, right: right, as: (l, r) => ({l with target: r._value}))
```
{{< expand-wrapper >}}
{{% expand "View example input and output" %}}
### Input
{{% note %}}
_`_start` and `_stop` columns have been omitted from input examples._
{{% /note %}}
#### left {#left-input}
| _time | _measurement | stationID | _field | _value |
| :---------------------- | :----------- | :-------- | :------- | -------: |
| 2021-08-01T00:00:00Z | machinery | g1 | pressure | 110.2617 |
| 2021-08-01T00:00:11.51Z | machinery | g1 | pressure | 110.3506 |
| 2021-08-01T00:00:19.53Z | machinery | g1 | pressure | 110.1836 |
| 2021-08-01T00:00:25.1Z | machinery | g1 | pressure | 109.6387 |
| 2021-08-01T00:00:36.88Z | machinery | g1 | pressure | 110.5021 |
| _time | _measurement | stationID | _field | _value |
| :---------------------- | :----------- | :-------- | :------- | -------: |
| 2021-08-01T00:00:00Z | machinery | g2 | pressure | 105.392 |
| 2021-08-01T00:00:27.93Z | machinery | g2 | pressure | 105.3786 |
| 2021-08-01T00:00:54.96Z | machinery | g2 | pressure | 105.4801 |
| 2021-08-01T00:01:17.27Z | machinery | g2 | pressure | 105.5656 |
| 2021-08-01T00:01:41.84Z | machinery | g2 | pressure | 105.5495 |
| _time | _measurement | stationID | _field | _value |
| :---------------------- | :----------- | :-------- | :------- | -------: |
| 2021-08-01T00:00:00Z | machinery | g3 | pressure | 110.5309 |
| 2021-08-01T00:00:14.46Z | machinery | g3 | pressure | 110.3746 |
| 2021-08-01T00:00:25.29Z | machinery | g3 | pressure | 110.3719 |
| 2021-08-01T00:00:38.77Z | machinery | g3 | pressure | 110.5362 |
| 2021-08-01T00:00:51.2Z | machinery | g3 | pressure | 110.4514 |
#### right {#right-input}
| _time | _measurement | stationID | _field | _value |
| :---------------------- | :----------- | :-------- | :-------------- | -----: |
| 2021-08-01T00:00:00Z | machinery | g1 | pressure_target | 110 |
| 2021-08-01T00:00:11.51Z | machinery | g1 | pressure_target | 110 |
| 2021-08-01T00:00:19.53Z | machinery | g1 | pressure_target | 110 |
| 2021-08-01T00:00:25.1Z | machinery | g1 | pressure_target | 110 |
| 2021-08-01T00:00:36.88Z | machinery | g1 | pressure_target | 110 |
| _time | _measurement | stationID | _field | _value |
| :---------------------- | :----------- | :-------- | :-------------- | -----: |
| 2021-08-01T00:00:00Z | machinery | g2 | pressure_target | 105 |
| 2021-08-01T00:00:27.93Z | machinery | g2 | pressure_target | 105 |
| 2021-08-01T00:00:54.96Z | machinery | g2 | pressure_target | 105 |
| 2021-08-01T00:01:17.27Z | machinery | g2 | pressure_target | 105 |
| 2021-08-01T00:01:41.84Z | machinery | g2 | pressure_target | 105 |
| _time | _measurement | stationID | _field | _value |
| :---------------------- | :----------- | :-------- | :-------------- | -----: |
| 2021-08-01T00:00:00Z | machinery | g3 | pressure_target | 110 |
| 2021-08-01T00:00:14.46Z | machinery | g3 | pressure_target | 110 |
| 2021-08-01T00:00:25.29Z | machinery | g3 | pressure_target | 110 |
| 2021-08-01T00:00:38.77Z | machinery | g3 | pressure_target | 110 |
| 2021-08-01T00:00:51.2Z | machinery | g3 | pressure_target | 110 |
### Output {#example-output}
| _time | _measurement | stationID | _field | _value | target |
| :---------------------- | :----------- | :-------- | :------- | -------: | :----- |
| 2021-08-01T00:00:00Z | machinery | g1 | pressure | 110.2617 | 110 |
| 2021-08-01T00:00:11.51Z | machinery | g1 | pressure | 110.3506 | 110 |
| 2021-08-01T00:00:19.53Z | machinery | g1 | pressure | 110.1836 | 110 |
| 2021-08-01T00:00:25.1Z | machinery | g1 | pressure | 109.6387 | 110 |
| 2021-08-01T00:00:36.88Z | machinery | g1 | pressure | 110.5021 | 110 |
| _time | _measurement | stationID | _field | _value | target |
| :---------------------- | :----------- | :-------- | :------- | -------: | :----- |
| 2021-08-01T00:00:00Z | machinery | g2 | pressure | 105.392 | 105 |
| 2021-08-01T00:00:27.93Z | machinery | g2 | pressure | 105.3786 | 105 |
| 2021-08-01T00:00:54.96Z | machinery | g2 | pressure | 105.4801 | 105 |
| 2021-08-01T00:01:17.27Z | machinery | g2 | pressure | 105.5656 | 105 |
| 2021-08-01T00:01:41.84Z | machinery | g2 | pressure | 105.5495 | 105 |
| _time | _measurement | stationID | _field | _value | target |
| :---------------------- | :----------- | :-------- | :------- | -------: | :----- |
| 2021-08-01T00:00:00Z | machinery | g3 | pressure | 110.5309 | 110 |
| 2021-08-01T00:00:14.46Z | machinery | g3 | pressure | 110.3746 | 110 |
| 2021-08-01T00:00:25.29Z | machinery | g3 | pressure | 110.3719 | 110 |
| 2021-08-01T00:00:38.77Z | machinery | g3 | pressure | 110.5362 | 110 |
| 2021-08-01T00:00:51.2Z | machinery | g3 | pressure | 110.4514 | 110 |
{{% /expand %}}
{{< /expand-wrapper >}}

View File

@ -0,0 +1,186 @@
---
title: Troubleshoot join operations
description: >
Learn how to troubleshoot common behaviors and errors that may occur when using
the [`join` package](/flux/v0.x/stdlib/join).
menu:
flux_0_x:
name: Troubleshoot joins
parent: Join data
weight: 105
---
Learn how to troubleshoot common behaviors and errors that may occur when using
the [`join` package](/flux/v0.x/stdlib/join).
{{% note %}}
#### Submit issues for unexplained behaviors or errors
This is a "living" document that may be updated with common issues
that users may run into when using the [`join` package](/flux/v0.x/stdlib/join).
If you have questions about a behavior or error that is not documented here,
please submit an issue to either the InfluxData Documentation or Flux GitHub repositories:
- [Submit a documentation issue](https://github.com/influxdata/docs-v2/issues/new/choose)
- [Submit a Flux issue](https://github.com/influxdata/flux/issues/new/choose)
{{% /note %}}
- [Troubleshoot join behaviors](#troubleshoot-join-behaviors)
- [Troubleshoot join error messages](#troubleshoot-join-error-messages)
## Troubleshoot join behaviors
### Columns explicitly mapped in the join are null
In some cases, your join output may include _null_ values in
columns where you expect non-null values. This may be caused by one of the following issues:
---
{{< flex class="troubleshoot-row" >}}
{{% flex-content %}}
#### Cause {#cause-b1}
**The group keys of each input stream aren't the same.**
Functions in the `join` package use group keys to quickly identify what tables
should be compared.
{{% /flex-content %}}
{{% flex-content %}}
#### Solution {#solution-b1}
Use [`group()`](/flux/v0.x/stdlib/universe/group/) to regroup
your two input streams so their group keys match before attempting to join
them together.
{{% /flex-content %}}
{{< /flex >}}
---
{{< flex >}}
{{% flex-content %}}
#### Cause {#cause-b2}
**There are no matching _group key instances_ in your data streams**.
Functions in the `join` package only compare tables with matching
[group key instances](/flux/v0.x/get-started/data-model/#example-group-key-instances).
Input streams may have matching group keys, but there are no matching group
key instances in your stream.
This may happen when joining two separate fields
queried from InfluxDB. By default, InfluxDB returns data with `_field` as part
of the group key. If each stream contains a different field, tables in the two
streams won't be compared because they won't have any matching _group key instances_.
{{% /flex-content %}}
{{% flex-content %}}
#### Solution {#solution-b2}
Use [`group()`](/flux/v0.x/stdlib/universe/group/) to remove
any columns from the group keys of each input stream that would prevent
group key instances from matching.
{{% /flex-content %}}
{{< /flex >}}
---
## Troubleshoot join error messages
- [table is missing column \'\<column\>\'](#table-is-missing-column-column)
- [table is missing label \<label\>](#table-is-missing-label-label)
- [record is missing label \<label\>](#record-is-missing-label-label)
### table is missing column `'<column>'`
##### Error message
```js
cannot set join columns in left table stream: table is missing column '<column>'
```
{{< flex >}}
{{% flex-content %}}
#### Cause {#cause-e1}
**Your `on` join predicate uses a column that doesn't exist**.
In the `on` predicate function, you're trying to compare a column
that doesn't exist in one of your input streams.
{{% /flex-content %}}
{{% flex-content %}}
#### Solution {#solution-e1}
Ensure the columns that you're comparing in the `on` predicate
function exist in the input streams.
If necessary, update column names in the predicate function.
{{% /flex-content %}}
{{< /flex >}}
---
### table is missing label `<label>`
##### Error message
```js
table is missing label <label>
```
{{< flex >}}
{{% flex-content %}}
#### Cause {#cause-e2}
**Your `on` join predicate uses a column that doesn't exist**.
In the `on` predicate function for an outer join, you're trying to use a value
from a column that doesn't exist in the "primary" input stream
(`left` for `join.left()` and `right` for `join.right()`).
{{% /flex-content %}}
{{% flex-content %}}
#### Solution {#solution-e2}
Ensure the columns that you're comparing in the `on` predicate
function actually exist in the input streams.
If necessary, update column names in the predicate function.
{{% /flex-content %}}
{{< /flex >}}
---
### record is missing label `<label>`
##### Error message
```js
record is missing label <label> (argument <left or right>)
```
{{< flex >}}
{{% flex-content %}}
#### Cause {#cause-e3}
**Your `on` join predicate uses a column that doesn't exist**.
In the `on` predicate function, you're trying to compare a column
that doesn't exist in one of your input streams.
{{% /flex-content %}}
{{% flex-content %}}
#### Solution {#solution-e3}
Ensure the columns that you're comparing in the `on` predicate
function actually exist in the input streams.
If necessary, update column names in the predicate function.
{{% /flex-content %}}
{{< /flex >}}
---
{{< flex >}}
{{% flex-content %}}
#### Cause {#cause-e4}
**Your `as` output schema function uses a column that doesn't exist**.
If using an **outer join**, the `as` function is trying to use a value
from a column that doesn't exist in the "primary" input stream
(`left` for `join.left()` and `right` for `join.right()`).
{{% /flex-content %}}
{{% flex-content %}}
#### Solution {#solution-e4}
Ensure the columns that you're using in the `as` output function to assign
values to the output actually exist in the input streams.
{{% /flex-content %}}
{{< /flex >}}

View File

@ -5,19 +5,19 @@
# build/deploy process.
flux:
latest: 0.175.0
latest: 0.184.1
cloud:
current: 0.174.1
current: 0.181.0
oss:
'2.4': 0.179.0
'2.3': 0.171.0
'2.2': 0.161.0
'2.2': 0.162.0
'2.1': 0.139.0
'2.0': 0.131.0
'1.8': 0.65.1
'1.7': 0.50.2
nightly: 0.174.1
nightly: 0.181.0
enterprise:
'1.10': 0.170.0
'1.9': 0.161.0
'1.8': 0.65.1
'1.7': 0.50.2

View File

@ -1535,6 +1535,41 @@
- /influxdb/v2.0/reference/flux/stdlib/interpolate/linear/
- /influxdb/cloud/reference/flux/stdlib/interpolate/linear/
/flux/v0.x/stdlib/join/_index.md: |
related:
- /flux/v0.x/join-data/
- /flux/v0.x/join-data/troubleshoot-joins/
/flux/v0.x/stdlib/join/full.md: |
related:
- /flux/v0.x/join-data/full-outer/
- /flux/v0.x/join-data/troubleshoot-joins/
/flux/v0.x/stdlib/join/inner.md: |
related:
- /flux/v0.x/join-data/inner/
- /flux/v0.x/join-data/troubleshoot-joins/
/flux/v0.x/stdlib/join/left.md: |
related:
- /flux/v0.x/join-data/left-outer/
- /flux/v0.x/join-data/troubleshoot-joins/
/flux/v0.x/stdlib/join/right.md: |
related:
- /flux/v0.x/join-data/right-outer/
- /flux/v0.x/join-data/troubleshoot-joins/
/flux/v0.x/stdlib/join/tables.md: |
related:
- /flux/v0.x/join-data/
- /flux/v0.x/join-data/troubleshoot-joins/
/flux/v0.x/stdlib/join/time.md: |
related:
- /flux/v0.x/join-data/time/
- /flux/v0.x/join-data/troubleshoot-joins/
/flux/v0.x/stdlib/json/_index.md: |
aliases:
- /influxdb/v2.0/reference/flux/functions/json/

View File

@ -10,6 +10,14 @@ const path = require('path')
// GitHub token to access files in the private InfluxDB Enterprise repo
const githubToken = process.env.GITHUB_TOKEN
if (typeof githubToken === 'undefined') {
console.error(`
A GitHub token is required for this operation. Please set a GITHUB_TOKEN environment variable.
Use the GitHub token stored in the Docs Team vault in 1Password.
`);
process.exit(1);
}
// Get the latest version of Flux
async function getLatestFlux() {
const { request } = await axios

View File

@ -3,6 +3,7 @@
{{ $hlevel := .Get "hlevel" | default "h3"}}
{{ $readMore := .Get "readmore" | default false }}
{{ $hr := .Get "hr" | default false }}
{{ $doNotList := .Get "filterOut" | default "" }}
{{ if eq $show "all" }}
{{ .Scratch.Set "pages" (union .Page.Pages .Page.Sections) }}
@ -12,7 +13,7 @@
{{ .Scratch.Set "pages" .Page.RegularPages }}
{{ end }}
{{ $pages := .Scratch.Get "pages" }}
{{ $pages := where (.Scratch.Get "pages") "Title" "not in" (split $doNotList ", ") }}
{{ if eq $type "articles" }}
<div class="children-links">

View File

@ -1,2 +1,14 @@
{{ $svg := .Get 0 }}
{{ $svg | readFile | safeHTML }}
{{ $implicitSvg := .Get 0 }}
{{ $svg := .Get "svg" | default $implicitSvg }}
{{ $implicitClass := .Get 1 | default "" }}
{{ $class := .Get "class" | default $implicitClass }}
{{ $svgCode := $svg | readFile }}
{{ $scratch := newScratch }}
{{ $scratch.Set "svgOutput" $svgCode }}
{{ if ne $class "" }}
{{ $scratch.Set "svgOutput" ($scratch.Get "svgOutput" | replaceRE `<svg ` (print "<svg class='" $class "'")) }}
{{ end }}
{{ $scratch.Get "svgOutput" | safeHTML }}

View File

@ -0,0 +1,10 @@
<?xml version="1.0" encoding="utf-8"?>
<svg id="join-diagram" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
viewBox="0 0 98 64" xml:space="preserve">
<path id="left" d="M37.9,31.8c0-9.1,4.3-17.1,11-22.2c-4.7-3.6-10.6-5.8-17-5.8c-15.5,0-28,12.5-28,28c0,15.5,12.5,28,28,28
c6.4,0,12.3-2.1,17-5.8C42.2,48.9,37.9,40.8,37.9,31.8z"/>
<path id="center" d="M59.9,31.8c0-9.1-4.3-17.1-11-22.2c-6.7,5.1-11,13.2-11,22.2c0,9.1,4.3,17.1,11,22.2
C55.5,48.9,59.9,40.8,59.9,31.8z"/>
<path id="right" d="M65.9,3.8c-6.4,0-12.3,2.1-17,5.8c6.7,5.1,11,13.2,11,22.2c0,9.1-4.3,17.1-11,22.2c4.7,3.6,10.6,5.8,17,5.8
c15.5,0,28-12.5,28-28C93.9,16.3,81.3,3.8,65.9,3.8z"/>
</svg>

After

Width:  |  Height:  |  Size: 721 B