From 9e4034d260264feb36ffb04d1ad854297dd2250a Mon Sep 17 00:00:00 2001 From: Scott Anderson Date: Mon, 4 Mar 2024 09:15:10 -0700 Subject: [PATCH] Document v3 custom partitions (#5309) * WIP added storage engine diagram svg and info * WIP scaling strategy diagrams * WIP finished storage architecture doc * WIP added offset and time trim capbilities to current-date shortcode * WIP custom partitions * finalized define partitions doc * Apply suggestions from code review Co-authored-by: Jason Stirnaman * updates to address PR feedback * various fixes * minor updates to storage architecture * ported custom partition content to clustered * fix incorrect menu key * influxctl 2.5.0 (#5339) * updated influxctl database create with partition info * added influxctl table create command * ported influxctl table create command to clustered * Apply suggestions from code review Co-authored-by: Jason Stirnaman * fixed mentions of influx vs influxctl in v3 docs --------- Co-authored-by: Jason Stirnaman * Add manage table task-based docs with partitioning info (#5340) * add manage table task-based docs with partitioning info * Apply suggestions from code review Co-authored-by: Jason Stirnaman --------- Co-authored-by: Jason Stirnaman * influxctl 2.5.0 release notes (#5343) * influxctl 2.5.0 release notes * Apply suggestions from code review Co-authored-by: Joshua Powers --------- Co-authored-by: Joshua Powers --------- Co-authored-by: Jason Stirnaman Co-authored-by: Joshua Powers --- assets/js/datetime.js | 21 +- assets/styles/layouts/_article.scss | 17 +- assets/styles/layouts/article/_columns.scss | 39 ++ .../layouts/article/_html-diagrams.scss | 33 ++ assets/styles/layouts/article/_opacity.scss | 19 + assets/styles/layouts/article/_svgs.scss | 45 +++ .../admin/custom-partitions/_index.md | 346 ++++++++++++++++++ .../admin/custom-partitions/best-practices.md | 54 +++ .../define-custom-partitions.md | 100 +++++ .../custom-partitions/partition-templates.md | 244 ++++++++++++ 
.../cloud-dedicated/admin/databases/create.md | 35 +- .../cloud-dedicated/admin/tables/_index.md | 23 ++ .../cloud-dedicated/admin/tables/create.md | 71 ++++ .../cloud-dedicated/admin/tables/list.md | 85 +++++ .../cloud-dedicated/admin/tokens/_index.md | 2 +- .../reference/cli/influxctl/_index.md | 2 +- .../cli/influxctl/database/_index.md | 4 +- .../cli/influxctl/database/create.md | 49 ++- .../reference/cli/influxctl/table/_index.md | 33 ++ .../reference/cli/influxctl/table/create.md | 88 +++++ .../reference/cli/influxctl/token/_index.md | 4 +- .../reference/internals/storage-engine.md | 184 ++++++++++ .../reference/release-notes/influxctl.md | 36 ++ .../admin/custom-partitions/_index.md | 346 ++++++++++++++++++ .../admin/custom-partitions/best-practices.md | 55 +++ .../define-custom-partitions.md | 100 +++++ .../custom-partitions/partition-templates.md | 257 +++++++++++++ .../clustered/admin/databases/create.md | 46 ++- .../influxdb/clustered/admin/tables/_index.md | 23 ++ .../influxdb/clustered/admin/tables/create.md | 71 ++++ .../influxdb/clustered/admin/tables/list.md | 85 +++++ .../reference/cli/influxctl/_index.md | 2 +- .../cli/influxctl/database/_index.md | 4 +- .../cli/influxctl/database/create.md | 49 ++- .../reference/cli/influxctl/table/_index.md | 31 ++ .../reference/cli/influxctl/table/create.md | 88 +++++ .../reference/cli/influxctl/token/_index.md | 4 +- .../reference/internals/storage-engine.md | 184 ++++++++++ .../reference/release-notes/influxctl.md | 36 ++ .../influxdb/v2/reference/internals/shards.md | 4 +- data/products.yml | 2 +- layouts/shortcodes/columns.html | 6 + layouts/shortcodes/datetime/current-date.html | 7 +- .../html-diagram/scaling-strategy.html | 15 + layouts/shortcodes/icon.html | 2 + static/svgs/v3-storage-architecture.svg | 127 +++++++ 46 files changed, 3019 insertions(+), 59 deletions(-) create mode 100644 assets/styles/layouts/article/_columns.scss create mode 100644 assets/styles/layouts/article/_opacity.scss create mode 
100644 content/influxdb/cloud-dedicated/admin/custom-partitions/_index.md create mode 100644 content/influxdb/cloud-dedicated/admin/custom-partitions/best-practices.md create mode 100644 content/influxdb/cloud-dedicated/admin/custom-partitions/define-custom-partitions.md create mode 100644 content/influxdb/cloud-dedicated/admin/custom-partitions/partition-templates.md create mode 100644 content/influxdb/cloud-dedicated/admin/tables/_index.md create mode 100644 content/influxdb/cloud-dedicated/admin/tables/create.md create mode 100644 content/influxdb/cloud-dedicated/admin/tables/list.md create mode 100644 content/influxdb/cloud-dedicated/reference/cli/influxctl/table/_index.md create mode 100644 content/influxdb/cloud-dedicated/reference/cli/influxctl/table/create.md create mode 100644 content/influxdb/cloud-dedicated/reference/internals/storage-engine.md create mode 100644 content/influxdb/clustered/admin/custom-partitions/_index.md create mode 100644 content/influxdb/clustered/admin/custom-partitions/best-practices.md create mode 100644 content/influxdb/clustered/admin/custom-partitions/define-custom-partitions.md create mode 100644 content/influxdb/clustered/admin/custom-partitions/partition-templates.md create mode 100644 content/influxdb/clustered/admin/tables/_index.md create mode 100644 content/influxdb/clustered/admin/tables/create.md create mode 100644 content/influxdb/clustered/admin/tables/list.md create mode 100644 content/influxdb/clustered/reference/cli/influxctl/table/_index.md create mode 100644 content/influxdb/clustered/reference/cli/influxctl/table/create.md create mode 100644 content/influxdb/clustered/reference/internals/storage-engine.md create mode 100644 layouts/shortcodes/columns.html create mode 100644 layouts/shortcodes/html-diagram/scaling-strategy.html create mode 100644 static/svgs/v3-storage-architecture.svg diff --git a/assets/js/datetime.js b/assets/js/datetime.js index 33a793c43..ec0f8ee2b 100644 --- a/assets/js/datetime.js +++ 
b/assets/js/datetime.js @@ -2,7 +2,18 @@ const monthNames = ["January", "February", "March", "April", "May", "June", "Jul var date = new Date() var currentTimestamp = date.toISOString().replace(/^(.*)(\.\d+)(Z)/, '$1$3') // 2023-01-01T12:34:56Z var currentTime = date.toISOString().replace(/(^.*T)(.*)(Z)/, '$2') + '084216' // 12:34:56.000084216 -var currentDate = date.toISOString().replace(/\d{2}\:\d{2}\:\d{2}\.\d*/, '00:00:00') // 2023-01-01T00:00:00Z + +function currentDate(offset=0, trimTime=false) { + outputDate = new Date(date) + outputDate.setDate(outputDate.getDate() + offset) + + if (trimTime) { + return outputDate.toISOString().replace(/T.*$/, '') // 2023-01-01 + } else { + return outputDate.toISOString().replace(/T.*$/, 'T00:00:00Z') // 2023-01-01T00:00:00Z + } +} + function enterpriseEOLDate() { var inTwoYears = date.setFullYear(date.getFullYear() + 2) earliestEOL = new Date(inTwoYears) @@ -11,5 +22,9 @@ function enterpriseEOLDate() { $('span.current-timestamp').text(currentTimestamp) $('span.current-time').text(currentTime) -$('span.current-date').text(currentDate) -$('span.enterprise-eol-date').text(enterpriseEOLDate) \ No newline at end of file +$('span.enterprise-eol-date').text(enterpriseEOLDate) +$('span.current-date').each(function() { + var dayOffset = parseInt($(this).attr("offset")) + var trimTime = $(this).attr("trim-time") === "true" + $(this).text(currentDate(dayOffset, trimTime)) +}) diff --git a/assets/styles/layouts/_article.scss b/assets/styles/layouts/_article.scss index 91df7f025..4180b42a8 100644 --- a/assets/styles/layouts/_article.scss +++ b/assets/styles/layouts/_article.scss @@ -208,7 +208,6 @@ "article/warn"; - //////////////////////////////// Miscellaneous /////////////////////////////// .required, @@ -227,17 +226,11 @@ font-weight: $medium; } - &.blue { - color: $b-dodger; - } - - &.green { - color: $gr-viridian; - } - - &.magenta { - color: $p-comet; - } + &.normal {font-style: normal;} + + &.blue {color: $b-dodger;} + &.green 
{color: $gr-viridian;} + &.magenta {color: $p-comet;} } h2, diff --git a/assets/styles/layouts/article/_columns.scss b/assets/styles/layouts/article/_columns.scss new file mode 100644 index 000000000..6b1f400e4 --- /dev/null +++ b/assets/styles/layouts/article/_columns.scss @@ -0,0 +1,39 @@ +div { + &.columns-wrapper { + container-type: inline-size; + container-name: columns; + } + &.columns-2 {columns: 2;} + &.columns-3 {columns: 3;} + &.columns-4 {columns: 4;} + + &.columns-2, + &.columns-3, + &.columns-4 { + margin: 1.5rem 0 2.5rem; + + > *:first-child {margin-top: 0} + } +} + +///////////////////////////////// MEDIA QUERIES //////////////////////////////// + +@container columns (max-width: 780px) { + div { + &.columns-4 {columns: 3;} + } +} +@container columns (max-width: 550px) { + div { + &.columns-3, + &.columns-4 {columns: 2;} + } +} +@container columns (max-width: 350px) { + div { + &.columns-2, + &.columns-3, + &.columns-4 {columns: 1;} + } +} + diff --git a/assets/styles/layouts/article/_html-diagrams.scss b/assets/styles/layouts/article/_html-diagrams.scss index 8d58abe0f..3124fa929 100644 --- a/assets/styles/layouts/article/_html-diagrams.scss +++ b/assets/styles/layouts/article/_html-diagrams.scss @@ -424,6 +424,39 @@ table tr.point{ } } +/////////////////////////// Scaling strategy diagrams ////////////////////////// + +.scaling-strategy { + margin: 2rem 0 3rem; + + .node { + border: 2px solid $article-text; + border-radius: 6px; + height: 6rem; + width: 4rem; + } + + .dashed { + border-style: dashed; + border-color: rgba($article-text, .5) + } + + .outer { + height: auto; + width: fit-content; + position: relative; + } + + .inner { + margin: 2rem 1.25rem -2px; + } + + &.horizontal { + display: flex; + .node:not(:first-child) {margin-left: 1.25rem;} + } +} + //////////////////////////////////////////////////////////////////////////////// ///////////////////////////////// MEDIA QUERIES //////////////////////////////// 
//////////////////////////////////////////////////////////////////////////////// diff --git a/assets/styles/layouts/article/_opacity.scss b/assets/styles/layouts/article/_opacity.scss new file mode 100644 index 000000000..2c9d74550 --- /dev/null +++ b/assets/styles/layouts/article/_opacity.scss @@ -0,0 +1,19 @@ +em, span, strong { + &.op90 {opacity: .9;} + &.op85 {opacity: .85;} + &.op80 {opacity: .8;} + &.op75 {opacity: .75;} + &.op70 {opacity: .7;} + &.op65 {opacity: .65;} + &.op60 {opacity: .6;} + &.op55 {opacity: .55;} + &.op50 {opacity: .5;} + &.op45 {opacity: .45;} + &.op40 {opacity: .4;} + &.op35 {opacity: .35;} + &.op30 {opacity: .3;} + &.op25 {opacity: .25;} + &.op20 {opacity: .2;} + &.op15 {opacity: .15;} + &.op10 {opacity: .1;} +} \ No newline at end of file diff --git a/assets/styles/layouts/article/_svgs.scss b/assets/styles/layouts/article/_svgs.scss index 5acb4ca62..50581a862 100644 --- a/assets/styles/layouts/article/_svgs.scss +++ b/assets/styles/layouts/article/_svgs.scss @@ -37,6 +37,51 @@ svg { &.full {path { &#left, &#center, &#right {fill:$fill-color; }}} } + ////////////////// InfluxDB v3 storage architecture diagram ////////////////// + + &#influxdb-v3-storage-architecture { + margin: 2rem 0 3rem; + max-width: 750px; + + .shape { + fill:none; + stroke: $article-text; + stroke-width: 1.5; + stroke-miterlimit :10; + + &.op70 {opacity: 0.7} + &.op50 {opacity: 0.5} + &.op25 {opacity: 0.25} + + &.dashed { + stroke-width: 1; + stroke-dasharray:4,3; + } + } + .title { + fill: $product-enterprise; + font-weight: $medium; + font-size:20px; + } + .text { + fill: $article-text; + font-size: 18px; + &.small {font-size: 16px;} + &.bold { + font-weight: $medium; + color: $article-bold; + } + &.italic {font-style: italic;} + } + .diagram-line { + fill: none; + stroke: $nav-active; + stroke-width: 1.5; + stroke-miterlimit: 10; + &.dashed {stroke-dasharray:2,4;} + } + .arrow {fill: $nav-active;} + } } @include media(small) { diff --git 
a/content/influxdb/cloud-dedicated/admin/custom-partitions/_index.md b/content/influxdb/cloud-dedicated/admin/custom-partitions/_index.md new file mode 100644 index 000000000..85a19e95a --- /dev/null +++ b/content/influxdb/cloud-dedicated/admin/custom-partitions/_index.md @@ -0,0 +1,346 @@ +--- +title: Manage data partitioning +seotitle: Manage data partitioning on disk +description: > + Customize your partitioning strategy to optimize query performance for your + specific schema and workload. +menu: + influxdb_cloud_dedicated: + parent: Administer InfluxDB Cloud +weight: 103 +influxdb/cloud-dedicated/tags: [storage] +related: + - /influxdb/cloud-dedicated/reference/internals/storage-engine/ +--- + +When writing data to {{< product-name >}}, the InfluxDB v3 storage engine stores +data in the [Object store](/influxdb/cloud-dedicated/reference/internals/storage-engine/#object-store) +in [Apache Parquet](https://parquet.apache.org/) format. +Each Parquet file represents a _partition_--a logical grouping of data. +By default, InfluxDB partitions each table by day. +{{< product-name >}} lets you customize the partitioning strategy and partition +by tag values and different time intervals. +Customize your partitioning strategy to optimize query performance for your +specific schema and workload. + +- [Advantages](#advantages) +- [Disadvantages](#disadvantages) +- [Limitations](#limitations) +- [How partitioning works](#how-partitioning-works) + - [Partition templates](#partition-templates) + - [Partition keys](#partition-keys) +- [Partitions in the query life cycle](#partitions-in-the-query-life-cycle) +- [Partition guides](#partition-guides) + {{< children type="anchored-list" >}} + +## Advantages + +The primary advantage of custom partitioning is that it lets you customize your +storage structure to improve query performance specific to your schema and workload. + +- **Optimized storage for improved performance on specific types of queries**. 
+ For example, if queries often select data with a specific tag value, you can + partition by that tag to improve the performance of those queries. +- **Optimized storage for specific types of data**. For example, if the data you + store is sparse and the time ranges you query are often much larger than a day, + you could partition your data by week instead of by day. + +## Disadvantages + +Using custom partitioning may increase the load on other parts of the +[InfluxDB v3 storage engine](/influxdb/cloud-dedicated/reference/internals/storage-engine/), +but each can be scaled individually to address the added load. + +{{% note %}} +_The following disadvantages assume that your custom partitioning strategy includes +additional tags to partition by or partition intervals smaller than a day._ +{{% /note %}} + +- **Increased load on the [Ingester](/influxdb/cloud-dedicated/reference/internals/storage-engine/#ingester)** + as it groups data into smaller partitions and files. +- **Increased load on the [Catalog](/influxdb/cloud-dedicated/reference/internals/storage-engine/#catalog)** + as more references to partition Parquet file locations are stored and queried. +- **Increased load on the [Compactor](/influxdb/cloud-dedicated/reference/internals/storage-engine/#compactor)** + as more partition Parquet files need to be compacted. +- **Increased costs associated with [Object storage](/influxdb/cloud-dedicated/reference/internals/storage-engine/#object-store)** + as more partition Parquet files are created and stored. +- **Risk of decreased performance for queries that don't use tags in the WHERE clause**. + These queries may end up reading many partitions and smaller files, degrading performance. + +## Limitations + +Custom partitioning has the following limitations: + +- Database and table partitions can only be defined on create. + You cannot update the partition strategy of a database or table after it has + been created. 
+- You can partition by up to eight dimensions (seven tags and a time interval). + +## How partitioning works + +### Partition templates + +A partition template defines the pattern used for _[partition keys](#partition-keys)_ +and determines the time interval that data is partitioned by. +Partition templates use tag values and +[Rust strftime date and time formatting syntax](https://docs.rs/chrono/latest/chrono/format/strftime/index.html). + +_For more detailed information, see [Partition templates](/influxdb/cloud-dedicated/admin/custom-partitions/partition-templates/)._ + +### Partition keys + +A partition key uniquely identifies a partition. The structure of partition keys +is defined by a _[partition template](#partition-templates)_. Partition keys are +composed of up to eight parts or dimensions (tags and time). +Each part is delimited by the partition key separator (`|`). + +{{< expand-wrapper >}} +{{% expand "View example partition templates and keys" %}} + +Given the following line protocol with the following timestamps: + +- 2023-12-31T23:00:00Z +- 2024-01-01T00:00:00Z +- 2024-01-01T01:00:00Z + +```text +production,line=A,station=1 temp=81.2,qty=35i 1704063600000000000 +production,line=A,station=2 temp=92.8,qty=35i 1704063600000000000 +production,line=B,station=1 temp=101.1,qty=43i 1704063600000000000 +production,line=B,station=2 temp=102.4,qty=43i 1704063600000000000 +production,line=A,station=1 temp=81.9,qty=36i 1704067200000000000 +production,line=A,station=2 temp=110.0,qty=22i 1704067200000000000 +production,line=B,station=1 temp=101.8,qty=44i 1704067200000000000 +production,line=B,station=2 temp=105.7,qty=44i 1704067200000000000 +production,line=A,station=1 temp=82.2,qty=35i 1704070800000000000 +production,line=A,station=2 temp=92.1,qty=30i 1704070800000000000 +production,line=B,station=1 temp=102.4,qty=43i 1704070800000000000 +production,line=B,station=2 temp=106.5,qty=43i 1704070800000000000 +``` + +--- + +{{% flex %}} + + + +{{% flex-content "half" 
%}} + +##### Partition template parts + +- `%Y-%m-%d` (by day, default format) + +{{% /flex-content %}} +{{% flex-content %}} + +##### Partition keys + +- `2023-12-31` +- `2024-01-01` + +{{% /flex-content %}} + + + +{{% /flex %}} + +--- + +{{% flex %}} + + + +{{% flex-content "half" %}} + +##### Partition template parts + +- `line` +- `%d %b %Y` (by day, non-default format) + +{{% /flex-content %}} +{{% flex-content %}} + +##### Partition keys + +- `A | 31 Dec 2023` +- `B | 31 Dec 2023` +- `A | 01 Jan 2024` +- `B | 01 Jan 2024` + +{{% /flex-content %}} + + + +{{% /flex %}} + +--- + +{{% flex %}} + + + +{{% flex-content "half" %}} + +##### Partition template parts + +- `line` +- `station` +- `%Y-%m-%d` (by day, default format) + +{{% /flex-content %}} +{{% flex-content %}} + +##### Partition keys + +- `A | 1 | 2023-12-31` +- `A | 2 | 2023-12-31` +- `B | 1 | 2023-12-31` +- `B | 2 | 2023-12-31` +- `A | 1 | 2024-01-01` +- `A | 2 | 2024-01-01` +- `B | 1 | 2024-01-01` +- `B | 2 | 2024-01-01` + +{{% /flex-content %}} + + + +{{% /flex %}} + +--- + +{{% flex %}} + + + +{{% flex-content "half" %}} + +##### Partition template parts + +- `line` +- `station` +- `%Y-%m-%d %H:00` (by hour) + +{{% /flex-content %}} +{{% flex-content %}} + +##### Partition keys + +- `A | 1 | 2023-12-31 23:00` +- `A | 2 | 2023-12-31 23:00` +- `B | 1 | 2023-12-31 23:00` +- `B | 2 | 2023-12-31 23:00` +- `A | 1 | 2024-01-01 00:00` +- `A | 2 | 2024-01-01 00:00` +- `B | 1 | 2024-01-01 00:00` +- `B | 2 | 2024-01-01 00:00` +- `A | 1 | 2024-01-01 01:00` +- `A | 2 | 2024-01-01 01:00` +- `B | 1 | 2024-01-01 01:00` +- `B | 2 | 2024-01-01 01:00` + +{{% /flex-content %}} + + + +{{% /flex %}} + +{{% /expand %}} +{{< /expand-wrapper >}} + +## Partitions in the query life cycle + +When querying data: + +1. 
The [Catalog](/influxdb/cloud-dedicated/reference/internals/storage-engine/#catalog) + provides the v3 query engine ([Querier](/influxdb/cloud-dedicated/reference/internals/storage-engine/#querier)) + with the locations of partitions that contain the queried time series data. +2. The query engine reads all rows in the returned partitions to identify what + rows match the logic in the query and should be included in the query result. + +The faster the query engine can identify what partitions to read and then read +the data in those partitions, the more performant queries are. + +_For more information about the query life cycle, see +[InfluxDB v3 query life cycle](/influxdb/cloud-dedicated/reference/internals/storage-engine/#query-life-cycle)._ + +##### Query example + +Consider the following query that selects everything in the `production` table +where the `line` tag is `A` and the `station` tag is `1`: + +```sql +SELECT * +FROM production +WHERE + time >= now() - INTERVAL '1 week' + AND line = 'A' + AND station = '1' +``` + +Using the default partitioning strategy (by day), the query engine +reads eight separate partitions (one partition for today and one for each of the +last seven days): + +- {{< datetime/current-date trimTime=true >}} +- {{< datetime/current-date offset=-1 trimTime=true >}} +- {{< datetime/current-date offset=-2 trimTime=true >}} +- {{< datetime/current-date offset=-3 trimTime=true >}} +- {{< datetime/current-date offset=-4 trimTime=true >}} +- {{< datetime/current-date offset=-5 trimTime=true >}} +- {{< datetime/current-date offset=-6 trimTime=true >}} +- {{< datetime/current-date offset=-7 trimTime=true >}} + +The query engine must scan _all_ rows in the partitions to identify rows +where `line` is `A` and `station` is `1`. This process takes valuable time +and results in less performant queries. 
+ +However, if you partition by other tags, InfluxDB can identify partitions that +contain only the tag values your query needs and spend less time +scanning rows to see if they contain the tag values. + +For example, if data is partitioned by `line`, `station`, and day, although +there are more partition files, the query engine can quickly identify and read +only those with data relevant to the query: + +{{% columns 4 %}} + +- A | 1 | {{< datetime/current-date trimTime=true >}} +- A | 2 | {{< datetime/current-date trimTime=true >}} +- B | 1 | {{< datetime/current-date trimTime=true >}} +- B | 2 | {{< datetime/current-date trimTime=true >}} +- A | 1 | {{< datetime/current-date offset=-1 trimTime=true >}} +- A | 2 | {{< datetime/current-date offset=-1 trimTime=true >}} +- B | 1 | {{< datetime/current-date offset=-1 trimTime=true >}} +- B | 2 | {{< datetime/current-date offset=-1 trimTime=true >}} +- A | 1 | {{< datetime/current-date offset=-2 trimTime=true >}} +- A | 2 | {{< datetime/current-date offset=-2 trimTime=true >}} +- B | 1 | {{< datetime/current-date offset=-2 trimTime=true >}} +- B | 2 | {{< datetime/current-date offset=-2 trimTime=true >}} +- A | 1 | {{< datetime/current-date offset=-3 trimTime=true >}} +- A | 2 | {{< datetime/current-date offset=-3 trimTime=true >}} +- B | 1 | {{< datetime/current-date offset=-3 trimTime=true >}} +- B | 2 | {{< datetime/current-date offset=-3 trimTime=true >}} +- A | 1 | {{< datetime/current-date offset=-4 trimTime=true >}} +- A | 2 | {{< datetime/current-date offset=-4 trimTime=true >}} +- B | 1 | {{< datetime/current-date offset=-4 trimTime=true >}} +- B | 2 | {{< datetime/current-date offset=-4 trimTime=true >}} +- A | 1 | {{< datetime/current-date offset=-5 trimTime=true >}} +- A | 2 | {{< datetime/current-date offset=-5 trimTime=true >}} +- B | 1 | {{< datetime/current-date offset=-5 trimTime=true >}} +- B | 2 | {{< datetime/current-date offset=-5 trimTime=true >}} +- A | 1 | {{< datetime/current-date offset=-6 
trimTime=true >}} +- A | 2 | {{< datetime/current-date offset=-6 trimTime=true >}} +- B | 1 | {{< datetime/current-date offset=-6 trimTime=true >}} +- B | 2 | {{< datetime/current-date offset=-6 trimTime=true >}} +- A | 1 | {{< datetime/current-date offset=-7 trimTime=true >}} +- A | 2 | {{< datetime/current-date offset=-7 trimTime=true >}} +- B | 1 | {{< datetime/current-date offset=-7 trimTime=true >}} +- B | 2 | {{< datetime/current-date offset=-7 trimTime=true >}} + {{% /columns %}} + +--- + +## Partition guides + +{{< children >}} diff --git a/content/influxdb/cloud-dedicated/admin/custom-partitions/best-practices.md b/content/influxdb/cloud-dedicated/admin/custom-partitions/best-practices.md new file mode 100644 index 000000000..a5b38c207 --- /dev/null +++ b/content/influxdb/cloud-dedicated/admin/custom-partitions/best-practices.md @@ -0,0 +1,54 @@ +--- +title: Partitioning best practices +description: > + Learn best practices for applying custom partition strategies to your data + stored in InfluxDB. +menu: + influxdb_cloud_dedicated: + name: Best practices + parent: Manage data partitioning +weight: 202 +--- + +Use the following best practices when defining custom partitioning strategies +for your data stored in {{< product-name >}}. + +- [Partition by tags that you commonly query for a specific value](#partition-by-tags-that-you-commonly-query-for-a-specific-value) +- [Only partition by tags that _always_ have a value](#only-partition-by-tags-that-always-have-a-value) +- [Avoid over-partitioning](#avoid-over-partitioning) + +## Partition by tags that you commonly query for a specific value + +Custom partitioning primarily benefits queries that look for a specific tag +value in the `WHERE` clause. For example, if you often query data related to a +specific ID, partitioning by the tag that stores the ID helps the InfluxDB +query engine to more quickly identify what partitions contain the relevant data. 
+ +{{% note %}} + +#### Be careful partitioning on high-cardinality tags + +Partitioning using tags with many (10K+) unique values can actually hurt +query performance as partitions are created for each unique tag value. +{{% /note %}} + +## Only partition by tags that _always_ have a value + +You should only partition by tags that _always_ have a value. +If points don't have a value for the tag, InfluxDB can't store them in the correct partitions and, at query time, must read all the partitions. + +## Avoid over-partitioning + +As you plan your partitioning strategy, keep in mind that data can be +"over-partitioned"--meaning partitions are so granular that queries end up +having to retrieve and read many partitions from the object store, which +hurts query performance. + +- Avoid using partition time intervals that are **less than one day**. + + The partition time interval should be balanced with the actual amount of data + written during each interval. If a single interval doesn't contain a lot of data, + it is better to partition by larger time intervals. + +- Don't partition by tags that you typically don't use in your query workload. +- [Be careful partitioning on high-cardinality tags](#be-careful-partitioning-on-high-cardinality-tags). diff --git a/content/influxdb/cloud-dedicated/admin/custom-partitions/define-custom-partitions.md b/content/influxdb/cloud-dedicated/admin/custom-partitions/define-custom-partitions.md new file mode 100644 index 000000000..e7e12148a --- /dev/null +++ b/content/influxdb/cloud-dedicated/admin/custom-partitions/define-custom-partitions.md @@ -0,0 +1,100 @@ +--- +title: Define custom partitions +description: > + Use the [`influxctl` CLI](/influxdb/cloud-dedicated/reference/cli/influxctl/) + to define custom partition strategies when creating a database or table. 
+menu: + influxdb_cloud_dedicated: + parent: Manage data partitioning +weight: 202 +related: + - /influxdb/cloud-dedicated/reference/cli/influxctl/database/create/ + - /influxdb/cloud-dedicated/reference/cli/influxctl/table/create/ +--- + +Use the [`influxctl` CLI](/influxdb/cloud-dedicated/reference/cli/influxctl/) +to define custom partition strategies when creating a database or table. +By default, {{< product-name >}} partitions data by day. + +The partitioning strategy of a database or table is determined by a +[partition template](/influxdb/cloud-dedicated/admin/custom-partitions/#partition-templates) +which defines the naming pattern for [partition keys](/influxdb/cloud-dedicated/admin/custom-partitions/#partition-keys). +Partition keys uniquely identify each partition. +When a partition template is applied to a database, it becomes the default template +for all tables in that database, but can be overridden when creating a +table. + +- [Create a database with a custom partition template](#create-a-database-with-a-custom-partition-template) +- [Create a table with a custom partition template](#create-a-table-with-a-custom-partition-template) +- [Example partition templates](#example-partition-templates) + +{{% note %}} + +#### Partition templates can only be applied on create + +You can only apply a partition template when creating a database or table. +There is no way to update a partition template on an existing resource. +{{% /note %}} + +Use the following command flags to identify +[partition template parts](/influxdb/cloud-dedicated/admin/custom-partitions/partition-templates/#tag-part-templates): + +- `--template-tag`: An [InfluxDB tag](/influxdb/cloud-dedicated/reference/glossary/#tag) + to use in the partition template. 
+ _Supports up to 7 of these flags._ +- `--template-time`: A [Rust strftime date and time](/influxdb/cloud-dedicated/admin/custom-partitions/partition-templates/#time-part-templates) + string that specifies the time format in the partition template and determines + the time interval to partition by. + +_View [partition template part restrictions](/influxdb/cloud-dedicated/admin/custom-partitions/partition-templates/#restrictions)._ + +## Create a database with a custom partition template + +The following example creates a new `example-db` database and applies a partition +template that partitions by two tags (`room` and `sensor-type`) and by week using +the time format `%Y wk:%W`: + +```sh +influxctl database create \ + --template-tag room \ + --template-tag sensor-type \ + --template-time '%Y wk:%W' \ + example-db +``` + +## Create a table with a custom partition template + +The following example creates a new `example-table` table in the `example-db` +database and applies a partition template that partitions by two tags +(`room` and `sensor-type`) and by month using the time format `%Y-%m`: + +```sh +influxctl table create \ + --template-tag room \ + --template-tag sensor-type \ + --template-time '%Y-%m' \ + example-db \ + example-table +``` + +## Example partition templates + +Given the following [line protocol](/influxdb/cloud-dedicated/reference/syntax/line-protocol/) +with a `2024-01-01T00:00:00Z` timestamp: + +```text +prod,line=A,station=weld1 temp=81.9,qty=36i 1704067200000000000 +``` + +| Description | Tag part(s) | Time part | Resulting partition key | +| :---------------------- | :---------------- | :--------- | :----------------------- | +| By day (default) | | `%Y-%m-%d` | 2024-01-01 | +| By day (non-default) | | `%d %b %Y` | 01 Jan 2024 | +| By week | | `%Y wk:%W` | 2024 wk:01 | +| By month | | `%Y-%m` | 2024-01 | +| Single tag, by day | `line` | `%F` | A \| 2024-01-01 | +| Single tag, by week | `line` | `%Y wk:%W` | A \| 2024 wk:01 | +| Single tag, 
by month | `line` | `%Y-%m` | A \| 2024-01 | +| Multiple tags, by day | `line`, `station` | `%F` | A \| weld1 \| 2024-01-01 | +| Multiple tags, by week | `line`, `station` | `%Y wk:%W` | A \| weld1 \| 2024 wk:01 | +| Multiple tags, by month | `line`, `station` | `%Y-%m` | A \| weld1 \| 2024-01 | diff --git a/content/influxdb/cloud-dedicated/admin/custom-partitions/partition-templates.md b/content/influxdb/cloud-dedicated/admin/custom-partitions/partition-templates.md new file mode 100644 index 000000000..087a39845 --- /dev/null +++ b/content/influxdb/cloud-dedicated/admin/custom-partitions/partition-templates.md @@ -0,0 +1,244 @@ +--- +title: Partition templates +list_title: Use partition templates +description: > + Learn how to define custom partitioning strategies using partition templates. + Data can be partitioned by tag and time. +menu: + influxdb_cloud_dedicated: + parent: Manage data partitioning +weight: 202 +--- + +Use partition templates to define the patterns used to generate partition keys. +A partition key uniquely identifies a partition and is used to name the partition +Parquet file in the [Object store](/influxdb/cloud-dedicated/reference/internals/storage-engine/#object-store). + +A partition template consists of 1-8 _template parts_---dimensions to partition data by. +There are two types of parts: + +- **tag**: [InfluxDB tag](/influxdb/cloud-dedicated/reference/glossary/#tag) to + partition by. + _A partition template can include up to seven tag parts._ +- **time**: A Rust strftime date and time string that specifies the time interval + to partition data by. The smallest unit of time included in the time part + template is the interval used to partition data. 
+ _A partition template includes only 1 time part._ + + +- [Restrictions](#restrictions) + - [Template part size limit](#template-part-size-limit) + - [Reserved keywords](#reserved-keywords) + - [Reserved Characters](#reserved-characters) +- [Tag part templates](#tag-part-templates) +- [Time part templates](#time-part-templates) + - [Date specifiers](#date-specifiers) + - [Time specifiers](#time-specifiers) + - [Time zone specifiers](#time-zone-specifiers) + - [Date and time specifiers](#date-and-time-specifiers) + - [Special specifiers](#special-specifiers) + + +## Restrictions + +### Template part size limit + +Each template part is limited to 200 bytes in length. +Anything longer will be truncated at 200 bytes and appended with `#`. + +### Reserved keywords + +The following reserved keywords cannot be used in partition templates: + +- `time` + +### Reserved Characters + +If used in template parts, non-ASCII characters and the following reserved +characters must be [percent encoded](https://developer.mozilla.org/en-US/docs/Glossary/Percent-encoding): + +- `|`: Partition key part delimiter +- `!`: Null or missing partition key part +- `^`: Empty string partition key part +- `#`: Key part truncation marker +- `%`: Required for unambiguous reversal of percent encoding + +## Tag part templates + +Tag part templates consist of a _tag key_ to partition by. +Generated partition keys include the unique _tag value_ specific to each partition. + +## Time part templates + +Time part templates use [Rust strftime date and time formatting syntax](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) +to specify time format in partition keys. +The smallest unit of time included in the time part template is the interval +used to partition data. + +{{% warn %}} +#### Avoid partitioning by less than one day + +We do not recommend using time intervals less than one day to partition data. 
+This can result in [over-partitioned data](/influxdb/cloud-dedicated/admin/custom-partitions/best-practices/#avoid-over-partitioning) +and may hurt query performance. +{{% /warn %}} + +- [Date specifiers](#date-specifiers) +- [Time specifiers](#time-specifiers) +- [Time zone specifiers](#time-zone-specifiers) +- [Date and time specifiers](#date-and-time-specifiers) +- [Special specifiers](#special-specifiers) + +{{% note %}} +The following is adapted from the +[Rust strftime source code](https://docs.rs/chrono/latest/src/chrono/format/strftime.rs.html). +{{% /note %}} + +### Date specifiers + +| Variable | Example | Description | +| :------: | :--------------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `%Y` | `2001` | The full proleptic Gregorian year, zero-padded to 4 digits. chrono supports years from -262144 to 262143. Note: years before 1 BCE or after 9999 CE, require an initial sign (+/-). | +| `%C` | `20` | The proleptic Gregorian year divided by 100, zero-padded to 2 digits. [^1] | +| `%y` | `01` | The proleptic Gregorian year modulo 100, zero-padded to 2 digits. [^1] | +| `%m` | `07` | Month number (01--12), zero-padded to 2 digits. | +| `%b` | `Jul` | Abbreviated month name. Always 3 letters. | +| `%B` | `July` | Full month name. Also accepts corresponding abbreviation in parsing. | +| `%h` | `Jul` | Same as `%b`. | +| `%d` | `08` | Day number (01--31), zero-padded to 2 digits. | +| `%e` | ` 8` | Same as `%d` but space-padded. Same as `%_d`. | +| `%a` | `Sun` | Abbreviated weekday name. Always 3 letters. | +| `%A` | `Sunday` | Full weekday name. Also accepts corresponding abbreviation in parsing. | +| `%w` | `0` | Sunday = 0, Monday = 1, ..., Saturday = 6. | +| `%u` | `7` | Monday = 1, Tuesday = 2, ..., Sunday = 7. 
(ISO 8601) | +| `%U` | `28` | Week number starting with Sunday (00--53), zero-padded to 2 digits. [^2] | +| `%W` | `27` | Same as `%U`, but week 1 starts with the first Monday in that year instead. | +| `%G` | `2001` | Same as `%Y` but uses the year number in ISO 8601 week date. [^3] | +| `%g` | `01` | Same as `%y` but uses the year number in ISO 8601 week date. [^3] | +| `%V` | `27` | Same as `%U` but uses the week number in ISO 8601 week date (01--53). [^3] | +| `%j` | `189` | Day of the year (001--366), zero-padded to 3 digits. | +| `%D` | `07/08/01` | Month-day-year format. Same as `%m/%d/%y`. | +| `%x` | `07/08/01` | Locale's date representation (e.g., 12/31/99). | +| `%F` | `2001-07-08` | Year-month-day format (ISO 8601). Same as `%Y-%m-%d`. | +| `%v` | ` 8-Jul-2001` | Day-month-year format. Same as `%e-%b-%Y`. | + +### Time specifiers + +| Variable | Example | Description | +| :------: | :--------------------------------- | :----------------------------------------------------------------------------------------------------------------------- | +| `%H` | `00` | Hour number (00--23), zero-padded to 2 digits. | +| `%k` | ` 0` | Same as `%H` but space-padded. Same as `%_H`. | +| `%I` | `12` | Hour number in 12-hour clocks (01--12), zero-padded to 2 digits. | +| `%l` | `12` | Same as `%I` but space-padded. Same as `%_I`. | +| `%P` | `am` | `am` or `pm` in 12-hour clocks. | +| `%p` | `AM` | `AM` or `PM` in 12-hour clocks. | +| `%M` | `34` | Minute number (00--59), zero-padded to 2 digits. | +| `%S` | `60` | Second number (00--60), zero-padded to 2 digits. [^4] | +| `%f` | `26490000` | Number of nanoseconds since last whole second. [^7] | +| `%.f` | `.026490` | Decimal fraction of a second. Consumes the leading dot. [^7] | +| `%.3f` | `.026` | Decimal fraction of a second with a fixed length of 3. | +| `%.6f` | `.026490` | Decimal fraction of a second with a fixed length of 6. | +| `%.9f` | `.026490000` | Decimal fraction of a second with a fixed length of 9. 
| +| `%3f` | `026` | Decimal fraction of a second like `%.3f` but without the leading dot. | +| `%6f` | `026490` | Decimal fraction of a second like `%.6f` but without the leading dot. | +| `%9f` | `026490000` | Decimal fraction of a second like `%.9f` but without the leading dot. | +| `%R` | `00:34` | Hour-minute format. Same as `%H:%M`. | +| `%T` | `00:34:60` | Hour-minute-second format. Same as `%H:%M:%S`. | +| `%X` | `00:34:60` | Locale's time representation (e.g., 23:13:48). | +| `%r` | `12:34:60 AM` | Locale's 12 hour clock time. (e.g., 11:11:04 PM). Falls back to `%X` if the locale does not have a 12 hour clock format. | + +### Time zone specifiers + +| Variable | Example | Description | +| :------: | :--------------------------------- | :----------------------------------------------------------------------------------------------------------------- | +| `%Z` | `ACST` | Local time zone name. Skips all non-whitespace characters during parsing. Identical to `%:z` when formatting. [^8] | +| `%z` | `+0930` | Offset from the local time to UTC (with UTC being `+0000`). | +| `%:z` | `+09:30` | Same as `%z` but with a colon. | +| `%::z` | `+09:30:00` | Offset from the local time to UTC with seconds. | +| `%:::z` | `+09` | Offset from the local time to UTC without minutes. | +| `%#z` | `+09` | *Parsing only:* Same as `%z` but allows minutes to be missing or present. | + +### Date and time specifiers + +| Variable | Example | Description | +| :------: | :--------------------------------- | :--------------------------------------------------------------------- | +| `%c` | `Sun Jul 8 00:34:60 2001` | Locale's date and time (e.g., Thu Mar 3 23:05:25 2005). | +| `%+` | `2001-07-08T00:34:60.026490+09:30` | ISO 8601 / RFC 3339 date & time format. [^5] | +| `%s` | `994518299` | UNIX timestamp, the number of seconds since 1970-01-01 00:00 UTC. 
[^6] | + +### Special specifiers + +| Variable | Example | Description | +| :------: | :------ | :---------------------- | +| `%t` | | Literal tab (`\t`). | +| `%n` | | Literal newline (`\n`). | +| `%%` | | Literal percent sign. | + +It is possible to override the default padding behavior of numeric specifiers `%?`. +This is not allowed for other specifiers and results in the `BAD_FORMAT` error. + +Modifier | Description +-------- | ----------- +`%-?` | Suppresses any padding including spaces and zeroes. (e.g. `%j` = `012`, `%-j` = `12`) +`%_?` | Uses spaces as a padding. (e.g. `%j` = `012`, `%_j` = ` 12`) +`%0?` | Uses zeroes as a padding. (e.g. `%e` = ` 9`, `%0e` = `09`) + +Notes: + +[^1]: `%C`, `%y`: + This is floor division, so 100 BCE (year number -99) will print `-1` and `99` respectively. +[^2]: `%U`: + Week 1 starts with the first Sunday in that year. + It is possible to have week 0 for days before the first Sunday. + +[^3]: `%G`, `%g`, `%V`: + Week 1 is the first week with at least 4 days in that year. + Week 0 does not exist, so this should be used with `%G` or `%g`. + +[^4]: `%S`: + It accounts for leap seconds, so `60` is possible. + +[^5]: `%+`: Same as `%Y-%m-%dT%H:%M:%S%.f%:z`, i.e. 0, 3, 6 or 9 fractional + digits for seconds and colons in the time zone offset. +
+
+ This format also supports having a `Z` or `UTC` in place of `%:z`. They + are equivalent to `+00:00`. +
+
+ Note that all `T`, `Z`, and `UTC` are parsed case-insensitively. +
+
+ The typical `strftime` implementations have different (and locale-dependent) + formats for this specifier. While Chrono's format for `%+` is far more + stable, it is best to avoid this specifier if you want to control the exact + output. + +[^6]: `%s`: + This is not padded and can be negative. + For the purpose of Chrono, it only accounts for non-leap seconds + so it slightly differs from ISO C `strftime` behavior. + +[^7]: `%f`, `%.f`: +
+ `%f` and `%.f` are notably different formatting specifiers.
+ `%f` counts the number of nanoseconds since the last whole second, while `%.f` is a fraction of a + second.
+ Example: 7μs is formatted as `7000` with `%f`, and formatted as `.000007` with `%.f`. + +[^8]: `%Z`: + Since `chrono` is not aware of timezones beyond their offsets, this specifier + **only prints the offset** when used for formatting. The timezone abbreviation + will NOT be printed. See [this issue](https://github.com/chronotope/chrono/issues/960) + for more information. +
+
+ Offset will not be populated from the parsed data, nor will it be validated. + Timezone is completely ignored. Similar to the glibc `strptime` treatment of + this format code. +
+
+ It is not possible to reliably convert from an abbreviation to an offset, + for example CDT can mean either Central Daylight Time (North America) or + China Daylight Time. +*/ \ No newline at end of file diff --git a/content/influxdb/cloud-dedicated/admin/databases/create.md b/content/influxdb/cloud-dedicated/admin/databases/create.md index b2d581daa..af21e49b8 100644 --- a/content/influxdb/cloud-dedicated/admin/databases/create.md +++ b/content/influxdb/cloud-dedicated/admin/databases/create.md @@ -18,10 +18,11 @@ list_code_example: | ``` related: - /influxdb/cloud-dedicated/reference/cli/influxctl/database/create/ + - /influxdb/cloud-dedicated/admin/custom-partitions/ --- Use the [`influxctl database create` command](/influxdb/cloud-dedicated/reference/cli/influxctl/database/create/) -to create a database in your InfluxDB Cloud Dedicated cluster. +to create a database in your {{< product-name omit=" Clustered" >}} cluster. 1. If you haven't already, [download and install the `influxctl` CLI](/influxdb/cloud-dedicated/reference/cli/influxctl/#download-and-install-influxctl). 2. Run the `influxctl database create` command and provide the following: @@ -30,6 +31,11 @@ to create a database in your InfluxDB Cloud Dedicated cluster. 
_(default is infinite)_ - _Optional_: Database table (measurement) limit _(default is 500)_ - _Optional_: Database column limit _(default is 250)_ + - _Optional_: [InfluxDB tags](/influxdb/cloud-dedicated/reference/glossary/#tag) + to use in the partition template _(supports up to 7 different tags)_ + - _Optional_: A [Rust strftime date and time string](/influxdb/cloud-dedicated/admin/custom-partitions/partition-templates/#time-part-templates) + that specifies the time format in the partition template and determines + the time interval to partition by _(default is `%Y-%m-%d`)_ - Database name _(see [Database naming restrictions](#database-naming-restrictions))_ {{% code-placeholders "DATABASE_NAME|30d|500|200" %}} @@ -38,6 +44,9 @@ influxctl database create \ --retention-period 30d \ --max-tables 500 \ --max-columns 250 \ + --template-tag tag1 \ + --template-tag tag2 \ + --template-time '%Y-%m-%d' \ DATABASE_NAME ``` {{% /code-placeholders %}} @@ -46,6 +55,7 @@ influxctl database create \ - [Database naming restrictions](#database-naming-restrictions) - [InfluxQL DBRP naming convention](#influxql-dbrp-naming-convention) - [Table and column limits](#table-and-column-limits) +- [Custom partitioning](#custom-partitioning) ## Retention period syntax @@ -59,7 +69,7 @@ A zero duration (`0d`) retention period is infinite and data won't expire. The retention period value cannot be negative or contain whitespace. {{< flex >}} -{{% flex-content %}} +{{% flex-content "half" %}} ##### Valid durations units include @@ -71,7 +81,7 @@ The retention period value cannot be negative or contain whitespace. - **y**: year {{% /flex-content %}} -{{% flex-content %}} +{{% flex-content "half" %}} ##### Example retention period values @@ -190,3 +200,22 @@ threshold beyond which query performance may be affected {{% /expand %}} {{< /expand-wrapper >}} + +### Custom partitioning + +{{< product-name >}} lets you define a custom partitioning strategy for each database. 
+A _partition_ is a logical grouping of data stored in [Apache Parquet](https://parquet.apache.org/) +format in the InfluxDB v3 storage engine. By default, data is partitioned by day, +but, depending on your schema and workload, customizing the partitioning +strategy can improve query performance. + +Use the `--template-tag` and `--template-time` flags to define partition template +parts used to generate partition keys for the database. +For more information, see [Manage data partitioning](/influxdb/cloud-dedicated/admin/custom-partitions/). + +{{% note %}} +#### Partition templates can only be applied on create + +You can only apply a partition template when creating a database. +There is no way to update a partition template on an existing database. +{{% /note %}} diff --git a/content/influxdb/cloud-dedicated/admin/tables/_index.md b/content/influxdb/cloud-dedicated/admin/tables/_index.md new file mode 100644 index 000000000..48a02ae26 --- /dev/null +++ b/content/influxdb/cloud-dedicated/admin/tables/_index.md @@ -0,0 +1,23 @@ +--- +title: Manage tables +seotitle: Manage tables in InfluxDB Cloud Dedicated +description: > + Manage tables in your InfluxDB Cloud Dedicated cluster. + A table is a collection of related data stored in table format. + In previous versions of InfluxDB, tables were known as "measurements." +menu: + influxdb_cloud_dedicated: + parent: Administer InfluxDB Cloud +weight: 101 +influxdb/cloud-dedicated/tags: [tables] +--- + +Manage tables in your {{< product-name omit=" Clustered" >}} cluster. +A table is a collection of related data stored in table format. + +{{% note %}} +In previous versions of InfluxDB and in the context of InfluxQL, tables are +known as "measurements."
+{{% /note %}} + +{{< children hlevel="h2" >}} diff --git a/content/influxdb/cloud-dedicated/admin/tables/create.md b/content/influxdb/cloud-dedicated/admin/tables/create.md new file mode 100644 index 000000000..8a372ad29 --- /dev/null +++ b/content/influxdb/cloud-dedicated/admin/tables/create.md @@ -0,0 +1,71 @@ +--- +title: Create a table +description: > + Use the [`influxctl table create` command](/influxdb/cloud-dedicated/reference/cli/influxctl/table/create/) + to create a new table in a specified database in your InfluxDB cluster. + Provide the database name and a table name. +menu: + influxdb_cloud_dedicated: + parent: Manage tables +weight: 201 +list_code_example: | + ```sh + influxctl table create + ``` +related: + - /influxdb/cloud-dedicated/reference/cli/influxctl/table/create/ + - /influxdb/cloud-dedicated/admin/custom-partitions/ +--- + +Use the [`influxctl table create` command](/influxdb/cloud-dedicated/reference/cli/influxctl/table/create/) +to create a table in a specified database in your +{{< product-name omit=" Clustered" >}} cluster. + +With {{< product-name >}}, tables and measurements are synonymous. +Typically, tables are created automatically on write using the measurement name +specified in line protocol written to InfluxDB. +However, to apply a [custom partition template](/influxdb/cloud-dedicated/admin/custom-partitions/) +to a table, you must manually create the table before you write any data to it. + +1. If you haven't already, [download and install the `influxctl` CLI](/influxdb/cloud-dedicated/reference/cli/influxctl/#download-and-install-influxctl). +2.
Run the `influxctl table create` command and provide the following: + + - _Optional_: [InfluxDB tags](/influxdb/cloud-dedicated/reference/glossary/#tag) + to use in the partition template _(supports up to 7 different tags)_ + - _Optional_: A [Rust strftime date and time string](/influxdb/cloud-dedicated/admin/custom-partitions/partition-templates/#time-part-templates) + that specifies the time format in the partition template and determines + the time interval to partition by _(default is `%Y-%m-%d`)_ + - The name of the database to create the table in + - The name of the table to create + +{{% code-placeholders "(DATABASE|TABLE)_NAME" %}} +```sh +influxctl table create \ + --template-tag tag1 \ + --template-tag tag2 \ + --template-time '%Y-%m-%d' \ + DATABASE_NAME \ + TABLE_NAME +``` +{{% /code-placeholders %}} + +### Custom partitioning + +{{< product-name >}} lets you define a custom partitioning strategy for each table. +A _partition_ is a logical grouping of data stored in [Apache Parquet](https://parquet.apache.org/) +format in the InfluxDB v3 storage engine. By default, data is partitioned by day, +but, depending on your schema and workload, customizing the partitioning +strategy can improve query performance. + +Use the `--template-tag` and `--template-time` flags to define partition template +parts used to generate partition keys for the table. +If no template flags are provided, the table uses the partition template of the +target database. +For more information, see [Manage data partitioning](/influxdb/cloud-dedicated/admin/custom-partitions/). + +{{% note %}} +#### Partition templates can only be applied on create + +You can only apply a partition template when creating a table. +There is no way to update a partition template on an existing table.
+{{% /note %}} diff --git a/content/influxdb/cloud-dedicated/admin/tables/list.md b/content/influxdb/cloud-dedicated/admin/tables/list.md new file mode 100644 index 000000000..cc9c76453 --- /dev/null +++ b/content/influxdb/cloud-dedicated/admin/tables/list.md @@ -0,0 +1,85 @@ +--- +title: List tables +description: > + Use the [`SHOW TABLES` SQL statement](/influxdb/cloud-dedicated/query-data/sql/explore-schema/#list-measurements-in-a-database) + or the [`SHOW MEASUREMENTS` InfluxQL statement](/influxdb/cloud-dedicated/query-data/influxql/explore-schema/#list-measurements-in-a-database) + to list tables in a database. +menu: + influxdb_cloud_dedicated: + parent: Manage tables +weight: 201 +list_code_example: | + ###### SQL + + ```sql + SHOW TABLES + ``` + + ###### InfluxQL + + ```sql + SHOW MEASUREMENTS + ``` +related: + - /influxdb/cloud-dedicated/query-data/sql/explore-schema/ + - /influxdb/cloud-dedicated/query-data/influxql/explore-schema/ +--- + +Use the [`SHOW TABLES` SQL statement](/influxdb/cloud-dedicated/query-data/sql/explore-schema/#list-measurements-in-a-database) +or the [`SHOW MEASUREMENTS` InfluxQL statement](/influxdb/cloud-dedicated/query-data/influxql/explore-schema/#list-measurements-in-a-database) +to list tables in a database. + +{{% note %}} +With {{< product-name >}}, tables and measurements are synonymous. +{{% /note %}} + +###### SQL + +```sql +SHOW TABLES +``` + +###### InfluxQL + +```sql +SHOW MEASUREMENTS +``` + +## List tables with the influxctl CLI + +To list tables using the `influxctl` CLI, use the `influxctl query` command to pass +the `SHOW TABLES` SQL statement. + +{{% note %}} +The `influxctl query` command only supports SQL queries; not InfluxQL. +{{% /note %}} + +Provide the following with your command: + +- **Database token**: [Database token](/influxdb/cloud-dedicated/admin/tokens/) + with read permissions on the queried database. 
Uses the `token` setting from + the [`influxctl` connection profile](/influxdb/cloud-dedicated/reference/cli/influxctl/#configure-connection-profiles) + or the `--token` command flag. +- **Database name**: Name of the database to query. Uses the `database` setting + from the [`influxctl` connection profile](/influxdb/cloud-dedicated/reference/cli/influxctl/#configure-connection-profiles) + or the `--database` command flag. +- **SQL query**: SQL query with the `SHOW TABLES` statement. + +{{% code-placeholders "DATABASE_(TOKEN|NAME)" %}} + +```sh +influxctl query \ + --token DATABASE_TOKEN \ + --database DATABASE_NAME \ + "SHOW TABLES" +``` + +{{% /code-placeholders %}} + +Replace the following: + +- {{% code-placeholder-key %}}`DATABASE_TOKEN`{{% /code-placeholder-key %}}: + Database token with read access to the queried database +- {{% code-placeholder-key %}}`DATABASE_NAME`{{% /code-placeholder-key %}}: + Name of the database to query + diff --git a/content/influxdb/cloud-dedicated/admin/tokens/_index.md b/content/influxdb/cloud-dedicated/admin/tokens/_index.md index caa69d6bc..369e85926 100644 --- a/content/influxdb/cloud-dedicated/admin/tokens/_index.md +++ b/content/influxdb/cloud-dedicated/admin/tokens/_index.md @@ -11,7 +11,7 @@ menu: weight: 101 influxdb/cloud-dedicated/tags: [tokens] aliases: -- /influxdb/cloud-dedicated/security/tokens/ + - /influxdb/cloud-dedicated/security/tokens/ --- InfluxDB uses token authentication to authorize access to data in your InfluxDB diff --git a/content/influxdb/cloud-dedicated/reference/cli/influxctl/_index.md b/content/influxdb/cloud-dedicated/reference/cli/influxctl/_index.md index 04ebb1c44..0a0c91fac 100644 --- a/content/influxdb/cloud-dedicated/reference/cli/influxctl/_index.md +++ b/content/influxdb/cloud-dedicated/reference/cli/influxctl/_index.md @@ -255,7 +255,7 @@ sudo yum install influxctl 'C:\Program Files\InfluxData\influxctl' ``` -3. **Grant network access to the influx CLI**. +3. 
**Grant network access to the influxctl CLI**. When using the `influxctl` CLI for the first time, Windows Defender displays the following message: diff --git a/content/influxdb/cloud-dedicated/reference/cli/influxctl/database/_index.md b/content/influxdb/cloud-dedicated/reference/cli/influxctl/database/_index.md index cdaa1c95c..fac61a818 100644 --- a/content/influxdb/cloud-dedicated/reference/cli/influxctl/database/_index.md +++ b/content/influxdb/cloud-dedicated/reference/cli/influxctl/database/_index.md @@ -1,7 +1,7 @@ --- title: influxctl database description: > - The `influx database` command and its subcommands manage databases in an + The `influxctl database` command and its subcommands manage databases in an InfluxDB Cloud Dedicated cluster. menu: influxdb_cloud_dedicated: @@ -9,7 +9,7 @@ menu: weight: 201 --- -The `influx database` command and its subcommands manage databases in an +The `influxctl database` command and its subcommands manage databases in an InfluxDB Cloud Dedicated cluster. ## Usage diff --git a/content/influxdb/cloud-dedicated/reference/cli/influxctl/database/create.md b/content/influxdb/cloud-dedicated/reference/cli/influxctl/database/create.md index b60613b8f..f306c2904 100644 --- a/content/influxdb/cloud-dedicated/reference/cli/influxctl/database/create.md +++ b/content/influxdb/cloud-dedicated/reference/cli/influxctl/database/create.md @@ -7,10 +7,13 @@ menu: influxdb_cloud_dedicated: parent: influxctl database weight: 301 +related: + - /influxdb/cloud-dedicated/admin/custom-partitions/define-custom-partitions/ + - /influxdb/cloud-dedicated/admin/custom-partitions/partition-templates/ --- The `influxctl database create` command creates a new database with a specified -retention period in an InfluxDB Cloud Dedicated cluster. +retention period in an {{< product-name omit=" Clustered" >}} cluster. The retention period defines the maximum age of data retained in the database, based on the timestamp of the data. 
@@ -20,7 +23,7 @@ A zero duration retention period is infinite and data will not expire. The retention period value cannot be negative or contain whitespace. {{< flex >}} -{{% flex-content %}} +{{% flex-content "half" %}} ##### Valid durations units include @@ -32,7 +35,7 @@ The retention period value cannot be negative or contain whitespace. - **y**: year {{% /flex-content %}} -{{% flex-content %}} +{{% flex-content "half" %}} ##### Example retention period values @@ -47,6 +50,14 @@ The retention period value cannot be negative or contain whitespace. {{% /flex-content %}} {{< /flex >}} +#### Custom partitioning + +You can override the default partition template (`%Y-%m-%d`) of the database +with the `--template-tag` and `--template-time` flags when you create the database. +Provide a time format using [Rust strftime](/influxdb/cloud-dedicated/admin/custom-partitions/partition-templates/#time-part-templates) +and include specific tags to use in the partition template. +Be sure to follow [partitioning best practices](/influxdb/cloud-dedicated/admin/custom-partitions/best-practices/). 
+ ## Usage ```sh @@ -61,12 +72,14 @@ influxctl database create [--retention-period 0s] ## Flags -| Flag | | Description | -| :--- | :------------------- | :----------------------------------------------------------- | -| | `--retention-period` | Database retention period (default is 0s or infinite) | -| | `--max-tables` | Maximum tables per database (default is 500, 0 uses default) | -| | `--max-columns` | Maximum columns per table (default is 250, 0 uses default) | -| `-h` | `--help` | Output command help | +| Flag | | Description | +| :--- | :------------------- | :------------------------------------------------------------------- | +| | `--retention-period` | Database retention period (default is 0s or infinite) | +| | `--max-tables` | Maximum tables per database (default is 500, 0 uses default) | +| | `--max-columns` | Maximum columns per table (default is 250, 0 uses default) | +| | `--template-tag` | Tag to add to partition template (can include multiple of this flag) | +| | `--template-time` | Timestamp format for partition template (default is `%Y-%m-%d`) | +| `-h` | `--help` | Output command help | {{% caption %}} _Also see [`influxctl` global flags](/influxdb/cloud-dedicated/reference/cli/influxctl/#global-flags)._ @@ -77,6 +90,7 @@ _Also see [`influxctl` global flags](/influxdb/cloud-dedicated/reference/cli/inf - [Create a database with an infinite retention period](#create-a-database-with-an-infinite-retention-period) - [Create a database with a 30-day retention period](#create-a-database-with-a-30-day-retention-period) - [Create a database with non-default table and column limits](#create-a-database-with-non-default-table-and-column-limits) +- [Create a database with a custom partition template](#create-a-database-with-a-custom-partition-template) ### Create a database with an infinite retention period @@ -100,3 +114,20 @@ influxctl database create \ --max-columns 150 \ mydb ``` + +### Create a database with a custom partition template
+ +The following example creates a new `mydb` database and applies a partition +template that partitions by two tags (`room` and `sensor-type`) and by week using +the time format `%Y wk:%W`: + +```sh +influxctl database create \ + --template-tag room \ + --template-tag sensor-type \ + --template-time '%Y wk:%W' \ + mydb +``` + +_For more information about custom partitioning, see +[Manage data partitioning](/influxdb/cloud-dedicated/admin/custom-partitions/)._ diff --git a/content/influxdb/cloud-dedicated/reference/cli/influxctl/table/_index.md b/content/influxdb/cloud-dedicated/reference/cli/influxctl/table/_index.md new file mode 100644 index 000000000..a29c85ff9 --- /dev/null +++ b/content/influxdb/cloud-dedicated/reference/cli/influxctl/table/_index.md @@ -0,0 +1,33 @@ +--- +title: influxctl table +description: > + The `influxctl table` command and its subcommands manage tables in an + InfluxDB Cloud Dedicated cluster. +menu: + influxdb_cloud_dedicated: + parent: influxctl +weight: 201 +cascade: + metadata: [influxctl 2.5.0+] +--- + +The `influxctl table` command and its subcommands manage tables in an +InfluxDB Cloud Dedicated cluster. 
+ +## Usage + +```sh +influxctl table [subcommand] [flags] +``` + +## Subcommands + +| Subcommand | Description | +| :------------------------------------------------------------------------ | :------------- | +| [create](/influxdb/cloud-dedicated/reference/cli/influxctl/table/create/) | Create a table | + +## Flags + +| Flag | | Description | +| :--- | :------- | :------------------ | +| `-h` | `--help` | Output command help | diff --git a/content/influxdb/cloud-dedicated/reference/cli/influxctl/table/create.md b/content/influxdb/cloud-dedicated/reference/cli/influxctl/table/create.md new file mode 100644 index 000000000..998d9d7ed --- /dev/null +++ b/content/influxdb/cloud-dedicated/reference/cli/influxctl/table/create.md @@ -0,0 +1,88 @@ +--- +title: influxctl table create +description: > + The `influxctl table create` command creates a new table in the specified database. +menu: + influxdb_cloud_dedicated: + parent: influxctl table +weight: 301 +related: + - /influxdb/cloud-dedicated/admin/custom-partitions/define-custom-partitions/ + - /influxdb/cloud-dedicated/admin/custom-partitions/partition-templates/ +--- + +The `influxctl table create` command creates a new table in the specified +database in an {{< product-name omit=" Clustered" >}} cluster. + +#### Custom partitioning + +You can override the default partition template (the partition template of the target database) +with the `--template-tag` and `--template-time` flags when you create the table. +Provide a time format using [Rust strftime](/influxdb/cloud-dedicated/admin/custom-partitions/partition-templates/#time-part-templates) +and include specific tags to use in the partition template. +Be sure to follow [partitioning best practices](/influxdb/cloud-dedicated/admin/custom-partitions/best-practices/). 
+ +## Usage + +```sh +influxctl table create [flags] +``` + +## Arguments + +| Argument | Description | +| :---------------- | :-------------------------- | +| **DATABASE_NAME** | Name of the target database | +| **TABLE_NAME** | Table name | + +## Flags + +| Flag | | Description | +| :--- | :------------------- | :------------------------------------------------------------------- | +| | `--template-tag` | Tag to add to partition template (can include multiple of this flag) | +| | `--template-time` | Timestamp format for partition template (default is `%Y-%m-%d`) | +| `-h` | `--help` | Output command help | + +{{% caption %}} +_Also see [`influxctl` global flags](/influxdb/cloud-dedicated/reference/cli/influxctl/#global-flags)._ +{{% /caption %}} + +## Examples + +- [Create a table](#create-a-table) +- [Create a table with a custom partition template](#create-a-table-with-a-custom-partition-template) + +In the following examples, replace: + +- {{% code-placeholder-key %}}`DATABASE_NAME`{{% /code-placeholder-key %}}: + The name of the database to create the table in. +- {{% code-placeholder-key %}}`TABLE_NAME` {{% /code-placeholder-key %}}: + The name of the table to create.
+ +### Create a table + +{{% code-placeholders "(DATABASE|TABLE)_NAME" %}} +```sh +influxctl table create DATABASE_NAME TABLE_NAME +``` +{{% /code-placeholders %}} + +### Create a table with a custom partition template + +The following example creates a new table and applies a partition +template that partitions by two tags (`room` and `sensor-type`) and by week using +the time format `%Y wk:%W`: + +{{% code-placeholders "(DATABASE|TABLE)_NAME" %}} +```sh +influxctl table create \ + --template-tag room \ + --template-tag sensor-type \ + --template-time '%Y wk:%W' \ + DATABASE_NAME \ + TABLE_NAME +``` +{{% /code-placeholders %}} + +_For more information about custom partitioning, see +[Manage data partitioning](/influxdb/cloud-dedicated/admin/custom-partitions/)._ diff --git a/content/influxdb/cloud-dedicated/reference/cli/influxctl/token/_index.md b/content/influxdb/cloud-dedicated/reference/cli/influxctl/token/_index.md index 5c8738574..f6261919d 100644 --- a/content/influxdb/cloud-dedicated/reference/cli/influxctl/token/_index.md +++ b/content/influxdb/cloud-dedicated/reference/cli/influxctl/token/_index.md @@ -1,7 +1,7 @@ --- title: influxctl token description: > - The `influx token` command and its subcommands manage database tokens in an + The `influxctl token` command and its subcommands manage database tokens in an InfluxDB Cloud Dedicated cluster. menu: influxdb_cloud_dedicated: @@ -9,7 +9,7 @@ menu: weight: 201 --- -The `influx token` command and its subcommands manage database tokens in an +The `influxctl token` command and its subcommands manage database tokens in an InfluxDB Cloud Dedicated cluster.
## Usage diff --git a/content/influxdb/cloud-dedicated/reference/internals/storage-engine.md b/content/influxdb/cloud-dedicated/reference/internals/storage-engine.md new file mode 100644 index 000000000..508dab4ad --- /dev/null +++ b/content/influxdb/cloud-dedicated/reference/internals/storage-engine.md @@ -0,0 +1,184 @@ +--- +title: InfluxDB v3 storage engine architecture +description: > + The InfluxDB v3 storage engine is a real-time, columnar database optimized for + time series data that supports infinite tag cardinality, real-time queries, + and is optimized to reduce storage cost. +weight: 103 +menu: + influxdb_cloud_dedicated: + name: Storage engine architecture + parent: InfluxDB internals +influxdb/cloud-dedicated/tags: [storage, internals] +related: + - /influxdb/cloud-dedicated/admin/custom-partitions/ +--- + +The InfluxDB v3 storage engine is a real-time, columnar database optimized for +time series data built in [Rust](https://www.rust-lang.org/) on top of +[Apache Arrow](https://arrow.apache.org/) and +[DataFusion](https://arrow.apache.org/datafusion/user-guide/introduction.html). +It supports infinite tag cardinality (number of unique tag values), real-time +queries, and is optimized to reduce storage cost. 
+ +- [Storage engine diagram](#storage-engine-diagram) +- [Storage engine components](#storage-engine-components) + - [Ingester](#ingester) + - [Querier](#querier) + - [Catalog](#catalog) + - [Object store](#object-store) + - [Compactor](#compactor) +- [Scaling strategies](#scaling-strategies) + - [Vertical scaling](#vertical-scaling) + - [Horizontal scaling](#horizontal-scaling) + +## Storage engine diagram + +{{< svg "/static/svgs/v3-storage-architecture.svg" >}} + +## Storage engine components + +- [Ingester](#ingester) +- [Querier](#querier) +- [Catalog](#catalog) +- [Object store](#object-store) +- [Compactor](#compactor) + +### Ingester + +The Ingester processes line protocol submitted in write requests and persists +time series data to the [Object store](#object-store). +In this process, the Ingester does the following: + +- Queries the [Catalog](#catalog) to identify where data should be persisted and + to ensure the schema of the line protocol is compatible with the + [schema](/influxdb/cloud-dedicated/reference/glossary/#schema) of persisted data. +- Accepts or [rejects](/influxdb/cloud-dedicated/write-data/troubleshoot/#troubleshoot-rejected-points) + points in the write request and generates a [response](/influxdb/cloud-dedicated/write-data/troubleshoot/). +- Processes line protocol and persists time series data to the + [Object store](#object-store) in Apache Parquet format. Each Parquet file + represents a _partition_--a logical grouping of data. +- Makes [yet-to-be-persisted](/influxdb/cloud-dedicated/reference/internals/durability/#data-ingest) + data available to [Queriers](#querier) to ensure leading edge data is included + in query results. +- Maintains a short-term [write-ahead log (WAL)](/influxdb/cloud-dedicated/reference/internals/durability/) + to prevent data loss in case of a service interruption. 
+ +##### Ingester scaling strategies + +The Ingester can be scaled both [vertically](#vertical-scaling) and +[horizontally](#horizontal-scaling). +Horizontal scaling increases write throughput and is typically the most +effective scaling strategy for the Ingester. + +### Querier + +The Querier handles query requests and returns query results. +It supports both SQL and InfluxQL through +[Apache Arrow DataFusion](https://arrow.apache.org/datafusion/user-guide/introduction.html). + +#### Query life cycle + +At query time, the Querier: + +1. Receives the query request and builds a query plan. +2. Queries the [Ingesters](#ingester) to: + + - ensure the schema assumed by the query plan matches the schema of written data + - include recently written, [yet-to-be-persisted](/influxdb/cloud-dedicated/reference/internals/durability/#data-ingest) + data in query results + +3. Queries the [Catalog](#catalog) to find partitions in the [Object store](#object-store) + that contain the queried data. +4. Reads partition Parquet files that contain the queried data and scans each + row to filter data that matches predicates in the query plan. +5. Performs any additional operations (for example: deduplicating, merging, and sorting) + specified in the query plan. +6. Returns the query result to the client. + +##### Querier scaling strategies + +The Querier can be scaled both [vertically](#vertical-scaling) and +[horizontally](#horizontal-scaling). +Horizontal scaling increases query throughput to handle more concurrent queries. +Vertical scaling improves the Querier's ability to process computationally intensive queries. + +### Catalog + +The Catalog is a PostgreSQL-compatible relational database that stores metadata +related to your time series data including schema information and physical +locations of partitions in the [Object store](#object-store). +It fulfills the following roles: + +- Provides information about the schema of written data.
+- Tells the [Ingester](#ingester) what partitions to persist data to. +- Tells the [Querier](#querier) what partitions contain the queried data. + +##### Catalog scaling strategies + +Scaling strategies available for the Catalog depend on the PostgreSQL-compatible +database used to run the catalog. All support [vertical scaling](#vertical-scaling). +Most support [horizontal scaling](#horizontal-scaling) for redundancy and failover. + +### Object store + +The Object store contains time series data in [Apache Parquet](https://parquet.apache.org/) format. +Each Parquet file represents a partition. +By default, InfluxDB partitions tables by day, but you can +[customize the partitioning strategy](/influxdb/cloud-dedicated/admin/custom-partitions/). +Data in each Parquet file is sorted, encoded, and compressed. + +##### Object store scaling strategies + +Scaling strategies available for the Object store depend on the underlying +object storage services used to run the object store. +Most support [horizontal scaling](#horizontal-scaling) for redundancy, failover, +and increased capacity. + +### Compactor + +The Compactor processes and compresses partitions in the [Object store](#object-store) +to continually optimize storage. +It then updates the [Catalog](#catalog) with locations of compacted data. + +##### Compactor scaling strategies + +The Compactor can be scaled both [vertically](#vertical-scaling) and +[horizontally](#horizontal-scaling). +Because compaction is a compute-heavy process, vertical scaling (especially +increasing the available CPU) is the most effective scaling strategy for the Compactor. +Horizontal scaling increases compaction throughput, but not as efficiently as +vertical scaling. + +--- + +## Scaling strategies + +The following scaling strategies can be applied to components of the InfluxDB v3 +storage architecture. 
+ +{{% note %}} + + + +For information about scaling your {{< product-name >}} infrastructure, +[contact InfluxData support](https://support.influxdata.com). +{{% /note %}} + +### Vertical scaling + +Vertical scaling (also known as "scaling up") involves increasing the resources +(such as RAM or CPU) available to a process or system. +Vertical scaling is typically used to handle resource-intensive tasks that +require more processing power. + +{{< html-diagram/scaling-strategy "vertical" >}} + +### Horizontal scaling + +Horizontal scaling (also known as "scaling out") involves increasing the number of +nodes or processes available to perform a given task. +Horizontal scaling is typically used to increase the amount of workload or +throughput a system can manage, but also provides additional redundancy and failover. + +{{< html-diagram/scaling-strategy "horizontal" >}} diff --git a/content/influxdb/cloud-dedicated/reference/release-notes/influxctl.md b/content/influxdb/cloud-dedicated/reference/release-notes/influxctl.md index 49a60c86b..840b0163c 100644 --- a/content/influxdb/cloud-dedicated/reference/release-notes/influxctl.md +++ b/content/influxdb/cloud-dedicated/reference/release-notes/influxctl.md @@ -11,6 +11,42 @@ menu: weight: 202 --- +## v2.5.0 {date="2024-03-04"} + +`influxctl` 2.5.0 introduces the ability to set +[partition templates](/influxdb/cloud-dedicated/admin/custom-partitions/) during +database or table creation. It introduces the +[`table` subcommand](/influxdb/cloud-dedicated/reference/cli/influxctl/table/) +that lets users manually create tables. Additionally, `influxctl` now removes a +previously cached token if the response from InfluxDB is unauthorized. This +helps InfluxDB Clustered users who deploy new clusters using unexpired tokens +associated with another InfluxDB cluster. + +### New Features + +- Add partition templates to database and table creation. +- Remove token if unauthorized. 
+ +### Bug Fixes + +- Update arrow to allow non-TLS connections. +- Do not attempt to load cached tokens when an admin token file is provided. +- Print retention period up to days rather than very large hours. +- Fix indentation of help output. + +### Dependency Updates + +- Update `github.com/golangci/golangcilint` from v1.56.1 to v1.56.2. +- Update `golang.org/x/mod` from v0.15.0 to v0.16.0. +- Update `github.com/pkg/browser` from v0.0.0-20210911075715-681adbf594b8 to + v0.0.0-20240102092130-5ac0b6a4141c. +- Update `github.com/stretchr/testify` from 1.8.4 to 1.9.0. +- Update `go.uber.org/zap` from 1.26.0 to 1.27.0. +- Update `google.golang.org/grpc` from 1.61.0 to 1.61.1. +- Update `google.golang.org/grpc` from 1.61.1 to 1.62.0. + +--- + ## v2.4.4 {date="2024-02-16"} ### Bug fixes diff --git a/content/influxdb/clustered/admin/custom-partitions/_index.md b/content/influxdb/clustered/admin/custom-partitions/_index.md new file mode 100644 index 000000000..314885b82 --- /dev/null +++ b/content/influxdb/clustered/admin/custom-partitions/_index.md @@ -0,0 +1,346 @@ +--- +title: Manage data partitioning +seotitle: Manage data partitioning on disk +description: > + Customize your partitioning strategy to optimize query performance for your + specific schema and workload. +menu: + influxdb_clustered: + parent: Administer InfluxDB Cloud +weight: 103 +influxdb/clustered/tags: [storage] +related: + - /influxdb/clustered/reference/internals/storage-engine/ +--- + +When writing data to {{< product-name >}}, the InfluxDB v3 storage engine stores +data in the [Object store](/influxdb/clustered/reference/internals/storage-engine/#object-store) +in [Apache Parquet](https://parquet.apache.org/) format. +Each Parquet file represents a _partition_--a logical grouping of data. +By default, InfluxDB partitions each table by day. +{{< product-name >}} lets you customize the partitioning strategy and partition +by tag values and different time intervals. 
+Customize your partitioning strategy to optimize query performance for your +specific schema and workload. + +- [Advantages](#advantages) +- [Disadvantages](#disadvantages) +- [Limitations](#limitations) +- [How partitioning works](#how-partitioning-works) + - [Partition templates](#partition-templates) + - [Partition keys](#partition-keys) +- [Partitions in the query life cycle](#partitions-in-the-query-life-cycle) +- [Partition guides](#partition-guides) + {{< children type="anchored-list" >}} + +## Advantages + +The primary advantage of custom partitioning is that it lets you customize your +storage structure to improve query performance specific to your schema and workload. + +- **Optimized storage for improved performance on specific types of queries**. + For example, if queries often select data with a specific tag value, you can + partition by that tag to improve the performance of those queries. +- **Optimized storage for specific types of data**. For example, if the data you + store is sparse and the time ranges you query are often much larger than a day, + you could partition your data by week instead of by day. + +## Disadvantages + +Using custom partitioning may increase the load on other parts of the +[InfluxDB v3 storage engine](/influxdb/clustered/reference/internals/storage-engine/), +but each can be scaled individually to address the added load. + +{{% note %}} +_The following disadvantages assume that your custom partitioning strategy includes +additional tags to partition by or partition intervals smaller than a day._ +{{% /note %}} + +- **Increased load on the [Ingester](/influxdb/clustered/reference/internals/storage-engine/#ingester)** + as it groups data into smaller partitions and files. +- **Increased load on the [Catalog](/influxdb/clustered/reference/internals/storage-engine/#catalog)** + as more references to partition Parquet file locations are stored and queried. 
+- **Increased load on the [Compactor](/influxdb/clustered/reference/internals/storage-engine/#compactor)** + as more partition Parquet files need to be compacted. +- **Increased costs associated with [Object storage](/influxdb/clustered/reference/internals/storage-engine/#object-store)** + as more partition Parquet files are created and stored. +- **Risk of decreased performance for queries that don't use tags in the WHERE clause**. + These queries may end up reading many partitions and smaller files, degrading performance. + +## Limitations + +Custom partitioning has the following limitations: + +- Database and table partitions can only be defined on create. + You cannot update the partition strategy of a database or table after it has + been created. +- You can partition by up to eight dimensions (seven tags and a time interval). + +## How partitioning works + +### Partition templates + +A partition template defines the pattern used for _[partition keys](#partition-keys)_ +and determines the time interval that data is partitioned by. +Partition templates use tag values and +[Rust strftime date and time formatting syntax](https://docs.rs/chrono/latest/chrono/format/strftime/index.html). + +_For more detailed information, see [Partition templates](/influxdb/clustered/admin/custom-partitions/partition-templates/)._ + +### Partition keys + +A partition key uniquely identifies a partition. The structure of partition keys +is defined by a _[partition template](#partition-templates)_. Partition keys are +composed of up to eight parts or dimensions (tags and time). +Each part is delimited by the partition key separator (`|`).
+ +{{< expand-wrapper >}} +{{% expand "View example partition templates and keys" %}} + +Given the following line protocol with the following timestamps: + +- 2023-12-31T23:00:00Z +- 2024-01-01T00:00:00Z +- 2024-01-01T01:00:00Z + +```text +production,line=A,station=1 temp=81.2,qty=35i 1704063600000000000 +production,line=A,station=2 temp=92.8,qty=35i 1704063600000000000 +production,line=B,station=1 temp=101.1,qty=43i 1704063600000000000 +production,line=B,station=2 temp=102.4,qty=43i 1704063600000000000 +production,line=A,station=1 temp=81.9,qty=36i 1704067200000000000 +production,line=A,station=2 temp=110.0,qty=22i 1704067200000000000 +production,line=B,station=1 temp=101.8,qty=44i 1704067200000000000 +production,line=B,station=2 temp=105.7,qty=44i 1704067200000000000 +production,line=A,station=1 temp=82.2,qty=35i 1704070800000000000 +production,line=A,station=2 temp=92.1,qty=30i 1704070800000000000 +production,line=B,station=1 temp=102.4,qty=43i 1704070800000000000 +production,line=B,station=2 temp=106.5,qty=43i 1704070800000000000 +``` + +--- + +{{% flex %}} + + + +{{% flex-content "half" %}} + +##### Partition template parts + +- `%Y-%m-%d` (by day, default format) + +{{% /flex-content %}} +{{% flex-content %}} + +##### Partition keys + +- `2023-12-31` +- `2024-01-01` + +{{% /flex-content %}} + + + +{{% /flex %}} + +--- + +{{% flex %}} + + + +{{% flex-content "half" %}} + +##### Partition template parts + +- `line` +- `%d %b %Y` (by day, non-default format) + +{{% /flex-content %}} +{{% flex-content %}} + +##### Partition keys + +- `A | 31 Dec 2023` +- `B | 31 Dec 2023` +- `A | 01 Jan 2024` +- `B | 01 Jan 2024` + +{{% /flex-content %}} + + + +{{% /flex %}} + +--- + +{{% flex %}} + + + +{{% flex-content "half" %}} + +##### Partition template parts + +- `line` +- `station` +- `%Y-%m-%d` (by day, default format) + +{{% /flex-content %}} +{{% flex-content %}} + +##### Partition keys + +- `A | 1 | 2023-12-31` +- `A | 2 | 2023-12-31` +- `B | 1 | 2023-12-31` +- `B | 2 
| 2023-12-31` +- `A | 1 | 2024-01-01` +- `A | 2 | 2024-01-01` +- `B | 1 | 2024-01-01` +- `B | 2 | 2024-01-01` + +{{% /flex-content %}} + + + +{{% /flex %}} + +--- + +{{% flex %}} + + + +{{% flex-content "half" %}} + +##### Partition template parts + +- `line` +- `station` +- `%Y-%m-%d %H:00` (by hour) + +{{% /flex-content %}} +{{% flex-content %}} + +##### Partition keys + +- `A | 1 | 2023-12-31 23:00` +- `A | 2 | 2023-12-31 23:00` +- `B | 1 | 2023-12-31 23:00` +- `B | 2 | 2023-12-31 23:00` +- `A | 1 | 2024-01-01 00:00` +- `A | 2 | 2024-01-01 00:00` +- `B | 1 | 2024-01-01 00:00` +- `B | 2 | 2024-01-01 00:00` +- `A | 1 | 2024-01-01 01:00` +- `A | 2 | 2024-01-01 01:00` +- `B | 1 | 2024-01-01 01:00` +- `B | 2 | 2024-01-01 01:00` + +{{% /flex-content %}} + + + +{{% /flex %}} + +{{% /expand %}} +{{< /expand-wrapper >}} + +## Partitions in the query life cycle + +When querying data: + +1. The [Catalog](/influxdb/clustered/reference/internals/storage-engine/#catalog) + provides the v3 query engine ([Querier](/influxdb/clustered/reference/internals/storage-engine/#querier)) + with the locations of partitions that contain the queried time series data. +2. The query engine reads all rows in the returned partitions to identify what + rows match the logic in the query and should be included in the query result. + +The faster the query engine can identify what partitions to read and then read +the data in those partitions, the more performant queries are. 
+ +_For more information about the query life cycle, see +[InfluxDB v3 query life cycle](/influxdb/clustered/reference/internals/storage-engine/#query-life-cycle)._ + +##### Query example + +Consider the following query that selects everything in the `production` table +where the `line` tag is `A` and the `station` tag is `1`: + +```sql +SELECT * +FROM production +WHERE + time >= now() - INTERVAL '1 week' + AND line = 'A' + AND station = '1' +``` + +Using the default partitioning strategy (by day), the query engine +reads eight separate partitions (one partition for today and one for each of the +last seven days): + +- {{< datetime/current-date trimTime=true >}} +- {{< datetime/current-date offset=-1 trimTime=true >}} +- {{< datetime/current-date offset=-2 trimTime=true >}} +- {{< datetime/current-date offset=-3 trimTime=true >}} +- {{< datetime/current-date offset=-4 trimTime=true >}} +- {{< datetime/current-date offset=-5 trimTime=true >}} +- {{< datetime/current-date offset=-6 trimTime=true >}} +- {{< datetime/current-date offset=-7 trimTime=true >}} + +The query engine must scan _all_ rows in the partitions to identify rows +where `line` is `A` and `station` is `1`. This process takes valuable time +and results in less performant queries. + +However, if you partition by other tags, InfluxDB can identify partitions that +contain only the tag values your query needs and spend less time +scanning rows to see if they contain the tag values.
+ +For example, if data is partitioned by `line`, `station`, and day, although +there are more partition files, the query engine can quickly identify and read +only those with data relevant to the query: + +{{% columns 4 %}} + +- A | 1 | {{< datetime/current-date trimTime=true >}} +- A | 2 | {{< datetime/current-date trimTime=true >}} +- B | 1 | {{< datetime/current-date trimTime=true >}} +- B | 2 | {{< datetime/current-date trimTime=true >}} +- A | 1 | {{< datetime/current-date offset=-1 trimTime=true >}} +- A | 2 | {{< datetime/current-date offset=-1 trimTime=true >}} +- B | 1 | {{< datetime/current-date offset=-1 trimTime=true >}} +- B | 2 | {{< datetime/current-date offset=-1 trimTime=true >}} +- A | 1 | {{< datetime/current-date offset=-2 trimTime=true >}} +- A | 2 | {{< datetime/current-date offset=-2 trimTime=true >}} +- B | 1 | {{< datetime/current-date offset=-2 trimTime=true >}} +- B | 2 | {{< datetime/current-date offset=-2 trimTime=true >}} +- A | 1 | {{< datetime/current-date offset=-3 trimTime=true >}} +- A | 2 | {{< datetime/current-date offset=-3 trimTime=true >}} +- B | 1 | {{< datetime/current-date offset=-3 trimTime=true >}} +- B | 2 | {{< datetime/current-date offset=-3 trimTime=true >}} +- A | 1 | {{< datetime/current-date offset=-4 trimTime=true >}} +- A | 2 | {{< datetime/current-date offset=-4 trimTime=true >}} +- B | 1 | {{< datetime/current-date offset=-4 trimTime=true >}} +- B | 2 | {{< datetime/current-date offset=-4 trimTime=true >}} +- A | 1 | {{< datetime/current-date offset=-5 trimTime=true >}} +- A | 2 | {{< datetime/current-date offset=-5 trimTime=true >}} +- B | 1 | {{< datetime/current-date offset=-5 trimTime=true >}} +- B | 2 | {{< datetime/current-date offset=-5 trimTime=true >}} +- A | 1 | {{< datetime/current-date offset=-6 trimTime=true >}} +- A | 2 | {{< datetime/current-date offset=-6 trimTime=true >}} +- B | 1 | {{< datetime/current-date offset=-6 trimTime=true >}} +- B | 2 | {{< datetime/current-date offset=-6 
trimTime=true >}} +- A | 1 | {{< datetime/current-date offset=-7 trimTime=true >}} +- A | 2 | {{< datetime/current-date offset=-7 trimTime=true >}} +- B | 1 | {{< datetime/current-date offset=-7 trimTime=true >}} +- B | 2 | {{< datetime/current-date offset=-7 trimTime=true >}} + {{% /columns %}} + +--- + +## Partition guides + +{{< children >}} diff --git a/content/influxdb/clustered/admin/custom-partitions/best-practices.md b/content/influxdb/clustered/admin/custom-partitions/best-practices.md new file mode 100644 index 000000000..501764bf7 --- /dev/null +++ b/content/influxdb/clustered/admin/custom-partitions/best-practices.md @@ -0,0 +1,55 @@ +--- +title: Partitioning best practices +description: > + Learn best practices for applying custom partition strategies to your data + stored in InfluxDB. +menu: + influxdb_clustered: + name: Best practices + parent: Manage data partitioning +weight: 202 +--- + +Use the following best practices when defining custom partitioning strategies +for your data stored in {{< product-name >}}. + +- [Partition by tags that you commonly query for a specific value](#partition-by-tags-that-you-commonly-query-for-a-specific-value) +- [Only partition by tags that _always_ have a value](#only-partition-by-tags-that-always-have-a-value) +- [Avoid over-partitioning](#avoid-over-partitioning) + +## Partition by tags that you commonly query for a specific value + +Custom partitioning primarily benefits queries that look for a specific tag +value in the `WHERE` clause. For example, if you often query data related to a +specific ID, partitioning by the tag that stores the ID helps the InfluxDB +query engine to more quickly identify what partitions contain the relevant data. + +{{% note %}} + +#### Be careful partitioning on high-cardinality tags + +Partitioning using tags with many (10K+) unique values can actually hurt +query performance as partitions are created for each unique tag value. 
+ +{{% /note %}} + +## Only partition by tags that _always_ have a value + +You should only partition by tags that _always_ have a value. +If points don't have a value for the tag, InfluxDB can't store them in the correct partitions and, at query time, must read all the partitions. + +## Avoid over-partitioning + +As you plan your partitioning strategy, keep in mind that data can be +"over-partitioned"--meaning partitions are so granular that queries end up +having to retrieve and read many partitions from the object store, which +hurts query performance. + +- Avoid using partition time intervals that are **less than one day**. + + The partition time interval should be balanced with the actual amount of data + written during each interval. If a single interval doesn't contain a lot of data, + it is better to partition by larger time intervals. + +- Don't partition by tags that you typically don't use in your query workload. +- [Be careful partitioning on high-cardinality tags](#be-careful-partitioning-on-high-cardinality-tags). diff --git a/content/influxdb/clustered/admin/custom-partitions/define-custom-partitions.md b/content/influxdb/clustered/admin/custom-partitions/define-custom-partitions.md new file mode 100644 index 000000000..06efb30ed --- /dev/null +++ b/content/influxdb/clustered/admin/custom-partitions/define-custom-partitions.md @@ -0,0 +1,100 @@ +--- +title: Define custom partitions +description: > + Use the [`influxctl` CLI](/influxdb/clustered/reference/cli/influxctl/) + to define custom partition strategies when creating a database or table. +menu: + influxdb_clustered: + parent: Manage data partitioning +weight: 202 +related: + - /influxdb/clustered/reference/cli/influxctl/database/create/ + - /influxdb/clustered/reference/cli/influxctl/table/create/ +--- + +Use the [`influxctl` CLI](/influxdb/clustered/reference/cli/influxctl/) +to define custom partition strategies when creating a database or table. 
+By default, {{< product-name >}} partitions data by day. + +The partitioning strategy of a database or table is determined by a +[partition template](/influxdb/clustered/admin/custom-partitions/#partition-templates) +which defines the naming pattern for [partition keys](/influxdb/clustered/admin/custom-partitions/#partition-keys). +Partition keys uniquely identify each partition. +When a partition template is applied to a database, it becomes the default template +for all tables in that database, but can be overridden when creating a +table. + +- [Create a database with a custom partition template](#create-a-database-with-a-custom-partition-template) +- [Create a table with a custom partition template](#create-a-table-with-a-custom-partition-template) +- [Example partition templates](#example-partition-templates) + +{{% note %}} + +#### Partition templates can only be applied on create + +You can only apply a partition template when creating a database or table. +There is no way to update a partition template on an existing resource. +{{% /note %}} + +Use the following command flags to identify +[partition template parts](/influxdb/clustered/admin/custom-partitions/partition-templates/#tag-part-templates): + +- `--template-tag`: An [InfluxDB tag](/influxdb/clustered/reference/glossary/#tag) + to use in the partition template. + _Supports up to 7 of these flags._ +- `--template-time`: A [Rust strftime date and time](/influxdb/clustered/admin/custom-partitions/partition-templates/#time-part-templates) + string that specifies the time format in the partition template and determines + the time interval to partition by. 
+ +_View [partition template part restrictions](/influxdb/clustered/admin/custom-partitions/partition-templates/#restrictions)._ + +## Create a database with a custom partition template + +The following example creates a new `example-db` database and applies a partition +template that partitions by two tags (`room` and `sensor-type`) and by week using +the time format `%Y wk:%W`: + +```sh +influxctl database create \ + --template-tag room \ + --template-tag sensor-type \ + --template-time '%Y wk:%W' \ + example-db +``` + +## Create a table with a custom partition template + +The following example creates a new `example-table` table in the `example-db` +database and applies a partition template that partitions by two tags +(`room` and `sensor-type`) and by month using the time format `%Y-%m`: + +```sh +influxctl table create \ + --template-tag room \ + --template-tag sensor-type \ + --template-time '%Y-%m' \ + example-db \ + example-table +``` + +## Example partition templates + +Given the following [line protocol](/influxdb/clustered/reference/syntax/line-protocol/) +with a `2024-01-01T00:00:00Z` timestamp: + +```text +prod,line=A,station=weld1 temp=81.9,qty=36i 1704067200000000000 +``` + +| Description | Tag part(s) | Time part | Resulting partition key | +| :---------------------- | :---------------- | :--------- | :----------------------- | +| By day (default) | | `%Y-%m-%d` | 2024-01-01 | +| By day (non-default) | | `%d %b %Y` | 01 Jan 2024 | +| By week | | `%Y wk:%W` | 2024 wk:01 | +| By month | | `%Y-%m` | 2024-01 | +| Single tag, by day | `line` | `%F` | A \| 2024-01-01 | +| Single tag, by week | `line` | `%Y wk:%W` | A \| 2024 wk:01 | +| Single tag, by month | `line` | `%Y-%m` | A \| 2024-01 | +| Multiple tags, by day | `line`, `station` | `%F` | A \| weld1 \| 2024-01-01 | +| Multiple tags, by week | `line`, `station` | `%Y wk:%W` | A \| weld1 \| 2024 wk:01 | +| Multiple tags, by month | `line`, `station` | `%Y-%m` | A \| weld1 \| 2024-01 | diff --git 
a/content/influxdb/clustered/admin/custom-partitions/partition-templates.md b/content/influxdb/clustered/admin/custom-partitions/partition-templates.md new file mode 100644 index 000000000..6eda4c0b4 --- /dev/null +++ b/content/influxdb/clustered/admin/custom-partitions/partition-templates.md @@ -0,0 +1,257 @@ +--- +title: Partition templates +list_title: Use partition templates +description: > + Learn how to define custom partitioning strategies using partition templates. + Data can be partitioned by tag and time. +menu: + influxdb_clustered: + parent: Manage data partitioning +weight: 202 +--- + +Use partition templates to define the patterns used to generate partition keys. +A partition key uniquely identifies a partition and is used to name the partition +Parquet file in the [Object store](/influxdb/clustered/reference/internals/storage-engine/#object-store). + +A partition template consists of 1-8 _template parts_---dimensions to partition data by. +There are two types of parts: + +- **tag**: [InfluxDB tag](/influxdb/clustered/reference/glossary/#tag) to + partition by. + _A partition template can include up to seven tag parts._ +- **time**: A Rust strftime date and time string that specifies the time interval + to partition data by. The smallest unit of time included in the time part + template is the interval used to partition data. 
+ _A partition template includes only 1 time part._ + + + +- [Restrictions](#restrictions) + - [Template part size limit](#template-part-size-limit) + - [Reserved keywords](#reserved-keywords) + - [Reserved Characters](#reserved-characters) +- [Tag part templates](#tag-part-templates) +- [Time part templates](#time-part-templates) + + - [Date specifiers](#date-specifiers) + - [Time specifiers](#time-specifiers) + - [Time zone specifiers](#time-zone-specifiers) + - [Date and time specifiers](#date-and-time-specifiers) + - [Special specifiers](#special-specifiers) + + + +## Restrictions + +### Template part size limit + +Each template part is limited to 200 bytes in length. +Anything longer will be truncated at 200 bytes and appended with `#`. + +### Reserved keywords + +The following reserved keywords cannot be used in partition templates: + +- `time` + +### Reserved Characters + +If used in template parts, non-ASCII characters and the following reserved +characters must be [percent encoded](https://developer.mozilla.org/en-US/docs/Glossary/Percent-encoding): + +- `|`: Partition key part delimiter +- `!`: Null or missing partition key part +- `^`: Empty string partition key part +- `#`: Key part truncation marker +- `%`: Required for unambiguous reversal of percent encoding + +## Tag part templates + +Tag part templates consist of a _tag key_ to partition by. +Generated partition keys include the unique _tag value_ specific to each partition. + +## Time part templates + +Time part templates use [Rust strftime date and time formatting syntax](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) +to specify time format in partition keys. +The smallest unit of time included in the time part template is the interval +used to partition data. + +{{% warn %}} + +#### Avoid partitioning by less than one day + +We do not recommend using time intervals less than one day to partition data. 
+This can result in [over-partitioned data](/influxdb/clustered/admin/custom-partitions/best-practices/#avoid-over-partitioning) +and may hurt query performance. +{{% /warn %}} + +- [Date specifiers](#date-specifiers) +- [Time specifiers](#time-specifiers) +- [Time zone specifiers](#time-zone-specifiers) +- [Date and time specifiers](#date-and-time-specifiers) +- [Special specifiers](#special-specifiers) + +{{% note %}} +The following is adapted from the +[Rust strftime source code](https://docs.rs/chrono/latest/src/chrono/format/strftime.rs.html). +{{% /note %}} + +### Date specifiers + +| Variable | Example | Description | +| :------: | :------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `%Y` | `2001` | The full proleptic Gregorian year, zero-padded to 4 digits. chrono supports years from -262144 to 262143. Note: years before 1 BCE or after 9999 CE, require an initial sign (+/-). | +| `%C` | `20` | The proleptic Gregorian year divided by 100, zero-padded to 2 digits. [^1] | +| `%y` | `01` | The proleptic Gregorian year modulo 100, zero-padded to 2 digits. [^1] | +| `%m` | `07` | Month number (01--12), zero-padded to 2 digits. | +| `%b` | `Jul` | Abbreviated month name. Always 3 letters. | +| `%B` | `July` | Full month name. Also accepts corresponding abbreviation in parsing. | +| `%h` | `Jul` | Same as `%b`. | +| `%d` | `08` | Day number (01--31), zero-padded to 2 digits. | +| `%e` | ` 8` | Same as `%d` but space-padded. Same as `%_d`. | +| `%a` | `Sun` | Abbreviated weekday name. Always 3 letters. | +| `%A` | `Sunday` | Full weekday name. Also accepts corresponding abbreviation in parsing. | +| `%w` | `0` | Sunday = 0, Monday = 1, ..., Saturday = 6. | +| `%u` | `7` | Monday = 1, Tuesday = 2, ..., Sunday = 7. (ISO 8601) | +| `%U` | `28` | Week number starting with Sunday (00--53), zero-padded to 2 digits. 
[^2] | +| `%W` | `27` | Same as `%U`, but week 1 starts with the first Monday in that year instead. | +| `%G` | `2001` | Same as `%Y` but uses the year number in ISO 8601 week date. [^3] | +| `%g` | `01` | Same as `%y` but uses the year number in ISO 8601 week date. [^3] | +| `%V` | `27` | Same as `%U` but uses the week number in ISO 8601 week date (01--53). [^3] | +| `%j` | `189` | Day of the year (001--366), zero-padded to 3 digits. | +| `%D` | `07/08/01` | Month-day-year format. Same as `%m/%d/%y`. | +| `%x` | `07/08/01` | Locale's date representation (e.g., 12/31/99). | +| `%F` | `2001-07-08` | Year-month-day format (ISO 8601). Same as `%Y-%m-%d`. | +| `%v` | ` 8-Jul-2001` | Day-month-year format. Same as `%e-%b-%Y`. | + +### Time specifiers + +| Variable | Example | Description | +| :------: | :------------ | :----------------------------------------------------------------------------------------------------------------------- | +| `%H` | `00` | Hour number (00--23), zero-padded to 2 digits. | +| `%k` | ` 0` | Same as `%H` but space-padded. Same as `%_H`. | +| `%I` | `12` | Hour number in 12-hour clocks (01--12), zero-padded to 2 digits. | +| `%l` | `12` | Same as `%I` but space-padded. Same as `%_I`. | +| `%P` | `am` | `am` or `pm` in 12-hour clocks. | +| `%p` | `AM` | `AM` or `PM` in 12-hour clocks. | +| `%M` | `34` | Minute number (00--59), zero-padded to 2 digits. | +| `%S` | `60` | Second number (00--60), zero-padded to 2 digits. [^4] | +| `%f` | `26490000` | Number of nanoseconds since last whole second. [^7] | +| `%.f` | `.026490` | Decimal fraction of a second. Consumes the leading dot. [^7] | +| `%.3f` | `.026` | Decimal fraction of a second with a fixed length of 3. | +| `%.6f` | `.026490` | Decimal fraction of a second with a fixed length of 6. | +| `%.9f` | `.026490000` | Decimal fraction of a second with a fixed length of 9. | +| `%3f` | `026` | Decimal fraction of a second like `%.3f` but without the leading dot. 
| +| `%6f` | `026490` | Decimal fraction of a second like `%.6f` but without the leading dot. | +| `%9f` | `026490000` | Decimal fraction of a second like `%.9f` but without the leading dot. | +| `%R` | `00:34` | Hour-minute format. Same as `%H:%M`. | +| `%T` | `00:34:60` | Hour-minute-second format. Same as `%H:%M:%S`. | +| `%X` | `00:34:60` | Locale's time representation (e.g., 23:13:48). | +| `%r` | `12:34:60 AM` | Locale's 12 hour clock time. (e.g., 11:11:04 PM). Falls back to `%X` if the locale does not have a 12 hour clock format. | + +### Time zone specifiers + +| Variable | Example | Description | +| :------: | :---------- | :----------------------------------------------------------------------------------------------------------------- | +| `%Z` | `ACST` | Local time zone name. Skips all non-whitespace characters during parsing. Identical to `%:z` when formatting. [^8] | +| `%z` | `+0930` | Offset from the local time to UTC (with UTC being `+0000`). | +| `%:z` | `+09:30` | Same as `%z` but with a colon. | +| `%::z` | `+09:30:00` | Offset from the local time to UTC with seconds. | +| `%:::z` | `+09` | Offset from the local time to UTC without minutes. | +| `%#z` | `+09` | _Parsing only:_ Same as `%z` but allows minutes to be missing or present. | + +### Date and time specifiers + +| Variable | Example | Description | +| :------: | :--------------------------------- | :--------------------------------------------------------------------- | +| `%c` | `Sun Jul 8 00:34:60 2001` | Locale's date and time (e.g., Thu Mar 3 23:05:25 2005). | +| `%+` | `2001-07-08T00:34:60.026490+09:30` | ISO 8601 / RFC 3339 date & time format. [^5] | +| `%s` | `994518299` | UNIX timestamp, the number of seconds since 1970-01-01 00:00 UTC. [^6] | + +### Special specifiers + +| Variable | Example | Description | +| :------: | :------ | :---------------------- | +| `%t` | | Literal tab (`\t`). | +| `%n` | | Literal newline (`\n`). | +| `%%` | | Literal percent sign. 
| + +It is possible to override the default padding behavior of numeric specifiers `%?`. +This is not allowed for other specifiers and results in the `BAD_FORMAT` error. + +| Modifier | Description | +| -------- | ------------------------------------------------------------------------------------- | +| `%-?` | Suppresses any padding including spaces and zeroes. (e.g. `%j` = `012`, `%-j` = `12`) | +| `%_?` | Uses spaces as a padding. (e.g. `%j` = `012`, `%_j` = ` 12`) | +| `%0?` | Uses zeroes as a padding. (e.g. `%e` = ` 9`, `%0e` = `09`) | + +Notes: + +[^1]: + `%C`, `%y`: + This is floor division, so 100 BCE (year number -99) will print `-1` and `99` respectively. + +[^2]: + `%U`: + Week 1 starts with the first Sunday in that year. + It is possible to have week 0 for days before the first Sunday. + +[^3]: + `%G`, `%g`, `%V`: + Week 1 is the first week with at least 4 days in that year. + Week 0 does not exist, so this should be used with `%G` or `%g`. + +[^4]: + `%S`: + It accounts for leap seconds, so `60` is possible. + +[^5]: + `%+`: Same as `%Y-%m-%dT%H:%M:%S%.f%:z`, i.e. 0, 3, 6 or 9 fractional + digits for seconds and colons in the time zone offset. +
+
+ This format also supports having a `Z` or `UTC` in place of `%:z`. They + are equivalent to `+00:00`. +
+
+ Note that all `T`, `Z`, and `UTC` are parsed case-insensitively. +
+
+ The typical `strftime` implementations have different (and locale-dependent) + formats for this specifier. While Chrono's format for `%+` is far more + stable, it is best to avoid this specifier if you want to control the exact + output. + +[^6]: + `%s`: + This is not padded and can be negative. + For the purpose of Chrono, it only accounts for non-leap seconds + so it slightly differs from ISO C `strftime` behavior. + +[^7]: + `%f`, `%.f`: +
+ `%f` and `%.f` are notably different formatting specifiers.
+ `%f` counts the number of nanoseconds since the last whole second, while `%.f` is a fraction of a + second.
+ Example: 7μs is formatted as `7000` with `%f`, and formatted as `.000007` with `%.f`. + +[^8]: + `%Z`: + Since `chrono` is not aware of timezones beyond their offsets, this specifier + **only prints the offset** when used for formatting. The timezone abbreviation + will NOT be printed. See [this issue](https://github.com/chronotope/chrono/issues/960) + for more information. +
+
+ Offset will not be populated from the parsed data, nor will it be validated. + Timezone is completely ignored. Similar to the glibc `strptime` treatment of + this format code. +
+
+ It is not possible to reliably convert from an abbreviation to an offset, + for example CDT can mean either Central Daylight Time (North America) or + China Daylight Time. + \*/ diff --git a/content/influxdb/clustered/admin/databases/create.md b/content/influxdb/clustered/admin/databases/create.md index c01ecfa93..e02fe1948 100644 --- a/content/influxdb/clustered/admin/databases/create.md +++ b/content/influxdb/clustered/admin/databases/create.md @@ -18,18 +18,24 @@ list_code_example: | ``` related: - /influxdb/clustered/reference/cli/influxctl/database/create/ + - /influxdb/clustered/admin/custom-partitions/ --- Use the [`influxctl database create` command](/influxdb/clustered/reference/cli/influxctl/database/create/) -to create a database in your InfluxDB cluster. +to create a database in your {{< product-name omit=" Clustered" >}} cluster. 1. If you haven't already, [download and install the `influxctl` CLI](/influxdb/clustered/reference/cli/influxctl/#download-and-install-influxctl). 2. 
Run the `influxctl database create` command and provide the following: - - _Optional_: Database [retention period](/influxdb/clustered/admin/databases/#retention-periods) + - _Optional:_ Database [retention period](/influxdb/clustered/admin/databases/#retention-periods) _(default is infinite)_ - _Optional_: Database table (measurement) limit _(default is 500)_ - _Optional_: Database column limit _(default is 250)_ + - _Optional_: [InfluxDB tags](/influxdb/clustered/reference/glossary/#tag) + to use in the partition template _(supports up to 7 different tags)_ + - _Optional_: A [Rust strftime date and time string](/influxdb/clustered/admin/custom-partitions/partition-templates/#time-part-templates) + that specifies the time format in the partition template and determines + the time interval to partition by _(default is `%Y-%m-%d`)_ - Database name _(see [Database naming restrictions](#database-naming-restrictions))_ {{% code-placeholders "DATABASE_NAME|30d|500|200" %}} @@ -38,6 +44,9 @@ influxctl database create \ --retention-period 30d \ --max-tables 500 \ --max-columns 250 \ + --template-tag tag1 \ + --template-tag tag2 \ + --template-time '%Y-%m-%d' \ DATABASE_NAME ``` {{% /code-placeholders %}} @@ -46,6 +55,7 @@ influxctl database create \ - [Database naming restrictions](#database-naming-restrictions) - [InfluxQL DBRP naming convention](#influxql-dbrp-naming-convention) - [Table and column limits](#table-and-column-limits) +- [Custom partitioning](#custom-partitioning) ## Retention period syntax @@ -53,12 +63,13 @@ Use the `--retention-period` flag to define a specific [retention period](/influxdb/clustered/admin/databases/#retention-periods) for the database. The retention period value is a time duration value made up of a numeric value -plus a duration unit. For example, `30d` means 30 days. -A zero duration retention period is infinite and data will not expire. +plus a duration unit. +For example, `30d` means 30 days. 
+A zero duration (`0d`) retention period is infinite and data won't expire. The retention period value cannot be negative or contain whitespace. {{< flex >}} -{{% flex-content %}} +{{% flex-content "half" %}} ##### Valid durations units include @@ -70,7 +81,7 @@ The retention period value cannot be negative or contain whitespace. - **y**: year {{% /flex-content %}} -{{% flex-content %}} +{{% flex-content "half" %}} ##### Example retention period values @@ -99,14 +110,14 @@ Database names must adhere to the following naming restrictions: In InfluxDB 1.x, data is stored in [databases](/influxdb/v1/concepts/glossary/#database) and [retention policies](/influxdb/v1/concepts/glossary/#retention-policy-rp). -In InfluxDB Clustered, databases and retention policies have been merged into +In {{% product-name %}}, databases and retention policies have been merged into _databases_, where databases have a retention period, but retention policies are no longer part of the data model. Because InfluxQL uses the 1.x data model, a database must be mapped to a v1 database and retention policy (DBRP) to be queryable with InfluxQL. **When naming a database that you want to query with InfluxQL**, use the following -naming convention to automatically map v1 DBRP combinations to a database: +naming convention to automatically map v1 DBRP combinations to an {{% product-name %}} database: ```sh database_name/retention_policy_name @@ -189,3 +200,22 @@ threshold beyond which query performance may be affected {{% /expand %}} {{< /expand-wrapper >}} + +### Custom partitioning + +{{< product-name >}} lets you define a custom partitioning strategy for each database. +A _partition_ is a logical grouping of data stored in [Apache Parquet](https://parquet.apache.org/) +format in the InfluxDB v3 storage engine. By default, data is partitioned by day, +but, depending on your schema and workload, customizing the partitioning +strategy can improve query performance. 
+ +Use the `--template-tag` and `--template-time` flags to define partition template +parts used to generate partition keys for the database. +For more information, see [Manage data partitioning](/influxdb/clustered/admin/custom-partitions/). + +{{% note %}} +#### Partition templates can only be applied on create + +You can only apply a partition template when creating a database. +There is no way to update a partition template on an existing database. +{{% /note %}} diff --git a/content/influxdb/clustered/admin/tables/_index.md b/content/influxdb/clustered/admin/tables/_index.md new file mode 100644 index 000000000..d0bbf131b --- /dev/null +++ b/content/influxdb/clustered/admin/tables/_index.md @@ -0,0 +1,23 @@ +--- +title: Manage tables +seotitle: Manage tables in InfluxDB Clustered +description: > + Manage tables in your InfluxDB cluster. + A table is a collection of related data stored in table format. + In previous versions of InfluxDB, tables were known as "measurements." +menu: + influxdb_clustered: + parent: Administer InfluxDB Cloud +weight: 101 +influxdb/clustered/tags: [tables] +--- + +Manage tables in your {{< product-name omit=" Clustered" >}} cluster. +A table is a collection of related data stored in table format. + +{{% note %}} +In previous versions of InfluxDB and in the context of InfluxQL, tables are +known as "measurements." +{{% /note %}} + +{{< children hlevel="h2" >}} diff --git a/content/influxdb/clustered/admin/tables/create.md b/content/influxdb/clustered/admin/tables/create.md new file mode 100644 index 000000000..f92630c80 --- /dev/null +++ b/content/influxdb/clustered/admin/tables/create.md @@ -0,0 +1,71 @@ +--- +title: Create a table +description: > + Use the [`influxctl table create` command](/influxdb/clustered/reference/cli/influxctl/table/create/) + to create a new table in a specified database in your InfluxDB cluster. + Provide the database name and a table name. 
+menu: + influxdb_clustered: + parent: Manage tables +weight: 201 +list_code_example: | + ```sh + influxctl table create + ``` +related: + - /influxdb/clustered/reference/cli/influxctl/table/create/ + - /influxdb/clustered/admin/custom-partitions/ +--- + +Use the [`influxctl table create` command](/influxdb/clustered/reference/cli/influxctl/table/create/) +to create a table in a specified database in your +{{< product-name omit=" Clustered" >}} cluster. + +With {{< product-name >}}, tables and measurements are synonymous. +Typically, tables are created automatically on write using the measurement name +specified in line protocol written to InfluxDB. +However, to apply a [custom partition template](/influxdb/clustered/admin/custom-partitions/) +to a table, you must manually create the table before you write any data to it. + +1. If you haven't already, [download and install the `influxctl` CLI](/influxdb/clustered/reference/cli/influxctl/#download-and-install-influxctl). +2. Run the `influxctl table create` command and provide the following: + + - _Optional_: [InfluxDB tags](/influxdb/clustered/reference/glossary/#tag) + to use in the partition template _(supports up to 7 different tags)_ + - _Optional_: A [Rust strftime date and time string](/influxdb/clustered/admin/custom-partitions/partition-templates/#time-part-templates) + that specifies the time format in the partition template and determines + the time interval to partition by _(default is `%Y-%m-%d`)_ + - The name of the database to create the table in + - The name of the table to create + +{{% code-placeholders "(DATABASE|TABLE)_NAME" %}} +```sh +influxctl table create \ + --template-tag tag1 \ + --template-tag tag2 \ + --template-time '%Y-%m-%d' \ + DATABASE_NAME \ + TABLE_NAME +``` +{{% /code-placeholders %}} + +### Custom partitioning + +{{< product-name >}} lets you define a custom partitioning strategy for each table. 
+A _partition_ is a logical grouping of data stored in [Apache Parquet](https://parquet.apache.org/) +format in the InfluxDB v3 storage engine. By default, data is partitioned by day, +but, depending on your schema and workload, customizing the partitioning +strategy can improve query performance. + +Use the `--template-tag` and `--template-time` flags to define partition template +parts used to generate partition keys for the table. +If no template flags are provided, the table uses the partition template of the +target database. +For more information, see [Manage data partitioning](/influxdb/clustered/admin/custom-partitions/). + +{{% note %}} +#### Partition templates can only be applied on create + +You can only apply a partition template when creating a table. +There is no way to update a partition template on an existing table. +{{% /note %}} diff --git a/content/influxdb/clustered/admin/tables/list.md b/content/influxdb/clustered/admin/tables/list.md new file mode 100644 index 000000000..3ed840fb1 --- /dev/null +++ b/content/influxdb/clustered/admin/tables/list.md @@ -0,0 +1,85 @@ +--- +title: List tables +description: > + Use the [`SHOW TABLES` SQL statement](/influxdb/clustered/query-data/sql/explore-schema/#list-measurements-in-a-database) + or the [`SHOW MEASUREMENTS` InfluxQL statement](/influxdb/clustered/query-data/influxql/explore-schema/#list-measurements-in-a-database) + to list tables in a database. 
+menu: + influxdb_clustered: + parent: Manage tables +weight: 201 +list_code_example: | + ###### SQL + + ```sql + SHOW TABLES + ``` + + ###### InfluxQL + + ```sql + SHOW MEASUREMENTS + ``` +related: + - /influxdb/clustered/query-data/sql/explore-schema/ + - /influxdb/clustered/query-data/influxql/explore-schema/ +--- + +Use the [`SHOW TABLES` SQL statement](/influxdb/clustered/query-data/sql/explore-schema/#list-measurements-in-a-database) +or the [`SHOW MEASUREMENTS` InfluxQL statement](/influxdb/clustered/query-data/influxql/explore-schema/#list-measurements-in-a-database) +to list tables in a database. + +{{% note %}} +With {{< product-name >}}, tables and measurements are synonymous. +{{% /note %}} + +###### SQL + +```sql +SHOW TABLES +``` + +###### InfluxQL + +```sql +SHOW MEASUREMENTS +``` + +## List tables with the influxctl CLI + +To list tables using the `influxctl` CLI, use the `influxctl query` command to pass +the `SHOW TABLES` SQL statement. + +{{% note %}} +The `influxctl query` command only supports SQL queries, not InfluxQL. +{{% /note %}} + +Provide the following with your command: + +- **Database token**: [Database token](/influxdb/clustered/admin/tokens/) + with read permissions on the queried database. Uses the `token` setting from + the [`influxctl` connection profile](/influxdb/clustered/reference/cli/influxctl/#configure-connection-profiles) + or the `--token` command flag. +- **Database name**: Name of the database to query. Uses the `database` setting + from the [`influxctl` connection profile](/influxdb/clustered/reference/cli/influxctl/#configure-connection-profiles) + or the `--database` command flag. +- **SQL query**: SQL query with the `SHOW TABLES` statement. 
+ +{{% code-placeholders "DATABASE_(TOKEN|NAME)" %}} + +```sh +influxctl query \ + --token DATABASE_TOKEN \ + --database DATABASE_NAME \ + "SHOW TABLES" +``` + +{{% /code-placeholders %}} + +Replace the following: + +- {{% code-placeholder-key %}}`DATABASE_TOKEN`{{% /code-placeholder-key %}}: + Database token with read access to the queried database +- {{% code-placeholder-key %}}`DATABASE_NAME`{{% /code-placeholder-key %}}: + Name of the database to query + diff --git a/content/influxdb/clustered/reference/cli/influxctl/_index.md b/content/influxdb/clustered/reference/cli/influxctl/_index.md index 6aaa61fe0..3fb9eca19 100644 --- a/content/influxdb/clustered/reference/cli/influxctl/_index.md +++ b/content/influxdb/clustered/reference/cli/influxctl/_index.md @@ -255,7 +255,7 @@ sudo yum install influxctl 'C:\Program Files\InfluxData\influxctl' ``` -3. **Grant network access to the influx CLI**. +3. **Grant network access to the influxctl CLI**. When using the `influxctl` CLI for the first time, Windows Defender displays the following message: diff --git a/content/influxdb/clustered/reference/cli/influxctl/database/_index.md b/content/influxdb/clustered/reference/cli/influxctl/database/_index.md index a9092650d..574b1e41b 100644 --- a/content/influxdb/clustered/reference/cli/influxctl/database/_index.md +++ b/content/influxdb/clustered/reference/cli/influxctl/database/_index.md @@ -1,7 +1,7 @@ --- title: influxctl database description: > - The `influx database` command and its subcommands manage databases in an + The `influxctl database` command and its subcommands manage databases in an InfluxDB cluster. menu: influxdb_clustered: @@ -9,7 +9,7 @@ menu: weight: 201 --- -The `influx database` command and its subcommands manage databases in an +The `influxctl database` command and its subcommands manage databases in an InfluxDB cluster. 
## Usage diff --git a/content/influxdb/clustered/reference/cli/influxctl/database/create.md b/content/influxdb/clustered/reference/cli/influxctl/database/create.md index d2b9ea880..5374146c0 100644 --- a/content/influxdb/clustered/reference/cli/influxctl/database/create.md +++ b/content/influxdb/clustered/reference/cli/influxctl/database/create.md @@ -6,10 +6,13 @@ menu: influxdb_clustered: parent: influxctl database weight: 301 +related: + - /influxdb/clustered/admin/custom-partitions/define-custom-partitions/ + - /influxdb/clustered/admin/custom-partitions/partition-templates/ --- The `influxctl database create` command creates a new database with a specified -retention period in an InfluxDB cluster. +retention period in an {{< product-name omit=" Clustered" >}} cluster. The retention period defines the maximum age of data retained in the database, based on the timestamp of the data. @@ -19,7 +22,7 @@ A zero duration retention period is infinite and data will not expire. The retention period value cannot be negative or contain whitespace. {{< flex >}} -{{% flex-content %}} +{{% flex-content "half" %}} ##### Valid durations units include @@ -31,7 +34,7 @@ The retention period value cannot be negative or contain whitespace. - **y**: year {{% /flex-content %}} -{{% flex-content %}} +{{% flex-content "half" %}} ##### Example retention period values @@ -46,6 +49,14 @@ The retention period value cannot be negative or contain whitespace. {{% /flex-content %}} {{< /flex >}} +#### Custom partitioning + +You can override the default partition template (`%Y-%m-%d`) of the database +with the `--template-tag` and `--template-time` flags when you create the database. +Provide a time format using [Rust strftime](/influxdb/clustered/admin/custom-partitions/partition-templates/#time-part-templates) +and include specific tags to use in the partition template. +Be sure to follow [partitioning best practices](/influxdb/clustered/admin/custom-partitions/best-practices/). 
+ ## Usage ```sh @@ -60,12 +71,14 @@ influxctl database create [--retention-period 0s] ## Flags -| Flag | | Description | -| :--- | :------------------- | :----------------------------------------------------------- | -| | `--retention-period` | Database retention period (default is 0s or infinite) | -| | `--max-tables` | Maximum tables per database (default is 500, 0 uses default) | -| | `--max-columns` | Maximum columns per table (default is 250, 0 uses default) | -| `-h` | `--help` | Output command help | +| Flag | | Description | +| :--- | :------------------- | :------------------------------------------------------------------- | +| | `--retention-period` | Database retention period (default is 0s or infinite) | +| | `--max-tables` | Maximum tables per database (default is 500, 0 uses default) | +| | `--max-columns` | Maximum columns per table (default is 250, 0 uses default) | +| | `--template-tag` | Tag to add to partition template (can include multiple of this flag) | +| | `--template-time` | Timestamp format for partition template (default is `%Y-%m-%d`) | +| `-h` | `--help` | Output command help | {{% caption %}} _Also see [`influxctl` global flags](/influxdb/clustered/reference/cli/influxctl/#global-flags)._ @@ -76,6 +89,7 @@ _Also see [`influxctl` global flags](/influxdb/clustered/reference/cli/influxctl - [Create a database with an infinite retention period](#create-a-database-with-an-infinite-retention-period) - [Create a database with a 30-day retention period](#create-a-database-with-a-30-day-retention-period) - [Create a database with non-default table and column limits](#create-a-database-with-non-default-table-and-column-limits) +- [Create a database with a custom partition template](#create-a-database-with-a-custom-partition-template) ### Create a database with an infinite retention period @@ -99,3 +113,20 @@ influxctl database create \ --max-columns 150 \ mydb ``` + +### Create a database with a custom partition template + +The following 
example creates a new `mydb` database and applies a partition +template that partitions by two tags (`room` and `sensor-type`) and by week using +the time format `%Y wk:%W`: + +```sh +influxctl database create \ + --template-tag room \ + --template-tag sensor-type \ + --template-time '%Y wk:%W' \ + mydb +``` + +_For more information about custom partitioning, see +[Manage data partitioning](/influxdb/clustered/admin/custom-partitions/)._ diff --git a/content/influxdb/clustered/reference/cli/influxctl/table/_index.md b/content/influxdb/clustered/reference/cli/influxctl/table/_index.md new file mode 100644 index 000000000..a0a37c5e9 --- /dev/null +++ b/content/influxdb/clustered/reference/cli/influxctl/table/_index.md @@ -0,0 +1,31 @@ +--- +title: influxctl table +description: > + The `influxctl table` command and its subcommands manage tables in an InfluxDB cluster. +menu: + influxdb_clustered: + parent: influxctl +weight: 201 +cascade: + metadata: [influxctl 2.5.0+] +--- + +The `influxctl table` command and its subcommands manage tables in an InfluxDB cluster. + +## Usage + +```sh +influxctl table [subcommand] [flags] +``` + +## Subcommands + +| Subcommand | Description | +| :------------------------------------------------------------------ | :------------- | +| [create](/influxdb/clustered/reference/cli/influxctl/table/create/) | Create a table | + +## Flags + +| Flag | | Description | +| :--- | :------- | :------------------ | +| `-h` | `--help` | Output command help | diff --git a/content/influxdb/clustered/reference/cli/influxctl/table/create.md b/content/influxdb/clustered/reference/cli/influxctl/table/create.md new file mode 100644 index 000000000..ba3b341b3 --- /dev/null +++ b/content/influxdb/clustered/reference/cli/influxctl/table/create.md @@ -0,0 +1,88 @@ +--- +title: influxctl table create +description: > + The `influxctl table create` command creates a new table in the specified database. 
+menu: + influxdb_clustered: + parent: influxctl table +weight: 301 +related: + - /influxdb/clustered/admin/custom-partitions/define-custom-partitions/ + - /influxdb/clustered/admin/custom-partitions/partition-templates/ +--- + +The `influxctl table create` command creates a new table in the specified +database in an {{< product-name omit=" Clustered" >}} cluster. + +#### Custom partitioning + +You can override the default partition template (the partition template of the target database) +with the `--template-tag` and `--template-time` flags when you create the table. +Provide a time format using [Rust strftime](/influxdb/clustered/admin/custom-partitions/partition-templates/#time-part-templates) +and include specific tags to use in the partition template. +Be sure to follow [partitioning best practices](/influxdb/clustered/admin/custom-partitions/best-practices/). + +## Usage + +```sh +influxctl table create [flags] +``` + +## Arguments + +| Argument | Description | +| :---------------- | :-------------------------- | +| **DATABASE_NAME** | Name of the target database | +| **TABLE_NAME** | Table name | + +## Flags + +| Flag | | Description | +| :--- | :------------------- | :------------------------------------------------------------------- | +| | `--template-tag` | Tag to add to partition template (can include multiple of this flag) | +| | `--template-time` | Timestamp format for partition template (default is `%Y-%m-%d`) | +| `-h` | `--help` | Output command help | + +{{% caption %}} +_Also see [`influxctl` global flags](/influxdb/clustered/reference/cli/influxctl/#global-flags)._ +{{% /caption %}} + +## Examples + +- [Create a table](#create-a-table) +- [Create a table with a custom partition template](#create-a-table-with-a-custom-partition-template) + +In the following examples, replace: + +- {{% code-placeholder-key %}}`DATABASE_NAME`{{% /code-placeholder-key %}}: + The name of the database to create the table in. 
+- {{% code-placeholder-key %}}`TABLE_NAME`{{% /code-placeholder-key %}}: + The name of the table to create. + +### Create a table + +{{% code-placeholders "(DATABASE|TABLE)_NAME" %}} +```sh +influxctl table create DATABASE_NAME TABLE_NAME +``` +{{% /code-placeholders %}} + +### Create a table with a custom partition template + +The following example creates a new table and applies a partition +template that partitions by two tags (`room` and `sensor-type`) and by week using +the time format `%Y wk:%W`: + +{{% code-placeholders "(DATABASE|TABLE)_NAME" %}} +```sh +influxctl table create \ + --template-tag room \ + --template-tag sensor-type \ + --template-time '%Y wk:%W' \ + DATABASE_NAME \ + TABLE_NAME +``` +{{% /code-placeholders %}} + +_For more information about custom partitioning, see +[Manage data partitioning](/influxdb/clustered/admin/custom-partitions/)._ diff --git a/content/influxdb/clustered/reference/cli/influxctl/token/_index.md b/content/influxdb/clustered/reference/cli/influxctl/token/_index.md index b9d080d7f..9004d769f 100644 --- a/content/influxdb/clustered/reference/cli/influxctl/token/_index.md +++ b/content/influxdb/clustered/reference/cli/influxctl/token/_index.md @@ -1,7 +1,7 @@ --- title: influxctl token description: > - The `influx token` command and its subcommands manage database tokens in an + The `influxctl token` command and its subcommands manage database tokens in an InfluxDB cluster. menu: influxdb_clustered: @@ -9,7 +9,7 @@ menu: weight: 201 --- -The `influx token` command and its subcommands manage database tokens in an +The `influxctl token` command and its subcommands manage database tokens in an InfluxDB cluster. 
## Usage diff --git a/content/influxdb/clustered/reference/internals/storage-engine.md b/content/influxdb/clustered/reference/internals/storage-engine.md new file mode 100644 index 000000000..00315e49c --- /dev/null +++ b/content/influxdb/clustered/reference/internals/storage-engine.md @@ -0,0 +1,184 @@ +--- +title: InfluxDB v3 storage engine architecture +description: > + The InfluxDB v3 storage engine is a real-time, columnar database optimized for + time series data that supports infinite tag cardinality, real-time queries, + and is optimized to reduce storage cost. +weight: 103 +menu: + influxdb_clustered: + name: Storage engine architecture + parent: InfluxDB internals +influxdb/clustered/tags: [storage, internals] +related: + - /influxdb/clustered/admin/custom-partitions/ +--- + +The InfluxDB v3 storage engine is a real-time, columnar database optimized for +time series data built in [Rust](https://www.rust-lang.org/) on top of +[Apache Arrow](https://arrow.apache.org/) and +[DataFusion](https://arrow.apache.org/datafusion/user-guide/introduction.html). +It supports infinite tag cardinality (number of unique tag values), real-time +queries, and is optimized to reduce storage cost. + +- [Storage engine diagram](#storage-engine-diagram) +- [Storage engine components](#storage-engine-components) + - [Ingester](#ingester) + - [Querier](#querier) + - [Catalog](#catalog) + - [Object store](#object-store) + - [Compactor](#compactor) +- [Scaling strategies](#scaling-strategies) + - [Vertical scaling](#vertical-scaling) + - [Horizontal scaling](#horizontal-scaling) + +## Storage engine diagram + +{{< svg "/static/svgs/v3-storage-architecture.svg" >}} + +## Storage engine components + +- [Ingester](#ingester) +- [Querier](#querier) +- [Catalog](#catalog) +- [Object store](#object-store) +- [Compactor](#compactor) + +### Ingester + +The Ingester processes line protocol submitted in write requests and persists +time series data to the [Object store](#object-store). 
+In this process, the Ingester does the following: + +- Queries the [Catalog](#catalog) to identify where data should be persisted and + to ensure the schema of the line protocol is compatible with the + [schema](/influxdb/clustered/reference/glossary/#schema) of persisted data. +- Accepts or [rejects](/influxdb/clustered/write-data/troubleshoot/#troubleshoot-rejected-points) + points in the write request and generates a [response](/influxdb/clustered/write-data/troubleshoot/). +- Processes line protocol and persists time series data to the + [Object store](#object-store) in Apache Parquet format. Each Parquet file + represents a _partition_--a logical grouping of data. +- Makes [yet-to-be-persisted](/influxdb/clustered/reference/internals/durability/#data-ingest) + data available to [Queriers](#querier) to ensure leading edge data is included + in query results. +- Maintains a short-term [write-ahead log (WAL)](/influxdb/clustered/reference/internals/durability/) + to prevent data loss in case of a service interruption. + +##### Ingester scaling strategies + +The Ingester can be scaled both [vertically](#vertical-scaling) and +[horizontally](#horizontal-scaling). +Horizontal scaling increases write throughput and is typically the most +effective scaling strategy for the Ingester. + +### Querier + +The Querier handles query requests and returns query results. +It supports both SQL and InfluxQL through +[Apache Arrow DataFusion](https://arrow.apache.org/datafusion/user-guide/introduction.html). + +#### Query life cycle + +At query time, the Querier: + +1. Receives the query request and builds a query plan. +2. Queries the [Ingesters](#ingester) to: + + - ensure the schema assumed by the query plan matches the schema of written data + - include recently written, [yet-to-be-persisted](/influxdb/clustered/reference/internals/durability/#data-ingest) + data in query results + +3. 
Queries the [Catalog](#catalog) to find partitions in the [Object store](#object-store) + that contain the queried data. +4. Reads partition Parquet files that contain the queried data and scans each + row to filter data that matches predicates in the query plan. +5. Performs any additional operations (for example: deduplicating, merging, and sorting) + specified in the query plan. +6. Returns the query result to the client. + +##### Querier scaling strategies + +The Querier can be scaled both [vertically](#vertical-scaling) and +[horizontally](#horizontal-scaling). +Horizontal scaling increases query throughput to handle more concurrent queries. +Vertical scaling improves the Querier's ability to process computationally intensive queries. + +### Catalog + +The Catalog is a PostgreSQL-compatible relational database that stores metadata +related to your time series data including schema information and physical +locations of partitions in the [Object store](#object-store). +It fulfills the following roles: + +- Provides information about the schema of written data. +- Tells the [Ingester](#ingester) what partitions to persist data to. +- Tells the [Querier](#querier) what partitions contain the queried data. + +##### Catalog scaling strategies + +Scaling strategies available for the Catalog depend on the PostgreSQL-compatible +database used to run the catalog. All support [vertical scaling](#vertical-scaling). +Most support [horizontal scaling](#horizontal-scaling) for redundancy and failover. + +### Object store + +The Object store contains time series data in [Apache Parquet](https://parquet.apache.org/) format. +Each Parquet file represents a partition. +By default, InfluxDB partitions tables by day, but you can +[customize the partitioning strategy](/influxdb/clustered/admin/custom-partitions/). +Data in each Parquet file is sorted, encoded, and compressed. 
+ +##### Object store scaling strategies + +Scaling strategies available for the Object store depend on the underlying +object storage services used to run the object store. +Most support [horizontal scaling](#horizontal-scaling) for redundancy, failover, +and increased capacity. + +### Compactor + +The Compactor processes and compresses partitions in the [Object store](#object-store) +to continually optimize storage. +It then updates the [Catalog](#catalog) with locations of compacted data. + +##### Compactor scaling strategies + +The Compactor can be scaled both [vertically](#vertical-scaling) and +[horizontally](#horizontal-scaling). +Because compaction is a compute-heavy process, vertical scaling (especially +increasing the available CPU) is the most effective scaling strategy for the Compactor. +Horizontal scaling increases compaction throughput, but not as efficiently as +vertical scaling. + +--- + +## Scaling strategies + +The following scaling strategies can be applied to components of the InfluxDB v3 +storage architecture. + +{{% note %}} + + + +For information about scaling your {{< product-name >}} infrastructure, +[contact InfluxData support](https://support.influxdata.com). +{{% /note %}} + +### Vertical scaling + +Vertical scaling (also known as "scaling up") involves increasing the resources +(such as RAM or CPU) available to a process or system. +Vertical scaling is typically used to handle resource-intensive tasks that +require more processing power. + +{{< html-diagram/scaling-strategy "vertical" >}} + +### Horizontal scaling + +Horizontal scaling (also known as "scaling out") involves increasing the number of +nodes or processes available to perform a given task. +Horizontal scaling is typically used to increase the amount of workload or +throughput a system can manage, but also provides additional redundancy and failover. 
+ +{{< html-diagram/scaling-strategy "horizontal" >}} diff --git a/content/influxdb/clustered/reference/release-notes/influxctl.md b/content/influxdb/clustered/reference/release-notes/influxctl.md index c1626926d..06b863548 100644 --- a/content/influxdb/clustered/reference/release-notes/influxctl.md +++ b/content/influxdb/clustered/reference/release-notes/influxctl.md @@ -12,6 +12,42 @@ weight: 202 canonical: /influxdb/cloud-dedicated/reference/release-notes/influxctl/ --- +## v2.5.0 {date="2024-03-04"} + +`influxctl` 2.5.0 introduces the ability to set +[partition templates](/influxdb/clustered/admin/custom-partitions/) during +database or table creation. It introduces the +[`table` subcommand](/influxdb/clustered/reference/cli/influxctl/table/) +that lets users manually create tables. Additionally, `influxctl` now removes a +previously cached token if the response from InfluxDB is unauthorized. This +helps InfluxDB Clustered users who deploy new clusters using unexpired tokens +associated with another InfluxDB cluster. + +### New Features + +- Add partition templates to database and table creation. +- Remove token if unauthorized. + +### Bug Fixes + +- Update arrow to allow non-TLS connections. +- Do not attempt to load cached tokens when an admin token file is provided. +- Print retention period up to days rather than very large hours. +- Fix indentation of help output. + +### Dependency Updates + +- Update `github.com/golangci/golangci-lint` from v1.56.1 to v1.56.2. +- Update `golang.org/x/mod` from v0.15.0 to v0.16.0. +- Update `github.com/pkg/browser` from v0.0.0-20210911075715-681adbf594b8 to + v0.0.0-20240102092130-5ac0b6a4141c. +- Update `github.com/stretchr/testify` from 1.8.4 to 1.9.0. +- Update `go.uber.org/zap` from 1.26.0 to 1.27.0. +- Update `google.golang.org/grpc` from 1.61.0 to 1.61.1. +- Update `google.golang.org/grpc` from 1.61.1 to 1.62.0. 
+ +--- + ## v2.4.4 {date="2024-02-16"} ### Bug fixes diff --git a/content/influxdb/v2/reference/internals/shards.md b/content/influxdb/v2/reference/internals/shards.md index cb58858c0..3b5c93987 100644 --- a/content/influxdb/v2/reference/internals/shards.md +++ b/content/influxdb/v2/reference/internals/shards.md @@ -25,7 +25,7 @@ Learn the relationships between buckets, shards, and shard groups. - [Shard groups](#shard-groups) - [Shard group duration](#shard-group-duration) - [Shard group diagram](#shard-group-diagram) -- [Shard life-cycle](#shard-life-cycle) +- [Shard life cycle](#shard-life-cycle) - [Shard precreation](#shard-precreation) - [Shard writes](#shard-writes) - [Shard compaction](#shard-compaction) @@ -82,7 +82,7 @@ and a **1d shard group duration**: --- -## Shard life-cycle +## Shard life cycle ### Shard precreation The InfluxDB **shard precreation service** pre-creates shards with future start diff --git a/data/products.yml b/data/products.yml index c20490b9a..7a3bef4e3 100644 --- a/data/products.yml +++ b/data/products.yml @@ -55,7 +55,7 @@ influxdb_cloud_dedicated: list_order: 3 latest: cloud-dedicated link: "https://www.influxdata.com/contact-sales-form/" - latest_cli: 2.4.4 + latest_cli: 2.5.0 placeholder_host: cluster-id.influxdb.io influxdb_clustered: diff --git a/layouts/shortcodes/columns.html b/layouts/shortcodes/columns.html new file mode 100644 index 000000000..e28f8a376 --- /dev/null +++ b/layouts/shortcodes/columns.html @@ -0,0 +1,6 @@ +{{- $columns := .Get 0 | default 2 -}} +
+
+ {{ .Inner }} +
+
\ No newline at end of file diff --git a/layouts/shortcodes/datetime/current-date.html b/layouts/shortcodes/datetime/current-date.html index 862779778..a1aed9838 100644 --- a/layouts/shortcodes/datetime/current-date.html +++ b/layouts/shortcodes/datetime/current-date.html @@ -1 +1,6 @@ -2021-01-01T00:00:00Z \ No newline at end of file +{{- $offset := .Get "offset" | default 0 -}} +{{- $convertedOffset := math.Mul $offset 24 -}} +{{- $trimTime := .Get "trimTime" | default false -}} +{{- $date := time.Now.Add (time.ParseDuration (print $convertedOffset "h")) -}} +{{- $formattedDate := cond ($trimTime) ($date | time.Format "2006-01-02") (print ($date | time.Format "2006-01-02") "T00:00:00Z") -}} +{{ $formattedDate }} \ No newline at end of file diff --git a/layouts/shortcodes/html-diagram/scaling-strategy.html b/layouts/shortcodes/html-diagram/scaling-strategy.html new file mode 100644 index 000000000..d61e2b297 --- /dev/null +++ b/layouts/shortcodes/html-diagram/scaling-strategy.html @@ -0,0 +1,15 @@ +{{ $strategy := .Get 0 | default "vertical" }} + +
+{{ if eq $strategy "vertical" }} +
+
+
+{{ else if eq $strategy "horizontal" }} +
+
+
+{{ else }} +

Unknown scaling strategy, "{{$strategy}}".

+{{ end }} +
\ No newline at end of file diff --git a/layouts/shortcodes/icon.html b/layouts/shortcodes/icon.html index d9cadbc06..31da61945 100644 --- a/layouts/shortcodes/icon.html +++ b/layouts/shortcodes/icon.html @@ -257,5 +257,7 @@ {{- else if or (eq $icon "link") (eq $icon "chain") -}} + {{- else if or (eq $icon "question-mark") (eq $icon "question") (eq $icon "?") -}} + {{- end -}} {{- end -}} diff --git a/static/svgs/v3-storage-architecture.svg b/static/svgs/v3-storage-architecture.svg new file mode 100644 index 000000000..f22289a03 --- /dev/null +++ b/static/svgs/v3-storage-architecture.svg @@ -0,0 +1,127 @@ + + + + + + + + + +Ingester + + + + +Querier +Object StorageTime series data stored inApache Parquet format + +CatalogRelational metadataservice + + + Compactor + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Query yet-to-be-persisted data + + + +WALShort-termpersistence + + Write requests + + + Query requests + + + + + + + + + + + + + + + + + + + + + + + + + + + + +