diff --git a/.ci/vale/styles/Google/Units.yml b/.ci/vale/styles/Google/Units.yml index 53522ab2d..024c816e1 100644 --- a/.ci/vale/styles/Google/Units.yml +++ b/.ci/vale/styles/Google/Units.yml @@ -5,4 +5,5 @@ nonword: true level: error tokens: - \b\d+(?:B|kB|MB|GB|TB) - - \b\d+(?:ns|ms|s|min|h|d) +# Ignore duration literals in code blocks. + - \b(?!\`)\d+(?:ns|ms|s|min|h|d) diff --git a/content/influxdb/cloud-dedicated/admin/databases/_index.md b/content/influxdb/cloud-dedicated/admin/databases/_index.md index 07c10c137..9f58b9c8e 100644 --- a/content/influxdb/cloud-dedicated/admin/databases/_index.md +++ b/content/influxdb/cloud-dedicated/admin/databases/_index.md @@ -11,6 +11,14 @@ menu: parent: Administer InfluxDB Cloud weight: 101 influxdb/cloud-dedicated/tags: [databases] +related: + - /influxdb/cloud-dedicated/write-data/best-practices/schema-design/ + - /influxdb/cloud-dedicated/reference/cli/influxctl/ +alt_links: + cloud: /influxdb/cloud/admin/buckets/ + cloud_serverless: /influxdb/cloud-serverless/admin/buckets/ + clustered: /influxdb/clustered/admin/databases/ + oss: /influxdb/v2/admin/buckets/ --- An InfluxDB database is a named location where time series data is stored. @@ -19,11 +27,13 @@ Each InfluxDB database has a [retention period](#retention-periods). {{% note %}} **If coming from InfluxDB v1**, the concepts of databases and retention policies have been combined into a single concept--database. Retention policies are no -longer part of the InfluxDB data model. However, InfluxDB Cloud Dedicated does +longer part of the InfluxDB data model. +However, {{% product-name %}} does support InfluxQL, which requires databases and retention policies. See [InfluxQL DBRP naming convention](/influxdb/cloud-dedicated/admin/databases/create/#influxql-dbrp-naming-convention). -**If coming from InfluxDB v2 or InfluxDB Cloud**, _database_ and _bucket_ are synonymous. 
+**If coming from InfluxDB v2, InfluxDB Cloud (TSM), or InfluxDB Cloud Serverless**, +_database_ and _bucket_ are synonymous. {{% /note %}} ## Retention periods @@ -40,9 +50,10 @@ never be removed by the retention enforcement service. ## Table and column limits -In {{< product-name >}}, table (measurement) and column limits can be -customized when [creating](#create-a-database) or -[updating a database](#update-a-database). +You can customize [table (measurement) limits](#table-limit) and +[table column limits](#column-limit) when you +[create](#create-a-database) or +[update a database](#update-a-database) in {{< product-name >}}. ### Table limit @@ -60,7 +71,8 @@ cluster in the following ways: {{% expand "**May improve query performance** View more info" %}} Schemas with many measurements that contain -[focused sets of tags and fields](/influxdb/cloud-dedicated/write-data/best-practices/schema-design/#design-for-performance) can make it easier for the query engine to +[focused sets of tags and fields](/influxdb/cloud-dedicated/write-data/best-practices/schema-design/#design-for-performance) +can make it easier for the query engine to identify what partitions contain the queried data, resulting in better query performance. @@ -72,7 +84,7 @@ data by measurement and time range and stores each partition as a Parquet file in your cluster's object store. By increasing the number of measurements (tables) you can store in your database, you also increase the potential for more `PUT` requests into your object store as InfluxDB creates more partitions. -Each `PUT` request incurs a monetary cost and will increase the operating cost of +Each `PUT` request incurs a monetary cost and increases the operating cost of your cluster. {{% /expand %}} @@ -91,20 +103,33 @@ operating cost of your cluster. **Default maximum number of columns**: 250 -Time, fields, and tags are each represented by a column in a table. 
+**Configurable maximum number of columns**: 1000 + +Each row must include a time column, with the remaining columns representing +tags and fields. +As a result, a table with 250 columns can have one time column and up to +249 field and tag columns. + +If you attempt to write to a table and exceed the column limit, the write +request fails and InfluxDB returns an error. + +If you update the column limit for a database, the limit applies to newly +created tables; it doesn't override the column limit for existing tables. + Increasing your column limit affects your {{% product-name omit=" Clustered" %}} cluster in the following ways: {{< expand-wrapper >}} -{{% expand "May adversely affect query performance" %}} +{{% expand "May adversely affect system performance" %}} -At query time, the InfluxDB query engine identifies what table contains the queried -data and then evaluates each row in the table to match the conditions of the query. -The more columns that are in each row, the longer it takes to evaluate each row. - -Through performance testing, InfluxData has identified 250 columns as the -threshold where query performance may be affected -(depending on the shape of and data types in your schema). +When creating or updating a database, you can configure the table column limit to be +lower than the default or up to 1000, based on your requirements. +InfluxData identified 250 columns as the safe limit for maintaining system +performance and stability. +Exceeding this threshold can result in +[wide schemas](/influxdb/cloud-dedicated/write-data/best-practices/schema-design/#avoid-wide-schemas), +which can negatively impact performance and resource use, +depending on your queries, the shape of your schema, and data types in the schema. 
{{% /expand %}} {{< /expand-wrapper >}} diff --git a/content/influxdb/cloud-dedicated/admin/databases/update.md b/content/influxdb/cloud-dedicated/admin/databases/update.md index d7d762c6a..9e7b38e32 100644 --- a/content/influxdb/cloud-dedicated/admin/databases/update.md +++ b/content/influxdb/cloud-dedicated/admin/databases/update.md @@ -57,9 +57,11 @@ to update a database in your {{< product-name omit=" Clustered" >}} cluster. - Database name - _Optional_: Database [retention period](/influxdb/cloud-dedicated/admin/databases/#retention-periods). - Default is infinite (`0`). - - _Optional_: Database table (measurement) limit. Default is `500`. - - _Optional_: Database column limit. Default is `250`. + Default is infinite (`0`). + - _Optional_: Database [table (measurement) limit](/influxdb/cloud-dedicated/admin/databases/#table-limit). + Default is `500`. + - _Optional_: Database [column limit](/influxdb/cloud-dedicated/admin/databases/#column-limit). + Default is `250`. {{% code-placeholders "DATABASE_NAME|30d|500|200" %}} diff --git a/content/influxdb/cloud-dedicated/query-data/troubleshoot-and-optimize/optimize-queries.md b/content/influxdb/cloud-dedicated/query-data/troubleshoot-and-optimize/optimize-queries.md index acf8d9ca9..5a297bd9a 100644 --- a/content/influxdb/cloud-dedicated/query-data/troubleshoot-and-optimize/optimize-queries.md +++ b/content/influxdb/cloud-dedicated/query-data/troubleshoot-and-optimize/optimize-queries.md @@ -15,6 +15,7 @@ related: - /influxdb/cloud-dedicated/query-data/execute-queries/analyze-query-plan/ aliases: - /influxdb/cloud-dedicated/query-data/execute-queries/optimize-queries/ + - /influxdb/cloud-dedicated/query-data/execute-queries/analyze-query-plan/ --- Optimize SQL and InfluxQL queries to improve performance and reduce their memory and compute (CPU) requirements. 
@@ -22,6 +23,7 @@ Learn how to use observability tools to analyze query execution and view metrics - [Why is my query slow?](#why-is-my-query-slow) - [Strategies for improving query performance](#strategies-for-improving-query-performance) + - [Query only the data you need](#query-only-the-data-you-need) - [Analyze and troubleshoot queries](#analyze-and-troubleshoot-queries) ## Why is my query slow? @@ -29,7 +31,7 @@ Learn how to use observability tools to analyze query execution and view metrics Query performance depends on time range and complexity. If a query is slower than you expect, it might be due to the following reasons: -- It queries data from a large time range. +- It queries data from a large time range. - It includes intensive operations, such as querying many string values or `ORDER BY` sorting or re-sorting large amounts of data. ## Strategies for improving query performance @@ -37,9 +39,7 @@ If a query is slower than you expect, it might be due to the following reasons: The following design strategies generally improve query performance and resource use: - Follow [schema design best practices](/influxdb/cloud-dedicated/write-data/best-practices/schema-design/) to make querying easier and more performant. -- Query only the data you need--for example, include a [`WHERE` clause](/influxdb/cloud-dedicated/reference/sql/where/) that filters data by a time range. - InfluxDB v3 stores data in a Parquet file for each measurement and day, and retrieves files from the Object store to answer a query. - The smaller the time range in your query, the fewer files InfluxDB needs to retrieve from the Object store. +- [Query only the data you need](#query-only-the-data-you-need). - [Downsample data](/influxdb/cloud-dedicated/process-data/downsample/) to reduce the amount of data you need to query. 
Some bottlenecks may be out of your control and are the result of a suboptimal execution plan, such as: @@ -52,9 +52,41 @@ Some bottlenecks may be out of your control and are the result of a suboptimal e {{% note %}} #### Analyze query plans to view metrics and recognize bottlenecks -To view runtime metrics for a query, such as the number of files scanned, use the [`EXPLAIN ANALYZE` keywords](/influxdb/cloud-dedicated/reference/sql/explain/#explain-analyze) and learn how to [analyze a query plan](/influxdb/cloud-dedicated/query-data/troubleshoot-and-optimize/analyze-query-plan/). +To view runtime metrics for a query, such as the number of files scanned, use +the [`EXPLAIN ANALYZE` keywords](/influxdb/cloud-dedicated/reference/sql/explain/#explain-analyze) +and learn how to [analyze a query plan](/influxdb/cloud-dedicated/query-data/troubleshoot-and-optimize/analyze-query-plan/). {{% /note %}} +### Query only the data you need + +#### Include a WHERE clause + +InfluxDB v3 stores data in a Parquet file for each partition. +By default, {{< product-name >}} partitions tables by day, but you can also +[custom-partition your data](/influxdb/cloud-dedicated/admin/custom-partitions/). +At query time, InfluxDB retrieves files from the Object store to answer a query. +To reduce the number of files that a query needs to retrieve from the Object store, +include a [`WHERE` clause](/influxdb/cloud-dedicated/reference/sql/where/) that +filters data by a time range or by specific tag values. + +#### SELECT only columns you need + +Because InfluxDB v3 is a columnar database, it only processes the columns +selected in a query, which can mitigate the query performance impact of +[wide schemas](/influxdb/cloud-dedicated/write-data/best-practices/schema-design/#avoid-wide-schemas). 
+ +However, a non-specific query that retrieves a large number of columns from a +wide schema can be slower and less efficient than a more targeted +query--for example, consider the following queries: + +- `SELECT time,a,b,c` +- `SELECT *` + +If the table contains 10 columns, the difference in performance between the +two queries is minimal. +In a table with over 1000 columns, the `SELECT *` query is slower and +less efficient. + ## Analyze and troubleshoot queries Use the following tools to analyze and troubleshoot queries and find performance bottlenecks: diff --git a/content/influxdb/cloud-dedicated/reference/cli/influxctl/database/create.md b/content/influxdb/cloud-dedicated/reference/cli/influxctl/database/create.md index 366d9d1a7..8f4ba1624 100644 --- a/content/influxdb/cloud-dedicated/reference/cli/influxctl/database/create.md +++ b/content/influxdb/cloud-dedicated/reference/cli/influxctl/database/create.md @@ -102,9 +102,9 @@ influxctl database create [flags] | Flag | | Description | | :--- | :---------------------- | :--------------------------------------------------------------------------------------------------------------------------------------- | -| | `--retention-period` | Database retention period (default is `0s`, infinite) | -| | `--max-tables` | Maximum tables per database (default is 500, `0` uses default) | -| | `--max-columns` | Maximum columns per table (default is 250, `0` uses default) | +| | `--retention-period` | [Database retention period](/influxdb/cloud-dedicated/admin/databases/#retention-periods) (default is `0s`, infinite) | +| | `--max-tables` | [Maximum tables per database](/influxdb/cloud-dedicated/admin/databases/#table-limit) (default is 500, `0` uses default) | +| | `--max-columns` | [Maximum columns per table](/influxdb/cloud-dedicated/admin/databases/#column-limit) (default is 250, `0` uses default) | | | `--template-tag` | Tag to add to partition template (can include multiple of this flag) | | | `--template-tag-bucket` 
| Tag and number of buckets to partition tag values into separated by a comma--for example: `tag1,100` (can include multiple of this flag) | | | `--template-timeformat` | Timestamp format for partition template (default is `%Y-%m-%d`) | diff --git a/content/influxdb/cloud-dedicated/reference/cli/influxctl/database/update.md b/content/influxdb/cloud-dedicated/reference/cli/influxctl/database/update.md index 1f076a98e..a743650b3 100644 --- a/content/influxdb/cloud-dedicated/reference/cli/influxctl/database/update.md +++ b/content/influxdb/cloud-dedicated/reference/cli/influxctl/database/update.md @@ -14,6 +14,8 @@ table (measurement), or column limits in InfluxDB. ## Usage + + ```sh influxctl database update [flags] ``` @@ -28,9 +30,9 @@ influxctl database update [flags] | Flag | | Description | | :--- | :------------------- | :----------------------------------------------------------- | -| | `--retention-period` | Database retention period (default is 0s or infinite) | -| | `--max-tables` | Maximum tables per database (default is 500, 0 uses default) | -| | `--max-columns` | Maximum columns per table (default is 250, 0 uses default) | +| | `--retention-period` | [Database retention period](/influxdb/cloud-dedicated/admin/databases/#retention-periods) (default is `0s` or infinite) | +| | `--max-tables` | [Maximum tables per database](/influxdb/cloud-dedicated/admin/databases/#table-limit) (default is 500, 0 uses default) | +| | `--max-columns` | [Maximum columns per table](/influxdb/cloud-dedicated/admin/databases/#column-limit) (default is 250, 0 uses default) | | `-h` | `--help` | Output command help | {{% caption %}} diff --git a/content/influxdb/cloud-dedicated/write-data/best-practices/schema-design.md b/content/influxdb/cloud-dedicated/write-data/best-practices/schema-design.md index 7d466f453..27e74a121 100644 --- a/content/influxdb/cloud-dedicated/write-data/best-practices/schema-design.md +++ 
b/content/influxdb/cloud-dedicated/write-data/best-practices/schema-design.md @@ -8,6 +8,10 @@ menu: name: Schema design weight: 201 parent: write-best-practices +related: + - /influxdb/cloud-dedicated/admin/databases/ + - /influxdb/cloud-dedicated/reference/cli/influxctl/ + - /influxdb/cloud-dedicated/query-data/troubleshoot-and-optimize/ --- Use the following guidelines to design your [schema](/influxdb/cloud-dedicated/reference/glossary/#schema) @@ -18,7 +22,7 @@ for simpler and more performant queries. - [Tags versus fields](#tags-versus-fields) - [Schema restrictions](#schema-restrictions) - [Do not use duplicate names for tags and fields](#do-not-use-duplicate-names-for-tags-and-fields) - - [Tables can contain up to 250 columns](#tables-can-contain-up-to-250-columns) + - [Maximum number of columns per table](#maximum-number-of-columns-per-table) - [Design for performance](#design-for-performance) - [Avoid wide schemas](#avoid-wide-schemas) - [Avoid sparse schemas](#avoid-sparse-schemas) @@ -37,10 +41,13 @@ Tables contain multiple tags and fields. - **Database**: A named location where time series data is stored. - In {{% product-name %}}, _database_ is synonymous with _bucket_ in InfluxDB Cloud Serverless and InfluxDB TSM implementations. + In {{% product-name %}}, _database_ is synonymous with _bucket_ in InfluxDB + Cloud Serverless and InfluxDB TSM implementations. + A database can contain multiple _tables_. - **Table**: A logical grouping for time series data. - In {{% product-name %}}, _table_ is synonymous with _measurement_ in InfluxDB Cloud Serverless and InfluxDB TSM implementations. + In {{% product-name %}}, _table_ is synonymous with _measurement_ in + InfluxDB Cloud Serverless and InfluxDB TSM implementations. All _points_ in a given table should have the same _tags_. A table contains multiple _tags_ and _fields_. 
- **Tags**: Key-value pairs that store metadata string values for each point--for example, @@ -52,7 +59,9 @@ Tables contain multiple tags and fields. Field values may be null, but at least one field value is not null on any given row. - **Timestamp**: Timestamp associated with the data. When stored on disk and queried, all data is ordered by time. - In InfluxDB, a timestamp is a nanosecond-scale [Unix timestamp](/influxdb/cloud-dedicated/reference/glossary/#unix-timestamp) in UTC. + In InfluxDB, a timestamp is a nanosecond-scale + [Unix timestamp](/influxdb/cloud-dedicated/reference/glossary/#unix-timestamp) + in UTC. A timestamp is never null. {{% note %}} @@ -91,8 +100,9 @@ question as you design your schema. - String - Boolean -{{% product-name %}} doesn't index tag values or field values. -Tag keys, field keys, and other metadata are indexed to optimize performance. +{{% product-name %}} indexes tag keys, field keys, and other metadata + to optimize performance. +It doesn't index tag values or field values. {{% note %}} The InfluxDB v3 storage engine supports infinite tag value and series cardinality. @@ -106,26 +116,39 @@ cardinality doesn't affect the overall performance of your database. ### Do not use duplicate names for tags and fields -Tags and fields within the same table can't be named the same. -All tags and fields are stored as unique columns in a table representing the -table on disk. +Use unique names for tags and fields within the same table. +{{% product-name %}} stores tags and fields as unique columns in a table that +represents the table on disk. If you attempt to write a table that contains tags or fields with the same name, the write fails due to a column conflict. -### Tables can contain up to 250 columns +### Maximum number of columns per table -A table can contain **up to 250 columns**. Each row requires a time column, -but the rest represent tags and fields stored in the table. 
-Therefore, a table can contain one time column and 249 total field and tag columns. -If you attempt to write to a table and exceed the 250 column limit, the -write request fails and InfluxDB returns an error. +A table has a [maximum number of columns](/influxdb/cloud-dedicated/admin/databases/#column-limit). +Each row must include a time column. +As a result, a table can have the following: + +- a time column +- field and tag columns up to one less than the configured maximum + +If you attempt to write to a table and exceed the column limit, then the write +request fails and InfluxDB returns an error. + +InfluxData identified the +[default maximum](/influxdb/cloud-dedicated/admin/databases/#column-limit) +as the safe limit for maintaining system performance and stability. +Exceeding this threshold can result in +[wide schemas](#avoid-wide-schemas), which can negatively impact performance +and resource use, [depending on your queries](#avoid-non-specific-queries), +the shape of your schema, and data types in the schema. --- ## Design for performance -How you structure your schema within a table can affect the overall -performance of queries against that table. +How you structure your schema within a table can affect resource use and +the performance of queries against that table. + The following guidelines help to optimize query performance: - [Avoid wide schemas](#avoid-wide-schemas) @@ -135,26 +158,26 @@ The following guidelines help to optimize query performance: ### Avoid wide schemas -A wide schema is one with many tags and fields and corresponding columns for each. -With the InfluxDB v3 storage engine, wide schemas don't impact query execution performance. -Because InfluxDB v3 is a columnar database, it executes queries only against columns selected in the query. +A wide schema refers to a schema with a large number of columns (tags and fields). 
-Although a wide schema won't affect query performance, it can lead to the following: +Wide schemas can lead to the following issues: -- More resources required for persisting and compacting data during ingestion. -- Decreased sorting performance due to complex primary keys with [too many tags](#avoid-too-many-tags). +- Increased resource usage for persisting and compacting data during ingestion. +- Reduced sorting performance due to complex primary keys with [too many tags](#avoid-too-many-tags). +- Reduced query performance when + [selecting too many columns](/influxdb/cloud-dedicated/query-data/troubleshoot-and-optimize/optimize-queries/#select-only-columns-you-need). -The InfluxDB v3 storage engine has a -[limit of 250 columns per table](#tables-can-contain-up-to-250-columns). - -To avoid a wide schema, limit the number of tags and fields stored in a table. -If you need to store more than 249 total tags and fields, consider segmenting -your fields into a separate table. +To prevent wide schema issues, limit the number of tags and fields stored in a table. +If you need to store more than the [maximum number of columns](/influxdb/cloud-dedicated/admin/databases/), +consider segmenting your fields into separate tables. #### Avoid too many tags -In InfluxDB, the primary key for a row is the combination of the point's timestamp and _tag set_ - the collection of [tag keys](/influxdb/cloud-dedicated/reference/glossary/#tag-key) and [tag values](/influxdb/cloud-dedicated/reference/glossary/#tag-value) on the point. -A point that contains more tags has a more complex primary key, which could impact sorting performance if you sort using all parts of the key. +In InfluxDB, the primary key for a row is the combination of the point's +timestamp and _tag set_ - the collection of [tag keys](/influxdb/cloud-dedicated/reference/glossary/#tag-key) +and [tag values](/influxdb/cloud-dedicated/reference/glossary/#tag-value) on the point. 
+A point that contains more tags has a more complex primary key, which could +impact sorting performance if you sort using all parts of the key. ### Avoid sparse schemas @@ -275,7 +298,8 @@ Without regular expressions, your queries will be easier to write and more perfo #### Not recommended {.orange} -For example, consider the following [line protocol](/influxdb/cloud-dedicated/reference/syntax/line-protocol/) that embeds multiple attributes (location, model, and ID) into a `sensor` tag value: +For example, consider the following [line protocol](/influxdb/cloud-dedicated/reference/syntax/line-protocol/) +that embeds multiple attributes (location, model, and ID) into a `sensor` tag value: ```text home,sensor=loc-kitchen.model-A612.id-1726ZA temp=72.1 diff --git a/content/influxdb/cloud-serverless/admin/buckets/_index.md b/content/influxdb/cloud-serverless/admin/buckets/_index.md index 897dcf571..50085343b 100644 --- a/content/influxdb/cloud-serverless/admin/buckets/_index.md +++ b/content/influxdb/cloud-serverless/admin/buckets/_index.md @@ -12,8 +12,12 @@ weight: 105 influxdb/cloud-serverless/tags: [buckets] aliases: - /influxdb/cloud-serverless/organizations/buckets/ + - /influxdb/cloud-serverless/admin/databases/ alt_links: cloud: /influxdb/cloud/admin/buckets/ + cloud_dedicated: /influxdb/cloud-dedicated/admin/databases/ + clustered: /influxdb/clustered/admin/databases/ + oss: /influxdb/v2/admin/buckets/ --- A **bucket** is a named location where time series data is stored. @@ -30,6 +34,8 @@ support InfluxQL and the InfluxDB v1 API `/write` and `/query` endpoints, which See how to [map v1 databases and retention policies to buckets](/influxdb/cloud-serverless/guides/api-compatibility/v1/#map-v1-databases-and-retention-policies-to-buckets). **If coming from InfluxDB v2 or InfluxDB Cloud**, _buckets_ are functionally equivalent. + +**If coming from InfluxDB Cloud Dedicated or InfluxDB Clustered**, _database_ and _bucket_ are synonymous. 
{{% /note %}} ## Retention period @@ -53,6 +59,7 @@ Each measurement is represented by a table. Time, fields, and tags are each represented by a column. **Maximum number of tables**: 500 + **Maximum number of columns**: 200 The following articles provide information about managing buckets: diff --git a/content/influxdb/cloud-serverless/query-data/troubleshoot-and-optimize/optimize-queries.md b/content/influxdb/cloud-serverless/query-data/troubleshoot-and-optimize/optimize-queries.md index be151b629..c4856774a 100644 --- a/content/influxdb/cloud-serverless/query-data/troubleshoot-and-optimize/optimize-queries.md +++ b/content/influxdb/cloud-serverless/query-data/troubleshoot-and-optimize/optimize-queries.md @@ -22,6 +22,7 @@ Learn how to use observability tools to analyze query execution and view metrics - [Why is my query slow?](#why-is-my-query-slow) - [Strategies for improving query performance](#strategies-for-improving-query-performance) + - [Query only the data you need](#query-only-the-data-you-need) - [Analyze and troubleshoot queries](#analyze-and-troubleshoot-queries) ## Why is my query slow? @@ -36,11 +37,9 @@ If a query is slower than you expect, it might be due to the following reasons: The following design strategies generally improve query performance and resource use: -- Follow [schema design best practices](/influxdb/cloud-serverless/write-data/best-practices/schema-design/) to make querying easier and more performant. -- Query only the data you need--for example, include a [`WHERE` clause](/influxdb/cloud-serverless/reference/sql/where/) that filters data by a time range. - InfluxDB v3 stores data in a Parquet file for each measurement and day, and retrieves files from the Object store to answer a query. - The smaller the time range in your query, the fewer files InfluxDB needs to retrieve from the Object store. -- [Downsample data](/influxdb/cloud-serverless/process-data/downsample/) to reduce the amount of data you need to query. 
+- Follow [schema design best practices](/influxdb/cloud-serverless/write-data/best-practices/schema-design/) to make querying easier and more performant. +- [Query only the data you need](#query-only-the-data-you-need). +- [Downsample data](/influxdb/cloud-serverless/process-data/downsample/) to reduce the amount of data you need to query. Some bottlenecks may be out of your control and are the result of a suboptimal execution plan, such as: @@ -55,6 +54,34 @@ Some bottlenecks may be out of your control and are the result of a suboptimal e To view runtime metrics for a query, such as the number of files scanned, use the [`EXPLAIN ANALYZE` keywords](/influxdb/cloud-serverless/reference/sql/explain/#explain-analyze) and learn how to [analyze a query plan](/influxdb/cloud-serverless/query-data/troubleshoot-and-optimize/analyze-query-plan/). {{% /note %}} +### Query only the data you need + +#### Include a WHERE clause + +InfluxDB v3 stores data in a Parquet file for each measurement and day, and +retrieves files from the Object store to answer a query. +To reduce the number of files that a query needs to retrieve from the Object store, +include a [`WHERE` clause](/influxdb/cloud-serverless/reference/sql/where/) that +filters data by a time range. + +#### SELECT only columns you need + +Because InfluxDB v3 is a columnar database, it only processes the columns +selected in a query, which can mitigate the query performance impact of +[wide schemas](/influxdb/cloud-serverless/write-data/best-practices/schema-design/#avoid-wide-schemas). + +However, a non-specific query that retrieves a large number of columns from a +wide schema can be slower and less efficient than a more targeted +query--for example, consider the following queries: + +- `SELECT time,a,b,c` +- `SELECT *` + +If the table contains 10 columns, the difference in performance between the +two queries is minimal. +In a table with over 1000 columns, the `SELECT *` query is slower and +less efficient. 
+ ## Analyze and troubleshoot queries Use the following tools to analyze and troubleshoot queries and find performance bottlenecks: diff --git a/content/influxdb/cloud-serverless/write-data/best-practices/schema-design.md b/content/influxdb/cloud-serverless/write-data/best-practices/schema-design.md index 0e0018aab..b4d834774 100644 --- a/content/influxdb/cloud-serverless/write-data/best-practices/schema-design.md +++ b/content/influxdb/cloud-serverless/write-data/best-practices/schema-design.md @@ -8,25 +8,25 @@ menu: name: Schema design weight: 201 parent: write-best-practices +related: + - /influxdb/cloud-serverless/admin/buckets/ + - /influxdb/cloud-serverless/query-data/troubleshoot-and-optimize/ --- Use the following guidelines to design your [schema](/influxdb/cloud-serverless/reference/glossary/#schema) for simpler and more performant queries. - - - [InfluxDB data structure](#influxdb-data-structure) - [Primary keys](#primary-keys) - [Tags versus fields](#tags-versus-fields) - [Schema restrictions](#schema-restrictions) - [Do not use duplicate names for tags and fields](#do-not-use-duplicate-names-for-tags-and-fields) - - [Measurements can contain up to 200 columns](#measurements-can-contain-up-to-200-columns) + - [Maximum number of columns per measurement](#maximum-number-of-columns-per-measurement) - [Design for performance](#design-for-performance) - [Avoid wide schemas](#avoid-wide-schemas) - - [Avoid too many tags](#avoid-too-many-tags) - [Avoid sparse schemas](#avoid-sparse-schemas) - - [Writing individual fields with different timestamps](#writing-individual-fields-with-different-timestamps) - [Measurement schemas should be homogenous](#measurement-schemas-should-be-homogenous) + - [Use the best data type for your data](#use-the-best-data-type-for-your-data) - [Design for query simplicity](#design-for-query-simplicity) - [Keep measurement names, tags, and fields simple](#keep-measurement-names-tags-and-fields-simple) - [Avoid keywords and special 
characters](#avoid-keywords-and-special-characters) @@ -55,7 +55,7 @@ tags and fields. Field values may be null, but at least one field value is not null on any given row. - **Timestamp**: Timestamp associated with the data. When stored on disk and queried, all data is ordered by time. - In InfluxDB, a timestamp is a nanosecond-scale [unix timestamp](#unix-timestamp) in UTC. + In InfluxDB, a timestamp is a nanosecond-scale [Unix timestamp](/influxdb/cloud-serverless/reference/glossary/#unix-timestamp) in UTC. A timestamp is never null. ### Primary keys @@ -80,13 +80,14 @@ question as you design your schema. - String - Boolean -{{% product-name %}} doesn't index tag values or field values. -Tag keys, field keys, and other metadata are indexed to optimize performance. +{{% product-name %}} indexes tag keys, field keys, and other metadata + to optimize performance. +It doesn't index tag values or field values. {{% note %}} The InfluxDB v3 storage engine supports infinite tag value and series cardinality. Unlike InfluxDB backed by the TSM storage engine, **tag value** -cardinality doesn't affect the overall performance of your database. +cardinality doesn't affect the overall performance of your bucket. {{% /note %}} --- @@ -95,19 +96,23 @@ cardinality doesn't affect the overall performance of your database. ### Do not use duplicate names for tags and fields -Tags and fields within the same measurement can't be named the same. -All tags and fields are stored as unique columns in a table representing the -measurement on disk. +Use unique names for tags and fields within the same measurement. +{{% product-name %}} stores tags and fields as unique columns in a table that +represents the measurement on disk. If you attempt to write a measurement that contains tags or fields with the same name, the write fails due to a column conflict. -### Measurements can contain up to 200 columns +### Maximum number of columns per measurement -A measurement can contain **up to 200 columns**. 
Each row requires a time column, -but the rest represent tags and fields stored in the measurement. -Therefore, a measurement can contain one time column and 199 total field and tag columns. -If you attempt to write to a measurement and exceed the 200 column limit, the -write request fails and InfluxDB returns an error. +A measurement has a [maximum number of columns](/influxdb/cloud-serverless/admin/buckets/#column-limit). +Each row must include a time column. +As a result, a measurement can have the following: + +- a time column +- field and tag columns up to the maximum number of columns + +If you attempt to write to a measurement and exceed the column limit, then the write +request fails and InfluxDB returns an error. --- @@ -124,21 +129,18 @@ The following guidelines help to optimize query performance: ### Avoid wide schemas -A wide schema is one with many tags and fields and corresponding columns for each. -With the InfluxDB v3 storage engine, wide schemas don't impact query execution performance. -Because InfluxDB v3 is a columnar database, it executes queries only against columns selected in the query. +A wide schema refers to a schema with a large number of columns (tags and fields). -Although a wide schema won't affect query performance, it can lead to the following: +Wide schemas can lead to the following issues: -- More resources required for persisting and compacting data during ingestion. -- Decreased sorting performance due to complex primary keys with [too many tags](#avoid-too-many-tags). +- Increased resource usage for persisting and compacting data during ingestion. +- Reduced sorting performance due to complex primary keys with [too many tags](#avoid-too-many-tags). +- Reduced query performance when + [selecting too many columns](/influxdb/cloud-serverless/query-data/troubleshoot-and-optimize/optimize-queries/#select-only-columns-you-need). 
-The InfluxDB v3 storage engine has a -[limit of 200 columns per measurement](#measurements-can-contain-up-to-200-columns). - -To avoid a wide schema, limit the number of tags and fields stored in a measurement. -If you need to store more than 199 total tags and fields, consider segmenting -your fields into a separate measurement. +To prevent wide schema issues, limit the number of tags and fields stored in a measurement. +If you need to store more than the [maximum number of columns](/influxdb/cloud-serverless/admin/buckets/), +consider segmenting your fields into separate measurements. #### Avoid too many tags @@ -225,6 +227,12 @@ full of null values (also known as a _sparse schema_): {{% /expand %}} {{< /expand-wrapper >}} +### Use the best data type for your data + +When writing data to a field, use the most appropriate [data type](/influxdb/cloud-serverless/reference/glossary/#data-type) for your data--write integers as integers, decimals as floats, and booleans as booleans. +A query against a field that stores integers outperforms a query against string data; +querying over many long string values can negatively affect performance. 
+ ## Design for query simplicity Naming conventions for measurements, tag keys, and field keys can simplify or diff --git a/content/influxdb/clustered/admin/databases/_index.md b/content/influxdb/clustered/admin/databases/_index.md index 31c6f0a1e..23ab6d1f6 100644 --- a/content/influxdb/clustered/admin/databases/_index.md +++ b/content/influxdb/clustered/admin/databases/_index.md @@ -11,6 +11,14 @@ menu: parent: Administer InfluxDB Clustered weight: 103 influxdb/clustered/tags: [databases] +related: + - /influxdb/clustered/write-data/best-practices/schema-design/ + - /influxdb/clustered/reference/cli/influxctl/ +alt_links: + cloud: /influxdb/cloud/admin/buckets/ + cloud_dedicated: /influxdb/cloud-dedicated/admin/databases/ + cloud_serverless: /influxdb/cloud-serverless/admin/buckets/ + oss: /influxdb/v2/admin/buckets/ --- An InfluxDB database is a named location where time series data is stored. @@ -19,7 +27,8 @@ Each InfluxDB database has a [retention period](#retention-periods). {{% note %}} **If coming from InfluxDB v1**, the concepts of databases and retention policies have been combined into a single concept--database. Retention policies are no -longer part of the InfluxDB data model. However, InfluxDB Clustered does +longer part of the InfluxDB data model. +However, {{% product-name %}} does support InfluxQL, which requires databases and retention policies. See [InfluxQL DBRP naming convention](/influxdb/clustered/admin/databases/create/#influxql-dbrp-naming-convention). @@ -41,9 +50,10 @@ never be removed by the retention enforcement service. ## Table and column limits -In {{< product-name >}}, table (measurement) and column limits can be -custom configured when [creating](#create-a-database) or -[updating a database](#update-a-database). +You can customize [table (measurement) limits](#table-limit) and +[table column limits](#column-limit) when you +[create](#create-a-database) or +[update a database](#update-a-database) in {{< product-name >}}. 
### Table limit @@ -61,7 +71,8 @@ cluster in the following ways: {{% expand "**May improve query performance** View more info" %}} Schemas with many measurements that contain -[focused sets of tags and fields](/influxdb/clustered/write-data/best-practices/schema-design/#design-for-performance) can make it easier for the query engine to +[focused sets of tags and fields](/influxdb/clustered/write-data/best-practices/schema-design/#design-for-performance) +can make it easier for the query engine to identify what partitions contain the queried data, resulting in better query performance. @@ -73,7 +84,7 @@ data by measurement and time range and stores each partition as a Parquet file in your cluster's object store. By increasing the number of measurements (tables) you can store in your database, you also increase the potential for more `PUT` requests into your object store as InfluxDB creates more partitions. -Each `PUT` request incurs a monetary cost and will increase the operating cost of +Each `PUT` request incurs a monetary cost and increases the operating cost of your cluster. {{% /expand %}} @@ -92,20 +103,33 @@ operating cost of your cluster. **Default maximum number of columns**: 250 -Time, fields, and tags are each represented by a column in a table. +**Configurable maximum number of columns**: 1000 + +Each row must include a time column, with the remaining columns representing +tags and fields. +As a result, a table with 250 columns can have one time column and up to +249 field and tag columns. + +If you attempt to write to a table and exceed the column limit, the write +request fails and InfluxDB returns an error. + +If you update the column limit for a database, the limit applies to newly +created tables; it doesn't override the column limit for existing tables. 
+ Increasing your column limit affects your {{% product-name omit=" Clustered" %}} cluster in the following ways: {{< expand-wrapper >}} -{{% expand "May adversely affect query performance" %}} +{{% expand "May adversely affect system performance" %}} -At query time, the InfluxDB query engine identifies what table contains the queried -data and then evaluates each row in the table to match the conditions of the query. -The more columns that are in each row, the longer it takes to evaluate each row. - -Through performance testing, InfluxData has identified 250 columns as the -threshold beyond which query performance may be affected -(depending on the shape of and data types in your schema). +When creating or updating a database, you can configure the table column limit to be +lower than the default or up to 1000, based on your requirements. +InfluxData identified 250 columns as the safe limit for maintaining system +performance and stability. +Exceeding this threshold can result in +[wide schemas](/influxdb/clustered/write-data/best-practices/schema-design/#avoid-wide-schemas), +which can negatively impact performance and resource use, +depending on your queries, the shape of your schema, and data types in the schema. {{% /expand %}} {{< /expand-wrapper >}} diff --git a/content/influxdb/clustered/admin/databases/update.md b/content/influxdb/clustered/admin/databases/update.md index 99534f0ad..785799ad6 100644 --- a/content/influxdb/clustered/admin/databases/update.md +++ b/content/influxdb/clustered/admin/databases/update.md @@ -26,10 +26,12 @@ to update a database in your {{< product-name omit=" Clustered" >}} cluster. 2. Run the `influxctl database update` command and provide the following: - Database name - - _Optional_: Database [retention period](/influxdb/cloud-dedicated/admin/databases/#retention-periods). - Default is infinite (`0`). - - _Optional_: Database table (measurement) limit. Default is `500`. - - _Optional_: Database column limit. Default is `250`. 
+ - _Optional_: Database [retention period](/influxdb/clustered/admin/databases/#retention-periods). + Default is infinite (`0`). + - _Optional_: Database [table (measurement) limit](/influxdb/clustered/admin/databases/#table-limit). + Default is `500`. + - _Optional_: Database [column limit](/influxdb/clustered/admin/databases/#column-limit). + Default is `250`. {{% code-placeholders "DATABASE_NAME|30d|500|200" %}} diff --git a/content/influxdb/clustered/query-data/troubleshoot-and-optimize/optimize-queries.md b/content/influxdb/clustered/query-data/troubleshoot-and-optimize/optimize-queries.md index 18644f588..b7751578e 100644 --- a/content/influxdb/clustered/query-data/troubleshoot-and-optimize/optimize-queries.md +++ b/content/influxdb/clustered/query-data/troubleshoot-and-optimize/optimize-queries.md @@ -12,6 +12,7 @@ influxdb/clustered/tags: [query, performance, observability, errors, sql, influx related: - /influxdb/clustered/query-data/sql/ - /influxdb/clustered/query-data/influxql/ + - /influxdb/clustered/query-data/execute-queries/analyze-query-plan/ aliases: - /influxdb/clustered/query-data/execute-queries/optimize-queries/ - /influxdb/clustered/query-data/execute-queries/analyze-query-plan/ @@ -22,6 +23,7 @@ Learn how to use observability tools to analyze query execution and view metrics - [Why is my query slow?](#why-is-my-query-slow) - [Strategies for improving query performance](#strategies-for-improving-query-performance) + - [Query only the data you need](#query-only-the-data-you-need) - [Analyze and troubleshoot queries](#analyze-and-troubleshoot-queries) ## Why is my query slow? @@ -37,10 +39,7 @@ If a query is slower than you expect, it might be due to the following reasons: The following design strategies generally improve query performance and resource use: - Follow [schema design best practices](/influxdb/clustered/write-data/best-practices/schema-design/) to make querying easier and more performant. 
-- Query only the data you need--for example, include a [`WHERE` clause](/influxdb/clustered/reference/sql/where/) that filters data by a time range. - InfluxDB v3 stores data in a Parquet file for each measurement and day, and retrieves files from the Object store to answer a query. - The smaller the time range in your query, the fewer files InfluxDB needs to retrieve from the Object store. - +- [Query only the data you need](#query-only-the-data-you-need). - [Downsample data](/influxdb/clustered/process-data/downsample/) to reduce the amount of data you need to query. Some bottlenecks may be out of your control and are the result of a suboptimal execution plan, such as: @@ -53,9 +52,41 @@ Some bottlenecks may be out of your control and are the result of a suboptimal e {{% note %}} #### Analyze query plans to view metrics and recognize bottlenecks -To view runtime metrics for a query, such as the number of files scanned, use the [`EXPLAIN ANALYZE` keywords](/influxdb/clustered/reference/sql/explain/#explain-analyze) and learn how to [analyze a query plan](/influxdb/clustered/query-data/troubleshoot-and-optimize/analyze-query-plan/). +To view runtime metrics for a query, such as the number of files scanned, use +the [`EXPLAIN ANALYZE` keywords](/influxdb/clustered/reference/sql/explain/#explain-analyze) +and learn how to [analyze a query plan](/influxdb/clustered/query-data/troubleshoot-and-optimize/analyze-query-plan/). {{% /note %}} +### Query only the data you need + +#### Include a WHERE clause + +InfluxDB v3 stores data in a Parquet file for each partition. +By default, {{< product-name >}} partitions tables by day, but you can also +[custom-partition your data](/influxdb/clustered/admin/custom-partitions/). +At query time, InfluxDB retrieves files from the Object store to answer a query. 
+To reduce the number of files that a query needs to retrieve from the Object store, +include a [`WHERE` clause](/influxdb/clustered/reference/sql/where/) that +filters data by a time range or by specific tag values. + +#### SELECT only columns you need + +Because InfluxDB v3 is a columnar database, it only processes the columns +selected in a query, which can mitigate the query performance impact of +[wide schemas](/influxdb/clustered/write-data/best-practices/schema-design/#avoid-wide-schemas). + +However, a non-specific query that retrieves a large number of columns from a +wide schema can be slower and less efficient than a more targeted +query--for example, consider the following queries: + +- `SELECT time,a,b,c` +- `SELECT *` + +If the table contains 10 columns, the difference in performance between the +two queries is minimal. +In a table with over 1000 columns, the `SELECT *` query is slower and +less efficient. + ## Analyze and troubleshoot queries Learn how to [analyze a query plan](/influxdb/clustered/query-data/troubleshoot-and-optimize/analyze-query-plan/) diff --git a/content/influxdb/clustered/reference/cli/influxctl/database/create.md b/content/influxdb/clustered/reference/cli/influxctl/database/create.md index d77c3f3ca..fdc3c52e0 100644 --- a/content/influxdb/clustered/reference/cli/influxctl/database/create.md +++ b/content/influxdb/clustered/reference/cli/influxctl/database/create.md @@ -101,9 +101,9 @@ influxctl database create [flags] | Flag | | Description | | :--- | :---------------------- | :--------------------------------------------------------------------------------------------------------------------------------------- | -| | `--retention-period` | Database retention period (default is `0s`, infinite) | -| | `--max-tables` | Maximum tables per database (default is 500, `0` uses default) | -| | `--max-columns` | Maximum columns per table (default is 250, `0` uses default) | +| | `--retention-period` | [Database retention period 
](/influxdb/clustered/admin/databases/#retention-periods)(default is `0s`, infinite) | +| | `--max-tables` | [Maximum tables per database](/influxdb/clustered/admin/databases/#table-limit) (default is 500, `0` uses default) | +| | `--max-columns` | [Maximum columns per table](/influxdb/clustered/admin/databases/#column-limit) (default is 250, `0` uses default) | | | `--template-tag` | Tag to add to partition template (can include multiple of this flag) | | | `--template-tag-bucket` | Tag and number of buckets to partition tag values into separated by a comma--for example: `tag1,100` (can include multiple of this flag) | | | `--template-timeformat` | Timestamp format for partition template (default is `%Y-%m-%d`) | diff --git a/content/influxdb/clustered/reference/cli/influxctl/database/update.md b/content/influxdb/clustered/reference/cli/influxctl/database/update.md index 1afce5b97..d8935f24a 100644 --- a/content/influxdb/clustered/reference/cli/influxctl/database/update.md +++ b/content/influxdb/clustered/reference/cli/influxctl/database/update.md @@ -14,6 +14,8 @@ table (measurement), or column limits in InfluxDB. 
## Usage + + ```sh influxctl database update [flags] ``` @@ -28,9 +30,9 @@ influxctl database update [flags] | Flag | | Description | | :--- | :------------------- | :----------------------------------------------------------- | -| | `--retention-period` | Database retention period (default is 0s or infinite) | -| | `--max-tables` | Maximum tables per database (default is 500, 0 uses default) | -| | `--max-columns` | Maximum columns per table (default is 250, 0 uses default) | +| | `--retention-period` | [Database retention period](/influxdb/clustered/admin/databases/#retention-periods) (default is `0s` or infinite) | +| | `--max-tables` | [Maximum tables per database](/influxdb/clustered/admin/databases/#table-limit) (default is 500, `0` uses default) | +| | `--max-columns` | [Maximum columns per table](/influxdb/clustered/admin/databases/#column-limit) (default is 250, `0` uses default) | | `-h` | `--help` | Output command help | {{% caption %}} diff --git a/content/influxdb/clustered/write-data/best-practices/schema-design.md b/content/influxdb/clustered/write-data/best-practices/schema-design.md index 658523e07..f99ed18ea 100644 --- a/content/influxdb/clustered/write-data/best-practices/schema-design.md +++ b/content/influxdb/clustered/write-data/best-practices/schema-design.md @@ -8,6 +8,10 @@ menu: name: Schema design weight: 201 parent: write-best-practices +related: + - /influxdb/clustered/admin/databases/ + - /influxdb/clustered/reference/cli/influxctl/ + - /influxdb/clustered/query-data/troubleshoot-and-optimize/ --- Use the following guidelines to design your [schema](/influxdb/clustered/reference/glossary/#schema) @@ -18,7 +22,7 @@ for simpler and more performant queries. 
- [Tags versus fields](#tags-versus-fields) - [Schema restrictions](#schema-restrictions) - [Do not use duplicate names for tags and fields](#do-not-use-duplicate-names-for-tags-and-fields) - - [Tables can contain up to 250 columns](#tables-can-contain-up-to-250-columns) + - [Maximum number of columns per table](#maximum-number-of-columns-per-table) - [Design for performance](#design-for-performance) - [Avoid wide schemas](#avoid-wide-schemas) - [Avoid sparse schemas](#avoid-sparse-schemas) @@ -37,10 +41,13 @@ Tables contain multiple tags and fields. - **Database**: A named location where time series data is stored. - In {{% product-name %}}, _database_ is synonymous with _bucket_ in InfluxDB Cloud Serverless and InfluxDB TSM implementations. + In {{% product-name %}}, _database_ is synonymous with _bucket_ in InfluxDB + Cloud Serverless and InfluxDB TSM implementations. + A database can contain multiple _tables_. - **Table**: A logical grouping for time series data. - In {{% product-name %}}, _table_ is synonymous with _measurement_ in InfluxDB Cloud Serverless and InfluxDB TSM implementations. + In {{% product-name %}}, _table_ is synonymous with _measurement_ in + InfluxDB Cloud Serverless and InfluxDB TSM implementations. All _points_ in a given table should have the same _tags_. A table contains multiple _tags_ and _fields_. - **Tags**: Key-value pairs that store metadata string values for each point--for example, @@ -52,7 +59,9 @@ Tables contain multiple tags and fields. Field values may be null, but at least one field value is not null on any given row. - **Timestamp**: Timestamp associated with the data. When stored on disk and queried, all data is ordered by time. - In InfluxDB, a timestamp is a nanosecond-scale [unix timestamp](/influxdb/clustered/reference/glossary/#unix-timestamp) in UTC. + In InfluxDB, a timestamp is a nanosecond-scale + [Unix timestamp](/influxdb/clustered/reference/glossary/#unix-timestamp) + in UTC. A timestamp is never null. 
{{% note %}} @@ -91,8 +100,9 @@ question as you design your schema. - String - Boolean -{{% product-name %}} doesn't index tag values or field values. -Tag keys, field keys, and other metadata are indexed to optimize performance. +{{% product-name %}} indexes tag keys, field keys, and other metadata + to optimize performance. +It doesn't index tag values or field values. {{% note %}} The InfluxDB v3 storage engine supports infinite tag value and series cardinality. @@ -106,26 +116,39 @@ cardinality doesn't affect the overall performance of your database. ### Do not use duplicate names for tags and fields -Tags and fields within the same table can't be named the same. -All tags and fields are stored as unique columns in a table representing the -table on disk. +Use unique names for tags and fields within the same table. +{{% product-name %}} stores tags and fields as unique columns in a table that +represents the table on disk. If you attempt to write a table that contains tags or fields with the same name, the write fails due to a column conflict. -### Tables can contain up to 250 columns +### Maximum number of columns per table -A table can contain **up to 250 columns**. Each row requires a time column, -but the rest represent tags and fields stored in the table. -Therefore, a table can contain one time column and 249 total field and tag columns. -If you attempt to write to a table and exceed the 250 column limit, the -write request fails and InfluxDB returns an error. +A table has a [maximum number of columns](/influxdb/clustered/admin/databases/#column-limit). +Each row must include a time column. +As a result, a table can have the following: + +- a time column +- field and tag columns up to the configured maximum + +If you attempt to write to a table and exceed the column limit, then the write +request fails and InfluxDB returns an error. 
+ +InfluxData identified the +[default maximum](/influxdb/clustered/admin/databases/#column-limit) +as the safe limit for maintaining system performance and stability. +Exceeding this threshold can result in +[wide schemas](#avoid-wide-schemas), which can negatively impact performance +and resource use, [depending on your queries](#avoid-non-specific-queries), +the shape of your schema, and data types in the schema. --- ## Design for performance -How you structure your schema within a table can affect the overall -performance of queries against that table. +How you structure your schema within a table can affect resource use and +the performance of queries against that table. + The following guidelines help to optimize query performance: - [Avoid wide schemas](#avoid-wide-schemas) @@ -135,26 +158,26 @@ The following guidelines help to optimize query performance: ### Avoid wide schemas -A wide schema is one with many tags and fields and corresponding columns for each. -With the InfluxDB v3 storage engine, wide schemas don't impact query execution performance. -Because InfluxDB v3 is a columnar database, it executes queries only against columns selected in the query. +A wide schema refers to a schema with a large number of columns (tags and fields). -Although a wide schema won't affect query performance, it can lead to the following: +Wide schemas can lead to the following issues: -- More resources required for persisting and compacting data during ingestion. -- Decreased sorting performance due to complex primary keys with [too many tags](#avoid-too-many-tags). +- Increased resource usage for persisting and compacting data during ingestion. +- Reduced sorting performance due to complex primary keys with [too many tags](#avoid-too-many-tags). +- Reduced query performance when + [selecting too many columns](/influxdb/clustered/query-data/troubleshoot-and-optimize/optimize-queries/#select-only-columns-you-need). 
-The InfluxDB v3 storage engine has a -[limit of 250 columns per table](#tables-can-contain-up-to-250-columns). - -To avoid a wide schema, limit the number of tags and fields stored in a table. -If you need to store more than 249 total tags and fields, consider segmenting -your fields into a separate table. +To prevent wide schema issues, limit the number of tags and fields stored in a table. +If you need to store more than the [maximum number of columns](/influxdb/clustered/admin/databases/), +consider segmenting your fields into separate tables. #### Avoid too many tags -In InfluxDB, the primary key for a row is the combination of the point's timestamp and _tag set_ - the collection of [tag keys](/influxdb/clustered/reference/glossary/#tag-key) and [tag values](/influxdb/clustered/reference/glossary/#tag-value) on the point. -A point that contains more tags has a more complex primary key, which could impact sorting performance if you sort using all parts of the key. +In InfluxDB, the primary key for a row is the combination of the point's +timestamp and _tag set_ - the collection of [tag keys](/influxdb/clustered/reference/glossary/#tag-key) +and [tag values](/influxdb/clustered/reference/glossary/#tag-value) on the point. +A point that contains more tags has a more complex primary key, which could +impact sorting performance if you sort using all parts of the key. 
### Avoid sparse schemas @@ -275,7 +298,8 @@ Without regular expressions, your queries will be easier to write and more perfo #### Not recommended {.orange} -For example, consider the following [line protocol](/influxdb/clustered/reference/syntax/line-protocol/) that embeds multiple attributes (location, model, and ID) into a `sensor` tag value: +For example, consider the following [line protocol](/influxdb/clustered/reference/syntax/line-protocol/) +that embeds multiple attributes (location, model, and ID) into a `sensor` tag value: ```text home,sensor=loc-kitchen.model-A612.id-1726ZA temp=72.1 diff --git a/lefthook.yml b/lefthook.yml index 24349243e..7608bc2f6 100644 --- a/lefthook.yml +++ b/lefthook.yml @@ -10,49 +10,49 @@ pre-commit: parallel: true commands: cloud-lint: - tags: lint v2 + tags: lint,v2 glob: "content/influxdb/cloud/**/*.md" run: '.ci/vale/vale.sh --config=.vale.ini --minAlertLevel=error {staged_files}' cloud-dedicated-lint: - tags: lint v3 + tags: lint,v3 glob: "content/influxdb/cloud-dedicated/**/*.md" run: '.ci/vale/vale.sh --config=content/influxdb/cloud-dedicated/.vale.ini --minAlertLevel=error {staged_files}' cloud-serverless-lint: - tags: lint v3 + tags: lint,v3 glob: "content/influxdb/cloud-serverless/**/*.md" run: '.ci/vale/vale.sh --config=content/influxdb/cloud-serverless/.vale.ini --minAlertLevel=error {staged_files}' clustered-lint: - tags: lint v3 + tags: lint,v3 glob: "content/influxdb/clustered/**/*.md" run: '.ci/vale/vale.sh --config=content/influxdb/cloud-serverless/.vale.ini --minAlertLevel=error {staged_files}' telegraf-lint: - tags: lint clients + tags: lint,clients glob: "content/telegraf/**/*.md" run: '.ci/vale/vale.sh --config=.vale.ini --minAlertLevel=error {staged_files}' v2-lint: - tags: lint v2 + tags: lint,v2 glob: "content/influxdb/v2/**/*.md" run: '.ci/vale/vale.sh --config=content/influxdb/v2/.vale.ini --minAlertLevel=error {staged_files}' cloud-pytest: glob: content/influxdb/cloud/**/*.md - tags: test codeblocks 
v2 + tags: test,codeblocks,v2 env: - SERVICE: cloud-pytest run: docker compose run $SERVICE '{staged_files}' cloud-dedicated-pytest: - tags: test codeblocks v3 + tags: test,codeblocks,v3 glob: content/influxdb/cloud-dedicated/**/*.md env: - SERVICE: cloud-dedicated-pytest @@ -61,13 +61,13 @@ pre-commit: docker compose run $SERVICE {staged_files} ; ./test/scripts/monitor-tests.sh stop $SERVICE cloud-serverless-pytest: - tags: test codeblocks v3 + tags: test,codeblocks,v3 glob: content/influxdb/cloud-serverless/**/*.md env: - SERVICE: cloud-serverless-pytest run: docker compose run $SERVICE '{staged_files}' clustered-pytest: - tags: test codeblocks v3 + tags: test,codeblocks,v3 glob: content/influxdb/clustered/**/*.md env: - SERVICE: clustered-pytest @@ -76,19 +76,19 @@ pre-commit: docker compose run $SERVICE {staged_files} ; ./test/scripts/monitor-tests.sh stop $SERVICE telegraf-pytest: - tags: test codeblocks + tags: test,codeblocks glob: content/telegraf/**/*.md env: - SERVICE: telegraf-pytest run: docker compose run $SERVICE '{staged_files}' v2-pytest: - tags: test codeblocks v2 + tags: test,codeblocks,v2 glob: content/influxdb/v2/**/*.md env: - SERVICE: v2-pytest run: docker compose run --rm $SERVICE '{staged_files}' prettier: - tags: frontend style + tags: frontend,style glob: "*.{css,js,ts,jsx,tsx}" run: yarn prettier {staged_files} diff --git a/package.json b/package.json index 3522aa659..e51fa8ed1 100644 --- a/package.json +++ b/package.json @@ -15,7 +15,7 @@ "prettier-plugin-sql": "^0.18.0" }, "dependencies": { - "axios": "^1.6.0", + "axios": "^1.7.4", "js-yaml": "^4.1.0" }, "scripts": { diff --git a/yarn.lock b/yarn.lock index 6ef4a9416..a9f404cd1 100644 --- a/yarn.lock +++ b/yarn.lock @@ -164,7 +164,7 @@ autoprefixer@>=10.2.5: picocolors "^1.0.1" postcss-value-parser "^4.2.0" -axios@^1.4.0, axios@^1.6.0: +axios@^1.4.0, axios@^1.7.4: version "1.7.4" resolved "https://registry.yarnpkg.com/axios/-/axios-1.7.4.tgz#4c8ded1b43683c8dd362973c393f3ede24052aa2" 
integrity sha512-DukmaFRnY6AzAALSH4J2M3k6PkaC+MfaAGdEERRWcC9q3/TWQwLpHR8ZRLKTdQ3aBDL64EdluRDjJqKw+BPZEw==