From 312342af5e1381c317aa269c550c5b84dc49322d Mon Sep 17 00:00:00 2001 From: Scott Anderson Date: Wed, 29 Jul 2020 16:16:31 -0600 Subject: [PATCH] ported influxdb 1.7 docs --- content/influxdb/v1.7/_index.md | 32 + .../influxdb/v1.7/about_the_project/_index.md | 31 + .../influxdb/v1.7/about_the_project/cla.md | 12 + .../v1.7/about_the_project/contributing.md | 12 + .../v1.7/about_the_project/licenses.md | 11 + .../releasenotes-changelog.md | 1281 +++ .../v1.7/about_the_project/third-party.md | 20 + .../additional_resources.md | 36 + .../influxdb/v1.7/administration/_index.md | 56 + .../authentication_and_authorization.md | 514 ++ .../v1.7/administration/backup_and_restore.md | 393 + .../influxdb/v1.7/administration/config.md | 1255 +++ .../v1.7/administration/https_setup.md | 161 + content/influxdb/v1.7/administration/logs.md | 351 + content/influxdb/v1.7/administration/ports.md | 59 + .../v1.7/administration/rebuild-tsi-index.md | 52 + .../influxdb/v1.7/administration/security.md | 51 + .../v1.7/administration/server_monitoring.md | 166 + .../stability_and_compatibility.md | 27 + .../administration/subscription-management.md | 207 + .../influxdb/v1.7/administration/upgrading.md | 60 + content/influxdb/v1.7/concepts/_index.md | 40 + content/influxdb/v1.7/concepts/crosswalk.md | 200 + content/influxdb/v1.7/concepts/glossary.md | 384 + .../v1.7/concepts/insights_tradeoffs.md | 57 + .../influxdb/v1.7/concepts/key_concepts.md | 213 + .../v1.7/concepts/schema_and_data_layout.md | 212 + .../influxdb/v1.7/concepts/storage_engine.md | 436 + .../v1.7/concepts/time-series-index.md | 50 + content/influxdb/v1.7/concepts/tsi-details.md | 155 + content/influxdb/v1.7/data_sources/carbon.md | 0 .../influxdb/v1.7/data_sources/collectd.md | 0 content/influxdb/v1.7/data_sources/diamond.md | 22 + .../influxdb/v1.7/data_sources/opentsdb.md | 19 + content/influxdb/v1.7/external_resources.md | 32 + content/influxdb/v1.7/guides/_index.md | 10 + .../v1.7/guides/calculating_percentages.md | 115 + .../v1.7/guides/downsampling_and_retention.md | 232 + .../influxdb/v1.7/guides/hardware_sizing.md | 481 ++ .../v1.7/guides/migrate-to-enterprise.md | 11 + content/influxdb/v1.7/guides/querying_data.md | 166 + content/influxdb/v1.7/guides/writing_data.md | 178 + .../influxdb/v1.7/high_availability/_index.md | 15 + .../v1.7/high_availability/clusters.md | 18 + content/influxdb/v1.7/introduction/_index.md | 21 + .../influxdb/v1.7/introduction/downloading.md | 10 + .../v1.7/introduction/getting-started.md | 196 + .../v1.7/introduction/installation.md | 368 + .../influxdb/v1.7/query_language/_index.md | 73 + .../v1.7/query_language/continuous_queries.md | 985 +++ .../v1.7/query_language/data_download.md | 121 + .../v1.7/query_language/data_exploration.md | 3332 ++++++++ .../query_language/database_management.md | 379 + .../influxdb/v1.7/query_language/functions.md | 7196 +++++++++++++++++ .../v1.7/query_language/math_operators.md | 307 + .../v1.7/query_language/schema_exploration.md | 1265 +++ content/influxdb/v1.7/query_language/spec.md | 1480 ++++ .../v1.7/supported_protocols/_index.md | 27 + .../v1.7/supported_protocols/collectd.md | 52 + .../v1.7/supported_protocols/graphite.md | 210 + .../v1.7/supported_protocols/opentsdb.md | 24 + .../v1.7/supported_protocols/prometheus.md | 120 + .../influxdb/v1.7/supported_protocols/udp.md | 141 + content/influxdb/v1.7/tools/_index.md | 72 + content/influxdb/v1.7/tools/api.md | 870 ++ .../v1.7/tools/api_client_libraries.md | 140 + content/influxdb/v1.7/tools/grafana.md | 11 + 
content/influxdb/v1.7/tools/inch.md | 75 + .../influxdb/v1.7/tools/influx-cli/_index.md | 43 + content/influxdb/v1.7/tools/influx_inspect.md | 539 ++ .../influxdb/v1.7/tools/influxd-cli/_index.md | 27 + .../influxdb/v1.7/tools/influxd-cli/backup.md | 31 + .../influxdb/v1.7/tools/influxd-cli/config.md | 23 + .../v1.7/tools/influxd-cli/restore.md | 30 + .../influxdb/v1.7/tools/influxd-cli/run.md | 34 + .../v1.7/tools/influxd-cli/version.md | 24 + content/influxdb/v1.7/tools/shell.md | 398 + .../influxdb/v1.7/troubleshooting/_index.md | 20 + .../influxdb/v1.7/troubleshooting/errors.md | 407 + .../frequently-asked-questions.md | 1261 +++ .../v1.7/troubleshooting/query_management.md | 180 + .../influxdb/v1.7/write_protocols/_index.md | 18 + .../line_protocol_reference.md | 290 + .../write_protocols/line_protocol_tutorial.md | 505 ++ .../influxdb/v1.8/guides/hardware_sizing.md | 2 +- 85 files changed, 29139 insertions(+), 1 deletion(-) create mode 100644 content/influxdb/v1.7/_index.md create mode 100644 content/influxdb/v1.7/about_the_project/_index.md create mode 100644 content/influxdb/v1.7/about_the_project/cla.md create mode 100644 content/influxdb/v1.7/about_the_project/contributing.md create mode 100644 content/influxdb/v1.7/about_the_project/licenses.md create mode 100644 content/influxdb/v1.7/about_the_project/releasenotes-changelog.md create mode 100644 content/influxdb/v1.7/about_the_project/third-party.md create mode 100644 content/influxdb/v1.7/additional_resources/additional_resources.md create mode 100644 content/influxdb/v1.7/administration/_index.md create mode 100644 content/influxdb/v1.7/administration/authentication_and_authorization.md create mode 100644 content/influxdb/v1.7/administration/backup_and_restore.md create mode 100644 content/influxdb/v1.7/administration/config.md create mode 100644 content/influxdb/v1.7/administration/https_setup.md create mode 100644 content/influxdb/v1.7/administration/logs.md create mode 100644 content/influxdb/v1.7/administration/ports.md create mode 100644 content/influxdb/v1.7/administration/rebuild-tsi-index.md create mode 100644 content/influxdb/v1.7/administration/security.md create mode 100644 content/influxdb/v1.7/administration/server_monitoring.md create mode 100644 content/influxdb/v1.7/administration/stability_and_compatibility.md create mode 100644 content/influxdb/v1.7/administration/subscription-management.md create mode 100644 content/influxdb/v1.7/administration/upgrading.md create mode 100644 content/influxdb/v1.7/concepts/_index.md create mode 100644 content/influxdb/v1.7/concepts/crosswalk.md create mode 100644 content/influxdb/v1.7/concepts/glossary.md create mode 100644 content/influxdb/v1.7/concepts/insights_tradeoffs.md create mode 100644 content/influxdb/v1.7/concepts/key_concepts.md create mode 100644 content/influxdb/v1.7/concepts/schema_and_data_layout.md create mode 100644 content/influxdb/v1.7/concepts/storage_engine.md create mode 100644 content/influxdb/v1.7/concepts/time-series-index.md create mode 100644 content/influxdb/v1.7/concepts/tsi-details.md create mode 100644 content/influxdb/v1.7/data_sources/carbon.md create mode 100644 content/influxdb/v1.7/data_sources/collectd.md create mode 100644 content/influxdb/v1.7/data_sources/diamond.md create mode 100644 content/influxdb/v1.7/data_sources/opentsdb.md create mode 100644 content/influxdb/v1.7/external_resources.md create mode 100644 content/influxdb/v1.7/guides/_index.md create mode 100644 content/influxdb/v1.7/guides/calculating_percentages.md create 
mode 100644 content/influxdb/v1.7/guides/downsampling_and_retention.md create mode 100644 content/influxdb/v1.7/guides/hardware_sizing.md create mode 100644 content/influxdb/v1.7/guides/migrate-to-enterprise.md create mode 100644 content/influxdb/v1.7/guides/querying_data.md create mode 100644 content/influxdb/v1.7/guides/writing_data.md create mode 100644 content/influxdb/v1.7/high_availability/_index.md create mode 100644 content/influxdb/v1.7/high_availability/clusters.md create mode 100644 content/influxdb/v1.7/introduction/_index.md create mode 100644 content/influxdb/v1.7/introduction/downloading.md create mode 100644 content/influxdb/v1.7/introduction/getting-started.md create mode 100644 content/influxdb/v1.7/introduction/installation.md create mode 100644 content/influxdb/v1.7/query_language/_index.md create mode 100644 content/influxdb/v1.7/query_language/continuous_queries.md create mode 100644 content/influxdb/v1.7/query_language/data_download.md create mode 100644 content/influxdb/v1.7/query_language/data_exploration.md create mode 100644 content/influxdb/v1.7/query_language/database_management.md create mode 100644 content/influxdb/v1.7/query_language/functions.md create mode 100644 content/influxdb/v1.7/query_language/math_operators.md create mode 100644 content/influxdb/v1.7/query_language/schema_exploration.md create mode 100644 content/influxdb/v1.7/query_language/spec.md create mode 100644 content/influxdb/v1.7/supported_protocols/_index.md create mode 100644 content/influxdb/v1.7/supported_protocols/collectd.md create mode 100644 content/influxdb/v1.7/supported_protocols/graphite.md create mode 100644 content/influxdb/v1.7/supported_protocols/opentsdb.md create mode 100644 content/influxdb/v1.7/supported_protocols/prometheus.md create mode 100644 content/influxdb/v1.7/supported_protocols/udp.md create mode 100644 content/influxdb/v1.7/tools/_index.md create mode 100644 content/influxdb/v1.7/tools/api.md create mode 100644 content/influxdb/v1.7/tools/api_client_libraries.md create mode 100644 content/influxdb/v1.7/tools/grafana.md create mode 100644 content/influxdb/v1.7/tools/inch.md create mode 100644 content/influxdb/v1.7/tools/influx-cli/_index.md create mode 100644 content/influxdb/v1.7/tools/influx_inspect.md create mode 100644 content/influxdb/v1.7/tools/influxd-cli/_index.md create mode 100644 content/influxdb/v1.7/tools/influxd-cli/backup.md create mode 100644 content/influxdb/v1.7/tools/influxd-cli/config.md create mode 100644 content/influxdb/v1.7/tools/influxd-cli/restore.md create mode 100644 content/influxdb/v1.7/tools/influxd-cli/run.md create mode 100644 content/influxdb/v1.7/tools/influxd-cli/version.md create mode 100644 content/influxdb/v1.7/tools/shell.md create mode 100644 content/influxdb/v1.7/troubleshooting/_index.md create mode 100644 content/influxdb/v1.7/troubleshooting/errors.md create mode 100644 content/influxdb/v1.7/troubleshooting/frequently-asked-questions.md create mode 100644 content/influxdb/v1.7/troubleshooting/query_management.md create mode 100644 content/influxdb/v1.7/write_protocols/_index.md create mode 100644 content/influxdb/v1.7/write_protocols/line_protocol_reference.md create mode 100644 content/influxdb/v1.7/write_protocols/line_protocol_tutorial.md diff --git a/content/influxdb/v1.7/_index.md b/content/influxdb/v1.7/_index.md new file mode 100644 index 000000000..a5c26cb11 --- /dev/null +++ b/content/influxdb/v1.7/_index.md @@ -0,0 +1,32 @@ +--- +title: InfluxDB 1.7 documentation + +menu: + influxdb: + name: v1.7 + 
identifier: influxdb_1_7 + weight: 8 +--- + +InfluxDB is a [time series database](https://www.influxdata.com/time-series-database/) designed to handle high write and query loads. +It is an integral component of the +[TICK stack](https://influxdata.com/time-series-platform/). +InfluxDB is meant to be used as a backing store for any use case involving large amounts of timestamped data, including DevOps monitoring, application metrics, IoT sensor data, and real-time analytics. + +## Key features + +Here are some of the features that InfluxDB currently supports that make it a great choice for working with time series data. + +* Custom high-performance datastore written specifically for time series data. +The TSM engine allows for high ingest speed and data compression. +* Written entirely in Go. +It compiles into a single binary with no external dependencies. +* Simple, high-performing write and query HTTP APIs. +* Plugin support for other data ingestion protocols such as Graphite, collectd, and OpenTSDB. +* Expressive SQL-like query language tailored to easily query aggregated data. +* Tags allow series to be indexed for fast and efficient queries. +* Retention policies efficiently auto-expire stale data. +* Continuous queries automatically compute aggregate data to make frequent queries more efficient. + +The open source edition of InfluxDB runs on a single node. +If you require high availability to eliminate a single point of failure, consider the [InfluxDB Enterprise Edition](https://docs.influxdata.com/influxdb/latest/high_availability/). diff --git a/content/influxdb/v1.7/about_the_project/_index.md b/content/influxdb/v1.7/about_the_project/_index.md new file mode 100644 index 000000000..b879a994a --- /dev/null +++ b/content/influxdb/v1.7/about_the_project/_index.md @@ -0,0 +1,31 @@ +--- +title: About the InfluxDB OSS project +aliases: + - /docs/v1.7/about/ +menu: + influxdb_1_7: + name: About the project + weight: 10 +--- + +## [Release notes](/influxdb/v1.7/about_the_project/releasenotes-changelog/) + +Details about features, bug fixes, and breaking changes for the current and earlier InfluxDB open source (OSS) releases are available in the [InfluxDB OSS release notes](/influxdb/v1.7/about_the_project/releasenotes-changelog/). + +## [Contributing to InfluxDB](/influxdb/v1.7/about_the_project/contributing/) + +To learn how you can contribute to the InfluxDB OSS project, see [Contributing to InfluxDB OSS](https://github.com/influxdata/influxdb/tree/1.7/CONTRIBUTING.md) in the InfluxDB OSS GitHub project. + +## [InfluxData Contributor License Agreement (CLA)](/influxdb/v1.7/about_the_project/cla/) + +Before contributing to the InfluxDB OSS project, you must complete and sign +the [InfluxData Contributor License Agreement (CLA)](https://www.influxdata.com/legal/cla/). + +## [InfluxDB open source license](/influxdb/v1.7/about_the_project/licenses/) + +The [open source license for InfluxDB](https://github.com/influxdata/influxdb/blob/master/LICENSE) +is available in the GitHub repository. + +## [Third party software](/influxdb/v1.7/about_the_project/third-party/) + +The [list of third party software components, including references to associated licenses and other materials](https://github.com/influxdata/influxdb/blob/1.7/DEPENDENCIES.md), is maintained on a version-by-version basis.
diff --git a/content/influxdb/v1.7/about_the_project/cla.md b/content/influxdb/v1.7/about_the_project/cla.md new file mode 100644 index 000000000..4201f8043 --- /dev/null +++ b/content/influxdb/v1.7/about_the_project/cla.md @@ -0,0 +1,12 @@ +--- +title: InfluxData Contributor License Agreement (CLA) +menu: + influxdb_1_7: + name: Contributor license agreement + weight: 30 + parent: About the project +--- + +Before contributing to the InfluxDB OSS project, you must complete and sign +the [InfluxData Contributor License Agreement (CLA)](https://www.influxdata.com/legal/cla/), +available on the InfluxData website. diff --git a/content/influxdb/v1.7/about_the_project/contributing.md b/content/influxdb/v1.7/about_the_project/contributing.md new file mode 100644 index 000000000..c46ab9593 --- /dev/null +++ b/content/influxdb/v1.7/about_the_project/contributing.md @@ -0,0 +1,12 @@ +--- +title: Contributing to the InfluxDB OSS project +menu: + influxdb_1_7: + name: Contributing + weight: 20 + parent: About the project +--- + +To learn how you can contribute to the InfluxDB OSS project, see +[Contributing to InfluxDB](https://github.com/influxdata/influxdb/tree/1.7/CONTRIBUTING.md) +in the GitHub repository. diff --git a/content/influxdb/v1.7/about_the_project/licenses.md b/content/influxdb/v1.7/about_the_project/licenses.md new file mode 100644 index 000000000..802d29ff8 --- /dev/null +++ b/content/influxdb/v1.7/about_the_project/licenses.md @@ -0,0 +1,11 @@ +--- +title: Open source license for InfluxDB +menu: + influxdb_1_7: + name: InfluxDB license + weight: 40 + parent: About the project +--- + +The [open source license for InfluxDB](https://github.com/influxdata/influxdb/blob/master/LICENSE) +is available in the GitHub repository. diff --git a/content/influxdb/v1.7/about_the_project/releasenotes-changelog.md b/content/influxdb/v1.7/about_the_project/releasenotes-changelog.md new file mode 100644 index 000000000..413bff36e --- /dev/null +++ b/content/influxdb/v1.7/about_the_project/releasenotes-changelog.md @@ -0,0 +1,1281 @@ +--- +title: InfluxDB 1.7 release notes +menu: + influxdb_1_7: + name: Release notes + weight: 10 + parent: About the project +--- + +## v1.7.10 [2020-02-07] + +### Bug fixes +- Fix failing corrupt data file renaming process. +- Make shard digests safe for concurrent use. +- Fix defect in TSI index where negative equality filters (`!=`) could result in no matching series. +- Fix compaction logic on infrequent cache snapshots, which resulted in frequent full + compactions rather than [level compactions](/influxdb/v1.7/concepts/storage_engine/#compactions). +- Fix for series key block data being truncated when read into an empty buffer. + Ensure all block data is returned. +- During compactions, skip TSM files with block read errors from previous compactions. + +## v1.7.9 [2019-10-27] + +### Release notes +- This release is built using Go 1.12.10, which eliminates the + [HTTP desync vulnerability](https://portswigger.net/research/http-desync-attacks-request-smuggling-reborn). + +### Bug fixes +- Guard against compaction burst throughput limit. +- Replace TSI compaction wait group with counter. +- Update InfluxQL dependency. +- Add option to authenticate debug/pprof and ping endpoints. +- Honor `insecure-skip-verify` even if custom TLS config is specified. + +### Features +- Update Go version to 1.12.10. +- Remove Godeps file. +- Update Flux version to v0.50.2. + +## v1.7.8 [2019-08-20] + +{{% warn %}} +InfluxDB now rejects all non-UTF-8 characters.
+To successfully write data to InfluxDB, use only UTF-8 characters in +database names, measurement names, tag sets, and field sets. +{{% /warn %}} + +### Bug fixes +- Fix Prometheus read panic. +- Remove stray `fmt.Println` in `tsm1.StringArrayEncodeAll`. +- Fix issue where fields re-appear after `DROP MEASUREMENT`. +- Remove a debugging `println` call. +- Subquery ordering with aggregates in descending mode was wrong. +- Fix the HTTP handler to not mislabel series as partial. +- Make `SHOW SERIES EXACT CARDINALITY` count only distinct series. +- Fix time range exceeding min/max bounds. + +### Features +- Update Flux version to v0.36.2. + +## 1.7.7 [2019-06-26] + +### Known issues + +- The Flux Technical Preview was not advanced and remains at version 0.24.0. Next month's maintenance release will update the preview. +- After upgrading, customers have experienced excessively large output of additional lines due to a `Println` statement introduced in this release. For a possible workaround, see https://github.com/influxdata/influxdb/issues/14265#issuecomment-508875853. Next month's maintenance release will address this. + +### Bug fixes + +- Fix the sort order for aggregates so that they are sorted by tag and then time. +- Use the timezone when evaluating time literals in subqueries. +- Fix CSV decoder bug where empty tag values cause an array index panic. +- Fix open/close race in `SeriesFile`. +- Sync series segment after truncate. +- Fix the ordering for selectors within a subquery with different outer tags. + +## 1.7.6 [2019-04-16] + +{{% warn %}} +If your InfluxDB OSS server is using the default in-memory index (`[data].index-version = "inmem"`), +this release includes the fix for InfluxDB 1.7.5 servers that stopped responding without warning. +{{% /warn %}} + +### Features + +- Upgrade Flux to `0.24.0` and remove the platform dependency. + - If Flux is enabled, use Chronograf 1.7.11 or later. + - When using Flux, there is a known issue that using `now` will cause a panic. The proper syntax is `now()`. +- Track remote read requests to Prometheus remote read handler. + +### Bug fixes + +- Ensure credentials are correctly passed when executing Flux HTTP requests in the `influx` CLI with the `-type=flux` option. +- Backport of data generation improvements: renamed files for consistency between versions, added `time-interval` schema option, and updated schema example documentation. +- Fix security vulnerability when the `[http] shared-secret` configuration setting is blank. +- Add nil check for `tagKeyValueEntry.setIDs()`. +- Extend the Prometheus remote write endpoint to drop unsupported Prometheus values (`NaN`, `-Inf`, and `+Inf`) rather than reject the entire batch. + - If write trace logging is enabled (`[http] write-tracing = true`), then summaries of dropped values are logged. + - If a batch of values contains values that are subsequently dropped, HTTP status code `204` is returned. +- Update predicate key mapping to match InfluxDB `2.x` behavior. +- Fix panic in Prometheus read API. +- Add a version constraint for influxql. + +## 1.7.5 [2019-03-26] + +{{% warn %}} +**Update (2019-04-01):** If your InfluxDB OSS server is using the default in-memory index (`[data].index-version = "inmem"`), then do not upgrade to this release. Customers have reported that InfluxDB 1.7.5 stops responding without warning. For details, see [GitHub issue #13010](https://github.com/influxdata/influxdb/issues/13010). The [planned fix](https://github.com/influxdata/influxdb/issues/13053) will be available soon.
+{{% /warn %}} + +### Bug fixes + +- Update `tagKeyValue` mutex to write lock. +- Fix some more shard epoch races. + +## 1.7.4 [2019-02-13] + +### Features + +- Allow TSI bitset cache to be configured. See: [InfluxDB Configuration `[data]`](/influxdb/v1.7/administration/config/#tsi-tsi1-index-settings) + +### Bug fixes + +- Remove copy-on-write when caching bitmaps in TSI. +- Use `systemd` for Amazon Linux 2. +- Revert "Limit force-full and cold compaction size." +- Convert `TagValueSeriesIDCache` to use string fields. +- Ensure that cached series id sets are Go heap backed. + +## 1.7.3 [2019-01-11] + +### Important update [2019-02-13] + +If you have not installed this release, then install the 1.7.4 release. + +**If you are currently running this release, then upgrade to the 1.7.4 release as soon as possible.** + +- A critical defect in the InfluxDB 1.7.3 release was discovered and our engineering team fixed the issue in the 1.7.4 release. Out of high concern for your data and projects, upgrade to the 1.7.4 release as soon as possible. + - **Critical defect:** Shards larger than 16GB are at high risk for data loss during full compaction. The full compaction process runs when a shard goes "cold" – no new data is being written into the database during the time range covered by the shard. + - **Post-mortem analysis:** InfluxData engineering is performing a post-mortem analysis to determine how this defect was introduced. Their discoveries will be shared in a blog post. + +### Breaking changes + +- Fix invalid UTF-8 bytes preventing shard opening. Treat fields and measurements as raw bytes. + +### Features + +- Update Flux to `0.12.0`. + +### Bug fixes + +* Limit force-full and cold compaction size. +* Add user authentication and authorization support for Flux HTTP requests. +* Call `storage.Group` API to correctly map group mode. +* Marked functions that always return floats as always returning floats. +* Add support for optionally logging Flux queries. +* Fix cardinality estimation error. + +## 1.7.2 [2018-12-11] + +### Bug fixes + +* Update to Flux 0.7.1. +* Conflict-based concurrency resolution adds guards and an epoch-based system to + coordinate modifications when deletes happen against writes to the same points + at the same time. +* Skip and warn that series file should not be in a retention policy directory. +* Check if a measurement was removed from the index and, if it was, clean it up out + of the fields index. Also fix a cleanup issue where only the prefix was checked when + matching measurements like "m1" and "m10". +* Return an error message to the user that databases must be run in non-mixed index mode + to allow deletes (see the configuration sketch below). +* Update platform dependency to simplify Flux support in Enterprise. +* Verify series file in presence of tombstones. +* Fix `ApplyEnvOverrides` when a type that implements Unmarshaler is in a slice to + not call `UnmarshalText` when the environment variable is set to empty. +* Drop NaN values when writing back points and fix the point writer to report the + number of points actually written and omit the ones that were dropped. +* The query authorizer was not properly passed to subqueries, so rejections did not + happen when a subquery was the one reading the value. The max series limit was not propagated downward.
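+ +Several of the fixes above depend on which index the server runs. The index mode is selected with the `index-version` option in the `[data]` section of the configuration file, the same setting called out in the 1.7.5 and 1.7.6 warnings. A minimal sketch of the relevant section (the directory shown is a common default; your path may differ): + +``` +[data] + dir = "/var/lib/influxdb/data" + # Default in-memory index; the 1.7.5 hang described above affects this mode. + index-version = "inmem" + # Switch to the disk-based Time Series Index instead: + # index-version = "tsi1" +``` + +Existing shards keep the index type they were created with, which is how a server can end up in the mixed index mode mentioned above.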
+ +## 1.7.1 [2018-11-14] + +### Bug fixes + +* Simple8B `EncodeAll` incorrectly encodes entries: For a run of `1s`, if the 120th or 240th entry is not a `1`, the run will be incorrectly encoded as selector `0` (`240 1s`) or selector `1` (`120 1s`), resulting in a loss of data for the 120th or 240th value. This manifests itself as consuming significant CPU resources and as compactions running indefinitely. + +## 1.7.0 [2018-11-06] + +### Breaking changes + +Chunked query was added into the Go client v2 interface. If you compiled against the Go client v2 previously, you need to recompile using the updated interface. + +### Features + +#### Flux v0.7 technical preview + +Support for the Flux language and queries has been added in this release. To begin exploring Flux 0.7 (technical preview): + +* Enable Flux using the new configuration setting [`[http] flux-enabled = true`](/influxdb/v1.7/administration/config/#flux-enabled-false). +* Use the new [`influx -type=flux`](/influxdb/v1.7/tools/shell/#type) option to enable the Flux REPL shell for creating Flux queries. +* Read about Flux and the Flux language, learn how to enable Flux, or jump into the getting started and other guides in the [Flux v0.7 (technical preview) documentation](/flux/v0.7/). + +#### Time Series Index (TSI) query performance and throughput improvements + +* Faster index planning for queries against indexes with many series that share tag pairs. +* Reduced index planning for queries that include previously queried tag pairs; the TSI + index now caches partial index results for later reuse. +* These performance improvements required a change to the on-disk TSI format. +* **To take advantage of these improvements**: + * Rebuild your indexes or wait for a TSI compaction of your indexes, + at which point the new TSI format will be applied. + * Hot shards and new shards immediately use the new TSI format. + +#### Other features + +* Enable the storage service by default. +* Ensure read service regular expressions get optimized. +* Add chunked query into the Go client v2. +* Add `access-log-status-filters` config setting to create an access log filter. +* Compaction performance improvements for Time Series Index (TSI). +* Add roaring bitmaps to TSI index files. + + +### Bug fixes + +* Missing `hardwareAddr` in `uuid` v1 generation. +* Fix the inherited interval for derivative and others. +* Fix subquery functionality when a function references a tag from the subquery. +* Strip tags from a subquery when the outer query does not group by that tag. + +## 1.6.6 [2019-02-28] + +### Bug fixes + +* Marked functions that always return floats as always returning floats. +* Fix cardinality estimation error. +* Update `tagKeyValue` mutex to write lock. + +## 1.6.5 [2019-01-10] + +### Features + +* Reduce allocations in TSI `TagSets` implementation. + +### Bug fixes + +* Fix panic in `IndexSet`. +* Pass the query authorizer to subqueries. +* Fix TSM1 panic on reader error. +* Limit database and retention policy names to 255 characters. +* Update Go runtime to 1.10.6. + +## 1.6.4 [2018-10-16] + +### Features + +* Set maximum cache size using `-max-cache-size` in `buildtsi` when building TSI index. + +### Bug fixes + +* Fix `tsi1` sketch locking. +* Fix subquery functionality when a function references a tag from the subquery. +* Strip tags from a subquery when the outer query does not group by that tag. +* Add `-series-file` flag to `dumptsi` command help. +* Clean up failed TSM snapshots. +* Fix TSM1 panic on reader error. +* Fix series file tombstoning.
+* Fix the stream iterator to not ignore errors. +* Do not panic when a series ID iterator is nil. +* Fix append of possible nil iterator. + +## 1.6.3 [2018-09-14] + +### Features + +* Remove TSI1 HLL sketches from heap. + +### Bug fixes + +* Fix the inherited interval for derivative and others. The inherited interval from an outer query should not have caused +an inner query to fail because inherited intervals are only implicitly passed to inner queries that support group +by time functionality. Since an inner query with a derivative doesn't support grouping by time and the inner query itself +doesn't specify a time, the outer query shouldn't have invalidated the inner query. +* Fix the time ranges of the derivative and similar functions for aggregate data. The derivative function and others similar to it would +preload themselves with data so that the first interval would be the start of the time range. That meant reading data outside +of the time range. One change to the shard mapper made in v1.4.0 caused the shard mapper to constrict queries to the +intervals given to the shard mapper. This was correct because the shard mapper can only deal with times it has mapped, +but this broke the functionality of looking back into the past for the derivative and other functions that used that +functionality. The query compiler has been updated with an additional attribute that records how many intervals in the past +will need to be read so that the shard mapper can include extra times that it may not necessarily read from, +but may be queried because of the functionality described above. + +## 1.6.2 [2018-08-27] + +### Features + +* Reduce allocations in TSI TagSets implementation. + +### Bug fixes + +* Ensure orphaned series are cleaned up with shard drop. + +## 1.6.1 [2018-08-03] + +### Features + +* Improve LogFile performance with bitset iterator. +* Add TSI index cardinality report to `influx_inspect`. +* Update to Go 1.10. +* Improve performance of `buildtsi` and TSI planning. +* Improve performance of read service for single measurements. +* Remove max concurrent compaction limit. +* Provide configurable TLS options. +* Add option to hint MADV_WILLNEED to kernel. + +### Bug fixes + +* Improve series segment recovery. +* Fix Windows mmap on zero-length file. +* Ensure Filter iterators are executed as late as possible. +* Document UDP precision setting in config. +* Allow tag keys to contain underscores. +* Fix a panic when matching on a specific type of regular expression. + +## 1.6.0 [2018-07-05] + +### Breaking changes + +* If math is used with the same selector multiple times, it will now act as a selector +rather than an aggregate. See [#9563](https://github.com/influxdata/influxdb/pull/9563) for details. +* For data received from Prometheus endpoints, every Prometheus measurement is now +stored in its own InfluxDB measurement rather than storing everything in the `_` measurement +using the Prometheus measurement name as the `__name__` label. + +### Features + +* Support proxy environment variables in the `influx` client. +* Implement basic trigonometry functions. +* Add ability to delete many series with predicate. +* Implement `floor`, `ceil`, and `round` functions. +* Add more math functions to InfluxQL. +* Allow customizing the Unix socket group and permissions created by the server. +* Add `suppress-write-log` option to disable the write log when the log is enabled. +* Add additional technical analysis algorithms. +* Validate points on input. +* Log information about index version during startup.
+* Add key sanitization to `deletetsm` command in `influx_inspect` utility. +* Optimize the `spread` function to process points iteratively instead of in batch. +* Allow math functions to be used in the condition. +* Add HTTP write throttle settings: `max-concurrent-write-limit`, `max-enqueued-write-limit`, and `enqueued-write-timeout`. +* Implement `SHOW STATS FOR indexes`. +* Add `dumptsmwal` command to `influx_inspect` utility. +* Improve the number of regex patterns that are optimized to static OR conditions. + +### Bug fixes + +* Support setting the log level through the environment variable. +* Fix panic when checking fieldsets. +* Ensure the correct number of tags are parsed when commas are used. +* Fix data race in WAL. +* Allow `SHOW SERIES` kill. +* Revert "Use MADV_WILLNEED when loading TSM files". +* Fix regression to allow now() to be used as the group by offset again. +* Delete deleted shards in retention service. +* Ignore index size in `Engine.DiskSize()`. +* Enable casting values from a subquery. +* Avoid a panic when using show diagnostics with text/csv. +* Properly track the response bytes written for queries in all format types. +* Remove error for series file when no shards exist. +* Fix the validation for multiple nested distinct calls. +* TSM: `TSMReader.Close` blocks until reads complete. +* Return the correct auxiliary values for `top` and `bottom`. +* Close TSMReaders from `FileStore.Close` after releasing FileStore mutex. + +## 1.5.5 [2018-12-19] + +### Features + +* Reduce allocations in TSI `TagSets` implementation. + +### Bug fixes + +* Copy return value of `IndexSet.MeasurementNamesByExpr`. +* Ensure orphaned series are cleaned up with shard drop. +* Fix the time ranges of the derivative and similar functions for aggregate data. +* Fix the stream iterator to not ignore errors. +* Do not panic when a series ID iterator is `nil`. +* Fix panic in `IndexSet`. +* Pass the query authorizer to subqueries. +* Fix TSM1 panic on reader error. + +## 1.5.4 [2018-06-21] + +### Features + +* Add `influx_inspect deletetsm` command for bulk deletes of measurements in raw TSM files. + +### Bug fixes + +* Fix panic in readTombstoneV4. +* buildtsi: Do not escape measurement names. + +## 1.5.3 [2018-05-25] + +### Features + +* Add `[http] debug-pprof-enabled` configuration setting that starts the default `pprof` endpoint immediately on startup. Useful for debugging startup performance issues. + +### Bug fixes + +* Fix the validation for multiple nested `DISTINCT` calls. +* Return the correct auxiliary values for `TOP` and `BOTTOM`. + +## 1.5.2 [2018-04-12] + +### Features + +* Check for root user when running `buildtsi`. +* Adjustable TSI Compaction Threshold. + +### Bug fixes + +* Backport: check for failure case where backup directory has no manifest files. +* Fix regression to allow `now()` to be used as the group by offset again. +* Revert `Use MADV_WILLNEED when loading TSM files`. +* Ignore index size in `Engine.DiskSize()`. +* Fix `buildtsi` partition key. +* Ensure that conditions are encoded correctly even if the AST is not properly formed. + +## 1.5.1 [2018-03-20] + +### Bug fixes + +- Allow time variable to be case insensitive again. +- Support setting the log level through the environment variable. +- Ensure the correct number of tags are parsed. +- Fix panic when checking fieldsets. +- Fix data race in WAL. + +## 1.5.0 [2018-03-06] + +### Breaking changes + +The default logging format has been changed. See [Logging and tracing in InfluxDB](/influxdb/v1.7/administration/logs) for details. + +### Features + +- Improve CLI connection warnings.
+- Backup utility prints a list of backup files. +- Backup and restore for OSS produces and consumes data in the Enterprise-compatible backup format. +- Restore runs in online mode and does not delete existing databases. +- Export functionality using `start` and `end` to filter exported data by `timestamp`. +- Handle high cardinality deletes in TSM engine. +- Improve in-memory index startup performance for high cardinality. +- Add further TSI support for streaming and copying shards. +- Schedule a full compaction after a successful import. +- Add Prometheus `/metrics` endpoint. +- Add ability to generate shard digests. +- Allow setting the node ID in the InfluxDB CLI program. + +### Bug fixes + +- Refuse extra arguments to the `influx` CLI. +- Fix space required after regex operator. Thanks @stop-start! +- Fix `panic: sync: WaitGroup is reused before previous Wait has returned`. +- Fix race condition in the merge iterator close method. +- Fix query compilation so multiple nested distinct calls are allowable. +- Fix CLI to allow quoted database names in use statement. +- Updated client `4xx` error message when response body length is zero. +- Remove extraneous newlines from the log. +- Allow lone Boolean literals in a condition expression. +- Improve performance when writes exceed `max-values-per-tag` or `max-series`. +- Prevent a panic when a query finishes and is killed at the same time. +- Fix missing sorting of blocks by time when compacting. +- WAL: update `lastWriteTime` behavior. + +## 1.4.3 [unreleased] + +### Configuration Changes + +#### `[data]` Section + +`cache-snapshot-memory-size`: default value changed from `25m` to `256m`. + +### Bug fixes + +- Fix higher disk I/O utilization. + +## 1.4.2 [2017-11-15] + +Refer to the 1.4.0 breaking changes section if `influxd` fails to start with an `incompatible tsi1 index MANIFEST` error. + +### Bug fixes + +- Fix `panic: runtime error: slice bounds out of range` when running `dep init`. + +## 1.4.1 [2017-11-13] + +### Bug fixes + +- Fix descending cursors and range queries via IFQL RPC API. + +## 1.4.0 [2017-11-13] + +### TSI Index +This feature remains experimental in this release. +However, a number of improvements have been made and new meta query changes will allow for this feature to be explored in more depth than previously possible. It is not recommended for production use at this time. +We appreciate all of the feedback we receive on this feature. Please keep it coming! + +### Breaking changes + +You can no longer specify a different `ORDER BY` clause in a subquery than the one in the top-level query. +This functionality never worked properly, +but was not explicitly forbidden. + +As part of the ongoing development of the `tsi1` index, +the implementation of a Bloom Filter, +used to efficiently determine if series are not present in the index, +was altered. +While this significantly increases the performance of the index and reduces its memory consumption, +the existing `tsi1` indexes created while running previous versions of the database are not compatible with 1.4.0. + +Users with databases using the `tsi1` index must go through the following process to upgrade to 1.4.0: + +1. Stop `influxd`. +2. Remove all `index` directories on databases using the `tsi1` index. With default configuration these can be found in + `/var/lib/influxdb/data/DB_NAME/RP_NAME/SHARD_ID/index` or `~/.influxdb/data/DB_NAME/RP_NAME/SHARD_ID/index`. + It's worth noting at this point how many different `shard_ids` you visit. +3.
Run the `influx_inspect inmem2tsi` tool using the shard's data and WAL directories for `-datadir` and `-waldir`, respectively. + Given the example in step (2), that would be + `influx_inspect inmem2tsi -datadir /var/lib/influxdb/data/DB_NAME/RP_NAME/SHARD_ID -waldir /path/to/influxdb/wal/DB_NAME/RP_NAME/SHARD_ID`. +4. Repeat step (3) for each shard that needs to be converted. +5. Start `influxd`. + +Users with existing `tsi1` shards, +who attempt to start version 1.4.0 without following the steps above, +will find the shards refuse to open and will most likely see the following error message: `incompatible tsi1 index MANIFEST`. + +### Configuration Changes + +#### `[collectd]` Section + +* `parse-multivalue-plugin` option was added with a default of `split`. When set to `split`, multivalue plugin data (e.g., `df free:5000,used:1000`) will be split into separate measurements (e.g., `df_free, value=5000` and `df_used, value=1000`). When set to `join`, multivalue plugin data will be stored as a single multi-value measurement (e.g., `df, free=5000,used=1000`). + +### Features + +- Add `influx_inspect inmem2tsi` command to convert existing in-memory (TSM-based) shards to the TSI (Time Series Index) format. +- Add support for the Prometheus remote read and write APIs. +- Support estimated and exact SHOW CARDINALITY commands for measurements, series, tag keys, tag key values, and field keys. +- Improve `SHOW TAG KEYS` performance. +- Add `EXPLAIN ANALYZE` command, which produces a detailed execution plan of a `SELECT` statement. +- Improved compaction scheduling. +- Support Ctrl+C to cancel a running query in the Influx CLI. +- Allow human-readable byte sizes in configuration file. +- Respect X-Request-Id/Request-Id headers. +- Add 'X-Influxdb-Build' to HTTP response headers so users can identify if a response is from an OSS or Enterprise service. +- All errors from queries or writes are available via X-InfluxDB-Error header, and 5xx error messages will be written + to server logs. +- Add `parse-multivalue-plugin` to allow users to choose how multivalue plugins should be handled by the collectd service. +- Make client errors more helpful on downstream errors. +- Allow panic recovery to be disabled when investigating server issues. +- Support HTTP pipelining for `/query` endpoint. +- Reduce allocations when reading data. +- Mutex profiles are now available. +- Batch up writes for monitor service. +- Use system cursors for measurement, series, and tag key meta queries. +- Initial implementation of explain plan. +- Include the number of scanned cached values in the iterator cost. +- Improve performance of `Include` and `Exclude` functions. +- Report the task status for a query. +- Reduce allocations and improve `readEntries` performance by simplifying the loop. +- Separate importer log statements to stdout and stderr. +- Improve performance of Bloom Filter in TSI index. +- Add MessagePack format for query responses. +- Implicitly decide on a lower limit for fill queries when none is present. +- Streaming `inmem2tsi` conversion. +- Sort & validate TSI key value insertion. +- Handle nil MeasurementIterator. +- Add long-line support to client importer. +- Update to Go 1.9.2. +- InfluxDB now uses MIT-licensed version of BurntSushi/toml. + +### Bug fixes + +- Change the default stats interval to 1 second instead of 10 seconds. +- illumos build broken on `syscall.Mmap`. +- Prevent privileges on non-existent databases from being set. +- `influxd backup` tool now separates out logging to `stdout` and `stderr`.
Thanks @xginn8! +- Dropping a measurement used several GB of disk space. +- Fix the CQ start and end times to use Unix timestamps. +- `influx` CLI case-sensitivity. +- Fixed time boundaries for continuous queries with time zones. +- Return query parsing errors in CSV formats. +- Fix time zone shifts when the shift happens on a time zone boundary. +- Parse time literals using the time zone in the `SELECT` statement. +- Reduce CPU usage when checking series cardinality. +- Fix backups when snapshot is empty. +- Cursor leak, resulting in an accumulation of `.tsm.tmp` files after compactions. +- Improve condition parsing. +- Ensure inputs are closed on error. Add runtime GC finalizer as additional guard to close iterators. +- Fix merging bug on system iterators. +- Force subqueries to match the parent query's ordering. +- Fix race condition accessing `seriesByID` map. +- Fix deadlock when calling `SeriesIDsAllOrByExpr`. +- Fix `influx_inspect export` so it skips missing files. +- Reduce how long it takes to walk the varrefs in an expression. +- Address `panic: runtime error: invalid memory address or nil pointer dereference`. +- Fix dropped series causing write failures, write timeouts, and high memory usage. +- Fix increased memory usage in cache and WAL readers. +- An OSS read-only user should be able to list measurements on a database. +- Ensure time and tag-based condition can be used with tsi1 index when deleting. +- Prevent deadlock when doing math on the result of a subquery. +- Fix a minor memory leak in batching points in TSDB. +- Don't assume `which` is present in package post-install script. +- Fix missing man pages in new packaging output. +- Fix use of `INFLUXD_OPTS` in service file. +- Fix WAL panic: runtime error: makeslice: cap out of range. +- Copy returned bytes from TSI meta functions. +- Fix data deleted outside of time range. +- Fix data dropped incorrectly during compaction. +- Prevent deadlock during Collectd, Graphite, OpenTSDB, and UDP shutdown. +- Remove the pidfile after the server has exited. +- Return `query.ErrQueryInterrupted` for successful read on `InterruptCh`. +- Fix race inside Measurement index. +- Ensure retention service always removes local shards. +- Handle UTF-16 files when reading the configuration file. +- Fix `panic: runtime error: slice bounds out of range`. + +## v1.3.7 [2017-10-26] + +### Release Notes +Bug fix identified via Community and InfluxCloud. The build artifacts are now consistent with v1.3.5. + +### Bug fixes + +- Don't assume `which` is present in package post-install script. +- Fix use of `INFLUXD_OPTS` in service file. +- Fix missing man pages in new packaging output. +- Add RPM dependency on shadow-utils for `useradd`. +- Fix data deleted outside of specified time range when using `delete`. +- Fix data dropped incorrectly during compaction. +- Return `query.ErrQueryInterrupted` for a successful read on `InterruptCh`. +- Copy returned bytes from TSI meta functions. + +## v1.3.6 [2017-09-28] + +### Release Notes +Bug fix identified via Community and InfluxCloud. + +### Bug fixes +- Reduce how long it takes to walk the varrefs in an expression. +- Address `panic: runtime error: invalid memory address or nil pointer dereference`. +- Fix increased memory usage in cache and WAL readers for clusters with a large number of shards. +- Prevent deadlock when doing math on the result of a subquery (see the query sketch below). +- Fix several race conditions present in the shard and storage engine. +- Fix race condition on cache entry.
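+ +For context on the subquery fix above, the deadlock involved doing math on the result of a subquery. A query of this general shape exercises that code path (the measurement and field names here are illustrative only): + +``` +SELECT "mean_usage" * 100 FROM (SELECT mean("usage") AS "mean_usage" FROM "cpu" WHERE time > now() - 1h GROUP BY time(10m)) +```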
+ +## v1.3.5 [2017-08-29] + +### Release Notes +Bug fix identified via Community and InfluxCloud. + +### Bug fixes +- Fix race condition accessing `seriesByID` map. +- Fix deadlock when calling `SeriesIDsAllOrByExpr`. + +## v1.3.4 [2017-08-23] + +### Release Notes +Bug fix identified via Community and InfluxCloud. + +### Bug fixes +- Fixed time boundaries for continuous queries with time zones. +- Fix time zone shifts when the shift happens on a time zone boundary. +- Parse time literals using the time zone in the select statement. +- Fix drop measurement not dropping all data. +- Fix backups when snapshot is empty. +- Eliminated cursor leak, resulting in an accumulation of `.tsm.tmp` files after compactions. +- Fix deadlock when dropping measurement and writing. +- Ensure inputs are closed on error. Add runtime GC finalizer as additional guard to close iterators. +- Fix leaking tmp file when large compaction aborted. + +## v1.3.3 [2017-08-10] + +### Release Notes +Bug fix identified via Community and InfluxCloud. + +### Bug fixes + +- Resolve a memory leak: when `NewReaderIterator` creates a `nilFloatIterator`, the reader is not closed. + +## v1.3.2 [2017-08-04] + +### Release Notes +Minor bug fixes were identified via Community and InfluxCloud. + +### Bug fixes + +- Interrupt "in-progress" TSM compactions. +- Prevent excessive memory usage when dropping series. +- Significantly improve performance of SHOW TAG VALUES. + +## v1.3.1 [2017-07-20] + +### Release Notes +Minor bug fixes were identified via Community and InfluxCloud. + +### Bug fixes + +- Ensure temporary TSM files get cleaned up when compaction aborted. +- Address deadlock issue causing 1.3.0 to become unresponsive. +- Duplicate points generated via INSERT after DELETE. +- Fix the CQ start and end times to use Unix timestamps. + +## v1.3.0 [2017-06-21] + +### Release Notes + +#### TSI + +Version 1.3.0 marks the first official release of the new InfluxDB time series index (TSI) engine. + +The TSI engine is a significant technical advancement in InfluxDB. +It offers a solution to the [time-structured merge tree](/influxdb/v1.2/concepts/storage_engine/) engine's [high series cardinality issue](/influxdb/v1.3/troubleshooting/frequently-asked-questions/#why-does-series-cardinality-matter). +With TSI, the number of series should be unbounded by the memory on the server hardware and the number of existing series will have a negligible impact on database startup time. +See Paul Dix's blog post [Path to 1 Billion Time Series: InfluxDB High Cardinality Indexing Ready for Testing](https://www.influxdata.com/path-1-billion-time-series-influxdb-high-cardinality-indexing-ready-testing/) for additional information. + +TSI is disabled by default in version 1.3. +To enable TSI, uncomment the [`index-version` setting](/influxdb/v1.3/administration/config#index-version-inmem) and set it to `tsi1`. +The `index-version` setting is in the `[data]` section of the configuration file. +Next, restart your InfluxDB instance. + +``` +[data] + dir = "/var/lib/influxdb/data" + index-version = "tsi1" +``` + +#### Continuous Query Statistics + +When enabled, each time a continuous query is completed, a number of details regarding the execution are written to the `cq_query` measurement of the internal monitor database (`_internal` by default).
The tags and fields of interest are: + +| tag / field | description | +| :---------------- | :--------------------------------------- | +| `db` | name of database | +| `cq` | name of continuous query | +| `durationNS` | query execution time in nanoseconds | +| `startTime` | lower bound of time range | +| `endTime` | upper bound of time range | +| `pointsWrittenOK` | number of points written to the target measurement | + + +* `startTime` and `endTime` are UNIX timestamps, in nanoseconds. +* The number of points written is also included in CQ log messages. + +### Removals + +The admin UI is removed and unusable in this release. The `[admin]` configuration section will be ignored. + +### Configuration Changes + +* The top-level config `bind-address` now defaults to `localhost:8088`. + The previous default was just `:8088`, causing the backup and restore port to be bound on all available interfaces (including interfaces on the public internet). + +The following new configuration options are available. + +#### `[http]` Section + +* `max-body-size` was added with a default of 25,000,000, but can be disabled by setting it to 0. + Specifies the maximum size (in bytes) of a client request body. When a client sends data that exceeds + the configured maximum size, a `413 Request Entity Too Large` HTTP response is returned. + +#### `[continuous_queries]` Section + +* `query-stats-enabled` was added with a default of `false`. When set to `true`, continuous query execution statistics are written to the default monitor store. + +### Features + +- Add WAL sync delay +- Add chunked request processing back into the Go client v2 +- Allow non-admin users to execute SHOW DATABASES +- Reduce memory allocations by reusing gzip.Writers across requests +- Add system information to /debug/vars +- Add modulo operator to the query language. +- Failed points during an import now result in a non-zero exit code +- Expose some configuration settings via SHOW DIAGNOSTICS +- Support single and multiline comments in InfluxQL +- Support timezone offsets for queries +- Add "integral" function to InfluxQL +- Add "non_negative_difference" function to InfluxQL +- Add bitwise AND, OR, and XOR operators to the query language +- Write throughput/concurrency improvements +- Remove the admin UI +- Update to go1.8.1 +- Add max concurrent compaction limits +- Add TSI support tooling +- Track HTTP client requests for /write and /query with /debug/requests +- Write and compaction stability +- Add new profile endpoint for gathering all debug profiles and queries in single archive +- Add nanosecond duration literal support +- Optimize top() and bottom() using an incremental aggregator +- Maintain the tags of points selected by top() or bottom() when writing the results.
+- Write CQ stats to the `_internal` database + +### Bug fixes + +- Several statements were missing the DefaultDatabase method +- Fix spelling mistake in HTTP section of config -- shared-secret +- History file should redact passwords before saving to history +- Suppress headers in output for influx cli when they are the same +- Add chunked/chunk size as setting/options in cli +- Do not increment the continuous query statistic if no query is run +- Forbid wildcards in binary expressions +- Fix fill(linear) when multiple series exist and there are null values +- Update liner dependency to handle docker exec +- Bind backup and restore port to localhost by default +- Kill query not killing query +- KILL QUERY should work during all phases of a query +- Simplify admin user check. +- Significantly improve DROP DATABASE speed +- Return an error when an invalid duration literal is parsed +- Fix the time range when an exact timestamp is selected +- Fix query parser when using addition and subtraction without spaces +- Fix a regression when math was used with selectors +- Ensure the inputs for certain functions in the query engine are ordered +- Significantly improve shutdown speed for high cardinality databases +- Fix racy integration test +- Prevent overflowing or underflowing during window computation +- Enabled golint for admin, httpd, subscriber, udp, thanks @karlding +- Implicitly cast null to false in binary expressions with a Boolean +- Restrict fill(none) and fill(linear) to be usable only with aggregate queries +- Restrict top() and bottom() selectors to be used with no other functions +- top() and bottom() now return the time for every point +- Remove default upper time bound on DELETE queries +- Fix LIMIT and OFFSET for certain aggregate queries +- Refactor the subquery code and fix outer condition queries +- Fix compaction aborted log messages +- TSM compaction does not remove .tmp on error +- Set the CSV output to an empty string for null values +- Compaction exhausting disk resources in InfluxDB +- Small edits to the etc/config.sample.toml file +- Points beyond retention policy scope are dropped silently +- Fix TSM tmp file leaked on disk +- Fix large field keys preventing snapshot compactions +- URL query parameter credentials take priority over Authentication header +- TSI branch has duplicate tag values +- Out of memory when using HTTP API +- Check file count before attempting a TSI level compaction. +- index file fd leak in tsi branch +- Fix TSI non-contiguous compaction panic + +## v1.2.4 [2017-05-08] + +### Bug fixes + +- Prefix partial write errors with `partial write:` to generalize identification in other subsystems. + +## v1.2.3 [2017-04-17] + +### Bug fixes + +- Redact passwords before saving them to the history file. +- Add the missing DefaultDatabase method to several InfluxQL statements. +- Fix segment violation in models.Tags.Get. +- Simplify the admin user check. +- Fix a regression when math was used with selectors. +- Ensure the inputs for certain functions in the query engine are ordered. +- Fix issue where deleted `time` field keys created unparseable points. + +## v1.2.2 [2017-03-14] + +### Release Notes + +### Configuration Changes + +#### `[http]` Section + +* [`max-row-limit`](/influxdb/v1.3/administration/config#max-row-limit-0) now defaults to `0`. + In versions 1.0 and 1.1, the default setting was `10000`, but due to a bug, the value in use in versions 1.0 and 1.1 was effectively `0`.
+ In versions 1.2.0 through 1.2.1, we fixed that bug, but the fix caused a breaking change for Grafana and Kapacitor users; users who had not set `max-row-limit` to `0` experienced truncated/partial data due to the `10000` row limit. + In version 1.2.2, we've changed the default `max-row-limit` setting to `0` to match the behavior in versions 1.0 and 1.1. + +### Bug fixes + +- Change the default [`max-row-limit`](/influxdb/v1.3/administration/config#max-row-limit-0) setting from `10000` to `0` to prevent the absence of data in Grafana or Kapacitor. + +## v1.2.1 [2017-03-08] + +### Release Notes + +### Bug fixes + +- Treat non-reserved measurement names with underscores as normal measurements. +- Reduce the expression in a subquery to avoid a panic. +- Properly select a tag within a subquery. +- Prevent a panic when aggregates are used in an inner query with a raw query. +- Points missing after compaction. +- Point.UnmarshalBinary() bounds check. +- Interface conversion: tsm1.Value is tsm1.IntegerValue, not tsm1.FloatValue. +- Map types correctly when using a regex and one of the measurements is empty. +- Map types correctly when selecting a field with multiple measurements where one of the measurements is empty. +- Include IsRawQuery in the rewritten statement for meta queries. +- Fix race in WALEntry.Encode and Values.Deduplicate. +- Fix panic in collectd when configured to read types DB from directory. +- Fix ORDER BY time DESC with ordering series keys. +- Fix mapping of types when the measurement uses a regular expression. +- Fix LIMIT and OFFSET when they are used in a subquery. +- Fix incorrect math when aggregates that emit different times are used. +- Fix EvalType when a parenthesis expression is used. +- Fix authentication when subqueries are present. +- Expand query dimensions from the subquery. +- Dividing aggregate functions with different outputs doesn't panic. +- Anchors not working as expected with case-insensitive regular expression. + +## v1.2.0 [2017-01-24] + +### Release Notes + +This release introduces a major new querying capability in the form of subqueries, and provides several performance improvements, including a 50% or better gain in write performance on larger numbers of cores. The release adds some stability and memory-related improvements, as well as several CLI-related bug fixes. If upgrading from a prior version, please read the configuration changes in the following section before upgrading. + +### Configuration Changes + +The following new configuration options are available if upgrading to `1.2.0` from prior versions. + +#### `[[collectd]]` Section + +* `security-level` which defaults to `"none"`. This field also accepts `"sign"` and `"encrypt"` and enables different levels of transmission security for the collectd plugin. +* `auth-file` which defaults to `"/etc/collectd/auth_file"`. Specifies where to locate the authentication file used to authenticate clients when using signed or encrypted mode. + +### Deprecations + +The stress tool `influx_stress` will be removed in a subsequent release. We recommend using [`influx-stress`](https://github.com/influxdata/influx-stress) as a replacement. + +### Features + +- Remove the override of GOMAXPROCS. +- Uncomment section headers from the default configuration file. +- Improve write performance significantly. +- Prune data in meta store for deleted shards. +- Update latest dependencies with Godeps. +- Introduce syntax for marking a partial response with chunking.
+- Use X-Forwarded-For IP address in HTTP logger if present.
+- Add support for secure transmission via collectd.
+- Switch logging to use structured logging everywhere.
+- [CLI feature request] USE retention policy for queries.
+- Add clear command to CLI.
+- Add ability to use parameters in queries in the v2 client using the `Parameters` map in the `Query` struct.
+- Allow adding items to array config via ENV.
+- Support subquery execution in the query language.
+- Verbose output for SSL connection errors.
+- Cache snapshotting performance improvements.
+
+### Bug fixes
+
+- Fix potential race condition in correctness of tsm1_cache memBytes statistic.
+- Fix broken error return on meta client's UpdateUser and DropContinuousQuery methods.
+- Fix string quoting and significantly improve performance of `influx_inspect export`.
+- CLI was caching db/rp for insert into statements.
+- Fix CLI import bug when using self-signed SSL certificates.
+- Fix cross-platform backup/restore.
+- Ensure that all user privileges associated with a database are removed when the database is dropped.
+- Return the time from a percentile call on an integer.
+- Expand string and Boolean fields when using a wildcard with `sample()`.
+- Fix chuid argument order in init script.
+- Reject invalid subscription URLs.
+- CLI should use spaces for alignment, not tabs.
+- 0.12.2 InfluxDB CLI client PRECISION returns "Unknown precision...".
+- Fix parse key panic when missing tag value.
+- Retention policy should not allow `INF` or `0` as a shard duration.
+- Return Error instead of panic when decoding point values.
+- Fix slice out of bounds panic when pruning shard groups.
+- Drop database will delete /influxdb/data directory.
+- Ensure Subscriber service can be disabled.
+- Fix race in storage engine.
+- InfluxDB should do a partial write on mismatched type errors.
+
+## v1.1.5 [2017-05-08]
+
+### Bug fixes
+
+- Redact passwords before saving them to the history file.
+- Add the missing DefaultDatabase method to several InfluxQL statements.
+
+## v1.1.4 [2017-02-27]
+
+### Bug fixes
+
+- Backport from 1.2.0: Reduce GC allocations.
+
+## v1.1.3 [2017-02-17]
+
+### Bug fixes
+
+- Remove Tags.shouldCopy, replace with forceCopy on series creation.
+
+## v1.1.2 [2017-02-16]
+
+### Bug fixes
+
+- Fix memory leak when writing new series over HTTP.
+- Fix series tag iteration segfault.
+- Fix tag dereferencing panic.
+
+## v1.1.1 [2016-12-06]
+
+### Features
+
+- Update Go version to 1.7.4.
+
+### Bug fixes
+
+- Fix string fields w/ trailing slashes.
+- Quote the empty string as an ident.
+- Fix incorrect tag value in error message.
+
+### Security
+
+[Go 1.7.4](https://golang.org/doc/devel/release.html#go1.7.minor) was released to address two security issues. This release includes these security fixes.
+
+## v1.1.0 [2016-11-14]
+
+### Release Notes
+
+This release is built with Go 1.7.3 and provides many performance optimizations, stability changes, and a few new query capabilities. If upgrading from a prior version, please read the configuration changes section below before upgrading.
+
+### Deprecations
+
+The admin interface is deprecated and will be removed in a subsequent release.
+The configuration setting to enable the admin UI is now disabled by default, but can be enabled if necessary.
+We recommend using [Chronograf](https://github.com/influxdata/chronograf) or [Grafana](https://github.com/grafana/grafana) as a replacement.
+
+### Configuration Changes
+
+The following configuration changes may need to be made before upgrading to `1.1.0` from prior versions.
+
+#### `[admin]` Section
+
+* `enabled` now defaults to `false`. If you are currently using the admin interface, you will need to change this value to `true` to re-enable it. The admin interface is currently deprecated and will be removed in a subsequent release.
+
+#### `[data]` Section
+
+* `max-values-per-tag` was added with a default of 100,000, but can be disabled by setting it to `0`. Existing measurements with tags that exceed this limit will continue to load, but writes that would cause the tag cardinality to increase will be dropped and a `partial write` error will be returned to the caller. This limit can be used to prevent high cardinality tag values from being written to a measurement.
+* `cache-max-memory-size` has been increased from `524288000` to `1048576000`. This setting is the maximum amount of RAM, in bytes, a shard cache can use before it rejects writes with an error. Setting this value to `0` disables the limit.
+* `cache-snapshot-write-cold-duration` has been decreased from `1h` to `10m`. This setting determines how long values will stay in the shard cache while the shard is cold for writes.
+* `compact-full-write-cold-duration` has been decreased from `24h` to `4h`. The shorter duration allows cold shards to be compacted to an optimal state more quickly.
+
+### Features
+
+The query language has been extended with a few new features:
+
+- Support regular expressions on field keys in the SELECT clause.
+- New `linear` fill option.
+- New `cumulative_sum` function.
+- Support `ON` for `SHOW` commands.
+
+All Changes:
+
+- Filter out series within shards that do not have data for that series.
+- Rewrite regular expressions of the form host = /^server-a$/ to host = 'server-a', to take advantage of the tsdb index.
+- Improve compaction planning performance by caching tsm file stats.
+- Align binary math expression streams by time.
+- Reduce map allocations when computing the TagSet of a measurement.
+- Make input plugin services open/close idempotent.
+- Speed up shutdown by closing shards concurrently.
+- Add sample function to query language.
+- Add `fill(linear)` to query language.
+- Implement cumulative_sum() function.
+- Update defaults in config for latest best practices.
+- UDP Client: Split large points.
+- Add stats for active compactions, compaction errors.
+- Add more man pages for the other tools we package and compress man pages fully.
+- Add max-values-per-tag to limit high tag cardinality data.
+- Update jwt-go dependency to version 3.
+- Support enabling the HTTP service over a Unix domain socket.
+- Add additional statistics to query executor.
+- Feature request: `influx inspect -export` should dump WAL files.
+- Implement text/csv content encoding for the response writer.
+- Support tools for running async queries.
+- Support ON and use default database for SHOW commands.
+- Correctly read in input from a non-interactive stream for the CLI.
+- Support `INFLUX_USERNAME` and `INFLUX_PASSWORD` for setting username/password in the CLI.
+- Optimize first/last when no group by interval is present.
+- Make regular expressions work on field and dimension keys in SELECT clause.
+- Change default time boundaries for raw queries.
+- Support mixed duration units.
+
+### Bug fixes
+
+- Avoid deadlock when `max-row-limit` is hit.
+- Fix incorrect grouping when multiple aggregates are used with sparse data.
+- Fix output duration units for SHOW QUERIES.
+- Truncate the version string when linking to the documentation.
+- influx_inspect: export does not escape field keys.
+- Fix issue where point would be written to wrong shard.
+- Fix retention policy inconsistencies.
+- Remove accidentally added string support for the stddev call.
+- Remove /data/process_continuous_queries endpoint.
+- Enable https subscriptions to work with custom CA certificates.
+- Reduce query planning allocations.
+- Shard stats include WAL path tag so disk bytes make more sense.
+- Panic with unread show series iterators during drop database.
+- Use consistent column output from the CLI for column formatted responses.
+- Correctly use password-type field in Admin UI.
+- Duplicate parsing bug in ALTER RETENTION POLICY.
+- Fix database locked up when deleting shards.
+- Fix mmap dereferencing.
+- Fix base64 encoding issue with /debug/vars stats.
+- Drop measurement causes cache max memory exceeded error.
+- Decrement number of measurements only once when deleting the last series from a measurement.
+- Delete statement returns an error when retention policy or database is specified.
+- Fix the dollar sign so it properly handles reserved keywords.
+- Exceeding max retention policy duration gives incorrect error message.
+- Drop time when used as a tag or field key.
+
+## v1.0.2 [2016-10-05]
+
+### Bug fixes
+
+- Fix RLE integer decoding producing negative numbers.
+- Avoid stat syscall when planning compactions.
+- Subscription data loss under high write load.
+- Do not automatically reset the shard duration when using ALTER RETENTION POLICY.
+- Ensure correct shard groups created when retention policy has been altered.
+
+## v1.0.1 [2016-09-26]
+
+### Bug fixes
+
+- Prevent users from manually using system queries since incorrect use would result in a panic.
+- Ensure fieldsCreated stat available in shard measurement.
+- Report cmdline and memstats in /debug/vars.
+- Fix typo within example configuration file.
+- Implement time math for lazy time literals.
+- Fix database locked up when deleting shards.
+- Skip past points at the same time in derivative call within a merged series.
+- Read an invalid JSON response as an error in the Influx client.
+
+## v1.0.0 [2016-09-08]
+
+### Release Notes
+
+Initial release of InfluxDB.
+
+### Breaking changes
+
+* `max-series-per-database` was added with a default of 1M but can be disabled by setting it to `0`. Existing databases with series that exceed this limit will continue to load, but writes that would create new series will fail.
+* Config option `[cluster]` has been replaced with `[coordinator]`.
+* Support for config options `[collectd]` and `[opentsdb]` has been removed; use `[[collectd]]` and `[[opentsdb]]` instead.
+* Config option `data-logging-enabled` within the `[data]` section has been renamed to `trace-logging-enabled`, and defaults to `false`.
+* The keywords `IF`, `EXISTS`, and `NOT` were removed for this release. This means you no longer need to specify `IF EXISTS` for `DROP DATABASE` or `IF NOT EXISTS` for `CREATE DATABASE`. If these are specified, a query parse error is returned.
+* The Shard `writePointsFail` stat has been renamed to `writePointsErr` for consistency with other stats.
+
+With this release the systemd configuration files for InfluxDB will use the system configured default for logging and will no longer write files to `/var/log/influxdb` by default.
On most systems, the logs will be directed to the systemd journal and can be accessed by `journalctl -u influxdb.service`. Consult the systemd journald documentation for configuring journald. + +### Features + +- Add mode function. +- Support negative timestamps for the query engine. +- Write path stats. +- Add MaxSeriesPerDatabase config setting. +- Remove IF EXISTS/IF NOT EXISTS from influxql language. +- Update go package library dependencies. +- Add tsm file export to influx_inspect tool. +- Create man pages for commands. +- Return 403 Forbidden when authentication succeeds but authorization fails. +- Added favicon. +- Run continuous query for multiple buckets rather than one per bucket. +- Log the CQ execution time when continuous query logging is enabled. +- Trim BOM from Windows Notepad-saved config files. +- Update help and remove unused config options from the configuration file. +- Add NodeID to execution options. +- Make httpd logger closer to Common (& combined) Log Format. +- Allow any variant of the help option to trigger the help. +- Reduce allocations during query parsing. +- Optimize timestamp run-length decoding. +- Adds monitoring statistic for on-disk shard size. +- Add HTTP(s) based subscriptions. +- Add new HTTP statistics to monitoring. +- Speed up drop database. +- Add Holt-Winter forecasting function. +- Add support for JWT token authentication. +- Add ability to create snapshots of shards. +- Parallelize iterators. +- Teach the http service how to enforce connection limits. +- Support cast syntax for selecting a specific type. +- Refactor monitor service to avoid expvar and write monitor statistics on a truncated time interval. +- Dynamically update the documentation link in the admin UI. +- Support wildcards in aggregate functions. +- Support specifying a retention policy for the graphite service. +- Add extra trace logging to tsm engine. +- Add stats and diagnostics to the TSM engine. +- Support regex selection in SHOW TAG VALUES for the key. +- Modify the default retention policy name and make it configurable. +- Update SHOW FIELD KEYS to return the field type with the field key. +- Support bound parameters in the parser. +- Add https-private-key option to httpd config. +- Support loading a folder for collectd typesdb files. + +### Bug fixes + +- Optimize queries that compare a tag value to an empty string. +- Allow blank lines in the line protocol input. +- Runtime: goroutine stack exceeds 1000000000-byte limit. +- Fix alter retention policy when all options are used. +- Concurrent series limit. +- Ensure gzip writer is closed in influx_inspect export. +- Fix CREATE DATABASE when dealing with default values. +- Fix UDP pointsRx being incremented twice. +- Tombstone memory improvements. +- Hardcode auto generated RP names to autogen. +- Ensure IDs can't clash when managing Continuous Queries. +- Continuous full compactions. +- Remove limiter from walkShards. +- Copy tags in influx_stress to avoid a concurrent write panic on a map. +- Do not run continuous queries that have no time span. +- Move the CQ interval by the group by offset. +- Fix panic parsing empty key. +- Update connection settings when changing hosts in CLI. +- Always use the demo config when outputting a new config. +- Minor improvements to init script. Removes sysvinit-utils as package dependency. +- Fix compaction planning with large TSM files. +- Duplicate data for the same timestamp. +- Fix panic: truncate the slice when merging the caches. +- Fix regex binary encoding for a measurement. 
+- Fix fill(previous) when used with math operators.
+- Rename dumptsmdev to dumptsm in influx_inspect.
+- Remove a double lock in the tsm1 index writer.
+- Remove FieldCodec from TSDB package.
+- Allow a non-admin to call "use" for the influx CLI.
+- Set the condition cursor instead of aux iterator when creating a nil condition cursor.
+- Update `stress/v2` to work with clusters, ssl, and username/password auth. Code cleanup.
+- Modify the max nanosecond time to be one nanosecond less.
+- Include sysvinit-tools as an rpm dependency.
+- Add port to all graphite log output to help with debugging multiple endpoints.
+- Fix panic: runtime error: index out of range.
+- Remove systemd output redirection.
+- Database unresponsive after DROP MEASUREMENT.
+- Address Out of Memory Error when Dropping Measurement.
+- Fix the point validation parser to identify and sort tags correctly.
+- Prevent panic in concurrent auth cache write.
+- Set X-Influxdb-Version header on every request (even 404 requests).
+- Prevent panic if there are no values.
+- Time sorting broken with overwritten points.
+- Queries with strings that look like dates end up with date types, not string types.
+- Concurrent map read write panic.
+- Drop writes from before the retention policy time window.
+- Fix SELECT statement required privileges.
+- Filter out sources that do not match the shard database/retention policy.
+- Truncate the shard group end time if it exceeds MaxNanoTime.
+- Batch SELECT INTO / CQ writes.
+- Fix compaction planning re-compacting large TSM files.
+- Ensure client sends correct precision when inserting points.
+- Accept points with trailing whitespace.
+- Fix panic in SHOW FIELD KEYS.
+- Disable limit optimization when using an aggregate.
+- Fix panic: interface conversion: tsm1.Value is \*tsm1.StringValue, not \*tsm1.FloatValue.
+- Data race when dropping a database immediately after writing to it.
+- Make sure admin exists before authenticating query.
+- Print the query executor's stack trace on a panic to the log.
+- Fix read tombstones: EOF.
+- Query-log-enabled in config not ignored anymore.
+- Ensure clients requesting gzip encoded bodies don't receive empty body.
+- Optimize shard loading.
+- Queries slow down hundreds of times after overwriting points.
+- SHOW TAG VALUES accepts != and !~ in WHERE clause.
+- Remove old cluster code.
+- Ensure that future points are considered in SHOW queries.
+- Fix full compactions conflicting with level compactions.
+- Overwriting points on large series can cause memory spikes during compactions.
+- Fix parseFill to check for fill ident before attempting to parse an expression.
+- Max index entries exceeded.
+- Address slow startup time.
+- Fix measurement field panic in tsm1 engine.
+- Queries against files that have just been compacted need to point to new files.
+- Check that retention policies exist before creating CQ.
diff --git a/content/influxdb/v1.7/about_the_project/third-party.md b/content/influxdb/v1.7/about_the_project/third-party.md
new file mode 100644
index 000000000..86e8d3f10
--- /dev/null
+++ b/content/influxdb/v1.7/about_the_project/third-party.md
@@ -0,0 +1,20 @@
+---
+title: Third party software
+menu:
+  influxdb_1_7:
+    name: Third party software
+    weight: 50
+    parent: About the project
+---
+
+InfluxData products contain third party software, which means the copyrighted,
+patented, or otherwise legally protected software of third parties that is
+incorporated in InfluxData products.
+
+Third party suppliers make no representation nor warranty with respect to
+such third party software or any portion thereof.
+Third party suppliers assume no liability for any claim that might arise with
+respect to such third party software, nor for a
+customer’s use of or inability to use the third party software.
+
+The [list of third party software components, including references to associated licenses and other materials](https://github.com/influxdata/influxdb/blob/1.7/DEPENDENCIES.md) is maintained on a version-by-version basis.
diff --git a/content/influxdb/v1.7/additional_resources/additional_resources.md b/content/influxdb/v1.7/additional_resources/additional_resources.md
new file mode 100644
index 000000000..863c95462
--- /dev/null
+++ b/content/influxdb/v1.7/additional_resources/additional_resources.md
@@ -0,0 +1,36 @@
+---
+title: Additional InfluxDB resources
+description: InfluxDB resources, including InfluxData blog, technical papers, meetup and training videos, and upcoming virtual training and other events.
+menu:
+  influxdb_1_7:
+    name: Additional resources
+    weight: 120
+---
+
+Check out the following InfluxData resources to learn more about InfluxDB OSS and other InfluxData products.
+
+## [InfluxData blog](https://www.influxdata.com/blog/)
+
+Check out the [InfluxData blog](https://www.influxdata.com/blog/) for announcements, updates, and
+weekly [tech tips](https://www.influxdata.com/category/tech-tips/).
+
+## [Technical papers](https://www.influxdata.com/_resources/techpapers-new/)
+
+The [InfluxData technical papers](https://www.influxdata.com/_resources/techpapers-new/) series offers in-depth analysis of performance, time series,
+and benchmarking of InfluxDB compared to other popular databases.
+
+## [Meetup videos](https://www.influxdata.com/_resources/videosnew//)
+
+Check out our growing collection of [meetup videos](https://www.influxdata.com/_resources/videosnew//) for introductory content, how-tos, and more.
+
+## [Virtual training videos](https://www.influxdata.com/_resources/videosnew/)
+
+Watch [virtual training videos](https://www.influxdata.com/_resources/videosnew/) from our weekly training webinar.
+
+## [Virtual training schedule](https://www.influxdata.com/virtual-training-courses/)
+
+Check out our [virtual training schedule](https://www.influxdata.com/virtual-training-courses/) to register for future webinars.
+
+## [InfluxData events](https://www.influxdata.com/events/)
+
+Learn about and sign up for upcoming [InfluxData events](https://www.influxdata.com/events/).
diff --git a/content/influxdb/v1.7/administration/_index.md b/content/influxdb/v1.7/administration/_index.md
new file mode 100644
index 000000000..df5e09e3a
--- /dev/null
+++ b/content/influxdb/v1.7/administration/_index.md
@@ -0,0 +1,56 @@
+---
+title: Administering InfluxDB
+menu:
+  influxdb_1_7:
+    name: Administration
+    weight: 50
+---
+The administration documentation contains all the information needed to administer a working InfluxDB installation.
+
+## [Configuring InfluxDB](/influxdb/v1.7/administration/config/)
+
+Information about the configuration file `influxdb.conf`.
+
+## [Authentication and authorization](/influxdb/v1.7/administration/authentication_and_authorization/)
+
+Covers how to
+[set up authentication](/influxdb/v1.7/administration/authentication_and_authorization/#set-up-authentication)
+and how to
+[authenticate requests](/influxdb/v1.7/administration/authentication_and_authorization/#authenticate-requests) in InfluxDB.
+This page also describes the different
+[user types](/influxdb/v1.7/administration/authentication_and_authorization/#user-types-and-privileges) and the InfluxQL for
+[managing database users](/influxdb/v1.7/administration/authentication_and_authorization/#user-management-commands).
+
+## [Upgrading](/influxdb/v1.7/administration/upgrading/)
+
+Information about upgrading from previous versions of InfluxDB.
+
+## [Enabling HTTPS](/influxdb/v1.7/administration/https_setup/)
+
+Enabling HTTPS encrypts the communication between clients and the InfluxDB server.
+HTTPS can also verify the authenticity of the InfluxDB server to connecting clients.
+
+## [Logging in InfluxDB](/influxdb/v1.7/administration/logs/)
+
+Information on how to direct InfluxDB log output.
+
+## [Ports](/influxdb/v1.7/administration/ports/)
+
+## [Backing up and restoring](/influxdb/v1.7/administration/backup_and_restore/)
+
+Procedures to back up data created by InfluxDB and to restore from a backup.
+
+## [Managing security](/influxdb/v1.7/administration/security/)
+
+Overview of security options and configurations.
+
+## [Stability and compatibility](/influxdb/v1.7/administration/stability_and_compatibility/)
+
+Management of breaking changes, upgrades, and ongoing support.
+
+## Downgrading
+
+To revert to a prior version, complete the same steps as when [Upgrading to InfluxDB 1.7.x](/influxdb/v1.7/administration/upgrading/), replacing 1.7.x with the version you want to downgrade to. After downloading the release, migrating your configuration settings, and enabling TSI or TSM, make sure to [rebuild your index](/influxdb/v1.7/administration/rebuild-tsi-index/#sidebar).
+
+>**Note:** Some versions of InfluxDB may have breaking changes that impact your ability to upgrade and downgrade. For example, you cannot downgrade from InfluxDB 1.3 or later to an earlier version. Please review the applicable version of release notes to check for compatibility issues between releases.
diff --git a/content/influxdb/v1.7/administration/authentication_and_authorization.md b/content/influxdb/v1.7/administration/authentication_and_authorization.md
new file mode 100644
index 000000000..5be535591
--- /dev/null
+++ b/content/influxdb/v1.7/administration/authentication_and_authorization.md
@@ -0,0 +1,514 @@
+---
+title: Authentication and authorization in InfluxDB
+aliases:
+    - influxdb/v1.7/administration/authentication_and_authorization/
+menu:
+  influxdb_1_7:
+    name: Authentication and authorization
+    weight: 20
+    parent: Administration
+---
+
+This document covers setting up and managing authentication and authorization in InfluxDB.
+
+| Authentication | Authorization |
+| :------------- | :------------ |
+| [Set up Authentication](#set-up-authentication) | [User Types and Privileges](#user-types-and-privileges) |
+| [Authenticate Requests](#authenticate-requests) | [User Management Commands](#user-management-commands) |
+| [HTTP Errors](#authentication-and-authorization-http-errors) | [HTTP Errors](#authentication-and-authorization-http-errors) |
+ +> **Note:** Authentication and authorization should not be relied upon to prevent access and protect data from malicious actors. +If additional security or compliance features are desired, InfluxDB should be run behind a third-party service. If InfluxDB +is being deployed on a publicly accessible endpoint, we strongly recommend authentication be enabled. Otherwise the data will +be publicly available to any unauthenticated user. + +## Authentication + +The InfluxDB API and the [command line interface](/influxdb/v1.7/tools/shell/) (CLI), which connects to the database using the API, include simple, built-in authentication based on user credentials. +When you enable authentication, InfluxDB only executes HTTP requests that are sent with valid credentials. + +> **Note:** Authentication only occurs at the HTTP request scope. +Plugins do not currently have the ability to authenticate requests and service endpoints (for example, Graphite, collectd, etc.) are not authenticated. + +### Set up authentication + +#### 1. Create at least one [admin user](#admin-users). +See the [authorization section](#authorization) for how to create an admin user. + +> **Note:** If you enable authentication and have no users, InfluxDB will **not** enforce authentication and will only accept the [query](#user-management-commands) that creates a new admin user. + +InfluxDB will enforce authentication once there is an admin user. + +#### 2. By default, authentication is disabled in the configuration file. +Enable authentication by setting the `auth-enabled` option to `true` in the `[http]` section of the configuration file: + +```toml +[http] + enabled = true + bind-address = ":8086" + auth-enabled = true # ✨ + log-enabled = true + write-tracing = false + pprof-enabled = true + pprof-auth-enabled = true + debug-pprof-enabled = false + ping-auth-enabled = true + https-enabled = true + https-certificate = "/etc/ssl/influxdb.pem" +``` + +{{% note %}} +If `pprof-enabled` is set to `true`, set `pprof-auth-enabled` and `ping-auth-enabled` +to `true` to require authentication on profiling and ping endpoints. +{{% /note %}} + +#### 3. Restart the process + +Now InfluxDB will check user credentials on every request and will only process requests that have valid credentials for an existing user. + +### Authenticate requests + +#### Authenticate with the InfluxDB API + +There are two options for authenticating with the [InfluxDB API](/influxdb/v1.7/tools/api/). + +If you authenticate with both Basic Authentication **and** the URL query parameters, the user credentials specified in the query parameters take precedence. +The queries in the following examples assume that the user is an [admin user](#admin-users). +See the section on [authorization](#authorization) for the different user types, their privileges, and more on user management. + +> **Note:** InfluxDB redacts passwords when you enable authentication. + +##### Authenticate with Basic Authentication as described in [RFC 2617, Section 2](http://tools.ietf.org/html/rfc2617) + +This is the preferred method for providing user credentials. + +Example: + +```bash +curl -G http://localhost:8086/query -u todd:influxdb4ever --data-urlencode "q=SHOW DATABASES" +``` + +##### Authenticate by providing query parameters in the URL or request body + +Set `u` as the username and `p` as the password. 
+
+###### Example using query parameters
+
+```bash
+curl -G "http://localhost:8086/query?u=todd&p=influxdb4ever" --data-urlencode "q=SHOW DATABASES"
+```
+
+###### Example using request body
+
+```bash
+curl -G http://localhost:8086/query --data-urlencode "u=todd" --data-urlencode "p=influxdb4ever" --data-urlencode "q=SHOW DATABASES"
+```
+
+#### Authenticate with the CLI
+
+There are three options for authenticating with the [CLI](/influxdb/v1.7/tools/shell/).
+
+##### Authenticate with the `INFLUX_USERNAME` and `INFLUX_PASSWORD` environment variables
+
+Example:
+
+```bash
+export INFLUX_USERNAME=todd
+export INFLUX_PASSWORD=influxdb4ever
+echo $INFLUX_USERNAME $INFLUX_PASSWORD
+todd influxdb4ever
+
+influx
+Connected to http://localhost:8086 version 1.4.x
+InfluxDB shell 1.4.x
+```
+
+##### Authenticate by setting the `username` and `password` flags when you start the CLI
+
+Example:
+
+```bash
+influx -username todd -password influxdb4ever
+Connected to http://localhost:8086 version 1.4.x
+InfluxDB shell 1.4.x
+```
+
+##### Authenticate with `auth <username> <password>` after starting the CLI
+
+Example:
+
+```bash
+influx
+Connected to http://localhost:8086 version 1.4.x
+InfluxDB shell 1.4.x
+> auth
+username: todd
+password:
+>
+```
+
+#### Authenticate using JWT tokens
+
+Passing JWT tokens in each request is a more secure alternative to using passwords.
+This is currently only possible through the [InfluxDB HTTP API](/influxdb/v1.7/tools/api/).
+
+##### 1. Add a shared secret in your InfluxDB configuration file
+
+InfluxDB uses the shared secret to encode the JWT signature.
+By default, `shared-secret` is set to an empty string, in which case no JWT authentication takes place.
+Add a custom shared secret in your [InfluxDB configuration file](/influxdb/v1.7/administration/config/#shared-secret).
+The longer the secret string, the more secure it is:
+
+```
+[http]
+  shared-secret = "my super secret pass phrase"
+```
+
+Alternatively, to avoid keeping your secret phrase as plain text in your InfluxDB configuration file, set the value with the `INFLUXDB_HTTP_SHARED_SECRET` environment variable.
+
+##### 2. Generate your token
+
+Use an authentication service to generate a secure token using your InfluxDB username, an expiration time, and your shared secret.
+There are online tools, such as [https://jwt.io/](https://jwt.io/), that will do this for you.
+
+The payload (or claims) of the token must be in the following format:
+
+```
+{
+  "username": "myUserName",
+  "exp": 1516239022
+}
+```
+
+- **username** - The name of your InfluxDB user.
+- **exp** - The expiration time of the token in UNIX epoch time.
+For increased security, keep token expiration periods short.
+For testing, you can manually generate UNIX timestamps using [https://www.unixtimestamp.com/index.php](https://www.unixtimestamp.com/index.php).
+
+Encode the payload using your shared secret.
+You can do this with either a JWT library in your own authentication server or by hand at [https://jwt.io/](https://jwt.io/).
+The generated token should look similar to the following:
+
+```
+eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.he0ErCNloe4J7Id0Ry2SEDg09lKkZkfsRiGsdX_vgEg
+```
+
+##### 3. Include the token in HTTP requests
+
+Include your generated token as part of the `Authorization` header in HTTP requests.
+Use the `Bearer` authorization scheme:
+
+```
+Authorization: Bearer <myToken>
+```
+
+{{% note %}}
+Only unexpired tokens will successfully authenticate.
+Be sure your token has not expired.
+{{% /note %}}
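+
+If you prefer to script token generation instead of using an online tool, the signing procedure is standard HS256: base64url-encode the JSON header and payload, then sign both with an HMAC-SHA256 keyed by the shared secret. The following is a minimal sketch using `openssl`; the username, expiration timestamp, and secret are placeholder values carried over from the examples above, not required names:
+
+```bash
+#!/usr/bin/env bash
+# Minimal JWT (HS256) generation sketch. Assumes the shared secret and
+# username from the examples above; replace both with your own values.
+SECRET="my super secret pass phrase"
+
+# base64url = standard base64 with '+/' mapped to '-_' and padding stripped.
+b64url() { openssl base64 -A | tr '+/' '-_' | tr -d '='; }
+
+HEADER=$(printf '{"alg":"HS256","typ":"JWT"}' | b64url)
+PAYLOAD=$(printf '{"username":"todd","exp":1516239022}' | b64url)
+SIGNATURE=$(printf '%s.%s' "$HEADER" "$PAYLOAD" \
+  | openssl dgst -sha256 -hmac "$SECRET" -binary | b64url)
+
+echo "$HEADER.$PAYLOAD.$SIGNATURE"
+```
+
+Pass the resulting string in the `Authorization: Bearer` header, as shown in the example request below.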
+
+###### Example query request with JWT authentication
+
+```bash
+curl -XGET "http://localhost:8086/query?db=demodb" \
+  --data-urlencode "q=SHOW DATABASES" \
+  --header "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.he0ErCNloe4J7Id0Ry2SEDg09lKkZkfsRiGsdX_vgEg"
+```
+
+## Authenticate Telegraf requests to InfluxDB
+
+Authenticating [Telegraf](/telegraf/latest/) requests to an InfluxDB instance with
+authentication enabled requires some additional steps.
+In the Telegraf configuration file (`/etc/telegraf/telegraf.conf`), uncomment
+and edit the `username` and `password` settings.
+
+```toml
+###############################################################################
+#                            OUTPUT PLUGINS                                   #
+###############################################################################
+
+[...]
+
+## Write timeout (for the InfluxDB client), formatted as a string.
+## If not provided, will default to 5s. 0s means no timeout (not recommended).
+timeout = "5s"
+username = "telegraf" #💥
+password = "metricsmetricsmetricsmetrics" #💥
+
+[...]
+```
+
+Next, restart Telegraf and you're all set!
+
+## Authorization
+
+Authorization is only enforced once you've [enabled authentication](#set-up-authentication).
+By default, authentication is disabled, all credentials are silently ignored, and all users have all privileges.
+
+### User types and privileges
+
+#### Admin users
+
+Admin users have `READ` and `WRITE` access to all databases and full access to the following administrative queries:
+
+Database management:
+   ◦   `CREATE DATABASE` and `DROP DATABASE`
+   ◦   `DROP SERIES` and `DROP MEASUREMENT`
+   ◦   `CREATE RETENTION POLICY`, `ALTER RETENTION POLICY`, and `DROP RETENTION POLICY`
+   ◦   `CREATE CONTINUOUS QUERY` and `DROP CONTINUOUS QUERY`
+
+See the [database management](/influxdb/v1.7/query_language/database_management/) and [continuous queries](/influxdb/v1.7/query_language/continuous_queries/) pages for a complete discussion of the commands listed above.
+
+User management:
+   ◦   Admin user management:
+        [`CREATE USER`](#user-management-commands), [`GRANT ALL PRIVILEGES`](#grant-administrative-privileges-to-an-existing-user), [`REVOKE ALL PRIVILEGES`](#revoke-administrative-privileges-from-an-admin-user), and [`SHOW USERS`](#show-all-existing-users-and-their-admin-status)
+   ◦   Non-admin user management:
+        [`CREATE USER`](#user-management-commands), [`GRANT [READ,WRITE,ALL]`](#grant-read-write-or-all-database-privileges-to-an-existing-user), [`REVOKE [READ,WRITE,ALL]`](#revoke-read-write-or-all-database-privileges-from-an-existing-user), and [`SHOW GRANTS`](#show-a-user-s-database-privileges)
+   ◦   General user management:
+        [`SET PASSWORD`](#re-set-a-user-s-password) and [`DROP USER`](#drop-a-user)
+
+See [below](#user-management-commands) for a complete discussion of the user management commands.
+
+#### Non-admin users
+
+Non-admin users can have one of the following three privileges per database:
+   ◦   `READ`
+   ◦   `WRITE`
+   ◦   `ALL` (both `READ` and `WRITE` access)
+
+`READ`, `WRITE`, and `ALL` privileges are controlled per user per database. A new non-admin user has no access to any database until they are specifically [granted privileges to a database](#grant-read-write-or-all-database-privileges-to-an-existing-user) by an admin user.
+Non-admin users can [`SHOW`](/influxdb/v1.7/query_language/schema_exploration/#show-databases) the databases on which they have `READ` and/or `WRITE` permissions.
+
+### User management commands
+
+#### Admin user management
+
+When you enable HTTP authentication, InfluxDB requires you to create at least one admin user before you can interact with the system.
+
+`CREATE USER admin WITH PASSWORD '<password>' WITH ALL PRIVILEGES`
+
+##### `CREATE` another admin user
+
+```sql
+CREATE USER <username> WITH PASSWORD '<password>' WITH ALL PRIVILEGES
+```
+
+CLI example:
+
+```sql
+> CREATE USER paul WITH PASSWORD 'timeseries4days' WITH ALL PRIVILEGES
+>
+```
+
+> **Note:** Repeating the exact `CREATE USER` statement is idempotent. If any values change, the database will return a duplicate user error. See GitHub Issue [#6890](https://github.com/influxdata/influxdb/pull/6890) for details.
+>
+CLI example:
+>
+    > CREATE USER todd WITH PASSWORD '123456' WITH ALL PRIVILEGES
+    > CREATE USER todd WITH PASSWORD '123456' WITH ALL PRIVILEGES
+    > CREATE USER todd WITH PASSWORD '123' WITH ALL PRIVILEGES
+    ERR: user already exists
+    > CREATE USER todd WITH PASSWORD '123456'
+    ERR: user already exists
+    > CREATE USER todd WITH PASSWORD '123456' WITH ALL PRIVILEGES
+    >
+
+##### `GRANT` administrative privileges to an existing user
+
+```sql
+GRANT ALL PRIVILEGES TO <username>
+```
+
+CLI example:
+
+```sql
+> GRANT ALL PRIVILEGES TO "todd"
+>
+```
+
+##### `REVOKE` administrative privileges from an admin user
+
+```sql
+REVOKE ALL PRIVILEGES FROM <username>
+```
+
+CLI example:
+
+```sql
+> REVOKE ALL PRIVILEGES FROM "todd"
+>
+```
+
+##### `SHOW` all existing users and their admin status
+
+```sql
+SHOW USERS
+```
+
+CLI example:
+
+```sql
+> SHOW USERS
+user     admin
+todd     false
+paul     true
+hermione false
+dobby    false
+```
+
+#### Non-admin user management
+
+##### `CREATE` a new non-admin user
+
+```sql
+CREATE USER <username> WITH PASSWORD '<password>'
+```
+
+CLI example:
+
+```sql
+> CREATE USER todd WITH PASSWORD 'influxdb41yf3'
+> CREATE USER alice WITH PASSWORD 'wonder\'land'
+> CREATE USER "rachel_smith" WITH PASSWORD 'asdf1234!'
+> CREATE USER "monitoring-robot" WITH PASSWORD 'XXXXX'
+> CREATE USER "$savyadmin" WITH PASSWORD 'm3tr1cL0v3r'
+>
+```
+
+> **Notes:**
+>
+* The user value must be wrapped in double quotes if it starts with a digit, is an InfluxQL keyword, contains a hyphen, or includes any special characters, for example: `!@#$%^&*()-`
+* The password [string](/influxdb/v1.7/query_language/spec/#strings) must be wrapped in single quotes.
+* Do not include the single quotes when authenticating requests.
+
+> For passwords that include a single quote or a newline character, escape the single quote or newline character with a backslash both when creating the password and when submitting authentication requests.
+>
+* Repeating the exact `CREATE USER` statement is idempotent. If any values change, the database will return a duplicate user error. See GitHub Issue [#6890](https://github.com/influxdata/influxdb/pull/6890) for details.
+
+>
+CLI example:
+>
+    > CREATE USER "todd" WITH PASSWORD '123456'
+    > CREATE USER "todd" WITH PASSWORD '123456'
+    > CREATE USER "todd" WITH PASSWORD '123'
+    ERR: user already exists
+    > CREATE USER "todd" WITH PASSWORD '123456'
+    > CREATE USER "todd" WITH PASSWORD '123456' WITH ALL PRIVILEGES
+    ERR: user already exists
+    > CREATE USER "todd" WITH PASSWORD '123456'
+    >
+
+##### `GRANT` `READ`, `WRITE`, or `ALL` database privileges to an existing user
+
+```sql
+GRANT [READ,WRITE,ALL] ON <database_name> TO <username>
+```
+
+CLI examples:
+
+`GRANT` `READ` access to `todd` on the `NOAA_water_database` database:
+
+```sql
+> GRANT READ ON "NOAA_water_database" TO "todd"
+>
+```
+
+`GRANT` `ALL` access to `todd` on the `NOAA_water_database` database:
+
+```sql
+> GRANT ALL ON "NOAA_water_database" TO "todd"
+>
+```
+
+##### `REVOKE` `READ`, `WRITE`, or `ALL` database privileges from an existing user
+
+```sql
+REVOKE [READ,WRITE,ALL] ON <database_name> FROM <username>
+```
+
+CLI examples:
+
+`REVOKE` `ALL` privileges from `todd` on the `NOAA_water_database` database:
+
+```sql
+> REVOKE ALL ON "NOAA_water_database" FROM "todd"
+>
+```
+
+`REVOKE` `WRITE` privileges from `todd` on the `NOAA_water_database` database:
+
+```sql
+> REVOKE WRITE ON "NOAA_water_database" FROM "todd"
+>
+```
+
+>**Note:** If a user with `ALL` privileges has `WRITE` privileges revoked, they are left with `READ` privileges, and vice versa.
+
+##### `SHOW` a user's database privileges
+
+```sql
+SHOW GRANTS FOR <username>
+```
+
+CLI example:
+
+```sql
+> SHOW GRANTS FOR "todd"
+database                  privilege
+NOAA_water_database       WRITE
+another_database_name     READ
+yet_another_database_name ALL PRIVILEGES
+one_more_database_name    NO PRIVILEGES
+```
+
+#### General admin and non-admin user management
+
+##### Re`SET` a user's password
+
+```sql
+SET PASSWORD FOR <username> = '<password>'
+```
+
+CLI example:
+
+```sql
+> SET PASSWORD FOR "todd" = 'influxdb4ever'
+>
+```
+
+{{% note %}}
+**Note:** The password [string](/influxdb/v1.7/query_language/spec/#strings) must be wrapped in single quotes. Do not include the single quotes when authenticating requests. For passwords that include a single quote or a newline character, escape the single quote or newline character with a backslash both when creating the password and when submitting authentication requests.
+{{% /note %}}
+
+##### `DROP` a user
+
+```sql
+DROP USER <username>
+```
+
+CLI example:
+
+```sql
+> DROP USER "todd"
+>
+```
+
+## Authentication and authorization HTTP errors
+
+Requests with no authentication credentials or incorrect credentials yield the `HTTP 401 Unauthorized` response.
+
+Requests by unauthorized users yield the `HTTP 403 Forbidden` response.
diff --git a/content/influxdb/v1.7/administration/backup_and_restore.md b/content/influxdb/v1.7/administration/backup_and_restore.md
new file mode 100644
index 000000000..647d7c941
--- /dev/null
+++ b/content/influxdb/v1.7/administration/backup_and_restore.md
@@ -0,0 +1,393 @@
+---
+title: Backing up and restoring in InfluxDB OSS
+description: Using InfluxDB OSS backup and restore utilities for online, Enterprise-compatible use and portability between InfluxDB Enterprise and InfluxDB OSS servers.
+aliases:
+    - /influxdb/v1.7/administration/backup-and-restore/
+menu:
+  influxdb_1_7:
+    name: Backing up and restoring
+    weight: 60
+    parent: Administration
+---
+
+## Overview
+
+The InfluxDB OSS `backup` utility provides:
+
+* Option to run backup and restore functions on online (live) databases.
+* Backup and restore functions for single or multiple databases, along with optional timestamp filtering.
+* Data can be imported from [InfluxDB Enterprise](/enterprise_influxdb/latest/) clusters.
+* Backup files that can be imported into an InfluxDB Enterprise database.
+
+> **InfluxDB Enterprise users:** See [Backing up and restoring in InfluxDB Enterprise](/enterprise_influxdb/latest/administration/backup-and-restore/).
+
+> ***Note:*** Prior to InfluxDB OSS 1.5, the `backup` utility created backup file formats incompatible with InfluxDB Enterprise. This legacy format is still supported in the new `backup` utility as input for the new *online* restore function. The *offline* backup and restore utilities in InfluxDB OSS versions 1.4 and earlier are deprecated, but are documented below in [Backward compatible offline backup and restore](#backward-compatible-offline-backup-and-restore-legacy-format).
+
+## Online backup and restore (for InfluxDB OSS)
+
+Use the `backup` and `restore` utilities to back up and restore between `influxd` instances with the same versions or with only minor version differences. For example, you can back up from 1.7.3 and restore on 1.7.7.
+
+### Configuring remote connections
+
+The online backup and restore processes execute over a TCP connection to the database.
+
+**To enable the port for the backup and restore service:**
+
+1. At the root level of the InfluxDB config file (`influxdb.conf`), uncomment the [`bind-address` configuration setting](/influxdb/v1.7/administration/config#bind-address-127-0-0-1-8088) on the remote node.
+
+2. Update the `bind-address` value to `<remote-node-IP>:8088`.
+
+3. Provide the IP address and port to the `-host` parameter when you run commands.
+
+**Example**
+
+```
+$ influxd backup -portable -database mydatabase -host <remote-node-IP>:8088 /tmp/mysnapshot
+```
+
+### `backup`
+
+The improved `backup` command is similar to previous versions, except that it
+generates backups in an InfluxDB Enterprise-compatible format and has some new filtering options to constrain the range of data points that are exported to the backup.
+
+```
+influxd backup
+    [ -database <db_name> ]
+    [ -portable ]
+    [ -host <host:port> ]
+    [ -retention <rp_name> ] | [ -shard <shard_ID> -retention <rp_name> ]
+    [ -start <timestamp> [ -end <timestamp> ] | -since <timestamp> ]
+    <path-to-backup>
+```
+
+To invoke the new InfluxDB Enterprise-compatible format, run the `influxd backup` command with the `-portable` flag, like this:
+
+```
+influxd backup -portable [ arguments ] <path-to-backup>
+```
+
+##### Arguments
+
+Optional arguments are enclosed in brackets.
+
+- `[ -database <db_name> ]`: The database to back up. If not specified, all databases are backed up.
+
+- `[ -portable ]`: Generates backup files in the newer InfluxDB Enterprise-compatible format. Highly recommended for all InfluxDB OSS users.
+
+{{% warn %}}
+**Important:** If `-portable` is not specified, the default legacy backup utility is used -- only the host metastore is backed up, unless `-database` is specified. If not using `-portable`, review [Backup (legacy)](#backup-legacy) below for expected behavior.
+{{% /warn %}}
+
+- `[ -host <host:port> ]`: Host and port for the InfluxDB OSS instance. Default value is `'127.0.0.1:8088'`. Required for remote connections. Example: `-host 127.0.0.1:8088`
+
+- `[ -retention <rp_name> ]`: Retention policy for the backup. If not specified, the default is to use all retention policies. If specified, then `-database` is required.
+
+- `[ -shard <shard_ID> ]`: Shard ID of the shard to be backed up. If specified, then `-retention <rp_name>` is required.
+
+- `[ -start <timestamp> ]`: Include all points starting with the specified timestamp ([RFC3339 format](https://www.ietf.org/rfc/rfc3339.txt)). Not compatible with `-since`.
+  Example: `-start 2015-12-24T08:12:23Z`
+
+- `[ -end <timestamp> ]`: Exclude all results after the specified timestamp ([RFC3339 format](https://www.ietf.org/rfc/rfc3339.txt)). Not compatible with `-since`. If used without `-start`, all data will be backed up starting from 1970-01-01. Example: `-end 2015-12-31T08:12:23Z`
+
+- `[ -since <timestamp> ]`: Perform an incremental backup after the specified timestamp ([RFC3339 format](https://www.ietf.org/rfc/rfc3339.txt)). Use `-start` instead, unless needed for legacy backup support.
+
+#### Backup examples
+
+**To back up everything:**
+
+```
+influxd backup -portable <path-to-backup>
+```
+
+**To back up all databases recently changed at the filesystem level:**
+
+```
+influxd backup -portable -start <timestamp> <path-to-backup>
+```
+
+**To back up only the `telegraf` database:**
+
+```
+influxd backup -portable -database telegraf <path-to-backup>
+```
+
+**To back up a database for a specified time interval:**
+
+```
+influxd backup -portable -database mytsd -start 2017-04-28T06:49:00Z -end 2017-04-28T06:50:00Z /tmp/backup/influxdb
+```
+
+### `restore`
+
+An online `restore` process is initiated by using the `restore` command with either the `-portable` argument (indicating the new Enterprise-compatible backup format) or `-online` flag (indicating the legacy backup format).
+
+```
+influxd restore [ -db <db_name> ]
+    -portable | -online
+    [ -host <host:port> ]
+    [ -newdb <newdb_name> ]
+    [ -rp <rp_name> ]
+    [ -newrp <newrp_name> ]
+    [ -shard <shard_ID> ]
+    <path-to-backup-files>
+```
+
+{{% warn %}}
+**Restoring backups that specified time periods (using `-start` and `-end`)**
+
+Backups that specified time intervals using the `-start` or `-end` arguments are performed on blocks of data and not on a point-by-point basis. Since most blocks are highly compacted, extracting each block to inspect each point creates both a computational and disk-space burden on the running system.
+Each data block is annotated with starting and ending timestamps for the time interval included in the block. When you specify `-start` or `-end` timestamps, all of the specified data is backed up, but other data points that are in the same blocks will also be backed up.
+
+**Expected behavior**
+
+- When restoring data, you are likely to see data that is outside of the specified time periods.
+- If duplicate data points are included in the backup files, the points will be written again, overwriting any existing data.
+{{% /warn %}}
+
+#### Arguments
+
+Optional arguments are enclosed in brackets.
+
+- `-portable`: Use the new Enterprise-compatible backup format for InfluxDB OSS. Recommended instead of `-online`. A backup created on InfluxDB Enterprise can be restored to an InfluxDB OSS instance.
+
+- `-online`: Use the legacy backup format. Only use if the newer `-portable` option cannot be used.
+
+- `[ -host <host:port> ]`: Host and port for the InfluxDB OSS instance. Default value is `'127.0.0.1:8088'`. Required for remote connections. Example: `-host 127.0.0.1:8088`
+
+- `[ -db <db_name> | -database <db_name> ]`: Name of the database to be restored from the backup. If not specified, all databases will be restored.
+
+- `[ -newdb <newdb_name> ]`: Name of the database into which the archived data will be imported on the target system. If not specified, then the value for `-db` is used. The new database name must be unique to the target system.
+
+- `[ -rp <rp_name> ]`: Name of the retention policy from the backup that will be restored. Requires that `-db` is set. If not specified, all retention policies will be used.
+
+- `[ -newrp <newrp_name> ]`: Name of the retention policy to be created on the target system. Requires that `-rp` is set. If not specified, then the `-rp` value is used.
+
+- `[ -shard <shard_ID> ]`: Shard ID of the shard to be restored. If specified, then `-db` and `-rp` are required.
+
+> **Note:** If you have automated backups based on the legacy format, consider using the new online feature for your legacy backups. The new backup utility lets you restore a single database to a live (online) instance, while leaving all existing data on the server in place. The [offline restore method (described below)](#restore-legacy) may result in data loss, since it clears all existing databases on the server.
+
+#### Restore examples
+
+**To restore all databases found within the backup directory:**
+
+```
+influxd restore -portable path-to-backup
+```
+
+**To restore only the `telegraf` database (telegraf database must not exist):**
+
+```
+influxd restore -portable -db telegraf path-to-backup
+```
+
+**To restore data to a database that already exists:**
+
+You cannot restore directly into a database that already exists. If you attempt to run the `restore` command into an existing database, you will get a message like this:
+
+```
+influxd restore -portable -db existingdb path-to-backup
+
+2018/08/30 13:42:46 error updating meta: DB metadata not changed. database may already exist
+restore: DB metadata not changed. database may already exist
+```
+
+1. Restore the existing database backup to a temporary database.
+
+    ```
+    influxd restore -portable -db telegraf -newdb telegraf_bak path-to-backup
+    ```
+2. Sideload the data (using a `SELECT ... INTO` statement) into the existing target database and drop the temporary database.
+
+    ```
+    > USE telegraf_bak
+    > SELECT * INTO telegraf..:MEASUREMENT FROM /.*/ GROUP BY *
+    > DROP DATABASE telegraf_bak
+    ```
+
+**To restore to a retention policy that already exists:**
+
+1. Restore the retention policy to a temporary database.
+
+    ```
+    influxd restore -portable -db telegraf -newdb telegraf_bak -rp autogen -newrp autogen_bak path-to-backup
+    ```
+2. Sideload into the target database and drop the temporary database.
+
+    ```
+    > USE telegraf_bak
+    > SELECT * INTO telegraf.autogen.:MEASUREMENT FROM /telegraf_bak.autogen_bak.*/ GROUP BY *
+    > DROP DATABASE telegraf_bak
+    ```
+
+### Backward compatible offline backup and restore (legacy format)
+
+> ***Note:*** The backward compatible backup and restore for InfluxDB OSS documented below are deprecated. InfluxData recommends using the newer Enterprise-compatible backup and restore utilities with your InfluxDB OSS servers.
+
+InfluxDB OSS has the ability to snapshot an instance at a point-in-time and restore it.
+All backups are full backups; incremental backups are not supported.
+Two types of data can be backed up: the metastore and the metrics themselves.
+The [metastore](/influxdb/v1.7/concepts/glossary/#metastore) is backed up in its entirety.
+The metrics are backed up on a per-database basis in an operation separate from the metastore backup.
+
+#### Backing up the metastore
+
+The InfluxDB metastore contains internal information about the status of
+the system, including user information, database and shard metadata, continuous queries, retention policies, and subscriptions.
+While a node is running, you can create a backup of your instance's metastore by running the command:
+
+```
+influxd backup <path-to-backup>
+```
+
+Where `<path-to-backup>` is the directory where you
+want the backup to be written to. Without any other arguments,
+the backup will only record the current state of the system
+metastore.
+For example, the command:
+
+```bash
+$ influxd backup /tmp/backup
+2016/02/01 17:15:03 backing up metastore to /tmp/backup/meta.00
+2016/02/01 17:15:03 backup complete
+```
+
+Will create a metastore backup in the directory `/tmp/backup` (the
+directory will be created if it doesn't already exist).
+
+#### Backup (legacy)
+
+Each database must be backed up individually.
+
+To back up a database, add the `-database` flag:
+
+```bash
+influxd backup -database <mydatabase> <path-to-backup>
+```
+
+Where `<mydatabase>` is the name of the database you would like to
+back up, and `<path-to-backup>` is where the backup data should be
+stored.
+
+Optional flags also include:
+
+- `-retention <rp_name>`
+  - This flag can be used to back up a specific retention policy. For more information on retention policies, see
+  [Retention policy management](/influxdb/v1.7/query_language/database_management/#retention-policy-management). If unspecified, all retention policies will be backed up.
+
+- `-shard <shard_ID>` - This flag can be used to back up a specific
+  shard ID. To see which shards are available, you can run the command
+  `SHOW SHARDS` using the InfluxDB query language. If not specified,
+  all shards will be backed up.
+
+- `-since <timestamp>` - This flag can be used to create a backup _since_ a
+  specific date, where the date must be in
+  [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) format (for example,
+  `2015-12-24T08:12:23Z`). This flag is important if you would like to
+  take incremental backups of your database. If not specified, all
+  time ranges within the database will be backed up.
+
+> **Note:** Metastore backups are also included in per-database backups.
+
+As a real-world example, you can take a backup of the `autogen`
+retention policy for the `telegraf` database since midnight UTC on
+February 1st, 2016 by using the command:
+
+```
+$ influxd backup -database telegraf -retention autogen -since 2016-02-01T00:00:00Z /tmp/backup
+2016/02/01 18:02:36 backing up rp=default since 2016-02-01 00:00:00 +0000 UTC
+2016/02/01 18:02:36 backing up metastore to /tmp/backup/meta.01
+2016/02/01 18:02:36 backing up db=telegraf rp=default shard=2 to /tmp/backup/telegraf.default.00002.01 since 2016-02-01 00:00:00 +0000 UTC
+2016/02/01 18:02:36 backup complete
+```
+
+Which will send the resulting backup to `/tmp/backup`, where it can
+then be compressed and sent to long-term storage.
+
+#### Remote backups (legacy)
+
+The legacy backup mode also supports live, remote backup functionality.
+Follow the directions in [Configuring remote connections](#configuring-remote-connections) above to configure this feature.
+
+## Restore (legacy)
+
+{{% warn %}}
+The offline restore method described here may result in data loss -- it clears all existing databases on the server. Consider using the `-online` flag with the newer [`restore` method (described above)](#restore) to import legacy data without any data loss.
+{{% /warn %}}
+
+To restore a backup, you will need to use the `influxd restore` command.
+
+> **Note:** Restoring from backup is only supported while the InfluxDB daemon is stopped.
+
+To restore from a backup you will need to specify the type of backup,
+the path to where the backup should be restored, and the path to the backup.
+The command:
+
+```
+influxd restore [ -metadir <path-to-meta-directory> ] [ -datadir <path-to-data-directory> ] <path-to-backup>
+```
+
+The required flags for restoring a backup are:
+
+- `-metadir <path-to-meta-directory>` - This is the path to the meta
+  directory where you would like the metastore backup recovered
+  to. For packaged installations, this should be specified as
+  `/var/lib/influxdb/meta`.
+
+- `-datadir <path-to-data-directory>` - This is the path to the data
+  directory where you would like the database backup recovered to. For
+  packaged installations, this should be specified as
+  `/var/lib/influxdb/data`.
+
+The optional flags for restoring a backup are:
+
+- `-database <db_name>` - This is the database that you would like to
+  restore the data to. This option is required if no `-metadir` option
+  is provided.
+
+- `-retention <rp_name>` - This is the target retention policy
+  to which the stored data will be restored.
+
+- `-shard <shard_ID>` - This is the ID of the shard that should be
+  restored. If specified, `-database` and `-retention` must also be
+  set.
+
+Following the backup example above, the backup can be restored in two
+steps.
+
+1. The metastore needs to be restored so that InfluxDB
+knows which databases exist:
+
+```
+$ influxd restore -metadir /var/lib/influxdb/meta /tmp/backup
+Using metastore snapshot: /tmp/backup/meta.00
+```
+
+2. Once the metastore has been restored, we can now recover the backed up
+data. In the real-world example above, we backed up the `telegraf`
+database to `/tmp/backup`, so let's restore that same dataset. To
+restore the `telegraf` database:
+
+```
+$ influxd restore -database telegraf -datadir /var/lib/influxdb/data /tmp/backup
+Restoring from backup /tmp/backup/telegraf.*
+unpacking /var/lib/influxdb/data/telegraf/default/2/000000004-000000003.tsm
+unpacking /var/lib/influxdb/data/telegraf/default/2/000000005-000000001.tsm
+```
+
+> **Note:** Once the backed up data has been recovered, the permissions on the shards may no longer be accurate. To ensure the file permissions are correct, please run this command: `$ sudo chown -R influxdb:influxdb /var/lib/influxdb`
+
+Once the data and metastore are recovered, start the database:
+
+```bash
+$ service influxdb start
+```
+
+As a quick check, you can verify that the database is known to the metastore
+by running a `SHOW DATABASES` command:
+
+```
+influx -execute 'show databases'
+name: databases
+---------------
+name
+_internal
+telegraf
+```
+
+The database has now been successfully restored!
diff --git a/content/influxdb/v1.7/administration/config.md b/content/influxdb/v1.7/administration/config.md
new file mode 100644
index 000000000..443f53c10
--- /dev/null
+++ b/content/influxdb/v1.7/administration/config.md
@@ -0,0 +1,1255 @@
+---
+title: Configuring InfluxDB OSS
+menu:
+  influxdb_1_7:
+    name: Configuring InfluxDB
+    weight: 10
+    parent: Administration
+---
+
+The InfluxDB open source (OSS) configuration file contains configuration settings specific to a local node.
+
+#### Content
+
+* [Configuration overview](#configuration-overview)
+* [Environment variables](#environment-variables)
+  * [InfluxDB environment variables (`INFLUXDB_*`)](#influxdb-environment-variables-influxdb)
+  * [`GOMAXPROCS` environment variable](#gomaxprocs-environment-variable)
+* [Using the configuration file](#using-the-configuration-file)
+* [Configuration settings](#configuration-settings)
+  * [Global settings](#global-settings)
+  * [Metastore `[meta]`](#metastore-settings)
+  * [Data `[data]`](#data-settings)
+  * [Query management `[coordinator]`](#query-management-settings)
+  * [Retention policies `[retention]`](#retention-policy-settings)
+  * [Shard precreation `[shard-precreation]`](#shard-precreation-settings)
+  * [Monitoring `[monitor]`](#monitoring-settings)
+  * [HTTP endpoints `[http]`](#http-endpoints-settings)
+  * [Subscriptions `[subscriber]`](#subscription-settings)
+  * [Graphite `[[graphite]]`](#graphite-settings)
+  * [CollectD `[[collectd]]`](#collectd-settings)
+  * [OpenTSDB `[[opentsdb]]`](#opentsdb-settings)
+  * [UDP `[[udp]]`](#udp-settings)
+  * [Continuous queries `[continuous_queries]`](#continuous-queries-settings)
+  * [TLS `[tls]`](#transport-layer-security-tls-settings)
+
+## Configuration overview
+
+InfluxDB is configured using the configuration file (`influxdb.conf`) and environment variables.
+If you do not uncomment a configuration option, the system uses its default setting.
+The configuration settings in this document are set to their default settings.
+
+Configuration settings that specify a duration support the following duration units:
+
+- `ns` _(nanoseconds)_
+- `us` or `µs` _(microseconds)_
+- `ms` _(milliseconds)_
+- `s` _(seconds)_
+- `m` _(minutes)_
+- `h` _(hours)_
+- `d` _(days)_
+- `w` _(weeks)_
+
+>**Note:** Configuration file settings are documented here for the latest official release; the [sample configuration file on GitHub](https://github.com/influxdb/influxdb/blob/1.7/etc/config.sample.toml) might be slightly newer.
+
+## Environment variables
+
+Every setting in the configuration file can also be specified with an environment variable.
+An environment variable overrides the equivalent option in the configuration
+file.
+If a configuration option is not specified in either the configuration file or in an environment variable, InfluxDB uses its internal default configuration.
+
+> **Note:** If an environment variable has already been set, the equivalent configuration setting in the configuration file is ignored.
+
+### InfluxDB environment variables (`INFLUXDB_*`)
+
+The InfluxDB environment variables are documented below with the corresponding configuration file settings. All of the InfluxDB-specific environment variables are prefixed with `INFLUXDB_`.
+
+### `GOMAXPROCS` environment variable
+
+> **Note:** Unlike the `INFLUXDB_*` variables, `GOMAXPROCS` has no equivalent setting in the InfluxDB configuration file; it can only be set as an environment variable.
+
+The `GOMAXPROCS` [Go language environment variable](https://golang.org/pkg/runtime/#hdr-Environment_Variables) can be used to set the maximum number of CPUs that can execute simultaneously.
+
+The default value of `GOMAXPROCS` is the number of CPUs (whatever your operating system considers to be a CPU) that are visible to the program *on startup*. For a 32-core machine, the `GOMAXPROCS` value would be `32`.
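+
+For example, a minimal sketch of capping the cores InfluxDB may use (the value
+`8` and the config path here are illustrative, not recommendations):
+
+```bash
+# Limit the Go runtime to 8 simultaneously executing OS threads,
+# then start InfluxDB as usual.
+export GOMAXPROCS=8
+influxd -config /etc/influxdb/influxdb.conf
+```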
+
+Overriding this value to be less than the maximum can be useful when you run InfluxDB along with other processes on the same machine and want to ensure that the database doesn't completely starve those processes.
+
+> **Note:** Setting `GOMAXPROCS=1` eliminates all parallelization.
+
+## Using the configuration file
+
+The InfluxDB system has internal defaults for all of the settings in the configuration file. To view the default configuration settings, use the `influxd config` command.
+
+The local InfluxDB configuration file is located here:
+
+- Linux: `/etc/influxdb/influxdb.conf`
+- macOS: `/usr/local/etc/influxdb.conf`
+
+Settings that are commented out are set to the internal system defaults. Uncommented settings override the internal defaults.
+Note that the local configuration file does not need to include every configuration setting.
+
+There are two ways to launch InfluxDB with your configuration file:
+
+* Point the process to the configuration file by using the `-config`
+  option. For example:
+
+  ```bash
+  influxd -config /etc/influxdb/influxdb.conf
+  ```
+* Set the environment variable `INFLUXDB_CONFIG_PATH` to the path of your
+  configuration file and start the process.
+  For example:
+
+  ```
+  export INFLUXDB_CONFIG_PATH=/etc/influxdb/influxdb.conf
+  influxd
+  ```
+
+InfluxDB first checks for the `-config` option and then for the environment
+variable.
+
+## Configuration settings
+
+> **Note:**
+> To set or override settings in a config section that allows multiple
+> configurations (any section with `[[double_brackets]]` in the header supports
+> multiple configurations), the desired configuration must be specified by ordinal
+> number.
+> For example, for the first set of `[[graphite]]` environment variables,
+> prefix the configuration setting name in the environment variable with the
+> relevant position number (in this case: `0`):
+>
+> ```
+> INFLUXDB_GRAPHITE_0_BATCH_PENDING
+> INFLUXDB_GRAPHITE_0_BATCH_SIZE
+> INFLUXDB_GRAPHITE_0_BATCH_TIMEOUT
+> INFLUXDB_GRAPHITE_0_BIND_ADDRESS
+> INFLUXDB_GRAPHITE_0_CONSISTENCY_LEVEL
+> INFLUXDB_GRAPHITE_0_DATABASE
+> INFLUXDB_GRAPHITE_0_ENABLED
+> INFLUXDB_GRAPHITE_0_PROTOCOL
+> INFLUXDB_GRAPHITE_0_RETENTION_POLICY
+> INFLUXDB_GRAPHITE_0_SEPARATOR
+> INFLUXDB_GRAPHITE_0_TAGS
+> INFLUXDB_GRAPHITE_0_TEMPLATES
+> INFLUXDB_GRAPHITE_0_UDP_READ_BUFFER
+> ```
+>
+> For the Nth Graphite configuration in the configuration file, the relevant
+> environment variables would be of the form `INFLUXDB_GRAPHITE_(N-1)_BATCH_PENDING`.
+> For each section of the configuration file, the numbering restarts at zero.
+
+## Global settings
+
+### `reporting-disabled = false`
+
+InfluxData uses voluntarily reported data from running InfluxDB nodes
+primarily to track the adoption rates of different InfluxDB versions.
+This data helps InfluxData support the continuing development of
+InfluxDB.
+
+The `reporting-disabled` option toggles
+the reporting of data every 24 hours to `usage.influxdata.com`.
+Each report includes a randomly-generated identifier, OS, architecture,
+InfluxDB version, and the
+number of [databases](/influxdb/v1.7/concepts/glossary/#database),
+[measurements](/influxdb/v1.7/concepts/glossary/#measurement), and
+unique [series](/influxdb/v1.7/concepts/glossary/#series). Setting
+this option to `true` will disable reporting.
+
+>**Note:** No data from user databases is ever transmitted.
+
+Environment variable: `INFLUXDB_REPORTING_DISABLED`
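+
+For example, a sketch of opting out of usage reporting for a single run by
+setting the environment variable instead of editing the configuration file:
+
+```bash
+# Equivalent to reporting-disabled = true in influxdb.conf,
+# but scoped to this one invocation of the daemon.
+INFLUXDB_REPORTING_DISABLED=true influxd -config /etc/influxdb/influxdb.conf
+```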
+
+### `bind-address = "127.0.0.1:8088"`
+
+The bind address to use for the RPC service for [backup and restore](/influxdb/v1.7/administration/backup_and_restore/).
+
+Environment variable: `INFLUXDB_BIND_ADDRESS`
+
+## Metastore settings
+
+### `[meta]`
+
+This section controls parameters for the InfluxDB metastore,
+which stores information on users, databases, retention policies, shards, and continuous queries.
+
+### `dir = "/var/lib/influxdb/meta"`
+
+The directory where the metadata/raft database is stored.
+Files in the `meta` directory include `meta.db`, the InfluxDB metastore file.
+
+>**Note:** The default directory for macOS installations is `/Users/<username>/.influxdb/meta`.
+
+Environment variable: `INFLUXDB_META_DIR`
+
+### `retention-autocreate = true`
+
+Enables the automatic creation of the [`DEFAULT` retention policy](/influxdb/v1.7/concepts/glossary/#retention-policy-rp) `autogen` when a database is created.
+The retention policy `autogen` has an infinite duration and is also set as the
+database's `DEFAULT` retention policy, which is used when a write or query does
+not specify a retention policy.
+Disable this setting to prevent the creation of this retention policy when creating databases.
+
+Environment variable: `INFLUXDB_META_RETENTION_AUTOCREATE`
+
+### `logging-enabled = true`
+
+Enables the logging of messages from the meta service.
+
+Environment variable: `INFLUXDB_META_LOGGING_ENABLED`
+
+## Data settings
+
+### `[data]`
+
+The `[data]` settings control where the actual shard data for InfluxDB lives and how it is flushed from the Write-Ahead Log (WAL).
+`dir` may need to be changed to a suitable place for your system, but the WAL settings are an advanced configuration.
+The defaults should work for most systems.
+
+#### `dir = "/var/lib/influxdb/data"`
+
+The InfluxDB directory where the TSM engine stores TSM files.
+This directory may be changed.
+
+>**Note:** The default directory for macOS installations is `/Users/<username>/.influxdb/data`.
+
+Environment variable: `INFLUXDB_DATA_DIR`
+
+#### `wal-dir = "/var/lib/influxdb/wal"`
+
+The location of the directory for [write ahead log (WAL)](/influxdb/v1.7/concepts/glossary/#wal-write-ahead-log) files.
+
+>**Note:** For macOS installations, the default WAL directory is `/Users/<username>/.influxdb/wal`.
+
+Environment variable: `INFLUXDB_DATA_WAL_DIR`
+
+#### `wal-fsync-delay = "0s"`
+
+The amount of time that a write waits before fsyncing. Use a duration greater than `0` to batch up multiple fsync calls.
+This is useful for slower disks or when experiencing [WAL](/influxdb/v1.7/concepts/glossary/#wal-write-ahead-log) write contention.
+The default value of `0s` fsyncs every write to the WAL.
+
+>**Note:** For non-SSD disks, InfluxData recommends values in the range of `0ms`-`100ms`.
+
+Environment variable: `INFLUXDB_DATA_WAL_FSYNC_DELAY`
+
+#### `index-version = "inmem"`
+
+The type of shard index to use for new shards.
+The default (`inmem`) index is an in-memory index that is recreated at startup.
+To enable the Time Series Index (TSI) disk-based index, set the value to `tsi1`.
+
+Environment variable: `INFLUXDB_DATA_INDEX_VERSION`
+
+#### `trace-logging-enabled = false`
+
+Enables verbose logging of additional debug information within the TSM engine and WAL.
+Trace logging provides more useful output for debugging TSM engine issues.
+
+Environment variable: `INFLUXDB_DATA_TRACE_LOGGING_ENABLED`
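+
+Putting several of the `[data]` settings above together, a minimal sketch of
+applying them by environment variable (the paths and values here are
+illustrative only, not recommendations):
+
+```bash
+# Relocate TSM and WAL storage, batch fsyncs for a non-SSD disk,
+# and use the disk-based TSI index for new shards.
+export INFLUXDB_DATA_DIR=/mnt/influxdb/data
+export INFLUXDB_DATA_WAL_DIR=/mnt/influxdb/wal
+export INFLUXDB_DATA_WAL_FSYNC_DELAY=100ms
+export INFLUXDB_DATA_INDEX_VERSION=tsi1
+influxd -config /etc/influxdb/influxdb.conf
+```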
+
+#### `query-log-enabled = true`
+
+Enables the logging of parsed queries before execution.
+The query log can be useful for troubleshooting, but it logs any sensitive data contained within a query.
+
+Environment variable: `INFLUXDB_DATA_QUERY_LOG_ENABLED`
+
+#### `validate-keys = false`
+
+Validates incoming writes to ensure keys contain only valid Unicode characters.
+This setting incurs a small overhead because every key must be checked.
+
+### Settings for the TSM engine
+
+#### `cache-max-memory-size = "1g"`
+
+The maximum size that a shard cache can reach before it starts rejecting writes.
+
+Valid memory size suffixes are: `k`, `m`, or `g` (case-insensitive, 1024 = 1k).
+Values without a size suffix are in bytes.
+
+Environment variable: `INFLUXDB_DATA_CACHE_MAX_MEMORY_SIZE`
+
+#### `cache-snapshot-memory-size = "25m"`
+
+The size at which the engine will snapshot the cache and write it to a TSM file, freeing up memory.
+
+Valid memory size suffixes are: `k`, `m`, or `g` (case-insensitive, 1024 = 1k).
+Values without a size suffix are in bytes.
+
+Environment variable: `INFLUXDB_DATA_CACHE_SNAPSHOT_MEMORY_SIZE`
+
+#### `cache-snapshot-write-cold-duration = "10m"`
+
+The time interval at which the engine will snapshot the cache and write it to a new TSM file if the shard hasn't received writes or deletes.
+
+Environment variable: `INFLUXDB_DATA_CACHE_SNAPSHOT_WRITE_COLD_DURATION`
+
+#### `compact-full-write-cold-duration = "4h"`
+
+The time interval at which the TSM engine will compact all TSM files in a shard if it hasn't received a write or delete.
+
+Environment variable: `INFLUXDB_DATA_COMPACT_FULL_WRITE_COLD_DURATION`
+
+#### `max-concurrent-compactions = 0`
+
+The maximum number of concurrent full and level [compactions](/influxdb/v1.7/concepts/storage_engine/#compactions) that can run at one time.
+The default value of `0` results in 50% of the CPU cores being used at runtime for compactions.
+If explicitly set, the number of cores used for compaction is limited to the specified value.
+This setting does not apply to cache snapshotting.
+For more information, see [`GOMAXPROCS` environment variable](#gomaxprocs-environment-variable) on this page.
+
+Environment variable: `INFLUXDB_DATA_MAX_CONCURRENT_COMPACTIONS`
+
+#### `compact-throughput = "48m"`
+
+The rate limit, in bytes per second, at which TSM compactions are allowed to write to disk.
+Note that short bursts are allowed to happen at a possibly larger value, set by `compact-throughput-burst`.
+
+Environment variable: `INFLUXDB_DATA_COMPACT_THROUGHPUT`
+
+#### `compact-throughput-burst = "48m"`
+
+The rate limit, in bytes per second, at which TSM compactions are allowed to write to disk during brief bursts.
+
+Environment variable: `INFLUXDB_DATA_COMPACT_THROUGHPUT_BURST`
+
+#### `tsm-use-madv-willneed = false`
+
+If `true`, the `MADV_WILLNEED` MMap Advise value tells the kernel to expect access to the memory region mapped over the TSM files in the near future, which influences input/output paging.
+Because this setting has been problematic on some kernels (including CentOS and RHEL), the default is `false`.
+Changing the value to `true` might help users who have slow disks in some cases.
+
+Environment variable: `INFLUXDB_DATA_TSM_USE_MADV_WILLNEED`
+
+### In-memory (`inmem`) index settings
+
+#### `max-series-per-database = 1000000`
+
+The maximum number of [series](/influxdb/v1.7/concepts/glossary/#series) allowed per database before writes are dropped.
+The default setting is `1000000` (one million).
+Change the setting to `0` to allow an unlimited number of series per database.
+
+If a point causes the number of series in a database to exceed
+`max-series-per-database`, InfluxDB will not write the point, and it returns a
+`500` with the following error:
+
+```
+{"error":"max series per database exceeded: <series>"}
+```
+
+> **Note:** Any existing databases with a series count that exceeds `max-series-per-database`
+> will continue to accept writes to existing series, but writes that create a
+> new series will fail.
+
+Environment variable: `INFLUXDB_DATA_MAX_SERIES_PER_DATABASE`
+
+#### `max-values-per-tag = 100000`
+
+The maximum number of [tag values](/influxdb/v1.7/concepts/glossary/#tag-value) allowed per [tag key](/influxdb/v1.7/concepts/glossary/#tag-key).
+The default value is `100000` (one hundred thousand).
+Change the setting to `0` to allow an unlimited number of tag values per tag
+key.
+If a tag value causes the number of tag values of a tag key to exceed
+`max-values-per-tag`, then InfluxDB will not write the point, and it returns
+a `partial write` error.
+
+Any existing tag keys with tag values that exceed `max-values-per-tag`
+will continue to accept writes, but writes that create a new tag value
+will fail.
+
+Environment variable: `INFLUXDB_DATA_MAX_VALUES_PER_TAG`
+
+### TSI (`tsi1`) index settings
+
+#### `max-index-log-file-size = "1m"`
+
+The threshold, in bytes, at which an index write-ahead log (WAL) file will compact
+into an index file. Lower sizes will cause log files to be compacted more
+quickly and result in lower heap usage at the expense of write throughput.
+Higher sizes will be compacted less frequently, store more series in memory,
+and provide higher write throughput.
+Valid size suffixes are `k`, `m`, or `g` (case-insensitive, 1024 = 1k).
+Values without a size suffix are in bytes.
+
+Environment variable: `INFLUXDB_DATA_MAX_INDEX_LOG_FILE_SIZE`
+
+#### `series-id-set-cache-size = 100`
+
+Specifies the number of series ID sets to cache for the TSI index (by default, 100). Series IDs in a set refer to series that match on the same index predicate (tag filter). An example filter might be `region = west`. When the index plans a query, it produces a set for each tag filter in the query. These sets are then cached in the index.
+
+The series ID set is an LRU cache, so once the cache is full, the least recently used set is evicted. Cached results are returned quickly because they don’t need to be recalculated when a subsequent query with a matching tag filter is executed. For example, if a query includes `region = west`, the series IDs matching `region = west` are cached and subsequent queries that include `region = west` are retrieved from the cache.
+
+We recommend using the default setting. Changing this value to `0` disables the cache, which may lead to query performance issues.
+Increase this value only if you know the set of tag key-value predicates across all measurements for a database is larger than 100. Increasing the cache size may lead to an increase in heap usage.
+
+Environment variable: `INFLUXDB_DATA_SERIES_ID_SET_CACHE_SIZE`
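+
+As a sketch of the `inmem` limit settings described above
+(`max-series-per-database` and `max-values-per-tag`), you might lift both caps
+on a host with ample memory (illustrative only; an unbounded series count can
+exhaust RAM with the in-memory index):
+
+```bash
+# 0 disables the per-database series cap and the per-tag-key value cap.
+export INFLUXDB_DATA_MAX_SERIES_PER_DATABASE=0
+export INFLUXDB_DATA_MAX_VALUES_PER_TAG=0
+influxd -config /etc/influxdb/influxdb.conf
+```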
+
+## Query management settings
+
+### `[coordinator]`
+
+This section contains configuration settings for query management.
+For more on managing queries, see [Query Management](/influxdb/v1.7/troubleshooting/query_management/).
+
+#### `write-timeout = "10s"`
+
+The duration a write request waits until a "timeout" error is returned to the caller. The default value is 10 seconds.
+
+Environment variable: `INFLUXDB_COORDINATOR_WRITE_TIMEOUT`
+
+#### `max-concurrent-queries = 0`
+
+The maximum number of running queries allowed on your instance.
+The default setting (`0`) allows for an unlimited number of queries.
+
+Environment variable: `INFLUXDB_COORDINATOR_MAX_CONCURRENT_QUERIES`
+
+#### `query-timeout = "0s"`
+
+The maximum duration that a query is allowed to execute before InfluxDB
+kills the query.
+The default setting (`0`) allows queries to run with no time restrictions.
+This setting is a [duration](#configuration-overview).
+
+Environment variable: `INFLUXDB_COORDINATOR_QUERY_TIMEOUT`
+
+#### `log-queries-after = "0s"`
+
+The maximum duration that a query can run before InfluxDB logs the query with a
+`Detected slow query` message.
+The default setting (`0s`) disables slow query logging.
+This setting is a [duration](#configuration-overview).
+
+Environment variable: `INFLUXDB_COORDINATOR_LOG_QUERIES_AFTER`
+
+#### `max-select-point = 0`
+
+The maximum number of [points](/influxdb/v1.7/concepts/glossary/#point) that a
+`SELECT` statement can process.
+The default setting (`0`) allows the `SELECT` statement to process an unlimited
+number of points.
+
+Environment variable: `INFLUXDB_COORDINATOR_MAX_SELECT_POINT`
+
+#### `max-select-series = 0`
+
+The maximum number of [series](/influxdb/v1.7/concepts/glossary/#series) that a
+`SELECT` statement can process.
+The default setting (`0`) allows the `SELECT` statement to process an unlimited
+number of series.
+
+Environment variable: `INFLUXDB_COORDINATOR_MAX_SELECT_SERIES`
+
+#### `max-select-buckets = 0`
+
+The maximum number of `GROUP BY time()` buckets that a query can process.
+The default setting (`0`) allows a query to process an unlimited number of
+buckets.
+
+Environment variable: `INFLUXDB_COORDINATOR_MAX_SELECT_BUCKETS`
+
+-----
+
+## Retention policy settings
+
+### `[retention]`
+
+The `[retention]` settings control the enforcement of retention policies for evicting old data.
+
+#### `enabled = true`
+
+Set to `false` to prevent InfluxDB from enforcing retention policies.
+
+Environment variable: `INFLUXDB_RETENTION_ENABLED`
+
+#### `check-interval = "30m0s"`
+
+The time interval at which InfluxDB checks to enforce a retention policy.
+
+Environment variable: `INFLUXDB_RETENTION_CHECK_INTERVAL`
+
+-----
+
+## Shard precreation settings
+
+### `[shard-precreation]`
+
+The `[shard-precreation]` settings control the precreation of shards so that shards are available before data arrives.
+Only shards that, after creation, will have both a start time and an end time in the future are ever created.
+Shards that would be wholly or partially in the past are never precreated.
+
+#### `enabled = true`
+
+Determines whether the shard precreation service is enabled.
+
+Environment variable: `INFLUXDB_SHARD_PRECREATION_ENABLED`
+
+#### `check-interval = "10m"`
+
+The time interval at which the check to precreate new shards runs.
+
+Environment variable: `INFLUXDB_SHARD_PRECREATION_CHECK_INTERVAL`
+
+#### `advance-period = "30m"`
+
+The maximum period in the future for which InfluxDB precreates shards.
+The `30m` default should work for most systems.
+Increasing this setting too far into the future can cause inefficiencies.
+
+Environment variable: `INFLUXDB_SHARD_PRECREATION_ADVANCE_PERIOD`
+
+## Monitoring settings
+
+### `[monitor]`
+
+The `[monitor]` section settings control the InfluxDB [system self-monitoring](https://github.com/influxdata/influxdb/blob/1.7/monitor/README.md).
+
+By default, InfluxDB writes the data to the `_internal` database.
+If that database does not exist, InfluxDB creates it automatically.
+The `DEFAULT` retention policy on the `_internal` database is seven days.
+If you want to use a retention policy other than the seven-day retention policy, you must [create](/influxdb/v1.7/query_language/database_management/#retention-policy-management) it.
+
+#### `store-enabled = true`
+
+Set to `false` to disable recording statistics internally.
+Setting this to `false` makes it substantially more difficult to diagnose issues with your installation.
+
+Environment variable: `INFLUXDB_MONITOR_STORE_ENABLED`
+
+#### `store-database = "_internal"`
+
+The destination database for recorded statistics.
+
+Environment variable: `INFLUXDB_MONITOR_STORE_DATABASE`
+
+#### `store-interval = "10s"`
+
+The time interval at which InfluxDB records statistics.
+The default value is every ten seconds (`10s`).
+
+Environment variable: `INFLUXDB_MONITOR_STORE_INTERVAL`
+
+## HTTP endpoints settings
+
+### `[http]`
+
+The `[http]` section settings control how InfluxDB configures the HTTP endpoints.
+These are the primary mechanisms for getting data into and out of InfluxDB.
+Edit the settings in this section to enable HTTPS and authentication.
+
+For details on enabling HTTPS and authentication, see [Authentication and Authorization](/influxdb/v1.7/administration/authentication_and_authorization/).
+
+#### `enabled = true`
+
+Determines whether the HTTP endpoints are enabled.
+To disable access to the HTTP endpoints, set the value to `false`.
+Note that the InfluxDB [command line interface (CLI)](/influxdb/v1.7/tools/shell/) connects to the database using the InfluxDB API.
+
+Environment variable: `INFLUXDB_HTTP_ENABLED`
+
+#### `flux-enabled = false`
+
+Determines whether the Flux query endpoint is enabled. To enable the use of Flux queries, set the value to `true`.
+
+Environment variable: `INFLUXDB_HTTP_FLUX_ENABLED`
+
+#### `bind-address = ":8086"`
+
+The bind address (port) used by the HTTP service.
+
+Environment variable: `INFLUXDB_HTTP_BIND_ADDRESS`
+
+#### `auth-enabled = false`
+
+Determines whether user authentication is enabled over HTTP and HTTPS.
+To require authentication, set the value to `true`.
+
+Environment variable: `INFLUXDB_HTTP_AUTH_ENABLED`
+
+#### `realm = "InfluxDB"`
+
+The default realm sent back when issuing a basic authentication challenge.
+The realm is the JWT realm used by the HTTP endpoints.
+
+Environment variable: `INFLUXDB_HTTP_REALM`
+
+#### `log-enabled = true`
+
+Determines whether HTTP request logging is enabled.
+To disable logging, set the value to `false`.
+
+Environment variable: `INFLUXDB_HTTP_LOG_ENABLED`
+
+#### `suppress-write-log = false`
+
+Determines whether the HTTP write request logs should be suppressed when the log is enabled.
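+
+For example, a sketch of enabling HTTP request logging and routing it to its
+own file by environment variable (the path is illustrative; see
+`access-log-path` below):
+
+```bash
+export INFLUXDB_HTTP_LOG_ENABLED=true
+export INFLUXDB_HTTP_ACCESS_LOG_PATH=/var/log/influxdb/access.log
+influxd -config /etc/influxdb/influxdb.conf
+```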
+
+#### `access-log-path = ""`
+
+The path to the HTTP request access log.
+When HTTP request logging is enabled, this option specifies the path where log entries should be written.
+If unspecified, the default is to write to `stderr`, which intermingles HTTP logs with internal InfluxDB logging.
+If `influxd` is unable to access the specified path, it logs an error and falls back to writing the request log to `stderr`.
+
+Environment variable: `INFLUXDB_HTTP_ACCESS_LOG_PATH`
+
+#### `access-log-status-filters = []`
+
+Filters which requests should be logged. Each filter is of the pattern `nnn`, `nnx`, or `nxx` where `n` is
+a number and `x` is the wildcard for any number.
+To filter all `5xx` responses, use the string `5xx`.
+If multiple filters are used, then only one has to match.
+The default value is no filters, with every request being logged.
+
+Environment variable: `INFLUXDB_HTTP_ACCESS_LOG_STATUS_FILTERS_x`
+
+##### Examples
+
+###### Setting access log status filters using configuration settings
+
+`access-log-status-filters = ["4xx", "5xx"]`
+
+`"4xx"` is in array position `0`.
+`"5xx"` is in array position `1`.
+
+###### Setting access log status filters using environment variables
+
+The input value for `access-log-status-filters` is an array.
+When using environment variables, the values can be supplied as follows.
+
+`INFLUXDB_HTTP_ACCESS_LOG_STATUS_FILTERS_0=4xx`
+
+`INFLUXDB_HTTP_ACCESS_LOG_STATUS_FILTERS_1=5xx`
+
+The `_n` at the end of the environment variable represents the array position of the entry.
+
+#### `write-tracing = false`
+
+Determines whether detailed write logging is enabled.
+Set to `true` to enable logging for the write payload.
+If set to `true`, this will duplicate every write statement in the logs and is thus not recommended for general use.
+
+Environment variable: `INFLUXDB_HTTP_WRITE_TRACING`
+
+#### `pprof-enabled = true`
+
+Determines whether the `/net/http/pprof` HTTP endpoint is enabled.
+Useful for troubleshooting and monitoring.
+
+Environment variable: `INFLUXDB_HTTP_PPROF_ENABLED`
+
+#### `pprof-auth-enabled = false`
+
+Enables authentication on `/debug` endpoints.
+If enabled, users need admin permissions to access the following endpoints:
+
+- `/debug/pprof`
+- `/debug/requests`
+- `/debug/vars`
+
+This setting has no effect if either [`auth-enabled`](#auth-enabled-false) or
+[`pprof-enabled`](#pprof-enabled-true) are set to `false`.
+
+Environment variable: `INFLUXDB_HTTP_PPROF_AUTH_ENABLED`
+
+#### `debug-pprof-enabled = false`
+
+Enables the default `/pprof` endpoint and binds against `localhost:6060`.
+Useful for debugging startup performance issues.
+
+Environment variable: `INFLUXDB_HTTP_DEBUG_PPROF_ENABLED`
+
+#### `ping-auth-enabled = false`
+
+Enables authentication on the `/ping`, `/metrics`, and deprecated `/status` endpoints.
+This setting has no effect if [`auth-enabled`](#auth-enabled-false) is set to `false`.
+
+Environment variable: `INFLUXDB_HTTP_PING_AUTH_ENABLED`
+
+#### `https-enabled = false`
+
+Determines whether HTTPS is enabled.
+To enable HTTPS, set the value to `true`.
+
+Environment variable: `INFLUXDB_HTTP_HTTPS_ENABLED`
+
+#### `https-certificate = "/etc/ssl/influxdb.pem"`
+
+The path of the SSL certificate file to use when HTTPS is enabled.
+
+Environment variable: `INFLUXDB_HTTP_HTTPS_CERTIFICATE`
+
+#### `https-private-key = ""`
+
+Use a separate private key location.
+If only the `https-certificate` is specified, the `httpd` service will try to load the private key from the `https-certificate` file.
+If a separate `https-private-key` file is specified, the `httpd` service will load the private key from the `https-private-key` file.
+
+Environment variable: `INFLUXDB_HTTP_HTTPS_PRIVATE_KEY`
+
+#### `shared-secret = ""`
+
+The shared secret used to validate public API requests using JWT tokens.
+
+Environment variable: `INFLUXDB_HTTP_SHARED_SECRET`
+
+#### `max-row-limit = 0`
+
+The maximum number of rows that the system can return in a [non-chunked](/influxdb/v1.7/tools/api#query-string-parameters) query.
+The default setting (`0`) allows for an unlimited number of rows.
+If the query results exceed the specified value, then InfluxDB includes a `"partial":true` tag in the response body.
+
+Environment variable: `INFLUXDB_HTTP_MAX_ROW_LIMIT`
+
+#### `max-connection-limit = 0`
+
+The maximum number of connections that may be open at once.
+New connections that would exceed the limit are dropped.
+The default value of `0` disables the limit.
+
+Environment variable: `INFLUXDB_HTTP_MAX_CONNECTION_LIMIT`
+
+#### `unix-socket-enabled = false`
+
+Enables the HTTP service over the UNIX domain socket.
+To enable the HTTP service over the UNIX domain socket, set the value to `true`.
+
+Environment variable: `INFLUXDB_HTTP_UNIX_SOCKET_ENABLED`
+
+#### `bind-socket = "/var/run/influxdb.sock"`
+
+The path of the UNIX domain socket.
+
+Environment variable: `INFLUXDB_HTTP_UNIX_BIND_SOCKET`
+
+#### `max-body-size = 25000000`
+
+The maximum size, in bytes, of a client request body.
+When an HTTP client sends data that exceeds the configured maximum size, a `413 Request Entity Too Large` HTTP response is returned.
+To disable the limit, set the value to `0`.
+
+Environment variable: `INFLUXDB_HTTP_MAX_BODY_SIZE`
+
+#### `max-concurrent-write-limit = 0`
+
+The maximum number of writes that can be processed concurrently.
+To disable the limit, set the value to `0`.
+
+Environment variable: `INFLUXDB_HTTP_MAX_CONCURRENT_WRITE_LIMIT`
+
+#### `max-enqueued-write-limit = 0`
+
+The maximum number of writes queued for processing.
+To disable the limit, set the value to `0`.
+
+Environment variable: `INFLUXDB_HTTP_MAX_ENQUEUED_WRITE_LIMIT`
+
+#### `enqueued-write-timeout = 0`
+
+The maximum duration for a write to wait in the queue to be processed.
+To disable the limit, set this to `0` or set the `max-concurrent-write-limit` value to `0`.
+
+Environment variable: `INFLUXDB_HTTP_ENQUEUED_WRITE_TIMEOUT`
+
+-----
+
+## Logging settings
+
+### `[logging]`
+
+Controls how the logger emits logs to the output.
+
+#### `format = "auto"`
+
+Determines which log encoder to use for logs.
+Valid values are `auto` (default), `logfmt`, and `json`.
+With the default `auto` option, if the output is to a TTY device (e.g., a terminal), a more user-friendly console encoding is used.
+If the output is to files, the `auto` option uses the `logfmt` encoding.
+The `logfmt` and `json` options are useful for integration with external tools.
+
+Environment variable: `INFLUXDB_LOGGING_FORMAT`
+
+#### `level = "info"`
+
+The log level to be emitted.
+Valid values are `error`, `warn`, `info` (default), and `debug`.
+Logs that are equal to, or above, the specified level will be emitted.
+
+Environment variable: `INFLUXDB_LOGGING_LEVEL`
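+
+As a sketch, you could switch to machine-readable JSON logs at debug level for
+a single run (values illustrative):
+
+```bash
+# Environment variables override the [logging] settings above.
+INFLUXDB_LOGGING_FORMAT=json INFLUXDB_LOGGING_LEVEL=debug influxd
+```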
+
+#### `suppress-logo = false`
+
+Suppresses the logo output that is printed when the program is started.
+The logo is always suppressed if `STDOUT` is not a TTY.
+
+Environment variable: `INFLUXDB_LOGGING_SUPPRESS_LOGO`
+
+-----
+
+## Subscription settings
+
+### `[subscriber]`
+
+The `[subscriber]` section controls how [Kapacitor](/kapacitor/v1.4/) will receive data.
+
+#### `enabled = true`
+
+Determines whether the subscriber service is enabled.
+To disable the subscriber service, set the value to `false`.
+
+Environment variable: `INFLUXDB_SUBSCRIBER_ENABLED`
+
+#### `http-timeout = "30s"`
+
+The duration that an HTTP write to a subscriber runs until it times out.
+
+Environment variable: `INFLUXDB_SUBSCRIBER_HTTP_TIMEOUT`
+
+#### `insecure-skip-verify = false`
+
+Determines whether to allow insecure HTTPS connections to subscribers.
+This is useful when testing with self-signed certificates.
+
+Environment variable: `INFLUXDB_SUBSCRIBER_INSECURE_SKIP_VERIFY`
+
+#### `ca-certs = ""`
+
+The path to the PEM-encoded CA certs file.
+If the value is an empty string (`""`), the default system certs will be used.
+
+Environment variable: `INFLUXDB_SUBSCRIBER_CA_CERTS`
+
+#### `write-concurrency = 40`
+
+The number of writer goroutines processing the write channel.
+
+Environment variable: `INFLUXDB_SUBSCRIBER_WRITE_CONCURRENCY`
+
+#### `write-buffer-size = 1000`
+
+The number of in-flight writes buffered in the write channel.
+
+Environment variable: `INFLUXDB_SUBSCRIBER_WRITE_BUFFER_SIZE`
+
+-----
+
+## Graphite settings
+
+### `[[graphite]]`
+
+This section controls one or many listeners for Graphite data.
+For more information, see [Graphite protocol support in InfluxDB](/influxdb/v1.7/supported_protocols/graphite/).
+
+#### `enabled = false`
+
+Set to `true` to enable Graphite input.
+
+Environment variable: `INFLUXDB_GRAPHITE_0_ENABLED`
+
+#### `database = "graphite"`
+
+The name of the database that you want to write to.
+
+Environment variable: `INFLUXDB_GRAPHITE_0_DATABASE`
+
+#### `retention-policy = ""`
+
+The relevant retention policy.
+An empty string is equivalent to the database's `DEFAULT` retention policy.
+
+Environment variable: `INFLUXDB_GRAPHITE_0_RETENTION_POLICY`
+
+#### `bind-address = ":2003"`
+
+The default port.
+
+Environment variable: `INFLUXDB_GRAPHITE_0_BIND_ADDRESS`
+
+#### `protocol = "tcp"`
+
+Set to `tcp` or `udp`.
+
+Environment variable: `INFLUXDB_GRAPHITE_PROTOCOL`
+
+#### `consistency-level = "one"`
+
+The number of nodes that must confirm the write.
+If the requirement is not met, the return value will be either `partial write` if some points in the batch fail or `write failure` if all points in the batch fail.
+For more information, see the Query String Parameters for Writes section in the [InfluxDB line protocol syntax reference](/influxdb/v1.7/write_protocols/write_syntax/).
+
+Environment variable: `INFLUXDB_GRAPHITE_CONSISTENCY_LEVEL`
+
+*The next three settings control how batching works.
+Batching should be enabled; otherwise, you could get dropped metrics or poor performance.
+Batching buffers points in memory if many are coming in.*
+
+#### `batch-size = 5000`
+
+The input will flush if this many points get buffered.
+
+Environment variable: `INFLUXDB_GRAPHITE_BATCH_SIZE`
+
+#### `batch-pending = 10`
+
+The number of batches that may be pending in memory.
+
+Environment variable: `INFLUXDB_GRAPHITE_BATCH_PENDING`
+
+#### `batch-timeout = "1s"`
+
+The input will flush at least this often, even if it hasn't reached the configured batch size.
+
+Environment variable: `INFLUXDB_GRAPHITE_BATCH_TIMEOUT`
+
+#### `udp-read-buffer = 0`
+
+UDP read buffer size; `0` means use the OS default.
+The UDP listener will fail if set above the OS maximum.
+
+Environment variable: `INFLUXDB_GRAPHITE_UDP_READ_BUFFER`
+
+#### `separator = "."`
+
+This string joins multiple matching 'measurement' values, providing more control over the final measurement name.
+
+Environment variable: `INFLUXDB_GRAPHITE_SEPARATOR`
+
+-----
+
+## CollectD settings
+
+### `[[collectd]]`
+
+The `[[collectd]]` settings control the listener for `collectd` data.
+For more information, see [CollectD protocol support in InfluxDB](/influxdb/v1.7/supported_protocols/collectd/).
+
+#### `enabled = false`
+
+Set to `true` to enable `collectd` writes.
+
+Environment variable: `INFLUXDB_COLLECTD_ENABLED`
+
+#### `bind-address = ":25826"`
+
+The port.
+
+Environment variable: `INFLUXDB_COLLECTD_BIND_ADDRESS`
+
+#### `database = "collectd"`
+
+The name of the database that you want to write to.
+This defaults to `collectd`.
+
+Environment variable: `INFLUXDB_COLLECTD_DATABASE`
+
+#### `retention-policy = ""`
+
+The relevant retention policy.
+An empty string is equivalent to the database's `DEFAULT` retention policy.
+
+Environment variable: `INFLUXDB_COLLECTD_RETENTION_POLICY`
+
+#### `typesdb = "/usr/local/share/collectd"`
+
+The collectd service supports either scanning a directory for multiple types
+DB files or specifying a single DB file.
+A sample `types.db` file
+can be found
+[here](https://github.com/collectd/collectd/blob/master/src/types.db).
+
+Environment variable: `INFLUXDB_COLLECTD_TYPESDB`
+
+#### `security-level = "none"`
+
+Environment variable: `INFLUXDB_COLLECTD_SECURITY_LEVEL`
+
+#### `auth-file = "/etc/collectd/auth_file"`
+
+Environment variable: `INFLUXDB_COLLECTD_AUTH_FILE`
+
+*The next three settings control how batching works.
+Batching should be enabled; otherwise, you could get dropped metrics or poor performance.
+Batching buffers points in memory if many are coming in.*
+
+#### `batch-size = 5000`
+
+The input will flush if this many points get buffered.
+
+Environment variable: `INFLUXDB_COLLECTD_BATCH_SIZE`
+
+#### `batch-pending = 10`
+
+The number of batches that may be pending in memory.
+
+Environment variable: `INFLUXDB_COLLECTD_BATCH_PENDING`
+
+#### `batch-timeout = "10s"`
+
+The input will flush at least this often, even if it hasn't reached the configured batch size.
+
+Environment variable: `INFLUXDB_COLLECTD_BATCH_TIMEOUT`
+
+#### `read-buffer = 0`
+
+UDP read buffer size; `0` means use the OS default.
+The UDP listener will fail if set above the OS maximum.
+
+Environment variable: `INFLUXDB_COLLECTD_READ_BUFFER`
+
+#### `parse-multivalue-plugin = "split"`
+
+When set to `split`, multi-value plugin data (e.g., `df free:5000,used:1000`) will be split into separate measurements (e.g., `(df_free, value=5000)`, `(df_used, value=1000)`). When set to `join`, multi-value plugin data will be stored as a single multi-value measurement (e.g., `(df, free=5000,used=1000)`). Defaults to `split`.
+
+-----
+
+## OpenTSDB settings
+
+### `[[opentsdb]]`
+
+Controls the listener for OpenTSDB data.
+For more information, see [OpenTSDB protocol support in InfluxDB](/influxdb/v1.7/supported_protocols/opentsdb/).
+
+#### `enabled = false`
+
+Set to `true` to enable OpenTSDB writes.
+
+Environment variable: `INFLUXDB_OPENTSDB_0_ENABLED`
+
+#### `bind-address = ":4242"`
+
+The default port.
+
+Environment variable: `INFLUXDB_OPENTSDB_BIND_ADDRESS`
+
+#### `database = "opentsdb"`
+
+The name of the database that you want to write to.
+If the database does not exist, it will be created automatically when the input is initialized.
+
+Environment variable: `INFLUXDB_OPENTSDB_DATABASE`
+
+#### `retention-policy = ""`
+
+The relevant retention policy.
+An empty string is equivalent to the database's `DEFAULT` retention policy.
+
+Environment variable: `INFLUXDB_OPENTSDB_RETENTION_POLICY`
+
+#### `consistency-level = "one"`
+
+Sets the write consistency level: `any`, `one`, `quorum`, or `all` for writes.
+
+Environment variable: `INFLUXDB_OPENTSDB_CONSISTENCY_LEVEL`
+
+#### `tls-enabled = false`
+
+Environment variable: `INFLUXDB_OPENTSDB_TLS_ENABLED`
+
+#### `certificate = "/etc/ssl/influxdb.pem"`
+
+Environment variable: `INFLUXDB_OPENTSDB_CERTIFICATE`
+
+#### `log-point-errors = true`
+
+Log an error for every malformed point.
+
+Environment variable: `INFLUXDB_OPENTSDB_0_LOG_POINT_ERRORS`
+
+*The next three settings control how batching works.
+Batching should be enabled; otherwise, you could get dropped metrics or poor performance.
+Only points received over the telnet protocol undergo batching.*
+
+#### `batch-size = 1000`
+
+The input will flush if this many points get buffered.
+
+Environment variable: `INFLUXDB_OPENTSDB_BATCH_SIZE`
+
+#### `batch-pending = 5`
+
+The number of batches that may be pending in memory.
+
+Environment variable: `INFLUXDB_OPENTSDB_BATCH_PENDING`
+
+#### `batch-timeout = "1s"`
+
+The input will flush at least this often, even if it hasn't reached the configured batch size.
+
+Environment variable: `INFLUXDB_OPENTSDB_BATCH_TIMEOUT`
+
+-----
+
+## UDP settings
+
+### `[[udp]]`
+
+The `[[udp]]` settings control the listeners for InfluxDB line protocol data using UDP.
+For more information, see [UDP protocol support in InfluxDB](/influxdb/v1.7/supported_protocols/udp/).
+
+#### `enabled = false`
+
+Determines whether UDP listeners are enabled.
+To enable writes over UDP, set the value to `true`.
+
+Environment variable: `INFLUXDB_UDP_ENABLED`
+
+#### `bind-address = ":8089"`
+
+An empty string is equivalent to `0.0.0.0`.
+
+Environment variable: `INFLUXDB_UDP_BIND_ADDRESS`
+
+#### `database = "udp"`
+
+The name of the database that you want to write to.
+
+Environment variable: `INFLUXDB_UDP_DATABASE`
+
+#### `retention-policy = ""`
+
+The relevant retention policy for your data.
+An empty string is equivalent to the database's `DEFAULT` retention policy.
+
+Environment variable: `INFLUXDB_UDP_RETENTION_POLICY`
+
+*The next three settings control how batching works.
+Batching should be enabled; otherwise, you could get dropped metrics or poor performance.
+Batching buffers points in memory if many are coming in.*
+
+#### `batch-size = 5000`
+
+The input will flush if this many points get buffered.
+
+Environment variable: `INFLUXDB_UDP_0_BATCH_SIZE`
+
+#### `batch-pending = 10`
+
+The number of batches that may be pending in memory.
+
+Environment variable: `INFLUXDB_UDP_0_BATCH_PENDING`
+
+#### `batch-timeout = "1s"`
+
+The input will flush at least this often, even if it hasn't reached the configured batch size.
+
+Environment variable: `INFLUXDB_UDP_BATCH_TIMEOUT`
+
+#### `read-buffer = 0`
+
+UDP read buffer size; `0` means use the OS default.
+The UDP listener will fail if set above the OS maximum.
+
+Environment variable: `INFLUXDB_UDP_READ_BUFFER`
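+
+Assuming a listener is enabled on the default `:8089` (see `enabled` and
+`bind-address` above), a quick sketch of sending one line protocol point over
+UDP with `nc`:
+
+```bash
+# -u selects UDP; -w1 gives up after one second.
+echo "cpu,host=server01 value=0.64" | nc -u -w1 localhost 8089
+```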
+
+#### `precision = ""`
+
+[Time precision](/influxdb/v1.7/query_language/spec/#durations) used when decoding time values. Defaults to nanoseconds, which is the default of the database.
+
+Environment variable: `INFLUXDB_UDP_PRECISION`
+
+-----
+
+## Continuous queries settings
+
+### `[continuous_queries]`
+
+The `[continuous_queries]` settings control how [continuous queries (CQs)](/influxdb/v1.7/concepts/glossary/#continuous-query-cq) run within InfluxDB.
+Continuous queries are automated batches of queries that execute over recent time intervals.
+InfluxDB executes one auto-generated query per `GROUP BY time()` interval.
+
+#### `enabled = true`
+
+Set to `false` to disable CQs.
+
+Environment variable: `INFLUXDB_CONTINUOUS_QUERIES_ENABLED`
+
+#### `log-enabled = true`
+
+Set to `false` to disable logging for CQ events.
+
+Environment variable: `INFLUXDB_CONTINUOUS_QUERIES_LOG_ENABLED`
+
+#### `query-stats-enabled = false`
+
+When set to `true`, continuous query execution statistics are written to the default monitor store.
+
+Environment variable: `INFLUXDB_CONTINUOUS_QUERIES_QUERY_STATS_ENABLED`
+
+#### `run-interval = "1s"`
+
+The interval at which InfluxDB checks to see if a CQ needs to run. Set this option to the lowest interval at which your CQs run. For example, if your most frequent CQ runs every minute, set `run-interval` to `1m`.
+
+Environment variable: `INFLUXDB_CONTINUOUS_QUERIES_RUN_INTERVAL`
+
+-----
+
+## Transport Layer Security (TLS) settings
+
+### `[tls]`
+
+Global configuration settings for Transport Layer Security (TLS) in InfluxDB.
+
+If the TLS configuration settings are not specified, InfluxDB supports all of the cipher suite IDs listed and all TLS versions implemented in the [Constants section of the Go `crypto/tls` package documentation](https://golang.org/pkg/crypto/tls/#pkg-constants), depending on the version of Go used to build InfluxDB.
+Use the `SHOW DIAGNOSTICS` command to see the version of Go used to build InfluxDB.
+
+### Recommended server configuration for "modern compatibility"
+
+InfluxData recommends configuring your InfluxDB server's TLS settings for "modern compatibility," which provides a higher level of security and assumes that backward compatibility is not required.
+Our recommended TLS configuration settings for `ciphers`, `min-version`, and `max-version` are based on Mozilla's "modern compatibility" TLS server configuration described in [Security/Server Side TLS](https://wiki.mozilla.org/Security/Server_Side_TLS#Modern_compatibility).
+
+InfluxData's recommended TLS settings for "modern compatibility" are specified in the following configuration settings example.
+
+```
+ciphers = [ "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305",
+            "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305",
+            "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256",
+            "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256",
+            "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384",
+            "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384"
+]
+
+min-version = "tls1.2"
+
+max-version = "tls1.2"
+```
+
+> **Important:** The order of the cipher suite IDs in the `ciphers` setting determines which algorithms are selected by priority. The TLS `min-version` and the `max-version` settings restrict support to TLS 1.2.
+
+#### `ciphers = [ "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305", "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256", ]`
+
+Specifies the set of cipher suite IDs to negotiate.
+If not specified, `ciphers` supports all existing cipher suite IDs listed in the Go `crypto/tls` package.
+This is consistent with the behavior within previous releases.
+In this example, only the two specified cipher suite IDs would be supported.
+
+Environment variable: `INFLUXDB_TLS_CIPHERS`
+
+#### `min-version = "tls1.0"`
+
+Minimum version of the TLS protocol that will be negotiated. Valid values include: `tls1.0`, `tls1.1`, and `tls1.2`. If not specified, `min-version` is the minimum TLS version specified in the [Go `crypto/tls` package](https://golang.org/pkg/crypto/tls/#pkg-constants). In this example, `tls1.0` specifies the minimum version as TLS 1.0, which is consistent with the behavior of previous InfluxDB releases.
+
+Environment variable: `INFLUXDB_TLS_MIN_VERSION`
+
+#### `max-version = "tls1.2"`
+
+The maximum version of the TLS protocol that will be negotiated. Valid values include: `tls1.0`, `tls1.1`, and `tls1.2`. If not specified, `max-version` is the maximum TLS version specified in the [Go `crypto/tls` package](https://golang.org/pkg/crypto/tls/#pkg-constants). In this example, `tls1.2` specifies the maximum version as TLS 1.2, which is consistent with the behavior of previous InfluxDB releases.
+
+Environment variable: `INFLUXDB_TLS_MAX_VERSION`
diff --git a/content/influxdb/v1.7/administration/https_setup.md b/content/influxdb/v1.7/administration/https_setup.md
new file mode 100644
index 000000000..db9aeb784
--- /dev/null
+++ b/content/influxdb/v1.7/administration/https_setup.md
@@ -0,0 +1,161 @@
+---
+title: Enabling HTTPS with InfluxDB
+description: Enable HTTPS and Transport Layer Security (TLS) secure communication between clients and your InfluxDB servers.
+menu:
+  influxdb_1_7:
+    name: Enabling HTTPS
+    weight: 30
+    parent: Administration
+---
+
+Enable TLS to encrypt communication between clients and the InfluxDB server.
+When configured with a signed certificate, TLS also allows clients to verify the authenticity of the InfluxDB server.
+
+{{% warn %}}
+InfluxData **strongly recommends** enabling HTTPS, especially if you plan to send requests to InfluxDB over a network.
+{{% /warn %}}
+
+{{% note %}}
+If setting up HTTPS for [InfluxDB Enterprise](/enterprise_influxdb), follow the [InfluxDB Enterprise HTTPS setup guide](/enterprise_influxdb/v1.7/guides/https_setup/).
+{{% /note %}}
+
+## Requirements
+
+To enable HTTPS with InfluxDB, you need a Transport Layer Security (TLS) certificate, also known as a Secure Sockets Layer (SSL) certificate.
+InfluxDB supports three types of TLS certificates:
+
+* **Single domain certificates signed by a [Certificate Authority](https://en.wikipedia.org/wiki/Certificate_authority)**
+
+  Single domain certificates provide cryptographic security to HTTPS requests and allow clients to verify the identity of the InfluxDB server.
+  These certificates are signed and issued by a trusted, third-party Certificate Authority (CA).
+  With this certificate option, every InfluxDB instance requires a unique single domain certificate.
+
+* **Wildcard certificates signed by a Certificate Authority**
+
+  Wildcard certificates provide cryptographic security to HTTPS requests and allow clients to verify the identity of the InfluxDB server.
+  Wildcard certificates can be used across multiple InfluxDB instances on different servers.
+
+* **Self-signed certificates**
+
+  Self-signed certificates are _not_ signed by a trusted, third-party CA.
+  Self-signed certificates provide cryptographic security to HTTPS requests but don't allow clients to verify the identity of the InfluxDB server.
+  With this kind of certificate, every InfluxDB instance requires a unique self-signed certificate.
+  You can generate a self-signed certificate on your own machine.
+
+## Configure InfluxDB to use TLS
+
+1. **Download or generate certificate files**
+
+   If using a certificate provided by a CA, follow their instructions to download the certificate files.
+
+   If using a self-signed certificate, use the `openssl` utility to create a certificate.
+
+   Use the following command to generate a private key file (`.key`) and a self-signed certificate file (`.crt`) and save them to `/etc/ssl/`.
+   Set `NUMBER_OF_DAYS` to specify the amount of time the files will remain valid.
+
+   ```sh
+   sudo openssl req -x509 -nodes -newkey rsa:2048 \
+     -keyout /etc/ssl/influxdb-selfsigned.key \
+     -out /etc/ssl/influxdb-selfsigned.crt \
+     -days <NUMBER_OF_DAYS>
+   ```
+
+   The command will prompt you for more information.
+   You can choose to fill out these fields or leave them blank; both actions generate valid certificate files.
+
+2. **Set certificate file permissions**
+
+   The user running InfluxDB must have read permissions on the TLS certificate.
+
+   {{% note %}}You may opt to set up multiple users, groups, and permissions.
+   Ultimately, make sure all users running InfluxDB have read permissions for the TLS certificate.
+   {{% /note %}}
+
+   Run the following commands to set permissions on the certificate files:
+
+   ```bash
+   sudo chmod 644 /etc/ssl/influxdb-selfsigned.crt
+   sudo chmod 600 /etc/ssl/influxdb-selfsigned.key
+   ```
+
+3. **Enable HTTPS in the configuration file**
+
+   HTTPS is disabled by default.
+   Enable HTTPS in the `[http]` section of the configuration file (`/etc/influxdb/influxdb.conf`) by setting:
+
+   * `https-enabled` to `true`
+   * `https-certificate` to `/etc/ssl/influxdb-selfsigned.crt`
+   * `https-private-key` to `/etc/ssl/influxdb-selfsigned.key`
+
+   ```
+   [http]
+
+   [...]
+
+   # Determines whether HTTPS is enabled.
+   https-enabled = true
+
+   [...]
+
+   # The TLS or SSL certificate to use when HTTPS is enabled.
+   https-certificate = "/etc/ssl/influxdb-selfsigned.crt"
+
+   # Use a separate private key location.
+   https-private-key = "/etc/ssl/influxdb-selfsigned.key"
+   ```
+
+4. **Verify TLS connection**
+
+   Verify that HTTPS is working by connecting to InfluxDB with the [CLI tool](/influxdb/v1.7/tools/shell/):
+
+   ```bash
+   influx -ssl -host <domain_name>.com
+   ```
+
+   If using a self-signed certificate, add the `-unsafeSsl` flag to the above command.
+
+   A successful connection returns the following:
+
+   ```bash
+   Connected to https://<domain_name>.com:8086 version 1.x.x
+   InfluxDB shell version: 1.x.x
+   >
+   ```
+
+   That's it! You've successfully set up HTTPS with InfluxDB.
+
+## Connect Telegraf to a secured InfluxDB instance
+
+Connecting [Telegraf](/telegraf/latest/) to an InfluxDB instance that's using
+HTTPS requires some additional steps.
+
+In the Telegraf configuration file (`/etc/telegraf/telegraf.conf`), edit the `urls`
+setting to indicate `https` instead of `http` and change `localhost` to the
+relevant domain name.
+If you're using a self-signed certificate, uncomment the `insecure_skip_verify`
+setting and set it to `true`.
+
+```toml
+###############################################################################
+#                            OUTPUT PLUGINS                                   #
+###############################################################################
+
+# Configuration for InfluxDB server to send metrics to
+[[outputs.influxdb]]
+  ## The full HTTP or UDP endpoint URL for your InfluxDB instance.
+  ## Multiple urls can be specified as part of the same cluster,
+  ## this means that only ONE of the urls will be written to each interval.
+  # urls = ["udp://localhost:8089"] # UDP endpoint example
+  urls = ["https://<domain_name>.com:8086"]
+
+  [...]
+
+  ## Optional SSL Config
+  [...]
+  insecure_skip_verify = true # <-- Update only if you're using a self-signed certificate
+```
+
+Next, restart Telegraf and you're all set!
diff --git a/content/influxdb/v1.7/administration/logs.md b/content/influxdb/v1.7/administration/logs.md
new file mode 100644
index 000000000..278a55126
--- /dev/null
+++ b/content/influxdb/v1.7/administration/logs.md
@@ -0,0 +1,351 @@
+---
+title: Logging and tracing in InfluxDB
+
+menu:
+  influxdb_1_7:
+    name: Logging and tracing
+    weight: 40
+    parent: Administration
+---
+
+**Content**
+
+* [Logging locations](#logging-locations)
+* [HTTP access logging](#http-access-logging)
+* [Structured logging](#structured-logging)
+* [Tracing](#tracing)
+
+## Logging locations
+
+InfluxDB writes log output, by default, to `stderr`.
+Depending on your use case, this log information can be written to another location.
+
+### Running InfluxDB directly
+
+When you run InfluxDB directly, using `influxd`, all logs are written to `stderr`.
+You can also redirect the log output, as you would any output to `stderr`, as in this example:
+
+```
+influxd 2>$HOME/my_log_file
+```
+
+### Launched as a service
+
+{{< tabs-wrapper >}}
+{{% tabs %}}
+[systemd](#)
+[sysvinit](#)
+{{% /tabs %}}
+
+{{% tab-content %}}
+#### systemd
+
+Most Linux systems direct logs to the `systemd` journal.
+To access these logs, use this command:
+
+```sh
+sudo journalctl -u influxdb.service
+```
+
+For more information, see the [`journald.conf` manual page](https://www.freedesktop.org/software/systemd/man/journald.conf.html).
+{{% /tab-content %}}
+
+{{% tab-content %}}
+#### sysvinit
+
+On Linux systems not using systemd, InfluxDB writes all log data and `stderr` to `/var/log/influxdb/influxd.log`.
+You can override this location by setting the environment variable `STDERR` in a start-up script at `/etc/default/influxdb`.
+(If this file doesn't exist, you need to create it.)
+
+For example, if `/etc/default/influxdb` contains:
+
+```sh
+STDERR=/dev/null
+```
+
+all log data is discarded.
+Likewise, you can direct output to `stdout` by setting `STDOUT` in the same file.
+`stdout` is sent to `/dev/null` by default when InfluxDB is launched as a service.
+
+InfluxDB must be restarted to use any changes to `/etc/default/influxdb`.
+{{% /tab-content %}}
+
+{{< /tabs-wrapper >}}
+
+> #### Log location on macOS
+> On macOS, InfluxDB stores logs at `/usr/local/var/log/influxdb.log` by default.
+
+### Using logrotate
+
+You can use [logrotate](http://manpages.ubuntu.com/manpages/cosmic/en/man8/logrotate.8.html) to rotate the log files generated by InfluxDB on systems where logs are written to flat files.
+If using the package install on a `sysvinit` system, the config file for logrotate is installed in `/etc/logrotate.d`.
+You can view the file [here](https://github.com/influxdb/influxdb/blob/1.7/scripts/logrotate).
+
+## HTTP access logging
+
+Use the HTTP access log to log HTTP request traffic separately from the other InfluxDB log output.
+
+### HTTP access log format
+
+The following is an example of the HTTP access log format. The table below describes each component of the HTTP access log.
+
+```
+172.13.8.13,172.39.5.169 - - [21/Jul/2019:03:01:27 +0000] "GET /query?db=metrics&q=SELECT+MEAN%28value%29+as+average_cpu%2C+MAX%28value%29+as+peak_cpu+FROM+%22foo.load%22+WHERE+time+%3E%3D+now%28%29+-+1m+AND+org_id+%21%3D+%27%27+AND+group_id+%21%3D+%27%27+GROUP+BY+org_id%2Cgroup_id HTTP/1.0" 200 11450 "-" "Baz Service" d4ca9a10-ab63-11e9-8942-000000000000 9357049
+```
+
+| Component | Example |
+|--- |--- |
+|Host |`172.13.8.13,172.39.5.169` |
+|Time of log event |`[21/Jul/2019:03:01:27 +0000]` |
+|Request method |`GET` |
+|Username |`user` |
+|HTTP API call being made* |`/query?db=metrics%26q=SELECT%20used_percent%20FROM%20%22telegraf.autogen.mem%22%20WHERE%20time%20%3E=%20now()%20-%201m%20 ` |
+|Request protocol |`HTTP/1.0` |
+|HTTP response code |`200` |
+|Size of response in bytes |`11450` |
+|Referrer |`-` |
+|User agent |`Baz Service` |
+|Request ID |`d4ca9a10-ab63-11e9-8942-000000000000` |
+|Response time in microseconds |`9357049` |
+
+\* This field shows the database being accessed and the query being run. For more details, see [InfluxDB API reference](/influxdb/v1.7/tools/api/). Note that this field is URL-encoded.
+
+### Redirecting HTTP access logging
+
+When HTTP request logging is enabled, the HTTP logs are intermingled by default with internal InfluxDB logging. By redirecting the HTTP request log entries to a separate file, both log files are easier to read, monitor, and debug.
+
+**To redirect HTTP request logging:**
+
+Locate the `[http]` section of your InfluxDB configuration file and set the `access-log-path` option to specify the path where HTTP log entries should be written.
+
+**Notes:**
+
+* If `influxd` is unable to access the specified path, it will log an error and fall back to writing the request log to `stderr`.
+* The `[httpd]` prefix is stripped when HTTP request logging is redirected to a separate file, allowing access log parsing tools (like [lnav](https://lnav.org)) to render the files without additional modification.
+* To rotate the HTTP request log file, use the `copytruncate` method of `logrotate` or similar to leave the original file in place.
+
+## Structured logging
+
+Structured logging enables machine-readable and more developer-friendly log output formats. The two structured log formats, `logfmt` and `json`, provide easier filtering and searching with external tools and simplify integration of InfluxDB logs with Splunk, Papertrail, Elasticsearch, and other third-party tools.
+
+The InfluxDB logging configuration options (in the `[logging]` section) include the following options:
+
+* `format`: `auto` (default) | `logfmt` | `json`
+* `level`: `error` | `warn` | `info` (default) | `debug`
+* `suppress-logo`: `false` (default) | `true`
+
+For details on these logging configuration options and their corresponding environment variables, see [Logging settings](/influxdb/v1.7/administration/config#logging-settings) in the configuration file documentation.
+
+### Logging formats
+
+Three logging `format` options are available: `auto`, `logfmt`, and `json`. The default logging format setting, `format = "auto"`, lets InfluxDB automatically manage the log encoding format:
+
+* When logging to a file, the `logfmt` format is used.
+* When logging to a terminal (or other TTY device), a user-friendly console format is used.
+
+The `json` format is available when specified.
+
+### Examples of log output
+
+**Logfmt**
+
+```
+ts=2018-02-20T22:48:11.291815Z lvl=info msg="InfluxDB starting" version=unknown branch=unknown commit=unknown
+ts=2018-02-20T22:48:11.291858Z lvl=info msg="Go runtime" version=go1.10 maxprocs=8
+ts=2018-02-20T22:48:11.291875Z lvl=info msg="Loading configuration file" path=/Users/user_name/.influxdb/influxdb.conf
+```
+
+**JSON**
+
+```
+{"lvl":"info","ts":"2018-02-20T22:46:35Z","msg":"InfluxDB starting, version unknown, branch unknown, commit unknown"}
+{"lvl":"info","ts":"2018-02-20T22:46:35Z","msg":"Go version go1.10, GOMAXPROCS set to 8"}
+{"lvl":"info","ts":"2018-02-20T22:46:35Z","msg":"Using configuration at: /Users/user_name/.influxdb/influxdb.conf"}
+```
+
+**Console/TTY**
+
+```
+2018-02-20T22:55:34.246997Z info InfluxDB starting {"version": "unknown", "branch": "unknown", "commit": "unknown"}
+2018-02-20T22:55:34.247042Z info Go runtime {"version": "go1.10", "maxprocs": 8}
+2018-02-20T22:55:34.247059Z info Loading configuration file {"path": "/Users/user_name/.influxdb/influxdb.conf"}
+```
+
+### Logging levels
+
+The `level` option sets the log level to be emitted. Valid logging level settings are `error`, `warn`, `info` (default), and `debug`. Logs that are equal to, or above, the specified level are emitted.
+
+### Logo suppression
+
+The `suppress-logo` option can be used to suppress the logo output that is printed when the program is started. The logo is always suppressed if `STDOUT` is not a TTY.
+
+## Tracing
+
+Logging has been enhanced to provide tracing of important InfluxDB operations. Tracing is useful for error reporting and discovering performance bottlenecks.
+
+### Logging keys used in tracing
+
+#### Tracing identifier key
+
+The `trace_id` key specifies a unique identifier for a specific instance of a trace. You can use this key to filter and correlate all related log entries for an operation.
+
+All operation traces include consistent starting and ending log entries, with the same message (`msg`) describing the operation (e.g., "TSM compaction"), but adding the appropriate `op_event` context (either `start` or `end`). For an example, see [Finding all trace log entries for an InfluxDB operation](#finding-all-trace-log-entries-for-an-influxdb-operation).
+
+**Example:** `trace_id=06R0P94G000`
+
+#### Operation keys
+
+The following operation keys identify an operation's name, the start and end timestamps, and the elapsed execution time.
+
+##### `op_name`
+Unique identifier for an operation. You can filter on all operations of a specific name.
+
+**Example:** `op_name=tsm1_compact_group`
+
+##### `op_event`
+Specifies the start and end of an event. The two possible values, `start` and `end`, indicate when an operation started or ended. For example, you can grep by values in `op_name` AND `op_event` to find all starting operation log entries. For an example of this, see [Finding all starting log entries](#finding-all-starting-operation-log-entries).
+
+**Example:** `op_event=start`
+
+##### `op_elapsed`
+Amount of time the operation spent executing. Logged with the ending trace log entry. The time unit displayed depends on how much time has elapsed; a duration in seconds, for example, is suffixed with `s`. Valid time units are `ns`, `µs`, `ms`, and `s`.
+
+**Example:** `op_elapsed=0.352ms`
+
+#### Log identifier context key
+
+The log identifier key (`log_id`) lets you easily identify _every_ log entry for a single execution of an `influxd` process. There are other ways a log file could be split by a single execution, but the consistent `log_id` eases the searching of log aggregation services.
+
+**Example:** `log_id=06QknqtW000`
+
+#### Database context keys
+
+`db_instance`: Database name
+
+`db_rp`: Retention policy name
+
+`db_shard_id`: Shard identifier
+
+`db_shard_group`: Shard group identifier
+
+### Tooling
+
+Here are a couple of popular tools available for processing and filtering log files output in `logfmt` or `json` formats.
+
+#### [hutils](https://blog.heroku.com/hutils-explore-your-structured-data-logs)
+
+[hutils](https://blog.heroku.com/hutils-explore-your-structured-data-logs), provided by Heroku, is a collection of command-line utilities for working with logs with `logfmt` encoding, including:
+
+* `lcut`: Extracts values from a `logfmt` trace based on a specified field name.
+* `lfmt`: Prettifies `logfmt` lines as they emerge from a stream, and highlights their key sections.
+* `ltap`: Accesses messages from log providers in a consistent way to allow easy parsing by other utilities that operate on `logfmt` traces.
+* `lviz`: Visualizes `logfmt` output by building a tree out of a dataset, combining common sets of key-value pairs into shared parent nodes.
+
+#### [lnav (Log File Navigator)](http://lnav.org)
+
+[lnav (Log File Navigator)](http://lnav.org) is an advanced log file viewer useful for watching and analyzing your log files from a terminal. The lnav viewer provides a single log view, automatic log format detection, filtering, a timeline view, a pretty-print view, and querying logs using SQL.
+
+### Operations
+
+The following operations, listed by their operation name (`op_name`), are traced in InfluxDB internal logs and available for use without changes in logging level.
+
+#### Initial opening of data files
+
+The `tsdb_open` operation traces include all events related to the initial opening of the `tsdb_store`.
+
+#### Retention policy shard deletions
+
+The `retention.delete_check` operation includes all shard deletions related to the retention policy.
+
+#### TSM snapshotting in-memory cache to disk
+
+The `tsm1_cache_snapshot` operation represents the snapshotting of the TSM in-memory cache to disk.
+
+#### TSM compaction strategies
+
+The `tsm1_compact_group` operation includes all trace log entries related to TSM compaction strategies and displays the related TSM compaction strategy keys:
+
+* `tsm1_strategy`: `level` | `full`
+* `tsm1_level`: `1` | `2` | `3`
+* `tsm1_optimize`: `true` | `false`
+
+#### Series file compactions
+
+The `series_partition_compaction` operation includes all trace log entries related to series file compactions.
+
+#### Continuous query execution (if logging enabled)
+
+The `continuous_querier_execute` operation includes all continuous query executions, if logging is enabled.
+
+#### TSI log file compaction
+
+The `tsi1_compact_log_file` operation includes all trace log entries related to TSI log file compactions.
+
+#### TSI level compaction
+
+The `tsi1_compact_to_level` operation includes all trace log entries for TSI level compactions.
+
+### Tracing examples
+
+#### Finding all trace log entries for an InfluxDB operation
+
+In the example below, you can see the log entries for all trace operations related to a "TSM compaction" process. Note that the initial entry shows the message "TSM compaction (start)" and the final entry displays the message "TSM compaction (end)".
\[Note: Log entries were grepped using the `trace_id` value and then the specified key values were displayed using `lcut` (an hutils tool).\] + +``` +$ grep "06QW92x0000" influxd.log | lcut ts lvl msg strategy level +2018-02-21T20:18:56.880065Z info TSM compaction (start) full +2018-02-21T20:18:56.880162Z info Beginning compaction full +2018-02-21T20:18:56.880185Z info Compacting file full +2018-02-21T20:18:56.880211Z info Compacting file full +2018-02-21T20:18:56.880226Z info Compacting file full +2018-02-21T20:18:56.880254Z info Compacting file full +2018-02-21T20:19:03.928640Z info Compacted file full +2018-02-21T20:19:03.928687Z info Finished compacting files full +2018-02-21T20:19:03.928707Z info TSM compaction (end) full +``` + + +#### Finding all starting operation log entries + +To find all starting operation log entries, you can grep by values in `op_name` AND `op_event`. In the following example, the grep returned 101 entries, so the result below only displays the first entry. In the example result entry, the timestamp, level, strategy, trace_id, op_name, and op_event values are included. + +``` +$ grep -F 'op_name=tsm1_compact_group' influxd.log | grep -F 'op_event=start' +ts=2018-02-21T20:16:16.709953Z lvl=info msg="TSM compaction" log_id=06QVNNCG000 engine=tsm1 level=1 strategy=level trace_id=06QV~HHG000 op_name=tsm1_compact_group op_event=start +... +``` + +Using the `lcut` utility (in hutils), the following command uses the previous `grep` command, but adds an `lcut` command to only display the keys and their values for keys that are not identical in all of the entries. The following example includes 19 examples of unique log entries displaying selected keys: `ts`, `strategy`, `level`, and `trace_id`. + +``` +$ grep -F 'op_name=tsm1_compact_group' influxd.log | grep -F 'op_event=start' | lcut ts strategy level trace_id | sort -u +2018-02-21T20:16:16.709953Z level 1 06QV~HHG000 +2018-02-21T20:16:40.707452Z level 1 06QW0k0l000 +2018-02-21T20:17:04.711519Z level 1 06QW2Cml000 +2018-02-21T20:17:05.708227Z level 2 06QW2Gg0000 +2018-02-21T20:17:29.707245Z level 1 06QW3jQl000 +2018-02-21T20:17:53.711948Z level 1 06QW5CBl000 +2018-02-21T20:18:17.711688Z level 1 06QW6ewl000 +2018-02-21T20:18:56.880065Z full 06QW92x0000 +2018-02-21T20:20:46.202368Z level 3 06QWFizW000 +2018-02-21T20:21:25.292557Z level 1 06QWI6g0000 +2018-02-21T20:21:49.294272Z level 1 06QWJ_RW000 +2018-02-21T20:22:13.292489Z level 1 06QWL2B0000 +2018-02-21T20:22:37.292431Z level 1 06QWMVw0000 +2018-02-21T20:22:38.293320Z level 2 06QWMZqG000 +2018-02-21T20:23:01.293690Z level 1 06QWNygG000 +2018-02-21T20:23:25.292956Z level 1 06QWPRR0000 +2018-02-21T20:24:33.291664Z full 06QWTa2l000 +2018-02-21T21:12:08.017055Z full 06QZBpKG000 +2018-02-21T21:12:08.478200Z full 06QZBr7W000 +``` diff --git a/content/influxdb/v1.7/administration/ports.md b/content/influxdb/v1.7/administration/ports.md new file mode 100644 index 000000000..773c9c648 --- /dev/null +++ b/content/influxdb/v1.7/administration/ports.md @@ -0,0 +1,59 @@ +--- +title: InfluxDB ports + +menu: + influxdb_1_7: + name: Ports + weight: 50 + parent: Administration +--- + +## Enabled ports + +### `8086` +The default port that runs the InfluxDB HTTP service. +[Configure this port](/influxdb/v1.7/administration/config#bind-address-8086) +in the configuration file. 
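+
+For example, a quick way to confirm the HTTP service is listening on this port is the `/ping` endpoint, which returns `204 No Content` when the server is up. A minimal check, assuming a default local install:
+
+```bash
+# Expect "HTTP/1.1 204 No Content" from a healthy server
+curl -s -I http://localhost:8086/ping
+```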
+
+**Resources** [API Reference](/influxdb/v1.7/tools/api/)
+
+### `8088`
+The default port used by the RPC service for RPC calls made by the CLI for backup and restore operations (`influxd backup` and `influxd restore`).
+[Configure this port](/influxdb/v1.7/administration/config#bind-address-127-0-0-1-8088)
+in the configuration file.
+
+**Resources** [Backup and Restore](/influxdb/v1.7/administration/backup_and_restore/)
+
+## Disabled ports
+
+### `2003`
+
+The default port that runs the Graphite service.
+[Enable and configure this port](/influxdb/v1.7/administration/config#bind-address-2003)
+in the configuration file.
+
+**Resources** [Graphite README](https://github.com/influxdata/influxdb/tree/1.7/services/graphite/README.md)
+
+### `4242`
+
+The default port that runs the OpenTSDB service.
+[Enable and configure this port](/influxdb/v1.7/administration/config#bind-address-4242)
+in the configuration file.
+
+**Resources** [OpenTSDB README](https://github.com/influxdata/influxdb/tree/1.7/services/opentsdb/README.md)
+
+### `8089`
+
+The default port that runs the UDP service.
+[Enable and configure this port](/influxdb/v1.7/administration/config#bind-address-8089)
+in the configuration file.
+
+**Resources** [UDP README](https://github.com/influxdata/influxdb/tree/1.7/services/udp/README.md)
+
+### `25826`
+
+The default port that runs the collectd service.
+[Enable and configure this port](/influxdb/v1.7/administration/config#bind-address-25826)
+in the configuration file.
+
+**Resources** [Collectd README](https://github.com/influxdata/influxdb/tree/1.7/services/collectd/README.md)
diff --git a/content/influxdb/v1.7/administration/rebuild-tsi-index.md b/content/influxdb/v1.7/administration/rebuild-tsi-index.md
new file mode 100644
index 000000000..250f53dc0
--- /dev/null
+++ b/content/influxdb/v1.7/administration/rebuild-tsi-index.md
@@ -0,0 +1,52 @@
+---
+title: Rebuild the TSI index
+description: >
+  Rebuild your InfluxDB TSI index using the `influx_inspect buildtsi` command.
+menu:
+  influxdb_1_7:
+    weight: 60
+    parent: Administration
+---
+
+The InfluxDB [Time Series Index (TSI)](/influxdb/v1.7/concepts/tsi-details/)
+indexes or caches measurement and tag data to ensure queries are performant.
+In some cases, it may be necessary to flush and rebuild the TSI index.
+Use the following steps to rebuild your InfluxDB TSI index:
+
+## 1. Stop InfluxDB
+Stop InfluxDB by stopping the `influxd` process.
+
+## 2. Remove all `_series` directories
+Remove all `_series` directories.
+By default, `_series` directories are stored at `/data/<database>/_series`,
+however you should check for and remove `_series` files throughout the `/data` directory.
+
+## 3. Remove all index directories
+Remove all index directories.
+By default, index directories are stored at `/data/<database>/<retention_policy>/<shard_ID>/index`.
+
+## 4. Rebuild the TSI index
+Use the [`influx_inspect` command line interface (CLI)](/influxdb/v1.7/tools/influx_inspect)
+to rebuild the TSI index:
+
+```sh
+# Pattern
+influx_inspect buildtsi -datadir <data_directory> -waldir <wal_directory>
+
+# Example
+influx_inspect buildtsi -datadir /data -waldir /wal
+```
+
+## 5. Restart InfluxDB
+Restart InfluxDB by starting the `influxd` process.
+
+---
+
+{{% note %}}
+## Rebuilding the TSI index in an InfluxDB Enterprise cluster
+To rebuild the TSI index in an InfluxDB Enterprise cluster, perform the steps
+above on each data node in the cluster, one after the other.
+After restarting the `influxd` process on a data node, allow the
+[hinted handoff queue (HHQ)](/enterprise_influxdb/latest/concepts/clustering/#hinted-handoff)
+to write all missed data to the updated node before moving on to the next node.
+{{% /note %}}
diff --git a/content/influxdb/v1.7/administration/security.md b/content/influxdb/v1.7/administration/security.md
new file mode 100644
index 000000000..d4d0de8dc
--- /dev/null
+++ b/content/influxdb/v1.7/administration/security.md
@@ -0,0 +1,51 @@
+---
+title: Managing InfluxDB security
+menu:
+  influxdb_1_7:
+    name: Managing security
+    weight: 70
+    parent: Administration
+---
+
+Some customers may choose to install InfluxDB with public internet access; however,
+doing so can inadvertently expose your data and invite unwelcome attacks on your database.
+Check out the sections below for how to protect the data in your InfluxDB instance.
+
+## Enabling authentication
+
+Password protect your InfluxDB instance to keep any unauthorized individuals
+from accessing your data.
+
+Resources:
+[Set up Authentication](/influxdb/v1.7/administration/authentication_and_authorization/#set-up-authentication)
+
+## Managing users and permissions
+
+Restrict access by creating individual users and assigning them relevant
+read and/or write permissions.
+
+Resources:
+[User Types and Privileges](/influxdb/v1.7/administration/authentication_and_authorization/#user-types-and-privileges),
+[User Management Commands](/influxdb/v1.7/administration/authentication_and_authorization/#user-management-commands)
+
+## Enabling HTTPS
+
+Enabling HTTPS encrypts the communication between clients and the InfluxDB server.
+HTTPS can also verify the authenticity of the InfluxDB server to connecting clients.
+
+Resources:
+[Enabling HTTPS](/influxdb/v1.7/administration/https_setup/)
+
+## Securing your host
+
+### Ports
+If you're only running InfluxDB, close all ports on the host except for port `8086`.
+You can also use a proxy to port `8086`.
+
+InfluxDB uses port `8088` for remote [backups and restores](/influxdb/v1.7/administration/backup_and_restore/).
+We highly recommend closing that port and, if performing a remote backup,
+giving specific permission only to the remote machine.
+
+### AWS recommendations
+
+We recommend implementing on-disk encryption; InfluxDB does not offer built-in support to encrypt the data.
diff --git a/content/influxdb/v1.7/administration/server_monitoring.md b/content/influxdb/v1.7/administration/server_monitoring.md
new file mode 100644
index 000000000..e7b76021b
--- /dev/null
+++ b/content/influxdb/v1.7/administration/server_monitoring.md
@@ -0,0 +1,166 @@
+---
+title: InfluxDB server monitoring
+aliases:
+  - /influxdb/v1.7/administration/statistics/
+  - /influxdb/v1.7/troubleshooting/statistics/
+menu:
+  influxdb_1_7:
+    name: Server monitoring
+    weight: 80
+    parent: Administration
+---
+
+**On this page**
+
+* [SHOW STATS](#show-stats)
+* [SHOW DIAGNOSTICS](#show-diagnostics)
+* [Internal monitoring](#internal-monitoring)
+* [Useful performance metrics commands](#useful-performance-metrics-commands)
+* [InfluxDB `/metrics` HTTP endpoint](#influxdb-metrics-http-endpoint)
+
+InfluxDB can display statistical and diagnostic information about each node.
+This information can be very useful for troubleshooting and performance monitoring.
+
+## SHOW STATS
+
+To see node statistics, execute the command `SHOW STATS`.
+For details on this command, see [`SHOW STATS`](/influxdb/v1.7/query_language/spec#show-stats) in the InfluxQL specification.
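+
+For example, you can run the command from the [`influx` CLI](/influxdb/v1.7/tools/shell/). This is a quick sketch; the `FOR` clause narrows the output to a single module, and the module name used here (`'runtime'`) is one of the names that appears in the full `SHOW STATS` output:
+
+```bash
+# Return statistics for all modules
+influx -execute 'SHOW STATS'
+
+# Return statistics for a single module
+influx -execute "SHOW STATS FOR 'runtime'"
+```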
+
+The statistics returned by `SHOW STATS` are stored in memory only, and are reset to zero when the node is restarted.
+
+## SHOW DIAGNOSTICS
+
+To see node diagnostic information, execute the command `SHOW DIAGNOSTICS`.
+This returns information such as build information, uptime, hostname, server configuration, memory usage, and Go runtime diagnostics.
+For details on this command, see [`SHOW DIAGNOSTICS`](/influxdb/v1.7/query_language/spec#show-diagnostics) in the InfluxQL specification.
+
+## Internal monitoring
+InfluxDB also writes statistical and diagnostic information to a database named `_internal`, which records metrics on the internal runtime and service performance.
+The `_internal` database can be queried and manipulated like any other InfluxDB database.
+Check out the [monitor service README](https://github.com/influxdata/influxdb/blob/1.7/monitor/README.md) and the [internal monitoring blog post](https://www.influxdata.com/blog/how-to-use-the-show-stats-command-and-the-_internal-database-to-monitor-influxdb/) for more detail.
+
+## Useful performance metrics commands
+
+Below is a collection of commands to find useful performance metrics about your InfluxDB instance.
+
+To find the number of points per second being written to the instance (the `monitor` service must be enabled):
+
+```bash
+$ influx -execute 'select derivative(pointReq, 1s) from "write" where time > now() - 5m' -database '_internal' -precision 'rfc3339'
+```
+
+To find the number of writes separated by database since the beginning of the log file:
+
+```bash
+grep 'POST' /var/log/influxdb/influxd.log | awk '{ print $10 }' | sort | uniq -c
+```
+
+Or, for systemd systems logging to journald:
+
+```bash
+journalctl -u influxdb.service | awk '/POST/ { print $10 }' | sort | uniq -c
+```
+
+### InfluxDB `/metrics` HTTP endpoint
+
+> ***Note:*** There are no outstanding PRs for improvements to the `/metrics` endpoint, but we’ll add them to the CHANGELOG as they occur.
+
+The InfluxDB `/metrics` endpoint is configured to produce the default Go metrics in Prometheus metrics format.
+
+#### Example using the InfluxDB `/metrics` endpoint
+
+Below is an example of the output generated using the `/metrics` endpoint. Note that HELP text is available to explain the Go statistics.
+
+```
+# HELP go_gc_duration_seconds A summary of the GC invocation durations.
+# TYPE go_gc_duration_seconds summary
+go_gc_duration_seconds{quantile="0"} 6.4134e-05
+go_gc_duration_seconds{quantile="0.25"} 8.8391e-05
+go_gc_duration_seconds{quantile="0.5"} 0.000131335
+go_gc_duration_seconds{quantile="0.75"} 0.000169204
+go_gc_duration_seconds{quantile="1"} 0.000544705
+go_gc_duration_seconds_sum 0.004619405
+go_gc_duration_seconds_count 27
+# HELP go_goroutines Number of goroutines that currently exist.
+# TYPE go_goroutines gauge
+go_goroutines 29
+# HELP go_info Information about the Go environment.
+# TYPE go_info gauge
+go_info{version="go1.10"} 1
+# HELP go_memstats_alloc_bytes Number of bytes allocated and still in use.
+# TYPE go_memstats_alloc_bytes gauge
+go_memstats_alloc_bytes 1.581062048e+09
+# HELP go_memstats_alloc_bytes_total Total number of bytes allocated, even if freed.
+# TYPE go_memstats_alloc_bytes_total counter
+go_memstats_alloc_bytes_total 2.808293616e+09
+# HELP go_memstats_buck_hash_sys_bytes Number of bytes used by the profiling bucket hash table.
+# TYPE go_memstats_buck_hash_sys_bytes gauge
+go_memstats_buck_hash_sys_bytes 1.494326e+06
+# HELP go_memstats_frees_total Total number of frees.
+# TYPE go_memstats_frees_total counter +go_memstats_frees_total 1.1279913e+07 +# HELP go_memstats_gc_cpu_fraction The fraction of this program's available CPU time used by the GC since the program started. +# TYPE go_memstats_gc_cpu_fraction gauge +go_memstats_gc_cpu_fraction -0.00014404354379774563 +# HELP go_memstats_gc_sys_bytes Number of bytes used for garbage collection system metadata. +# TYPE go_memstats_gc_sys_bytes gauge +go_memstats_gc_sys_bytes 6.0936192e+07 +# HELP go_memstats_heap_alloc_bytes Number of heap bytes allocated and still in use. +# TYPE go_memstats_heap_alloc_bytes gauge +go_memstats_heap_alloc_bytes 1.581062048e+09 +# HELP go_memstats_heap_idle_bytes Number of heap bytes waiting to be used. +# TYPE go_memstats_heap_idle_bytes gauge +go_memstats_heap_idle_bytes 3.8551552e+07 +# HELP go_memstats_heap_inuse_bytes Number of heap bytes that are in use. +# TYPE go_memstats_heap_inuse_bytes gauge +go_memstats_heap_inuse_bytes 1.590673408e+09 +# HELP go_memstats_heap_objects Number of allocated objects. +# TYPE go_memstats_heap_objects gauge +go_memstats_heap_objects 1.6924595e+07 +# HELP go_memstats_heap_released_bytes Number of heap bytes released to OS. +# TYPE go_memstats_heap_released_bytes gauge +go_memstats_heap_released_bytes 0 +# HELP go_memstats_heap_sys_bytes Number of heap bytes obtained from system. +# TYPE go_memstats_heap_sys_bytes gauge +go_memstats_heap_sys_bytes 1.62922496e+09 +# HELP go_memstats_last_gc_time_seconds Number of seconds since 1970 of last garbage collection. +# TYPE go_memstats_last_gc_time_seconds gauge +go_memstats_last_gc_time_seconds 1.520291233297057e+09 +# HELP go_memstats_lookups_total Total number of pointer lookups. +# TYPE go_memstats_lookups_total counter +go_memstats_lookups_total 397 +# HELP go_memstats_mallocs_total Total number of mallocs. +# TYPE go_memstats_mallocs_total counter +go_memstats_mallocs_total 2.8204508e+07 +# HELP go_memstats_mcache_inuse_bytes Number of bytes in use by mcache structures. +# TYPE go_memstats_mcache_inuse_bytes gauge +go_memstats_mcache_inuse_bytes 13888 +# HELP go_memstats_mcache_sys_bytes Number of bytes used for mcache structures obtained from system. +# TYPE go_memstats_mcache_sys_bytes gauge +go_memstats_mcache_sys_bytes 16384 +# HELP go_memstats_mspan_inuse_bytes Number of bytes in use by mspan structures. +# TYPE go_memstats_mspan_inuse_bytes gauge +go_memstats_mspan_inuse_bytes 1.4781696e+07 +# HELP go_memstats_mspan_sys_bytes Number of bytes used for mspan structures obtained from system. +# TYPE go_memstats_mspan_sys_bytes gauge +go_memstats_mspan_sys_bytes 1.4893056e+07 +# HELP go_memstats_next_gc_bytes Number of heap bytes when next garbage collection will take place. +# TYPE go_memstats_next_gc_bytes gauge +go_memstats_next_gc_bytes 2.38107752e+09 +# HELP go_memstats_other_sys_bytes Number of bytes used for other system allocations. +# TYPE go_memstats_other_sys_bytes gauge +go_memstats_other_sys_bytes 4.366786e+06 +# HELP go_memstats_stack_inuse_bytes Number of bytes in use by the stack allocator. +# TYPE go_memstats_stack_inuse_bytes gauge +go_memstats_stack_inuse_bytes 983040 +# HELP go_memstats_stack_sys_bytes Number of bytes obtained from system for stack allocator. +# TYPE go_memstats_stack_sys_bytes gauge +go_memstats_stack_sys_bytes 983040 +# HELP go_memstats_sys_bytes Number of bytes obtained from system. +# TYPE go_memstats_sys_bytes gauge +go_memstats_sys_bytes 1.711914744e+09 +# HELP go_threads Number of OS threads created. 
+# TYPE go_threads gauge
+go_threads 16
+```
diff --git a/content/influxdb/v1.7/administration/stability_and_compatibility.md b/content/influxdb/v1.7/administration/stability_and_compatibility.md
new file mode 100644
index 000000000..7cf98d9f6
--- /dev/null
+++ b/content/influxdb/v1.7/administration/stability_and_compatibility.md
@@ -0,0 +1,27 @@
+---
+title: Stability and compatibility
+menu:
+  influxdb_1_7:
+    weight: 90
+    parent: Administration
+---
+
+## 1.x API compatibility and stability
+
+One of the more important aspects of the 1.0 release is that it marks the stabilization of our API and storage format. Over the course of the last three years we’ve iterated aggressively, often breaking the API in the process. With the release of 1.0 and for the entire 1.x line of releases, we’re committing to the following:
+
+### No breaking InfluxDB API changes
+
+When it comes to the InfluxDB API, if a command works in 1.0 it will work unchanged in all 1.x releases...with one caveat. We will be adding [keywords](/influxdb/v1.7/query_language/spec/#keywords) to the query language. New keywords won't break your queries if you wrap all [identifiers](/influxdb/v1.7/concepts/glossary/#identifier) in double quotes and all string literals in single quotes. This is generally considered best practice, so it should be followed anyway; a concrete example of this quoting style appears at the end of this page. For users following that guideline, the query and ingestion APIs will have no breaking changes for all 1.x releases. Note that this does not include the Go code in the project. The underlying Go API in InfluxDB can and will change over the course of 1.x development. Users should be accessing InfluxDB through the [InfluxDB API](/influxdb/v1.7/tools/api/).
+
+### Storage engine stability
+
+The [TSM](/influxdb/v1.7/concepts/glossary/#tsm-time-structured-merge-tree) storage engine file format is now at version 1. While we may introduce new versions of the format in the 1.x releases, these new versions will run side-by-side with previous versions. What this means for users is that there will be no lengthy migrations when upgrading from one 1.x release to another.
+
+### Additive changes
+
+The query engine will have additive changes over the course of the new releases. We’ll introduce new query functions and new functionality into the language without breaking backwards compatibility. We may introduce new protocol endpoints (like a binary format) and versions of the line protocol and query API to improve performance and/or functionality, but they will have to run in parallel with the existing versions. Existing versions will be supported for the entirety of the 1.x release line.
+
+### Ongoing support
+
+We’ll continue to fix bugs on the 1.x versions of the [line protocol](/influxdb/v1.7/concepts/glossary/#influxdb-line-protocol), query API, and TSM storage format. Users should expect to upgrade to the latest 1.x.x release for bug fixes, but those releases will all be compatible with the 1.0 API and won’t require data migrations. For instance, if a user is running 1.2 and there are bug fixes released in 1.3, they should upgrade to the 1.3 release. Until 1.4 is released, patch fixes will go into 1.3.x. Because all future 1.x releases are drop-in replacements for previous 1.x releases, users should upgrade to the latest in the 1.x line to get all bug fixes.
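+
+For example, here is the recommended quoting style applied from the command line. The measurement and key names (`h2o_feet`, `water_level`, `location`) are purely illustrative; the point is that identifiers are wrapped in double quotes and string literals in single quotes:
+
+```bash
+# Double-quote identifiers; single-quote string literals
+influx -execute "SELECT \"water_level\" FROM \"h2o_feet\" WHERE \"location\" = 'santa_monica'"
+```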
diff --git a/content/influxdb/v1.7/administration/subscription-management.md b/content/influxdb/v1.7/administration/subscription-management.md
new file mode 100644
index 000000000..fdfb91d70
--- /dev/null
+++ b/content/influxdb/v1.7/administration/subscription-management.md
@@ -0,0 +1,207 @@
+---
+title: Manage subscriptions in InfluxDB
+description: InfluxDB uses subscriptions to copy all written data to a local or remote endpoint. This article walks through how InfluxDB subscriptions work, how to configure them, and how to manage them.
+menu:
+  influxdb_1_7:
+    parent: Administration
+    name: Manage subscriptions
+    weight: 100
+---
+
+InfluxDB subscriptions are local or remote endpoints to which all data written to InfluxDB is copied.
+Subscriptions are primarily used with [Kapacitor](/kapacitor/), but any endpoint
+able to accept UDP, HTTP, or HTTPS connections can subscribe to InfluxDB and receive
+a copy of all data as it is written.
+
+## How subscriptions work
+
+As data is written to InfluxDB, writes are duplicated to subscriber endpoints via
+HTTP, HTTPS, or UDP in [line protocol](/influxdb/v1.7/write_protocols/line_protocol_tutorial/).
+The InfluxDB subscriber service creates multiple "writers" ([goroutines](https://golangbot.com/goroutines/))
+which send writes to the subscription endpoints.
+
+_The number of writer goroutines is defined by the [`write-concurrency`](/influxdb/v1.7/administration/config#write-concurrency-40) configuration._
+
+As writes occur in InfluxDB, each subscription writer sends the written data to the
+specified subscription endpoints.
+However, with a high `write-concurrency` (multiple writers) and a high ingest rate,
+nanosecond differences in writer processes and the transport layer can result
+in writes being received out of order.
+
+> #### Important information about high write loads
+> While setting the subscriber `write-concurrency` to greater than 1 does increase your
+> subscriber write throughput, it can result in out-of-order writes under high ingest rates.
+> Setting `write-concurrency` to 1 ensures writes are passed to subscriber endpoints sequentially,
+> but can create a bottleneck under high ingest rates.
+>
+> What `write-concurrency` should be set to depends on your specific workload
+> and need for in-order writes to your subscription endpoint.
+
+## InfluxQL subscription statements
+
+Use the following InfluxQL statements to manage subscriptions:
+
+[`CREATE SUBSCRIPTION`](#create-subscriptions)
+[`SHOW SUBSCRIPTIONS`](#show-subscriptions)
+[`DROP SUBSCRIPTION`](#remove-subscriptions)
+
+## Create subscriptions
+
+Create subscriptions using the `CREATE SUBSCRIPTION` InfluxQL statement.
+Specify the subscription name, the database name and retention policy to subscribe to,
+and the URL of the host to which data written to InfluxDB should be copied.
+
+```sql
+-- Pattern:
+CREATE SUBSCRIPTION "<subscription_name>" ON "<database>"."<retention_policy>" DESTINATIONS <ALL|ANY> "<subscription_endpoint_host>"
+
+-- Examples:
+-- Create a SUBSCRIPTION on database 'mydb' and retention policy 'autogen' that sends data to 'example.com:9090' via HTTP.
+CREATE SUBSCRIPTION "sub0" ON "mydb"."autogen" DESTINATIONS ALL 'http://example.com:9090'
+
+-- Create a SUBSCRIPTION on database 'mydb' and retention policy 'autogen' that round-robins the data to 'h1.example.com:9090' and 'h2.example.com:9090' via UDP.
+CREATE SUBSCRIPTION "sub0" ON "mydb"."autogen" DESTINATIONS ANY 'udp://h1.example.com:9090', 'udp://h2.example.com:9090'
+```
+
+If authentication is enabled on the subscriber host, include the credentials in the destination URL.
+
+```
+-- Create a SUBSCRIPTION on database 'mydb' and retention policy 'autogen' that sends data to another InfluxDB on 'example.com:8086' via HTTP. Authentication is enabled on the subscription host (user: subscriber, pass: secret).
+CREATE SUBSCRIPTION "sub0" ON "mydb"."autogen" DESTINATIONS ALL 'http://subscriber:secret@example.com:8086'
+```
+
+{{% warn %}}
+`SHOW SUBSCRIPTIONS` outputs all subscriber URLs in plain text, including those with authentication credentials.
+Any user with the privileges to run `SHOW SUBSCRIPTIONS` is able to see these credentials.
+{{% /warn %}}
+
+### Sending subscription data to multiple hosts
+
+The `CREATE SUBSCRIPTION` statement allows you to specify multiple hosts as endpoints for the subscription.
+In your `DESTINATIONS` clause, you can pass multiple host strings separated by commas.
+Using `ALL` or `ANY` in the `DESTINATIONS` clause determines how InfluxDB writes data to each endpoint:
+
+`ALL`: Writes data to all specified hosts.
+
+`ANY`: Round-robins writes between specified hosts.
+
+_**Subscriptions with multiple hosts**_
+
+```sql
+-- Write all data to multiple hosts
+CREATE SUBSCRIPTION "mysub" ON "mydb"."autogen" DESTINATIONS ALL 'http://host1.example.com:9090', 'http://host2.example.com:9090'
+
+-- Round-robin writes between multiple hosts
+CREATE SUBSCRIPTION "mysub" ON "mydb"."autogen" DESTINATIONS ANY 'http://host1.example.com:9090', 'http://host2.example.com:9090'
+```
+
+### Subscription protocols
+
+Subscriptions can use HTTP, HTTPS, or UDP transport protocols.
+Which to use is determined by the protocol expected by the subscription endpoint.
+If creating a Kapacitor subscription, this is defined by the `subscription-protocol`
+option in the `[[influxdb]]` section of your [`kapacitor.conf`](/kapacitor/latest/administration/subscription-management/#subscription-protocol).
+
+_**kapacitor.conf**_
+
+```toml
+[[influxdb]]
+
+  # ...
+
+  subscription-protocol = "http"
+
+  # ...
+
+```
+
+_For information regarding HTTPS connections and secure communication between InfluxDB and Kapacitor,
+view the [Kapacitor security](/kapacitor/v1.5/administration/security/#secure-influxdb-and-kapacitor) documentation._
+
+## Show subscriptions
+
+The `SHOW SUBSCRIPTIONS` InfluxQL statement returns a list of all subscriptions registered in InfluxDB.
+
+```sql
+SHOW SUBSCRIPTIONS
+```
+
+_**Example output:**_
+
+```bash
+name: _internal
+retention_policy name                                           mode destinations
+---------------- ----                                           ---- ------------
+monitor          kapacitor-39545771-7b64-4692-ab8f-1796c07f3314 ANY  [http://localhost:9092]
+```
+
+## Remove subscriptions
+
+Remove or drop subscriptions using the `DROP SUBSCRIPTION` InfluxQL statement.
+
+```sql
+-- Pattern:
+DROP SUBSCRIPTION "<subscription_name>" ON "<database>"."<retention_policy>"
+
+-- Example:
+DROP SUBSCRIPTION "sub0" ON "mydb"."autogen"
+```
+
+### Drop all subscriptions
+
+In some cases, it may be necessary to remove all subscriptions.
+Run the following bash script that utilizes the `influx` CLI, loops through all subscriptions, and removes them.
+This script depends on the `$INFLUXUSER` and `$INFLUXPASS` environment variables.
+If these are not set, export them as part of the script.
+
+```bash
+# Environment variable exports:
+# Uncomment these if INFLUXUSER and INFLUXPASS are not already globally set.
+# export INFLUXUSER=influxdb-username
+# export INFLUXPASS=influxdb-password
+
+IFS=$'\n'; for i in $(influx -format csv -username $INFLUXUSER -password $INFLUXPASS -database _internal -execute 'show subscriptions' | tail -n +2 | grep -v name); do influx -format csv -username $INFLUXUSER -password $INFLUXPASS -database _internal -execute "drop subscription \"$(echo "$i" | cut -f 3 -d ',')\" ON \"$(echo "$i" | cut -f 1 -d ',')\".\"$(echo "$i" | cut -f 2 -d ',')\""; done
+```
+
+## Configure InfluxDB subscriptions
+
+InfluxDB subscription configuration options are available in the `[subscriber]`
+section of the `influxdb.conf`.
+In order to use subscriptions, the `enabled` option in the `[subscriber]` section must be set to `true`.
+Below is an example `influxdb.conf` subscriber configuration:
+
+```toml
+[subscriber]
+  enabled = true
+  http-timeout = "30s"
+  insecure-skip-verify = false
+  ca-certs = ""
+  write-concurrency = 40
+  write-buffer-size = 1000
+```
+
+_**Descriptions of `[subscriber]` configuration options are available in the [Configuring InfluxDB](/influxdb/v1.7/administration/config#subscription-settings) documentation.**_
+
+## Troubleshooting
+
+### Inaccessible or decommissioned subscription endpoints
+
+Unless a subscription is [dropped](#remove-subscriptions), InfluxDB assumes the endpoint
+should always receive data and will continue to attempt to send data.
+If an endpoint host is inaccessible or has been decommissioned, you will see errors
+similar to the following:
+
+```bash
+# Some message content omitted (...) for the sake of brevity
+"Post http://x.y.z.a:9092/write?consistency=...: net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)" ... service=subscriber
+"Post http://x.y.z.a:9092/write?consistency=...: dial tcp x.y.z.a:9092: getsockopt: connection refused" ... service=subscriber
+"Post http://x.y.z.a:9092/write?consistency=...: dial tcp 172.31.36.5:9092: getsockopt: no route to host" ... service=subscriber
+```
+
+In some cases, this may be caused by a networking error or something similar
+preventing a successful connection to the subscription endpoint.
+In other cases, it's because the subscription endpoint no longer exists and
+the subscription hasn't been dropped from InfluxDB.
+
+> Because InfluxDB does not know if a subscription endpoint will or will not become accessible again,
+> subscriptions are not automatically dropped when an endpoint becomes inaccessible.
+> If a subscription endpoint is removed, you must manually [drop the subscription](#remove-subscriptions) from InfluxDB.
diff --git a/content/influxdb/v1.7/administration/upgrading.md b/content/influxdb/v1.7/administration/upgrading.md
new file mode 100644
index 000000000..46e537b6e
--- /dev/null
+++ b/content/influxdb/v1.7/administration/upgrading.md
@@ -0,0 +1,60 @@
+---
+title: Upgrading to InfluxDB 1.7.x
+
+menu:
+  influxdb_1_7:
+    name: Upgrading
+    weight: 25
+    parent: Administration
+---
+
+We recommend enabling Time Series Index (TSI) (step 3 of [Upgrade to InfluxDB 1.7.x](#upgrade-to-influxdb-1-7-x)). To learn more about TSI, see:
+
+- [Time Series Index (TSI) overview](/influxdb/v1.7/concepts/time-series-index/)
+- [Time Series Index (TSI) details](/influxdb/v1.7/concepts/tsi-details/)
+
+> **_Note:_** The default InfluxDB configuration continues to use in-memory indexes (`inmem`) (as in earlier versions).
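+
+The [Upgrade to InfluxDB 1.7.x](#upgrade-to-influxdb-1-7-x) section below walks through the upgrade step by step. As a quick reference, enabling TSI during an upgrade looks roughly like the following sketch. It assumes a Linux package install with the default paths (`/var/lib/influxdb`) and a `systemd` service named `influxdb`; adjust for your installation:
+
+```bash
+sudo systemctl stop influxdb
+
+# Enable TSI: set index-version = "tsi1" in the [data] section of
+# influxdb.conf, or set INFLUXDB_DATA_INDEX_VERSION=tsi1 in the
+# service environment.
+
+# Delete existing shard index directories.
+sudo find /var/lib/influxdb/data -type d -name index -exec rm -rf {} +
+
+# Rebuild the TSI index as the user the database runs as.
+sudo -u influxdb influx_inspect buildtsi -datadir /var/lib/influxdb/data -waldir /var/lib/influxdb/wal
+
+sudo systemctl start influxdb
+```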
+
+{{% note %}}
+### Upgrade to InfluxDB Enterprise
+To upgrade from InfluxDB OSS to InfluxDB Enterprise, [contact InfluxData Sales](https://www.influxdata.com/contact-sales/)
+and see [Migrate to InfluxDB Enterprise](/enterprise_influxdb/v1.7/guides/migration/).
+{{% /note %}}
+
+## Upgrade to InfluxDB 1.7.x
+
+1. [Download](https://portal.influxdata.com/downloads) InfluxDB 1.7.x and [install the upgrade](/influxdb/v1.7/introduction/installation).
+
+2. Migrate configuration file customizations from your existing configuration file to the InfluxDB 1.7.x [configuration file](/influxdb/v1.7/administration/config/). Add or modify your environment variables as needed.
+
+3. To enable TSI in InfluxDB 1.7.x, complete the following steps:
+
+   a. If using the InfluxDB configuration file, find the `[data]` section, uncomment `index-version = "inmem"` and change the value to `tsi1`.
+
+   b. If using environment variables, set `INFLUXDB_DATA_INDEX_VERSION` to `tsi1`.
+
+   c. Delete shard `index` directories (by default, located at `/data/<database>/<retention_policy>/<shard_ID>/index`).
+
+   d. Build TSI by running the [influx_inspect buildtsi](/influxdb/v1.7/tools/influx_inspect/#buildtsi) command.
+
+   > **Note** Run the `buildtsi` command using the user account that you are going to run the database as, or ensure that the permissions match afterward.
+
+4. Restart the `influxdb` service.
+
+## Switch index types
+
+Switch index types at any time by doing one of the following:
+
+- To switch from `inmem` to `tsi1`, complete steps 3 and 4 above in [Upgrade to InfluxDB 1.7.x](#upgrade-to-influxdb-1-7-x).
+- To switch from `tsi1` to `inmem`, change `tsi1` to `inmem` by completing steps 3a-3c and 4 above in [Upgrade to InfluxDB 1.7.x](#upgrade-to-influxdb-1-7-x).
+
+## Downgrade InfluxDB
+
+To downgrade to an earlier version, complete the procedures above in [Upgrade to InfluxDB 1.7.x](#upgrade-to-influxdb-1-7-x), replacing the version numbers with the version that you want to downgrade to.
+After downloading the release, migrating your configuration settings, and enabling TSI or TSM, make sure to [rebuild your index](/influxdb/v1.7/administration/rebuild-tsi-index/#sidebar).
+
+>**Note:** Some versions of InfluxDB may have breaking changes that impact your ability to upgrade and downgrade. For example, you cannot downgrade from InfluxDB 1.3 or later to an earlier version. Please review the applicable version of the release notes to check for compatibility issues between releases.
+
+## Upgrade InfluxDB Enterprise clusters
+
+See [Upgrading InfluxDB Enterprise clusters](/enterprise_influxdb/v1.7/administration/upgrading/).
diff --git a/content/influxdb/v1.7/concepts/_index.md b/content/influxdb/v1.7/concepts/_index.md
new file mode 100644
index 000000000..dc6bbbd3f
--- /dev/null
+++ b/content/influxdb/v1.7/concepts/_index.md
@@ -0,0 +1,40 @@
+---
+title: InfluxDB concepts
+menu:
+  influxdb_1_7:
+    name: Concepts
+    weight: 30
+
+---
+
+Understanding the following concepts will help you get the most out of InfluxDB.
+
+## [Key concepts](/influxdb/v1.7/concepts/key_concepts/)
+
+A brief explanation of the InfluxDB core architecture.
+
+## [Glossary of terms](/influxdb/v1.7/concepts/glossary/)
+
+A list of InfluxDB terms and their definitions.
+
+## [Comparison to SQL](/influxdb/v1.7/concepts/crosswalk/)
+
+## [Design insights and tradeoffs](/influxdb/v1.7/concepts/insights_tradeoffs/)
+
+A brief treatment of some of the performance tradeoffs made during the design phase of InfluxDB.
+
+## [Schema and data layout](/influxdb/v1.7/concepts/schema_and_data_layout/)
+
+A useful overview of the InfluxDB time series data structure and how it affects performance.
+
+## [TSM storage engine](/influxdb/v1.7/concepts/storage_engine/)
+
+An overview of how InfluxDB stores data on disk and uses TSM for in-memory indexing.
+
+## [TSI (Time Series Index) overview](/influxdb/v1.7/concepts/time-series-index/)
+
+An overview of how InfluxDB uses TSI (Time Series Index) for disk-based indexing.
+
+## [TSI (Time Series Index) details](/influxdb/v1.7/concepts/tsi-details/)
+
+A detailed look at how TSI works, the file structure, and tooling.
diff --git a/content/influxdb/v1.7/concepts/crosswalk.md b/content/influxdb/v1.7/concepts/crosswalk.md
new file mode 100644
index 000000000..33715b9ea
--- /dev/null
+++ b/content/influxdb/v1.7/concepts/crosswalk.md
@@ -0,0 +1,200 @@
+---
+title: InfluxDB compared to SQL databases
+menu:
+  influxdb_1_7:
+    name: Comparing InfluxDB to SQL databases
+    weight: 30
+    parent: Concepts
+---
+
+# What's in a database?
+
+This page gives SQL users an overview of how InfluxDB is like an SQL database and how it's not.
+It highlights some of the major distinctions between the two and provides a loose crosswalk between the different database terminologies and query languages.
+
+## In general...
+
+InfluxDB is designed to work with time-series data.
+SQL databases can handle time-series data but weren't created strictly for that purpose.
+In short, InfluxDB is made to store a large volume of time-series data and perform real-time analysis on those data, quickly.
+
+### Timing is everything
+
+In InfluxDB, a timestamp identifies a single point in any given data series.
+This is like an SQL database table where the primary key is pre-set by the system and is always time.
+
+InfluxDB also recognizes that your [schema](/influxdb/v1.7/concepts/glossary/#schema) preferences may change over time.
+In InfluxDB you don't have to define schemas up front.
+Data points can have one of the fields on a measurement, all of the fields on a measurement, or any number in-between.
+You can add new fields to a measurement simply by writing a point for that new field.
+If you need an explanation of the terms measurements, tags, and fields, check out the next section for an SQL database to InfluxDB terminology crosswalk.
+
+## Terminology
+
+The table below is a (very) simple example of a table called `foodships` in an SQL database,
+with the unindexed column `#_foodships` and the indexed columns `park_id`, `planet`, and `time`.
+
+```sql
++---------+---------+---------------------+--------------+
+| park_id | planet  | time                | #_foodships  |
++---------+---------+---------------------+--------------+
+| 1       | Earth   | 1429185600000000000 | 0            |
+| 1       | Earth   | 1429185601000000000 | 3            |
+| 1       | Earth   | 1429185602000000000 | 15           |
+| 1       | Earth   | 1429185603000000000 | 15           |
+| 2       | Saturn  | 1429185600000000000 | 5            |
+| 2       | Saturn  | 1429185601000000000 | 9            |
+| 2       | Saturn  | 1429185602000000000 | 10           |
+| 2       | Saturn  | 1429185603000000000 | 14           |
+| 3       | Jupiter | 1429185600000000000 | 20           |
+| 3       | Jupiter | 1429185601000000000 | 21           |
+| 3       | Jupiter | 1429185602000000000 | 21           |
+| 3       | Jupiter | 1429185603000000000 | 20           |
+| 4       | Saturn  | 1429185600000000000 | 5            |
+| 4       | Saturn  | 1429185601000000000 | 5            |
+| 4       | Saturn  | 1429185602000000000 | 6            |
+| 4       | Saturn  | 1429185603000000000 | 5            |
++---------+---------+---------------------+--------------+
+```
+
+Those same data look like this in InfluxDB:
+
+```sql
+name: foodships
+tags: park_id=1, planet=Earth
+time                  #_foodships
+----                  ------------
+2015-04-16T12:00:00Z  0
+2015-04-16T12:00:01Z  3
+2015-04-16T12:00:02Z  15
+2015-04-16T12:00:03Z  15
+
+name: foodships
+tags: park_id=2, planet=Saturn
+time                  #_foodships
+----                  ------------
+2015-04-16T12:00:00Z  5
+2015-04-16T12:00:01Z  9
+2015-04-16T12:00:02Z  10
+2015-04-16T12:00:03Z  14
+
+name: foodships
+tags: park_id=3, planet=Jupiter
+time                  #_foodships
+----                  ------------
+2015-04-16T12:00:00Z  20
+2015-04-16T12:00:01Z  21
+2015-04-16T12:00:02Z  21
+2015-04-16T12:00:03Z  20
+
+name: foodships
+tags: park_id=4, planet=Saturn
+time                  #_foodships
+----                  ------------
+2015-04-16T12:00:00Z  5
+2015-04-16T12:00:01Z  5
+2015-04-16T12:00:02Z  6
+2015-04-16T12:00:03Z  5
+```
+
+Referencing the example above, in general:
+
+* An InfluxDB measurement (`foodships`) is similar to an SQL database table.
+* InfluxDB tags (`park_id` and `planet`) are like indexed columns in an SQL database.
+* InfluxDB fields (`#_foodships`) are like unindexed columns in an SQL database.
+* InfluxDB points (for example, `2015-04-16T12:00:00Z 5`) are similar to SQL rows.
+
+Building on this comparison of database terminology,
+InfluxDB [continuous queries](/influxdb/v1.7/concepts/glossary/#continuous-query-cq)
+and [retention policies](/influxdb/v1.7/concepts/glossary/#retention-policy-rp) are
+similar to stored procedures in an SQL database.
+They're specified once and then performed regularly and automatically.
+
+Of course, there are some major disparities between SQL databases and InfluxDB.
+SQL `JOIN`s aren't available for InfluxDB measurements; your schema design should reflect that difference.
+And, as we mentioned above, a measurement is like an SQL table where the primary index is always pre-set to time.
+InfluxDB timestamps must be in UNIX epoch (GMT) or formatted as a date-time string valid under RFC3339.
+
+For more detailed descriptions of the InfluxDB terms mentioned in this section, see our [Glossary of Terms](/influxdb/v1.7/concepts/glossary/).
+
+## InfluxQL and SQL
+
+InfluxQL is an SQL-like query language for interacting with InfluxDB.
+It has been lovingly crafted to feel familiar to those coming from other
+SQL or SQL-like environments while also providing features specific
+to storing and analyzing time series data.
+
+InfluxQL's `SELECT` statement follows the form of an SQL `SELECT` statement:
+
+```sql
+SELECT <stuff> FROM <measurement_name> WHERE <some_conditions>
+```
+
+where `WHERE` is optional.
+
+To get the InfluxDB output in the section above, you'd enter:
+
+```sql
+SELECT * FROM "foodships"
+```
+
+If you only wanted to see data for the planet `Saturn`, you'd enter:
+
+```sql
+SELECT * FROM "foodships" WHERE "planet" = 'Saturn'
+```
+
+If you wanted to see data for the planet `Saturn` after 12:00:01 UTC on April 16, 2015, you'd enter:
+
+```sql
+SELECT * FROM "foodships" WHERE "planet" = 'Saturn' AND time > '2015-04-16 12:00:01'
+```
+
+As shown in the example above, InfluxQL allows you to specify the time range of your query in the `WHERE` clause.
+You can use date-time strings wrapped in single quotes that have the
+format `YYYY-MM-DD HH:MM:SS.mmm`
+(`mmm` is milliseconds and is optional, and you can also specify microseconds or nanoseconds).
+You can also use relative time with `now()`, which refers to the server's current timestamp:
+
+```sql
+SELECT * FROM "foodships" WHERE time > now() - 1h
+```
+
+That query outputs the data in the `foodships` measurement where the timestamp is newer than the server's current time minus one hour.
+The options for specifying time durations with `now()` are:
+
+| Letter | Meaning |
+|:------:|:-------:|
+| ns | nanoseconds |
+| u or µ | microseconds |
+| ms | milliseconds |
+| s | seconds |
+| m | minutes |
+| h | hours |
+| d | days |
+| w | weeks |
+
+InfluxQL also supports regular expressions, arithmetic in expressions, `SHOW` statements, and `GROUP BY` statements.
+See our [data exploration](/influxdb/v1.7/query_language/data_exploration/) page for an in-depth discussion of those topics.
+InfluxQL functions include `COUNT`, `MIN`, `MAX`, `MEDIAN`, `DERIVATIVE`, and more.
+For a full list, check out the [functions](/influxdb/v1.7/query_language/functions/) page.
+
+Now that you have the general idea, check out our [Getting Started Guide](/influxdb/v1.7/introduction/getting-started/).
+
+## A note on why InfluxDB isn't CRUD
+
+InfluxDB is a database that has been optimized for time series data.
+This data commonly comes from sources like distributed sensor groups, click data from large websites, or lists of financial transactions.
+
+One thing this data has in common is that it is more useful in the aggregate.
+One reading saying that your computer’s CPU is at 12% utilization at 12:38:35 UTC on a Tuesday is hard to draw conclusions from.
+It becomes more useful when combined with the rest of the series and visualized.
+This is where trends over time begin to show, and actionable insight can be drawn from the data.
+In addition, time series data is generally written once and rarely updated.
+
+The result is that InfluxDB is not a full CRUD database but more like a CR-ud, prioritizing the performance of creating and reading data over update and destroy, and [preventing some update and destroy behaviors](/influxdb/v1.7/concepts/insights_tradeoffs/) to make create and read more performant:
+
+* To update a point, insert one with [the same measurement, tag set, and timestamp](/influxdb/v1.7/troubleshooting/frequently-asked-questions/#how-does-influxdb-handle-duplicate-points).
+* You can [drop or delete a series](/influxdb/v1.7/query_language/database_management/#drop-series-from-the-index-with-drop-series), but not individual points based on field values. As a workaround, you can search for the field value, retrieve the time, then [DELETE based on the `time` field](/influxdb/v1.7/query_language/database_management/#delete-series-with-delete).
+* You can't update or rename tags yet - see GitHub issue [#4157](https://github.com/influxdata/influxdb/issues/4157) for more information. To modify the tag of a series of points, find the points with the offending tag value, change the value to the desired one, write the points back, then drop the series with the old tag value. +* You can't delete tags by tag key (as opposed to value) - see GitHub issue [#8604](https://github.com/influxdata/influxdb/issues/8604). diff --git a/content/influxdb/v1.7/concepts/glossary.md b/content/influxdb/v1.7/concepts/glossary.md new file mode 100644 index 000000000..ea3959fb4 --- /dev/null +++ b/content/influxdb/v1.7/concepts/glossary.md @@ -0,0 +1,384 @@ +--- +title: InfluxDB glossary +menu: + influxdb_1_7: + name: Glossary + weight: 20 + parent: Concepts +--- + +## aggregation + +An InfluxQL function that returns an aggregated value across a set of points. +For a complete list of the available and upcoming aggregations, see [InfluxQL functions](/influxdb/v1.7/query_language/functions/#aggregations). + +Related entries: [function](/influxdb/v1.7/concepts/glossary/#function), [selector](/influxdb/v1.7/concepts/glossary/#selector), [transformation](/influxdb/v1.7/concepts/glossary/#transformation) + +## batch + +A collection of data points in InfluxDB line protocol format, separated by newlines (`0x0A`). +A batch of points may be submitted to the database using a single HTTP request to the write endpoint. +This makes writes using the InfluxDB API much more performant by drastically reducing the HTTP overhead. +InfluxData recommends batch sizes of 5,000-10,000 points, although different use cases may be better served by significantly smaller or larger batches. + +Related entries: [InfluxDB line protocol](/influxdb/v1.7/concepts/glossary/#influxdb-line-protocol), [point](/influxdb/v1.7/concepts/glossary/#point) + +## continuous query (CQ) + +An InfluxQL query that runs automatically and periodically within a database. +Continuous queries require a function in the `SELECT` clause and must include a `GROUP BY time()` clause. +See [Continuous Queries](/influxdb/v1.7/query_language/continuous_queries/). + + +Related entries: [function](/influxdb/v1.7/concepts/glossary/#function) + +## database + +A logical container for users, retention policies, continuous queries, and time series data. + +Related entries: [continuous query](/influxdb/v1.7/concepts/glossary/#continuous-query-cq), [retention policy](/influxdb/v1.7/concepts/glossary/#retention-policy-rp), [user](/influxdb/v1.7/concepts/glossary/#user) + +## duration + +The attribute of the retention policy that determines how long InfluxDB stores data. +Data older than the duration are automatically dropped from the database. +See [Database Management](/influxdb/v1.7/query_language/database_management/#create-retention-policies-with-create-retention-policy) for how to set duration. + +Related entries: [retention policy](/influxdb/v1.7/concepts/glossary/#retention-policy-rp) + +## field + +The key-value pair in an InfluxDB data structure that records metadata and the actual data value. +Fields are required in InfluxDB data structures and they are not indexed - queries on field values scan all points that match the specified time range and, as a result, are not performant relative to tags. + +*Query tip:* Compare fields to tags; tags are indexed. 
+
+Related entries: [field key](/influxdb/v1.7/concepts/glossary/#field-key), [field set](/influxdb/v1.7/concepts/glossary/#field-set), [field value](/influxdb/v1.7/concepts/glossary/#field-value), [tag](/influxdb/v1.7/concepts/glossary/#tag)
+
+## field key
+
+The key part of the key-value pair that makes up a field.
+Field keys are strings and they store metadata.
+
+Related entries: [field](/influxdb/v1.7/concepts/glossary/#field), [field set](/influxdb/v1.7/concepts/glossary/#field-set), [field value](/influxdb/v1.7/concepts/glossary/#field-value), [tag key](/influxdb/v1.7/concepts/glossary/#tag-key)
+
+## field set
+
+The collection of field keys and field values on a point.
+
+Related entries: [field](/influxdb/v1.7/concepts/glossary/#field), [field key](/influxdb/v1.7/concepts/glossary/#field-key), [field value](/influxdb/v1.7/concepts/glossary/#field-value), [point](/influxdb/v1.7/concepts/glossary/#point)
+
+## field value
+
+The value part of the key-value pair that makes up a field.
+Field values are the actual data; they can be strings, floats, integers, or booleans.
+A field value is always associated with a timestamp.
+
+Field values are not indexed - queries on field values scan all points that match the specified time range and, as a result, are not performant.
+
+*Query tip:* Compare field values to tag values; tag values are indexed.
+
+Related entries: [field](/influxdb/v1.7/concepts/glossary/#field), [field key](/influxdb/v1.7/concepts/glossary/#field-key), [field set](/influxdb/v1.7/concepts/glossary/#field-set), [tag value](/influxdb/v1.7/concepts/glossary/#tag-value), [timestamp](/influxdb/v1.7/concepts/glossary/#timestamp)
+
+## function
+
+InfluxQL aggregations, selectors, and transformations.
+See [InfluxQL Functions](/influxdb/v1.7/query_language/functions/) for a complete list of InfluxQL functions.
+
+Related entries: [aggregation](/influxdb/v1.7/concepts/glossary/#aggregation), [selector](/influxdb/v1.7/concepts/glossary/#selector), [transformation](/influxdb/v1.7/concepts/glossary/#transformation)
+
+## identifier
+
+Tokens that refer to continuous query names, database names, field keys,
+measurement names, retention policy names, subscription names, tag keys, and
+user names.
+See [Query Language Specification](/influxdb/v1.7/query_language/spec/#identifiers).
+
+Related entries:
+[database](/influxdb/v1.7/concepts/glossary/#database),
+[field key](/influxdb/v1.7/concepts/glossary/#field-key),
+[measurement](/influxdb/v1.7/concepts/glossary/#measurement),
+[retention policy](/influxdb/v1.7/concepts/glossary/#retention-policy-rp),
+[tag key](/influxdb/v1.7/concepts/glossary/#tag-key),
+[user](/influxdb/v1.7/concepts/glossary/#user)
+
+## InfluxDB line protocol
+
+The text-based format for writing points to InfluxDB. See [InfluxDB line protocol](/influxdb/v1.7/write_protocols/).
+
+## measurement
+
+The part of the InfluxDB data structure that describes the data stored in the associated fields.
+Measurements are strings.
+
+Related entries: [field](/influxdb/v1.7/concepts/glossary/#field), [series](/influxdb/v1.7/concepts/glossary/#series)
+
+## metastore
+
+Contains internal information about the status of the system.
+The metastore contains the user information, databases, retention policies, shard metadata, continuous queries, and subscriptions.
+
+Related entries: [database](/influxdb/v1.7/concepts/glossary/#database), [retention policy](/influxdb/v1.7/concepts/glossary/#retention-policy-rp), [user](/influxdb/v1.7/concepts/glossary/#user)
+
+## node
+
+An independent `influxd` process.
+
+Related entries: [server](/influxdb/v1.7/concepts/glossary/#server)
+
+## now()
+
+The local server's nanosecond timestamp.
+
+## point
+
+In InfluxDB, a point represents a single data record, similar to a row in a SQL database table. Each point:
+
+- has a measurement, a tag set, a field key, a field value, and a timestamp;
+- is uniquely identified by its series and timestamp.
+
+You cannot store more than one point with the same timestamp in a series.
+If you write a point to a series with a timestamp that matches an existing point, the field set becomes a union of the old and new field set, and any ties go to the new field set.
+For more information about duplicate points, see [How does InfluxDB handle duplicate points?](/influxdb/v1.7/troubleshooting/frequently-asked-questions/#how-does-influxdb-handle-duplicate-points)
+
+Related entries: [field set](/influxdb/v1.7/concepts/glossary/#field-set), [series](/influxdb/v1.7/concepts/glossary/#series), [timestamp](/influxdb/v1.7/concepts/glossary/#timestamp)
+
+## points per second
+
+A deprecated measurement of the rate at which data are persisted to InfluxDB.
+The schema allows and even encourages the recording of multiple metric values per point, rendering points per second ambiguous.
+
+Write speeds are generally quoted in values per second, a more precise metric.
+
+Related entries: [point](/influxdb/v1.7/concepts/glossary/#point), [schema](/influxdb/v1.7/concepts/glossary/#schema), [values per second](/influxdb/v1.7/concepts/glossary/#values-per-second)
+
+## query
+
+An operation that retrieves data from InfluxDB.
+See [Data Exploration](/influxdb/v1.7/query_language/data_exploration/), [Schema Exploration](/influxdb/v1.7/query_language/schema_exploration/), [Database Management](/influxdb/v1.7/query_language/database_management/).
+
+## replication factor
+
+The attribute of the retention policy that determines how many copies of data to concurrently store (or retain) in the cluster. If the replication factor is set to 2, each series is stored on 2 separate nodes. If the replication factor is equal to the number of data nodes, data is replicated on each node in the cluster.
+Replication ensures data is stored on multiple nodes, so it remains available when one or more data nodes are unavailable.
+
+The number of data nodes in a cluster **must be evenly divisible by the replication factor**. For example, a replication factor of 2 works with 2, 4, 6, or 8 data nodes, and so on. A replication factor of 3 works with 3, 6, or 9 data nodes, and so on. To increase the read or write capacity of a cluster, add data nodes in multiples of the replication factor. For example, to increase the capacity of a six-node cluster with a replication factor of 3, add 3 additional nodes. To further increase the capacity, continue to add nodes in groups of 3.
+
+> **Important:** If the replication factor isn't evenly divisible into the number of data nodes, data may be distributed unevenly across the cluster and cause poor performance. Likewise, decreasing the replication factor (fewer copies of data in a cluster) may reduce performance depending on query and write load.
+
+Related entries: [cluster](/influxdb/v1.7/concepts/glossary/#cluster), [duration](/influxdb/v1.7/concepts/glossary/#duration), [node](/influxdb/v1.7/concepts/glossary/#node),
+[retention policy](/influxdb/v1.7/concepts/glossary/#retention-policy-rp)
+
+## retention policy (RP)
+
+Describes how long InfluxDB keeps data (duration), how many copies of the data to store in the cluster (replication factor), and the time range covered by shard groups (shard group duration). RPs are unique per database and, along with the measurement and tag set, define a series.
+
+When you create a database, InfluxDB creates a retention policy called `autogen` with an infinite duration, a replication factor set to one, and a shard group duration set to seven days.
+For more information, see [Retention policy management](/influxdb/v1.7/query_language/database_management/#retention-policy-management).
+
+Related entries: [duration](/influxdb/v1.7/concepts/glossary/#duration), [measurement](/influxdb/v1.7/concepts/glossary/#measurement), [replication factor](/influxdb/v1.7/concepts/glossary/#replication-factor), [series](/influxdb/v1.7/concepts/glossary/#series), [shard duration](/influxdb/v1.7/concepts/glossary/#shard-duration), [tag set](/influxdb/v1.7/concepts/glossary/#tag-set)
+
+## schema
+
+How the data are organized in InfluxDB.
+The fundamentals of the InfluxDB schema are databases, retention policies, series, measurements, tag keys, tag values, and field keys.
+See [Schema Design](/influxdb/v1.7/concepts/schema_and_data_layout/) for more information.
+
+Related entries: [database](/influxdb/v1.7/concepts/glossary/#database), [field key](/influxdb/v1.7/concepts/glossary/#field-key), [measurement](/influxdb/v1.7/concepts/glossary/#measurement), [retention policy](/influxdb/v1.7/concepts/glossary/#retention-policy-rp), [series](/influxdb/v1.7/concepts/glossary/#series), [tag key](/influxdb/v1.7/concepts/glossary/#tag-key), [tag value](/influxdb/v1.7/concepts/glossary/#tag-value)
+
+## selector
+
+An InfluxQL function that returns a single point from the range of specified points.
+See [InfluxQL Functions](/influxdb/v1.7/query_language/functions/#selectors) for a complete list of the available and upcoming selectors.
+
+Related entries: [aggregation](/influxdb/v1.7/concepts/glossary/#aggregation), [function](/influxdb/v1.7/concepts/glossary/#function), [transformation](/influxdb/v1.7/concepts/glossary/#transformation)
+
+## series
+
+A logical grouping of data defined by shared measurement, tag set, and field key.
+
+Related entries: [field set](/influxdb/v1.7/concepts/glossary/#field-set), [measurement](/influxdb/v1.7/concepts/glossary/#measurement), [tag set](/influxdb/v1.7/concepts/glossary/#tag-set)
+
+## series cardinality
+
+The number of unique database, measurement, tag set, and field key combinations in an InfluxDB instance.
+
+For example, assume that an InfluxDB instance has a single database and one measurement.
+The single measurement has two tag keys: `email` and `status`.
+If there are three different `email`s, and each email address is associated with two
+different `status`es, then the series cardinality for the measurement is 6
+(3 * 2 = 6):
+
+| email | status |
+| :-------------------- | :----- |
+| lorr@influxdata.com | start |
+| lorr@influxdata.com | finish |
+| marv@influxdata.com | start |
+| marv@influxdata.com | finish |
+| cliff@influxdata.com | start |
+| cliff@influxdata.com | finish |
+
+Note that, in some cases, simply performing that multiplication may overestimate series cardinality because of the presence of dependent tags.
+Dependent tags are tags that are scoped by another tag and do not increase series
+cardinality.
+If we add the tag `firstname` to the example above, the series cardinality
+would not be 18 (3 * 2 * 3 = 18).
+It would remain unchanged at 6, as `firstname` is already scoped by the `email` tag:
+
+| email | status | firstname |
+| :-------------------- | :----- | :-------- |
+| lorr@influxdata.com | start | lorraine |
+| lorr@influxdata.com | finish | lorraine |
+| marv@influxdata.com | start | marvin |
+| marv@influxdata.com | finish | marvin |
+| cliff@influxdata.com | start | clifford |
+| cliff@influxdata.com | finish | clifford |
+
+See [SHOW CARDINALITY](/influxdb/v1.7/query_language/spec/#show-cardinality) to learn about the InfluxQL commands for series cardinality.
+
+Related entries: [field key](#field-key), [measurement](#measurement), [tag key](#tag-key), [tag set](#tag-set)
+
+## series key
+
+A series key identifies a particular series by measurement, tag set, and field key.
+
+For example:
+
+```
+# measurement, tag set, field key
+h2o_level, location=santa_monica, h2o_feet
+```
+
+Related entries: [series](/influxdb/v1.7/concepts/glossary/#series)
+
+## server
+
+A machine, virtual or physical, that is running InfluxDB.
+There should only be one InfluxDB process per server.
+
+Related entries: [node](/influxdb/v1.7/concepts/glossary/#node)
+
+## shard
+
+A shard contains the actual encoded and compressed data, and is represented by a TSM file on disk.
+Every shard belongs to one and only one shard group.
+Multiple shards may exist in a single shard group.
+Each shard contains a specific set of series.
+All points falling on a given series in a given shard group will be stored in the same shard (TSM file) on disk.
+
+Related entries: [series](/influxdb/v1.7/concepts/glossary/#series), [shard duration](/influxdb/v1.7/concepts/glossary/#shard-duration), [shard group](/influxdb/v1.7/concepts/glossary/#shard-group), [tsm](/influxdb/v1.7/concepts/glossary/#tsm-time-structured-merge-tree)
+
+## shard duration
+
+The shard duration determines how much time each shard group spans.
+The specific interval is determined by the `SHARD DURATION` of the retention policy.
+See [Retention Policy management](/influxdb/v1.7/query_language/database_management/#retention-policy-management) for more information.
+
+For example, given a retention policy with `SHARD DURATION` set to `1w`, each shard group will span a single week and contain all points with timestamps in that week.
+
+Related entries: [database](/influxdb/v1.7/concepts/glossary/#database), [retention policy](/influxdb/v1.7/concepts/glossary/#retention-policy-rp), [series](/influxdb/v1.7/concepts/glossary/#series), [shard](/influxdb/v1.7/concepts/glossary/#shard), [shard group](/influxdb/v1.7/concepts/glossary/#shard-group)
+
+## shard group
+
+Shard groups are logical containers for shards.
+Shard groups are organized by time and retention policy.
+Every retention policy that contains data has at least one associated shard group.
+A given shard group contains all shards with data for the interval covered by the shard group.
+The interval spanned by each shard group is the shard duration.
+
+Related entries: [database](/influxdb/v1.7/concepts/glossary/#database), [retention policy](/influxdb/v1.7/concepts/glossary/#retention-policy-rp), [series](/influxdb/v1.7/concepts/glossary/#series), [shard](/influxdb/v1.7/concepts/glossary/#shard), [shard duration](/influxdb/v1.7/concepts/glossary/#shard-duration)
+
+## subscription
+
+Subscriptions allow [Kapacitor](/kapacitor/latest/) to receive data from InfluxDB in a push model rather than the pull model based on querying data.
+When Kapacitor is configured to work with InfluxDB, the subscription will automatically push every write for the subscribed database from InfluxDB to Kapacitor.
+Subscriptions can use TCP or UDP for transmitting the writes.
+
+## tag
+
+The key-value pair in the InfluxDB data structure that records metadata.
+Tags are an optional part of the data structure, but they are useful for storing commonly-queried metadata; tags are indexed so queries on tags are performant.
+*Query tip:* Compare tags to fields; fields are not indexed.
+
+Related entries: [field](/influxdb/v1.7/concepts/glossary/#field), [tag key](/influxdb/v1.7/concepts/glossary/#tag-key), [tag set](/influxdb/v1.7/concepts/glossary/#tag-set), [tag value](/influxdb/v1.7/concepts/glossary/#tag-value)
+
+## tag key
+
+The key part of the key-value pair that makes up a tag.
+Tag keys are strings and they store metadata.
+Tag keys are indexed so queries on tag keys are performant.
+
+*Query tip:* Compare tag keys to field keys; field keys are not indexed.
+
+Related entries: [field key](/influxdb/v1.7/concepts/glossary/#field-key), [tag](/influxdb/v1.7/concepts/glossary/#tag), [tag set](/influxdb/v1.7/concepts/glossary/#tag-set), [tag value](/influxdb/v1.7/concepts/glossary/#tag-value)
+
+## tag set
+
+The collection of tag keys and tag values on a point.
+
+Related entries: [point](/influxdb/v1.7/concepts/glossary/#point), [series](/influxdb/v1.7/concepts/glossary/#series), [tag](/influxdb/v1.7/concepts/glossary/#tag), [tag key](/influxdb/v1.7/concepts/glossary/#tag-key), [tag value](/influxdb/v1.7/concepts/glossary/#tag-value)
+
+## tag value
+
+The value part of the key-value pair that makes up a tag.
+Tag values are strings and they store metadata.
+Tag values are indexed so queries on tag values are performant.
+
+Related entries: [tag](/influxdb/v1.7/concepts/glossary/#tag), [tag key](/influxdb/v1.7/concepts/glossary/#tag-key), [tag set](/influxdb/v1.7/concepts/glossary/#tag-set)
+
+## timestamp
+
+The date and time associated with a point.
+All time in InfluxDB is UTC.
+
+For how to specify time when writing data, see [Write Syntax](/influxdb/v1.7/write_protocols/write_syntax/).
+For how to specify time when querying data, see [Data Exploration](/influxdb/v1.7/query_language/data_exploration/#time-syntax).
+
+Related entries: [point](/influxdb/v1.7/concepts/glossary/#point)
+
+## transformation
+
+An InfluxQL function that returns a value or a set of values calculated from specified points, but does not return an aggregated value across those points.
+See [InfluxQL Functions](/influxdb/v1.7/query_language/functions/#transformations) for a complete list of the available and upcoming transformations.
+ +Related entries: [aggregation](/influxdb/v1.7/concepts/glossary/#aggregation), [function](/influxdb/v1.7/concepts/glossary/#function), [selector](/influxdb/v1.7/concepts/glossary/#selector) + +## TSM (Time Structured Merge tree) + +The purpose-built data storage format for InfluxDB. TSM allows for greater compaction and higher write and read throughput than existing B+ or LSM tree implementations. See [Storage Engine](http://docs.influxdata.com/influxdb/v1.7/concepts/storage_engine/) for more. + +## user + +There are two kinds of users in InfluxDB: + +* *Admin users* have `READ` and `WRITE` access to all databases and full access to administrative queries and user management commands. +* *Non-admin users* have `READ`, `WRITE`, or `ALL` (both `READ` and `WRITE`) access per database. + +When authentication is enabled, InfluxDB only executes HTTP requests that are sent with a valid username and password. +See [Authentication and Authorization](/influxdb/v1.7/administration/authentication_and_authorization/). + +## values per second + +The preferred measurement of the rate at which data are persisted to InfluxDB. Write speeds are generally quoted in values per second. + +To calculate the values per second rate, multiply the number of points written per second by the number of values stored per point. For example, if the points have four fields each, and a batch of 5000 points is written 10 times per second, then the values per second rate is `4 field values per point * 5000 points per batch * 10 batches per second = 200,000 values per second`. + +Related entries: [batch](/influxdb/v1.7/concepts/glossary/#batch), [field](/influxdb/v1.7/concepts/glossary/#field), [point](/influxdb/v1.7/concepts/glossary/#point), [points per second](/influxdb/v1.7/concepts/glossary/#points-per-second) + +## WAL (Write Ahead Log) + +The temporary cache for recently written points. To reduce the frequency with which the permanent storage files are accessed, InfluxDB caches new points in the WAL until their total size or age triggers a flush to more permanent storage. This allows for efficient batching of the writes into the TSM. + +Points in the WAL can be queried, and they persist through a system reboot. On process start, all points in the WAL must be flushed before the system accepts new writes. + +Related entries: [tsm](/influxdb/v1.7/concepts/glossary/#tsm-time-structured-merge-tree) + + diff --git a/content/influxdb/v1.7/concepts/insights_tradeoffs.md b/content/influxdb/v1.7/concepts/insights_tradeoffs.md new file mode 100644 index 000000000..a9260300e --- /dev/null +++ b/content/influxdb/v1.7/concepts/insights_tradeoffs.md @@ -0,0 +1,57 @@ +--- +title: InfluxDB design insights and tradeoffs +menu: + influxdb_1_7: + name: InfluxDB design insights and tradeoffs + weight: 40 + parent: Concepts +--- + +InfluxDB is a time series database. +Optimizing for this use case entails some tradeoffs, primarily to increase performance at the cost of functionality. +Below is a list of some of those design insights that lead to tradeoffs: + +1. For the time series use case, we assume that if the same data is sent multiple times, it is the exact same data that a client just sent several times. + + _**Pro:**_ Simplified [conflict resolution](/influxdb/v1.7/troubleshooting/frequently-asked-questions/#how-does-influxdb-handle-duplicate-points) increases write performance. + _**Con:**_ Cannot store duplicate data; may overwrite data in rare circumstances. + +2. Deletes are a rare occurrence. 
+  When they do occur, it is almost always against large ranges of old data that are cold for writes.
+
+  _**Pro:**_ Restricting access to deletes allows for increased query and write performance.
+  _**Con:**_ Delete functionality is significantly restricted.
+
+3. Updates to existing data are a rare occurrence and contentious updates never happen.
+  Time series data is predominantly new data that is never updated.
+
+  _**Pro:**_ Restricting access to updates allows for increased query and write performance.
+  _**Con:**_ Update functionality is significantly restricted.
+
+4. The vast majority of writes are for data with very recent timestamps and the data is added in time ascending order.
+
+  _**Pro:**_ Adding data in time ascending order is significantly more performant.
+  _**Con:**_ Writing points with random times or with time not in ascending order is significantly less performant.
+
+5. Scale is critical.
+  The database must be able to handle a *high* volume of reads and writes.
+
+  _**Pro:**_ The database can handle a *high* volume of reads and writes.
+  _**Con:**_ The InfluxDB development team was forced to make tradeoffs to increase performance.
+
+6. Being able to write and query the data is more important than having a strongly consistent view.
+
+  _**Pro:**_ Writing and querying the database can be done by multiple clients and at high loads.
+  _**Con:**_ Query returns may not include the most recent points if the database is under heavy load.
+
+7. Many time [series](/influxdb/v1.7/concepts/glossary/#series) are ephemeral.
+  There are often time series that appear only for a few hours and then go away, e.g.,
+  a new host that gets started and reports for a while and then gets shut down.
+
+  _**Pro:**_ InfluxDB is good at managing discontinuous data.
+  _**Con:**_ Schema-less design means that some database functions are not supported, e.g., there are no cross-table joins.
+
+8. No one point is too important.
+
+  _**Pro:**_ InfluxDB has very powerful tools to deal with aggregate data and large data sets.
+  _**Con:**_ Points don't have IDs in the traditional sense; they are differentiated by timestamp and series.
diff --git a/content/influxdb/v1.7/concepts/key_concepts.md b/content/influxdb/v1.7/concepts/key_concepts.md
new file mode 100644
index 000000000..eb7c1949b
--- /dev/null
+++ b/content/influxdb/v1.7/concepts/key_concepts.md
@@ -0,0 +1,213 @@
+---
+title: InfluxDB key concepts
+description: Covers key concepts to learn about InfluxDB.
+menu:
+  influxdb_1_7:
+    name: Key concepts
+    weight: 10
+    parent: Concepts
+---
+
+Before diving into InfluxDB, it's good to get acquainted with some of the key concepts of the database.
+This document provides a gentle introduction to those concepts and common InfluxDB terminology.
+We've provided a list below of all the terms we'll cover, but we recommend reading this document from start to finish to gain a more general understanding of our favorite time series database.
+
+| database         | field key   | field set |
+|:-----------------|:------------|:----------|
+| field value      | measurement | point     |
+| retention policy | series      | tag key   |
+| tag set          | tag value   | timestamp |
+
+Check out the [glossary](/influxdb/v1.7/concepts/glossary/) if you prefer the cold, hard facts.
+
+### Sample data
+
+The next section references the data printed out below.
+The data are fictional, but represent a believable setup in InfluxDB.
+They show the number of butterflies and honeybees counted by two scientists (`langstroth` and `perpetua`) in two locations (location `1` and location `2`) over the time period from August 18, 2015 at midnight through August 18, 2015 at 6:12 AM.
+Assume that the data live in a database called `my_database` and are subject to the `autogen` retention policy (more on databases and retention policies to come).
+
+*Hint:* Hover over the links for tooltips to get acquainted with InfluxDB terminology and the layout.
+
+**name:** census
+
+| time | butterflies | honeybees | location | scientist |
+| ---- | ----------- | --------- | -------- | --------- |
+| 2015-08-18T00:00:00Z | 12 | 23 | 1 | langstroth |
+| 2015-08-18T00:00:00Z | 1 | 30 | 1 | perpetua |
+| 2015-08-18T00:06:00Z | 11 | 28 | 1 | langstroth |
+| 2015-08-18T00:06:00Z | 3 | 28 | 1 | perpetua |
+| 2015-08-18T05:54:00Z | 2 | 11 | 2 | langstroth |
+| 2015-08-18T06:00:00Z | 1 | 10 | 2 | langstroth |
+| 2015-08-18T06:06:00Z | 8 | 23 | 2 | perpetua |
+| 2015-08-18T06:12:00Z | 7 | 22 | 2 | perpetua |
+
+### Discussion
+
+Now that you've seen some sample data in InfluxDB, this section covers what it all means.
+
+InfluxDB is a time series database, so it makes sense to start with what is at the root of everything we do: time.
+In the data above there's a column called `time` - all data in InfluxDB have that column.
+`time` stores timestamps, and the _**timestamp**_ shows the date and time, in [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) UTC, associated with particular data.
+
+The next two columns, called `butterflies` and `honeybees`, are fields.
+Fields are made up of field keys and field values.
+_**Field keys**_ (`butterflies` and `honeybees`) are strings; the field key `butterflies` tells us that the field values `12`-`7` refer to butterflies and the field key `honeybees` tells us that the field values `23`-`22` refer to, well, honeybees.
+
+_**Field values**_ are your data; they can be strings, floats, integers, or Booleans, and, because InfluxDB is a time series database, a field value is always associated with a timestamp.
+The field values in the sample data are:
+
+```
+12   23
+1    30
+11   28
+3    28
+2    11
+1    10
+8    23
+7    22
+```
+
+In the data above, the collection of field-key and field-value pairs makes up a _**field set**_.
+Here are all eight field sets in the sample data:
+
+* `butterflies = 12   honeybees = 23`
+* `butterflies = 1    honeybees = 30`
+* `butterflies = 11   honeybees = 28`
+* `butterflies = 3    honeybees = 28`
+* `butterflies = 2    honeybees = 11`
+* `butterflies = 1    honeybees = 10`
+* `butterflies = 8    honeybees = 23`
+* `butterflies = 7    honeybees = 22`
+
+Fields are a required piece of the InfluxDB data structure - you cannot have data in InfluxDB without fields.
+It's also important to note that fields are not indexed.
+[Queries](/influxdb/v1.7/concepts/glossary/#query) that use field values as filters must scan all values that match the other conditions in the query.
+
+As a result, those queries are not performant relative to queries on tags (more on tags below).
+In general, fields should not contain commonly-queried metadata.
+
+The last two columns in the sample data, called `location` and `scientist`, are tags.
+Tags are made up of tag keys and tag values.
+Both _**tag keys**_ and _**tag values**_ are stored as strings and record metadata.
+The tag keys in the sample data are `location` and `scientist`.
+The tag key `location` has two tag values: `1` and `2`.
+The tag key `scientist` also has two tag values: `langstroth` and `perpetua`.
+
+In the data above, the _**tag set**_ is the different combinations of all the tag key-value pairs.
+The four tag sets in the sample data are:
+
+* `location = 1`, `scientist = langstroth`
+* `location = 2`, `scientist = langstroth`
+* `location = 1`, `scientist = perpetua`
+* `location = 2`, `scientist = perpetua`
+
+Tags are optional.
+You don't need to have tags in your data structure, but it's generally a good idea to make use of them because, unlike fields, tags are indexed.
+This means that queries on tags are faster and that tags are ideal for storing commonly-queried metadata.
+
+Avoid using the following reserved keys:
+
+* `_field`
+* `_measurement`
+* `time`
+
+If reserved keys are used as a tag or field key, the associated point is discarded.
+
+> **Why indexing matters: The schema case study**
+
+> Say you notice that most of your queries focus on the values of the field keys `honeybees` and `butterflies`:
+
+> `SELECT * FROM "census" WHERE "butterflies" = 1`
+> `SELECT * FROM "census" WHERE "honeybees" = 23`
+
+> Because fields aren't indexed, InfluxDB scans every value of `butterflies` in the first query and every value of `honeybees` in the second query before it provides a response.
+That behavior can hurt query response times - especially on a much larger scale.
+To optimize your queries, it may be beneficial to rearrange your [schema](/influxdb/v1.7/concepts/glossary/#schema) such that the fields (`butterflies` and `honeybees`) become the tags and the tags (`location` and `scientist`) become the fields:
+
+> **name:** census
+>
+| time | location | scientist | butterflies | honeybees |
+| ---- | -------- | --------- | ----------- | --------- |
+| 2015-08-18T00:00:00Z | 1 | langstroth | 12 | 23 |
+| 2015-08-18T00:00:00Z | 1 | perpetua | 1 | 30 |
+| 2015-08-18T00:06:00Z | 1 | langstroth | 11 | 28 |
+| 2015-08-18T00:06:00Z | 1 | perpetua | 3 | 28 |
+| 2015-08-18T05:54:00Z | 2 | langstroth | 2 | 11 |
+| 2015-08-18T06:00:00Z | 2 | langstroth | 1 | 10 |
+| 2015-08-18T06:06:00Z | 2 | perpetua | 8 | 23 |
+| 2015-08-18T06:12:00Z | 2 | perpetua | 7 | 22 |
+
+> Now that `butterflies` and `honeybees` are tags, InfluxDB won't have to scan every one of their values when it performs the queries above - this means that your queries are even faster.
+
+The _**measurement**_ acts as a container for tags, fields, and the `time` column, and the measurement name is the description of the data that are stored in the associated fields.
+Measurement names are strings, and, for any SQL users out there, a measurement is conceptually similar to a table.
+The only measurement in the sample data is `census`.
+The name `census` tells us that the field values record the number of `butterflies` and `honeybees` - not their size, direction, or some sort of happiness index.
+
+A single measurement can belong to different retention policies.
+A _**retention policy**_ describes how long InfluxDB keeps data (`DURATION`) and how many copies of the data are stored in the cluster (`REPLICATION`).
+If you're interested in reading more about retention policies, check out [Database Management](/influxdb/v1.7/query_language/database_management/#retention-policy-management).
+
+{{% warn %}} Replication factors do not serve a purpose with single node instances. For multiple data nodes in a cluster, the replication factor **must be evenly divisible** into the number of data nodes. For example, a replication factor of 2 works with 2, 4, or 6 data nodes, and so on. A replication factor of 3 works with 3, 6, or 9 data nodes, and so on.
+{{% /warn %}}
+
+In the sample data, everything in the `census` measurement belongs to the `autogen` retention policy.
+InfluxDB automatically creates the `autogen` retention policy with an infinite duration and a replication factor set to one.
+
+Now that you're familiar with measurements, tag sets, and retention policies, let's discuss series.
+In InfluxDB, a _**series**_ is a collection of points that share a measurement, tag set, and field key.
+The data above consist of eight series:
+
+| Series number | Measurement | Tag set | Field key |
+|:--------------|:------------|:--------|:----------|
+| series 1 | `census` | `location = 1`,`scientist = langstroth` | `butterflies` |
+| series 2 | `census` | `location = 2`,`scientist = langstroth` | `butterflies` |
+| series 3 | `census` | `location = 1`,`scientist = perpetua` | `butterflies` |
+| series 4 | `census` | `location = 2`,`scientist = perpetua` | `butterflies` |
+| series 5 | `census` | `location = 1`,`scientist = langstroth` | `honeybees` |
+| series 6 | `census` | `location = 2`,`scientist = langstroth` | `honeybees` |
+| series 7 | `census` | `location = 1`,`scientist = perpetua` | `honeybees` |
+| series 8 | `census` | `location = 2`,`scientist = perpetua` | `honeybees` |
+
+Understanding the concept of a series is essential when designing your [schema](/influxdb/v1.7/concepts/glossary/#schema) and when working with your data in InfluxDB.
+
+A _**point**_ represents a single data record that has four components: a measurement, tag set, field set, and a timestamp. A point is uniquely identified by its series and timestamp.
+
+For example, here's a single point:
+
+```
+name: census
+-----------------
+time                  butterflies  honeybees  location  scientist
+2015-08-18T00:00:00Z  1            30         1         perpetua
+```
+
+The point in this example is part of series 3 and defined by the measurement (`census`), the tag set (`location = 1`, `scientist = perpetua`), the field set (`butterflies = 1`, `honeybees = 30`), and the timestamp `2015-08-18T00:00:00Z`.
+
+All of the stuff we've just covered is stored in a database - the sample data are in the database `my_database`.
+An InfluxDB _**database**_ is similar to traditional relational databases and serves as a logical container for users, retention policies, continuous queries, and, of course, your time series data.
+See [Authentication and Authorization](/influxdb/v1.7/administration/authentication_and_authorization/) and [Continuous Queries](/influxdb/v1.7/query_language/continuous_queries/) for more on those topics.
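+To tie these concepts together, here is a sketch of how that same point could be written to InfluxDB using line protocol, assuming a nanosecond-precision timestamp (`1439856000000000000` is `2015-08-18T00:00:00Z`):
+
+```
+census,location=1,scientist=perpetua butterflies=1i,honeybees=30i 1439856000000000000
+```
+
+The `i` suffix marks a field value as an integer; without it, numeric field values are stored as floats.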
+
+Databases can have several users, continuous queries, retention policies, and measurements.
+InfluxDB is a schemaless database, which means it's easy to add new measurements, tags, and fields at any time.
+It's designed to make working with time series data awesome.
+
+You made it!
+You've covered the fundamental concepts and terminology in InfluxDB.
+If you're just starting out, we recommend taking a look at [Getting Started](/influxdb/v1.7/introduction/getting_started/) and the [Writing Data](/influxdb/v1.7/guides/writing_data/) and [Querying Data](/influxdb/v1.7/guides/querying_data/) guides.
+May our time series database serve you well 🕔.
diff --git a/content/influxdb/v1.7/concepts/schema_and_data_layout.md b/content/influxdb/v1.7/concepts/schema_and_data_layout.md
new file mode 100644
index 000000000..bd1e53edc
--- /dev/null
+++ b/content/influxdb/v1.7/concepts/schema_and_data_layout.md
@@ -0,0 +1,212 @@
+---
+title: InfluxDB schema design and data layout
+description: Covers general guidelines for InfluxDB schema design and data layout.
+menu:
+  influxdb_1_7:
+    name: Schema design and data layout
+    weight: 50
+    parent: Concepts
+---
+
+Every InfluxDB use case is special and your [schema](/influxdb/v1.7/concepts/glossary/#schema) will reflect that uniqueness.
+There are, however, general guidelines to follow and pitfalls to avoid when designing your schema.
+
+- General recommendations
+- Encouraged schema design
+- Discouraged schema design
+- Shard group duration management
+ +# General recommendations + +## Encouraged schema design + +In no particular order, we recommend that you: + +### Encode meta data in tags + +[Tags](/influxdb/v1.7/concepts/glossary/#tag) are indexed and [fields](/influxdb/v1.7/concepts/glossary/#field) are not indexed. +This means that queries on tags are more performant than those on fields. + +In general, your queries should guide what gets stored as a tag and what gets stored as a field: + +* Store data in tags if they're commonly-queried meta data +* Store data in tags if you plan to use them with `GROUP BY()` +* Store data in fields if you plan to use them with an [InfluxQL function](/influxdb/v1.7/query_language/functions/) +* Store data in fields if you *need* them to be something other than a string - [tag values](/influxdb/v1.7/concepts/glossary/#tag-value) are always interpreted as strings + +### Avoid using InfluxQL keywords as identifier names + +This isn't necessary, but it simplifies writing queries; you won't have to wrap those identifiers in double quotes. +Identifiers are database names, [retention policy](/influxdb/v1.7/concepts/glossary/#retention-policy-rp) names, [user](/influxdb/v1.7/concepts/glossary/#user) names, [measurement](/influxdb/v1.7/concepts/glossary/#measurement) names, [tag keys](/influxdb/v1.7/concepts/glossary/#tag-key), and [field keys](/influxdb/v1.7/concepts/glossary/#field-key). +See [InfluxQL Keywords](https://github.com/influxdata/influxql/blob/master/README.md#keywords) for words to avoid. + +Note that you will also need to wrap identifiers in double quotes in queries if they contain characters other than `[A-z,_]`. + +## Discouraged schema design + +In no particular order, we recommend that you: + +### Don't have too many series + +[Tags](/influxdb/v1.7/concepts/glossary/#tag) containing highly variable information like UUIDs, hashes, and random strings will lead to a large number of series in the database, known colloquially as high series cardinality. +High series cardinality is a primary driver of high memory usage for many database workloads. + +See [Hardware sizing guidelines](/influxdb/v1.7/guides/hardware_sizing/#general-hardware-guidelines-for-a-single-node) for [series cardinality](/influxdb/v1.7/concepts/glossary/#series-cardinality) recommendations based on your hardware. If the system has memory constraints, consider storing high-cardinality data as a field rather than a tag. + +### Don't use the same name for a tag and a field + +Avoid using the same name for a tag and field key. +This often results in unexpected behavior when querying data. + +If you inadvertently add the same name for a tag and field key, see +[Frequently asked questions](/influxdb/v1.7/troubleshooting/frequently-asked-questions/#tag-and-field-key-with-the-same-name) +for information about how to query the data predictably and how to fix the issue. + +### Don't encode data in measurement names + +In general, taking this step will simplify your queries. +InfluxDB queries merge data that fall within the same [measurement](/influxdb/v1.7/concepts/glossary/#measurement); it's better to differentiate data with [tags](/influxdb/v1.7/concepts/glossary/#tag) than with detailed measurement names. + +_Example:_ + +Consider the following schema represented by line protocol. 
+
+```
+Schema 1 - Data encoded in the measurement name
+-------------
+blueberries.plot-1.north temp=50.1 1472515200000000000
+blueberries.plot-2.midwest temp=49.8 1472515200000000000
+```
+
+The long measurement names (`blueberries.plot-1.north`) with no tags are similar to Graphite metrics.
+Encoding information like `plot` and `region` in the measurement name will make the data much harder to query.
+
+For instance, calculating the average temperature of both plots 1 and 2 would not be possible with schema 1.
+Compare this to the following schema represented in line protocol.
+
+```
+Schema 2 - Data encoded in tags
+-------------
+weather_sensor,crop=blueberries,plot=1,region=north temp=50.1 1472515200000000000
+weather_sensor,crop=blueberries,plot=2,region=midwest temp=49.8 1472515200000000000
+```
+
+The following queries calculate the average of `temp` for blueberries that fall in the `north` region.
+While both queries are relatively simple, use of the regular expression makes certain queries much more complicated or impossible.
+
+```
+# Schema 1 - Query for data encoded in the measurement name
+> SELECT mean("temp") FROM /\.north$/
+
+# Schema 2 - Query for data encoded in tags
+> SELECT mean("temp") FROM "weather_sensor" WHERE "region" = 'north'
+```
+
+### Don't put more than one piece of information in one tag
+
+Similar to the point above, splitting a single tag with multiple pieces into separate tags will simplify your queries and reduce the need for regular expressions.
+
+_Example:_
+
+Consider the following schema represented by line protocol.
+
+```
+Schema 1 - Multiple data encoded in a single tag
+-------------
+weather_sensor,crop=blueberries,location=plot-1.north temp=50.1 1472515200000000000
+weather_sensor,crop=blueberries,location=plot-2.midwest temp=49.8 1472515200000000000
+```
+
+The above data encodes multiple separate parameters, `plot` and `region`, into a single long tag value (`plot-1.north`).
+Compare this to the following schema represented in line protocol.
+
+```
+Schema 2 - Data encoded in multiple tags
+-------------
+weather_sensor,crop=blueberries,plot=1,region=north temp=50.1 1472515200000000000
+weather_sensor,crop=blueberries,plot=2,region=midwest temp=49.8 1472515200000000000
+```
+
+The following queries calculate the average of `temp` for blueberries that fall in the `north` region.
+While both queries are similar, the use of multiple tags in Schema 2 avoids the use of regular expressions.
+
+```
+# Schema 1 - Query for multiple data encoded in a single tag
+> SELECT mean("temp") FROM "weather_sensor" WHERE location =~ /\.north$/
+
+# Schema 2 - Query for data encoded in multiple tags
+> SELECT mean("temp") FROM "weather_sensor" WHERE region = 'north'
+```
+
+# Shard group duration management
+
+## Shard group duration overview
+
+InfluxDB stores data in shard groups.
+Shard groups are organized by [retention policy](/influxdb/v1.7/concepts/glossary/#retention-policy-rp) (RP) and store data with timestamps that fall within a specific time interval called the [shard duration](/influxdb/v1.7/concepts/glossary/#shard-duration).
+
+If no shard group duration is provided, the shard group duration is determined by the RP's [duration](/influxdb/v1.7/concepts/glossary/#duration) at the time the RP is created. The default values are:
+
+| RP Duration | Shard Group Duration |
+|---|---|
+| < 2 days | 1 hour |
+| >= 2 days and <= 6 months | 1 day |
+| > 6 months | 7 days |
+
+The shard group duration is also configurable per RP.
+To configure the shard group duration, see [Retention Policy Management](/influxdb/v1.7/query_language/database_management/#retention-policy-management).
+
+## Shard group duration tradeoffs
+
+Determining the optimal shard group duration requires finding the balance between:
+
+- better overall performance with longer shards
+- flexibility provided by shorter shards
+
+### Long shard group duration
+
+Longer shard group durations allow InfluxDB to store more data in the same logical location.
+This reduces data duplication, improves compression efficiency, and allows faster queries in some cases.
+
+### Short shard group duration
+
+Shorter shard group durations allow the system to more efficiently drop data and record incremental backups.
+When InfluxDB enforces an RP, it drops entire shard groups, not individual data points, even if the points are older than the RP duration.
+A shard group is removed only once its *end time* is older than the RP duration.
+
+For example, if your RP has a duration of one day, InfluxDB will drop an hour's worth of data every hour and will always have 25 shard groups: one for each hour in the day, plus an extra shard group that is partially expiring but isn't removed until the whole shard group is older than 24 hours.
+
+>**Note:** A special use case to consider: filtering queries on schema data (such as tags, series, measurements) by time. For example, if you want to filter schema data within a one hour interval, you must set the shard group duration to 1h. For more information, see [filter schema data by time](/influxdb/v1.7/query_language/schema_exploration/#filter-meta-queries-by-time).
+
+## Shard group duration recommendations
+
+The default shard group durations work well for most cases. However, high-throughput or long-running instances will benefit from using longer shard group durations.
+Here are some recommendations for longer shard group durations:
+
+| RP Duration | Shard Group Duration |
+|---|---|
+| <= 1 day | 6 hours |
+| > 1 day and <= 7 days | 1 day |
+| > 7 days and <= 3 months | 7 days |
+| > 3 months | 30 days |
+| infinite | 52 weeks or longer |
+
+> **Note:** `INF` (infinite) is not a [valid shard group duration](/influxdb/v1.7/query_language/database_management/#retention-policy-management).
+In extreme cases where data covers decades and will never be deleted, a long shard group duration like `1040w` (20 years) is perfectly valid.
+
+Other factors to consider before setting shard group duration:
+
+* Shard groups should be twice as long as the longest time range of the most frequent queries
+* Each shard group should contain more than 100,000 [points](/influxdb/v1.7/concepts/glossary/#point)
+* Each shard group should contain more than 1,000 points per [series](/influxdb/v1.7/concepts/glossary/#series)
+
+### Shard group duration for backfilling
+
+Bulk insertion of historical data covering a large time range in the past will trigger the creation of a large number of shards at once.
+The concurrent access and overhead of writing to hundreds or thousands of shards can quickly lead to slow performance and memory exhaustion.
+
+When writing historical data, we highly recommend temporarily setting a longer shard group duration so fewer shards are created. Typically, a shard group duration of 52 weeks works well for backfilling.
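+For example, before a large backfill you might temporarily lengthen the shard group duration on the target retention policy and restore a shorter value once the historical write completes. A sketch in InfluxQL, assuming a database named `mydb` with its default `autogen` retention policy:
+
+```
+# lengthen the shard group duration before writing historical data
+> ALTER RETENTION POLICY "autogen" ON "mydb" SHARD DURATION 52w
+
+# ...backfill the historical data...
+
+# restore a shorter shard group duration for new data
+> ALTER RETENTION POLICY "autogen" ON "mydb" SHARD DURATION 1w
+```
+
+Note that an altered shard group duration applies only to newly created shard groups; existing shard groups keep the duration they were created with.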
diff --git a/content/influxdb/v1.7/concepts/storage_engine.md b/content/influxdb/v1.7/concepts/storage_engine.md
new file mode 100644
index 000000000..5556eb09d
--- /dev/null
+++ b/content/influxdb/v1.7/concepts/storage_engine.md
@@ -0,0 +1,436 @@
+---
+title: In-memory indexing and the Time-Structured Merge Tree (TSM)
+
+menu:
+  influxdb_1_7:
+    name: In-memory indexing with TSM
+    weight: 60
+    parent: Concepts
+---
+
+## The InfluxDB storage engine and the Time-Structured Merge Tree (TSM)
+
+The InfluxDB storage engine looks very similar to an LSM Tree.
+It has a write ahead log and a collection of read-only data files, which are similar in concept to SSTables in an LSM Tree.
+TSM files contain sorted, compressed series data.
+
+InfluxDB will create a [shard](/influxdb/v1.7/concepts/glossary/#shard) for each block of time.
+For example, if you have a [retention policy](/influxdb/v1.7/concepts/glossary/#retention-policy-rp) with an unlimited duration, shards will be created for each 7-day block of time.
+Each of these shards maps to an underlying storage engine database.
+Each of these databases has its own [WAL](/influxdb/v1.7/concepts/glossary/#wal-write-ahead-log) and TSM files.
+
+We'll dig into each of these parts of the storage engine.
+
+## Storage engine
+
+The storage engine ties a number of components together and provides the external interface for storing and querying series data. It is composed of a number of components that each serve a particular role:
+
+* In-Memory Index - The in-memory index is a shared index across shards that provides quick access to [measurements](/influxdb/v1.7/concepts/glossary/#measurement), [tags](/influxdb/v1.7/concepts/glossary/#tag), and [series](/influxdb/v1.7/concepts/glossary/#series). The index is used by the engine, but is not specific to the storage engine itself.
+* WAL - The WAL is a write-optimized storage format that allows for writes to be durable, but not easily queryable. Writes to the WAL are appended to segments of a fixed size.
+* Cache - The Cache is an in-memory representation of the data stored in the WAL. It is queried at runtime and merged with the data stored in TSM files.
+* TSM Files - TSM files store compressed series data in a columnar format.
+* FileStore - The FileStore mediates access to all TSM files on disk. It ensures that TSM files are installed atomically when existing ones are replaced as well as removing TSM files that are no longer used.
+* Compactor - The Compactor is responsible for converting less optimized Cache and TSM data into more read-optimized formats. It does this by compressing series, removing deleted data, optimizing indices and combining smaller files into larger ones.
+* Compaction Planner - The Compaction Planner determines which TSM files are ready for a compaction and ensures that multiple concurrent compactions do not interfere with each other.
+* Compression - Compression is handled by various Encoders and Decoders for specific data types. Some encoders are fairly static and always encode the same type the same way; others switch their compression strategy based on the shape of the data.
+* Writers/Readers - Each file type (WAL segment, TSM files, tombstones, etc.) has Writers and Readers for working with the formats.
+
+### Write Ahead Log (WAL)
+
+The WAL is organized as a bunch of files that look like `_000001.wal`.
+The file numbers are monotonically increasing and referred to as WAL segments.
+When a segment reaches 10MB in size, it is closed and a new one is opened.
+Each WAL segment stores multiple compressed blocks of writes and deletes.
+
+When a write comes in, the new points are serialized, compressed using Snappy, and written to a WAL file.
+The file is `fsync`'d and the data is added to an in-memory index before a success is returned.
+This means that batching points together is required to achieve high throughput performance.
+(Optimal batch size seems to be 5,000-10,000 points per batch for many use cases.)
+
+Each entry in the WAL follows a [TLV standard](https://en.wikipedia.org/wiki/Type-length-value) with a single byte representing the type of entry (write or delete), a 4 byte `uint32` for the length of the compressed block, and then the compressed block.
+
+### Cache
+
+The Cache is an in-memory copy of all data points currently stored in the WAL.
+The points are organized by the key, which is the measurement, [tag set](/influxdb/v1.7/concepts/glossary/#tag-set), and unique [field](/influxdb/v1.7/concepts/glossary/#field).
+Each field is kept as its own time-ordered range.
+The Cache data is not compressed while in memory.
+
+Queries to the storage engine will merge data from the Cache with data from the TSM files.
+Queries execute on a copy of the data that is made from the cache at query processing time.
+This way writes that come in while a query is running won't affect the result.
+
+Deletes sent to the Cache will clear out the given key or the specific time range for the given key.
+
+The Cache exposes a few controls for snapshotting behavior.
+The two most important controls are the memory limits.
+There is a lower bound, [`cache-snapshot-memory-size`](/influxdb/v1.7/administration/config#cache-snapshot-memory-size-25m), which when exceeded will trigger a snapshot to TSM files and remove the corresponding WAL segments.
+There is also an upper bound, [`cache-max-memory-size`](/influxdb/v1.7/administration/config#cache-max-memory-size-1g), which when exceeded will cause the Cache to reject new writes.
+These configurations are useful to prevent out-of-memory situations and to apply back pressure to clients writing data faster than the instance can persist it.
+The checks for memory thresholds occur on every write.
+
+The other snapshot controls are time based.
+The idle threshold, [`cache-snapshot-write-cold-duration`](/influxdb/v1.7/administration/config#cache-snapshot-write-cold-duration-10m), forces the Cache to snapshot to TSM files if it hasn't received a write within the specified interval.
+
+The in-memory Cache is recreated on restart by re-reading the WAL files on disk.
+
+### TSM files
+
+TSM files are a collection of read-only files that are memory mapped.
+The structure of these files looks very similar to an SSTable in LevelDB or other LSM Tree variants.
+
+A TSM file is composed of four sections: header, blocks, index, and footer.
+
+```
++--------+------------------------------------+-------------+--------------+
+| Header |               Blocks               |    Index    |    Footer    |
+|5 bytes |              N bytes               |   N bytes   |   4 bytes    |
++--------+------------------------------------+-------------+--------------+
+```
+
+The Header is a magic number to identify the file type and a version number.
+
+```
++-------------------+
+| Header            |
++-------------------+
+| Magic   │ Version |
+| 4 bytes │ 1 byte  |
++-------------------+
+```
+
+Blocks are sequences of pairs of CRC32 checksums and data.
+The block data is opaque to the file.
+The CRC32 is used for block level error detection.
+The length of the blocks is stored in the index.
+
+```
++--------------------------------------------------------------------+
+│                              Blocks                                │
++---------------------+-----------------------+----------------------+
+|       Block 1       |        Block 2        |        Block N       |
++---------------------+-----------------------+----------------------+
+|   CRC   |   Data    |   CRC    |   Data     |   CRC    |   Data    |
+| 4 bytes |  N bytes  |  4 bytes |  N bytes   |  4 bytes |  N bytes  |
++---------------------+-----------------------+----------------------+
+```
+
+Following the blocks is the index for the blocks in the file.
+The index is composed of a sequence of index entries ordered lexicographically by key and then by time.
+The key includes the measurement name, tag set, and one field.
+Multiple fields per point create multiple index entries in the TSM file.
+Each index entry starts with a key length and the key, followed by the block type (float, int, bool, string) and a count of the number of index block entries that follow for that key.
+Each index block entry is composed of the min and max time for the block, the offset into the file where the block is located and the size of the block. There is one index block entry for each block in the TSM file that contains the key.
+
+The index structure can provide efficient access to all blocks as well as the ability to determine the cost associated with accessing a given key.
+Given a key and timestamp, we can determine whether a file contains the block for that timestamp.
+We can also determine where that block resides and how much data must be read to retrieve the block.
+Knowing the size of the block, we can efficiently provision our IO statements.
+
+```
++-----------------------------------------------------------------------------+
+│ Index                                                                       │
++-----------------------------------------------------------------------------+
+│ Key Len │   Key   │ Type │ Count │Min Time │Max Time │ Offset │  Size  │...│
+│ 2 bytes │ N bytes │1 byte│2 bytes│ 8 bytes │ 8 bytes │8 bytes │4 bytes │   │
++-----------------------------------------------------------------------------+
+```
+
+The last section is the footer that stores the offset of the start of the index.
+
+```
++---------+
+│ Footer  │
++---------+
+│Index Ofs│
+│ 8 bytes │
++---------+
+```
+
+### Compression
+
+Each block is compressed to reduce storage space and disk IO when querying.
+A block contains the timestamps and values for a given series and field.
+Each block has one byte header, followed by the compressed timestamps and then the compressed values.
+
+```
++--------------------------------------------------+
+| Type  |  Len  |   Timestamps    |      Values    |
+|1 Byte | VByte |     N Bytes     |     N Bytes    │
++--------------------------------------------------+
+```
+
+The timestamps and values are compressed and stored separately using encodings dependent on the data type and its shape.
+Storing them independently allows timestamp encoding to be used for all timestamps, while allowing different encodings for different field types.
+For example, some points may be able to use run-length encoding whereas others may not.
+
+Each value type also contains a 1 byte header indicating the type of compression for the remaining bytes.
+The four high bits store the compression type and the four low bits are used by the encoder if needed.
+
+#### Timestamps
+
+Timestamp encoding is adaptive and based on the structure of the timestamps that are encoded.
+It uses a combination of delta encoding, scaling, and compression using simple8b run-length encoding, as well as falling back to no compression if needed.
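+To make these steps concrete before the details below, here is a small worked example with hypothetical timestamps, one point every 10 seconds:
+
+```
+raw timestamps (ns):  1000000000000  1010000000000  1020000000000  1030000000000
+deltas:               1000000000000    10000000000    10000000000    10000000000
+scaled by 10^10:                                 1              1              1
+all deltas equal -> run-length encode as (start, delta = 10s, count = 3)
+```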
+
+Timestamp resolution is variable but can be as granular as a nanosecond, requiring up to 8 bytes to store uncompressed.
+During encoding, the values are first delta-encoded.
+The first value is the starting timestamp and subsequent values are the differences from the prior value.
+This usually converts the values into much smaller integers that are easier to compress.
+Many timestamps are also monotonically increasing and fall on even boundaries of time such as every 10s.
+When timestamps have this structure, they are scaled by the largest common divisor that is also a factor of 10.
+This has the effect of converting very large integer deltas into smaller ones that compress even better.
+
+Using these adjusted values, if all the deltas are the same, the time range is stored using run-length encoding.
+If run-length encoding is not possible and all values are less than (1 << 60) - 1 ([~36.5 years](https://www.wolframalpha.com/input/?i=\(1+%3C%3C+60\)+-+1+nanoseconds+to+years) at nanosecond resolution), then the timestamps are encoded using [simple8b encoding](https://github.com/jwilder/encoding/tree/master/simple8b).
+Simple8b encoding is a 64bit word-aligned integer encoding that packs multiple integers into a single 64bit word.
+If any value exceeds the maximum, the deltas are stored uncompressed using 8 bytes each for the block.
+Future encodings may use a patched scheme such as Patched Frame-Of-Reference (PFOR) to handle outliers more effectively.
+
+#### Floats
+
+Floats are encoded using an implementation of the [Facebook Gorilla paper](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf).
+The encoding XORs consecutive values together to produce a small result when the values are close together.
+The delta is then stored using control bits to indicate how many leading and trailing zeroes are in the XOR value.
+Our implementation removes the timestamp encoding described in the paper and only encodes the float values.
+
+#### Integers
+
+Integer encoding uses two different strategies depending on the range of values in the uncompressed data.
+Values are first encoded using [ZigZag encoding](https://developers.google.com/protocol-buffers/docs/encoding#signed-integers).
+This interleaves positive and negative integers across a range of positive integers.
+
+For example, [-2,-1,0,1] becomes [3,1,0,2].
+See Google's [Protocol Buffers documentation](https://developers.google.com/protocol-buffers/docs/encoding#signed-integers) for more information.
+
+If all ZigZag encoded values are less than (1 << 60) - 1, they are compressed using simple8b encoding.
+If any values are larger than the maximum, then all values are stored uncompressed in the block.
+If all values are identical, run-length encoding is used.
+This works very well for values that are frequently constant.
+
+#### Booleans
+
+Booleans are encoded using a simple bit packing strategy where each Boolean uses 1 bit.
+The number of Booleans encoded is stored using variable-byte encoding at the beginning of the block.
+
+#### Strings
+
+Strings are encoded using [Snappy](http://google.github.io/snappy/) compression.
+Each string is packed consecutively and they are compressed as one larger block.
+
+### Compactions
+
+Compactions are recurring processes that migrate data stored in a write-optimized format into a more read-optimized format.
+There are a number of stages of compaction that take place while a shard is hot for writes:
+
+* Snapshots - Values in the Cache and WAL must be converted to TSM files to free memory and disk space used by the WAL segments.
+These compactions occur based on the cache memory and time thresholds.
+* Level Compactions - Level compactions (levels 1-4) occur as the TSM files grow.
+TSM files are compacted from snapshots to level 1 files.
+Multiple level 1 files are compacted to produce level 2 files.
+The process continues until files reach level 4 and the max size for a TSM file.
+They will not be compacted further unless deletes, index optimization compactions, or full compactions need to run.
+Lower level compactions use strategies that avoid CPU-intensive activities like decompressing and combining blocks.
+Higher level (and thus less frequent) compactions will re-combine blocks to fully compact them and increase the compression ratio.
+* Index Optimization - When many level 4 TSM files accumulate, the internal indexes become larger and more costly to access.
+An index optimization compaction splits the series and indices across a new set of TSM files, sorting all points for a given series into one TSM file.
+Before an index optimization, each TSM file contained points for most or all series, and thus each contains the same series index.
+After an index optimization, each TSM file contains points from a minimal set of series and there is little series overlap between files.
+Each TSM file thus has a smaller unique series index, instead of a duplicate of the full series list.
+In addition, all points from a particular series are contiguous in a TSM file rather than spread across multiple TSM files.
+* Full Compactions - Full compactions run when a shard has become cold for writes for a long time, or when deletes have occurred on the shard.
+Full compactions produce an optimal set of TSM files and include all optimizations from Level and Index Optimization compactions.
+Once a shard is fully compacted, no other compactions will run on it unless new writes or deletes are stored.
+
+### Writes
+
+Writes are appended to the current WAL segment and are also added to the Cache.
+Each WAL segment has a maximum size.
+Writes roll over to a new file once the current file fills up.
+The cache is also size-bounded; snapshots are taken and WAL compactions are initiated when the cache becomes too full.
+If the inbound write rate exceeds the WAL compaction rate for a sustained period, the cache may become too full, in which case new writes will fail until the snapshot process catches up.
+
+When WAL segments fill up and are closed, the Compactor snapshots the Cache and writes the data to a new TSM file.
+When the TSM file is successfully written and `fsync`'d, it is loaded and referenced by the FileStore.
+
+### Updates
+
+Updates (writing a newer value for a point that already exists) occur as normal writes.
+Since cached values overwrite existing values, newer writes take precedence.
+If a write would overwrite a point in a prior TSM file, the points are merged at query runtime and the newer write takes precedence.
+
+### Deletes
+
+Deletes occur by writing a delete entry to the WAL for the measurement or series and then updating the Cache and FileStore.
+The Cache evicts all relevant entries.
+The FileStore writes a tombstone file for each TSM file that contains relevant data.
+These tombstone files are used at startup time to ignore blocks as well as during compactions to remove deleted entries.
+
+Queries against partially deleted series are handled at query time until a compaction removes the data fully from the TSM files.
+
+### Queries
+
+When a query is executed by the storage engine, it is essentially a seek to a given time associated with a specific series key and field.
+First, we do a search on the data files to find the files that contain a time range matching the query as well as containing matching series.
+
+Once we have the data files selected, we next need to find the position in the file of the series key index entries.
+We run a binary search against each TSM index to find the location of its index blocks.
+
+In common cases, the blocks will not overlap across multiple TSM files and we can search the index entries linearly to find the start block from which to read.
+If there are overlapping blocks of time, the index entries are sorted to ensure newer writes will take precedence and that blocks can be processed in order during query execution.
+
+When iterating over the index entries, the blocks are read sequentially from the blocks section.
+The block is decompressed and we seek to the specific point.
+
+# The new InfluxDB storage engine: from LSM Tree to B+Tree and back again to create the Time Structured Merge Tree
+
+Writing a new storage format should be a last resort.
+So how did InfluxData end up writing its own engine?
+InfluxData has experimented with many storage formats and found each lacking in some fundamental way.
+The performance requirements for InfluxDB are significant, and eventually overwhelm other storage systems.
+The 0.8 line of InfluxDB allowed multiple storage engines, including LevelDB, RocksDB, HyperLevelDB, and LMDB.
+The 0.9 line of InfluxDB used BoltDB as the underlying storage engine.
+This writeup is about the Time Structured Merge Tree storage engine that was released in 0.9.5 and is the only storage engine supported in InfluxDB 0.11+, including the entire 1.x family.
+
+The properties of the time series data use case make it challenging for many existing storage engines.
+Over the course of InfluxDB development, InfluxData tried a few of the more popular options.
+We started with LevelDB, an engine based on LSM Trees, which are optimized for write throughput.
+After that, we tried BoltDB, an engine based on a memory-mapped B+Tree, which is optimized for reads.
+Finally, we ended up building our own storage engine that is similar in many ways to LSM Trees.
+
+With our new storage engine, we were able to achieve up to a 45x reduction in disk space usage from our B+Tree setup with even greater write throughput and compression than what we saw with LevelDB and its variants.
+This post will cover the details of that evolution and end with an in-depth look at our new storage engine and its inner workings.
+
+## Properties of time series data
+
+The workload of time series data is quite different from normal database workloads.
+There are a number of factors that conspire to make it very difficult to scale and remain performant:
+
+* Billions of individual data points
+* High write throughput
+* High read throughput
+* Large deletes (data expiration)
+* Mostly an insert/append workload, very few updates
+
+The first and most obvious problem is one of scale.
+In DevOps, IoT, or APM it is easy to collect hundreds of millions or billions of unique data points every day.
+
+For example, let's say we have 200 VMs or servers running, with each server collecting an average of 100 measurements every 10 seconds.
+Given there are 86,400 seconds in a day, a single measurement will generate 8,640 points in a day per server.
+That gives us a total of 172,800,000 (`200 * 100 * 8,640`) individual data points per day.
+We find similar or larger numbers in sensor data use cases.
+
+The volume of data means that the write throughput can be very high.
+We regularly get requests for setups that can handle hundreds of thousands of writes per second.
+Some larger companies will only consider systems that can handle millions of writes per second.
+
+At the same time, time series data can be a high read throughput use case.
+It's true that if you're tracking 700,000 unique metrics or time series, you can't hope to visualize all of them.
+That leads many people to think that you don't actually read most of the data that goes into the database.
+However, other than dashboards that people have up on their screens, there are automated systems for monitoring or combining the large volume of time series data with other types of data.
+
+Inside InfluxDB, aggregate functions calculated on the fly may combine tens of thousands of distinct time series into a single view.
+Each one of those queries must read each aggregated data point, so for InfluxDB the read throughput is often many times higher than the write throughput.
+
+Given that time series is mostly an append-only workload, you might think that it's possible to get great performance on a B+Tree.
+Appends in the keyspace are efficient and you can achieve more than 100,000 appends per second.
+However, we have those appends happening in individual time series.
+So the inserts end up looking more like random inserts than append-only inserts.
+
+One of the biggest problems we found with time series data is that it's very common to delete all data after it gets past a certain age.
+The common pattern here is that users have high precision data that is kept for a short period of time, like a few days or months.
+Users then downsample and aggregate that data into lower precision rollups that are kept around much longer.
+
+The naive implementation would be to simply delete each record once it passes its expiration time.
+However, that means that once the first points written reach their expiration date, the system is processing just as many deletes as writes, which is something most storage engines aren't designed for.
+
+Let's dig into the details of the two types of storage engines we tried and how these properties had a significant impact on our performance.
+
+## LevelDB and log-structured merge trees
+
+When the InfluxDB project began, we picked LevelDB as the storage engine because we had used it for time series data storage in the product that was the precursor to InfluxDB.
+We knew that it had great properties for write throughput and everything seemed to "just work".
+
+LevelDB is an implementation of a log-structured merge tree (LSM tree) that was built as an open source project at Google.
+It exposes an API for a key-value store where the key space is sorted.
+This last part is important for time series data as it allowed us to quickly scan ranges of time as long as the timestamp was in the key.
+
+LSM Trees are based on a log that takes writes and two structures known as Mem Tables and SSTables.
+These tables represent the sorted keyspace.
+SSTables are read-only files that are continuously replaced by other SSTables that merge inserts and updates into the keyspace.
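+
+For intuition, here is a hypothetical key layout (not LevelDB's or InfluxDB's actual format) that puts a big-endian timestamp at the end of the key; because the keyspace is sorted, a time-range scan over one series becomes a contiguous iteration:
+
+```go
+package main
+
+import (
+	"encoding/binary"
+	"fmt"
+)
+
+// makeKey builds a sortable key: series ID first, then a big-endian
+// timestamp, so all keys for one series sort together in time order.
+func makeKey(seriesID uint32, ts uint64) []byte {
+	key := make([]byte, 12)
+	binary.BigEndian.PutUint32(key[0:4], seriesID)
+	binary.BigEndian.PutUint64(key[4:12], ts)
+	return key
+}
+
+func main() {
+	// Scanning series 42 from t=100 to t=200 is a seek to makeKey(42, 100)
+	// followed by in-order iteration until the key exceeds makeKey(42, 200).
+	fmt.Printf("%x\n", makeKey(42, 100))
+	fmt.Printf("%x\n", makeKey(42, 200))
+}
+```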
+
+The two biggest advantages that LevelDB had for us were high write throughput and built-in compression.
+However, as we learned more about what people needed with time series data, we encountered a few insurmountable challenges.
+
+The first problem we had was that LevelDB doesn't support hot backups.
+If you want to do a safe backup of the database, you have to close it and then copy it.
+The LevelDB variants RocksDB and HyperLevelDB fix this problem, but there was another more pressing problem that we didn't think they could solve.
+
+Our users needed a way to automatically manage data retention.
+That meant we needed deletes on a very large scale.
+In LSM Trees, a delete is as expensive as, if not more expensive than, a write.
+A delete writes a new record known as a tombstone.
+After that, queries merge the result set with any tombstones to purge the deleted data from the query return.
+Later, a compaction runs that removes the tombstone record and the underlying deleted record in the SSTable file.
+
+To get around doing deletes, we split data across what we call shards, which are contiguous blocks of time.
+Shards would typically hold either one day or seven days' worth of data.
+Each shard mapped to an underlying LevelDB.
+This meant that we could drop an entire day of data by just closing out the database and removing the underlying files.
+
+Users of RocksDB may at this point bring up a feature called ColumnFamilies.
+When putting time series data into Rocks, it's common to split blocks of time into column families and then drop those when their time is up.
+It's the same general idea: create a separate area where you can just drop files instead of updating indexes when you delete a large block of data.
+Dropping a column family is a very efficient operation.
+However, column families were a fairly new feature at the time, and we had another use case for shards.
+
+Organizing data into shards meant that it could be moved within a cluster without having to examine billions of keys.
+At the time of this writing, it was not possible to move a column family from one RocksDB to another.
+Old shards are typically cold for writes so moving them around would be cheap and easy.
+We would have the added benefit of having a spot in the keyspace that is cold for writes so it would be easier to do consistency checks later.
+
+The organization of data into shards worked great for a while, until a large amount of data went into InfluxDB.
+LevelDB splits the data out over many small files.
+Having dozens or hundreds of these databases open in a single process ended up creating a big problem.
+Users that had six months or a year of data would run out of file handles.
+It's not something we found with the majority of users, but anyone pushing the database to its limits would hit this problem and we had no fix for it.
+There were simply too many file handles open.
+
+## BoltDB and mmap B+Trees
+
+After struggling with LevelDB and its variants for a year, we decided to move over to BoltDB, a pure Golang database heavily inspired by LMDB, a mmap B+Tree database written in C.
+It has the same API semantics as LevelDB: a key-value store where the keyspace is ordered.
+Many of our users were surprised.
+Our own posted tests of the LevelDB variants vs. LMDB (a mmap B+Tree) showed RocksDB as the best performer.
+
+However, there were other considerations that went into this decision outside of the pure write performance.
+At this point, our most important goal was to get to something stable that could be run in production and backed up.
+BoltDB also had the advantage of being written in pure Go, which simplified our build chain immensely and made it easy to build for other OSes and platforms.
+
+The biggest win for us was that BoltDB used a single file as the database.
+At this point, our most common source of bug reports was from people running out of file handles.
+Bolt solved the hot backup problem and the file limit problems all at the same time.
+
+We were willing to take a hit on write throughput if it meant that we'd have a more reliable and stable system that we could build on.
+Our reasoning was that for anyone pushing really big write loads, they'd be running a cluster anyway.
+
+We released versions 0.9.0 to 0.9.2 based on BoltDB.
+From a development perspective, it was delightful.
+Clean API, fast and easy to build in our Go project, and reliable.
+However, after running for a while we found a big problem with write throughput.
+After the database got over a few GB, writes would start spiking IOPS.
+
+Some users were able to get past this by putting InfluxDB on big hardware with near unlimited IOPS.
+However, most users are on VMs with limited resources in the cloud.
+We had to figure out a way to reduce the impact of writing a bunch of points into hundreds of thousands of series at a time.
+
+With the 0.9.3 and 0.9.4 releases, our plan was to put a write-ahead log (WAL) in front of Bolt.
+That way we could reduce the number of random insertions into the keyspace.
+Instead, we'd buffer up multiple writes that were next to each other and then flush them at once.
+However, that only served to delay the problem.
+High IOPS still became an issue and it showed up very quickly for anyone operating at even moderate workloads.
+
+However, our experience building the first WAL implementation in front of Bolt gave us the confidence we needed that the write problem could be solved.
+The performance of the WAL itself was fantastic, but the index simply could not keep up.
+At this point we started thinking again about how we could create something similar to an LSM Tree that could keep up with our write load.
+
+Thus was born the Time Structured Merge Tree.
diff --git a/content/influxdb/v1.7/concepts/time-series-index.md b/content/influxdb/v1.7/concepts/time-series-index.md
new file mode 100644
index 000000000..88176d2bf
--- /dev/null
+++ b/content/influxdb/v1.7/concepts/time-series-index.md
@@ -0,0 +1,50 @@
+---
+title: Time Series Index (TSI) overview
+
+menu:
+  influxdb_1_7:
+    name: Time Series Index (TSI) overview
+    weight: 70
+    parent: Concepts
+---
+
+Find overview and background information on Time Series Index (TSI) in this topic. For detail, including how to enable and configure TSI, see [Time Series Index (TSI) details](https://docs.influxdata.com/influxdb/v1.7/concepts/tsi-details/).
+
+## Overview
+
+To support a large number of time series, that is, a very high cardinality in the number of unique time series that the database stores, InfluxData has added the new Time Series Index (TSI).
+InfluxData supports customers using InfluxDB with tens of millions of time series.
+InfluxData's goal, however, is to expand to hundreds of millions, and eventually billions.
+Using InfluxData's TSI storage engine, users should be able to have millions of unique time series.
+The goal is that the number of series should be unbounded by the amount of memory on the server hardware.
+
+Importantly, the number of series that exist in the database will have a negligible impact on database startup time.
+This work represents the most significant technical advancement in the database since InfluxData released the Time Series Merge Tree (TSM) storage engine in 2016.
+
+## Background information
+
+InfluxDB actually looks like two databases in one: a time series data store and an inverted index for the measurement, tag, and field metadata.
+
+### Time-Structured Merge Tree (TSM)
+
+The Time-Structured Merge Tree (TSM) engine that InfluxData built in 2015 and continued enhancing in 2016 was an effort to solve the problem of getting maximum throughput, compression, and query speed for raw time series data.
+Up until TSI, the inverted index was an in-memory data structure that was built during startup of the database based on the data in TSM.
+This meant that for every measurement, tag key-value pair, and field name, there was a lookup table in memory to map those bits of metadata to an underlying time series.
+For users with a high number of ephemeral series, memory utilization continued increasing as new time series were created.
+And startup times increased, since all of that data would have to be loaded onto the heap at start time.
+
+> For details, see [TSM-based data storage and in-memory indexing](/influxdb/v1.7/concepts/storage_engine/).
+
+### Time Series Index (TSI)
+
+The new time series index (TSI) moves the index to files on disk that we memory map.
+This means that we let the operating system manage memory as a Least Recently Used (LRU) cache.
+Much like the TSM engine for raw time series data, we have a write-ahead log with an in-memory structure that gets merged at query time with the memory-mapped index.
+Background routines run constantly to compact the index into larger and larger files to avoid having to do too many index merges at query time.
+Under the covers, we're using techniques like Robin Hood Hashing to do fast index lookups and HyperLogLog++ to keep sketches of cardinality estimates.
+The latter will give us the ability to add things to the query language like the [SHOW CARDINALITY](/influxdb/v1.7/query_language/spec#show-cardinality) queries.
+
+### Issues solved by TSI and remaining to be solved
+
+The primary issue that Time Series Index (TSI) addresses is ephemeral time series. Most frequently, this occurs in use cases that want to track per-process metrics or per-container metrics by putting identifiers in tags. For example, the [Heapster project for Kubernetes](https://github.com/kubernetes/heapster) does this. Series that are no longer hot for writes or queries won't take up space in memory.
+
+One issue that TSI does not yet address is limiting the scope of data returned by the SHOW queries. We'll have updates to the query language in the future to limit those results by time. TSI also doesn't solve the problem of having all these series hot for reads and writes. For that problem, scale-out clustering is the solution. We'll have to continue to optimize the query language and engine to work with large sets of series. We'll need to add guard rails and limits into the language and, eventually, add spill-to-disk query processing. That work will be ongoing in every release of InfluxDB.
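+
+For reference, the cardinality estimates mentioned above already surface through InfluxQL queries such as the following (the database name is illustrative):
+
+```sql
+-- Estimate the number of unique series in a database
+SHOW SERIES CARDINALITY ON "mydb"
+
+-- Estimate the number of unique measurements in a database
+SHOW MEASUREMENT CARDINALITY ON "mydb"
+```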
diff --git a/content/influxdb/v1.7/concepts/tsi-details.md b/content/influxdb/v1.7/concepts/tsi-details.md
new file mode 100644
index 000000000..22cb4483b
--- /dev/null
+++ b/content/influxdb/v1.7/concepts/tsi-details.md
@@ -0,0 +1,155 @@
+---
+title: Time Series Index (TSI) details
+
+menu:
+  influxdb_1_7:
+    name: Time Series Index (TSI) details
+    weight: 80
+    parent: Concepts
+---
+
+InfluxDB stores measurement and tag information in an index so data can be queried quickly.
+
+In earlier versions, the index was stored in-memory, requiring a lot of RAM and restricting the number of series that a machine could hold (typically, 1-4 million series, depending on the machine).
+
+Time Series Index (TSI) stores index data both in memory and on disk, removing RAM restrictions. This lets you store more series on a machine.
+TSI uses the operating system's page cache to pull hot data into memory, leaving cold data on disk.
+
+## Enable TSI
+
+- For **InfluxDB OSS**, complete steps 3 and 4 of [Upgrading to InfluxDB 1.7.x](https://docs.influxdata.com/influxdb/v1.7/administration/upgrading/#upgrade-to-influxdb-1-7-x).
+
+- For **InfluxDB Enterprise**, on each data node in your cluster, complete step 2 and steps 4-7 of [Upgrade data nodes](/enterprise_influxdb/v1.7/administration/upgrading/#upgrade-data-nodes).
+
+## Tooling
+
+### `influx_inspect dumptsi`
+
+Use the `influx_inspect dumptsi` command to troubleshoot an issue with an index by printing summary statistics on an index, file, or set of files.
+
+> **Note:** This command only works on one index at a time.
+
+For more details, see [influx_inspect dumptsi](/influxdb/v1.7/tools/influx_inspect/#dumptsi).
+
+### `influx_inspect buildtsi`
+
+If you have a corrupted TSI, delete the `index` directory within your shard, and then use the `buildtsi` command to rebuild TSI.
+
+This command works at the server level, but you can optionally add database, retention policy, and shard filters to apply it only to a subset of shards.
+
+For details on this command, see [influx_inspect buildtsi](/influxdb/v1.7/tools/influx_inspect/#buildtsi).
+
+## Understanding TSI
+
+### File organization
+
+TSI (Time Series Index) is a log-structured merge tree-based database for InfluxDB series data.
+TSI is composed of several parts:
+
+* **Index**: Contains the entire index dataset for a single shard.
+
+* **Partition**: Contains a sharded partition of the data for a shard.
+
+* **LogFile**: Contains newly written series as an in-memory index and is persisted as a WAL.
+
+* **IndexFile**: Contains an immutable, memory-mapped index built from a LogFile or merged from two contiguous index files.
+
+There is also a **SeriesFile** which contains a set of all series keys across the entire database.
+Each shard within the database shares the same series file.
+
+### Writes
+
+The following occurs when a write comes into the system (a code sketch follows the list):
+
+1. The series is added to the series file, or looked up if it already exists. This returns an auto-incrementing series ID.
+2. The series is sent to the Index. The index maintains a roaring bitmap of existing series IDs and ignores series that have already been created.
+3. The series is hashed and sent to the appropriate Partition.
+4. The Partition writes the series as an entry to the LogFile.
+5. The LogFile writes the series to a write-ahead log file on disk and adds the series to a set of in-memory indexes.
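+
+A minimal Go sketch of this flow, using illustrative names and plain maps in place of the series file and roaring bitmaps:
+
+```go
+package main
+
+import (
+	"fmt"
+	"hash/fnv"
+)
+
+// Partition stands in for a TSI partition and its active LogFile.
+type Partition struct {
+	logFile []uint64
+}
+
+// Index is a toy stand-in for the TSI index.
+type Index struct {
+	seriesIDs  map[string]uint64 // stand-in for the series file
+	nextID     uint64
+	created    map[uint64]bool // stand-in for the roaring bitmap
+	partitions []*Partition
+}
+
+func (ix *Index) Write(seriesKey string) {
+	// 1. Look the series up in the series file, adding it if needed.
+	id, ok := ix.seriesIDs[seriesKey]
+	if !ok {
+		ix.nextID++
+		id = ix.nextID
+		ix.seriesIDs[seriesKey] = id
+	}
+	// 2. Ignore series that have already been created.
+	if ix.created[id] {
+		return
+	}
+	ix.created[id] = true
+	// 3. Hash the series key to choose a partition.
+	h := fnv.New32a()
+	h.Write([]byte(seriesKey))
+	p := ix.partitions[int(h.Sum32())%len(ix.partitions)]
+	// 4-5. The partition appends the entry to its LogFile, which would
+	// persist it to the WAL and update the in-memory indexes.
+	p.logFile = append(p.logFile, id)
+}
+
+func main() {
+	ix := &Index{
+		seriesIDs:  map[string]uint64{},
+		created:    map[uint64]bool{},
+		partitions: []*Partition{{}, {}, {}, {}},
+	}
+	ix.Write("cpu,host=server01")
+	ix.Write("cpu,host=server01") // duplicate write: ignored at step 2
+	fmt.Println(ix.nextID)        // 1
+}
+```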
+
+### Compaction
+
+Once the LogFile exceeds a threshold (1MB), a new active log file is created and the previous one begins compacting into an IndexFile.
+This first index file is at level 1 (L1).
+The log file is considered level 0 (L0).
+
+Index files can also be created by merging two smaller index files together.
+For example, if two contiguous L1 index files exist, they can be merged into an L2 index file.
+
+### Reads
+
+The index provides several API calls for retrieving sets of data such as:
+
+* `MeasurementIterator()`: Returns a sorted list of measurement names.
+* `TagKeyIterator()`: Returns a sorted list of tag keys in a measurement.
+* `TagValueIterator()`: Returns a sorted list of tag values for a tag key.
+* `MeasurementSeriesIDIterator()`: Returns a sorted list of all series IDs for a measurement.
+* `TagKeySeriesIDIterator()`: Returns a sorted list of all series IDs for a tag key.
+* `TagValueSeriesIDIterator()`: Returns a sorted list of all series IDs for a tag value.
+
+These iterators are all composable using several merge iterators.
+For each type of iterator (measurement, tag key, tag value, series ID), there are multiple merge iterator types:
+
+* **Merge**: Deduplicates items from two iterators.
+* **Intersect**: Returns only items that exist in two iterators.
+* **Difference**: Returns only items from the first iterator that don't exist in the second iterator.
+
+For example, a query with a WHERE clause of `region != 'us-west'` that operates across two shards will construct a set of iterators like this:
+
+```
+DifferenceSeriesIDIterators(
+    MergeSeriesIDIterators(
+        Shard1.MeasurementSeriesIDIterator("m"),
+        Shard2.MeasurementSeriesIDIterator("m"),
+    ),
+    MergeSeriesIDIterators(
+        Shard1.TagValueSeriesIDIterator("m", "region", "us-west"),
+        Shard2.TagValueSeriesIDIterator("m", "region", "us-west"),
+    ),
+)
+```
+
+### Log File Structure
+
+The log file is simply structured as a list of LogEntry objects written to disk in sequential order. Log files are written until they reach 1MB and then they are compacted into index files.
+The entry objects in the log can be of any of the following types:
+
+* AddSeries
+* DeleteSeries
+* DeleteMeasurement
+* DeleteTagKey
+* DeleteTagValue
+
+The in-memory index on the log file tracks the following:
+
+* Measurements by name
+* Tag keys by measurement
+* Tag values by tag key
+* Series by measurement
+* Series by tag value
+* Tombstones for series, measurements, tag keys, and tag values
+
+The log file also maintains bitsets for series ID existence and tombstones.
+These bitsets are merged with other log files and index files to regenerate the full index bitset on startup.
+
+### Index File Structure
+
+The index file is an immutable file that tracks similar information to the log file, but all data is indexed and written to disk so that it can be directly accessed from a memory-map.
+
+The index file has the following sections:
+
+* **TagBlocks:** Maintains an index of tag values for a single tag key.
+* **MeasurementBlock:** Maintains an index of measurements and their tag keys.
+* **Trailer:** Stores offset information for the file as well as HyperLogLog sketches for cardinality estimation.
+
+### Manifest
+
+The MANIFEST file is stored in the index directory and lists all the files that belong to the index and the order in which they should be accessed.
+This file is updated every time a compaction occurs.
+Any files that are in the directory that are not in the index file are index files that are in the process of being compacted.
+
+### FileSet
+
+A file set is an in-memory snapshot of the manifest that is obtained while the InfluxDB process is running.
+This is required to provide a consistent view of the index at a point in time.
+The file set also facilitates reference counting for all of its files so that no file will be deleted via compaction until all readers of the file are done with it.
diff --git a/content/influxdb/v1.7/data_sources/carbon.md b/content/influxdb/v1.7/data_sources/carbon.md
new file mode 100644
index 000000000..e69de29bb
diff --git a/content/influxdb/v1.7/data_sources/collectd.md b/content/influxdb/v1.7/data_sources/collectd.md
new file mode 100644
index 000000000..e69de29bb
diff --git a/content/influxdb/v1.7/data_sources/diamond.md b/content/influxdb/v1.7/data_sources/diamond.md
new file mode 100644
index 000000000..2b7e6cdff
--- /dev/null
+++ b/content/influxdb/v1.7/data_sources/diamond.md
@@ -0,0 +1,22 @@
+---
+title: Diamond
+---
+
+## Saving Diamond Metrics into InfluxDB
+
+Diamond is a metrics collection and delivery daemon written in Python.
+It is capable of collecting CPU, memory, network, I/O, load, and disk metrics.
+Additionally, it features an API for implementing custom collectors for gathering metrics from almost any source.
+
+[Diamond homepage](https://github.com/python-diamond)
+
+Diamond started supporting InfluxDB at version 3.5.
+
+## Configuring Diamond to send metrics to InfluxDB
+
+Prerequisites: Diamond depends on the [influxdb Python client](https://github.com/influxdb/influxdb-python).
+InfluxDB-version-specific installation instructions for the influxdb Python client can be found on their [GitHub page](https://github.com/influxdb/influxdb-python).
+
+[Diamond InfluxdbHandler configuration page](https://github.com/python-diamond/Diamond/wiki/handler-InfluxdbHandler)
+
diff --git a/content/influxdb/v1.7/data_sources/opentsdb.md b/content/influxdb/v1.7/data_sources/opentsdb.md
new file mode 100644
index 000000000..dd375947f
--- /dev/null
+++ b/content/influxdb/v1.7/data_sources/opentsdb.md
@@ -0,0 +1,19 @@
+---
+title: OpenTSDB
+---
+
+InfluxDB supports the OpenTSDB ["telnet" protocol](http://opentsdb.net/docs/build/html/user_guide/writing/index.html#telnet).
+When OpenTSDB support is enabled, InfluxDB can act as a drop-in replacement for your OpenTSDB system.
+
+An example input point, and how it is processed, is shown below.
+
+```
+put sys.cpu.user 1356998400 42.5 host=webserver01 cpu=0
+```
+
+When InfluxDB receives this data, a point is written to the database.
+The point's measurement is `sys.cpu.user`, the timestamp is `1356998400`, and the value is `42.5`.
+The point is also tagged with `host=webserver01` and `cpu=0`.
+Tags allow fast and efficient queries to be performed on your data.
+
+To learn more about enabling OpenTSDB support, check the example [configuration file](https://github.com/influxdb/influxdb/blob/1.7/etc/config.sample.toml).
diff --git a/content/influxdb/v1.7/external_resources.md b/content/influxdb/v1.7/external_resources.md
new file mode 100644
index 000000000..bef1839bb
--- /dev/null
+++ b/content/influxdb/v1.7/external_resources.md
@@ -0,0 +1,32 @@
+---
+title: External resources
+---
+
+But wait, there's more!
+Check out these resources to learn more about InfluxDB.
+
+## [InfluxData blog](https://www.influxdata.com/blog/)
+
+Check out the InfluxData Blog for announcements, updates, and
+weekly [tech tips](https://www.influxdata.com/category/tech-tips/).
+
+## [Technical papers](https://www.influxdata.com/_resources/techpapers-new/)
+
+InfluxData's Technical Papers series offers in-depth analysis on performance, time series,
+and benchmarking InfluxDB vs. other popular databases.
+
+## [Meetup videos](https://www.influxdata.com/_resources/videosnew//)
+
+Check out our growing Meetup videos collection for introductory content, how-tos, and more.
+
+## [Virtual training videos](https://www.influxdata.com/_resources/videosnew/)
+
+Watch the videos from our weekly training webinar.
+
+## [Virtual training schedule](https://www.influxdata.com/virtual-training-courses/)
+
+Check out our virtual training schedule to register for future webinars.
+
+## [InfluxData events](https://www.influxdata.com/events/)
+
+Find out what's happening at InfluxData and sign up for upcoming events.
diff --git a/content/influxdb/v1.7/guides/_index.md b/content/influxdb/v1.7/guides/_index.md
new file mode 100644
index 000000000..0b55cd6da
--- /dev/null
+++ b/content/influxdb/v1.7/guides/_index.md
@@ -0,0 +1,10 @@
+---
+title: InfluxDB guides
+menu:
+  influxdb_1_7:
+    name: Guides
+    weight: 40
+
+---
+
+{{< children type="list" >}}
diff --git a/content/influxdb/v1.7/guides/calculating_percentages.md b/content/influxdb/v1.7/guides/calculating_percentages.md
new file mode 100644
index 000000000..51a7178d9
--- /dev/null
+++ b/content/influxdb/v1.7/guides/calculating_percentages.md
@@ -0,0 +1,115 @@
+---
+title: Calculating percentages in a query
+description: Percentages can be calculated using basic math operators available in InfluxQL. This guide walks through use cases and examples of calculating percentages from two values in a single query.
+menu:
+  influxdb_1_7:
+    weight: 50
+    parent: Guides
+---
+
+[InfluxQL](/influxdb/v1.7/query_language/) lets you perform simple math operations,
+which makes calculating percentages using two fields in a measurement pretty simple.
+However, there are some caveats to be aware of.
+
+## Basic calculations within a query
+`SELECT` statements support the use of basic math operators such as `+`, `-`, `/`, `*`, and `()`.
+
+```sql
+-- Add two field keys
+SELECT field_key1 + field_key2 AS "field_key_sum" FROM "measurement_name" WHERE time < now() - 15m
+
+-- Subtract one field from another
+SELECT field_key1 - field_key2 AS "field_key_difference" FROM "measurement_name" WHERE time < now() - 15m
+
+-- Grouping and chaining mathematical calculations
+SELECT (field_key1 + field_key2) - (field_key3 + field_key4) AS "some_calculation" FROM "measurement_name" WHERE time < now() - 15m
+```
+
+## Calculating a percentage in a query
+Using basic math operators, you can calculate a percentage by dividing one field value
+by another and multiplying the result by 100:
+
+```sql
+SELECT (field_key1 / field_key2) * 100 AS "calculated_percentage" FROM "measurement_name" WHERE time < now() - 15m
+```
+
+## Calculating a percentage using aggregate functions
+If using aggregate functions in your percentage calculation, all data must be referenced
+using aggregate functions.
+_**You can't mix aggregate and non-aggregate data.**_
+
+All aggregate functions need a `GROUP BY time()` clause defining the time intervals
+in which data points are grouped and aggregated.
+ +```sql +SELECT (sum(field_key1) / sum(field_key2)) * 100 AS "calculated_percentage" FROM "measurement_name" WHERE time < now() - 15m GROUP BY time(1m) +``` + +## Examples + +#### Sample data +The following example uses simulated Apple Stand data that tracks the weight of +baskets containing different varieties of apples throughout a day of business. + +1. [Download the sample data](https://gist.githubusercontent.com/sanderson/8f8aec94a60b2c31a61f44a37737bfea/raw/c29b239547fa2b8ee1690f7d456d31f5bd461386/apple_stand.txt) +2. Import the sample data: + +```bash +influx -import -path=path/to/apple_stand.txt -precision=s -database=apple_stand +``` + +### Calculating percentage of total weight per apple variety +The following query calculates the percentage of the total weight each variety +accounts for at each given point in time. + +```sql +SELECT + ("braeburn"/total_weight)*100, + ("granny_smith"/total_weight)*100, + ("golden_delicious"/total_weight)*100, + ("fuji"/total_weight)*100, + ("gala"/total_weight)*100 +FROM "apple_stand"."autogen"."variety" +``` +
+ +If visualized as a [stacked graph](/chronograf/v1.7/guides/visualization-types/#stacked-graph) +in Chronograf, it would look like: + +![Percentage of total per apple variety](/img/influxdb/calc-percentage-apple-variety.png) + +### Calculating aggregate percentage per variety +The following query calculates the average percentage of the total weight each variety +accounts for per hour. + +```sql +SELECT + (mean("braeburn")/mean(total_weight))*100, + (mean("granny_smith")/mean(total_weight))*100, + (mean("golden_delicious")/mean(total_weight))*100, + (mean("fuji")/mean(total_weight))*100, + (mean("gala")/mean(total_weight))*100 +FROM "apple_stand"."autogen"."variety" +WHERE time >= '2018-06-18T12:00:00Z' AND time <= '2018-06-19T04:35:00Z' +GROUP BY time(1h) +``` +
+
+_**Note the following about this query:**_
+
+- It uses aggregate functions (`mean()`) for pulling all data.
+- It includes a `GROUP BY time()` clause which aggregates data into 1-hour blocks.
+- It includes an explicitly limited time window. Without it, aggregate functions
+  are very resource-intensive.
+
+If visualized as a [stacked graph](/chronograf/v1.7/guides/visualization-types/#stacked-graph)
+in Chronograf, it would look like:
+
+![Hourly average percentage of total per apple variety](/img/influxdb/calc-percentage-hourly-apple-variety.png)
diff --git a/content/influxdb/v1.7/guides/downsampling_and_retention.md b/content/influxdb/v1.7/guides/downsampling_and_retention.md
new file mode 100644
index 000000000..e767931ae
--- /dev/null
+++ b/content/influxdb/v1.7/guides/downsampling_and_retention.md
@@ -0,0 +1,232 @@
+---
+title: Downsampling and data retention
+menu:
+  influxdb_1_7:
+    weight: 30
+    parent: Guides
+---
+
+InfluxDB can handle hundreds of thousands of data points per second.
+Working with that much data over a long period of time can create storage
+concerns.
+A natural solution is to downsample the data; keep the high precision raw data
+for only a limited time, and store the lower precision, summarized data for much
+longer or forever.
+
+InfluxDB offers two features - continuous queries (CQ) and retention policies
+(RP) - that automate the process of downsampling data and expiring old data.
+This guide describes a practical use case for CQs and RPs and covers how to
+set up those features in InfluxDB databases.
+
+### Definitions
+
+A **continuous query** (CQ) is an InfluxQL query that runs automatically and
+periodically within a database.
+CQs require a function in the `SELECT` clause and must include a
+`GROUP BY time()` clause.
+
+A **retention policy** (RP) is the part of InfluxDB data structure
+that describes how long InfluxDB keeps data.
+InfluxDB compares your local server's timestamp to the timestamps on your data
+and deletes data that are older than the RP's `DURATION`.
+A single database can have several RPs and RPs are unique per database.
+
+This guide will not go into detail about the syntax for creating and managing
+CQs and RPs.
+If you're new to both concepts, we recommend looking over the detailed
+[CQ documentation](/influxdb/v1.7/query_language/continuous_queries/) and
+[RP documentation](/influxdb/v1.7/query_language/database_management/#retention-policy-management).
+
+### Sample data
+
+This section uses fictional real-time data that track the number of food orders
+to a restaurant via phone and via website at ten-second intervals.
+We will store those data in a
+[database](/influxdb/v1.7/concepts/glossary/#database) called `food_data`, in
+the [measurement](/influxdb/v1.7/concepts/glossary/#measurement) `orders`, and
+in the [fields](/influxdb/v1.7/concepts/glossary/#field) `phone` and `website`.
+
+Sample:
+
+```bash
+name: orders
+------------
+time                   phone   website
+2016-05-10T23:18:00Z   10      30
+2016-05-10T23:18:10Z   12      39
+2016-05-10T23:18:20Z   11      56
+```
+
+### Goal
+
+Assume that, in the long run, we're only interested in the average number of orders by phone
+and by website at 30-minute intervals.
+In the next steps, we use RPs and CQs to:
+
+* Automatically aggregate the ten-second resolution data to 30-minute resolution data
+* Automatically delete the raw, ten-second resolution data that are older than two hours
+* Automatically delete the 30-minute resolution data that are older than 52 weeks
+
+### Database preparation
+
+We perform the following steps before writing the data to the database
+`food_data`.
+We do this **before** inserting any data because CQs only run against recent
+data; that is, data with timestamps that are no older than `now()` minus
+the `FOR` clause of the CQ, or `now()` minus the `GROUP BY time()` interval if
+the CQ has no `FOR` clause.
+
+#### 1. Create the database
+
+```sql
+> CREATE DATABASE "food_data"
+```
+
+#### 2. Create a two-hour `DEFAULT` retention policy
+
+InfluxDB writes to the `DEFAULT` retention policy if we do not supply an explicit RP when
+writing a point to the database.
+We make the `DEFAULT` RP keep data for two hours, because we want InfluxDB to
+automatically write the incoming ten-second resolution data to that RP.
+
+Use the
+[`CREATE RETENTION POLICY`](/influxdb/v1.7/query_language/database_management/#create-retention-policies-with-create-retention-policy)
+statement to create a `DEFAULT` RP:
+
+```sql
+> CREATE RETENTION POLICY "two_hours" ON "food_data" DURATION 2h REPLICATION 1 DEFAULT
+```
+
+That query creates an RP called `two_hours` that exists in the database
+`food_data`.
+`two_hours` keeps data for a `DURATION` of two hours (`2h`) and it's the `DEFAULT`
+RP for the database `food_data`.
+
+{{% warn %}}
+The replication factor (`REPLICATION 1`) is a required parameter. Set it to 1 for single-node instances. For multiple data nodes in a cluster, the replication factor **must divide evenly** into the number of data nodes. For example, a replication factor of 2 works with 2, 4, or 6 data nodes, and so on. A replication factor of 3 works with 3, 6, or 9 data nodes, and so on.
{{% /warn %}}
+
+> **Note:** When we created the `food_data` database in step 1, InfluxDB
+automatically generated an RP named `autogen` and set it as the `DEFAULT`
+RP for the database.
+The `autogen` RP has an infinite retention period.
+With the query above, the RP `two_hours` replaces `autogen` as the `DEFAULT` RP
+for the `food_data` database.
+
+#### 3. Create a 52-week retention policy
+
+Next we want to create another retention policy that keeps data for 52 weeks and is not the
+`DEFAULT` retention policy (RP) for the database.
+Ultimately, the 30-minute rollup data will be stored in this RP.
+
+Use the
+[`CREATE RETENTION POLICY`](/influxdb/v1.7/query_language/database_management/#create-retention-policies-with-create-retention-policy)
+statement to create a non-`DEFAULT` retention policy:
+
+```sql
+> CREATE RETENTION POLICY "a_year" ON "food_data" DURATION 52w REPLICATION 1
+```
+
+That query creates a retention policy (RP) called `a_year` that exists in the database
+`food_data`.
+The `a_year` setting keeps data for a `DURATION` of 52 weeks (`52w`).
+Leaving out the `DEFAULT` argument ensures that `a_year` is not the `DEFAULT`
+RP for the database `food_data`.
+That is, write and read operations against `food_data` that do not specify an
+RP will still go to the `two_hours` RP (the `DEFAULT` RP).
+
+#### 4. Create the continuous query
+
+Now that we've set up our RPs, we want to create a continuous query (CQ) that will automatically
+and periodically downsample the ten-second resolution data to the 30-minute
+resolution, and then store those results in a different measurement with a different
+retention policy.
+
+Use the
+[`CREATE CONTINUOUS QUERY`](/influxdb/v1.7/query_language/continuous_queries/)
+statement to generate a CQ:
+
+```sql
+> CREATE CONTINUOUS QUERY "cq_30m" ON "food_data" BEGIN
+  SELECT mean("website") AS "mean_website", mean("phone") AS "mean_phone"
+  INTO "a_year"."downsampled_orders"
+  FROM "orders"
+  GROUP BY time(30m)
+END
+```
+
+That query creates a CQ called `cq_30m` in the database `food_data`.
+`cq_30m` tells InfluxDB to calculate the 30-minute average of the two fields
+`website` and `phone` in the measurement `orders` and in the `DEFAULT` RP
+`two_hours`.
+It also tells InfluxDB to write those results to the measurement
+`downsampled_orders` in the retention policy `a_year` with the field keys
+`mean_website` and `mean_phone`.
+InfluxDB will run this query every 30 minutes for the previous 30 minutes.
+
+> **Note:** Notice that we fully qualify (that is, we use the syntax
+`"<retention_policy>"."<measurement>"`) the measurement in the `INTO`
+clause.
+InfluxDB requires that syntax to write data to an RP other than the `DEFAULT`
+RP.
+
+### Results
+
+With the new CQ and two new RPs, `food_data` is ready to start receiving data.
+After writing data to our database and letting things run for a bit, we see
+two measurements: `orders` and `downsampled_orders`.
+
+```sql
+> SELECT * FROM "orders" LIMIT 5
+name: orders
+---------
+time                   phone   website
+2016-05-13T23:00:00Z   10      30
+2016-05-13T23:00:10Z   12      39
+2016-05-13T23:00:20Z   11      56
+2016-05-13T23:00:30Z   8       34
+2016-05-13T23:00:40Z   17      32
+
+> SELECT * FROM "a_year"."downsampled_orders" LIMIT 5
+name: downsampled_orders
+---------------------
+time                   mean_phone   mean_website
+2016-05-13T15:00:00Z   12           23
+2016-05-13T15:30:00Z   13           32
+2016-05-13T16:00:00Z   19           21
+2016-05-13T16:30:00Z   3            26
+2016-05-13T17:00:00Z   4            23
+```
+
+The data in `orders` are the raw, ten-second resolution data that reside in the
+two-hour RP.
+The data in `downsampled_orders` are the aggregated, 30-minute resolution data
+that are subject to the 52-week RP.
+
+Notice that the first timestamps in `downsampled_orders` are older than the first
+timestamps in `orders`.
+This is because InfluxDB has already deleted data from `orders` with timestamps
+that are older than our local server's timestamp minus two hours (assume we
+executed the `SELECT` queries at `2016-05-14T00:59:59Z`).
+InfluxDB will only start dropping data from `downsampled_orders` after 52 weeks.
+
+> **Notes:**
+>
+* Notice that we fully qualify (that is, we use the syntax
+`"<retention_policy>"."<measurement>"`) `downsampled_orders` in
+the second `SELECT` statement. We must specify the RP in that query to `SELECT`
+data that reside in an RP other than the `DEFAULT` RP.
+>
+* By default, InfluxDB checks to enforce an RP every 30 minutes.
+Between checks, `orders` may have data that are older than two hours.
+The rate at which InfluxDB checks to enforce an RP is a configurable setting;
+see
+[Database Configuration](/influxdb/v1.7/administration/config#check-interval-30m0s).
+
+Using a combination of RPs and CQs, we've successfully set up our database to
+automatically keep the high precision raw data for a limited time, create lower
+precision data, and store that lower precision data for a longer period of time.
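+
+You can confirm the setup at any time with standard InfluxQL metadata queries (output omitted here):
+
+```sql
+> SHOW RETENTION POLICIES ON "food_data"
+> SHOW CONTINUOUS QUERIES
+```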
+Now that you have a general understanding of how these features can work
+together, check out the detailed documentation on [CQs](/influxdb/v1.7/query_language/continuous_queries/) and [RPs](/influxdb/v1.7/query_language/database_management/#retention-policy-management)
+to see all that they can do for you.
diff --git a/content/influxdb/v1.7/guides/hardware_sizing.md b/content/influxdb/v1.7/guides/hardware_sizing.md
new file mode 100644
index 000000000..9b28c3156
--- /dev/null
+++ b/content/influxdb/v1.7/guides/hardware_sizing.md
@@ -0,0 +1,481 @@
+---
+title: Hardware sizing guidelines
+menu:
+  influxdb_1_7:
+    weight: 40
+    parent: Guides
+---
+
+Review configuration and hardware guidelines for InfluxDB OSS (open source) and InfluxDB Enterprise:
+
+* [Single node or cluster?](#single-node-or-cluster)
+* [Query guidelines](#query-guidelines)
+* [InfluxDB OSS guidelines](#influxdb-oss-guidelines)
+* [InfluxDB Enterprise cluster guidelines](#influxdb-enterprise-cluster-guidelines)
+* [When do I need more RAM?](#when-do-i-need-more-ram)
+* [Recommended cluster configurations](#recommended-cluster-configurations)
+* [Storage: type, amount, and configuration](#storage-type-amount-and-configuration)
+
+> **Disclaimer:** Your numbers may vary from recommended guidelines. Guidelines provide estimated benchmarks for implementing the most performant system for your business.
+
+## Single node or cluster?
+
+If your InfluxDB workload requires any of the following, a single node (InfluxDB OSS) may not support your needs:
+
+- more than 750,000 field writes per second
+- more than 100 moderate queries per second ([see Query guidelines](#query-guidelines))
+- more than 10,000,000 [series cardinality](/influxdb/v1.7/concepts/glossary/#series-cardinality)
+
+We recommend InfluxDB Enterprise, which supports multiple data nodes (a cluster) across multiple server cores. InfluxDB Enterprise distributes multiple copies of your data across a cluster, providing high availability and redundancy, so an unavailable node doesn't significantly impact the cluster. Please contact [presales-support@influxdata.com](mailto:presales-support@influxdata.com) for assistance tuning your system.
+
+If you want a single-node instance of InfluxDB that's fully open source, requires fewer writes, queries, and unique series than listed above, and does **not require** redundancy, we recommend InfluxDB OSS.
+
+> **Note:** Without the redundancy of a cluster, writes and queries fail immediately when a server is unavailable.
+
+## Query guidelines
+
+> Query complexity varies widely in its impact on the system. Recommendations for both single nodes and clusters are based on **moderate** query loads.
+
+For **simple** or **complex** queries, we recommend testing and adjusting the suggested requirements as needed.
Query complexity is defined by the following criteria:
+
+| Query complexity | Criteria                                                                               |
+|------------------|----------------------------------------------------------------------------------------|
+| Simple           | Have few or no functions and no regular expressions                                   |
+|                  | Are bounded in time to a few minutes, hours, or 24 hours at most                       |
+|                  | Typically execute in a few milliseconds to a few dozen milliseconds                    |
+| Moderate         | Have multiple functions and one or two regular expressions                             |
+|                  | May also have `GROUP BY` clauses or sample a time range of multiple weeks              |
+|                  | Typically execute in a few hundred or a few thousand milliseconds                      |
+| Complex          | Have multiple aggregation or transformation functions or multiple regular expressions  |
+|                  | May sample a very large time range of months or years                                  |
+|                  | Typically take multiple seconds to execute                                             |
+
+## InfluxDB OSS guidelines
+
+Run InfluxDB on locally attached solid state drives (SSDs). Other storage configurations have lower performance and may not be able to recover from small interruptions in normal processing.
+
+Estimated guidelines are provided for writes per second, queries per second, number of unique [series](/influxdb/v1.7/concepts/glossary/#series), CPU, RAM, and IOPS (input/output operations per second).
+
+| vCPU or CPU |     RAM |     IOPS | Writes per second | Queries* per second | Unique series |
+| ----------: | ------: | -------: | ----------------: | ------------------: | ------------: |
+|   2-4 cores |  2-4 GB |      500 |           < 5,000 |                 < 5 |     < 100,000 |
+|   4-6 cores | 8-32 GB | 500-1000 |         < 250,000 |                < 25 |   < 1,000,000 |
+|    8+ cores |  32+ GB |    1000+ |         > 250,000 |                > 25 |   > 1,000,000 |
+
+* **Queries per second for moderate queries.** Queries vary widely in their impact on the system. For simple or complex queries, we recommend testing and adjusting the suggested requirements as needed. See [query guidelines](#query-guidelines) for details.
+
+> We recommend no more than 50 databases per host to prevent OS file handle issues. Please contact [presales-support@influxdata.com](mailto:presales-support@influxdata.com) for assistance tuning your system.
+
+## InfluxDB Enterprise cluster guidelines
+
+### Meta nodes
+
+> Set up clusters with an odd number of meta nodes - an even number may cause issues in certain configurations.
+
+A cluster must have a **minimum of three** independent meta nodes for data redundancy and availability. A cluster with `2n + 1` meta nodes can tolerate the loss of `n` meta nodes.
+
+Meta nodes do not need very much computing power. Regardless of the cluster load, we recommend the following guidelines for the meta nodes:
+
+* vCPU or CPU: 1-2 cores
+* RAM: 512 MB - 1 GB
+* IOPS: 50
+
+### Web node
+
+The InfluxDB Enterprise web server is primarily an HTTP server with similar load requirements. For most applications, the server doesn't need to be very robust. A cluster can function with only one web server, but for redundancy, we recommend connecting multiple web servers to a single back-end Postgres database.
+
+> **Note:** Production clusters should not use the SQLite database (it lacks support for redundant web servers and handling high loads).
+
+* vCPU or CPU: 2-4 cores
+* RAM: 2-4 GB
+* IOPS: 100
+
+### Data nodes
+
+A cluster with one data node is valid but has no data redundancy. Redundancy is set by the [replication factor](/influxdb/v1.7/concepts/glossary/#replication-factor) on the retention policy the data is written to. To ensure data is successfully replicated across a cluster, the number of data nodes in a cluster **must be evenly divisible** by the replication factor.
+
+> **Note:** If the number of data nodes isn't evenly divisible by the replication factor, data may be distributed unevenly across the cluster, causing poor performance.
+
+Guidelines vary by writes per second per node, moderate queries per second per node, and the number of unique series per node.
+
+#### Guidelines per node
+
+| vCPU or CPU |      RAM |  IOPS | Writes per second | Queries* per second | Unique series |
+| ----------: | -------: | ----: | ----------------: | ------------------: | ------------: |
+|     2 cores |   4-8 GB |  1000 |           < 5,000 |                 < 5 |     < 100,000 |
+|   4-6 cores | 16-32 GB | 1000+ |         < 100,000 |                < 25 |   < 1,000,000 |
+|    8+ cores |   32+ GB | 1000+ |         > 100,000 |                > 25 |   > 1,000,000 |
+
+Guidelines are provided for moderate queries. Queries vary widely in their impact on the system. For simple or complex queries, we recommend testing and adjusting the suggested requirements as needed. See [query guidelines](#query-guidelines) for details.
+
+## When do I need more RAM?
+
+Start with the recommended RAM, and then adjust as needed:
+
+- For InfluxDB OSS, determine your unique number of series, and then refer to [InfluxDB OSS guidelines](#influxdb-oss-guidelines).
+- For InfluxDB Enterprise, consider your server cores (AWS EC2 R4 instances or equivalent), and then refer to [guidelines per cluster](#guidelines-per-cluster).
+
+In general, more RAM improves query speed. Your RAM requirements are primarily determined by series cardinality. Higher cardinality requires more RAM. Regardless of RAM, a series cardinality of 10 million or more can cause OOM (out of memory) failures. You can usually resolve OOM issues by redesigning your [schema](/influxdb/v1.7/concepts/glossary/#schema).
+
+## Guidelines per cluster
+
+InfluxDB Enterprise guidelines vary by writes and queries per second, series cardinality, replication factor, and infrastructure (AWS EC2 R4 instances or equivalent):
+
+- R4.xlarge (4 cores); 30.5 GB RAM
+- R4.2xlarge (8 cores); 61 GB RAM
+- R4.4xlarge (16 cores); 122 GB RAM
+- R4.8xlarge (32 cores); 244 GB RAM
+
+> Guidelines stem from a DevOps monitoring use case: maintaining a group of computers and monitoring server metrics (such as CPU, kernel, memory, disk space, disk I/O, network, and so on).
+
+> We recommend no more than 50 databases per cluster to prevent file handle issues on your OS. Please contact [presales-support@influxdata.com](mailto:presales-support@influxdata.com) for assistance tuning your system.
+
+### Recommended cluster configurations
+
+Cluster configuration guidelines are organized by:
+
+- Series cardinality in your data set: 10,000, 100,000, 1,000,000, or 10,000,000
+- Number of data nodes
+- Number of server cores
+
+> Cluster configurations were tested against Time Series Index (TSI) (`tsi1`). To prevent multiple index types (`tsi1` and the earlier `inmem`) from being used simultaneously, TSI isn't enabled by default.
+
+> We recommend enabling TSI; for more information, see [TSI details](/influxdb/v1.7/concepts/tsi-details/). For `inmem`, use the guidelines below as a benchmark and adjust as needed.
+
+For each cluster configuration, you'll find guidelines for the following:
+
+- **maximum writes per second only** (no dashboard queries are running)
+- **maximum queries per second only** (no data is being written)
+- **maximum simultaneous queries and writes per second, combined**
+
+#### Review cluster configuration tables
+
+1. Select the series cardinality tab below, and then click to expand a replication factor.
+2. 
In the **Nodes x Core** column, find the number of data nodes and server cores in your configuration, and then review the recommended **maximum** guidelines. + +{{< tabs-wrapper >}} +{{% tabs %}} +[10,000 series](#) +[100,000 series](#) +[1,000,000 series](#) +[10,000,000 series](#) +{{% /tabs %}} +{{% tab-content %}} + +Select one of the following replication factors to see the recommended cluster configuration for 10,000 series: + +{{% expand "Replication factor, 1" %}} + +| Nodes x Core | Writes per second | Queries per second | Queries + writes per second | +|:------------:|------------------:|-------------------:|:---------------------------:| +| 1 x 4 | 188,000 | 5 | 4 + 99,000 | +| 1 x 8 | 405,000 | 9 | 8 + 207,000 | +| 1 x 16 | 673,000 | 15 | 14 + 375,000 | +| 1 x 32 | 1,056,000 | 24 | 22 + 650,000 | +| 2 x 4 | 384,000 | 14 | 14 + 184,000 | +| 2 x 8 | 746,000 | 22 | 22 + 334,000 | +| 2 x 16 | 1,511,000 | 56 | 40 + 878,000 | +| 2 x 32 | 2,426,000 | 96 | 68 + 1,746,000 | + +{{% /expand %}} + +{{% expand "Replication factor, 2" %}} + +| Nodes x Core | Writes per second | Queries per second | Queries + writes per second | +|:------------:|------------------:|-------------------:|:---------------------------:| +| 2 x 4 | 296,000 | 16 | 16 + 151,000 | +| 2 x 8 | 560,000 | 30 | 26 + 290,000 | +| 2 x 16 | 972,000 | 54 | 50 + 456,000 | +| 2 x 32 | 1,860,000 | 84 | 74 + 881,000 | +| 4 x 8 | 1,781,000 | 100 | 64 + 682,000 | +| 4 x 16 | 3,430,000 | 192 | 104 + 1,732,000 | +| 4 x 32 | 6,351,000 | 432 | 188 + 3,283,000 | +| 6 x 8 | 2,923,000 | 216 | 138 + 1,049,000 | +| 6 x 16 | 5,650,000 | 498 | 246 + 2,246,000 | +| 6 x 32 | 9,842,000 | 1248 | 528 + 5,229,000 | +| 8 x 8 | 3,987,000 | 632 | 336 + 1,722,000 | +| 8 x 16 | 7,798,000 | 1384 | 544 + 3,911,000 | +| 8 x 32 | 13,189,000 | 3648 | 1,152 + 7,891,000 | + +{{% /expand %}} + +{{% expand "Replication factor, 3" %}} + +| Nodes x Core | Writes per second | Queries per second | Queries + writes per second | +|:------------:|------------------:|-------------------:|:---------------------------:| +| 3 x 8 | 815,000 | 63 | 54 + 335,000 | +| 3 x 16 | 1,688,000 | 120 | 87 + 705,000 | +| 3 x 32 | 3,164,000 | 255 | 132 + 1,626,000 | +| 6 x 8 | 2,269,000 | 252 | 168 + 838,000 | +| 6 x 16 | 4,593,000 | 624 | 336 + 2,019,000 | +| 6 x 32 | 7,776,000 | 1340 | 576 + 3,624,000 | + +{{% /expand %}} + +{{% expand "Replication factor, 4" %}} + +| Nodes x Core | Writes per second | Queries per second | Queries + writes per second | +|:------------:| -----------------:| ------------------:|:---------------------------:| +| 4 x 8 | 1,028,000 | 116 | 98 + 365,000 | +| 4 x 16 | 2,067,000 | 208 | 140 + 8,056,000 | +| 4 x 32 | 3,290,000 | 428 | 228 + 1,892,000 | +| 8 x 8 | 2,813,000 | 928 | 496 + 1,225,000 | +| 8 x 16 | 5,225,000 | 2176 | 800 + 2,799,000 | +| 8 x 32 | 8,555,000 | 5184 | 1088 + 6,055,000 | + +{{% /expand %}} + +{{% expand "Replication factor, 6" %}} + +| Nodes x Core | Writes per second | Queries per second | Queries + writes per second | +|:------------:| -----------------:| ------------------:|:---------------------------:| +| 6 x 8 | 1,261,000 | 288 | 192 + 522,000 | +| 6 x 16 | 2,370,000 | 576 | 288 + 1,275,000 | +| 6 x 32 | 3,601,000 | 1056 | 336 + 2,390,000 | + +{{% /expand %}} + +{{% expand "Replication factor, 8" %}} + +| Nodes x Core | Writes per second | Queries per second | Queries + writes per second | +|:------------:| ----------------: | -----------------: |:---------------------------:| +| 8 x 8 | 1,382,000 | 1184 | 416 + 915,000 | +| 8 x 
16 | 2,658,000 | 2504 | 672 + 843,000 |
following replication factors to see the recommended cluster configuration for 1,000,000 series: + +{{% expand "Replication factor, 2" %}} + +| Nodes x Core | Writes per second | Queries per second | Queries + writes per second | +|:-------------:|------------------:|-------------------:|:---------------------------:| +| 2 x 4 | 104,000 | 18 | 12 + 54,000 | +| 2 x 8 | 195,000 | 36 | 24 + 99,000 | +| 2 x 16 | 498,000 | 70 | 44 + 145,000 | +| 2 x 32 | 1,195,000 | 102 | 84 + 232,000 | +| 4 x 8 | 488,000 | 120 | 56 + 222,000 | +| 4 x 16 | 1,023,000 | 244 | 112 + 428,000 | +| 4 x 32 | 2,686,000 | 468 | 208 + 729,000 | +| 6 x 8 | 845,000 | 270 | 126 + 356,000 | +| 6 x 16 | 1,780,000 | 606 | 288 + 663,000 | +| 6 x 32 | 430,000 | 1,488 | 624 + 1,209,000 | +| 8 x 8 | 1,831,000 | 808 | 296 + 778,000 | +| 8 x 16 | 4,167,000 | 1,856 | 640 + 2,031,000 | +| 8 x 32 | 7,813,000 | 3,201 | 896 + 4,897,000 | + +{{% /expand %}} + +{{% expand "Replication factor, 3" %}} + +| Nodes x Core | Writes per second | Queries per second | Queries + writes per second | +|:------------:|------------------:|-------------------:|:---------------------------:| +| 3 x 8 | 234,000 | 72 | 42 + 87,000 | +| 3 x 16 | 613,000 | 120 | 75 + 166,000 | +| 3 x 32 | 1,365,000 | 141 | 114 + 984,000 | +| 6 x 8 | 593,000 | 318 | 144 + 288,000 | +| 6 x 16 | 1,545,000 | 744 | 384 + 407,000 | +| 6 x 32 | 3,204,000 | 1632 | 912 + 505,000 | + +{{% /expand %}} + +{{% expand "Replication factor, 4" %}} + +| Nodes x Core | Writes per second | Queries per second | Queries + writes per second | +|:------------:| -----------------:| ------------------:|:---------------------------:| +| 4 x 8 | 258,000 | 116 | 68 + 73,000 | +| 4 x 16 | 675,000 | 196 | 132 + 140,000 | +| 4 x 32 | 1,513,000 | 244 | 176 + 476,000 | +| 8 x 8 | 614,000 | 1096 | 400 + 258,000 | +| 8 x 16 | 1,557,000 | 2496 | 1152 + 436,000 | +| 8 x 32 | 3,265,000 | 4288 | 2240 + 820,000 | + +{{% /expand %}} + +{{% expand "Replication factor, 6" %}} + +| Nodes x Core | Writes per second | Queries per second | Queries + writes per second | +|:------------:| -----------------:| ------------------:|:---------------------------:| +| 6 x 8 | 694,000 | 302 | 198 + 204,000 | +| 6 x 16 | 1,397,000 | 552 | 360 + 450,000 | +| 6 x 32 | 2,298,000 | 1248 | 384 + 1,261,000 | + +{{% /expand %}} + +{{% expand "Replication factor, 8" %}} + +| Nodes x Core | Writes per second | Queries per second | Queries + writes per second | +|:------------:| ----------------: | -----------------: |:---------------------------:| +| 8 x 8 | 739,000 | 1296 | 480 + 371,000 | +| 8 x 16 | 1,396,000 | 2592 | 672 + 843,000 | +| 8 x 32 | 2,614,000 | 2496 | 960 + 1,371,000 | + +{{% /expand %}} + +{{% /tab-content %}} + +{{% tab-content %}} + +Select one of the following replication factors to see the recommended cluster configuration for 10,000,000 series: + +{{% expand "Replication factor, 1" %}} + +| Nodes x Core | Writes per second | Queries per second | Queries + writes per second | +|:------------:|------------------:|-------------------:|:---------------------------:| +| 2 x 4 | 122,000 | 16 | 12 + 81,000 | +| 2 x 8 | 259,000 | 36 | 24 + 143,000 | +| 2 x 16 | 501,000 | 66 | 44 + 290,000 | +| 2 x 32 | 646,000 | 142 | 54 + 400,000 | + +{{% /expand %}} + +{{% expand "Replication factor, 2" %}} + +| Nodes x Core | Writes per second | Queries per second | Queries + writes per second | +|:------------:|------------------:|-------------------:|:---------------------------:| +| 2 x 4 | 87,000 | 18 | 14 + 56,000 | +| 2 x 8 | 169,000 | 
38 | 24 + 98,000 | +| 2 x 16 | 334,000 | 76 | 46 + 224,000 | +| 2 x 32 | 534,000 | 136 | 58 + 388,000 | +| 4 x 8 | 335,000 | 120 | 60 + 204,000 | +| 4 x 16 | 643,000 | 256 | 112 + 395,000 | +| 4 x 32 | 967,000 | 560 | 158 + 806,000 | +| 6 x 8 | 521,000 | 378 | 144 + 319,000 | +| 6 x 16 | 890,000 | 582 | 186 + 513,000 | +| 8 x 8 | 699,000 | 1,032 | 256 + 477,000 | +| 8 x 16 | 1,345,000 | 2,048 | 544 + 741,000 | + +{{% /expand %}} + +{{% expand "Replication factor, 3" %}} + +| Nodes x Core | Writes per second | Queries per second | Queries + writes per second | +|:------------:|------------------:|-------------------:|:---------------------------:| +| 3 x 8 | 170,000 | 60 | 42 + 98,000 | +| 3 x 16 | 333,000 | 129 | 76 + 206,000 | +| 3 x 32 | 609,000 | 178 | 60 + 162,000 | +| 6 x 8 | 395,000 | 402 | 132 + 247,000 | +| 6 x 16 | 679,000 | 894 | 150 + 527,000 | + +{{% /expand %}} + +{{% expand "Replication factor, 4" %}} + +| Nodes x Core | Writes per second | Queries per second | Queries + writes per second | +|:------------:| -----------------:| ------------------:|:---------------------------:| +| 4 x 8 | 183,365 | 132 | 52 + 100,000 | + +{{% /expand %}} + +{{% /tab-content %}} +{{< /tabs-wrapper >}} + +## Storage: type, amount, and configuration + +### Storage volume and IOPS + +Consider the type and amount of storage you need. InfluxDB is designed to run on solid state drives (SSDs) and memory-optimized cloud instances, for example, AWS EC2 R5 or R4 instances. InfluxDB isn't tested on hard disk drives (HDDs) and we don't recommend HDDs for production. For best results, InfluxDB servers must have a minimum of 1000 IOPS on storage to ensure recovery and availability. We recommend at least 2000 IOPS for rapid recovery of cluster data nodes after downtime. + +See your cloud provider documentation for IOPS details on your storage volumes. + +### Bytes and compression + +Database names, [measurements](/influxdb/v1.7/concepts/glossary/#measurement), [tag keys](/influxdb/v1.7/concepts/glossary/#tag-key), [field keys](/influxdb/v1.7/concepts/glossary/#field-key), and [tag values](/influxdb/v1.7/concepts/glossary/#tag-value) are stored only once and always as strings. [Field values](/influxdb/v1.7/concepts/glossary/#field-value) and [timestamps](/influxdb/v1.7/concepts/glossary/#timestamp) are stored for every point. + +Non-string values require approximately three bytes. String values require variable space, determined by string compression. + +### Separate `wal` and `data` directories + +When running InfluxDB in a production environment, store the `wal` directory and the `data` directory on separate storage devices. This optimization significantly reduces disk contention under heavy write load, an important consideration if the write load is highly variable. If the write load does not vary by more than 15%, this optimization is probably not necessary. diff --git a/content/influxdb/v1.7/guides/migrate-to-enterprise.md b/content/influxdb/v1.7/guides/migrate-to-enterprise.md new file mode 100644 index 000000000..73ddd608a --- /dev/null +++ b/content/influxdb/v1.7/guides/migrate-to-enterprise.md @@ -0,0 +1,11 @@ +--- +title: Migrate from InfluxDB OSS to InfluxDB Enterprise +description: > + Migrate your InfluxDB OSS instance with your data and users to InfluxDB Enterprise.
+menu: + influxdb_1_7: + weight: 50 + parent: Guides + name: Migrate to InfluxDB Enterprise + url: /enterprise_influxdb/v1.7/guides/migration/ +--- diff --git a/content/influxdb/v1.7/guides/querying_data.md b/content/influxdb/v1.7/guides/querying_data.md new file mode 100644 index 000000000..8d174c349 --- /dev/null +++ b/content/influxdb/v1.7/guides/querying_data.md @@ -0,0 +1,166 @@ +--- +title: Querying data with the InfluxDB API +aliases: + - /docs/v1.7/query_language/querying_data/ +menu: + influxdb_1_7: + weight: 20 + parent: Guides +--- + +## Querying data with the InfluxDB API + +The InfluxDB API is the primary means for querying data in InfluxDB (see the [command line interface](/influxdb/v1.7/tools/shell/) and [client libraries](/influxdb/v1.7/tools/api_client_libraries/) for alternative ways to query the database). + +> **Note**: The following examples use `curl`, a command line tool that transfers data using URLs. Learn the basics of `curl` with the [HTTP Scripting Guide](https://curl.haxx.se/docs/httpscripting.html). + +To perform a query, send a `GET` request to the `/query` endpoint, set the URL parameter `db` as the target database, and set the URL parameter `q` as your query. +You may also use a `POST` request by sending the same parameters either as URL parameters or as part of the body with `application/x-www-form-urlencoded`. +The example below uses the InfluxDB API to query the same database that you encountered in [Writing Data](/influxdb/v1.7/guides/writing_data/). + +```bash +curl -G 'http://localhost:8086/query?pretty=true' --data-urlencode "db=mydb" --data-urlencode "q=SELECT \"value\" FROM \"cpu_load_short\" WHERE \"region\"='us-west'" +``` + +InfluxDB returns JSON. +The results of your query appear in the `"results"` array. +If an error occurs, InfluxDB sets an `"error"` key with an explanation of the error. + + +```json +{ + "results": [ + { + "statement_id": 0, + "series": [ + { + "name": "cpu_load_short", + "columns": [ + "time", + "value" + ], + "values": [ + [ + "2015-01-29T21:55:43.702900257Z", + 2 + ], + [ + "2015-01-29T21:55:43.702900257Z", + 0.55 + ], + [ + "2015-06-11T20:46:02Z", + 0.64 + ] + ] + } + ] + } + ] +} +``` + +> **Note:** Appending `pretty=true` to the URL enables pretty-printed JSON output. +While this is useful for debugging or when querying directly with tools like `curl`, it is not recommended for production use as it consumes unnecessary network bandwidth. + +### Multiple queries + +Send multiple queries to InfluxDB in a single API call. +Simply delimit each query using a semicolon, for example: + +```bash +curl -G 'http://localhost:8086/query?pretty=true' --data-urlencode "db=mydb" --data-urlencode "q=SELECT \"value\" FROM \"cpu_load_short\" WHERE \"region\"='us-west';SELECT count(\"value\") FROM \"cpu_load_short\" WHERE \"region\"='us-west'" +``` + +returns: + +```json +{ + "results": [ + { + "statement_id": 0, + "series": [ + { + "name": "cpu_load_short", + "columns": [ + "time", + "value" + ], + "values": [ + [ + "2015-01-29T21:55:43.702900257Z", + 2 + ], + [ + "2015-01-29T21:55:43.702900257Z", + 0.55 + ], + [ + "2015-06-11T20:46:02Z", + 0.64 + ] + ] + } + ] + }, + { + "statement_id": 1, + "series": [ + { + "name": "cpu_load_short", + "columns": [ + "time", + "count" + ], + "values": [ + [ + "1970-01-01T00:00:00Z", + 3 + ] + ] + } + ] + } + ] +} +``` + +### Other options when querying data + +#### Timestamp format + +Everything in InfluxDB is stored and reported in UTC.
+By default, timestamps are returned in RFC3339 UTC and have nanosecond precision, for example `2015-08-04T19:05:14.318570484Z`. +If you want timestamps in Unix epoch format, include the query string parameter `epoch` in your request, where `epoch=[h,m,s,ms,u,ns]`. +For example, get epoch in seconds with: + +```bash +curl -G 'http://localhost:8086/query' --data-urlencode "db=mydb" --data-urlencode "epoch=s" --data-urlencode "q=SELECT \"value\" FROM \"cpu_load_short\" WHERE \"region\"='us-west'" +``` + +#### Authentication + +Authentication in InfluxDB is disabled by default. +See [Authentication and Authorization](/influxdb/v1.7/administration/authentication_and_authorization/) for how to enable and set up authentication. + +#### Maximum row limit + +The [`max-row-limit` configuration option](/influxdb/v1.7/administration/config#max-row-limit-0) allows users to limit the maximum number of returned results to prevent InfluxDB from running out of memory while it aggregates the results. +The `max-row-limit` configuration option is set to `0` by default. +That default setting allows for an unlimited number of rows returned per request. + +The maximum row limit only applies to non-chunked queries. Chunked queries can return an unlimited number of points. + +#### Chunking + +Chunking can be used to return results in streamed batches rather than as a single response by setting the query string parameter `chunked=true`. Responses will be chunked by series or by every 10,000 points, whichever occurs first. To change the maximum chunk size, set the query string parameter `chunk_size` to a different value. +For example, get your results in batches of 20,000 points with: + +```bash +curl -G 'http://localhost:8086/query' --data-urlencode "db=deluge" --data-urlencode "chunked=true" --data-urlencode "chunk_size=20000" --data-urlencode "q=SELECT * FROM liters" +``` + +### InfluxQL + +Now that you know how to query data, check out the [Data Exploration page](/influxdb/v1.7/query_language/data_exploration/) to get acquainted with InfluxQL. +For more information about querying data with the InfluxDB API, please see the [API reference documentation](/influxdb/v1.7/tools/api/#query-http-endpoint). diff --git a/content/influxdb/v1.7/guides/writing_data.md b/content/influxdb/v1.7/guides/writing_data.md new file mode 100644 index 000000000..6029c251d --- /dev/null +++ b/content/influxdb/v1.7/guides/writing_data.md @@ -0,0 +1,178 @@ +--- +title: Writing data with the InfluxDB API + +menu: + influxdb_1_7: + weight: 10 + parent: Guides +--- + +Write data into InfluxDB using the [command line interface](/influxdb/v1.7/tools/shell/), [client libraries](/influxdb/v1.7/clients/api/), and plugins for common data formats such as [Graphite](/influxdb/v1.7/write_protocols/graphite/). + +> **Note**: The following examples use `curl`, a command line tool that transfers data using URLs. Learn the basics of `curl` with the [HTTP Scripting Guide](https://curl.haxx.se/docs/httpscripting.html). + +### Create a database using the InfluxDB API + +To create a database, send a `POST` request to the `/query` endpoint and set the URL parameter `q` to `CREATE DATABASE <new_database_name>`.
+The example below sends a request to InfluxDB running on `localhost` and creates the `mydb` database: + +```bash +curl -i -XPOST http://localhost:8086/query --data-urlencode "q=CREATE DATABASE mydb" +``` + +### Write data using the InfluxDB API + +The InfluxDB API is the primary means of writing data into InfluxDB, by sending `POST` requests to the `/write` endpoint. + +The example below writes a single point to the `mydb` database. +The data consist of the [measurement](/influxdb/v1.7/concepts/glossary/#measurement) `cpu_load_short`, the [tag keys](/influxdb/v1.7/concepts/glossary/#tag-key) `host` and `region` with the [tag values](/influxdb/v1.7/concepts/glossary/#tag-value) `server01` and `us-west`, the [field key](/influxdb/v1.7/concepts/glossary/#field-key) `value` with a [field value](/influxdb/v1.7/concepts/glossary/#field-value) of `0.64`, and the [timestamp](/influxdb/v1.7/concepts/glossary/#timestamp) `1434055562000000000`. + +```bash +curl -i -XPOST 'http://localhost:8086/write?db=mydb' --data-binary 'cpu_load_short,host=server01,region=us-west value=0.64 1434055562000000000' +``` + +When writing points, you must specify an existing database in the `db` query parameter. +Points will be written to `db`'s default retention policy if you do not supply a retention policy via the `rp` query parameter. +See the [InfluxDB API Reference](/influxdb/v1.7/tools/api/#write-http-endpoint) documentation for a complete list of the available query parameters. + +The body of the POST - we call this the [InfluxDB line protocol](/influxdb/v1.7/concepts/glossary/#influxdb-line-protocol) - contains the time-series data that you wish to store. +They consist of a measurement, tags, fields, and a timestamp. +InfluxDB requires a measurement name. +Strictly speaking, tags are optional but most series include tags to differentiate data sources and to make querying both easy and efficient. +Both tag keys and tag values are strings. +Field keys are required and are always strings, and, [by default](/influxdb/v1.7/write_protocols/line_protocol_reference/#data-types), field values are floats. +The timestamp - supplied at the end of the line in Unix time in nanoseconds since January 1, 1970 UTC - is optional. +If you do not specify a timestamp InfluxDB uses the server's local nanosecond timestamp in Unix epoch. +Anything that has to do with time in InfluxDB is always UTC. + +> **Note:** Avoid using the following reserved keys: `_field`, `_measurement`, and `time`. If reserved keys are included as a tag or field key, the associated point is discarded. + +### Configure gzip compression + +InfluxDB supports gzip compression. To reduce network traffic, consider the following options: + +* To accept compressed data from InfluxDB, add the `Accept-Encoding: gzip` header to InfluxDB API requests. + +* To compress data before sending it to InfluxDB, add the `Content-Encoding: gzip` header to InfluxDB API requests. + +For details about enabling gzip for client libraries, see your client library documentation. + +#### Enable gzip compression in the Telegraf InfluxDB output plugin + +* In the Telegraf configuration file (telegraf.conf), under [[outputs.influxdb]], change + `content_encoding = "identity"` (default) to `content_encoding = "gzip"` + +>**Note** +Writes to InfluxDB 2.x [[outputs.influxdb_v2]] are configured to compress content in gzip format by default. + +### Writing multiple points + +Post multiple points to multiple series at the same time by separating each point with a new line. 
+Batching points in this manner results in much higher performance. + +The following example writes three points to the database `mydb`. +The first point belongs to the series with the measurement `cpu_load_short` and tag set `host=server02` and has the server's local timestamp. +The second point belongs to the series with the measurement `cpu_load_short` and tag set `host=server02,region=us-west` and has the specified timestamp `1422568543702900257`. +The third point has the same specified timestamp as the second point, but it is written to the series with the measurement `cpu_load_short` and tag set `direction=in,host=server01,region=us-west`. + +```bash +curl -i -XPOST 'http://localhost:8086/write?db=mydb' --data-binary 'cpu_load_short,host=server02 value=0.67 +cpu_load_short,host=server02,region=us-west value=0.55 1422568543702900257 +cpu_load_short,direction=in,host=server01,region=us-west value=2.0 1422568543702900257' +``` + +### Writing points from a file + +Write points from a file by passing `@filename` to `curl`. +The data in the file should follow the [InfluxDB line protocol syntax](/influxdb/v1.7/write_protocols/write_syntax/). + +Example of a properly-formatted file (`cpu_data.txt`): + +```txt +cpu_load_short,host=server02 value=0.67 +cpu_load_short,host=server02,region=us-west value=0.55 1422568543702900257 +cpu_load_short,direction=in,host=server01,region=us-west value=2.0 1422568543702900257 +``` + +Write the data in `cpu_data.txt` to the `mydb` database with: + +```bash +curl -i -XPOST 'http://localhost:8086/write?db=mydb' --data-binary @cpu_data.txt +``` + +> **Note:** If your data file has more than 5,000 points, it may be necessary to split that file into several files in order to write your data in batches to InfluxDB. +By default, the HTTP request times out after five seconds. +InfluxDB will still attempt to write the points after that time out but there will be no confirmation that they were successfully written. + +### Schemaless Design + +InfluxDB is a schemaless database. +You can add new measurements, tags, and fields at any time. +Note that if you attempt to write data with a different type than previously used (for example, writing a string to a field that previously accepted integers), InfluxDB will reject those data. + +### A note on REST + +InfluxDB uses HTTP solely as a convenient and widely supported data transfer protocol. + +Modern web APIs have settled on REST because it addresses a common need. +As the number of endpoints grows the need for an organizing system becomes pressing. +REST is the industry agreed style for organizing large numbers of endpoints. +This consistency is good for those developing and consuming the API: everyone involved knows what to expect. + +REST, however, is a convention. +InfluxDB makes do with three API endpoints. +This simple, easy to understand system uses HTTP as a transfer method for [InfluxQL](/influxdb/v1.7/query_language/spec/). +The InfluxDB API makes no attempt to be RESTful. + +### HTTP response summary + +* 2xx: If your write request received `HTTP 204 No Content`, it was a success! +* 4xx: InfluxDB could not understand the request. +* 5xx: The system is overloaded or significantly impaired. 
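+ +As a rough illustration of these status codes, here is a small sketch (not part of the original guide) that sends the `cpu_data.txt` batch from above and branches on the status code returned by the `/write` endpoint; it assumes a local server on the default port: + +```bash +# Send a batch write and capture only the HTTP status code. +# A successful write returns 204 No Content. +status=$(curl -s -o /dev/null -w '%{http_code}' -XPOST 'http://localhost:8086/write?db=mydb' --data-binary @cpu_data.txt) + +if [ "$status" = "204" ]; then + echo "write succeeded" +else + echo "write failed with HTTP $status" >&2 +fi +```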
+ +#### Examples + +##### Writing a float to a field that previously accepted booleans + +```bash +curl -i -XPOST 'http://localhost:8086/write?db=hamlet' --data-binary 'tobeornottobe booleanonly=true' + +curl -i -XPOST 'http://localhost:8086/write?db=hamlet' --data-binary 'tobeornottobe booleanonly=5' +``` + +returns: + +```bash +HTTP/1.1 400 Bad Request +Content-Type: application/json +Request-Id: [...] +X-Influxdb-Version: 1.4.x +Date: Wed, 01 Mar 2017 19:38:01 GMT +Content-Length: 150 + +{"error":"field type conflict: input field \"booleanonly\" on measurement \"tobeornottobe\" is type float, already exists as type boolean dropped=1"} +``` + +##### Writing a point to a database that doesn't exist + +```bash +curl -i -XPOST 'http://localhost:8086/write?db=atlantis' --data-binary 'liters value=10' +``` + +returns: + +```bash +HTTP/1.1 404 Not Found +Content-Type: application/json +Request-Id: [...] +X-Influxdb-Version: 1.4.x +Date: Wed, 01 Mar 2017 19:38:35 GMT +Content-Length: 45 + +{"error":"database not found: \"atlantis\""} +``` + +### Next steps + +Now that you know how to write data with the InfluxDB API, discover how to query them with the [Querying data](/influxdb/v1.7/guides/querying_data/) guide! +For more information about writing data with the InfluxDB API, please see the [InfluxDB API reference](/influxdb/v1.7/tools/api/#write-http-endpoint). diff --git a/content/influxdb/v1.7/high_availability/_index.md b/content/influxdb/v1.7/high_availability/_index.md new file mode 100644 index 000000000..b2e517f1a --- /dev/null +++ b/content/influxdb/v1.7/high_availability/_index.md @@ -0,0 +1,15 @@ +--- +title: High availability with InfluxDB Enterprise + +menu: + influxdb_1_7: + name: High availability + weight: 100 +--- + +## [Clustering with InfluxDB Enterprise](/influxdb/v1.7/high_availability/clusters/) + +InfluxDB OSS does not support clustering. +For high availability or horizontal scaling of InfluxDB, consider the InfluxData +commercial clustered offering, +[InfluxDB Enterprise](https://portal.influxdata.com/). diff --git a/content/influxdb/v1.7/high_availability/clusters.md b/content/influxdb/v1.7/high_availability/clusters.md new file mode 100644 index 000000000..fcbf47ae8 --- /dev/null +++ b/content/influxdb/v1.7/high_availability/clusters.md @@ -0,0 +1,18 @@ +--- +title: Clustering with InfluxDB Enterprise +aliases: + - /influxdb/v1.7/clustering/ + - /influxdb/v1.7/clustering/cluster_setup/ + - /influxdb/v1.7/clustering/cluster_node_config/ + - /influxdb/v1.7/guides/clustering/ +menu: + influxdb_1_7: + name: Clustering + weight: 10 + parent: High availability +--- + +InfluxDB OSS does not support clustering. +For high availability or horizontal scaling of InfluxDB, consider the InfluxData +commercial clustered offering, +[InfluxDB Enterprise](/enterprise_influxdb/latest/). diff --git a/content/influxdb/v1.7/introduction/_index.md b/content/influxdb/v1.7/introduction/_index.md new file mode 100644 index 000000000..1d06fcd52 --- /dev/null +++ b/content/influxdb/v1.7/introduction/_index.md @@ -0,0 +1,21 @@ +--- +title: Introducing InfluxDB OSS +menu: + influxdb_1_7: + name: Introduction + weight: 20 +--- + +To get up and running with the open source (OSS) version of InfluxDB, complete the following tasks: + +## [Download InfluxDB OSS](https://portal.influxdata.com/downloads) + +Find the latest stable download and nightly builds of InfluxDB. 
+ +## [Install InfluxDB OSS](/influxdb/v1.7/introduction/installation/) + +Learn how to install InfluxDB on Ubuntu, Debian, Red Hat, CentOS, and macOS. + +## [Get started with InfluxDB OSS](/influxdb/v1.7/introduction/getting-started/) + +Discover how to read and write time series data using InfluxDB. diff --git a/content/influxdb/v1.7/introduction/downloading.md b/content/influxdb/v1.7/introduction/downloading.md new file mode 100644 index 000000000..955d07879 --- /dev/null +++ b/content/influxdb/v1.7/introduction/downloading.md @@ -0,0 +1,10 @@ +--- +title: Downloading InfluxDB OSS +menu: + influxdb_1_7: + name: Downloading + weight: 10 + parent: Introduction +--- + +Download the latest InfluxDB open source (OSS) release at the [InfluxData download page](https://portal.influxdata.com/downloads). diff --git a/content/influxdb/v1.7/introduction/getting-started.md b/content/influxdb/v1.7/introduction/getting-started.md new file mode 100644 index 000000000..576555f85 --- /dev/null +++ b/content/influxdb/v1.7/introduction/getting-started.md @@ -0,0 +1,196 @@ +--- +title: Getting started with InfluxDB OSS +aliases: + - /influxdb/v1.7/introduction/getting_started/ +menu: + influxdb_1_7: + name: Getting started + weight: 30 + parent: Introduction +--- + +With InfluxDB open source (OSS) [installed](/influxdb/v1.7/introduction/installation), you're ready to start doing some awesome things. +In this section we'll use the `influx` [command line interface](/influxdb/v1.7/tools/shell/) (CLI), which is included in all +InfluxDB packages and is a lightweight and simple way to interact with the database. +The CLI communicates with InfluxDB directly by making requests to the InfluxDB API over port `8086` by default. + +> **Note:** The database can also be used by making raw HTTP requests. +See [Writing Data](/influxdb/v1.7/guides/writing_data/) and [Querying Data](/influxdb/v1.7/guides/querying_data/) +for examples with the `curl` application. + +## Creating a database + +If you've installed InfluxDB locally, the `influx` command is available via the command line. +Execute `influx` to start the CLI and automatically connect to the local InfluxDB instance +(assuming you have already started the server with `service influxdb start` or by running `influxd` directly). +The output should look like this: + +```bash +$ influx -precision rfc3339 +Connected to http://localhost:8086 version 1.7.x +InfluxDB shell 1.7.x +> +``` + +> **Notes:** +> +* The InfluxDB API runs on port `8086` by default. +Therefore, `influx` will connect to port `8086` and `localhost` by default. +If you need to alter these defaults, run `influx --help`. +* The [`-precision` argument](/influxdb/v1.7/tools/shell/#influx-options) specifies the format/precision of any returned timestamps. +In the example above, `rfc3339` tells InfluxDB to return timestamps in [RFC3339 format](https://www.ietf.org/rfc/rfc3339.txt) (`YYYY-MM-DDTHH:MM:SS.nnnnnnnnnZ`). + +The command line is now ready to take input in the form of the Influx Query Language (a.k.a. InfluxQL) statements. +To exit the InfluxQL shell, type `exit` and hit return. + +A fresh install of InfluxDB has no databases (apart from the system `_internal`), +so creating one is our first task. +You can create a database with the `CREATE DATABASE <db-name>` InfluxQL statement, +where `<db-name>` is the name of the database you wish to create. +Names of databases can contain any Unicode character as long as the string is double-quoted.
+Names can also be left unquoted if they contain _only_ ASCII letters, +digits, or underscores and do not begin with a digit. + +Throughout this guide, we'll use the database name `mydb`: + +```sql +> CREATE DATABASE mydb +> +``` +> **Notes:** +> +* `default` is a [reserved InfluxQL keyword](/influxdb/v1.7/query_language/spec/#keywords) and cannot be used as a database name. +* After hitting enter, a new prompt appears and nothing else is displayed. +In the CLI, this means the statement was executed and there were no errors to display. +If something goes wrong, an error is displayed. No news is good news! + +Now that the `mydb` database is created, we'll use the `SHOW DATABASES` statement +to display all existing databases: + +```sql +> SHOW DATABASES +name: databases +name +---- +_internal +mydb + +> +``` + +> **Note:** The `_internal` database is created and used by InfluxDB to store internal runtime metrics. +Check it out later to get an interesting look at how InfluxDB is performing under the hood. + +Unlike `SHOW DATABASES`, most InfluxQL statements must operate against a specific database. +You may explicitly name the database with each query, +but the CLI provides a convenience statement, `USE <db-name>`, +which will automatically set the database for all future requests. For example: + +```sql +> USE mydb +Using database mydb +> +``` + +Now future commands will only be run against the `mydb` database. + +## Writing and exploring data + +Now that we have a database, InfluxDB is ready to accept queries and writes. + +First, a short primer on the datastore. +Data in InfluxDB is organized by "time series", +which contain a measured value, like "cpu_load" or "temperature". +Time series have zero to many `points`, one for each discrete sample of the metric. +Points consist of `time` (a timestamp), a `measurement` ("cpu_load", for example), +at least one key-value `field` (the measured value itself, e.g. +"value=0.64", or "temperature=21.2"), and zero to many key-value `tags` containing any metadata about the value (e.g. +"host=server01", "region=EMEA", "dc=Frankfurt"). + +Conceptually you can think of a `measurement` as an SQL table, +where the primary index is always time. +`tags` and `fields` are effectively columns in the table. +`tags` are indexed, and `fields` are not. +The difference is that, with InfluxDB, you can have millions of measurements, +you don't have to define schemas up-front, and null values aren't stored. + +Points are written to InfluxDB using the InfluxDB line protocol, which has the following format: + +``` +<measurement>[,<tag-key>=<tag-value>...] <field-key>=<field-value>[,<field2-key>=<field2-value>...] [unix-nano-timestamp] +``` + +The following lines are all examples of points that can be written to InfluxDB: + +``` +cpu,host=serverA,region=us_west value=0.64 +payment,device=mobile,product=Notepad,method=credit billed=33,licenses=3i 1434067467100293230 +stock,symbol=AAPL bid=127.46,ask=127.48 +temperature,machine=unit42,type=assembly external=25,internal=37 1434067467000000000 +``` + +> **Note:** For details on the InfluxDB line protocol, see the [InfluxDB line protocol syntax](/influxdb/v1.7/write_protocols/line_protocol_reference/#line-protocol-syntax) page. + +To insert a single time series data point into InfluxDB using the CLI, enter `INSERT` followed by a point: + +```sql +> INSERT cpu,host=serverA,region=us_west value=0.64 +> +``` + +A point with the measurement name of `cpu` and tags `host` and `region` has now been written to the database, with the measured `value` of `0.64`.
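+ +The CLI's `INSERT` does the same work as a write against the HTTP API. For comparison, a minimal sketch of the equivalent `curl` write, assuming the server is listening on the default port and the `mydb` database from above exists: + +```bash +# Equivalent HTTP write of the point inserted above. +curl -i -XPOST 'http://localhost:8086/write?db=mydb' --data-binary 'cpu,host=serverA,region=us_west value=0.64' +```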
+ +Now we will query for the data we just wrote: + +```sql +> SELECT "host", "region", "value" FROM "cpu" +name: cpu +--------- +time host region value +2015-10-21T19:28:07.580664347Z serverA us_west 0.64 + +> +``` + +> **Note:** We did not supply a timestamp when writing our point. +When no timestamp is supplied for a point, InfluxDB assigns the local current timestamp when the point is ingested. +That means your timestamp will be different. + +Let's try storing another type of data, with two fields in the same measurement: + +```sql +> INSERT temperature,machine=unit42,type=assembly external=25,internal=37 +> +``` + +To return all fields and tags with a query, you can use the `*` operator: + +```sql +> SELECT * FROM "temperature" +name: temperature +----------------- +time external internal machine type +2015-10-21T19:28:08.385013942Z 25 37 unit42 assembly + +> +``` + +> **Warning:** Using `*` without a `LIMIT` clause on a large database can cause performance issues. +You can use `Ctrl+C` to cancel a query that is taking too long to respond. + +InfluxQL has many [features and keywords](/influxdb/v1.7/query_language/spec/) that are not covered here, +including support for Go-style regex. For example: + +```sql +> SELECT * FROM /.*/ LIMIT 1 +-- +> SELECT * FROM "cpu" WHERE "value" > 0.9 +``` + +This is all you need to know to write data into InfluxDB and query it back. +To learn more about the InfluxDB write protocol, +check out the guide on [Writing Data](/influxdb/v1.7/guides/writing_data/). +To further explore the query language, +check out the guide on [Querying Data](/influxdb/v1.7/guides/querying_data/). +For more information on InfluxDB concepts, check out the [Key Concepts](/influxdb/v1.7/concepts/key_concepts/) page. diff --git a/content/influxdb/v1.7/introduction/installation.md b/content/influxdb/v1.7/introduction/installation.md new file mode 100644 index 000000000..49e6a90a8 --- /dev/null +++ b/content/influxdb/v1.7/introduction/installation.md @@ -0,0 +1,368 @@ +--- +title: Installing InfluxDB OSS +menu: + influxdb_1_7: + name: Installing + weight: 20 + parent: Introduction +--- + +This page provides directions for installing, starting, and configuring InfluxDB open source (OSS). + +## InfluxDB OSS installation requirements + +Installation of the InfluxDB package may require `root` or administrator privileges in order to complete successfully. + +### InfluxDB OSS networking ports + +By default, InfluxDB uses the following network ports: + +- TCP port `8086` is available for client-server communication using the InfluxDB API. +- TCP port `8088` is available for the RPC service to perform backup and restore operations. + +In addition to the ports above, InfluxDB also offers multiple plugins that may +require [custom ports](/influxdb/v1.7/administration/ports/). +All port mappings can be modified through the [configuration file](/influxdb/v1.7/administration/config), +which is located at `/etc/influxdb/influxdb.conf` for default installations. + +### Network Time Protocol (NTP) + +InfluxDB uses a host's local time in UTC to assign timestamps to data and for +coordination purposes. +Use the Network Time Protocol (NTP) to synchronize time between hosts; if hosts' +clocks aren't synchronized with NTP, the timestamps on the data written to InfluxDB +can be inaccurate. + +## Installing InfluxDB OSS + +If you want to use InfluxDB but don't want to install software, check out our +[managed hosted InfluxDB offering](https://cloud.influxdata.com).
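+ +Before installing, it can be worth confirming that nothing else is already listening on the default ports listed above. A rough sketch, assuming a Linux host with the `ss` utility available: + +```bash +# Check that the default InfluxDB ports (8086 API, 8088 RPC) are free. +for port in 8086 8088; do + if ss -tln | grep -q ":$port "; then + echo "port $port is already in use" >&2 + fi +done +```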
+ +> **Note:** Windows support is experimental. + +{{< tabs-wrapper >}} +{{% tabs %}} +[Ubuntu & Debian](#) +[Red Hat & CentOS](#) +[SLES & openSUSE](#) +[FreeBSD/PC-BSD](#) +[macOS](#) +{{% /tabs %}} +{{% tab-content %}} +For instructions on how to install the Debian package from a file, +please see the +[downloads page](https://influxdata.com/downloads/). + +Debian and Ubuntu users can install the latest stable version of InfluxDB using the +`apt-get` package manager. + +For Ubuntu users, add the InfluxData repository with the following commands: + +{{< code-tabs-wrapper >}} +{{% code-tabs %}} +[wget](#) +[curl](#) +{{% /code-tabs %}} +{{% code-tab-content %}} +```bash +wget -qO- https://repos.influxdata.com/influxdb.key | sudo apt-key add - +source /etc/lsb-release +echo "deb https://repos.influxdata.com/${DISTRIB_ID,,} ${DISTRIB_CODENAME} stable" | sudo tee /etc/apt/sources.list.d/influxdb.list +``` +{{% /code-tab-content %}} + +{{% code-tab-content %}} +```bash +curl -sL https://repos.influxdata.com/influxdb.key | sudo apt-key add - +source /etc/lsb-release +echo "deb https://repos.influxdata.com/${DISTRIB_ID,,} ${DISTRIB_CODENAME} stable" | sudo tee /etc/apt/sources.list.d/influxdb.list +``` +{{% /code-tab-content %}} +{{< /code-tabs-wrapper >}} + +For Debian users, add the InfluxData repository: + +{{< code-tabs-wrapper >}} +{{% code-tabs %}} +[wget](#) +[curl](#) +{{% /code-tabs %}} +{{% code-tab-content %}} +```bash +wget -qO- https://repos.influxdata.com/influxdb.key | sudo apt-key add - +source /etc/os-release +echo "deb https://repos.influxdata.com/debian $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/influxdb.list +``` +{{% /code-tab-content %}} + +{{% code-tab-content %}} +```bash +curl -sL https://repos.influxdata.com/influxdb.key | sudo apt-key add - +source /etc/os-release +echo "deb https://repos.influxdata.com/debian $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/influxdb.list +``` +{{% /code-tab-content %}} +{{< /code-tabs-wrapper >}} + + +Then, install and start the InfluxDB service: + +```bash +sudo apt-get update && sudo apt-get install influxdb +sudo service influxdb start +``` + +Or if your operating system is using systemd (Ubuntu 15.04+, Debian 8+): + +```bash +sudo apt-get update && sudo apt-get install influxdb +sudo systemctl unmask influxdb.service +sudo systemctl start influxdb +``` + +{{% /tab-content %}} + +{{% tab-content %}} + +For instructions on how to install the RPM package from a file, please see the [downloads page](https://influxdata.com/downloads/). + +Red Hat and CentOS users can install the latest stable version of InfluxDB using the `yum` package manager: + +```bash +cat <<EOF | sudo tee /etc/yum.repos.d/influxdb.repo +[influxdb] +name = InfluxDB Repository - RHEL \$releasever +baseurl = https://repos.influxdata.com/rhel/\$releasever/\$basearch/stable +enabled = 1 +gpgcheck = 1 +gpgkey = https://repos.influxdata.com/influxdb.key +EOF +``` + +Once the repository is added to the `yum` configuration, install and start the InfluxDB service: + +```bash +sudo yum install influxdb +sudo service influxdb start +``` + +Or if your operating system is using systemd (CentOS 7+, RHEL 7+): + +```bash +sudo yum install influxdb +sudo systemctl start influxdb +``` + +{{% /tab-content %}} + +{{% tab-content %}} + +There are RPM packages provided by openSUSE Build Service for SUSE Linux users: + +```bash +# add go repository +zypper ar -f obs://devel:languages:go/ go +# install latest influxdb +zypper in influxdb +``` + +{{% /tab-content %}} + +{{% tab-content %}} + +InfluxDB is part of the FreeBSD package system. +It can be installed by running: + +```bash +sudo pkg install influxdb +``` + +The configuration file is located at `/usr/local/etc/influxd.conf` with examples in `/usr/local/etc/influxd.conf.sample`. + +Start the backend by executing: + +```bash +sudo service influxd onestart +``` + +To have InfluxDB start at system boot, add `influxd_enable="YES"` to `/etc/rc.conf`. + +{{% /tab-content %}} + +{{% tab-content %}} + +Users of macOS 10.8 and higher can install InfluxDB using the [Homebrew](http://brew.sh/) package manager. +Once `brew` is installed, you can install InfluxDB by running: + +```bash +brew update +brew install influxdb +``` + +{{% /tab-content %}} +{{< /tabs-wrapper >}} + +### Verify the authenticity of downloaded binary (optional) + +For added security, follow these steps to verify the signature of your InfluxDB download with `gpg`. + +(Most operating systems include the `gpg` command by default. +If `gpg` is not available, see the [GnuPG homepage](https://gnupg.org/download/) for installation instructions.) + +1. Download and import InfluxData's public key: + + ``` + curl -sL https://repos.influxdata.com/influxdb.key | gpg --import + ``` + +2. Download the signature file for the release by adding `.asc` to the download URL. + For example: + + ``` + wget https://dl.influxdata.com/influxdb/releases/influxdb-1.7.10_linux_amd64.tar.gz.asc + ``` + +3.
Verify the signature with `gpg --verify`: + + ``` + gpg --verify influxdb-1.7.10_linux_amd64.tar.gz.asc influxdb-1.7.10_linux_amd64.tar.gz + ``` + + The output from this command should include the following: + + ``` + gpg: Good signature from "InfluxDB Packaging Service <support@influxdb.com>" [unknown] + ``` + + +## Configuring InfluxDB OSS + +The system has internal defaults for every configuration file setting. +View the default configuration settings with the `influxd config` command. + +Most of the settings in the local configuration file +(`/etc/influxdb/influxdb.conf`) are commented out; all +commented-out settings will be determined by the internal defaults. +Any uncommented settings in the local configuration file override the +internal defaults. +Note that the local configuration file does not need to include every +configuration setting. + +There are two ways to launch InfluxDB with your configuration file: + +* Point the process to the correct configuration file by using the `-config` +option: + + ```bash + influxd -config /etc/influxdb/influxdb.conf + ``` +* Set the environment variable `INFLUXDB_CONFIG_PATH` to the path of your +configuration file and start the process. +For example: + + ``` + echo $INFLUXDB_CONFIG_PATH + /etc/influxdb/influxdb.conf + + influxd + ``` + +InfluxDB first checks for the `-config` option and then for the environment +variable. + +See the [Configuration](/influxdb/v1.7/administration/config/) documentation for more information. + +### Data and WAL directory permissions + +Make sure the directories in which data and the [write ahead log](/influxdb/v1.7/concepts/glossary#wal-write-ahead-log) (WAL) are stored are writable for the user running the `influxd` service. + +> **Note:** If the data and WAL directories are not writable, the `influxd` service will not start. + +Information about `data` and `wal` directory paths is available in the [Data settings](/influxdb/v1.7/administration/config/#data-settings) section of the [Configuring InfluxDB](/influxdb/v1.7/administration/config/) documentation. + +## Hosting InfluxDB OSS on AWS + +### Hardware requirements for InfluxDB + +We recommend using two SSD volumes, using one for the `influxdb/wal` and the other for the `influxdb/data`. +Depending on your load, each volume should have around 1k-3k provisioned IOPS. +The `influxdb/data` volume should have more disk space with lower IOPS and the `influxdb/wal` volume should have less disk space with higher IOPS. + +Each machine should have a minimum of 8GB RAM. + +We’ve seen the best performance with the R4 class of machines, as they provide more memory than either the C3/C4 or the M4 classes. + +### Configuring InfluxDB OSS instances + +This example assumes that you are using two SSD volumes and that you have mounted them at `/mnt/influx` and `/mnt/db`, respectively. +For more information on how to do that, see the Amazon documentation on how to [Add a Volume to Your Instance](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-attaching-volume.html). + +### Configuration file + +You'll have to update the configuration file appropriately for each InfluxDB instance you have. + +``` +... + +[meta] + dir = "/mnt/db/meta" + ... + +... + +[data] + dir = "/mnt/db/data" + ... + wal-dir = "/mnt/influx/wal" + ... + +... + +[hinted-handoff] + ... + dir = "/mnt/db/hh" + ...
+``` + +### InfluxDB OSS permissions + +When using non-standard directories for InfluxDB data and configurations, also be sure to set filesystem permissions correctly: + +```bash +chown influxdb:influxdb /mnt/influx +chown influxdb:influxdb /mnt/db +``` + +For InfluxDB 1.7.6 or later, you must give owner permissions to the `init.sh` file. To do this, run the following script in your `influxdb` directory: + +```sh +if [ ! -f "$STDOUT" ]; then + mkdir -p $(dirname $STDOUT) + chown $USER:$GROUP $(dirname $STDOUT) + fi + + if [ ! -f "$STDERR" ]; then + mkdir -p $(dirname $STDERR) + chown $USER:$GROUP $(dirname $STDERR) + fi + + # Override init script variables with DEFAULT values + ``` diff --git a/content/influxdb/v1.7/query_language/_index.md b/content/influxdb/v1.7/query_language/_index.md new file mode 100644 index 000000000..2261349c3 --- /dev/null +++ b/content/influxdb/v1.7/query_language/_index.md @@ -0,0 +1,73 @@ +--- +title: Influx Query Language (InfluxQL) +menu: + influxdb_1_7: + weight: 70 + identifier: InfluxQL +--- + +This section introduces InfluxQL, the InfluxDB SQL-like query language for +working with data in InfluxDB databases. + +## InfluxQL tutorial + +The first seven documents in this section provide a tutorial-style introduction +to InfluxQL. +Feel free to download the dataset provided in +[Sample Data](/influxdb/v1.7/query_language/data_download/) and follow along +with the documentation. + +#### [Data exploration](/influxdb/v1.7/query_language/data_exploration/) + +Covers the query language basics for InfluxQL, including the +[`SELECT` statement](/influxdb/v1.7/query_language/data_exploration/#the-basic-select-statement), +[`GROUP BY` clauses](/influxdb/v1.7/query_language/data_exploration/#the-group-by-clause), +[`INTO` clauses](/influxdb/v1.7/query_language/data_exploration/#the-into-clause), and more. +See Data Exploration to learn about +[time syntax](/influxdb/v1.7/query_language/data_exploration/#time-syntax) and +[regular expressions](/influxdb/v1.7/query_language/data_exploration/#regular-expressions) in +queries. + +#### [Schema exploration](/influxdb/v1.7/query_language/schema_exploration/) + +Covers queries that are useful for viewing and exploring your +[schema](/influxdb/v1.7/concepts/glossary/#schema). +See Schema Exploration for syntax explanations and examples of InfluxQL's `SHOW` +queries. + +#### [Database management](/influxdb/v1.7/query_language/database_management/) + +Covers InfluxQL for managing +[databases](/influxdb/v1.7/concepts/glossary/#database) and +[retention policies](/influxdb/v1.7/concepts/glossary/#retention-policy-rp) in +InfluxDB. +See Database Management for creating and dropping databases and retention +policies as well as deleting and dropping data. + +#### [InfluxQL functions](/influxdb/v1.7/query_language/functions/) + +Covers all [InfluxQL functions](/influxdb/v1.7/query_language/functions/). + +#### [InfluxQL continuous queries](/influxdb/v1.7/query_language/continuous_queries/) + +Covers the +[basic syntax](/influxdb/v1.7/query_language/continuous_queries/#basic-syntax) +, +[advanced syntax](/influxdb/v1.7/query_language/continuous_queries/#advanced-syntax) +, +and +[common use cases](/influxdb/v1.7/query_language/continuous_queries/#continuous-query-use-cases) +for +[continuous queries](/influxdb/v1.7/concepts/glossary/#continuous-query-cq). 
+This page also describes how to +[`SHOW`](/influxdb/v1.7/query_language/continuous_queries/#listing-continuous-queries) and +[`DROP`](/influxdb/v1.7/query_language/continuous_queries/#deleting-continuous-queries) +continuous queries. + +#### [InfluxQL mathematical operators](/influxdb/v1.7/query_language/math_operators/) + +Covers the use of mathematical operators in InfluxQL. + +## [InfluxQL reference](/influxdb/v1.7/query_language/spec/) + +The reference documentation for InfluxQL. diff --git a/content/influxdb/v1.7/query_language/continuous_queries.md b/content/influxdb/v1.7/query_language/continuous_queries.md new file mode 100644 index 000000000..5348ea210 --- /dev/null +++ b/content/influxdb/v1.7/query_language/continuous_queries.md @@ -0,0 +1,985 @@ +--- +title: InfluxQL Continuous Queries + +menu: + influxdb_1_7: + name: Continuous Queries + weight: 50 + parent: InfluxQL +--- + +## Introduction + +Continuous queries (CQ) are InfluxQL queries that run automatically and +periodically on real-time data and store query results in a +specified measurement. +
| [Basic Syntax](#basic-syntax) | [Advanced Syntax](#advanced-syntax) | [CQ Management](#continuous-query-management) | +|:------------------------------|:------------------------------------|:----------------------------------------------| +| [Examples of Basic Syntax](#examples-of-basic-syntax) | [Examples of Advanced Syntax](#examples-of-advanced-syntax) | [CQ Use Cases](#continuous-query-use-cases) | +| [Common Issues with Basic Syntax](#common-issues-with-basic-syntax) | [Common Issues with Advanced Syntax](#common-issues-with-advanced-syntax) | [Further information](#further-information) |
+ +## Syntax + +### Basic syntax + +```sql +CREATE CONTINUOUS QUERY <cq_name> ON <database_name> +BEGIN + <cq_query> +END +``` + +#### Description of basic syntax + +##### The `cq_query` + +The `cq_query` requires a +[function](/influxdb/v1.7/concepts/glossary/#function), +an [`INTO` clause](/influxdb/v1.7/query_language/spec/#clauses), +and a [`GROUP BY time()` clause](/influxdb/v1.7/query_language/spec/#clauses): + +```sql +SELECT <function[s]> INTO <destination_measurement> FROM <measurement> [WHERE <stuff>] GROUP BY time(<interval>)[,<tag_key[s]>] +``` + +>**Note:** Notice that the `cq_query` does not require a time range in a `WHERE` clause. +InfluxDB automatically generates a time range for the `cq_query` when it executes the CQ. +Any user-specified time ranges in the `cq_query`'s `WHERE` clause will be ignored +by the system. + +##### Schedule and coverage + +Continuous queries operate on real-time data. +They use the local server’s timestamp, the `GROUP BY time()` interval, and +InfluxDB database's preset time boundaries to determine when to execute and what time +range to cover in the query. + +CQs execute at the same interval as the `cq_query`'s `GROUP BY time()` interval, +and they run at the start of the InfluxDB database's preset time boundaries. +If the `GROUP BY time()` interval is one hour, the CQ executes at the start of +every hour. + +When the CQ executes, it runs a single query for the time range between +[`now()`](/influxdb/v1.7/concepts/glossary/#now) and `now()` minus the +`GROUP BY time()` interval. +If the `GROUP BY time()` interval is one hour and the current time is 17:00, +the query's time range is between 16:00 and 16:59.999999999. + +#### Examples of basic syntax + +The examples below use the following sample data in the `transportation` +database. +The measurement `bus_data` stores 15-minute resolution data on the number of bus +`passengers` and `complaints`: + +```sql +name: bus_data +-------------- +time passengers complaints +2016-08-28T07:00:00Z 5 9 +2016-08-28T07:15:00Z 8 9 +2016-08-28T07:30:00Z 8 9 +2016-08-28T07:45:00Z 7 9 +2016-08-28T08:00:00Z 8 9 +2016-08-28T08:15:00Z 15 7 +2016-08-28T08:30:00Z 15 7 +2016-08-28T08:45:00Z 17 7 +2016-08-28T09:00:00Z 20 7 +``` + +##### Automatically downsampling data + +Use a simple CQ to automatically downsample data from a single field +and write the results to another measurement in the same database. + +```sql +CREATE CONTINUOUS QUERY "cq_basic" ON "transportation" +BEGIN + SELECT mean("passengers") INTO "average_passengers" FROM "bus_data" GROUP BY time(1h) +END +``` + +`cq_basic` calculates the average hourly number of passengers from the +`bus_data` measurement and stores the results in the `average_passengers` +measurement in the `transportation` database. + +`cq_basic` executes at one-hour intervals, the same interval as the +`GROUP BY time()` interval. +Every hour, `cq_basic` runs a single query that covers the time range between +`now()` and `now()` minus the `GROUP BY time()` interval, that is, the time +range between `now()` and one hour prior to `now()`. + +Annotated log output on the morning of August 28, 2016: + +```sql +> +At **8:00** `cq_basic` executes a query with the time range `time >= '7:00' AND time < '8:00'`. +`cq_basic` writes one point to the `average_passengers` measurement: +> + name: average_passengers + ------------------------ + time mean + 2016-08-28T07:00:00Z 7 +> +At **9:00** `cq_basic` executes a query with the time range `time >= '8:00' AND time < '9:00'`.
+`cq_basic` writes one point to the `average_passengers` measurement: +> + name: average_passengers + ------------------------ + time mean + 2016-08-28T08:00:00Z 13.75 +``` + +Here are the results: + +```sql +> SELECT * FROM "average_passengers" +name: average_passengers +------------------------ +time mean +2016-08-28T07:00:00Z 7 +2016-08-28T08:00:00Z 13.75 +``` + +##### Automatically downsampling data into another retention policy + +[Fully qualify](/influxdb/v1.7/query_language/data_exploration/#the-basic-select-statement) +the destination measurement to store the downsampled data in a non-`DEFAULT` +[retention policy](/influxdb/v1.7/concepts/glossary/#retention-policy-rp) (RP). + +```sql +CREATE CONTINUOUS QUERY "cq_basic_rp" ON "transportation" +BEGIN + SELECT mean("passengers") INTO "transportation"."three_weeks"."average_passengers" FROM "bus_data" GROUP BY time(1h) +END +``` + +`cq_basic_rp` calculates the average hourly number of passengers from the +`bus_data` measurement and stores the results in the `transportation` database, +the `three_weeks` RP, and the `average_passengers` measurement. + +`cq_basic_rp` executes at one-hour intervals, the same interval as the +`GROUP BY time()` interval. +Every hour, `cq_basic_rp` runs a single query that covers the time range between +`now()` and `now()` minus the `GROUP BY time()` interval, that is, the time +range between `now()` and one hour prior to `now()`. + +Annotated log output on the morning of August 28, 2016: + +```sql +> +At **8:00** `cq_basic_rp` executes a query with the time range `time >= '7:00' AND time < '8:00'`. +`cq_basic_rp` writes one point to the `three_weeks` RP and the `average_passengers` measurement: +> + name: average_passengers + ------------------------ + time mean + 2016-08-28T07:00:00Z 7 +> +At **9:00** `cq_basic_rp` executes a query with the time range +`time >= '8:00' AND time < '9:00'`. +`cq_basic_rp` writes one point to the `three_weeks` RP and the `average_passengers` measurement: +> + name: average_passengers + ------------------------ + time mean + 2016-08-28T08:00:00Z 13.75 +``` + +Here are the results: + +```sql +> SELECT * FROM "transportation"."three_weeks"."average_passengers" +name: average_passengers +------------------------ +time mean +2016-08-28T07:00:00Z 7 +2016-08-28T08:00:00Z 13.75 +``` + +`cq_basic_rp` uses CQs and retention policies to automatically downsample data +and keep those downsampled data for an alternative length of time. +See the [Downsampling and Data Retention](/influxdb/v1.7/guides/downsampling_and_retention/) +guide for an in-depth discussion about this CQ use case. + +##### Automatically downsampling a database with backreferencing + +Use a function with a wildcard (`*`) and the `INTO` query's +[backreferencing syntax](/influxdb/v1.7/query_language/data_exploration/#the-into-clause) +to automatically downsample data from all measurements and numerical fields in +a database. + +```sql +CREATE CONTINUOUS QUERY "cq_basic_br" ON "transportation" +BEGIN + SELECT mean(*) INTO "downsampled_transportation"."autogen".:MEASUREMENT FROM /.*/ GROUP BY time(30m),* +END +``` + +`cq_basic_br` calculates the 30-minute average of `passengers` and `complaints` +from every measurement in the `transportation` database (in this case, there's only the +`bus_data` measurement). +It stores the results in the `downsampled_transportation` database. + +`cq_basic_br` executes at 30-minute intervals, the same interval as the +`GROUP BY time()` interval.
+Every 30 minutes, `cq_basic_br` runs a single query that covers the time range +between `now()` and `now()` minus the `GROUP BY time()` interval, that is, +the time range between `now()` and 30 minutes prior to `now()`. + +Annotated log output on the morning of August 28, 2016: + +```sql +> +At **7:30**, `cq_basic_br` executes a query with the time range `time >= '7:00' AND time < '7:30'`. +`cq_basic_br` writes two points to the `bus_data` measurement in the `downsampled_transportation` database: +> + name: bus_data + -------------- + time mean_complaints mean_passengers + 2016-08-28T07:00:00Z 9 6.5 +> +At **8:00**, `cq_basic_br` executes a query with the time range `time >= '7:30' AND time < '8:00'`. +`cq_basic_br` writes two points to the `bus_data` measurement in the `downsampled_transportation` database: +> + name: bus_data + -------------- + time mean_complaints mean_passengers + 2016-08-28T07:30:00Z 9 7.5 +> +[...] +> +At **9:00**, `cq_basic_br` executes a query with the time range `time >= '8:30' AND time < '9:00'`. +`cq_basic_br` writes two points to the `bus_data` measurement in the `downsampled_transportation` database: +> + name: bus_data + -------------- + time mean_complaints mean_passengers + 2016-08-28T08:30:00Z 7 16 +``` + +Here are the results: + +```sql +> SELECT * FROM "downsampled_transportation"."autogen"."bus_data" +name: bus_data +-------------- +time mean_complaints mean_passengers +2016-08-28T07:00:00Z 9 6.5 +2016-08-28T07:30:00Z 9 7.5 +2016-08-28T08:00:00Z 8 11.5 +2016-08-28T08:30:00Z 7 16 +``` + +##### Automatically downsampling data and configuring CQ time boundaries + +Use an +[offset interval](/influxdb/v1.7/query_language/data_exploration/#advanced-group-by-time-syntax) +in the `GROUP BY time()` clause to alter both the CQ's default execution time and +preset time boundaries. + +```sql +CREATE CONTINUOUS QUERY "cq_basic_offset" ON "transportation" +BEGIN + SELECT mean("passengers") INTO "average_passengers" FROM "bus_data" GROUP BY time(1h,15m) +END +``` + +`cq_basic_offset` calculates the average hourly number of passengers from the +`bus_data` measurement and stores the results in the `average_passengers` +measurement. + +`cq_basic_offset` executes at one-hour intervals, the same interval as the +`GROUP BY time()` interval. +The 15-minute offset interval forces the CQ to execute 15 minutes after the +default execution time; `cq_basic_offset` executes at 8:15 instead of 8:00. + +Every hour, `cq_basic_offset` runs a single query that covers the time range +between `now()` and `now()` minus the `GROUP BY time()` interval, that is, the +time range between `now()` and one hour prior to `now()`. +The 15-minute offset interval shifts forward the generated preset time boundaries in the +CQ's `WHERE` clause; `cq_basic_offset` queries between 7:15 and 8:14.999999999 instead of 7:00 and 7:59.999999999. + +Annotated log output on the morning of August 28, 2016: + +``` +> +At **8:15** `cq_basic_offset` executes a query with the time range `time >= '7:15' AND time < '8:15'`. +`cq_basic_offset` writes one point to the `average_passengers` measurement: +> + name: average_passengers + ------------------------ + time mean + 2016-08-28T07:15:00Z 7.75 +> +At **9:15** `cq_basic_offset` executes a query with the time range `time >= '8:15' AND time < '9:15'`.
+`cq_basic_offset` writes one point to the `average_passengers` measurement: +> + name: average_passengers + ------------------------ + time mean + 2016-08-28T08:15:00Z 16.75 +``` + +Here are the results: + +```sql +> SELECT * FROM "average_passengers" +name: average_passengers +------------------------ +time mean +2016-08-28T07:15:00Z 7.75 +2016-08-28T08:15:00Z 16.75 +``` + +Notice that the timestamps are for 7:15 and 8:15 instead of 7:00 and 8:00. + +#### Common issues with basic syntax + +##### Handling time intervals with no data + +CQs do not write any results for a time interval if no data fall within that +time range. + +Note that the basic syntax does not support using +[`fill()`](/influxdb/v1.7/query_language/data_exploration/#group-by-time-intervals-and-fill) +to change the value reported for intervals with no data. +Basic syntax CQs ignore `fill()` if it's included in the CQ query. +A possible workaround is to use the +[advanced CQ syntax](#example-4-configuring-cq-time-ranges-and-filling-empty-results). + +##### Resampling previous time intervals + +The basic CQ runs a single query that covers the time range between `now()` +and `now()` minus the `GROUP BY time()` interval. +See the [advanced syntax](#advanced-syntax) for how to configure the query's +time range. + +##### Backfilling results for older data + +CQs operate on real-time data, that is, data with timestamps that occur +relative to [`now()`](/influxdb/v1.7/concepts/glossary/#now). +Use a basic +[`INTO` query](/influxdb/v1.7/query_language/data_exploration/#the-into-clause) +to backfill results for data with older timestamps. + +##### Missing tags in the CQ results + +By default, all +[`INTO` queries](/influxdb/v1.7/query_language/data_exploration/#the-into-clause) +convert any tags in the source measurement to fields in the destination +measurement. + +Include `GROUP BY *` in the CQ to preserve tags in the destination measurement. + +### Advanced syntax + +```txt +CREATE CONTINUOUS QUERY <cq_name> ON <database_name> +RESAMPLE EVERY <interval> FOR <interval> +BEGIN + <cq_query> +END +``` + +#### Description of advanced syntax + +##### The `cq_query` + +See [Description of Basic Syntax](/influxdb/v1.7/query_language/continuous_queries/#description-of-basic-syntax). + +##### Scheduling and coverage + +CQs operate on real-time data. With the advanced syntax, CQs use the local +server’s timestamp, the information in the `RESAMPLE` clause, and the InfluxDB +server's preset time boundaries to determine when to execute and what time range to +cover in the query. + +CQs execute at the same interval as the `EVERY` interval in the `RESAMPLE` +clause, and they run at the start of InfluxDB’s preset time boundaries. +If the `EVERY` interval is two hours, InfluxDB executes the CQ at the top of +every other hour. + +When the CQ executes, it runs a single query for the time range between +[`now()`](/influxdb/v1.7/concepts/glossary/#now) and `now()` minus the `FOR` interval in the `RESAMPLE` clause. +If the `FOR` interval is two hours and the current time is 17:00, the query's +time range is between 15:00 and 16:59.999999999. + +Both the `EVERY` interval and the `FOR` interval accept +[duration literals](/influxdb/v1.7/query_language/spec/#durations). +The `RESAMPLE` clause works with either or both of the `EVERY` and `FOR` intervals +configured.
+CQs default to the relevant
+[basic syntax behavior](/influxdb/v1.7/query_language/continuous_queries/#description-of-basic-syntax)
+if the `EVERY` interval or `FOR` interval is not provided (see the first issue in
+[Common issues with advanced syntax](/influxdb/v1.7/query_language/continuous_queries/#common-issues-with-advanced-syntax)
+for an anomalous case).
+
+#### Examples of advanced syntax
+
+The examples below use the following sample data in the `transportation` database.
+The measurement `bus_data` stores 15-minute resolution data on the number of bus
+`passengers`:
+
+```sql
+name: bus_data
+--------------
+time                  passengers
+2016-08-28T06:30:00Z  2
+2016-08-28T06:45:00Z  4
+2016-08-28T07:00:00Z  5
+2016-08-28T07:15:00Z  8
+2016-08-28T07:30:00Z  8
+2016-08-28T07:45:00Z  7
+2016-08-28T08:00:00Z  8
+2016-08-28T08:15:00Z  15
+2016-08-28T08:30:00Z  15
+2016-08-28T08:45:00Z  17
+2016-08-28T09:00:00Z  20
+```
+
+##### Configuring execution intervals
+
+Use an `EVERY` interval in the `RESAMPLE` clause to specify the CQ's execution
+interval.
+
+```sql
+CREATE CONTINUOUS QUERY "cq_advanced_every" ON "transportation"
+RESAMPLE EVERY 30m
+BEGIN
+  SELECT mean("passengers") INTO "average_passengers" FROM "bus_data" GROUP BY time(1h)
+END
+```
+
+`cq_advanced_every` calculates the one-hour average of `passengers`
+from the `bus_data` measurement and stores the results in the
+`average_passengers` measurement in the `transportation` database.
+
+`cq_advanced_every` executes at 30-minute intervals, the same interval as the
+`EVERY` interval.
+Every 30 minutes, `cq_advanced_every` runs a single query that covers the time
+range for the current time bucket, that is, the one-hour time bucket that
+intersects with `now()`.
+
+Annotated log output on the morning of August 28, 2016:
+
+```sql
+>
+At **8:00**, `cq_advanced_every` executes a query with the time range `WHERE time >= '7:00' AND time < '8:00'`.
+`cq_advanced_every` writes one point to the `average_passengers` measurement:
+>
+    name: average_passengers
+    ------------------------
+    time                  mean
+    2016-08-28T07:00:00Z  7
+>
+At **8:30**, `cq_advanced_every` executes a query with the time range `WHERE time >= '8:00' AND time < '9:00'`.
+`cq_advanced_every` writes one point to the `average_passengers` measurement:
+>
+    name: average_passengers
+    ------------------------
+    time                  mean
+    2016-08-28T08:00:00Z  12.6667
+>
+At **9:00**, `cq_advanced_every` executes a query with the time range `WHERE time >= '8:00' AND time < '9:00'`.
+`cq_advanced_every` writes one point to the `average_passengers` measurement:
+>
+    name: average_passengers
+    ------------------------
+    time                  mean
+    2016-08-28T08:00:00Z  13.75
+```
+
+Here are the results:
+
+```sql
+> SELECT * FROM "average_passengers"
+name: average_passengers
+------------------------
+time                  mean
+2016-08-28T07:00:00Z  7
+2016-08-28T08:00:00Z  13.75
+```
+
+Notice that `cq_advanced_every` calculates the result for the 8:00 time interval
+twice.
+First, it runs at 8:30 and calculates the average for every available data point
+between 8:00 and 9:00 (`8`, `15`, and `15`).
+Second, it runs at 9:00 and calculates the average for every available data
+point between 8:00 and 9:00 (`8`, `15`, `15`, and `17`).
+Because of the way InfluxDB
+[handles duplicate points](/influxdb/v1.7/troubleshooting/frequently-asked-questions/#how-does-influxdb-handle-duplicate-points),
+the second result simply overwrites the first result.
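+
+You can reproduce this overwrite behavior by hand from the `influx` CLI.
+The sketch below is illustrative only: it writes two points with the same measurement,
+tag set, and timestamp (2016-08-28T08:00:00Z expressed in nanoseconds), using the two
+averages from the log output above, and the second field value replaces the first:
+
+```sql
+> INSERT average_passengers mean=12.6667 1472371200000000000
+> INSERT average_passengers mean=13.75 1472371200000000000
+> SELECT * FROM "average_passengers"
+name: average_passengers
+time                  mean
+----                  ----
+2016-08-28T08:00:00Z  13.75
+```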
+ +##### Configuring time ranges for resampling + +Use a `FOR` interval in the `RESAMPLE` clause to specify the length of the CQ's +time range. + +```sql +CREATE CONTINUOUS QUERY "cq_advanced_for" ON "transportation" +RESAMPLE FOR 1h +BEGIN + SELECT mean("passengers") INTO "average_passengers" FROM "bus_data" GROUP BY time(30m) +END +``` + +`cq_advanced_for` calculates the 30-minute average of `passengers` +from the `bus_data` measurement and stores the results in the `average_passengers` +measurement in the `transportation` database. + +`cq_advanced_for` executes at 30-minute intervals, the same interval as the +`GROUP BY time()` interval. +Every 30 minutes, `cq_advanced_for` runs a single query that covers the time +range between `now()` and `now()` minus the `FOR` interval, that is, the time +range between `now()` and one hour prior to `now()`. + +Annotated log output on the morning of August 28, 2016: + +```sql +> +At **8:00** `cq_advanced_for` executes a query with the time range `WHERE time >= '7:00' AND time < '8:00'`. +`cq_advanced_for` writes two points to the `average_passengers` measurement: +> + name: average_passengers + ------------------------ + time mean + 2016-08-28T07:00:00Z 6.5 + 2016-08-28T07:30:00Z 7.5 +> +At **8:30** `cq_advanced_for` executes a query with the time range `WHERE time >= '7:30' AND time < '8:30'`. +`cq_advanced_for` writes two points to the `average_passengers` measurement: +> + name: average_passengers + ------------------------ + time mean + 2016-08-28T07:30:00Z 7.5 + 2016-08-28T08:00:00Z 11.5 +> +At **9:00** `cq_advanced_for` executes a query with the time range `WHERE time >= '8:00' AND time < '9:00'`. +`cq_advanced_for` writes two points to the `average_passengers` measurement: +> + name: average_passengers + ------------------------ + time mean + 2016-08-28T08:00:00Z 11.5 + 2016-08-28T08:30:00Z 16 +``` + +Notice that `cq_advanced_for` will calculate the result for every time interval +twice. +The CQ calculates the average for the 7:30 time interval at 8:00 and at 8:30, +and it calculates the average for the 8:00 time interval at 8:30 and 9:00. + +Here are the results: + +```sql +> SELECT * FROM "average_passengers" +name: average_passengers +------------------------ +time mean +2016-08-28T07:00:00Z 6.5 +2016-08-28T07:30:00Z 7.5 +2016-08-28T08:00:00Z 11.5 +2016-08-28T08:30:00Z 16 +``` + +##### Configuring execution intervals and CQ time ranges + +Use an `EVERY` interval and `FOR` interval in the `RESAMPLE` clause to specify +the CQ's execution interval and the length of the CQ's time range. + +```sql +CREATE CONTINUOUS QUERY "cq_advanced_every_for" ON "transportation" +RESAMPLE EVERY 1h FOR 90m +BEGIN + SELECT mean("passengers") INTO "average_passengers" FROM "bus_data" GROUP BY time(30m) +END +``` + +`cq_advanced_every_for` calculates the 30-minute average of +`passengers` from the `bus_data` measurement and stores the results in the +`average_passengers` measurement in the `transportation` database. + +`cq_advanced_every_for` executes at one-hour intervals, the same interval as the +`EVERY` interval. +Every hour, `cq_advanced_every_for` runs a single query that covers the time +range between `now()` and `now()` minus the `FOR` interval, that is, the time +range between `now()` and 90 minutes prior to `now()`. + +Annotated log output on the morning of August 28, 2016: + +```sql +> +At **8:00** `cq_advanced_every_for` executes a query with the time range `WHERE time >= '6:30' AND time < '8:00'`. 
+`cq_advanced_every_for` writes three points to the `average_passengers` measurement:
+>
+    name: average_passengers
+    ------------------------
+    time                  mean
+    2016-08-28T06:30:00Z  3
+    2016-08-28T07:00:00Z  6.5
+    2016-08-28T07:30:00Z  7.5
+>
+At **9:00** `cq_advanced_every_for` executes a query with the time range `WHERE time >= '7:30' AND time < '9:00'`.
+`cq_advanced_every_for` writes three points to the `average_passengers` measurement:
+>
+    name: average_passengers
+    ------------------------
+    time                  mean
+    2016-08-28T07:30:00Z  7.5
+    2016-08-28T08:00:00Z  11.5
+    2016-08-28T08:30:00Z  16
+```
+
+Notice that `cq_advanced_every_for` will calculate the result for every time
+interval twice.
+The CQ calculates the average for the 7:30 interval at 8:00 and 9:00.
+
+Here are the results:
+
+```sql
+> SELECT * FROM "average_passengers"
+name: average_passengers
+------------------------
+time                  mean
+2016-08-28T06:30:00Z  3
+2016-08-28T07:00:00Z  6.5
+2016-08-28T07:30:00Z  7.5
+2016-08-28T08:00:00Z  11.5
+2016-08-28T08:30:00Z  16
+```
+
+##### Configuring CQ time ranges and filling empty results
+
+Use a `FOR` interval and `fill()` to change the value reported for time
+intervals with no data.
+Note that at least one data point must fall within the `FOR` interval for `fill()`
+to operate.
+If no data fall within the `FOR` interval, the CQ writes no points to the
+destination measurement.
+
+```sql
+CREATE CONTINUOUS QUERY "cq_advanced_for_fill" ON "transportation"
+RESAMPLE FOR 2h
+BEGIN
+  SELECT mean("passengers") INTO "average_passengers" FROM "bus_data" GROUP BY time(1h) fill(1000)
+END
+```
+
+`cq_advanced_for_fill` calculates the one-hour average of `passengers` from the
+`bus_data` measurement and stores the results in the `average_passengers`
+measurement in the `transportation` database.
+Where possible, it writes the value `1000` for time intervals with no results.
+
+`cq_advanced_for_fill` executes at one-hour intervals, the same interval as the
+`GROUP BY time()` interval.
+Every hour, `cq_advanced_for_fill` runs a single query that covers the time
+range between `now()` and `now()` minus the `FOR` interval, that is, the time
+range between `now()` and two hours prior to `now()`.
+
+Annotated log output on the morning of August 28, 2016:
+
+```sql
+>
+At **6:00**, `cq_advanced_for_fill` executes a query with the time range `WHERE time >= '4:00' AND time < '6:00'`.
+`cq_advanced_for_fill` writes nothing to `average_passengers`; `bus_data` has no data
+that fall within that time range.
+>
+At **7:00**, `cq_advanced_for_fill` executes a query with the time range `WHERE time >= '5:00' AND time < '7:00'`.
+`cq_advanced_for_fill` writes two points to `average_passengers`:
+>
+    name: average_passengers
+    ------------------------
+    time                  mean
+    2016-08-28T05:00:00Z  1000        <------ fill(1000)
+    2016-08-28T06:00:00Z  3           <------ average of 2 and 4
+>
+[...]
+>
+At **11:00**, `cq_advanced_for_fill` executes a query with the time range `WHERE time >= '9:00' AND time < '11:00'`.
+`cq_advanced_for_fill` writes two points to `average_passengers`:
+>
+    name: average_passengers
+    ------------------------
+    time                  mean
+    2016-08-28T09:00:00Z  20          <------ average of 20
+    2016-08-28T10:00:00Z  1000        <------ fill(1000)
+>
+At **12:00**, `cq_advanced_for_fill` executes a query with the time range `WHERE time >= '10:00' AND time < '12:00'`.
+`cq_advanced_for_fill` writes nothing to `average_passengers`; `bus_data` has no data
+that fall within that time range.
+```
+
+Here are the results:
+
+```sql
+> SELECT * FROM "average_passengers"
+name: average_passengers
+------------------------
+time                  mean
+2016-08-28T05:00:00Z  1000
+2016-08-28T06:00:00Z  3
+2016-08-28T07:00:00Z  7
+2016-08-28T08:00:00Z  13.75
+2016-08-28T09:00:00Z  20
+2016-08-28T10:00:00Z  1000
+```
+
+> **Note:** `fill(previous)` doesn't fill the result for a time interval if the
+previous value is outside the query's time range.
+See [Frequently Asked Questions](/influxdb/v1.7/troubleshooting/frequently-asked-questions/#why-does-fill-previous-return-empty-results)
+for more information.
+
+#### Common issues with advanced syntax
+
+##### If the `EVERY` interval is greater than the `GROUP BY time()` interval
+
+If the `EVERY` interval is greater than the `GROUP BY time()` interval, the CQ
+executes at the same interval as the `EVERY` interval and runs a single query
+that covers the time range between `now()` and `now()` minus the `EVERY`
+interval (not between `now()` and `now()` minus the `GROUP BY time()` interval).
+
+For example, if the `GROUP BY time()` interval is `5m` and the `EVERY` interval
+is `10m`, the CQ executes every ten minutes.
+Every ten minutes, the CQ runs a single query that covers the time range
+between `now()` and `now()` minus the `EVERY` interval, that is, the time
+range between `now()` and ten minutes prior to `now()`.
+
+This behavior is intentional and prevents the CQ from missing data between
+execution times.
+
+##### If the `FOR` interval is less than the execution interval
+
+If the `FOR` interval is less than the `GROUP BY time()` interval or, if
+specified, the `EVERY` interval, InfluxDB returns the following error:
+
+```sql
+error parsing query: FOR duration must be >= GROUP BY time duration: must be a minimum of <minimum_duration> got <user_specified_duration>
+```
+
+To avoid missing data between execution times, the `FOR` interval must be equal
+to or greater than the `GROUP BY time()` interval or, if specified, the `EVERY`
+interval.
+
+Currently, this is the intended behavior.
+GitHub Issue [#6963](https://github.com/influxdata/influxdb/issues/6963)
+outlines a feature request for CQs to support gaps in data coverage.
+
+## Continuous query management
+
+Only admin users are allowed to work with CQs. For more on user privileges, see [Authentication and Authorization](/influxdb/v1.7/administration/authentication_and_authorization/#user-types-and-privileges).
+
+### Listing continuous queries
+
+List every CQ on an InfluxDB instance with:
+
+```sql
+SHOW CONTINUOUS QUERIES
+```
+
+`SHOW CONTINUOUS QUERIES` groups results by database.
+
+##### Examples
+
+The output shows that the `telegraf` and `mydb` databases have CQs:
+
+```sql
+> SHOW CONTINUOUS QUERIES
+name: _internal
+---------------
+name  query
+
+
+name: telegraf
+--------------
+name          query
+idle_hands    CREATE CONTINUOUS QUERY idle_hands ON telegraf BEGIN SELECT min(usage_idle) INTO telegraf.autogen.min_hourly_cpu FROM telegraf.autogen.cpu GROUP BY time(1h) END
+feeling_used  CREATE CONTINUOUS QUERY feeling_used ON telegraf BEGIN SELECT mean(used) INTO downsampled_telegraf.autogen.:MEASUREMENT FROM telegraf.autogen./.*/ GROUP BY time(1h) END
+
+
+name: downsampled_telegraf
+--------------------------
+name  query
+
+
+name: mydb
+----------
+name     query
+vampire  CREATE CONTINUOUS QUERY vampire ON mydb BEGIN SELECT count(dracula) INTO mydb.autogen.all_of_them FROM mydb.autogen.one GROUP BY time(5m) END
+```
+
+### Deleting continuous queries
+
+Delete a CQ from a specific database with:
+
+```sql
+DROP CONTINUOUS QUERY <cq_name> ON <database_name>
+```
+
+`DROP CONTINUOUS QUERY` returns an empty result.
+
+##### Examples
+
+Drop the `idle_hands` CQ from the `telegraf` database:
+
+```sql
+> DROP CONTINUOUS QUERY "idle_hands" ON "telegraf"
+>
+```
+
+### Altering continuous queries
+
+CQs cannot be altered once they're created.
+To change a CQ, you must `DROP` and re`CREATE` it with the updated settings.
+
+### Continuous query statistics
+
+If `query-stats-enabled` is set to `true` in your `influxdb.conf`, or via the `INFLUXDB_CONTINUOUS_QUERIES_QUERY_STATS_ENABLED` environment variable, data will be written to `_internal` with information about when continuous queries ran and their duration.
+Information about CQ configuration settings is available in the [Configuration](/influxdb/v1.7/administration/config/#continuous-queries-settings) documentation.
+
+> **Note:** `_internal` houses internal system data and is meant for internal use.
+The structure of and data stored in `_internal` can change at any time.
+Use of this data falls outside the scope of official InfluxData support.
+
+## Continuous query use cases
+
+### Downsampling and data retention
+
+Use CQs with InfluxDB database
+[retention policies](/influxdb/v1.7/concepts/glossary/#retention-policy-rp)
+(RPs) to mitigate storage concerns.
+Combine CQs and RPs to automatically downsample high precision data to a lower
+precision and remove the dispensable, high precision data from the database.
+
+See the
+[Downsampling and data retention](/influxdb/v1.7/guides/downsampling_and_retention/)
+guide for a detailed walkthrough of this common use case.
+
+### Precalculating expensive queries
+
+Shorten query runtimes by pre-calculating expensive queries with CQs.
+Use a CQ to automatically downsample commonly queried, high precision data to a
+lower precision.
+Queries on lower precision data require fewer resources and return faster.
+
+**Tip:** Pre-calculate queries for your preferred graphing tool to accelerate
+the population of graphs and dashboards.
+
+### Substituting for a `HAVING` clause
+
+InfluxQL does not support [`HAVING` clauses](https://en.wikipedia.org/wiki/Having_%28SQL%29).
+Get the same functionality by creating a CQ to aggregate the data and querying
+the CQ results to apply the `HAVING` clause.
+
+> **Note:** InfluxQL supports [subqueries](/influxdb/v1.7/query_language/data_exploration/#subqueries) which also offer similar functionality to `HAVING` clauses, as sketched below.
+See [Data Exploration](/influxdb/v1.7/query_language/data_exploration/#subqueries) for more information.
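+
+For instance, a subquery version of the `HAVING`-style filter can be written in a single
+statement. The following is a sketch only, reusing the `farm` measurement and `bees` field
+from the example below:
+
+```sql
+SELECT "mean_bees" FROM (SELECT mean("bees") AS "mean_bees" FROM "farm" GROUP BY time(30m)) WHERE "mean_bees" > 20
+```
+
+Unlike the CQ approach, the subquery recomputes the aggregation every time it runs, so a CQ
+remains the better fit for queries you repeat often.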
+
+##### Example
+
+InfluxDB does not accept the following query with a `HAVING` clause.
+The query calculates the average number of `bees` at `30` minute intervals and
+requests averages that are greater than `20`.
+
+```sql
+SELECT mean("bees") FROM "farm" GROUP BY time(30m) HAVING mean("bees") > 20
+```
+
+To get the same results:
+
+**1. Create a CQ**
+
+This step performs the `mean("bees")` part of the query above.
+Because this step creates a CQ, you only need to execute it once.
+
+The following CQ automatically calculates the average number of `bees` at
+`30` minute intervals and writes those averages to the `mean_bees` field in the
+`aggregate_bees` measurement.
+
+```sql
+CREATE CONTINUOUS QUERY "bee_cq" ON "mydb" BEGIN SELECT mean("bees") AS "mean_bees" INTO "aggregate_bees" FROM "farm" GROUP BY time(30m) END
+```
+
+**2. Query the CQ results**
+
+This step performs the `HAVING mean("bees") > 20` part of the query above.
+
+Query the data in the measurement `aggregate_bees` and request values of the `mean_bees` field that are greater than `20` in the `WHERE` clause:
+
+```sql
+SELECT "mean_bees" FROM "aggregate_bees" WHERE "mean_bees" > 20
+```
+
+### Substituting for nested functions
+
+Some InfluxQL functions
+[support nesting](/influxdb/v1.7/troubleshooting/frequently-asked-questions/#which-influxql-functions-support-nesting)
+of other functions.
+Most do not.
+If your function does not support nesting, you can get the same functionality using a CQ to calculate
+the innermost function.
+Then simply query the CQ results to calculate the outermost function.
+
+> **Note:** InfluxQL supports [subqueries](/influxdb/v1.7/query_language/data_exploration/#subqueries) which also offer the same functionality as nested functions.
+See [Data Exploration](/influxdb/v1.7/query_language/data_exploration/#subqueries) for more information.
+
+##### Example
+
+InfluxDB does not accept the following query with a nested function.
+The query calculates the number of non-null values
+of `bees` at `30` minute intervals and the average of those counts:
+
+```sql
+SELECT mean(count("bees")) FROM "farm" GROUP BY time(30m)
+```
+
+To get the same results:
+
+**1. Create a CQ**
+
+This step performs the `count("bees")` part of the nested function above.
+Because this step creates a CQ, you only need to execute it once.
+
+The following CQ automatically calculates the number of non-null values of `bees` at `30` minute intervals
+and writes those counts to the `count_bees` field in the `aggregate_bees` measurement.
+
+```sql
+CREATE CONTINUOUS QUERY "bee_cq" ON "mydb" BEGIN SELECT count("bees") AS "count_bees" INTO "aggregate_bees" FROM "farm" GROUP BY time(30m) END
+```
+
+**2. Query the CQ results**
+
+This step performs the `mean([...])` part of the nested function above.
+
+Query the data in the measurement `aggregate_bees` to calculate the average of the
+`count_bees` field:
+
+```sql
+SELECT mean("count_bees") FROM "aggregate_bees" WHERE time >= <start_time> AND time <= <end_time>
+```
+
+## Further information
+
+To see how to combine two InfluxDB features, CQs and retention policies,
+to periodically downsample data and automatically expire the dispensable high
+precision data, see [Downsampling and data retention](/influxdb/v1.7/guides/downsampling_and_retention/).
+
+Kapacitor, InfluxData's data processing engine, can do the same work as
+continuous queries in InfluxDB databases.
+
+To learn when to use Kapacitor instead of InfluxDB and how to perform the same CQ
+functionality with a TICKscript, see [examples of continuous queries in Kapacitor](/kapacitor/latest/examples/continuous_queries/).
diff --git a/content/influxdb/v1.7/query_language/data_download.md b/content/influxdb/v1.7/query_language/data_download.md
new file mode 100644
index 000000000..b2715321d
--- /dev/null
+++ b/content/influxdb/v1.7/query_language/data_download.md
@@ -0,0 +1,121 @@
+---
+title: Sample data
+menu:
+  influxdb_1_7:
+    weight: 10
+    parent: InfluxQL
+aliases:
+  - /influxdb/v1.7/sample_data/data_download/
+---
+
+To explore the query language further, these instructions help you create a database, then
+download and write sample data to that database within your InfluxDB installation.
+The sample data is then used and referenced in [Data Exploration](../../query_language/data_exploration/),
+[Schema Exploration](../../query_language/schema_exploration/), and [Functions](../../query_language/functions/).
+
+## Creating a database
+
+If you've installed InfluxDB locally, the `influx` command should be available via the command line.
+Executing `influx` will start the CLI and automatically connect to the local InfluxDB instance
+(assuming you have already started the server with `service influxdb start` or by running `influxd` directly).
+The output should look like this:
+
+```bash
+$ influx -precision rfc3339
+Connected to http://localhost:8086 version 1.7.x
+InfluxDB shell 1.7.x
+>
+```
+
+> **Notes:**
+>
+* The InfluxDB API runs on port `8086` by default.
+Therefore, `influx` will connect to port `8086` and `localhost` by default.
+If you need to alter these defaults, run `influx --help`.
+* The [`-precision` argument](/influxdb/latest/tools/shell/#influx-options) specifies the format/precision of any returned timestamps.
+In the example above, `rfc3339` tells InfluxDB to return timestamps in [RFC3339 format](https://www.ietf.org/rfc/rfc3339.txt) (`YYYY-MM-DDTHH:MM:SS.nnnnnnnnnZ`).
+
+The command line is now ready to take input in the form of Influx Query Language (InfluxQL) statements.
+To exit the InfluxQL shell, type `exit` and hit return.
+
+A fresh install of InfluxDB has no databases (apart from the system `_internal`),
+so creating one is our first task.
+You can create a database with the `CREATE DATABASE <db-name>` InfluxQL statement,
+where `<db-name>` is the name of the database you wish to create.
+Names of databases can contain any Unicode character as long as the string is double-quoted.
+Names can also be left unquoted if they contain _only_ ASCII letters,
+digits, or underscores and do not begin with a digit.
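+
+For example, a name that contains a character outside that set, such as a hyphen, must be
+double quoted, while an underscore-only name can be left bare (both database names here are
+hypothetical):
+
+```sql
+> CREATE DATABASE "noaa-water-archive"
+> CREATE DATABASE noaa_water_archive
+```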
+
+Throughout the query language exploration, we'll use the database name `NOAA_water_database`:
+
+```
+> CREATE DATABASE NOAA_water_database
+> exit
+```
+
+### Download and write the data to InfluxDB
+
+From your terminal, download the text file that contains the data in [line protocol](/influxdb/v1.7/concepts/glossary/#influxdb-line-protocol) format:
+
+```bash
+curl https://s3.amazonaws.com/noaa.water-database/NOAA_data.txt -o NOAA_data.txt
+```
+
+Write the data to InfluxDB via the [CLI](../../tools/shell/):
+
+```bash
+influx -import -path=NOAA_data.txt -precision=s -database=NOAA_water_database
+```
+
+### Test queries
+
+```bash
+$ influx -precision rfc3339 -database NOAA_water_database
+Connected to http://localhost:8086 version 1.7.x
+InfluxDB shell 1.7.x
+>
+```
+
+See all five measurements:
+
+```bash
+> SHOW measurements
+name: measurements
+------------------
+name
+average_temperature
+h2o_feet
+h2o_pH
+h2o_quality
+h2o_temperature
+```
+
+Count the number of non-null values of `water_level` in `h2o_feet`:
+
+```bash
+> SELECT COUNT("water_level") FROM h2o_feet
+name: h2o_feet
+--------------
+time                   count
+1970-01-01T00:00:00Z   15258
+```
+
+Select the first five observations in the measurement `h2o_feet`:
+
+```bash
+> SELECT * FROM h2o_feet LIMIT 5
+name: h2o_feet
+--------------
+time                   level description      location       water_level
+2015-08-18T00:00:00Z   below 3 feet           santa_monica   2.064
+2015-08-18T00:00:00Z   between 6 and 9 feet   coyote_creek   8.12
+2015-08-18T00:06:00Z   between 6 and 9 feet   coyote_creek   8.005
+2015-08-18T00:06:00Z   below 3 feet           santa_monica   2.116
+2015-08-18T00:12:00Z   between 6 and 9 feet   coyote_creek   7.887
+```
+
+### Data sources and things to note
+
+The sample data is publicly available data from the [National Oceanic and Atmospheric Administration's (NOAA) Center for Operational Oceanographic Products and Services](http://tidesandcurrents.noaa.gov/stations.html?type=Water+Levels).
+The data include 15,258 observations of water levels (ft) collected every six minutes at two stations (Santa Monica, CA (ID 9410840) and Coyote Creek, CA (ID 9414575)) over the period from August 18, 2015 through September 18, 2015.
+
+Note that the measurements `average_temperature`, `h2o_pH`, `h2o_quality`, and `h2o_temperature` contain fictional data.
+Those measurements serve to illuminate query functionality in [Schema Exploration](../../query_language/schema_exploration/).
+
+The `h2o_feet` measurement is the only measurement that contains the NOAA data.
+Please note that the `level description` field isn't part of the original NOAA data - we snuck it in there for the sake of having a field key with a special character and string [field values](../../concepts/glossary/#field-value).
diff --git a/content/influxdb/v1.7/query_language/data_exploration.md b/content/influxdb/v1.7/query_language/data_exploration.md
new file mode 100644
index 000000000..a7c7a04fd
--- /dev/null
+++ b/content/influxdb/v1.7/query_language/data_exploration.md
@@ -0,0 +1,3332 @@
+---
+title: Data exploration using InfluxQL
+description: Explore time series data using InfluxData's SQL-like query language. Understand how to use the SELECT statement to query data from measurements, tags, and fields.
+menu:
+  influxdb_1_7:
+    name: Data exploration
+    weight: 20
+    parent: InfluxQL
+---
+
+InfluxQL is an SQL-like query language for interacting with data in InfluxDB.
+The following sections detail InfluxQL's `SELECT` statement and useful query syntax
+for exploring your data.
+
+| The Basics: | Configure Query Results: | General Tips on Query Syntax: |
+|:----------- |:------------------------ |:----------------------------- |
+| [The SELECT statement](#the-basic-select-statement) | [ORDER BY time DESC](#order-by-time-desc) | [Time Syntax](#time-syntax) |
+| [The WHERE clause](#the-where-clause) | [The LIMIT and SLIMIT clauses](#the-limit-and-slimit-clauses) | [Regular Expressions](#regular-expressions) |
+| [The GROUP BY clause](#the-group-by-clause) | [The OFFSET and SOFFSET clauses](#the-offset-and-soffset-clauses) | [Data types and cast operations](#data-types-and-cast-operations) |
+| [The INTO clause](#the-into-clause) | [The Time Zone clause](#the-time-zone-clause) | [Merge behavior](#merge-behavior) |
+| | | [Multiple statements](#multiple-statements) |
+| | | [Subqueries](#subqueries) |
+
+### Sample data
+
+This document uses publicly available data from the
+[National Oceanic and Atmospheric Administration's (NOAA) Center for Operational Oceanographic Products and Services](http://tidesandcurrents.noaa.gov/stations.html?type=Water+Levels).
+See the [Sample Data](/influxdb/v1.7/query_language/data_download/) page to download
+the data and follow along with the example queries in the sections below.
+
+Start by logging in to the Influx CLI:
+
+```bash
+$ influx -precision rfc3339 -database NOAA_water_database
+Connected to http://localhost:8086 version 1.7.x
+InfluxDB shell 1.7.x
+>
+```
+
+Next, get acquainted with this subsample of the data in the `h2o_feet` measurement:
+
+name: h2o_feet
+
+| time | level description | location | water_level |
+| -------------------- | -------------------- | ------------ | ----------- |
+| 2015-08-18T00:00:00Z | between 6 and 9 feet | coyote_creek | 8.12 |
+| 2015-08-18T00:00:00Z | below 3 feet | santa_monica | 2.064 |
+| 2015-08-18T00:06:00Z | between 6 and 9 feet | coyote_creek | 8.005 |
+| 2015-08-18T00:06:00Z | below 3 feet | santa_monica | 2.116 |
+| 2015-08-18T00:12:00Z | between 6 and 9 feet | coyote_creek | 7.887 |
+| 2015-08-18T00:12:00Z | below 3 feet | santa_monica | 2.028 |
+
+The data in the `h2o_feet` [measurement](/influxdb/v1.7/concepts/glossary/#measurement)
+occur at six-minute time intervals.
+The measurement has one [tag key](/influxdb/v1.7/concepts/glossary/#tag-key)
+(`location`) which has two [tag values](/influxdb/v1.7/concepts/glossary/#tag-value):
+`coyote_creek` and `santa_monica`.
+The measurement also has two [fields](/influxdb/v1.7/concepts/glossary/#field):
+`level description` stores string [field values](/influxdb/v1.7/concepts/glossary/#field-value)
+and `water_level` stores float field values.
+All of these data are in the `NOAA_water_database` [database](/influxdb/v1.7/concepts/glossary/#database).
+
+> **Disclaimer:** The `level description` field isn't part of the original NOAA data - we snuck it in there for the sake of having a field key with a special character and string field values.
+
+## The basic SELECT statement
+
+The `SELECT` statement queries data from a particular [measurement](/influxdb/v1.7/concepts/glossary/#measurement) or measurements.
+
+### Syntax
+
+```sql
+SELECT <field_key>[,<field_key>,<tag_key>] FROM <measurement_name>[,<measurement_name>]
+```
+
+The `SELECT` statement requires a `SELECT` clause and a `FROM` clause.
+
+#### `SELECT` clause
+
+The `SELECT` clause supports several formats for specifying data:
+
+`SELECT *`
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Returns all [fields](/influxdb/v1.7/concepts/glossary/#field) and [tags](/influxdb/v1.7/concepts/glossary/#tag).
+
+`SELECT "<field_key>"`
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Returns a specific field.
+
+`SELECT "<field_key>","<field_key>"`
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Returns more than one field.
+
+`SELECT "<field_key>","<tag_key>"`
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Returns a specific field and tag.
+The `SELECT` clause must specify at least one field when it includes a tag.
+
+`SELECT "<field_key>"::field,"<tag_key>"::tag`
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Returns a specific field and tag.
+The `::[field | tag]` syntax specifies the [identifier's](/influxdb/v1.7/concepts/glossary/#identifier) type.
+Use this syntax to differentiate between field keys and tag keys that have the same name.
+
+Other supported features:
+[Arithmetic operations](/influxdb/v1.7/query_language/math_operators/),
+[Functions](/influxdb/v1.7/query_language/functions/),
+[Basic cast operations](#data-types-and-cast-operations),
+[Regular expressions](#regular-expressions)
+
+> **Note:** The SELECT statement cannot include an aggregate function **and** a non-aggregate function, field key, or tag key. For more information, see [error about mixing aggregate and non-aggregate queries](/influxdb/v1.7/troubleshooting/errors/#error-parsing-query-mixing-aggregate-and-non-aggregate-queries-is-not-supported).
+
+#### `FROM` clause
+
+The `FROM` clause supports several formats for specifying a [measurement(s)](/influxdb/v1.7/concepts/glossary/#measurement):
+
+`FROM <measurement_name>`
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+Returns data from a single measurement.
+If you're using the [CLI](/influxdb/v1.7/tools/shell/), InfluxDB queries the measurement in the
+[`USE`d](/influxdb/v1.7/tools/shell/#commands)
+[database](/influxdb/v1.7/concepts/glossary/#database) and the `DEFAULT` [retention policy](/influxdb/v1.7/concepts/glossary/#retention-policy-rp).
+If you're using the [InfluxDB API](/influxdb/v1.7/tools/api/), InfluxDB queries the
+measurement in the database specified in the [`db` query string parameter](/influxdb/v1.7/tools/api/#query-string-parameters)
+and the `DEFAULT` retention policy.
+
+`FROM <measurement_name>,<measurement_name>`
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+Returns data from more than one measurement.
+
+`FROM <database_name>.<retention_policy_name>.<measurement_name>`
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+Returns data from a fully qualified measurement.
+Fully qualify a measurement by specifying its database and retention policy.
+
+`FROM <database_name>..<measurement_name>`
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+Returns data from a measurement in a user-specified [database](/influxdb/v1.7/concepts/glossary/#database) and the `DEFAULT`
+[retention policy](/influxdb/v1.7/concepts/glossary/#retention-policy-rp).
+
+Other supported features:
+[Regular Expressions](#regular-expressions)
+
+#### Quoting
+
+[Identifiers](/influxdb/v1.7/concepts/glossary/#identifier) **must** be double quoted if they contain characters other than `[A-z,0-9,_]`, if they
+begin with a digit, or if they are an [InfluxQL keyword](https://github.com/influxdata/influxql/blob/master/README.md#keywords).
+While not always necessary, we recommend that you double quote identifiers.
+
+> **Note:** The quoting syntax for queries differs from the [line protocol](/influxdb/v1.7/concepts/glossary/#influxdb-line-protocol).
+Please review the [rules for single and double-quoting](/influxdb/v1.7/troubleshooting/frequently-asked-questions/#when-should-i-single-quote-and-when-should-i-double-quote-in-queries) in queries.
+
+### Examples
+
+#### Select all fields and tags from a single measurement
+
+```sql
+> SELECT * FROM "h2o_feet"
+
+name: h2o_feet
+--------------
+time                   level description      location       water_level
+2015-08-18T00:00:00Z   below 3 feet           santa_monica   2.064
+2015-08-18T00:00:00Z   between 6 and 9 feet   coyote_creek   8.12
+[...]
+2015-09-18T21:36:00Z   between 3 and 6 feet   santa_monica   5.066
+2015-09-18T21:42:00Z   between 3 and 6 feet   santa_monica   4.938
+```
+
+The query selects all [fields](/influxdb/v1.7/concepts/glossary/#field) and
+[tags](/influxdb/v1.7/concepts/glossary/#tag) from the `h2o_feet`
+[measurement](/influxdb/v1.7/concepts/glossary/#measurement).
+
+If you're using the [CLI](/influxdb/v1.7/tools/shell/), be sure to enter
+`USE NOAA_water_database` before you run the query.
+The CLI queries the data in the `USE`d database and the
+`DEFAULT` [retention policy](/influxdb/v1.7/concepts/glossary/#retention-policy-rp).
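+
+A minimal sketch of that CLI workflow (output abridged):
+
+```sql
+> USE NOAA_water_database
+Using database NOAA_water_database
+> SELECT * FROM "h2o_feet"
+```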
+If you're using the [InfluxDB API](/influxdb/v1.7/tools/api/), be sure to set the
+`db` [query string parameter](/influxdb/v1.7/tools/api/#query-string-parameters)
+to `NOAA_water_database`.
+If you do not set the `rp` query string parameter, the InfluxDB API automatically
+queries the database's `DEFAULT` retention policy.
+
+#### Select specific tags and fields from a single measurement
+
+```sql
+> SELECT "level description","location","water_level" FROM "h2o_feet"
+
+name: h2o_feet
+--------------
+time                   level description      location       water_level
+2015-08-18T00:00:00Z   below 3 feet           santa_monica   2.064
+2015-08-18T00:00:00Z   between 6 and 9 feet   coyote_creek   8.12
+[...]
+2015-09-18T21:36:00Z   between 3 and 6 feet   santa_monica   5.066
+2015-09-18T21:42:00Z   between 3 and 6 feet   santa_monica   4.938
+```
+
+The query selects the `level description` field, the `location` tag, and the
+`water_level` field.
+Note that the `SELECT` clause must specify at least one field when it includes
+a tag.
+
+#### Select specific tags and fields from a single measurement, and provide their identifier type
+
+```sql
+> SELECT "level description"::field,"location"::tag,"water_level"::field FROM "h2o_feet"
+
+name: h2o_feet
+--------------
+time                   level description      location       water_level
+2015-08-18T00:00:00Z   below 3 feet           santa_monica   2.064
+2015-08-18T00:00:00Z   between 6 and 9 feet   coyote_creek   8.12
+[...]
+2015-09-18T21:36:00Z   between 3 and 6 feet   santa_monica   5.066
+2015-09-18T21:42:00Z   between 3 and 6 feet   santa_monica   4.938
+```
+
+The query selects the `level description` field, the `location` tag, and the
+`water_level` field from the `h2o_feet` measurement.
+The `::[field | tag]` syntax specifies if the
+[identifier](/influxdb/v1.7/concepts/glossary/#identifier) is a field or tag.
+Use `::[field | tag]` to differentiate between [an identical field key and tag key](/influxdb/v1.7/troubleshooting/frequently-asked-questions/#how-do-i-query-data-with-an-identical-tag-key-and-field-key).
+That syntax is not required for most use cases.
+
+#### Select all fields from a single measurement
+
+```sql
+> SELECT *::field FROM "h2o_feet"
+
+name: h2o_feet
+--------------
+time                   level description      water_level
+2015-08-18T00:00:00Z   below 3 feet           2.064
+2015-08-18T00:00:00Z   between 6 and 9 feet   8.12
+[...]
+2015-09-18T21:36:00Z   between 3 and 6 feet   5.066
+2015-09-18T21:42:00Z   between 3 and 6 feet   4.938
+```
+
+The query selects all fields from the `h2o_feet` measurement.
+The `SELECT` clause supports combining the `*` syntax with the `::` syntax.
+
+#### Select a specific field from a measurement and perform basic arithmetic
+
+```sql
+> SELECT ("water_level" * 2) + 4 from "h2o_feet"
+
+name: h2o_feet
+--------------
+time                   water_level
+2015-08-18T00:00:00Z   20.24
+2015-08-18T00:00:00Z   8.128
+[...]
+2015-09-18T21:36:00Z   14.132
+2015-09-18T21:42:00Z   13.876
+```
+
+The query multiplies `water_level`'s field values by two and adds four to those
+values.
+Note that InfluxDB follows the standard order of operations.
+See [Mathematical Operators](/influxdb/v1.7/query_language/math_operators/)
+for more on supported operators.
+
+#### Select all data from more than one measurement
+
+```sql
+> SELECT * FROM "h2o_feet","h2o_pH"
+
+name: h2o_feet
+--------------
+time                   level description      location       pH   water_level
+2015-08-18T00:00:00Z   below 3 feet           santa_monica        2.064
+2015-08-18T00:00:00Z   between 6 and 9 feet   coyote_creek        8.12
+[...]
+2015-09-18T21:36:00Z   between 3 and 6 feet   santa_monica        5.066
+2015-09-18T21:42:00Z   between 3 and 6 feet   santa_monica        4.938
+
+name: h2o_pH
+------------
+time                   level description      location       pH   water_level
+2015-08-18T00:00:00Z                          santa_monica   6
+2015-08-18T00:00:00Z                          coyote_creek   7
+[...]
+2015-09-18T21:36:00Z                          santa_monica   8
+2015-09-18T21:42:00Z                          santa_monica   7
+```
+
+The query selects all fields and tags from two measurements: `h2o_feet` and
+`h2o_pH`.
+Separate multiple measurements with a comma (`,`).
+
+#### Select all data from a fully qualified measurement
+
+```sql
+> SELECT * FROM "NOAA_water_database"."autogen"."h2o_feet"
+
+name: h2o_feet
+--------------
+time                   level description      location       water_level
+2015-08-18T00:00:00Z   below 3 feet           santa_monica   2.064
+2015-08-18T00:00:00Z   between 6 and 9 feet   coyote_creek   8.12
+[...]
+2015-09-18T21:36:00Z   between 3 and 6 feet   santa_monica   5.066
+2015-09-18T21:42:00Z   between 3 and 6 feet   santa_monica   4.938
+```
+
+The query selects data in the `NOAA_water_database`, the `autogen` retention
+policy, and the measurement `h2o_feet`.
+
+In the CLI, fully qualify a measurement to query data in a database other
+than the `USE`d database and in a retention policy other than the
+`DEFAULT` retention policy.
+In the InfluxDB API, fully qualify a measurement in place of using the `db`
+and `rp` query string parameters if desired.
+
+#### Select all data from a measurement in a particular database
+
+```sql
+> SELECT * FROM "NOAA_water_database".."h2o_feet"
+
+name: h2o_feet
+--------------
+time                   level description      location       water_level
+2015-08-18T00:00:00Z   below 3 feet           santa_monica   2.064
+2015-08-18T00:00:00Z   between 6 and 9 feet   coyote_creek   8.12
+[...]
+2015-09-18T21:36:00Z   between 3 and 6 feet   santa_monica   5.066
+2015-09-18T21:42:00Z   between 3 and 6 feet   santa_monica   4.938
+```
+
+The query selects data in the `NOAA_water_database`, the `DEFAULT` retention
+policy, and the `h2o_feet` measurement.
+The `..` indicates the `DEFAULT` retention policy for the specified database.
+
+In the CLI, specify the database to query data in a database other than the
+`USE`d database.
+In the InfluxDB API, specify the database in place of using the `db` query
+string parameter if desired.
+
+### Common issues with the SELECT statement
+
+#### Selecting tag keys in the SELECT clause
+
+A query requires at least one [field key](/influxdb/v1.7/concepts/glossary/#field-key)
+in the `SELECT` clause to return data.
+If the `SELECT` clause only includes a single [tag key](/influxdb/v1.7/concepts/glossary/#tag-key) or several tag keys, the
+query returns an empty response.
+This behavior is a result of how the system stores data.
+
+##### Example
+
+The following query returns no data because it specifies a single tag key (`location`) in
+the `SELECT` clause:
+
+```sql
+> SELECT "location" FROM "h2o_feet"
+>
+```
+
+To return any data associated with the `location` tag key, the query's `SELECT`
+clause must include at least one field key (`water_level`):
+
+```sql
+> SELECT "water_level","location" FROM "h2o_feet"
+name: h2o_feet
+time                   water_level   location
+----                   -----------   --------
+2015-08-18T00:00:00Z   8.12          coyote_creek
+2015-08-18T00:00:00Z   2.064         santa_monica
+[...]
+2015-09-18T21:36:00Z   5.066         santa_monica
+2015-09-18T21:42:00Z   4.938         santa_monica
+```
+
+## The `WHERE` clause
+
+The `WHERE` clause filters data based on
+[fields](/influxdb/v1.7/concepts/glossary/#field),
+[tags](/influxdb/v1.7/concepts/glossary/#tag), and/or
+[timestamps](/influxdb/v1.7/concepts/glossary/#timestamp).
+
+### Syntax
+
+```
+SELECT_clause FROM_clause WHERE <conditional_expression> [(AND|OR) <conditional_expression> [...]]
+```
+
+The `WHERE` clause supports `conditional_expression`s on fields, tags, and
+timestamps.
+
+> **Note:** InfluxDB does not support using `OR` in the `WHERE` clause to specify multiple time ranges. For example, InfluxDB returns an empty response for the following query:
+
+`> SELECT * FROM "absolutismus" WHERE time = '2016-07-31T20:07:00Z' OR time = '2016-07-31T23:07:17Z'`
+
+#### Fields
+
+```
+field_key <operator> ['string' | boolean | float | integer]
+```
+
+The `WHERE` clause supports comparisons against string, boolean, float,
+and integer [field values](/influxdb/v1.7/concepts/glossary/#field-value).
+
+Single quote string field values in the `WHERE` clause.
+Queries with unquoted string field values or double quoted string field values
+will not return any data and, in most cases,
+[will not return an error](#common-issues-with-the-where-clause).
+
+##### Supported operators
+
+| Operator | Meaning |
+|:--------:|:-------- |
+| `=` | equal to |
+| `<>` | not equal to |
+| `!=` | not equal to |
+| `>` | greater than |
+| `>=` | greater than or equal to |
+| `<` | less than |
+| `<=` | less than or equal to |
+
+Other supported features:
+[Arithmetic Operations](/influxdb/v1.7/query_language/math_operators/),
+[Regular Expressions](#regular-expressions)
+
+#### Tags
+
+```sql
+tag_key <operator> ['tag_value']
+```
+
+Single quote [tag values](/influxdb/v1.7/concepts/glossary/#tag-value) in
+the `WHERE` clause.
+Queries with unquoted tag values or double quoted tag values will not return
+any data and, in most cases,
+[will not return an error](#common-issues-with-the-where-clause).
+
+##### Supported operators
+
+| Operator | Meaning |
+|:--------:|:------- |
+| `=` | equal to |
+| `<>` | not equal to |
+| `!=` | not equal to |
+
+Other supported features:
+[Regular Expressions](#regular-expressions)
+
+#### Timestamps
+
+For most `SELECT` statements, the default time range is between [`1677-09-21 00:12:43.145224194` and `2262-04-11T23:47:16.854775806Z` UTC](/influxdb/v1.7/troubleshooting/frequently-asked-questions/#what-are-the-minimum-and-maximum-timestamps-that-influxdb-can-store).
+For `SELECT` statements with a [`GROUP BY time()` clause](#group-by-time-intervals), the default time
+range is between `1677-09-21 00:12:43.145224194` UTC and [`now()`](/influxdb/v1.7/concepts/glossary/#now).
+
+The [Time Syntax](#time-syntax) section on this page
+details how to specify alternative time ranges in the `WHERE` clause.
+
+### Examples
+
+#### Select data that have specific field key-values
+
+```sql
+> SELECT * FROM "h2o_feet" WHERE "water_level" > 8
+
+name: h2o_feet
+--------------
+time                   level description      location       water_level
+2015-08-18T00:00:00Z   between 6 and 9 feet   coyote_creek   8.12
+2015-08-18T00:06:00Z   between 6 and 9 feet   coyote_creek   8.005
+[...]
+2015-09-18T00:12:00Z   between 6 and 9 feet   coyote_creek   8.189
+2015-09-18T00:18:00Z   between 6 and 9 feet   coyote_creek   8.084
+```
+
+The query returns data from the `h2o_feet`
+[measurement](/influxdb/v1.7/concepts/glossary/#measurement) with
+[field values](/influxdb/v1.7/concepts/glossary/#field-value) of `water_level`
+that are greater than eight.
+
+#### Select data that have a specific string field key-value
+
+```sql
+> SELECT * FROM "h2o_feet" WHERE "level description" = 'below 3 feet'
+
+name: h2o_feet
+--------------
+time                   level description   location       water_level
+2015-08-18T00:00:00Z   below 3 feet        santa_monica   2.064
+2015-08-18T00:06:00Z   below 3 feet        santa_monica   2.116
+[...]
+2015-09-18T14:06:00Z   below 3 feet   santa_monica   2.999
+2015-09-18T14:36:00Z   below 3 feet   santa_monica   2.907
+```
+
+The query returns data from the `h2o_feet` measurement with field values of
+`level description` that equal the `below 3 feet` string.
+InfluxQL requires single quotes around string field values in the `WHERE`
+clause.
+
+#### Select data that have a specific field key-value and perform basic arithmetic
+
+```sql
+> SELECT * FROM "h2o_feet" WHERE "water_level" + 2 > 11.9
+
+name: h2o_feet
+--------------
+time                   level description           location       water_level
+2015-08-29T07:06:00Z   at or greater than 9 feet   coyote_creek   9.902
+2015-08-29T07:12:00Z   at or greater than 9 feet   coyote_creek   9.938
+2015-08-29T07:18:00Z   at or greater than 9 feet   coyote_creek   9.957
+2015-08-29T07:24:00Z   at or greater than 9 feet   coyote_creek   9.964
+2015-08-29T07:30:00Z   at or greater than 9 feet   coyote_creek   9.954
+2015-08-29T07:36:00Z   at or greater than 9 feet   coyote_creek   9.941
+2015-08-29T07:42:00Z   at or greater than 9 feet   coyote_creek   9.925
+2015-08-29T07:48:00Z   at or greater than 9 feet   coyote_creek   9.902
+2015-09-02T23:30:00Z   at or greater than 9 feet   coyote_creek   9.902
+```
+
+The query returns data from the `h2o_feet` measurement with field values of
+`water_level` plus two that are greater than 11.9.
+Note that InfluxDB follows the standard order of operations.
+See [Mathematical Operators](/influxdb/v1.7/query_language/math_operators/)
+for more on supported operators.
+
+#### Select data that have a specific tag key-value
+
+```sql
+> SELECT "water_level" FROM "h2o_feet" WHERE "location" = 'santa_monica'
+
+name: h2o_feet
+--------------
+time                   water_level
+2015-08-18T00:00:00Z   2.064
+2015-08-18T00:06:00Z   2.116
+[...]
+2015-09-18T21:36:00Z   5.066
+2015-09-18T21:42:00Z   4.938
+```
+
+The query returns data from the `h2o_feet` measurement where the
+[tag key](/influxdb/v1.7/concepts/glossary/#tag-key) `location` is set to `santa_monica`.
+InfluxQL requires single quotes around tag values in the `WHERE` clause.
+
+#### Select data that have specific field key-values and tag key-values
+
+```sql
+> SELECT "water_level" FROM "h2o_feet" WHERE "location" <> 'santa_monica' AND (water_level < -0.59 OR water_level > 9.95)
+
+name: h2o_feet
+--------------
+time                   water_level
+2015-08-29T07:18:00Z   9.957
+2015-08-29T07:24:00Z   9.964
+2015-08-29T07:30:00Z   9.954
+2015-08-29T14:30:00Z   -0.61
+2015-08-29T14:36:00Z   -0.591
+2015-08-30T15:18:00Z   -0.594
+```
+
+The query returns data from the `h2o_feet` measurement where the tag key
+`location` is not set to `santa_monica` and where the field values of
+`water_level` are either less than -0.59 or greater than 9.95.
+The `WHERE` clause supports the operators `AND` and `OR`, and supports
+separating logic with parentheses.
+
+#### Select data that have specific timestamps
+
+```sql
+> SELECT * FROM "h2o_feet" WHERE time > now() - 7d
+```
+
+The query returns data from the `h2o_feet` measurement that have [timestamps](/influxdb/v1.7/concepts/glossary/#timestamp)
+within the past seven days.
+The [Time Syntax](#time-syntax) section on this page
+offers in-depth information on supported time syntax in the `WHERE` clause.
+
+### Common issues with the `WHERE` clause
+
+#### A `WHERE` clause query unexpectedly returns no data
+
+In most cases, this issue is the result of missing single quotes around
+[tag values](/influxdb/v1.7/concepts/glossary/#tag-value)
+or string [field values](/influxdb/v1.7/concepts/glossary/#field-value).
+Queries with unquoted or double quoted tag values or string field values will
+not return any data and, in most cases, will not return an error.
+
+The first two queries in the code block below attempt to specify the tag value
+`santa_monica` without any quotes and with double quotes.
+Those queries return no results.
+The third query single quotes `santa_monica` (this is the supported syntax)
+and returns the expected results.
+
+```sql
+> SELECT "water_level" FROM "h2o_feet" WHERE "location" = santa_monica
+
+> SELECT "water_level" FROM "h2o_feet" WHERE "location" = "santa_monica"
+
+> SELECT "water_level" FROM "h2o_feet" WHERE "location" = 'santa_monica'
+
+name: h2o_feet
+--------------
+time                   water_level
+2015-08-18T00:00:00Z   2.064
+[...]
+2015-09-18T21:42:00Z   4.938
+```
+
+The first two queries in the code block below attempt to specify the string
+field value `at or greater than 9 feet` without any quotes and with double
+quotes.
+The first query returns an error because the string field value includes
+white spaces.
+The second query returns no results.
+The third query single quotes `at or greater than 9 feet` (this is the
+supported syntax) and returns the expected results.
+
+```sql
+> SELECT "level description" FROM "h2o_feet" WHERE "level description" = at or greater than 9 feet
+
+ERR: error parsing query: found than, expected ; at line 1, char 86
+
+> SELECT "level description" FROM "h2o_feet" WHERE "level description" = "at or greater than 9 feet"
+
+> SELECT "level description" FROM "h2o_feet" WHERE "level description" = 'at or greater than 9 feet'
+
+name: h2o_feet
+--------------
+time                   level description
+2015-08-26T04:00:00Z   at or greater than 9 feet
+[...]
+2015-09-15T22:42:00Z   at or greater than 9 feet
+```
+
+## The GROUP BY clause
+
+The `GROUP BY` clause groups query results by:
+
+- one or more specified [tags](/influxdb/v1.7/concepts/glossary/#tag)
+- a specified time interval
+
+> **Note:** You cannot use `GROUP BY` to group fields.
+
+- [GROUP BY tags](#group-by-tags)
+- [GROUP BY time intervals](#group-by-time-intervals): [Basic Syntax](#basic-group-by-time-syntax), [Advanced Syntax](#advanced-group-by-time-syntax), [GROUP BY time intervals and fill()](#group-by-time-intervals-and-fill)
+
+## GROUP BY tags
+
+`GROUP BY <tag>` groups query results by one or more specified tags.
+
+#### Syntax
+
+```sql
+SELECT_clause FROM_clause [WHERE_clause] GROUP BY [* | <tag_key>[,<tag_key>]]
+```
+
+`GROUP BY *`
+&nbsp;&nbsp;&nbsp;Groups results by all [tags](/influxdb/v1.7/concepts/glossary/#tag)
+
+`GROUP BY <tag_key>`
+&nbsp;&nbsp;&nbsp;Groups results by a specific tag
+
+`GROUP BY <tag_key>,<tag_key>`
+&nbsp;&nbsp;&nbsp;Groups results by more than one tag.
+The order of the [tag keys](/influxdb/v1.7/concepts/glossary/#tag-key) is irrelevant.
+
+If the query includes a [`WHERE` clause](#the-where-clause) the `GROUP BY`
+clause must appear after the `WHERE` clause.
+
+Other supported features: [Regular Expressions](#regular-expressions)
+
+#### Examples
+
+##### Group query results by a single tag
+
+```sql
+> SELECT MEAN("water_level") FROM "h2o_feet" GROUP BY "location"
+
+name: h2o_feet
+tags: location=coyote_creek
+time                   mean
+----                   ----
+1970-01-01T00:00:00Z   5.359342451341401
+
+
+name: h2o_feet
+tags: location=santa_monica
+time                   mean
+----                   ----
+1970-01-01T00:00:00Z   3.530863470081006
+```
+
+The query uses an InfluxQL [function](/influxdb/v1.7/query_language/functions/)
+to calculate the average `water_level` for each
+[tag value](/influxdb/v1.7/concepts/glossary/#tag-value) of `location` in
+the `h2o_feet` [measurement](/influxdb/v1.7/concepts/glossary/#measurement).
+InfluxDB returns results in two [series](/influxdb/v1.7/concepts/glossary/#series): one for each tag value of `location`.
+
+> **Note:** In InfluxDB, [epoch 0](https://en.wikipedia.org/wiki/Unix_time) (`1970-01-01T00:00:00Z`) is often used as a null timestamp equivalent.
+If you request a query that has no timestamp to return, such as an [aggregation function](/influxdb/v1.7/query_language/functions/) with an unbounded time range, InfluxDB returns epoch 0 as the timestamp.
+
+##### Group query results by more than one tag
+
+```sql
+> SELECT MEAN("index") FROM "h2o_quality" GROUP BY location,randtag
+
+name: h2o_quality
+tags: location=coyote_creek, randtag=1
+time                   mean
+----                   ----
+1970-01-01T00:00:00Z   50.69033760186263
+
+name: h2o_quality
+tags: location=coyote_creek, randtag=2
+time                   mean
+----                   ----
+1970-01-01T00:00:00Z   49.661867544220485
+
+name: h2o_quality
+tags: location=coyote_creek, randtag=3
+time                   mean
+----                   ----
+1970-01-01T00:00:00Z   49.360939907550076
+
+name: h2o_quality
+tags: location=santa_monica, randtag=1
+time                   mean
+----                   ----
+1970-01-01T00:00:00Z   49.132712456344585
+
+name: h2o_quality
+tags: location=santa_monica, randtag=2
+time                   mean
+----                   ----
+1970-01-01T00:00:00Z   50.2937984496124
+
+name: h2o_quality
+tags: location=santa_monica, randtag=3
+time                   mean
+----                   ----
+1970-01-01T00:00:00Z   49.99919903884662
+```
+
+The query uses an InfluxQL [function](/influxdb/v1.7/query_language/functions/) to calculate the average `index` for
+each combination of the `location` [tag](/influxdb/v1.7/concepts/glossary/#tag) and the `randtag` tag in the
+`h2o_quality` measurement.
+Separate multiple tags with a comma in the `GROUP BY` clause.
+
+##### Group query results by all tags
+
+```sql
+> SELECT MEAN("index") FROM "h2o_quality" GROUP BY *
+
+name: h2o_quality
+tags: location=coyote_creek, randtag=1
+time                   mean
+----                   ----
+1970-01-01T00:00:00Z   50.55405446521169
+
+
+name: h2o_quality
+tags: location=coyote_creek, randtag=2
+time                   mean
+----                   ----
+1970-01-01T00:00:00Z   50.49958856271162
+
+
+name: h2o_quality
+tags: location=coyote_creek, randtag=3
+time                   mean
+----                   ----
+1970-01-01T00:00:00Z   49.5164137518956
+
+
+name: h2o_quality
+tags: location=santa_monica, randtag=1
+time                   mean
+----                   ----
+1970-01-01T00:00:00Z   50.43829082296367
+
+
+name: h2o_quality
+tags: location=santa_monica, randtag=2
+time                   mean
+----                   ----
+1970-01-01T00:00:00Z   52.0688508894012
+
+
+name: h2o_quality
+tags: location=santa_monica, randtag=3
+time                   mean
+----                   ----
+1970-01-01T00:00:00Z   49.29386362086556
+```
+
+The query uses an InfluxQL [function](/influxdb/v1.7/query_language/functions/)
+to calculate the average `index` for every possible
+[tag](/influxdb/v1.7/concepts/glossary/#tag) combination in the `h2o_quality`
+measurement.
+
+Note that the query results are identical to the results of the query in the previous example,
+where we explicitly specified the `location` and `randtag` tag keys.
+This is because the `h2o_quality` measurement only has two tag keys.
+
+## GROUP BY time intervals
+
+`GROUP BY time()` queries group query results by a user-specified time interval.
+
+### Basic GROUP BY time() syntax
+
+#### Syntax
+
+```sql
+SELECT <function>(<field_key>) FROM_clause WHERE <time_range> GROUP BY time(<time_interval>),[tag_key] [fill(<fill_option>)]
+```
+
+Basic `GROUP BY time()` queries require an InfluxQL [function](/influxdb/v1.7/query_language/functions/)
+in the [`SELECT` clause](#the-basic-select-statement) and a time range in the
+[`WHERE` clause](#the-where-clause).
+Note that the `GROUP BY` clause must come after the `WHERE` clause.
+
+##### `time(time_interval)`
+
+The `time_interval` in the `GROUP BY time()` clause is a
+[duration literal](/influxdb/v1.7/query_language/spec/#durations).
+It determines how InfluxDB groups query results over time.
+For example, a `time_interval` of `5m` groups query results into five-minute
+time groups across the time range specified in the [`WHERE` clause](#the-where-clause).
+
+##### `fill(<fill_option>)`
+
+`fill(<fill_option>)` is optional.
+It changes the value reported for time intervals that have no data.
+See [GROUP BY time intervals and `fill()`](#group-by-time-intervals-and-fill)
+for more information.
+
+**Coverage:**
+
+Basic `GROUP BY time()` queries rely on the `time_interval` and on the InfluxDB database's
+preset time boundaries to determine the raw data included in each time interval
+and the timestamps returned by the query.
+
+#### Examples of basic syntax
+
+The examples below use the following subsample of the sample data:
+
+```sql
+> SELECT "water_level","location" FROM "h2o_feet" WHERE time >= '2015-08-18T00:00:00Z' AND time <= '2015-08-18T00:30:00Z'
+
+name: h2o_feet
+--------------
+time                   water_level   location
+2015-08-18T00:00:00Z   8.12          coyote_creek
+2015-08-18T00:00:00Z   2.064         santa_monica
+2015-08-18T00:06:00Z   8.005         coyote_creek
+2015-08-18T00:06:00Z   2.116         santa_monica
+2015-08-18T00:12:00Z   7.887         coyote_creek
+2015-08-18T00:12:00Z   2.028         santa_monica
+2015-08-18T00:18:00Z   7.762         coyote_creek
+2015-08-18T00:18:00Z   2.126         santa_monica
+2015-08-18T00:24:00Z   7.635         coyote_creek
+2015-08-18T00:24:00Z   2.041         santa_monica
+2015-08-18T00:30:00Z   7.5           coyote_creek
+2015-08-18T00:30:00Z   2.051         santa_monica
+```
+
+##### Group query results into 12 minute intervals
+
+```sql
+> SELECT COUNT("water_level") FROM "h2o_feet" WHERE "location"='coyote_creek' AND time >= '2015-08-18T00:00:00Z' AND time <= '2015-08-18T00:30:00Z' GROUP BY time(12m)
+
+name: h2o_feet
+--------------
+time                   count
+2015-08-18T00:00:00Z   2
+2015-08-18T00:12:00Z   2
+2015-08-18T00:24:00Z   2
+```
+
+The query uses an InfluxQL [function](/influxdb/v1.7/query_language/functions/)
+to count the number of `water_level` points with the [tag](/influxdb/v1.7/concepts/glossary/#tag)
+`location = coyote_creek`, and it groups results into 12 minute intervals.
+
+The result for each [timestamp](/influxdb/v1.7/concepts/glossary/#timestamp)
+represents a single 12 minute interval.
+The count for the first timestamp covers the raw data between `2015-08-18T00:00:00Z`
+and up to, but not including, `2015-08-18T00:12:00Z`.
+The count for the second timestamp covers the raw data between `2015-08-18T00:12:00Z`
+and up to, but not including, `2015-08-18T00:24:00Z`.
+
+##### Group query results into 12 minute intervals and by a tag key
+
+```sql
+> SELECT COUNT("water_level") FROM "h2o_feet" WHERE time >= '2015-08-18T00:00:00Z' AND time <= '2015-08-18T00:30:00Z' GROUP BY time(12m),"location"
+
+name: h2o_feet
+tags: location=coyote_creek
+time                   count
+----                   -----
+2015-08-18T00:00:00Z   2
+2015-08-18T00:12:00Z   2
+2015-08-18T00:24:00Z   2
+
+name: h2o_feet
+tags: location=santa_monica
+time                   count
+----                   -----
+2015-08-18T00:00:00Z   2
+2015-08-18T00:12:00Z   2
+2015-08-18T00:24:00Z   2
+```
+
+The query uses an InfluxQL [function](/influxdb/v1.7/query_language/functions/)
+to count the number of `water_level` points.
+It groups results by the `location` tag and into 12 minute intervals.
+Note that the time interval and the tag key are separated by a comma in the
+`GROUP BY` clause.
+
+The query returns two [series](/influxdb/v1.7/concepts/glossary/#series) of results: one for each
+[tag value](/influxdb/v1.7/concepts/glossary/#tag-value) of the `location` tag.
+The result for each timestamp represents a single 12 minute interval.
+The count for the first timestamp covers the raw data between `2015-08-18T00:00:00Z`
+and up to, but not including, `2015-08-18T00:12:00Z`.
+The count for the second timestamp covers the raw data between `2015-08-18T00:12:00Z`
+and up to, but not including, `2015-08-18T00:24:00Z`.
+
+#### Common issues with basic syntax
+
+##### Unexpected timestamps and values in query results
+
+With the basic syntax, InfluxDB relies on the `GROUP BY time()` interval
+and on the system's preset time boundaries to determine the raw data included
+in each time interval and the timestamps returned by the query.
+In some cases, this can lead to unexpected results.
+
+**Example**
+
+Raw data:
+
+```sql
+> SELECT "water_level" FROM "h2o_feet" WHERE "location"='coyote_creek' AND time >= '2015-08-18T00:00:00Z' AND time <= '2015-08-18T00:18:00Z'
+name: h2o_feet
+--------------
+time                 water_level
+2015-08-18T00:00:00Z 8.12
+2015-08-18T00:06:00Z 8.005
+2015-08-18T00:12:00Z 7.887
+2015-08-18T00:18:00Z 7.762
+```
+
+Query and results:
+
+The following query covers a 12-minute time range and groups results into 12-minute time intervals, but it returns **two** results:
+
+```sql
+> SELECT COUNT("water_level") FROM "h2o_feet" WHERE "location"='coyote_creek' AND time >= '2015-08-18T00:06:00Z' AND time < '2015-08-18T00:18:00Z' GROUP BY time(12m)
+
+name: h2o_feet
+time                 count
+----                 -----
+2015-08-18T00:00:00Z 1     <----- Note that this timestamp occurs before the start of the query's time range
+2015-08-18T00:12:00Z 1
+```
+
+Explanation:
+
+InfluxDB uses preset round-number time boundaries for `GROUP BY` intervals that are
+independent of any time conditions in the `WHERE` clause.
+When it calculates the results, all returned data must occur within the query's
+explicit time range, but the `GROUP BY` intervals will be based on the preset
+time boundaries.
+
+The table below shows the preset time boundary, the relevant `GROUP BY time()` interval, the
+points included, and the returned timestamp for each `GROUP BY time()`
+interval in the results.
+
+| Time Interval Number | Preset Time Boundary |`GROUP BY time()` Interval | Points Included | Returned Timestamp |
+| :------------- | :------------- | :------------- | :------------- | :------------- |
+| 1 | `time >= 2015-08-18T00:00:00Z AND time < 2015-08-18T00:12:00Z` | `time >= 2015-08-18T00:06:00Z AND time < 2015-08-18T00:12:00Z` | `8.005` | `2015-08-18T00:00:00Z` |
+| 2 | `time >= 2015-08-18T00:12:00Z AND time < 2015-08-18T00:24:00Z` | `time >= 2015-08-18T00:12:00Z AND time < 2015-08-18T00:18:00Z` | `7.887` | `2015-08-18T00:12:00Z` |
+
+The first preset 12-minute time boundary begins at `00:00` and ends just before
+`00:12`.
+Only one raw point (`8.005`) falls both within the query's first `GROUP BY time()` interval and in that
+first time boundary.
+Note that while the returned timestamp occurs before the start of the query's time range,
+the query result excludes data that occur before the query's time range.
+
+The second preset 12-minute time boundary begins at `00:12` and ends just before
+`00:24`.
+Only one raw point (`7.887`) falls both within the query's second `GROUP BY time()` interval and in that
+second time boundary.
+
+The [advanced `GROUP BY time()` syntax](#advanced-group-by-time-syntax) allows users to shift
+the start time of the InfluxDB database's preset time boundaries.
+[Example 3](#examples-3)
+in the Advanced Syntax section continues with the query shown here;
+it shifts the preset time boundaries forward by six minutes such that
+InfluxDB returns:
+
+```sql
+name: h2o_feet
+time                 count
+----                 -----
+2015-08-18T00:06:00Z 2
+```
+
+### Advanced GROUP BY time() syntax
+
+#### Syntax
+
+```sql
+SELECT <function>(<field_key>) FROM_clause WHERE <time_range> GROUP BY time(<time_interval>,<offset_interval>),[tag_key] [fill(<fill_option>)]
+```
+
+Advanced `GROUP BY time()` queries require an InfluxQL [function](/influxdb/v1.7/query_language/functions/)
+in the [`SELECT` clause](#the-basic-select-statement) and a time range in the
+[`WHERE` clause](#the-where-clause).
+Note that the `GROUP BY` clause must come after the `WHERE` clause.
+
+##### `time(time_interval,offset_interval)`
+
+See the [Basic GROUP BY time() Syntax](#basic-group-by-time-syntax)
+for details on the `time_interval`.
+
+The `offset_interval` is a
+[duration literal](/influxdb/v1.7/query_language/spec/#durations).
+It shifts the InfluxDB database's preset time boundaries forward or back.
+The `offset_interval` can be positive or negative.
+
+##### `fill(<fill_option>)`
+
+`fill(<fill_option>)` is optional.
+It changes the value reported for time intervals that have no data.
+See [GROUP BY time intervals and `fill()`](#group-by-time-intervals-and-fill)
+for more information.
+
+**Coverage:**
+
+Advanced `GROUP BY time()` queries rely on the `time_interval`, the `offset_interval`,
+and the InfluxDB database's preset time boundaries to determine the raw data included in each time interval
+and the timestamps returned by the query.
+
+#### Examples of advanced syntax
+
+The examples below use the following subsample of the sample data:
+
+```sql
+> SELECT "water_level" FROM "h2o_feet" WHERE "location"='coyote_creek' AND time >= '2015-08-18T00:00:00Z' AND time <= '2015-08-18T00:54:00Z'
+
+name: h2o_feet
+--------------
+time                 water_level
+2015-08-18T00:00:00Z 8.12
+2015-08-18T00:06:00Z 8.005
+2015-08-18T00:12:00Z 7.887
+2015-08-18T00:18:00Z 7.762
+2015-08-18T00:24:00Z 7.635
+2015-08-18T00:30:00Z 7.5
+2015-08-18T00:36:00Z 7.372
+2015-08-18T00:42:00Z 7.234
+2015-08-18T00:48:00Z 7.11
+2015-08-18T00:54:00Z 6.982
+```
+
+##### Group query results into 18-minute intervals and shift the preset time boundaries forward
+
+```sql
+> SELECT MEAN("water_level") FROM "h2o_feet" WHERE "location"='coyote_creek' AND time >= '2015-08-18T00:06:00Z' AND time <= '2015-08-18T00:54:00Z' GROUP BY time(18m,6m)
+
+name: h2o_feet
+time                 mean
+----                 ----
+2015-08-18T00:06:00Z 7.884666666666667
+2015-08-18T00:24:00Z 7.502333333333333
+2015-08-18T00:42:00Z 7.108666666666667
+```
+
+The query uses an InfluxQL [function](/influxdb/v1.7/query_language/functions/)
+to calculate the average `water_level`, grouping results into 18-minute
+time intervals, and offsetting the preset time boundaries by six minutes.
+
+The time boundaries and returned timestamps for the query **without** the `offset_interval` adhere to the InfluxDB database's preset time boundaries.
+Let's first examine the results without the offset:
+
+```sql
+> SELECT MEAN("water_level") FROM "h2o_feet" WHERE "location"='coyote_creek' AND time >= '2015-08-18T00:06:00Z' AND time <= '2015-08-18T00:54:00Z' GROUP BY time(18m)
+
+name: h2o_feet
+time                 mean
+----                 ----
+2015-08-18T00:00:00Z 7.946
+2015-08-18T00:18:00Z 7.6323333333333325
+2015-08-18T00:36:00Z 7.238666666666667
+2015-08-18T00:54:00Z 6.982
+```
+
+The time boundaries and returned timestamps for the query **without** the
+`offset_interval` adhere to the InfluxDB database's preset time boundaries:
+
+| Time Interval Number | Preset Time Boundary |`GROUP BY time()` Interval | Points Included | Returned Timestamp |
+| :------------- | :------------- | :------------- | :------------- | :------------- |
+| 1 | `time >= 2015-08-18T00:00:00Z AND time < 2015-08-18T00:18:00Z` | `time >= 2015-08-18T00:06:00Z AND time < 2015-08-18T00:18:00Z` | `8.005`,`7.887` | `2015-08-18T00:00:00Z` |
+| 2 | `time >= 2015-08-18T00:18:00Z AND time < 2015-08-18T00:36:00Z` | <--- same | `7.762`,`7.635`,`7.5` | `2015-08-18T00:18:00Z` |
+| 3 | `time >= 2015-08-18T00:36:00Z AND time < 2015-08-18T00:54:00Z` | <--- same | `7.372`,`7.234`,`7.11` | `2015-08-18T00:36:00Z` |
+| 4 | `time >= 2015-08-18T00:54:00Z AND time < 2015-08-18T01:12:00Z` | `time = 2015-08-18T00:54:00Z` | `6.982` | `2015-08-18T00:54:00Z` |
+
+The first preset 18-minute time boundary begins at `00:00` and ends just before
+`00:18`.
+Two raw points (`8.005` and `7.887`) fall both within the first `GROUP BY time()` interval and in that
+first time boundary.
+Note that while the returned timestamp occurs before the start of the query's time range,
+the query result excludes data that occur before the query's time range.
+
+The second preset 18-minute time boundary begins at `00:18` and ends just before
+`00:36`.
+Three raw points (`7.762`, `7.635`, and `7.5`) fall both within the second `GROUP BY time()` interval and in that
+second time boundary. In this case, the boundary time range and the interval's time range are the same.
+
+The fourth preset 18-minute time boundary begins at `00:54` and ends just before
+`01:12`.
+One raw point (`6.982`) falls both within the fourth `GROUP BY time()` interval and in that
+fourth time boundary.
+
+The time boundaries and returned timestamps for the query **with** the
+`offset_interval` adhere to the offset time boundaries:
+
+| Time Interval Number | Offset Time Boundary |`GROUP BY time()` Interval | Points Included | Returned Timestamp |
+| :------------- | :------------- | :------------- | :------------- | :------------- |
+| 1 | `time >= 2015-08-18T00:06:00Z AND time < 2015-08-18T00:24:00Z` | <--- same | `8.005`,`7.887`,`7.762` | `2015-08-18T00:06:00Z` |
+| 2 | `time >= 2015-08-18T00:24:00Z AND time < 2015-08-18T00:42:00Z` | <--- same | `7.635`,`7.5`,`7.372` | `2015-08-18T00:24:00Z` |
+| 3 | `time >= 2015-08-18T00:42:00Z AND time < 2015-08-18T01:00:00Z` | <--- same | `7.234`,`7.11`,`6.982` | `2015-08-18T00:42:00Z` |
+| 4 | `time >= 2015-08-18T01:00:00Z AND time < 2015-08-18T01:18:00Z` | NA | NA | NA |
+
+The six-minute offset interval shifts the preset boundary's time range forward
+such that the boundary time ranges and the relevant `GROUP BY time()` interval time ranges are
+always the same.
+With the offset, each interval performs the calculation on three points, and
+the timestamp returned matches both the start of the boundary time range and the
+start of the `GROUP BY time()` interval time range.
+
+Note that the `offset_interval` forces the fourth time boundary to be outside
+the query's time range, so the query returns no results for that last interval.
+
+##### Group query results into 18-minute intervals and shift the preset time boundaries back
+
+```sql
+> SELECT MEAN("water_level") FROM "h2o_feet" WHERE "location"='coyote_creek' AND time >= '2015-08-18T00:06:00Z' AND time <= '2015-08-18T00:54:00Z' GROUP BY time(18m,-12m)
+
+name: h2o_feet
+time                 mean
+----                 ----
+2015-08-18T00:06:00Z 7.884666666666667
+2015-08-18T00:24:00Z 7.502333333333333
+2015-08-18T00:42:00Z 7.108666666666667
+```
+
+The query uses an InfluxQL [function](/influxdb/v1.7/query_language/functions/)
+to calculate the average `water_level`, grouping results into 18-minute
+time intervals, and offsetting the preset time boundaries by -12 minutes.
+
+> **Note:** The query in Example 2 returns the same results as the query in Example 1, but
+the query in Example 2 uses a negative `offset_interval` instead of a positive
+`offset_interval`.
+> There are no performance differences between the two queries; feel free to choose the most
+intuitive option when deciding between a positive and negative `offset_interval`.
+
+The time boundaries and returned timestamps for the query **without** the `offset_interval` adhere to the InfluxDB database's preset time boundaries. Let's first examine the results without the offset:
+
+```sql
+> SELECT MEAN("water_level") FROM "h2o_feet" WHERE "location"='coyote_creek' AND time >= '2015-08-18T00:06:00Z' AND time <= '2015-08-18T00:54:00Z' GROUP BY time(18m)
+
+name: h2o_feet
+time                 mean
+----                 ----
+2015-08-18T00:00:00Z 7.946
+2015-08-18T00:18:00Z 7.6323333333333325
+2015-08-18T00:36:00Z 7.238666666666667
+2015-08-18T00:54:00Z 6.982
+```
+
+The time boundaries and returned timestamps for the query **without** the
+`offset_interval` adhere to the InfluxDB database's preset time boundaries:
+
+| Time Interval Number | Preset Time Boundary |`GROUP BY time()` Interval | Points Included | Returned Timestamp |
+| :------------- | :------------- | :------------- | :------------- | :------------- |
+| 1 | `time >= 2015-08-18T00:00:00Z AND time < 2015-08-18T00:18:00Z` | `time >= 2015-08-18T00:06:00Z AND time < 2015-08-18T00:18:00Z` | `8.005`,`7.887` | `2015-08-18T00:00:00Z` |
+| 2 | `time >= 2015-08-18T00:18:00Z AND time < 2015-08-18T00:36:00Z` | <--- same | `7.762`,`7.635`,`7.5` | `2015-08-18T00:18:00Z` |
+| 3 | `time >= 2015-08-18T00:36:00Z AND time < 2015-08-18T00:54:00Z` | <--- same | `7.372`,`7.234`,`7.11` | `2015-08-18T00:36:00Z` |
+| 4 | `time >= 2015-08-18T00:54:00Z AND time < 2015-08-18T01:12:00Z` | `time = 2015-08-18T00:54:00Z` | `6.982` | `2015-08-18T00:54:00Z` |
+
+The first preset 18-minute time boundary begins at `00:00` and ends just before
+`00:18`.
+Two raw points (`8.005` and `7.887`) fall both within the first `GROUP BY time()` interval and in that
+first time boundary.
+Note that while the returned timestamp occurs before the start of the query's time range,
+the query result excludes data that occur before the query's time range.
+
+The second preset 18-minute time boundary begins at `00:18` and ends just before
+`00:36`.
+Three raw points (`7.762`, `7.635`, and `7.5`) fall both within the second `GROUP BY time()` interval and in that
+second time boundary. In this case, the boundary time range and the interval's time range are the same.
+
+The fourth preset 18-minute time boundary begins at `00:54` and ends just before
+`01:12`.
+One raw point (`6.982`) falls both within the fourth `GROUP BY time()` interval and in that
+fourth time boundary.
+
+The time boundaries and returned timestamps for the query **with** the
+`offset_interval` adhere to the offset time boundaries:
+
+| Time Interval Number | Offset Time Boundary |`GROUP BY time()` Interval | Points Included | Returned Timestamp |
+| :------------- | :------------- | :------------- | :------------- | :------------- |
+| 1 | `time >= 2015-08-17T23:48:00Z AND time < 2015-08-18T00:06:00Z` | NA | NA | NA |
+| 2 | `time >= 2015-08-18T00:06:00Z AND time < 2015-08-18T00:24:00Z` | <--- same | `8.005`,`7.887`,`7.762` | `2015-08-18T00:06:00Z` |
+| 3 | `time >= 2015-08-18T00:24:00Z AND time < 2015-08-18T00:42:00Z` | <--- same | `7.635`,`7.5`,`7.372` | `2015-08-18T00:24:00Z` |
+| 4 | `time >= 2015-08-18T00:42:00Z AND time < 2015-08-18T01:00:00Z` | <--- same | `7.234`,`7.11`,`6.982` | `2015-08-18T00:42:00Z` |
+
+The negative 12-minute offset interval shifts the preset boundary's time range back
+such that the boundary time ranges and the relevant `GROUP BY time()` interval time ranges are always the
+same.
+With the offset, each interval performs the calculation on three points, and
+the timestamp returned matches both the start of the boundary time range and the
+start of the `GROUP BY time()` interval time range.
+
+Note that the `offset_interval` forces the first time boundary to be outside
+the query's time range, so the query returns no results for that first interval.
+
+##### Group query results into 12-minute intervals and shift the preset time boundaries forward
+
+This example is a continuation of the scenario outlined in [Common Issues with Basic Syntax](#common-issues-with-basic-syntax).
+
+```sql
+> SELECT COUNT("water_level") FROM "h2o_feet" WHERE "location"='coyote_creek' AND time >= '2015-08-18T00:06:00Z' AND time < '2015-08-18T00:18:00Z' GROUP BY time(12m,6m)
+
+name: h2o_feet
+time                 count
+----                 -----
+2015-08-18T00:06:00Z 2
+```
+
+The query uses an InfluxQL [function](/influxdb/v1.7/query_language/functions/)
+to count the number of `water_level` points, grouping results into 12-minute
+time intervals, and offsetting the preset time boundaries by six minutes.
+
+The time boundaries and returned timestamps for the query **without** the `offset_interval` adhere to the InfluxDB database's preset time boundaries. Let's first examine the results without the offset:
+
+```sql
+> SELECT COUNT("water_level") FROM "h2o_feet" WHERE "location"='coyote_creek' AND time >= '2015-08-18T00:06:00Z' AND time < '2015-08-18T00:18:00Z' GROUP BY time(12m)
+
+name: h2o_feet
+time                 count
+----                 -----
+2015-08-18T00:00:00Z 1
+2015-08-18T00:12:00Z 1
+```
+
+The time boundaries and returned timestamps for the query **without** the
+`offset_interval` adhere to the InfluxDB database's preset time boundaries:
+
+| Time Interval Number | Preset Time Boundary |`GROUP BY time()` Interval | Points Included | Returned Timestamp |
+| :------------- | :------------- | :------------- | :------------- | :------------- |
+| 1 | `time >= 2015-08-18T00:00:00Z AND time < 2015-08-18T00:12:00Z` | `time >= 2015-08-18T00:06:00Z AND time < 2015-08-18T00:12:00Z` | `8.005` | `2015-08-18T00:00:00Z` |
+| 2 | `time >= 2015-08-18T00:12:00Z AND time < 2015-08-18T00:24:00Z` | `time >= 2015-08-18T00:12:00Z AND time < 2015-08-18T00:18:00Z` | `7.887` | `2015-08-18T00:12:00Z` |
+
+The first preset 12-minute time boundary begins at `00:00` and ends just before
+`00:12`.
+Only one raw point (`8.005`) falls both within the query's first `GROUP BY time()` interval and in that
+first time boundary.
+Note that while the returned timestamp occurs before the start of the query's time range,
+the query result excludes data that occur before the query's time range.
+
+The second preset 12-minute time boundary begins at `00:12` and ends just before
+`00:24`.
+Only one raw point (`7.887`) falls both within the query's second `GROUP BY time()` interval and in that
+second time boundary.
+
+The time boundaries and returned timestamps for the query **with** the
+`offset_interval` adhere to the offset time boundaries:
+
+| Time Interval Number | Offset Time Boundary |`GROUP BY time()` Interval | Points Included | Returned Timestamp |
+| :------------- | :------------- | :------------- | :------------- | :------------- |
+| 1 | `time >= 2015-08-18T00:06:00Z AND time < 2015-08-18T00:18:00Z` | <--- same | `8.005`,`7.887` | `2015-08-18T00:06:00Z` |
+| 2 | `time >= 2015-08-18T00:18:00Z AND time < 2015-08-18T00:30:00Z` | NA | NA | NA |
+
+The six-minute offset interval shifts the preset boundary's time range forward
+such that the preset boundary time range and the relevant `GROUP BY time()` interval time range are the
+same.
+With the offset, the query returns a single result, and the timestamp returned
+matches both the start of the boundary time range and the start of the `GROUP BY time()` interval
+time range.
+
+Note that the `offset_interval` forces the second time boundary to be outside
+the query's time range, so the query returns no results for that second interval.
+
+## `GROUP BY` time intervals and `fill()`
+
+`fill()` changes the value reported for time intervals that have no data.
+
+#### Syntax
+
+```sql
+SELECT <function>(<field_key>) FROM_clause WHERE <time_range> GROUP BY time(<time_interval>[,<offset_interval>])[,tag_key] [fill(<fill_option>)]
+```
+
+By default, a `GROUP BY time()` interval with no data reports `null` as its
+value in the output column.
+`fill()` changes the value reported for time intervals that have no data.
+Note that `fill()` must go at the end of the `GROUP BY` clause if you're
+`GROUP(ing) BY` several things (for example, both [tags](/influxdb/v1.7/concepts/glossary/#tag) and a time interval).
+
+##### `fill_option`
+
+- Any numerical value: Reports the given numerical value for time intervals with no data.
+- `linear`: Reports the results of [linear interpolation](https://en.wikipedia.org/wiki/Linear_interpolation) for time intervals with no data.
+- `none`: Reports no timestamp and no value for time intervals with no data.
+- `null`: Reports null for time intervals with no data but returns a timestamp. This is the same as the default behavior.
+- `previous`: Reports the value from the previous time interval for time intervals with no data.
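+
+As a brief sketch of that ordering (results omitted here), the query below groups
+results by both a time interval and the `location` tag, and places `fill(0)` at the
+end of the `GROUP BY` clause:
+
+```sql
+> SELECT MEAN("water_level") FROM "h2o_feet" WHERE time >= '2015-08-18T00:00:00Z' AND time <= '2015-08-18T00:42:00Z' GROUP BY time(12m),"location" fill(0)
+```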
+ +#### Examples + +{{< tabs-wrapper >}} +{{% tabs %}} +[Example 1: fill(100)](#) +[Example 2: fill(linear)](#) +[Example 3: fill(none)](#) +[Example 4: fill(null)](#) +[Example 5: fill(previous)](#) +{{% /tabs %}} + +{{% tab-content %}} + +Without `fill(100)`: + +```sql +> SELECT MAX("water_level") FROM "h2o_feet" WHERE "location"='coyote_creek' AND time >= '2015-09-18T16:00:00Z' AND time <= '2015-09-18T16:42:00Z' GROUP BY time(12m) + +name: h2o_feet +-------------- +time max +2015-09-18T16:00:00Z 3.599 +2015-09-18T16:12:00Z 3.402 +2015-09-18T16:24:00Z 3.235 +2015-09-18T16:36:00Z +``` + +With `fill(100)`: + +```sql +> SELECT MAX("water_level") FROM "h2o_feet" WHERE "location"='coyote_creek' AND time >= '2015-09-18T16:00:00Z' AND time <= '2015-09-18T16:42:00Z' GROUP BY time(12m) fill(100) + +name: h2o_feet +-------------- +time max +2015-09-18T16:00:00Z 3.599 +2015-09-18T16:12:00Z 3.402 +2015-09-18T16:24:00Z 3.235 +2015-09-18T16:36:00Z 100 +``` + +`fill(100)` changes the value reported for the time interval with no data to `100`. + +{{% /tab-content %}} + +{{% tab-content %}} + +Without `fill(linear)`: + +```sql +> SELECT MEAN("tadpoles") FROM "pond" WHERE time >= '2016-11-11T21:00:00Z' AND time <= '2016-11-11T22:06:00Z' GROUP BY time(12m) + +name: pond +time mean +---- ---- +2016-11-11T21:00:00Z 1 +2016-11-11T21:12:00Z +2016-11-11T21:24:00Z 3 +2016-11-11T21:36:00Z +2016-11-11T21:48:00Z +2016-11-11T22:00:00Z 6 +``` + +With `fill(linear)`: + +```sql +> SELECT MEAN("tadpoles") FROM "pond" WHERE time >= '2016-11-11T21:00:00Z' AND time <= '2016-11-11T22:06:00Z' GROUP BY time(12m) fill(linear) + +name: pond +time mean +---- ---- +2016-11-11T21:00:00Z 1 +2016-11-11T21:12:00Z 2 +2016-11-11T21:24:00Z 3 +2016-11-11T21:36:00Z 4 +2016-11-11T21:48:00Z 5 +2016-11-11T22:00:00Z 6 +``` + +`fill(linear)` changes the value reported for the time interval with no data +to the results of [linear interpolation](https://en.wikipedia.org/wiki/Linear_interpolation). + +> **Note:** The data in Example 2 are not in `NOAA_water_database`. +We had to create a dataset with less regular data to work with `fill(linear)`. + +{{% /tab-content %}} + +{{% tab-content %}} + +Without `fill(none)`: + +```sql +> SELECT MAX("water_level") FROM "h2o_feet" WHERE "location"='coyote_creek' AND time >= '2015-09-18T16:00:00Z' AND time <= '2015-09-18T16:42:00Z' GROUP BY time(12m) + +name: h2o_feet +-------------- +time max +2015-09-18T16:00:00Z 3.599 +2015-09-18T16:12:00Z 3.402 +2015-09-18T16:24:00Z 3.235 +2015-09-18T16:36:00Z +``` + +With `fill(none)`: + +```sql +> SELECT MAX("water_level") FROM "h2o_feet" WHERE "location"='coyote_creek' AND time >= '2015-09-18T16:00:00Z' AND time <= '2015-09-18T16:42:00Z' GROUP BY time(12m) fill(none) + +name: h2o_feet +-------------- +time max +2015-09-18T16:00:00Z 3.599 +2015-09-18T16:12:00Z 3.402 +2015-09-18T16:24:00Z 3.235 +``` + +`fill(none)` reports no value and no timestamp for the time interval with no data. 
+ +{{% /tab-content %}} + +{{% tab-content %}} + +Without `fill(null)`: + +```sql +> SELECT MAX("water_level") FROM "h2o_feet" WHERE "location"='coyote_creek' AND time >= '2015-09-18T16:00:00Z' AND time <= '2015-09-18T16:42:00Z' GROUP BY time(12m) + +name: h2o_feet +-------------- +time max +2015-09-18T16:00:00Z 3.599 +2015-09-18T16:12:00Z 3.402 +2015-09-18T16:24:00Z 3.235 +2015-09-18T16:36:00Z +``` + +With `fill(null)`: + +```sql +> SELECT MAX("water_level") FROM "h2o_feet" WHERE "location"='coyote_creek' AND time >= '2015-09-18T16:00:00Z' AND time <= '2015-09-18T16:42:00Z' GROUP BY time(12m) fill(null) + +name: h2o_feet +-------------- +time max +2015-09-18T16:00:00Z 3.599 +2015-09-18T16:12:00Z 3.402 +2015-09-18T16:24:00Z 3.235 +2015-09-18T16:36:00Z +``` + +`fill(null)` reports `null` as the value for the time interval with no data. +That result matches the result of the query without `fill(null)`. + +{{% /tab-content %}} + +{{% tab-content %}} + +Without `fill(previous)`: + +```sql +> SELECT MAX("water_level") FROM "h2o_feet" WHERE "location"='coyote_creek' AND time >= '2015-09-18T16:00:00Z' AND time <= '2015-09-18T16:42:00Z' GROUP BY time(12m) + +name: h2o_feet +-------------- +time max +2015-09-18T16:00:00Z 3.599 +2015-09-18T16:12:00Z 3.402 +2015-09-18T16:24:00Z 3.235 +2015-09-18T16:36:00Z +``` + +With `fill(previous)`: + +```sql +> SELECT MAX("water_level") FROM "h2o_feet" WHERE "location"='coyote_creek' AND time >= '2015-09-18T16:00:00Z' AND time <= '2015-09-18T16:42:00Z' GROUP BY time(12m) fill(previous) + +name: h2o_feet +-------------- +time max +2015-09-18T16:00:00Z 3.599 +2015-09-18T16:12:00Z 3.402 +2015-09-18T16:24:00Z 3.235 +2015-09-18T16:36:00Z 3.235 +``` + +`fill(previous)` changes the value reported for the time interval with no data to `3.235`, +the value from the previous time interval. + +{{% /tab-content %}} +{{< /tabs-wrapper >}} + +#### Common issues with `fill()` + +##### Queries with `fill()` when no data fall within the query's time range + +Currently, queries ignore `fill()` if no data fall within the query's time range. +This is the expected behavior. An open +[feature request](https://github.com/influxdata/influxdb/issues/6967) on GitHub +proposes that `fill()` should force a return of values even if the query's time +range covers no data. + +**Example** + +The following query returns no data because `water_level` has no points within +the query's time range. +Note that `fill(800)` has no effect on the query results. + +```sql +> SELECT MEAN("water_level") FROM "h2o_feet" WHERE "location" = 'coyote_creek' AND time >= '2015-09-18T22:00:00Z' AND time <= '2015-09-18T22:18:00Z' GROUP BY time(12m) fill(800) +> +``` + +##### Queries with `fill(previous)` when the previous result falls outside the query's time range + +`fill(previous)` doesn’t fill the result for a time interval if the previous +value is outside the query’s time range. + +**Example** + +The following query covers the time range between `2015-09-18T16:24:00Z` and `2015-09-18T16:54:00Z`. +Note that `fill(previous)` fills the result for `2015-09-18T16:36:00Z` with the +result from `2015-09-18T16:24:00Z`. + +```sql +> SELECT MAX("water_level") FROM "h2o_feet" WHERE location = 'coyote_creek' AND time >= '2015-09-18T16:24:00Z' AND time <= '2015-09-18T16:54:00Z' GROUP BY time(12m) fill(previous) + +name: h2o_feet +-------------- +time max +2015-09-18T16:24:00Z 3.235 +2015-09-18T16:36:00Z 3.235 +2015-09-18T16:48:00Z 4 +``` + +The next query shortens the time range in the previous query. 
+It now covers the time between `2015-09-18T16:36:00Z` and `2015-09-18T16:54:00Z`.
+Note that `fill(previous)` doesn't fill the result for `2015-09-18T16:36:00Z` with the
+result from `2015-09-18T16:24:00Z`; the result for `2015-09-18T16:24:00Z` is outside the query's
+shorter time range.
+
+```sql
+> SELECT MAX("water_level") FROM "h2o_feet" WHERE location = 'coyote_creek' AND time >= '2015-09-18T16:36:00Z' AND time <= '2015-09-18T16:54:00Z' GROUP BY time(12m) fill(previous)
+
+name: h2o_feet
+--------------
+time                 max
+2015-09-18T16:36:00Z
+2015-09-18T16:48:00Z 4
+```
+
+##### `fill(linear)` when the previous or following result falls outside the query's time range
+
+`fill(linear)` doesn't fill the result for a time interval with no data if the
+previous result or the following result is outside the query's time range.
+
+**Example**
+
+The following query covers the time range between `2016-11-11T21:24:00Z` and
+`2016-11-11T22:06:00Z`. Note that `fill(linear)` fills the results for the
+`2016-11-11T21:36:00Z` time interval and the `2016-11-11T21:48:00Z` time interval
+using the values from the `2016-11-11T21:24:00Z` time interval and the
+`2016-11-11T22:00:00Z` time interval.
+
+```sql
+> SELECT MEAN("tadpoles") FROM "pond" WHERE time > '2016-11-11T21:24:00Z' AND time <= '2016-11-11T22:06:00Z' GROUP BY time(12m) fill(linear)
+
+name: pond
+time                 mean
+----                 ----
+2016-11-11T21:24:00Z 3
+2016-11-11T21:36:00Z 4
+2016-11-11T21:48:00Z 5
+2016-11-11T22:00:00Z 6
+```
+
+The next query shortens the time range in the previous query.
+It now covers the time between `2016-11-11T21:36:00Z` and `2016-11-11T22:06:00Z`.
+Note that `fill(linear)` doesn't fill the results for the `2016-11-11T21:36:00Z`
+time interval and the `2016-11-11T21:48:00Z` time interval; the result for
+`2016-11-11T21:24:00Z` is outside the query's shorter time range and InfluxDB
+cannot perform the linear interpolation.
+
+```sql
+> SELECT MEAN("tadpoles") FROM "pond" WHERE time >= '2016-11-11T21:36:00Z' AND time <= '2016-11-11T22:06:00Z' GROUP BY time(12m) fill(linear)
+name: pond
+time                 mean
+----                 ----
+2016-11-11T21:36:00Z
+2016-11-11T21:48:00Z
+2016-11-11T22:00:00Z 6
+```
+
+> **Note:** The data in Issue 3 are not in `NOAA_water_database`.
+> We had to create a dataset with less regular data to work with `fill(linear)`.
+
+# The INTO clause
+
+The `INTO` clause writes query results to a user-specified [measurement](/influxdb/v1.7/concepts/glossary/#measurement).
+
+### Syntax
+
+```sql
+SELECT_clause INTO <measurement_name> FROM_clause [WHERE_clause] [GROUP_BY_clause]
+```
+
+The `INTO` clause supports several formats for specifying a [measurement](/influxdb/v1.7/concepts/glossary/#measurement):
+
+`INTO <measurement_name>`
+
+Writes data to the specified measurement.
+If you're using the [CLI](/influxdb/v1.7/tools/shell/), InfluxDB writes the data to the measurement in the
+[`USE`d](/influxdb/v1.7/tools/shell/#commands)
+[database](/influxdb/v1.7/concepts/glossary/#database) and the `DEFAULT` [retention policy](/influxdb/v1.7/concepts/glossary/#retention-policy-rp).
+If you're using the [InfluxDB API](/influxdb/v1.7/tools/api/), InfluxDB writes the data to the
+measurement in the database specified in the [`db` query string parameter](/influxdb/v1.7/tools/api/#query-string-parameters)
+and the `DEFAULT` retention policy.
+
+`INTO <database_name>.<retention_policy_name>.<measurement_name>`
+
+Writes data to a fully qualified measurement.
+Fully qualify a measurement by specifying its database and retention policy.
+
+`INTO <database_name>..<measurement_name>`
+
+Writes data to a measurement in a user-specified database and the `DEFAULT`
+retention policy.
+
+`INTO <database_name>.<retention_policy_name>.:MEASUREMENT FROM /<regular_expression>/`
+
+Writes data to all measurements in the user-specified database and
+retention policy that match the [regular expression](#regular-expressions) in the `FROM` clause.
+`:MEASUREMENT` is a backreference to each measurement matched in the `FROM` clause.
+
+### Examples
+
+#### Rename a database
+
+```sql
+> SELECT * INTO "copy_NOAA_water_database"."autogen".:MEASUREMENT FROM "NOAA_water_database"."autogen"./.*/ GROUP BY *
+
+name: result
+time written
+---- -------
+0    76290
+```
+
+Directly renaming a database in InfluxDB is not possible, so a common use for the `INTO` clause is to move data from one database to another.
+The query above writes all data in the `NOAA_water_database` and `autogen` retention policy to the `copy_NOAA_water_database` database and the `autogen` retention policy.
+
+The [backreference](#examples-5) syntax (`:MEASUREMENT`) maintains the source measurement names in the destination database.
+Note that both the `copy_NOAA_water_database` database and its `autogen` retention policy must exist prior to running the `INTO` query.
+See [Database Management](/influxdb/v1.7/query_language/database_management/)
+for how to manage databases and retention policies.
+
+The `GROUP BY *` clause [preserves tags](#missing-data) in the source database as tags in the destination database.
+The following query does not maintain the series context for tags; tags will be stored as fields in the destination database (`copy_NOAA_water_database`):
+
+```sql
+SELECT * INTO "copy_NOAA_water_database"."autogen".:MEASUREMENT FROM "NOAA_water_database"."autogen"./.*/
+```
+
+When moving large amounts of data, we recommend sequentially running `INTO` queries for different measurements and using time boundaries in the [`WHERE` clause](#time-syntax).
+This prevents your system from running out of memory.
+The codeblock below provides sample syntax for those queries:
+
+```
+SELECT *
+INTO <database_name>.<retention_policy_name>.<measurement_name>
+FROM <database_name>.<retention_policy_name>.<measurement_name>
+WHERE time > now() - 100w and time < now() - 90w GROUP BY *
+
+SELECT *
+INTO <database_name>.<retention_policy_name>.<measurement_name>
+FROM <database_name>.<retention_policy_name>.<measurement_name>
+WHERE time > now() - 90w and time < now() - 80w GROUP BY *
+
+SELECT *
+INTO <database_name>.<retention_policy_name>.<measurement_name>
+FROM <database_name>.<retention_policy_name>.<measurement_name>
+WHERE time > now() - 80w and time < now() - 70w GROUP BY *
+```
+
+#### Write the results of a query to a measurement
+
+```sql
+> SELECT "water_level" INTO "h2o_feet_copy_1" FROM "h2o_feet" WHERE "location" = 'coyote_creek'
+
+name: result
+------------
+time                 written
+1970-01-01T00:00:00Z 7604
+
+> SELECT * FROM "h2o_feet_copy_1"
+
+name: h2o_feet_copy_1
+---------------------
+time                 water_level
+2015-08-18T00:00:00Z 8.12
+[...]
+2015-09-18T16:48:00Z 4
+```
+
+The query writes its results to a new [measurement](/influxdb/v1.7/concepts/glossary/#measurement): `h2o_feet_copy_1`.
+If you're using the [CLI](/influxdb/v1.7/tools/shell/), InfluxDB writes the data to
+the `USE`d [database](/influxdb/v1.7/concepts/glossary/#database) and the `DEFAULT` [retention policy](/influxdb/v1.7/concepts/glossary/#retention-policy-rp).
+If you're using the [InfluxDB API](/influxdb/v1.7/tools/api/), InfluxDB writes the
+data to the database and retention policy specified in the `db` and `rp`
+[query string parameters](/influxdb/v1.7/tools/api/#query-string-parameters).
+If you do not set the `rp` query string parameter, the InfluxDB API automatically
+writes the data to the database's `DEFAULT` retention policy.
+
+The response shows the number of points (`7604`) that InfluxDB writes to `h2o_feet_copy_1`.
+The timestamp in the response is meaningless; InfluxDB uses epoch 0
+(`1970-01-01T00:00:00Z`) as a null timestamp equivalent.
+
+#### Write the results of a query to a fully qualified measurement
+
+```sql
+> SELECT "water_level" INTO "where_else"."autogen"."h2o_feet_copy_2" FROM "h2o_feet" WHERE "location" = 'coyote_creek'
+
+name: result
+------------
+time                 written
+1970-01-01T00:00:00Z 7604
+
+> SELECT * FROM "where_else"."autogen"."h2o_feet_copy_2"
+
+name: h2o_feet_copy_2
+---------------------
+time                 water_level
+2015-08-18T00:00:00Z 8.12
+[...]
+2015-09-18T16:48:00Z 4
+```
+
+The query writes its results to a new measurement: `h2o_feet_copy_2`.
+InfluxDB writes the data to the `where_else` database and to the `autogen`
+retention policy.
+Note that both `where_else` and `autogen` must exist prior to running the `INTO`
+query.
+See [Database Management](/influxdb/v1.7/query_language/database_management/)
+for how to manage databases and retention policies.
+
+The response shows the number of points (`7604`) that InfluxDB writes to `h2o_feet_copy_2`.
+The timestamp in the response is meaningless; InfluxDB uses epoch 0
+(`1970-01-01T00:00:00Z`) as a null timestamp equivalent.
+
+#### Write aggregated results to a measurement (downsampling)
+
+```sql
+> SELECT MEAN("water_level") INTO "all_my_averages" FROM "h2o_feet" WHERE "location" = 'coyote_creek' AND time >= '2015-08-18T00:00:00Z' AND time <= '2015-08-18T00:30:00Z' GROUP BY time(12m)
+
+name: result
+------------
+time                 written
+1970-01-01T00:00:00Z 3
+
+> SELECT * FROM "all_my_averages"
+
+name: all_my_averages
+---------------------
+time                 mean
+2015-08-18T00:00:00Z 8.0625
+2015-08-18T00:12:00Z 7.8245
+2015-08-18T00:24:00Z 7.5675
+```
+
+The query aggregates data using an
+InfluxQL [function](/influxdb/v1.7/query_language/functions) and a [`GROUP BY
+time()` clause](#group-by-time-intervals).
+It also writes its results to the `all_my_averages` measurement.
+
+The response shows the number of points (`3`) that InfluxDB writes to `all_my_averages`.
+The timestamp in the response is meaningless; InfluxDB uses epoch 0
+(`1970-01-01T00:00:00Z`) as a null timestamp equivalent.
+
+The query is an example of downsampling: taking higher precision data,
+aggregating those data to a lower precision, and storing the lower precision
+data in the database.
+Downsampling is a common use case for the `INTO` clause.
+ +#### Write aggregated results for more than one measurement to a different database (downsampling with backreferencing) + +```sql +> SELECT MEAN(*) INTO "where_else"."autogen".:MEASUREMENT FROM /.*/ WHERE time >= '2015-08-18T00:00:00Z' AND time <= '2015-08-18T00:06:00Z' GROUP BY time(12m) + +name: result +time written +---- ------- +1970-01-01T00:00:00Z 5 + +> SELECT * FROM "where_else"."autogen"./.*/ + +name: average_temperature +time mean_degrees mean_index mean_pH mean_water_level +---- ------------ ---------- ------- ---------------- +2015-08-18T00:00:00Z 78.5 + +name: h2o_feet +time mean_degrees mean_index mean_pH mean_water_level +---- ------------ ---------- ------- ---------------- +2015-08-18T00:00:00Z 5.07625 + +name: h2o_pH +time mean_degrees mean_index mean_pH mean_water_level +---- ------------ ---------- ------- ---------------- +2015-08-18T00:00:00Z 6.75 + +name: h2o_quality +time mean_degrees mean_index mean_pH mean_water_level +---- ------------ ---------- ------- ---------------- +2015-08-18T00:00:00Z 51.75 + +name: h2o_temperature +time mean_degrees mean_index mean_pH mean_water_level +---- ------------ ---------- ------- ---------------- +2015-08-18T00:00:00Z 63.75 +``` + +The query aggregates data using an +InfluxQL [function](/influxdb/v1.7/query_language/functions) and a [`GROUP BY +time()` clause](#group-by-time-intervals). +It aggregates data in every measurement that matches the [regular expression](#regular-expressions) +in the `FROM` clause and writes the results to measurements with the same name in the +`where_else` database and the `autogen` retention policy. +Note that both `where_else` and `autogen` must exist prior to running the `INTO` +query. +See [Database management](/influxdb/v1.7/query_language/database_management/) +for how to manage databases and retention policies. + +The response shows the number of points (`5`) that InfluxDB writes to the `where_else` +database and the `autogen` retention policy. +The timestamp in the response is meaningless; InfluxDB uses epoch 0 +(`1970-01-01T00:00:00Z`) as a null timestamp equivalent. + +The query is an example of downsampling with backreferencing. +It takes higher precision data from more than one measurement, +aggregates those data to a lower precision, and stores the lower precision +data in the database. +Downsampling with backreferencing is a common use case for the `INTO` clause. + +### Common issues with the `INTO` clause + +#### Missing data + +If an `INTO` query includes a [tag key](/influxdb/v1.7/concepts/glossary#tag-key) in the [`SELECT` clause](#the-basic-select-statement), the query converts [tags](/influxdb/v1.7/concepts/glossary#tag) in the current +measurement to [fields](/influxdb/v1.7/concepts/glossary#field) in the destination measurement. +This can cause InfluxDB to overwrite [points](/influxdb/v1.7/concepts/glossary#point) that were previously differentiated +by a [tag value](/influxdb/v1.7/concepts/glossary#tag-value). +Note that this behavior does not apply to queries that use the [`TOP()`](/influxdb/v1.7/query_language/functions/#top) or [`BOTTOM()`](/influxdb/v1.7/query_language/functions/#bottom) functions. +The +[Frequently Asked Questions](/influxdb/v1.7/troubleshooting/frequently-asked-questions/#why-are-my-into-queries-missing-data) +document describes that behavior in detail. + +To preserve tags in the current measurement as tags in the destination measurement, +[`GROUP BY` the relevant tag key](#group-by-tags) or `GROUP BY *` in the `INTO` query. 
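+
+As a hedged illustration (the destination measurement name `h2o_feet_copy` is
+hypothetical), the first query below stores `location` as a field in the
+destination measurement, while the second query preserves `location` as a tag:
+
+```sql
+> SELECT "water_level","location" INTO "h2o_feet_copy" FROM "h2o_feet"
+
+> SELECT "water_level" INTO "h2o_feet_copy" FROM "h2o_feet" GROUP BY "location"
+```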
+
+#### Automating queries with the `INTO` clause
+
+The `INTO` clause section in this document shows how to manually implement
+queries with an `INTO` clause.
+See the [Continuous Queries](/influxdb/v1.7/query_language/continuous_queries/)
+documentation for how to automate `INTO` clause queries on real-time data.
+Among [other uses](/influxdb/v1.7/query_language/continuous_queries/#continuous-query-use-cases),
+Continuous Queries automate the downsampling process.
+
+## ORDER BY time DESC
+
+By default, InfluxDB returns results in ascending time order; the first [point](/influxdb/v1.7/concepts/glossary/#point)
+returned has the oldest [timestamp](/influxdb/v1.7/concepts/glossary/#timestamp) and
+the last point returned has the most recent timestamp.
+`ORDER BY time DESC` reverses that order such that InfluxDB returns the points
+with the most recent timestamps first.
+
+InfluxQL returns series in sorted order,
+which lets you limit or offset query results by series (rather than by row) using the
+[`SLIMIT`](/influxdb/v1.7/query_language/data_exploration/#the-slimit-clause) and
+[`SOFFSET`](/influxdb/v1.7/query_language/data_exploration/#the-soffset-clause) clauses.
+This is useful for paging through results by time.
+
+### Syntax
+
+```sql
+SELECT_clause [INTO_clause] FROM_clause [WHERE_clause] [GROUP_BY_clause] ORDER BY time DESC
+```
+
+`ORDER BY time DESC` must appear after the [`GROUP BY` clause](#the-group-by-clause)
+if the query includes a `GROUP BY` clause.
+`ORDER BY time DESC` must appear after the [`WHERE` clause](#the-where-clause)
+if the query includes a `WHERE` clause and no `GROUP BY` clause.
+
+### Examples
+
+#### Return the newest points first
+
+```sql
+> SELECT "water_level" FROM "h2o_feet" WHERE "location" = 'santa_monica' ORDER BY time DESC
+
+name: h2o_feet
+time                 water_level
+----                 -----------
+2015-09-18T21:42:00Z 4.938
+2015-09-18T21:36:00Z 5.066
+[...]
+2015-08-18T00:06:00Z 2.116
+2015-08-18T00:00:00Z 2.064
+```
+
+The query returns the points with the most recent timestamps from the
+`h2o_feet` [measurement](/influxdb/v1.7/concepts/glossary/#measurement) first.
+Without `ORDER BY time DESC`, the query would return `2015-08-18T00:00:00Z`
+first and `2015-09-18T21:42:00Z` last.
+
+#### Return the newest points first and include a GROUP BY time() clause
+
+```sql
+> SELECT MEAN("water_level") FROM "h2o_feet" WHERE time >= '2015-08-18T00:00:00Z' AND time <= '2015-08-18T00:42:00Z' GROUP BY time(12m) ORDER BY time DESC
+
+name: h2o_feet
+time                 mean
+----                 ----
+2015-08-18T00:36:00Z 4.6825
+2015-08-18T00:24:00Z 4.80675
+2015-08-18T00:12:00Z 4.950749999999999
+2015-08-18T00:00:00Z 5.07625
+```
+
+The query uses an InfluxQL [function](/influxdb/v1.7/query_language/functions)
+and a time interval in the [GROUP BY clause](#group-by-time-intervals)
+to calculate the average `water_level` for each twelve-minute
+interval in the query's time range.
+`ORDER BY time DESC` returns the most recent 12-minute time intervals
+first.
+
+Without `ORDER BY time DESC`, the query would return
+`2015-08-18T00:00:00Z` first and `2015-08-18T00:36:00Z` last.
+
+# The LIMIT and SLIMIT clauses
+
+`LIMIT` and `SLIMIT` limit the number of
+[points](/influxdb/v1.7/concepts/glossary/#point) and the number of
+[series](/influxdb/v1.7/concepts/glossary/#series) returned per query.
+
+## The LIMIT clause
+
+`LIMIT <N>` returns the first `N` [points](/influxdb/v1.7/concepts/glossary/#point) from the specified [measurement](/influxdb/v1.7/concepts/glossary/#measurement).
+
+### Syntax
+
+```sql
+SELECT_clause [INTO_clause] FROM_clause [WHERE_clause] [GROUP_BY_clause] [ORDER_BY_clause] LIMIT <N>
+```
+
+`N` specifies the number of [points](/influxdb/v1.7/concepts/glossary/#point) to return from the specified [measurement](/influxdb/v1.7/concepts/glossary/#measurement).
+If `N` is greater than the number of points in a measurement, InfluxDB returns
+all points from that measurement.
+
+Note that the `LIMIT` clause must appear in the order outlined in the syntax above.
+
+### Examples
+
+#### Limit the number of points returned
+
+```sql
+> SELECT "water_level","location" FROM "h2o_feet" LIMIT 3
+
+name: h2o_feet
+time                 water_level location
+----                 ----------- --------
+2015-08-18T00:00:00Z 8.12        coyote_creek
+2015-08-18T00:00:00Z 2.064       santa_monica
+2015-08-18T00:06:00Z 8.005       coyote_creek
+```
+
+The query returns the three oldest [points](/influxdb/v1.7/concepts/glossary/#point) (determined by timestamp) from the `h2o_feet` [measurement](/influxdb/v1.7/concepts/glossary/#measurement).
+
+#### Limit the number of points returned and include a GROUP BY clause
+
+```sql
+> SELECT MEAN("water_level") FROM "h2o_feet" WHERE time >= '2015-08-18T00:00:00Z' AND time <= '2015-08-18T00:42:00Z' GROUP BY *,time(12m) LIMIT 2
+
+name: h2o_feet
+tags: location=coyote_creek
+time                 mean
+----                 ----
+2015-08-18T00:00:00Z 8.0625
+2015-08-18T00:12:00Z 7.8245
+
+name: h2o_feet
+tags: location=santa_monica
+time                 mean
+----                 ----
+2015-08-18T00:00:00Z 2.09
+2015-08-18T00:12:00Z 2.077
+```
+
+The query uses an InfluxQL [function](/influxdb/v1.7/query_language/functions)
+and a [GROUP BY clause](#group-by-time-intervals)
+to calculate the average `water_level` for each [tag](/influxdb/v1.7/concepts/glossary/#tag) and for each twelve-minute
+interval in the query's time range.
+`LIMIT 2` requests the two oldest twelve-minute averages (determined by timestamp).
+
+Note that without `LIMIT 2`, the query would return four points per [series](/influxdb/v1.7/concepts/glossary/#series);
+one for each twelve-minute interval in the query's time range.
+
+## The `SLIMIT` clause
+
+`SLIMIT <N>` returns every [point](/influxdb/v1.7/concepts/glossary/#point) from `<N>` [series](/influxdb/v1.7/concepts/glossary/#series) in the specified [measurement](/influxdb/v1.7/concepts/glossary/#measurement).
+
+### Syntax
+
+```sql
+SELECT_clause [INTO_clause] FROM_clause [WHERE_clause] GROUP BY *[,time(<time_interval>)] [ORDER_BY_clause] SLIMIT <N>
+```
+
+`N` specifies the number of [series](/influxdb/v1.7/concepts/glossary/#series) to return from the specified [measurement](/influxdb/v1.7/concepts/glossary/#measurement).
+If `N` is greater than the number of series in a measurement, InfluxDB returns
+all series from that measurement.
+
+There is an [ongoing issue](https://github.com/influxdata/influxdb/issues/7571) that requires queries with `SLIMIT` to include `GROUP BY *`.
+Note that the `SLIMIT` clause must appear in the order outlined in the syntax above.
+
+### Examples
+
+#### Limit the number of series returned
+
+```sql
+> SELECT "water_level" FROM "h2o_feet" GROUP BY * SLIMIT 1
+
+name: h2o_feet
+tags: location=coyote_creek
+time                 water_level
+----                 -----------
+2015-08-18T00:00:00Z 8.12
+2015-08-18T00:06:00Z 8.005
+2015-08-18T00:12:00Z 7.887
+[...]
+2015-09-18T16:12:00Z 3.402
+2015-09-18T16:18:00Z 3.314
+2015-09-18T16:24:00Z 3.235
+```
+
+The query returns all `water_level` [points](/influxdb/v1.7/concepts/glossary/#point) from one of the [series](/influxdb/v1.7/concepts/glossary/#series) associated
+with the `h2o_feet` [measurement](/influxdb/v1.7/concepts/glossary/#measurement).
+
+#### Limit the number of series returned and include a GROUP BY time() clause
+
+```sql
+> SELECT MEAN("water_level") FROM "h2o_feet" WHERE time >= '2015-08-18T00:00:00Z' AND time <= '2015-08-18T00:42:00Z' GROUP BY *,time(12m) SLIMIT 1
+
+name: h2o_feet
+tags: location=coyote_creek
+time                 mean
+----                 ----
+2015-08-18T00:00:00Z 8.0625
+2015-08-18T00:12:00Z 7.8245
+2015-08-18T00:24:00Z 7.5675
+2015-08-18T00:36:00Z 7.303
+```
+
+The query uses an InfluxQL [function](/influxdb/v1.7/query_language/functions)
+and a time interval in the [GROUP BY clause](#group-by-time-intervals)
+to calculate the average `water_level` for each twelve-minute
+interval in the query's time range.
+`SLIMIT 1` requests a single series associated with the `h2o_feet` measurement.
+
+Note that without `SLIMIT 1`, the query would return results for the two series
+associated with the `h2o_feet` measurement: `location=coyote_creek` and
+`location=santa_monica`.
+
+## LIMIT and SLIMIT
+
+`LIMIT <N1>` followed by `SLIMIT <N2>` returns the first `<N1>` [points](/influxdb/v1.7/concepts/glossary/#point) from `<N2>` [series](/influxdb/v1.7/concepts/glossary/#series) in the specified measurement.
+
+### Syntax
+
+```sql
+SELECT_clause [INTO_clause] FROM_clause [WHERE_clause] GROUP BY *[,time(<time_interval>)] [ORDER_BY_clause] LIMIT <N1> SLIMIT <N2>
+```
+
+`N1` specifies the number of [points](/influxdb/v1.7/concepts/glossary/#point) to return per [measurement](/influxdb/v1.7/concepts/glossary/#measurement).
+If `N1` is greater than the number of points in a measurement, InfluxDB returns all points from that measurement.
+
+`N2` specifies the number of series to return from the specified [measurement](/influxdb/v1.7/concepts/glossary/#measurement).
+If `N2` is greater than the number of series in a measurement, InfluxDB returns all series from that measurement.
+
+There is an [ongoing issue](https://github.com/influxdata/influxdb/issues/7571) that requires queries with `LIMIT` and `SLIMIT` to include `GROUP BY *`.
+Note that the `LIMIT` and `SLIMIT` clauses must appear in the order outlined in the syntax above.
+
+### Examples
+
+#### Limit the number of points and series returned
+
+```sql
+> SELECT "water_level" FROM "h2o_feet" GROUP BY * LIMIT 3 SLIMIT 1
+
+name: h2o_feet
+tags: location=coyote_creek
+time                 water_level
+----                 -----------
+2015-08-18T00:00:00Z 8.12
+2015-08-18T00:06:00Z 8.005
+2015-08-18T00:12:00Z 7.887
+```
+
+The query returns the three oldest [points](/influxdb/v1.7/concepts/glossary/#point) (determined by timestamp) from one
+of the [series](/influxdb/v1.7/concepts/glossary/#series) associated with the
+[measurement](/influxdb/v1.7/concepts/glossary/#measurement) `h2o_feet`.
+
+#### Limit the number of points and series returned and include a GROUP BY time() clause
+
+```sql
+> SELECT MEAN("water_level") FROM "h2o_feet" WHERE time >= '2015-08-18T00:00:00Z' AND time <= '2015-08-18T00:42:00Z' GROUP BY *,time(12m) LIMIT 2 SLIMIT 1
+
+name: h2o_feet
+tags: location=coyote_creek
+time                 mean
+----                 ----
+2015-08-18T00:00:00Z 8.0625
+2015-08-18T00:12:00Z 7.8245
+```
+
+The query uses an InfluxQL [function](/influxdb/v1.7/query_language/functions)
+and a time interval in the [GROUP BY clause](#group-by-time-intervals)
+to calculate the average `water_level` for each twelve-minute
+interval in the query's time range.
+`LIMIT 2` requests the two oldest twelve-minute averages (determined by
+timestamp) and `SLIMIT 1` requests a single series
+associated with the `h2o_feet` measurement.
+
+Note that without `LIMIT 2 SLIMIT 1`, the query would return four points
+for each of the two series associated with the `h2o_feet` measurement.
+
+## The OFFSET and SOFFSET clauses
+
+`OFFSET` and `SOFFSET` paginate the [points](/influxdb/v1.7/concepts/glossary/#point) and [series](/influxdb/v1.7/concepts/glossary/#series) returned.
+
+<table style="width:100%">
+  <tr>
+    <td><a href="#the-offset-clause">The OFFSET clause</a></td>
+    <td><a href="#the-soffset-clause">The SOFFSET clause</a></td>
+  </tr>
+</table>
+
+## The `OFFSET` clause
+
+`OFFSET <N>` paginates `N` [points](/influxdb/v1.7/concepts/glossary/#point) in the query results.
+
+### Syntax
+
+```sql
+SELECT_clause [INTO_clause] FROM_clause [WHERE_clause] [GROUP_BY_clause] [ORDER_BY_clause] LIMIT_clause OFFSET <N> [SLIMIT_clause]
+```
+
+`N` specifies the number of [points](/influxdb/v1.7/concepts/glossary/#point) to paginate.
+The `OFFSET` clause requires a [`LIMIT` clause](#the-limit-clause).
+Using the `OFFSET` clause without a `LIMIT` clause can cause [inconsistent
+query results](https://github.com/influxdata/influxdb/issues/7577).
+
+> **Note:** InfluxDB returns no results if the `WHERE` clause includes a time
+range and the `OFFSET` clause would cause InfluxDB to return points with
+timestamps outside of that time range.
+
+### Examples
+
+#### Paginate points
+
+```sql
+> SELECT "water_level","location" FROM "h2o_feet" LIMIT 3 OFFSET 3
+
+name: h2o_feet
+time                 water_level location
+----                 ----------- --------
+2015-08-18T00:06:00Z 2.116       santa_monica
+2015-08-18T00:12:00Z 7.887       coyote_creek
+2015-08-18T00:12:00Z 2.028       santa_monica
+```
+
+The query returns the fourth, fifth, and sixth [points](/influxdb/v1.7/concepts/glossary/#point) from the `h2o_feet` [measurement](/influxdb/v1.7/concepts/glossary/#measurement).
+If the query did not include `OFFSET 3`, it would return the first, second,
+and third points from that measurement.
+
+#### Paginate points and include several clauses
+
+```sql
+> SELECT MEAN("water_level") FROM "h2o_feet" WHERE time >= '2015-08-18T00:00:00Z' AND time <= '2015-08-18T00:42:00Z' GROUP BY *,time(12m) ORDER BY time DESC LIMIT 2 OFFSET 2 SLIMIT 1
+
+name: h2o_feet
+tags: location=coyote_creek
+time                 mean
+----                 ----
+2015-08-18T00:12:00Z 7.8245
+2015-08-18T00:00:00Z 8.0625
+```
+
+This example is pretty involved, so here's the clause-by-clause breakdown:
+
+The [`SELECT` clause](#the-basic-select-statement) specifies an InfluxQL [function](/influxdb/v1.7/query_language/functions).
+The [`FROM` clause](#the-basic-select-statement) specifies a single measurement.
+The [`WHERE` clause](#the-where-clause) specifies the time range for the query.
+The [`GROUP BY` clause](#the-group-by-clause) groups results by all tags (`*`) and into 12-minute intervals.
+The [`ORDER BY time DESC` clause](#order-by-time-desc) returns results in descending timestamp order.
+The [`LIMIT 2` clause](#the-limit-clause) limits the number of points returned to two.
+The `OFFSET 2` clause excludes the first two averages from the query results.
+The [`SLIMIT 1` clause](#the-slimit-clause) limits the number of series returned to one.
+
+Without `OFFSET 2`, the query would return the first two averages of the query results:
+
+```sql
+name: h2o_feet
+tags: location=coyote_creek
+time                 mean
+----                 ----
+2015-08-18T00:36:00Z 7.303
+2015-08-18T00:24:00Z 7.5675
+```
+
+## The `SOFFSET` clause
+
+`SOFFSET <N>` paginates `N` [series](/influxdb/v1.7/concepts/glossary/#series) in the query results.
+
+### Syntax
+
+```sql
+SELECT_clause [INTO_clause] FROM_clause [WHERE_clause] GROUP BY *[,time(time_interval)] [ORDER_BY_clause] [LIMIT_clause] [OFFSET_clause] SLIMIT_clause SOFFSET <N>
+```
+
+`N` specifies the number of [series](/influxdb/v1.7/concepts/glossary/#series) to paginate.
+The `SOFFSET` clause requires an [`SLIMIT` clause](#the-slimit-clause).
+Using the `SOFFSET` clause without an `SLIMIT` clause can cause [inconsistent
+query results](https://github.com/influxdata/influxdb/issues/7578).
+There is an [ongoing issue](https://github.com/influxdata/influxdb/issues/7571) that requires queries with `SLIMIT` to include `GROUP BY *`.
+
+> **Note:** InfluxDB returns no results if the `SOFFSET` clause paginates
+through more than the total number of series.
+
+### Examples
+
+#### Paginate series
+
+```sql
+> SELECT "water_level" FROM "h2o_feet" GROUP BY * SLIMIT 1 SOFFSET 1
+
+name: h2o_feet
+tags: location=santa_monica
+time                 water_level
+----                 -----------
+2015-08-18T00:00:00Z 2.064
+2015-08-18T00:06:00Z 2.116
+[...]
+2015-09-18T21:36:00Z 5.066
+2015-09-18T21:42:00Z 4.938
+```
+
+The query returns data for the [series](/influxdb/v1.7/concepts/glossary/#series) associated with the `h2o_feet`
+[measurement](/influxdb/v1.7/concepts/glossary/#measurement) and the `location = santa_monica` [tag](/influxdb/v1.7/concepts/glossary/#tag).
+Without `SOFFSET 1`, the query returns data for the series associated with the
+`h2o_feet` measurement and the `location = coyote_creek` tag.
+
+#### Paginate series and include all clauses
+
+```sql
+> SELECT MEAN("water_level") FROM "h2o_feet" WHERE time >= '2015-08-18T00:00:00Z' AND time <= '2015-08-18T00:42:00Z' GROUP BY *,time(12m) ORDER BY time DESC LIMIT 2 OFFSET 2 SLIMIT 1 SOFFSET 1
+
+name: h2o_feet
+tags: location=santa_monica
+time                 mean
+----                 ----
+2015-08-18T00:12:00Z 2.077
+2015-08-18T00:00:00Z 2.09
+```
+
+This example is pretty involved, so here's the clause-by-clause breakdown:
+
+The [`SELECT` clause](#the-basic-select-statement) specifies an InfluxQL [function](/influxdb/v1.7/query_language/functions).
+The [`FROM` clause](#the-basic-select-statement) specifies a single measurement.
+The [`WHERE` clause](#the-where-clause) specifies the time range for the query.
+The [`GROUP BY` clause](#the-group-by-clause) groups results by all tags (`*`) and into 12-minute intervals.
+The [`ORDER BY time DESC` clause](#order-by-time-desc) returns results in descending timestamp order.
+The [`LIMIT 2` clause](#the-limit-clause) limits the number of points returned to two.
+The [`OFFSET 2` clause](#the-offset-clause) excludes the first two averages from the query results.
+The [`SLIMIT 1` clause](#the-slimit-clause) limits the number of series returned to one.
+The `SOFFSET 1` clause paginates the series returned.
+
+Without `SOFFSET 1`, the query would return the results for a different series:
+
+```sql
+name: h2o_feet
+tags: location=coyote_creek
+time                 mean
+----                 ----
+2015-08-18T00:12:00Z 7.8245
+2015-08-18T00:00:00Z 8.0625
+```
+
+## The Time Zone clause
+
+The `tz()` clause returns the UTC offset for the specified timezone.
+
+### Syntax
+
+```sql
+SELECT_clause [INTO_clause] FROM_clause [WHERE_clause] [GROUP_BY_clause] [ORDER_BY_clause] [LIMIT_clause] [OFFSET_clause] [SLIMIT_clause] [SOFFSET_clause] tz('<time_zone>')
+```
+
+By default, InfluxDB stores and returns timestamps in UTC.
+The `tz()` clause applies the UTC offset (or, if applicable, the UTC Daylight Savings Time (DST) offset) to the query's returned timestamps.
+The returned timestamps must be in [RFC3339 format](/influxdb/v1.7/query_language/data_exploration/#configuring-the-returned-timestamps) for the UTC offset or UTC DST to appear.
+The `time_zone` parameter follows the TZ syntax in the [Internet Assigned Numbers Authority time zone database](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones#List) and it requires single quotes.
+ +### Examples + +#### Return the UTC offset for Chicago's time zone + +```sql +> SELECT "water_level" FROM "h2o_feet" WHERE "location" = 'santa_monica' AND time >= '2015-08-18T00:00:00Z' AND time <= '2015-08-18T00:18:00Z' tz('America/Chicago') + +name: h2o_feet +time water_level +---- ----------- +2015-08-17T19:00:00-05:00 2.064 +2015-08-17T19:06:00-05:00 2.116 +2015-08-17T19:12:00-05:00 2.028 +2015-08-17T19:18:00-05:00 2.126 +``` + +The query results include the UTC offset (`-05:00`) for the `America/Chicago` time zone in the timestamps. + +## Time syntax + +For most `SELECT` statements, the default time range is between [`1677-09-21 00:12:43.145224194` and `2262-04-11T23:47:16.854775806Z` UTC](/influxdb/v1.7/troubleshooting/frequently-asked-questions/#what-are-the-minimum-and-maximum-timestamps-that-influxdb-can-store). +For `SELECT` statements with a [`GROUP BY time()` clause](#group-by-time-intervals), +the default time range is between `1677-09-21 00:12:43.145224194` UTC and [`now()`](/influxdb/v1.7/concepts/glossary/#now). +The following sections detail how to specify alternative time ranges in the `SELECT` +statement's [`WHERE` clause](#the-where-clause). + + + + + + + +
+[Absolute time](#absolute-time) | [Relative time](#relative-time) | [Common issues with time syntax](#common-issues-with-time-syntax)
+
+## Absolute time
+
+Specify absolute time with date-time strings and epoch time.
+
+### Syntax
+
+```sql
+SELECT_clause FROM_clause WHERE time <operator> ['<rfc3339_date_time_string>' | '<rfc3339_like_date_time_string>' | <epoch_time>] [AND ['<rfc3339_date_time_string>' | '<rfc3339_like_date_time_string>' | <epoch_time>] [...]]
+```
+
+#### Supported operators
+
+| Operator | Meaning |
+|:--------:|:------- |
+| `=` | equal to |
+| `<>` | not equal to |
+| `!=` | not equal to |
+| `>` | greater than |
+| `>=` | greater than or equal to |
+| `<` | less than |
+| `<=` | less than or equal to |
+
+Currently, InfluxDB does not support using `OR` with absolute time in the `WHERE`
+clause. See the [Frequently Asked Questions](/influxdb/v1.7/troubleshooting/frequently-asked-questions/#why-is-my-query-with-a-where-or-time-clause-returning-empty-results)
+document and the [GitHub issue](https://github.com/influxdata/influxdb/issues/7530)
+for more information.
+
+#### `rfc3339_date_time_string`
+
+```sql
+'YYYY-MM-DDTHH:MM:SS.nnnnnnnnnZ'
+```
+
+`.nnnnnnnnn` is optional and is set to `.000000000` if not included.
+The [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) date-time string requires single quotes.
+
+#### `rfc3339_like_date_time_string`
+
+```sql
+'YYYY-MM-DD HH:MM:SS.nnnnnnnnn'
+```
+
+`HH:MM:SS.nnnnnnnnn` is optional and is set to `00:00:00.000000000` if not included.
+The RFC3339-like date-time string requires single quotes.
+
+#### `epoch_time`
+
+Epoch time is the amount of time that has elapsed since 00:00:00
+Coordinated Universal Time (UTC), Thursday, 1 January 1970.
+
+By default, InfluxDB assumes that all epoch timestamps are in nanoseconds.
+Include a [duration literal](/influxdb/v1.7/query_language/spec/#durations)
+at the end of the epoch timestamp to indicate a precision other than nanoseconds.
+
+#### Basic arithmetic
+
+All timestamp formats support basic arithmetic.
+Add (`+`) or subtract (`-`) a duration from a timestamp with a [duration literal](/influxdb/v1.7/query_language/spec/#durations).
+Note that InfluxQL requires whitespace between the `+` or `-` and the
+duration literal.
+
+### Examples
+
+#### Specify a time range with RFC3339 date-time strings
+
+```sql
+> SELECT "water_level" FROM "h2o_feet" WHERE "location" = 'santa_monica' AND time >= '2015-08-18T00:00:00.000000000Z' AND time <= '2015-08-18T00:12:00Z'
+
+name: h2o_feet
+time                   water_level
+----                   -----------
+2015-08-18T00:00:00Z   2.064
+2015-08-18T00:06:00Z   2.116
+2015-08-18T00:12:00Z   2.028
+```
+
+The query returns data with timestamps between August 18, 2015 at 00:00:00.000000000 and
+August 18, 2015 at 00:12:00.
+The nanosecond specification in the first timestamp (`.000000000`)
+is optional.
+
+Note that the single quotes around the RFC3339 date-time strings are required.
+
+#### Specify a time range with RFC3339-like date-time strings
+
+```sql
+> SELECT "water_level" FROM "h2o_feet" WHERE "location" = 'santa_monica' AND time >= '2015-08-18' AND time <= '2015-08-18 00:12:00'
+
+name: h2o_feet
+time                   water_level
+----                   -----------
+2015-08-18T00:00:00Z   2.064
+2015-08-18T00:06:00Z   2.116
+2015-08-18T00:12:00Z   2.028
+```
+
+The query returns data with timestamps between August 18, 2015 at 00:00:00 and August 18, 2015
+at 00:12:00.
+The first date-time string does not include a time; InfluxDB assumes the time
+is 00:00:00.
+
+Note that the single quotes around the RFC3339-like date-time strings are
+required.
+
+#### Specify a time range with epoch timestamps
+
+```sql
+> SELECT "water_level" FROM "h2o_feet" WHERE "location" = 'santa_monica' AND time >= 1439856000000000000 AND time <= 1439856720000000000
+
+name: h2o_feet
+time                   water_level
+----                   -----------
+2015-08-18T00:00:00Z   2.064
+2015-08-18T00:06:00Z   2.116
+2015-08-18T00:12:00Z   2.028
+```
+
+The query returns data with timestamps that occur between August 18, 2015
+at 00:00:00 and August 18, 2015 at 00:12:00.
+By default, InfluxDB assumes epoch timestamps are in nanoseconds.
+
+#### Specify a time range with second-precision epoch timestamps
+
+```sql
+> SELECT "water_level" FROM "h2o_feet" WHERE "location" = 'santa_monica' AND time >= 1439856000s AND time <= 1439856720s
+
+name: h2o_feet
+time                   water_level
+----                   -----------
+2015-08-18T00:00:00Z   2.064
+2015-08-18T00:06:00Z   2.116
+2015-08-18T00:12:00Z   2.028
+```
+
+The query returns data with timestamps that occur between August 18, 2015
+at 00:00:00 and August 18, 2015 at 00:12:00.
+The `s` [duration literal](/influxdb/v1.7/query_language/spec/#durations) at the
+end of the epoch timestamps indicates that the epoch timestamps are in seconds.
+
+#### Perform basic arithmetic on an RFC3339-like date-time string
+
+```sql
+> SELECT "water_level" FROM "h2o_feet" WHERE time > '2015-09-18T21:24:00Z' + 6m
+
+name: h2o_feet
+time                   water_level
+----                   -----------
+2015-09-18T21:36:00Z   5.066
+2015-09-18T21:42:00Z   4.938
+```
+
+The query returns data with timestamps that occur at least six minutes after
+September 18, 2015 at 21:24:00.
+Note that the whitespace between the `+` and `6m` is required.
+
+#### Perform basic arithmetic on an epoch timestamp
+
+```sql
+> SELECT "water_level" FROM "h2o_feet" WHERE time > 24043524m - 6m
+
+name: h2o_feet
+time                   water_level
+----                   -----------
+2015-09-18T21:24:00Z   5.013
+2015-09-18T21:30:00Z   5.01
+2015-09-18T21:36:00Z   5.066
+2015-09-18T21:42:00Z   4.938
+```
+
+The query returns data with timestamps that occur after September 18, 2015
+at 21:18:00, that is, six minutes before the epoch timestamp `24043524m`
+(September 18, 2015 at 21:24:00).
+Note that the whitespace between the `-` and `6m` is required.
+
+## Relative time
+
+Use [`now()`](/influxdb/v1.7/concepts/glossary/#now) to query data with [timestamps](/influxdb/v1.7/concepts/glossary/#timestamp) relative to the server's current timestamp.
+
+### Syntax
+
+```sql
+SELECT_clause FROM_clause WHERE time <operator> now() [[ - | + ] <duration_literal>] [(AND|OR) time <operator> now() [...]]
+```
+
+`now()` is the Unix time of the server at the time the query is executed on that server.
+The whitespace between `-` or `+` and the [duration literal](/influxdb/v1.7/query_language/spec/#durations) is required.
+
+#### Supported operators
+
+| Operator | Meaning |
+|:--------:|:------- |
+| `=` | equal to |
+| `<>` | not equal to |
+| `!=` | not equal to |
+| `>` | greater than |
+| `>=` | greater than or equal to |
+| `<` | less than |
+| `<=` | less than or equal to |
+
+#### `duration_literal`
+
+| Duration literal | Meaning |
+|:----------------:|:------- |
+| `u` or `µ` | microseconds |
+| `ms` | milliseconds |
+| `s` | seconds |
+| `m` | minutes |
+| `h` | hours |
+| `d` | days |
+| `w` | weeks |
+
+### Examples
+
+#### Specify a time range with relative time
+
+```sql
+> SELECT "water_level" FROM "h2o_feet" WHERE time > now() - 1h
+```
+
+The query returns data with timestamps that occur within the past hour.
+The whitespace between `-` and `1h` is required.
+
+#### Specify a time range with absolute time and relative time
+
+```sql
+> SELECT "level description" FROM "h2o_feet" WHERE time > '2015-09-18T21:18:00Z' AND time < now() + 1000d
+
+name: h2o_feet
+time                   level description
+----                   -----------------
+2015-09-18T21:24:00Z   between 3 and 6 feet
+2015-09-18T21:30:00Z   between 3 and 6 feet
+2015-09-18T21:36:00Z   between 3 and 6 feet
+2015-09-18T21:42:00Z   between 3 and 6 feet
+```
+
+The query returns data with timestamps that occur between September 18, 2015
+at 21:18:00 and 1000 days from `now()`.
+The whitespace between `+` and `1000d` is required.
+
+## Common issues with time syntax
+
+### Using `OR` to select multiple time intervals
+
+InfluxDB does not support using the `OR` operator in the `WHERE` clause to specify multiple time intervals.
+
+For more information, see [Frequently asked questions](/influxdb/v1.7/troubleshooting/frequently-asked-questions/#why-is-my-query-with-a-where-or-time-clause-returning-empty-results).
+
+### Querying data that occur after `now()` with a `GROUP BY time()` clause
+
+Most `SELECT` statements have a default time range between [`1677-09-21T00:12:43.145224194Z` and `2262-04-11T23:47:16.854775806Z` UTC](/influxdb/v1.7/troubleshooting/frequently-asked-questions/#what-are-the-minimum-and-maximum-timestamps-that-influxdb-can-store).
+For `SELECT` statements with a [`GROUP BY time()` clause](#group-by-time-intervals),
+the default time range is between `1677-09-21T00:12:43.145224194Z` UTC and [`now()`](/influxdb/v1.7/concepts/glossary/#now).
+
+To query data with timestamps that occur after `now()`, `SELECT` statements with
+a `GROUP BY time()` clause must provide an alternative upper bound in the
+`WHERE` clause.
+
+#### Example
+
+Use the [CLI](/influxdb/v1.7/tools/shell/) to write a point to the `NOAA_water_database` that occurs after `now()`:
+
+```sql
+> INSERT h2o_feet,location=santa_monica water_level=3.1 1587074400000000000
+```
+
+Run a `GROUP BY time()` query that covers data with timestamps between
+`2015-09-18T21:30:00Z` and `now()`:
+
+```sql
+> SELECT MEAN("water_level") FROM "h2o_feet" WHERE "location"='santa_monica' AND time >= '2015-09-18T21:30:00Z' GROUP BY time(12m) fill(none)
+
+name: h2o_feet
+time                   mean
+----                   ----
+2015-09-18T21:24:00Z   5.01
+2015-09-18T21:36:00Z   5.002
+```
+
+Run a `GROUP BY time()` query that covers data with timestamps between
+`2015-09-18T21:30:00Z` and 180 weeks from `now()`:
+
+```sql
+> SELECT MEAN("water_level") FROM "h2o_feet" WHERE "location"='santa_monica' AND time >= '2015-09-18T21:30:00Z' AND time <= now() + 180w GROUP BY time(12m) fill(none)
+
+name: h2o_feet
+time                   mean
+----                   ----
+2015-09-18T21:24:00Z   5.01
+2015-09-18T21:36:00Z   5.002
+2020-04-16T22:00:00Z   3.1
+```
+
+Note that the `WHERE` clause must provide an alternative **upper** bound to
+override the default `now()` upper bound. The following query merely resets
+the lower bound to `now()` such that the query's time range is between
+`now()` and `now()`:
+
+```sql
+> SELECT MEAN("water_level") FROM "h2o_feet" WHERE "location"='santa_monica' AND time >= now() GROUP BY time(12m) fill(none)
+>
+```
+
+### Configuring the returned timestamps
+
+The [CLI](/influxdb/v1.7/tools/shell/) returns timestamps in
+nanosecond epoch format by default.
+Specify alternative formats with the
+[`precision <format>` command](/influxdb/v1.7/tools/shell/#influx-commands).
+The [InfluxDB API](/influxdb/v1.7/tools/api/) returns timestamps
+in [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) format by default.
+Specify alternative formats with the
+[`epoch` query string parameter](/influxdb/v1.7/tools/api/#query-string-parameters).
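+
+As a rough sketch, the request below asks the API for second-precision epoch
+timestamps; it assumes a local InfluxDB instance on the default port `8086`
+with the `NOAA_water_database` loaded and authentication disabled:
+
+```bash
+# Request second-precision epoch timestamps instead of RFC3339 strings.
+# Valid values for the epoch parameter are ns, u, µ, ms, s, m, and h.
+curl -G "http://localhost:8086/query" \
+  --data-urlencode "db=NOAA_water_database" \
+  --data-urlencode "epoch=s" \
+  --data-urlencode "q=SELECT \"water_level\" FROM \"h2o_feet\" LIMIT 1"
+```
+
+In the CLI, the equivalent adjustment is the `precision rfc3339` command, which
+switches the output from epoch nanoseconds to RFC3339 strings.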
+
+## Regular expressions
+
+InfluxQL supports using regular expressions when specifying:
+
+* [field keys](/influxdb/v1.7/concepts/glossary/#field-key) and [tag keys](/influxdb/v1.7/concepts/glossary/#tag-key) in the [`SELECT` clause](#the-basic-select-statement)
+* [measurements](/influxdb/v1.7/concepts/glossary/#measurement) in the [`FROM` clause](#the-basic-select-statement)
+* [tag values](/influxdb/v1.7/concepts/glossary/#tag-value) and string [field values](/influxdb/v1.7/concepts/glossary/#field-value) in the [`WHERE` clause](#the-where-clause)
+* [tag keys](/influxdb/v1.7/concepts/glossary/#tag-key) in the [`GROUP BY` clause](#group-by-tags)
+
+Currently, InfluxQL does not support using regular expressions to match
+non-string field values in the `WHERE` clause,
+[database](/influxdb/v1.7/concepts/glossary/#database) names, or
+[retention policy](/influxdb/v1.7/concepts/glossary/#retention-policy-rp) names.
+
+> **Note:** Regular expression comparisons are more computationally intensive than exact
+string comparisons; queries with regular expressions are not as performant
+as those without.
+
+### Syntax
+
+```sql
+SELECT /<regular_expression_field_key>/ FROM /<regular_expression_measurement>/ WHERE [<tag_key> <operator> /<regular_expression_tag_value>/ | <field_key> <operator> /<regular_expression_field_value>/] GROUP BY /<regular_expression_tag_key>/
+```
+
+Regular expressions are surrounded by `/` characters and use
+[Golang's regular expression syntax](http://golang.org/pkg/regexp/syntax/).
+
+#### Supported operators
+
+| Operator | Meaning |
+|:--------:|:------- |
+| `=~` | matches against |
+| `!~` | doesn't match against |
+
+### Examples
+
+#### Use a regular expression to specify field keys and tag keys in the SELECT clause
+
+```sql
+> SELECT /l/ FROM "h2o_feet" LIMIT 1
+
+name: h2o_feet
+time                   level description     location       water_level
+----                   -----------------     --------       -----------
+2015-08-18T00:00:00Z   between 6 and 9 feet  coyote_creek   8.12
+```
+
+The query selects all [field keys](/influxdb/v1.7/concepts/glossary/#field-key)
+and [tag keys](/influxdb/v1.7/concepts/glossary/#tag-key) that include an `l`.
+Note that the regular expression in the `SELECT` clause must match at least one
+field key in order to return results for a tag key that matches the regular
+expression.
+
+Currently, there is no syntax to distinguish between regular expressions for
+field keys and regular expressions for tag keys in the `SELECT` clause.
+The syntax `/<regular_expression>/::[field | tag]` is not supported.
+
+#### Use a regular expression to specify measurements in the FROM clause
+
+```sql
+> SELECT MEAN("degrees") FROM /temperature/
+
+name: average_temperature
+time                   mean
+----                   ----
+1970-01-01T00:00:00Z   79.98472932232272
+
+name: h2o_temperature
+time                   mean
+----                   ----
+1970-01-01T00:00:00Z   64.98872722506226
+```
+
+The query uses an InfluxQL [function](/influxdb/v1.7/query_language/functions/)
+to calculate the average `degrees` for every [measurement](/influxdb/v1.7/concepts/glossary#measurement) in the `NOAA_water_database`
+[database](/influxdb/v1.7/concepts/glossary#database) whose name contains the word `temperature`.
+
+#### Use a regular expression to specify tag values in the WHERE clause
+
+```sql
+> SELECT MEAN(water_level) FROM "h2o_feet" WHERE "location" =~ /[m]/ AND "water_level" > 3
+
+name: h2o_feet
+time                   mean
+----                   ----
+1970-01-01T00:00:00Z   4.47155532049926
+```
+
+The query uses an InfluxQL [function](/influxdb/v1.7/query_language/functions/)
+to calculate the average `water_level` where the [tag value](/influxdb/v1.7/concepts/glossary#tag-value) of `location`
+includes an `m` and `water_level` is greater than three.
+
+#### Use a regular expression to specify a tag with no value in the WHERE clause
+
+```sql
+> SELECT * FROM "h2o_feet" WHERE "location" !~ /./
+>
+```
+
+The query selects all data from the `h2o_feet` measurement where the `location`
+[tag](/influxdb/v1.7/concepts/glossary#tag) has no value.
+Every data [point](/influxdb/v1.7/concepts/glossary#point) in the `NOAA_water_database` has a tag value for `location`, so the query returns no results.
+
+It's possible to perform this same query without a regular expression.
+See the
+[Frequently Asked Questions](/influxdb/v1.7/troubleshooting/frequently-asked-questions/#how-do-i-select-data-with-a-tag-that-has-no-value)
+document for more information.
+
+#### Use a regular expression to specify a tag with a value in the WHERE clause
+
+```sql
+> SELECT MEAN("water_level") FROM "h2o_feet" WHERE "location" =~ /./
+
+name: h2o_feet
+time                   mean
+----                   ----
+1970-01-01T00:00:00Z   4.442107025822523
+```
+
+The query uses an InfluxQL [function](/influxdb/v1.7/query_language/functions/)
+to calculate the average `water_level` across all data that have a tag value for
+`location`.
+
+#### Use a regular expression to specify a field value in the WHERE clause
+
+```sql
+> SELECT MEAN("water_level") FROM "h2o_feet" WHERE "location" = 'santa_monica' AND "level description" =~ /between/
+
+name: h2o_feet
+time                   mean
+----                   ----
+1970-01-01T00:00:00Z   4.47155532049926
+```
+
+The query uses an InfluxQL [function](/influxdb/v1.7/query_language/functions/)
+to calculate the average `water_level` for all data where the field value of
+`level description` includes the word `between`.
+
+#### Use a regular expression to specify tag keys in the GROUP BY clause
+
+```sql
+> SELECT FIRST("index") FROM "h2o_quality" GROUP BY /l/
+
+name: h2o_quality
+tags: location=coyote_creek
+time                   first
+----                   -----
+2015-08-18T00:00:00Z   41
+
+name: h2o_quality
+tags: location=santa_monica
+time                   first
+----                   -----
+2015-08-18T00:00:00Z   99
+```
+
+The query uses an InfluxQL [function](/influxdb/v1.7/query_language/functions/)
+to select the first value of `index` for every tag key that includes the letter `l`.
+
+## Data types and cast operations
+
+The [`SELECT` clause](#the-basic-select-statement) supports specifying a [field's](/influxdb/v1.7/concepts/glossary/#field) type and basic cast
+operations with the `::` syntax.
+
+[Data types](#data-types) | [Cast operations](#cast-operations)
+
+## Data types
+
+[Field values](/influxdb/v1.7/concepts/glossary/#field-value) can be floats, integers, strings, or booleans.
+The `::` syntax allows users to specify the field's type in a query.
+
+> **Note:** Generally, it is not necessary to specify the field value
+type in the [`SELECT` clause](#the-basic-select-statement).
+In most cases, InfluxDB rejects any writes that attempt to write a [field value](/influxdb/v1.7/concepts/glossary/#field-value)
+to a field that previously accepted field values of a different type.
+>
+It is possible for field value types to differ across [shard groups](/influxdb/v1.7/concepts/glossary/#shard-group).
+In these cases, it may be necessary to specify the field value type in the
+`SELECT` clause.
+See the
+[Frequently Asked Questions](/influxdb/v1.7/troubleshooting/frequently-asked-questions/#how-does-influxdb-handle-field-type-discrepancies-across-shards)
+document for more information on how InfluxDB handles field value type discrepancies.
+
+### Syntax
+
+```sql
+SELECT_clause <field_key>::<type> FROM_clause
+```
+
+`type` can be `float`, `integer`, `string`, or `boolean`.
+In most cases, InfluxDB returns no data if the `field_key` does not store data of the specified
+`type`. See [Cast operations](#cast-operations) for more information.
+
+### Example
+
+```sql
+> SELECT "water_level"::float FROM "h2o_feet" LIMIT 4
+
+name: h2o_feet
+--------------
+time                   water_level
+2015-08-18T00:00:00Z   8.12
+2015-08-18T00:00:00Z   2.064
+2015-08-18T00:06:00Z   8.005
+2015-08-18T00:06:00Z   2.116
+```
+
+The query returns values of the `water_level` field key that are floats.
+
+## Cast operations
+
+The `::` syntax allows users to perform basic cast operations in queries.
+Currently, InfluxDB supports casting [field values](/influxdb/v1.7/concepts/glossary/#field-value) from integers to
+floats or from floats to integers.
+
+### Syntax
+
+```sql
+SELECT_clause <field_key>::<type> FROM_clause
+```
+
+`type` can be `float` or `integer`.
+
+InfluxDB returns no data if the query attempts to cast an integer or float to a
+string or boolean.
+
+### Examples
+
+#### Cast float field values to integers
+
+```sql
+> SELECT "water_level"::integer FROM "h2o_feet" LIMIT 4
+
+name: h2o_feet
+--------------
+time                   water_level
+2015-08-18T00:00:00Z   8
+2015-08-18T00:00:00Z   2
+2015-08-18T00:06:00Z   8
+2015-08-18T00:06:00Z   2
+```
+
+The query returns the integer form of `water_level`'s float [field values](/influxdb/v1.7/concepts/glossary/#field-value).
+
+#### Cast float field values to strings (this functionality is not supported)
+
+```sql
+> SELECT "water_level"::string FROM "h2o_feet" LIMIT 4
+>
+```
+
+The query returns no data, as casting a float field value to a string is not
+yet supported.
+
+## Merge behavior
+
+In InfluxDB, queries merge [series](/influxdb/v1.7/concepts/glossary/#series)
+automatically.
+
+### Example
+
+The `h2o_feet` [measurement](/influxdb/v1.7/concepts/glossary/#measurement) in the `NOAA_water_database` is part of two [series](/influxdb/v1.7/concepts/glossary/#series).
+The first series is made up of the `h2o_feet` measurement and the `location = coyote_creek` [tag](/influxdb/v1.7/concepts/glossary/#tag).
+The second series is made up of the `h2o_feet` measurement and the `location = santa_monica` tag.
+
+The following query automatically merges those two series when it calculates the [average](/influxdb/v1.7/query_language/functions/#mean) `water_level`:
+
+```sql
+> SELECT MEAN("water_level") FROM "h2o_feet"
+
+name: h2o_feet
+--------------
+time                   mean
+1970-01-01T00:00:00Z   4.442107025822521
+```
+
+If you want the average `water_level` for the first series only, specify the relevant tag in the [`WHERE` clause](#the-where-clause):
+
+```sql
+> SELECT MEAN("water_level") FROM "h2o_feet" WHERE "location" = 'coyote_creek'
+
+name: h2o_feet
+--------------
+time                   mean
+1970-01-01T00:00:00Z   5.359342451341401
+```
+
+If you want the average `water_level` for each individual series, include a [`GROUP BY` clause](#group-by-tags):
+
+```sql
+> SELECT MEAN("water_level") FROM "h2o_feet" GROUP BY "location"
+
+name: h2o_feet
+tags: location=coyote_creek
+time                   mean
+----                   ----
+1970-01-01T00:00:00Z   5.359342451341401
+
+name: h2o_feet
+tags: location=santa_monica
+time                   mean
+----                   ----
+1970-01-01T00:00:00Z   3.530863470081006
+```
+
+## Multiple statements
+
+Separate multiple [`SELECT` statements](#the-basic-select-statement) in a query with a semicolon (`;`).
+
+### Examples
+
+{{< tabs-wrapper >}}
+{{% tabs %}}
+[Example 1: CLI](#)
+[Example 2: InfluxDB API](#)
+{{% /tabs %}}
+
+{{% tab-content %}}
+
+In the InfluxDB [CLI](/influxdb/v1.7/tools/shell/):
+
+```sql
+> SELECT MEAN("water_level") FROM "h2o_feet"; SELECT "water_level" FROM "h2o_feet" LIMIT 2
+
+name: h2o_feet
+time                   mean
+----                   ----
+1970-01-01T00:00:00Z   4.442107025822522
+
+name: h2o_feet
+time                   water_level
+----                   -----------
+2015-08-18T00:00:00Z   8.12
+2015-08-18T00:00:00Z   2.064
+```
+
+{{% /tab-content %}}
+
+{{% tab-content %}}
+
+With the [InfluxDB API](/influxdb/v1.7/tools/api/):
+
+```json
+{
+    "results": [
+        {
+            "statement_id": 0,
+            "series": [
+                {
+                    "name": "h2o_feet",
+                    "columns": [
+                        "time",
+                        "mean"
+                    ],
+                    "values": [
+                        [
+                            "1970-01-01T00:00:00Z",
+                            4.442107025822522
+                        ]
+                    ]
+                }
+            ]
+        },
+        {
+            "statement_id": 1,
+            "series": [
+                {
+                    "name": "h2o_feet",
+                    "columns": [
+                        "time",
+                        "water_level"
+                    ],
+                    "values": [
+                        [
+                            "2015-08-18T00:00:00Z",
+                            8.12
+                        ],
+                        [
+                            "2015-08-18T00:00:00Z",
+                            2.064
+                        ]
+                    ]
+                }
+            ]
+        }
+    ]
+}
+```
+
+{{% /tab-content %}}
+{{< /tabs-wrapper >}}
+
+## Subqueries
+
+A subquery is a query that is nested in the `FROM` clause of another query.
+Use a subquery to apply a query as a condition in the enclosing query.
+Subqueries offer functionality similar to nested functions and SQL
+[`HAVING` clauses](https://en.wikipedia.org/wiki/Having_(SQL\)).
+
+### Syntax
+
+```sql
+SELECT_clause FROM ( SELECT_statement ) [...]
+```
+
+InfluxDB performs the subquery first and the main query second.
+
+The main query surrounds the subquery and requires at least the [`SELECT` clause](#the-basic-select-statement) and the [`FROM` clause](#the-basic-select-statement).
+The main query supports all clauses listed in this document.
+
+The subquery appears in the main query's `FROM` clause, and it requires surrounding parentheses.
+The subquery supports all clauses listed in this document.
+
+InfluxQL supports multiple nested subqueries per main query.
+Sample syntax for multiple subqueries:
+
+```sql
+SELECT_clause FROM ( SELECT_clause FROM ( SELECT_statement ) [...] ) [...]
+```
+
+### Examples
+
+#### Calculate the [`SUM()`](/influxdb/v1.7/query_language/functions/#sum) of several [`MAX()`](/influxdb/v1.7/query_language/functions/#max) values
+
+```sql
+> SELECT SUM("max") FROM (SELECT MAX("water_level") FROM "h2o_feet" GROUP BY "location")
+
+name: h2o_feet
+time                   sum
+----                   ---
+1970-01-01T00:00:00Z   17.169
+```
+
+The query returns the sum of the maximum `water_level` values across every tag value of `location`.
+
+InfluxDB first performs the subquery; it calculates the maximum value of `water_level` for each tag value of `location`:
+
+```sql
+> SELECT MAX("water_level") FROM "h2o_feet" GROUP BY "location"
+
+name: h2o_feet
+tags: location=coyote_creek
+time                   max
+----                   ---
+2015-08-29T07:24:00Z   9.964
+
+name: h2o_feet
+tags: location=santa_monica
+time                   max
+----                   ---
+2015-08-29T03:54:00Z   7.205
+```
+
+Next, InfluxDB performs the main query and calculates the sum of those maximum values: `9.964` + `7.205` = `17.169`.
+Notice that the main query specifies `max`, not `water_level`, as the field key in the `SUM()` function.
+
+#### Calculate the [`MEAN()`](/influxdb/v1.7/query_language/functions/#mean) difference between two fields
+
+```sql
+> SELECT MEAN("difference") FROM (SELECT "cats" - "dogs" AS "difference" FROM "pet_daycare")
+
+name: pet_daycare
+time                   mean
+----                   ----
+1970-01-01T00:00:00Z   1.75
+```
+
+The query returns the average of the differences between the number of `cats` and `dogs` in the `pet_daycare` measurement.
+
+InfluxDB first performs the subquery.
+The subquery calculates the difference between the values in the `cats` field and the values in the `dogs` field,
+and it names the output column `difference`:
+
+```sql
+> SELECT "cats" - "dogs" AS "difference" FROM "pet_daycare"
+
+name: pet_daycare
+time                   difference
+----                   ----------
+2017-01-20T00:55:56Z   -1
+2017-01-21T00:55:56Z   -49
+2017-01-22T00:55:56Z   66
+2017-01-23T00:55:56Z   -9
+```
+
+Next, InfluxDB performs the main query and calculates the average of those differences.
+Notice that the main query specifies `difference` as the field key in the `MEAN()` function.
+
+#### Calculate several [`MEAN()`](/influxdb/v1.7/query_language/functions/#mean) values and place a condition on those mean values
+
+```sql
+> SELECT "all_the_means" FROM (SELECT MEAN("water_level") AS "all_the_means" FROM "h2o_feet" WHERE time >= '2015-08-18T00:00:00Z' AND time <= '2015-08-18T00:30:00Z' GROUP BY time(12m)) WHERE "all_the_means" > 5
+
+name: h2o_feet
+time                   all_the_means
+----                   -------------
+2015-08-18T00:00:00Z   5.07625
+```
+
+The query returns all mean values of the `water_level` field that are greater than five.
+
+InfluxDB first performs the subquery.
+The subquery calculates `MEAN()` values of `water_level` from `2015-08-18T00:00:00Z` through `2015-08-18T00:30:00Z` and groups the results into 12-minute intervals.
+It also names the output column `all_the_means`:
+
+```sql
+> SELECT MEAN("water_level") AS "all_the_means" FROM "h2o_feet" WHERE time >= '2015-08-18T00:00:00Z' AND time <= '2015-08-18T00:30:00Z' GROUP BY time(12m)
+
+name: h2o_feet
+time                   all_the_means
+----                   -------------
+2015-08-18T00:00:00Z   5.07625
+2015-08-18T00:12:00Z   4.950749999999999
+2015-08-18T00:24:00Z   4.80675
+```
+
+Next, InfluxDB performs the main query and returns only those mean values that are greater than five.
+Notice that the main query specifies `all_the_means` as the field key in the `SELECT` clause.
+
+#### Calculate the [`SUM()`](/influxdb/v1.7/query_language/functions/#sum) of several [`DERIVATIVE()`](/influxdb/v1.7/query_language/functions/#derivative) values
+
+```sql
+> SELECT SUM("water_level_derivative") AS "sum_derivative" FROM (SELECT DERIVATIVE(MEAN("water_level")) AS "water_level_derivative" FROM "h2o_feet" WHERE time >= '2015-08-18T00:00:00Z' AND time <= '2015-08-18T00:30:00Z' GROUP BY time(12m),"location") GROUP BY "location"
+
+name: h2o_feet
+tags: location=coyote_creek
+time                   sum_derivative
+----                   --------------
+1970-01-01T00:00:00Z   -0.4950000000000001
+
+name: h2o_feet
+tags: location=santa_monica
+time                   sum_derivative
+----                   --------------
+1970-01-01T00:00:00Z   -0.043999999999999595
+```
+
+The query returns the sum of the derivative of average `water_level` values for each tag value of `location`.
+
+InfluxDB first performs the subquery.
+The subquery calculates the derivative of average `water_level` values taken at 12-minute intervals.
+It performs that calculation for each tag value of `location` and names the output column `water_level_derivative`:
+
+```sql
+> SELECT DERIVATIVE(MEAN("water_level")) AS "water_level_derivative" FROM "h2o_feet" WHERE time >= '2015-08-18T00:00:00Z' AND time <= '2015-08-18T00:30:00Z' GROUP BY time(12m),"location"
+
+name: h2o_feet
+tags: location=coyote_creek
+time                   water_level_derivative
+----                   ----------------------
+2015-08-18T00:12:00Z   -0.23800000000000043
+2015-08-18T00:24:00Z   -0.2569999999999997
+
+name: h2o_feet
+tags: location=santa_monica
+time                   water_level_derivative
+----                   ----------------------
+2015-08-18T00:12:00Z   -0.0129999999999999
+2015-08-18T00:24:00Z   -0.030999999999999694
+```
+
+Next, InfluxDB performs the main query and calculates the sum of the `water_level_derivative` values for each tag value of `location`.
+Notice that the main query specifies `water_level_derivative`, not `water_level` or `derivative`, as the field key in the `SUM()` function.
+
+### Common issues with subqueries
+
+#### Multiple SELECT statements in a subquery
+
+InfluxQL supports multiple nested subqueries per main query:
+
+```sql
+SELECT_clause FROM ( SELECT_clause FROM ( SELECT_statement ) [...] ) [...]
+                     ------------------   ----------------
+                         Subquery 1           Subquery 2
+```
+
+InfluxQL does not support multiple [`SELECT` statements](#the-basic-select-statement) per subquery:
+
+```sql
+SELECT_clause FROM (SELECT_statement; SELECT_statement) [...]
+```
+
+The system returns a parsing error if a subquery includes multiple `SELECT` statements.
diff --git a/content/influxdb/v1.7/query_language/database_management.md b/content/influxdb/v1.7/query_language/database_management.md
new file mode 100644
index 000000000..afeff5f79
--- /dev/null
+++ b/content/influxdb/v1.7/query_language/database_management.md
@@ -0,0 +1,379 @@
+---
+title: Database management using InfluxQL
+description: Use InfluxQL to administer your InfluxDB server and work with InfluxDB databases, retention policies, series, measurements, and shards.
+menu:
+  influxdb_1_7:
+    name: Data management
+    weight: 40
+    parent: InfluxQL
+---
+
+InfluxQL offers a full suite of administrative commands.
+
+| Data management:   | Retention policy management: |
+|:-------------------|:-----------------------------|
+| `CREATE DATABASE`  | `CREATE RETENTION POLICY`    |
+| `DROP DATABASE`    | `ALTER RETENTION POLICY`     |
+| `DROP SERIES`      | `DROP RETENTION POLICY`      |
+| `DELETE`           |                              |
+| `DROP MEASUREMENT` |                              |
+| `DROP SHARD`       |                              |
+
+If you're looking for `SHOW` queries (for example, `SHOW DATABASES` or `SHOW RETENTION POLICIES`), see [Schema Exploration](/influxdb/v1.7/query_language/schema_exploration).
+
+The examples in the sections below use the InfluxDB [Command Line Interface (CLI)](/influxdb/v1.7/introduction/getting-started/).
+You can also execute the commands using the InfluxDB API; simply send a `POST` request to the `/query` endpoint and include the command in the URL parameter `q`.
+For more on using the InfluxDB API, see [Querying data](/influxdb/v1.7/guides/querying_data/).
+
+> **Note:** When authentication is enabled, only admin users can execute most of the commands listed on this page.
+> See the documentation on [authentication and authorization](/influxdb/v1.7/administration/authentication_and_authorization/) for more information.
+
+## Data management
+
+### CREATE DATABASE
+
+Creates a new database.
+
+#### Syntax
+
+```sql
+CREATE DATABASE <database_name> [WITH [DURATION <duration>] [REPLICATION <n>] [SHARD DURATION <duration>] [NAME <retention-policy-name>]]
+```
+
+#### Description of syntax
+
+`CREATE DATABASE` requires a database [name](/influxdb/v1.7/troubleshooting/frequently-asked-questions/#what-words-and-characters-should-i-avoid-when-writing-data-to-influxdb).
+
+The `WITH`, `DURATION`, `REPLICATION`, `SHARD DURATION`, and `NAME` clauses are optional and create a single [retention policy](/influxdb/v1.7/concepts/glossary/#retention-policy-rp) associated with the created database.
+If you do not specify one of the clauses after `WITH`, the relevant behavior defaults to the `autogen` retention policy settings.
+The created retention policy automatically serves as the database's default retention policy.
+For more information about those clauses, see [Retention Policy Management](/influxdb/v1.7/query_language/database_management/#retention-policy-management).
+
+A successful `CREATE DATABASE` query returns an empty result.
+If you attempt to create a database that already exists, InfluxDB does nothing and does not return an error.
+
+#### Examples
+
+##### Create a database
+
+```sql
+> CREATE DATABASE "NOAA_water_database"
+>
+```
+
+The query creates a database called `NOAA_water_database`.
+[By default](/influxdb/v1.7/administration/config/#retention-autocreate-true), InfluxDB also creates the `autogen` retention policy and associates it with the `NOAA_water_database`.
+
+##### Create a database with a specific retention policy
+
+```sql
+> CREATE DATABASE "NOAA_water_database" WITH DURATION 3d REPLICATION 1 SHARD DURATION 1h NAME "liquid"
+>
+```
+
+The query creates a database called `NOAA_water_database`.
+It also creates a default retention policy for `NOAA_water_database` with a `DURATION` of three days, a [replication factor](/influxdb/v1.7/concepts/glossary/#replication-factor) of one, a [shard group](/influxdb/v1.7/concepts/glossary/#shard-group) duration of one hour, and the name `liquid`.
+
+### Delete a database with DROP DATABASE
+
+The `DROP DATABASE` query deletes all of the data, measurements, series, continuous queries, and retention policies from the specified database.
+The query takes the following form:
+
+```sql
+DROP DATABASE <database_name>
+```
+
+Drop the database `NOAA_water_database`:
+
+```sql
+> DROP DATABASE "NOAA_water_database"
+>
+```
+
+A successful `DROP DATABASE` query returns an empty result.
+If you attempt to drop a database that does not exist, InfluxDB does not return an error.
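+
+For reference, here is a rough equivalent of the two queries above issued over
+the InfluxDB API with `curl`; this sketch assumes a local instance listening on
+the default port `8086` with authentication disabled:
+
+```bash
+# Database management commands must be sent as POST requests to /query.
+curl -XPOST "http://localhost:8086/query" --data-urlencode "q=CREATE DATABASE \"NOAA_water_database\""
+
+# Drop the same database.
+curl -XPOST "http://localhost:8086/query" --data-urlencode "q=DROP DATABASE \"NOAA_water_database\""
+```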
+
+### Drop series from the index with DROP SERIES
+
+The `DROP SERIES` query deletes all points from a [series](/influxdb/v1.7/concepts/glossary/#series) in a database,
+and it drops the series from the index.
+
+> **Note:** `DROP SERIES` does not support time intervals in the `WHERE` clause.
+See
+[`DELETE`](/influxdb/v1.7/query_language/database_management/#delete-series-with-delete)
+for that functionality.
+
+The query takes the following form, where you must specify either the `FROM` clause or the `WHERE` clause:
+
+```sql
+DROP SERIES FROM <measurement_name[,measurement_name]> WHERE <tag_key>='<tag_value>'
+```
+
+Drop all series from a single measurement:
+
+```sql
+> DROP SERIES FROM "h2o_feet"
+```
+
+Drop series with a specific tag pair from a single measurement:
+
+```sql
+> DROP SERIES FROM "h2o_feet" WHERE "location" = 'santa_monica'
+```
+
+Drop all points in the series that have a specific tag pair from all measurements in the database:
+
+```sql
+> DROP SERIES WHERE "location" = 'santa_monica'
+```
+
+A successful `DROP SERIES` query returns an empty result.
+
+### Delete series with DELETE
+
+The `DELETE` query deletes all points from a
+[series](/influxdb/v1.7/concepts/glossary/#series) in a database.
+Unlike
+[`DROP SERIES`](/influxdb/v1.7/query_language/database_management/#drop-series-from-the-index-with-drop-series), it does not drop the series from the index, and it supports time intervals
+in the `WHERE` clause.
+
+The query takes the following form, where you must include either the `FROM`
+clause or the `WHERE` clause, or both:
+
+```
+DELETE FROM <measurement_name> WHERE [<tag_key>='<tag_value>'] | [