From 3e120663f4a8507c7cbe01004302119c4ebb56d9 Mon Sep 17 00:00:00 2001 From: Scott Anderson Date: Mon, 3 Aug 2020 09:23:32 -0600 Subject: [PATCH] added platform section in platform nav in 1.x docs --- assets/styles/layouts/_sidebar.scss | 11 +- content/platform/faq/_index.md | 26 + content/platform/install-and-deploy/_index.md | 24 + .../install-and-deploy/deploying/_index.md | 36 + .../deploying/amazon-web-services.md | 19 + .../deploying/google-cloud-platform.md | 88 ++ .../deploying/kubernetes.md | 28 + .../deploying/sandbox-install.md | 115 ++ .../install-and-deploy/install/_index.md | 28 + .../install-and-deploy/install/oss-install.md | 44 + content/platform/integrations/_index.md | 21 + content/platform/integrations/docker.md | 17 + content/platform/integrations/kubernetes.md | 136 +++ content/platform/introduction/_index.md | 152 +++ .../platform/introduction/getting-started.md | 51 + content/platform/monitoring/_index.md | 22 + .../platform/monitoring/containers/_index.md | 16 + .../monitoring/containers/kubernetes.md | 32 + .../monitoring/influxdata-platform/_index.md | 32 + .../external-monitor-setup.md | 160 +++ .../internal-vs-external.md | 113 ++ .../monitoring-dashboards.md | 99 ++ .../influxdata-platform/tools/_index.md | 26 + .../tools/kapacitor-measurements.md | 270 +++++ .../tools/measurements-internal.md | 995 ++++++++++++++++++ .../tools/show-diagnostics.md | 179 ++++ .../influxdata-platform/tools/show-stats.md | 54 + content/platform/ops-guide/_index.md | 11 + content/platform/use-cases/_index.md | 18 + .../use-cases/monitor-kubernetes-apps.md | 11 + .../platform/use-cases/monitor-kubernetes.md | 11 + layouts/partials/sidebar.html | 26 +- layouts/partials/topnav/product-selector.html | 4 +- layouts/partials/topnav/version-selector.html | 2 +- 34 files changed, 2869 insertions(+), 8 deletions(-) create mode 100644 content/platform/faq/_index.md create mode 100644 content/platform/install-and-deploy/_index.md create mode 100644 
content/platform/install-and-deploy/deploying/_index.md create mode 100644 content/platform/install-and-deploy/deploying/amazon-web-services.md create mode 100644 content/platform/install-and-deploy/deploying/google-cloud-platform.md create mode 100644 content/platform/install-and-deploy/deploying/kubernetes.md create mode 100644 content/platform/install-and-deploy/deploying/sandbox-install.md create mode 100644 content/platform/install-and-deploy/install/_index.md create mode 100644 content/platform/install-and-deploy/install/oss-install.md create mode 100644 content/platform/integrations/_index.md create mode 100644 content/platform/integrations/docker.md create mode 100644 content/platform/integrations/kubernetes.md create mode 100644 content/platform/introduction/_index.md create mode 100644 content/platform/introduction/getting-started.md create mode 100644 content/platform/monitoring/_index.md create mode 100644 content/platform/monitoring/containers/_index.md create mode 100644 content/platform/monitoring/containers/kubernetes.md create mode 100644 content/platform/monitoring/influxdata-platform/_index.md create mode 100644 content/platform/monitoring/influxdata-platform/external-monitor-setup.md create mode 100644 content/platform/monitoring/influxdata-platform/internal-vs-external.md create mode 100644 content/platform/monitoring/influxdata-platform/monitoring-dashboards.md create mode 100644 content/platform/monitoring/influxdata-platform/tools/_index.md create mode 100644 content/platform/monitoring/influxdata-platform/tools/kapacitor-measurements.md create mode 100644 content/platform/monitoring/influxdata-platform/tools/measurements-internal.md create mode 100644 content/platform/monitoring/influxdata-platform/tools/show-diagnostics.md create mode 100644 content/platform/monitoring/influxdata-platform/tools/show-stats.md create mode 100644 content/platform/ops-guide/_index.md create mode 100644 content/platform/use-cases/_index.md create mode 100644 
content/platform/use-cases/monitor-kubernetes-apps.md create mode 100644 content/platform/use-cases/monitor-kubernetes.md diff --git a/assets/styles/layouts/_sidebar.scss b/assets/styles/layouts/_sidebar.scss index ec70e1bc7..57040ec4a 100644 --- a/assets/styles/layouts/_sidebar.scss +++ b/assets/styles/layouts/_sidebar.scss @@ -274,7 +274,7 @@ } } - // Reference title styles + // Nav section title styles h4 { margin: 2rem 0 0 -1rem; color: rgba($article-heading-alt, .6); @@ -283,6 +283,15 @@ text-transform: uppercase; font-size: .85rem; letter-spacing: .08rem; + + &.platform:after { + content: "\e911"; + font-family: "icomoon"; + font-style: normal; + font-size: 1rem; + opacity: .4; + margin-left: .25rem; + } } } } diff --git a/content/platform/faq/_index.md b/content/platform/faq/_index.md new file mode 100644 index 000000000..8cd6736ec --- /dev/null +++ b/content/platform/faq/_index.md @@ -0,0 +1,26 @@ +--- +title: Frequently asked questions +description: Frequently asked questions about time series data and the InfluxData platform. +menu: + platform: + name: Frequently asked questions + weight: 70 +--- + +[What is time series data?](#what-is-time-series-data) +[Why shouldn't I just use a relational database?](#why-shouldn-t-i-just-use-a-relational-database) + +## What is time series data? +Time series data is a series of data points each associated with a specific time. +Examples include: + +- Server performance metrics +- Financial averages over time +- Sensor data, such as temperature, barometric pressure, wind speeds, etc. + +## Why shouldn't I just use a relational database? +Relational databases can be used to store and analyze time series data, but depending +on the precision of your data, a query can involve potentially millions of rows. +InfluxDB is purpose-built to store and query data by time, providing out-of-the-box +functionality that optionally downsamples data after a specific age and a query +engine optimized for time-based data. 
diff --git a/content/platform/install-and-deploy/_index.md b/content/platform/install-and-deploy/_index.md new file mode 100644 index 000000000..e03c37784 --- /dev/null +++ b/content/platform/install-and-deploy/_index.md @@ -0,0 +1,24 @@ +--- +title: Install and deploy the InfluxData Platform +description: Quickly install and configure the InfluxData Platform to work with time series data +menu: + platform: + name: Install and deploy + identifier: install-and-deploy-platform + weight: 20 +--- + +The InfluxData Platform can be manually installed and configured or +can be deployed leveraging containers and other technologies that can speed up +and provide consistent resources. + +## [Install the InfluxData Platform](/platform/install-and-deploy/install/) + +You can manually install and configure all of the InfluxData Platform OSS components +for use or install InfluxDB Enterprise clusters for production use. + + +## [Deploy the InfluxData TICK stack](/platform/install-and-deploy/deploying/) + +Use containers and container providers to simplify and ease the deployment of +the InfluxData Platform. diff --git a/content/platform/install-and-deploy/deploying/_index.md b/content/platform/install-and-deploy/deploying/_index.md new file mode 100644 index 000000000..ca8236f4c --- /dev/null +++ b/content/platform/install-and-deploy/deploying/_index.md @@ -0,0 +1,36 @@ +--- +title: Deploy the TICK stack +description: Easy ways to deploy the TICK stack on the platform you use. +menu: + platform: + name: Deploy InfluxData Platform + identifier: deploy-platform + weight: 22 + parent: install-and-deploy-platform +--- + +There are multiple ways to deploy the InfluxData Platform (also known +as the TICK stack). See if one of the following container-based +installation options meets your requirements. 
+ +## Deploy the InfluxData Platform in Docker containers + +You can [deploy the InfluxData Platform OSS (TICK stack) in Docker containers using the InfluxData Sandbox](/platform/install-and-deploy/deploying/sandbox-install) to quickly get up and running and ready for exploration +and testing. The InfluxData Sandbox is not intended for production environments, +but it is a quick way to start using the InfluxData Platform and work with Docker +containers. + +## Deploy InfluxData Platform components in Kubernetes + +To deploy InfluxData Platform OSS components in Kubernetes, see +[Deploy InfluxData Platform components in Kubernetes](/platform/install-and-deploy/deploying/kubernetes). + +## Deploy an InfluxDB Enterprise cluster on Amazon Web Services + +To learn how to deploy InfluxDB Enterprise clusters on Amazon Web Services, see +[Deploy an InfluxDB Enterprise cluster on Amazon Web Services](/platform/install-and-deploy/deploying/amazon-web-services). + +## Deploy InfluxData Platform in Google Cloud Platform + +To learn about deploying InfluxDB Enterprise clusters on Google Cloud Platform (GCP), +see [Deploy InfluxData Platform in Google Cloud Platform](/platform/install-and-deploy/deploying/google-cloud-platform). diff --git a/content/platform/install-and-deploy/deploying/amazon-web-services.md b/content/platform/install-and-deploy/deploying/amazon-web-services.md new file mode 100644 index 000000000..28974ca54 --- /dev/null +++ b/content/platform/install-and-deploy/deploying/amazon-web-services.md @@ -0,0 +1,19 @@ +--- +title: Deploy an InfluxDB Enterprise cluster on Amazon Web Services +description: Use the Terraform InfluxDB AWS Module to deploy single or multi-cluster architectures on Amazon Web Services.
+menu: + platform: + name: Deploy InfluxDB Enterprise clusters on Amazon Web Services + parent: deploy-platform + weight: 3 +--- + +InfluxData recommends using the Terraform [InfluxDB AWS Module](https://github.com/gruntwork-io/terraform-aws-influx) to deploy a single InfluxDB Enterprise cluster or a multi-cluster architecture on Amazon Web Services. + +### InfluxDB AWS Module (Terraform) + +The [InfluxDB AWS Module](https://github.com/gruntwork-io/terraform-aws-influx) is the official module for deploying InfluxDB Enterprise on AWS using [Terraform](https://www.terraform.io/) and [Packer](https://www.packer.io/). + +The InfluxDB AWS Module, maintained by [Gruntwork](http://www.gruntwork.io/), was written using a combination of Terraform and scripts (mostly bash) and includes automated tests, documentation, and examples. + +For details on using this Terraform module to deploy InfluxDB Enterprise clusters, see the [InfluxDB AWS Module repository](https://github.com/gruntwork-io/terraform-aws-influx). diff --git a/content/platform/install-and-deploy/deploying/google-cloud-platform.md b/content/platform/install-and-deploy/deploying/google-cloud-platform.md new file mode 100644 index 000000000..aa4136133 --- /dev/null +++ b/content/platform/install-and-deploy/deploying/google-cloud-platform.md @@ -0,0 +1,88 @@ +--- +title: Deploy the InfluxData Platform in Google Cloud Platform +description: Deploy the InfluxData Platform and InfluxDB Enterprise clusters in Google Cloud Platform +menu: + platform: + name: Deploy in Google Cloud Platform + parent: deploy-platform + weight: 2 +--- + +For deploying InfluxDB Enterprise clusters on Google Cloud Platform (GCP) infrastructure, InfluxData provides an [InfluxDB Enterprise bring-your-own-license (BYOL) solution](https://console.cloud.google.com/marketplace/details/influxdata-public/influxdb-enterprise-byol) on the [Google Cloud Platform Marketplace](https://cloud.google.com/marketplace/) that makes the installation and setup process easy and straightforward.
Clusters deployed through the GCP Marketplace are ready for production. + +{{% note %}} +The [Deployment Manager templates](https://cloud.google.com/deployment-manager/) used for the InfluxDB Enterprise BYOL solution are [open source](https://github.com/influxdata/google-deployment-manager-influxdb-enterprise). Issues and feature requests for the Marketplace deployment should be [submitted through the related GitHub repository](https://github.com/influxdata/google-deployment-manager-influxdb-enterprise/issues/new) (requires a GitHub account) or by contacting [InfluxData support](mailto:Support@InfluxData.com). +{{% /note %}} + +## Prerequisites + +This guide requires the following: + +- A [Google Cloud Platform (GCP)](https://cloud.google.com/) account with access to the [GCP Marketplace](https://cloud.google.com/marketplace/). +- A valid InfluxDB Enterprise license key, or [sign up for a free InfluxDB Enterprise trial for GCP](https://portal.influxdata.com/users/gcp). +- Access to [GCP Cloud Shell](https://cloud.google.com/shell/) or the [`gcloud` SDK and command line tools](https://cloud.google.com/sdk/). + +To deploy InfluxDB Enterprise on platforms other than GCP, please see [InfluxDB Enterprise installation options](/enterprise_influxdb/v1.6/introduction/installation_guidelines). + +## Deploy a cluster + +To deploy an InfluxDB Enterprise cluster, log in to your Google Cloud Platform account and navigate to [InfluxData's InfluxDB Enterprise (BYOL)](https://console.cloud.google.com/partner/editor/influxdata-public/influxdb-enterprise-byol) solution in the GCP Marketplace. + +![GCP InfluxDB Enterprise solution page](/img/enterprise/gcp/byol-intro-1.png) + +Click __Launch on compute engine__ to open up the configuration page. 
+ +![GCP InfluxDB Enterprise configuration page](/img/enterprise/gcp/byol-intro-2.png) + +Copy the InfluxDB Enterprise license key to the __InfluxDB Enterprise license key__ field or [sign up for a free InfluxDB Enterprise trial for GCP](https://portal.influxdata.com/users/gcp) to obtain a license key. + +Adjust any other fields as desired. The cluster will only be accessible within the network (or subnetwork, if specified) in which it is deployed. The fields in collapsed sections generally do not need to be altered. + +Click __Deploy__ to launch the InfluxDB Enterprise cluster. + +![GCP InfluxDB Enterprise deployment pending page](/img/enterprise/gcp/byol-intro-3.png) + +The cluster will take up to five minutes to fully deploy. If the deployment does not complete or reports an error, read through the list of [common deployment errors](https://cloud.google.com/marketplace/docs/troubleshooting). + +![GCP InfluxDB Enterprise deployment complete page](/img/enterprise/gcp/byol-intro-4.png) + +Your cluster is now deployed! + +{{% note %}} +Make sure you save the "Admin username", "Admin password", and "Connection internal IP" values displayed on the screen. They will be required when attempting to access the cluster. +{{% /note %}} + +## Access the cluster + +The cluster's IP address is only reachable from within the GCP network (or subnetwork) specified in the solution configuration. A cluster can only be reached from instances or services within the same GCP network or subnetwork in which it was provisioned. + +Using the GCP Cloud Shell or `gcloud` CLI, create a new instance that will be used to access the InfluxDB Enterprise cluster. + +``` +gcloud compute instances create influxdb-access --zone us-central1-f --image-family debian-9 --image-project debian-cloud +``` + +SSH into the instance. + +``` +gcloud compute ssh influxdb-access +``` + +On the instance, install the `influx` command line tool via the InfluxDB open source package. 
+ +``` +wget https://dl.influxdata.com/influxdb/releases/influxdb_1.6.3_amd64.deb +sudo dpkg -i influxdb_1.6.3_amd64.deb +``` + +Now the InfluxDB Enterprise cluster can be accessed using the following commands, with the "Admin username", "Admin password", and "Connection internal IP" values from the deployment screen substituted for the corresponding `<placeholder>` values. + +``` +influx -username <Admin username> -password <Admin password> -host <Connection internal IP> -execute "CREATE DATABASE test" + +influx -username <Admin username> -password <Admin password> -host <Connection internal IP> -execute "SHOW DATABASES" +``` + +### Next steps + +For an introduction to InfluxDB database and the InfluxData Platform, see [Getting started with InfluxDB](/platform/introduction/getting-started). diff --git a/content/platform/install-and-deploy/deploying/kubernetes.md b/content/platform/install-and-deploy/deploying/kubernetes.md new file mode 100644 index 000000000..d65ca720c --- /dev/null +++ b/content/platform/install-and-deploy/deploying/kubernetes.md @@ -0,0 +1,28 @@ +--- +title: Deploy InfluxData Platform components in Kubernetes +description: Deploy the InfluxData Platform components in Kubernetes +menu: + platform: + name: Deploy InfluxData Platform in Kubernetes + parent: deploy-platform + weight: 4 +--- + +## Deploy the TICK stack in Kubernetes + +Instructions for installing and configuring all components of the open source TICK stack – Telegraf, InfluxDB, Chronograf, and Kapacitor – in Kubernetes. + +### Use Helm Charts to deploy InfluxData Platform components + +InfluxData recommends using the [Helm Stable](https://github.com/helm/charts/tree/master/stable) repository for installing the TICK stack.
+ +- [Telegraf](https://github.com/helm/charts/tree/master/stable/telegraf) +- [InfluxDB](https://github.com/helm/charts/tree/master/stable/influxdb) +- [Chronograf](https://github.com/helm/charts/tree/master/stable/chronograf) +- [Kapacitor](https://github.com/helm/charts/tree/master/stable/kapacitor) + +### Use the InfluxDB Operator + +[InfluxDB operator](https://github.com/influxdata/influxdata-operator) is a [Kubernetes operator](https://coreos.com/operators/) that can be used to deploy InfluxDB OSS in Kubernetes. The InfluxDB operator can handle operational tasks, like creating a backup, automatically. The operator currently has been tested on [AWS's Elastic Kubernetes Service](https://aws.amazon.com/eks/) and [GCP's Google Kubernetes Engine](https://cloud.google.com/kubernetes-engine/). + +[Deploy InfluxDB using the InfluxData operator](https://github.com/influxdata/influxdata-operator) diff --git a/content/platform/install-and-deploy/deploying/sandbox-install.md b/content/platform/install-and-deploy/deploying/sandbox-install.md new file mode 100644 index 000000000..d980a014e --- /dev/null +++ b/content/platform/install-and-deploy/deploying/sandbox-install.md @@ -0,0 +1,115 @@ +--- +title: Deploy the InfluxData Platform (TICK stack) in Docker containers +description: Install the InfluxData Sandbox, the quickest way to get a TICK stack up and running and ready for exploration and testing. +aliases: + - /platform/installation/sandbox-install +menu: + platform: + name: Deploy InfluxData Platform (OSS) + parent: deploy-platform + weight: 1 +--- + +The quickest way to start using the InfluxData Platform (TICK stack) OSS is to download and deploy the [InfluxData Sandbox](https://github.com/influxdata/sandbox). The InfluxData Sandbox uses Docker containers to deploy the InfluxData Platform components. 
The InfluxData Sandbox provides a containerized, ready-to-use TICK stack, built using [Docker](https://www.docker.com) and [Docker Compose](https://docs.docker.com/compose/overview/), to capture data from your local machine and the Docker containers. + +After deploying using the InfluxData Sandbox, you will have the latest versions of: + +* Telegraf +* InfluxDB OSS +* Chronograf +* Kapacitor OSS + +{{% note %}} +The InfluxData Sandbox is not recommended for production use. +{{% /note %}} + +## Requirements + +- Linux or macOS (Windows support is experimental) +- [Git](https://git-scm.com/) +- [Docker](https://docs.docker.com/install/#supported-platforms) +- [Docker Compose](https://docs.docker.com/compose/install/) +(Packaged with Docker for Mac) + +## Download and run the Sandbox + +The InfluxData Sandbox is open source and is available for +[download from Github](https://github.com/influxdata/sandbox). +To download it, use `git` to clone the source repository: + +```bash +# Clone the InfluxData Sandbox from Github +git clone https://github.com/influxdata/sandbox.git +``` + +The Sandbox repo includes a `sandbox` binary used to provision and manage the +Sandbox's containers and data. Run `./sandbox up` from inside the `sandbox` directory +to download the necessary images, then build and start all the required Docker containers. + +```bash +# cd into the sandbox directory +cd sandbox + +# Start the sandbox +./sandbox up +``` + +{{% note %}} +Make sure no other instances of TICK stack components are running on your local +machine when starting the Sandbox. Otherwise you will run into port conflicts +and the Sandbox won't be able to start properly. +{{% /note %}} + +Once started, two tabs will open in your browser: + +1. Chronograf ([localhost:8888](http://localhost:8888)) +2. InfluxData Sandbox Documentation ([localhost:3010](http://localhost:3010)) + +Chronograf is the web-based user interface for the TICK stack and can be used to manage the stack. 
You can use Chronograf to: + +* query and explore data +* [create Kapacitor alerts](/chronograf/v1.6/guides/create-alert-rules/) +* preview [data visualizations](/chronograf/v1.6/guides/visualization-types/) +* [build custom dashboards](/chronograf/v1.6/guides/create-a-dashboard/) + +### Using nightly builds + +The `./sandbox up` command includes a `-nightly` option that will pull nightly +builds for InfluxDB and Chronograf, giving you the most recent updates and +experimental functionality. + +{{% warn %}} +Nightly builds are experimental and are not guaranteed to be functional. +{{% /warn %}} + +```bash +./sandbox up -nightly +``` + +## Interacting with the Sandbox TICK stack + +With the Sandbox running, each component of the TICK stack is available to work with. +The Sandbox documentation provides tutorials for interacting with each component. +The documentation is available at [localhost:3010/tutorials](http://localhost:3010/tutorials) +(with the Sandbox running) or [on Github](https://github.com/influxdata/sandbox/tree/master/documentation/static/tutorials). + +All configuration files, tools, and CLIs needed for managing each component of the +TICK stack are included in their respective Docker containers. +Tasks outlined throughout the InfluxData documentation can be accomplished using +the InfluxData Sandbox. + +The `./sandbox enter` command opens a console inside the specified container where +the project's configuration files and CLIs are available. + +```bash +# Pattern +./sandbox enter [ telegraf | influxdb | chronograf | kapacitor ] + +# Example: console into the telegraf container +./sandbox enter telegraf +``` + +{{% note %}} +After updating a configuration file in a Sandbox container, use the `./sandbox restart` +command to restart the containers and apply the updated configuration. 
+{{% /note %}} diff --git a/content/platform/install-and-deploy/install/_index.md b/content/platform/install-and-deploy/install/_index.md new file mode 100644 index 000000000..5b702edeb --- /dev/null +++ b/content/platform/install-and-deploy/install/_index.md @@ -0,0 +1,28 @@ +--- +title: Install the InfluxData 1.x platform +description: Quickly install and configure the InfluxData platform to begin exploring time series data +menu: + platform: + name: Install the InfluxData Platform + weight: 11 + identifier: install-platform + parent: install-and-deploy-platform +--- + +Install and configure the InfluxData platform (TICK stack) using one of the following methods: + +- For **non-production** environments. The quickest way to install the InfluxData platform is to [deploy the InfluxData 1.x platform in Docker containers](/platform/install-and-deploy/deploying/sandbox-install). +- For **production** environments. Do one of the following: + + - [Install the open source version of InfluxData 1.x platform](/platform/install-and-deploy/install/oss-install) + - Install InfluxData 1.x Enterprise: + + a. [Install Telegraf](/telegraf/latest/introduction/installation/) + + b. [Install InfluxDB Enterprise](/enterprise_influxdb/latest/install-and-deploy/) + + c. [Install Kapacitor Enterprise](/enterprise_kapacitor/latest/introduction/installation_guide/) + +{{% note %}} +Windows support is experimental. +{{% /note %}} diff --git a/content/platform/install-and-deploy/install/oss-install.md b/content/platform/install-and-deploy/install/oss-install.md new file mode 100644 index 000000000..b815ece56 --- /dev/null +++ b/content/platform/install-and-deploy/install/oss-install.md @@ -0,0 +1,44 @@ +--- +title: Install the InfluxData TICK stack (OSS) +description: Install and configure the open source InfluxData TICK stack – Telegraf, InfluxDB, Chronograf, and Kapacitor. 
+aliases: [/platform/installation/oss-install] +menu: + platform: + name: Install InfluxData TICK stack (OSS) + parent: install-platform + weight: 2 +--- + +## Download the TICK stack components + +To download each of the TICK stack components, see the [InfluxData downloads page](https://portal.influxdata.com/downloads). +Telegraf, InfluxDB, Chronograf, and Kapacitor are each separate binaries that need +to be installed, configured, and started separately. + + +## Install Telegraf + +The [Telegraf installation instructions](https://docs.influxdata.com/telegraf/latest/introduction/installation/) +walk through installing and configuring Telegraf. + +## Install InfluxDB + +The [InfluxDB OSS installation instructions](https://docs.influxdata.com/influxdb/latest/introduction/installation/) +walk through installing and configuring the open source version of InfluxDB. + +## Install Chronograf + +The [Chronograf installation instructions](https://docs.influxdata.com/chronograf/latest/introduction/installation/) +walk through installing and configuring Chronograf. + +## Install Kapacitor + +The [Kapacitor OSS installation instructions](https://docs.influxdata.com/kapacitor/latest/introduction/installation/) +walk through installing and configuring the open source version of Kapacitor. + +## InfluxData Sandbox + +The [InfluxData Sandbox](https://github.com/influxdata/sandbox) is an alternative +method for installing the OSS TICK stack that uses Docker and Docker Compose to build +and network each component. For information about installing the Sandbox, view the +[InfluxData Sandbox installation instructions](/platform/install-and-deploy/deploying/sandbox-install).
diff --git a/content/platform/integrations/_index.md b/content/platform/integrations/_index.md new file mode 100644 index 000000000..050f095e0 --- /dev/null +++ b/content/platform/integrations/_index.md @@ -0,0 +1,21 @@ +--- +title: Integrate the InfluxData TICK stack +description: Discover all the ways the TICK stack integrates into your favorite projects. +menu: + platform: + name: Integrate + identifier: integrate-platform + weight: 40 +--- + +The InfluxData Platform and its TICK stack components integrate with a number of +platforms and technologies. These pages document the integrations and provide +links to additional material. + +## [Docker](/platform/integrations/docker) +Learn how the TICK stack can run in Docker containers and monitor containers running +on the Docker daemon. + +## [Kubernetes](/platform/integrations/kubernetes) +Learn about all the ways the TICK stack can work with Kubernetes, including +monitoring Kubernetes and deploying on Kubernetes. diff --git a/content/platform/integrations/docker.md b/content/platform/integrations/docker.md new file mode 100644 index 000000000..9f04cfa91 --- /dev/null +++ b/content/platform/integrations/docker.md @@ -0,0 +1,17 @@ +--- +title: Docker +description: InfluxData projects are well suited to be deployed in Docker containers. Containers for each project are published to DockerHub. +menu: + platform: + name: Docker + parent: integrate-platform +--- + +InfluxData projects are well suited to be deployed in Docker containers. Containers for each project are published to DockerHub. + +- [Telegraf](https://hub.docker.com/_/telegraf/) +- [InfluxDB](https://hub.docker.com/_/influxdb/) +- [Chronograf](https://hub.docker.com/_/chronograf/) +- [Kapacitor](https://hub.docker.com/_/kapacitor/) + +The source for all containers is available [here](https://github.com/influxdata/influxdata-docker).
diff --git a/content/platform/integrations/kubernetes.md b/content/platform/integrations/kubernetes.md new file mode 100644 index 000000000..83c0ad1f3 --- /dev/null +++ b/content/platform/integrations/kubernetes.md @@ -0,0 +1,136 @@ +--- +title: Kubernetes +description: Deploy InfluxDB OSS in Kubernetes and monitor Kubernetes +menu: + platform: + name: Kubernetes + parent: integrate-platform +--- + +[Kubernetes](https://kubernetes.io/) is a container orchestration project that +has become a popular way to deploy and manage containers across multiple servers and cloud providers. + +![InfluxDB Kubernetes Logos](/img/platform/flux-kube.png) + +{{% note %}} +This page is about using Kubernetes with the TICK stack and 1.x versions of InfluxDB. + +To start experimenting with InfluxDB 2.0 and Kubernetes, go to the **Kubernetes** section of the [Get Started page for InfluxDB 2.0](https://v2.docs.influxdata.com/v2.0/get-started/). +{{% /note %}} + +There are several ways to use the InfluxData Platform with Kubernetes: + +- [Monitor Kubernetes](#monitor-kubernetes) + - [kube-influxdb Kubernetes monitoring project](#kube-influxdb-kubernetes-monitoring-project) + - [Collect Kubernetes metrics with Telegraf](#collect-kubernetes-metrics-with-telegraf) + - [Prometheus remote read and write support](#prometheus-remote-read-and-write-support) +- [Deploy the TICK stack in Kubernetes](#deploy-the-tick-stack-in-kubernetes) + - [Helm Charts](#helm-charts) + - [K8s Operator](#k8s-operator) + - [Solutions for Kubernetes services](#solutions-for-kubernetes-services) +- [Frequently asked questions](#frequently-asked-questions) + - [How is the InfluxData Platform (TICK) different from Prometheus?](#how-is-the-influxdata-platform-tick-different-from-prometheus) + - [Should I run InfluxDB in Kubernetes?](#should-i-run-influxdb-in-kubernetes) + +## Monitor Kubernetes + +The TICK stack is an easy and performant way to monitor the services that make up a Kubernetes cluster, whether you're
running InfluxDB in a Kubernetes cluster or somewhere else. + +### kube-influxdb Kubernetes monitoring project + +The [kube-influxdb](https://github.com/influxdata/kube-influxdb) project is a +set of Helm charts to make collection and visualization of Kubernetes metrics +easy. It uses Telegraf, the metrics collection agent, to collect metrics and +events and includes a set of pre-configured Chronograf dashboards. + +See the [kube-influxdb Getting Started guide](https://github.com/influxdata/kube-influxdb/blob/master/docs/v1.0/getting_started.md). + +### Collect Kubernetes metrics with Telegraf + +The [Telegraf metrics collection agent](/telegraf/latest/introduction/getting-started/) +can collect many types of metrics in a Kubernetes cluster, like [Docker container metrics](https://github.com/influxdata/telegraf/blob/master/plugins/inputs/docker/README.md) +and [stats from kubelets](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/kubernetes). +It can even scrape [Prometheus metrics API endpoints](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/prometheus). +Telegraf is used in the [kube-influxdb project](#kube-influxdb-kubernetes-monitoring-project) +to collect metrics. + +See [Set up a Kubernetes monitoring architecture using Telegraf](https://www.influxdata.com/blog/monitoring-kubernetes-architecture/). + +### Prometheus remote read and write support + +InfluxDB supports the Prometheus remote read and write API for clusters already +using Prometheus for metrics collection. See the +[FAQ](#frequently-asked-questions) for more information on why a more flexible +time series data store is useful. + +Read about [Prometheus remote read and write API support in InfluxDB](/influxdb/latest/supported_protocols/prometheus/). + +## Deploy the TICK stack in Kubernetes +Instructions for installing and configuring all components of the open source +TICK stack – Telegraf, InfluxDB, Chronograf, and Kapacitor in Kubernetes. 
+ +{{% note %}} +Running InfluxDB in Kubernetes in production is not recommended. See the [FAQ](#frequently-asked-questions) for more info. +{{% /note %}} + +### Helm Charts + +InfluxData maintains [Helm charts](https://github.com/influxdata/helm-charts) for setting up data collection and monitoring in Kubernetes using InfluxDB and related applications. + +{{% note %}} +Helm charts are currently in beta and subject to change. +{{% /note %}} + +### K8s Operator + +The [InfluxData operator](https://github.com/influxdata/influxdata-operator) is +a [Kubernetes operator](https://coreos.com/operators/). The InfluxData operator +can be used to deploy InfluxDB in Kubernetes and can handle operational tasks +automatically, like creating a backup. The operator currently has been tested on +[AWS's Elastic Kubernetes Service](https://aws.amazon.com/eks/) and [GCP's Google Kubernetes Engine](https://cloud.google.com/kubernetes-engine/). + +[Deploy InfluxDB using the InfluxData +operator](https://github.com/influxdata/influxdata-operator) + +### Solutions for Kubernetes services + +InfluxData maintains ways to deploy the InfluxData Platform components to popular Kubernetes service providers. + +- [TICK Stack on the AWS Container Marketplace](https://aws.amazon.com/marketplace/pp/B07KGM885K?qid=1544514373950&sr=0-18&ref_=srh_res_product_title) +- [Telegraf, InfluxDB, and Grafana on the GCP Marketplace](https://console.cloud.google.com/marketplace/details/influxdata-public/telegraf-influxdb-grafana?q=telegraf) + +## Frequently asked questions + +### How is the InfluxData Platform (TICK) different from Prometheus? + +InfluxDB was purpose-built as a time series database. Overall, it is more +flexible and can handle more use cases than Prometheus alone, such as irregular +events and string data types. + +Many InfluxDB users find it provides several advantages over Prometheus: +- Handles event data that comes in at irregular intervals, e.g. 
structured logs, + application events, and trace data. +- Works well as a centralized long-term metrics store for federated Prometheus + servers in multiple clusters. + +### Should I run InfluxDB in Kubernetes? + +While Kubernetes is rapidly becoming a stable deployment platform for stateful +applications, it still introduces significant complexity and few benefits for +database workloads. + +Therefore, we _do not_ currently recommend running InfluxDB or InfluxDB +Enterprise on Kubernetes in production. While many users have managed to run the +databases in Kubernetes successfully, many InfluxDB users have also experienced +issues including significant downtime and even loss of data due to Kubernetes +rescheduling pods or problems with mounted volumes. + +InfluxData provides several [ways to deploy InfluxDB in Kubernetes](/platform/install-and-deploy/deploying/kubernetes/), +which should be considered experimental and not for use in production. We +suggest exploring the [Terraform InfluxDB module](https://registry.terraform.io/modules/influxdata/influxdb/aws/1.0.4) +for a declarative way to deploy InfluxDB for production use. + +{{% note %}} +The other InfluxData Platform components (Telegraf, Chronograf, Kapacitor) run well on Kubernetes. +The above recommendation only applies to the database. +{{% /note %}} diff --git a/content/platform/introduction/_index.md b/content/platform/introduction/_index.md new file mode 100644 index 000000000..e1f1c8aa9 --- /dev/null +++ b/content/platform/introduction/_index.md @@ -0,0 +1,152 @@ +--- +title: Introduction to the InfluxData platform +description: The InfluxData platform is the leading modern time-series platform built for metrics and events. +aliases: + - /platform/ +menu: + platform: + name: Introduction + weight: 10 +--- + +**InfluxData platform** is the leading modern [time series](/platform/faq/#what-is-time-series-data) platform, built for metrics and events. 
Explore both versions of our platform below--[**InfluxData 1.x**](#influxdata-1-x) and [**InfluxDB 2.0**](#influxdb-2-0). + +## InfluxData 1.x + +The **InfluxData 1.x platform** includes the following open source components ([TICK stack](#influxdata-1-x-tick-stack)): + + - [Telegraf](#telegraf): collect data + - [InfluxDB](#influxdb): store data + - [Chronograf](#chronograf): visualize data + - [Kapacitor](#kapacitor): process data and alerts + +**InfluxData 1.x** also includes the following **commercial offerings**: + + - [InfluxDB Enterprise](#influxdb-enterprise) + - [Kapacitor Enterprise](#kapacitor-enterprise) + - [InfluxCloud 1.x](https://help.influxcloud.net) (hosted cloud solution) + +## InfluxDB 2.0 + +The **InfluxDB 2.0 platform** consolidates components from the **InfluxData 1.x platform** into a single packaged solution, with added features and flexibility: + + - [InfluxDB 2.0 OSS](https://v2.docs.influxdata.com/v2.0/get-started/): open source platform solution in a single binary + - [InfluxDB Cloud 2.0](https://v2.docs.influxdata.com/v2.0/get-started/) (**commercial offering**): hosted cloud solution + +InfluxDB Enterprise 2.0 is in development. + +## InfluxData 1.x TICK stack + +### Telegraf + +Telegraf is a data collection agent that captures data from a growing list of sources +and translates it into [InfluxDB line protocol format](/influxdb/latest/write_protocols/line_protocol_reference/) +for storage in InfluxDB. Telegraf's extensible architecture makes it easy to +create [plugins](/telegraf/latest/plugins/) that both pull data (input plugins) and push data (output plugins) +to and from different sources and endpoints. + +### InfluxDB + +InfluxDB stores data for any use case involving large amounts of timestamped data, including +DevOps monitoring, log data, application metrics, IoT sensor data, and real-time analytics.
+It provides functionality that allows you to conserve space on your machine by keeping +data for a defined length of time, then automatically downsampling or expiring and deleting +unneeded data from the system. + +### Chronograf + +Chronograf is the user interface for the TICK stack that provides customizable dashboards, +data visualizations, and data exploration. It also allows you to view and manage +[Kapacitor](#kapacitor) tasks. + +### Kapacitor + +Kapacitor is a data processing framework that enables you to process and act on data +as it is written to InfluxDB. This includes detecting anomalies, creating alerts +based on user-defined logic, and running ETL jobs. + +## InfluxData 1.x Enterprise versions + +InfluxDB Enterprise and Kapacitor Enterprise provide clustering, access control, and incremental backup functionality for production infrastructures at scale. You'll also receive direct support from the InfluxData support team. + +{{% note %}} +InfluxDB Enterprise and Kapacitor Enterprise are compatible with open source versions of Telegraf and Chronograf. +{{% /note %}} + +### InfluxDB Enterprise + +InfluxDB Enterprise provides functionality necessary to run a high-availability (HA) InfluxDB cluster, providing clustering, horizontal scale out, and advanced access controls, including: + +- Hinted handoff +- Anti-entropy +- Fine-grained authorization +- Cluster profiling +- Incremental backups + +#### Hinted handoff + +Data is written across nodes using an eventually consistent write model. +All writes are added to the [Hinted Handoff Queue (HHQ)](/enterprise_influxdb/latest/concepts/clustering/#hinted-handoff), +then written to other nodes in the cluster. + +#### Anti-Entropy + +InfluxDB Enterprise's +[Anti-Entropy (AE)](/enterprise_influxdb/latest/administration/anti-entropy/) +process ensures data shards in the cluster are in sync. When "entropy" (out-of-sync +data) is detected, AE will repair the affected shards, syncing the missing data. 
+ +#### Fine-grained authorization + +In InfluxDB Enterprise, fine-grained authorization can be used to control access +at the measurement or series levels rather than just the database level. + +#### Cluster profiling + +Enterprise meta nodes expose the `/debug/pprof` API endpoint that allows you to +profile and potentially diagnose performance bottlenecks in your cluster. + +#### Incremental backups + +InfluxDB Enterprise allows for incremental backups that write only newly added +data to existing backup files rather than backing up all data in a new backup. + +### Kapacitor Enterprise + +Kapacitor Enterprise provides functionality necessary to run a high-availability +Kapacitor cluster, including: + +- Kapacitor cluster management +- Alert deduplication +- Secure communication + +#### Kapacitor cluster management + +Kapacitor Enterprise is packaged with `kapacitorctl`, a command line client for creating +and managing Kapacitor clusters. + +#### Alert deduplication + +As alerts are triggered in a multi-node Kapacitor cluster, Kapacitor Enterprise +deduplicates alert data to prevent duplicate alert notifications from being sent. + +#### Secure communication + +Data is passed between InfluxDB and Kapacitor via subscriptions. +Kapacitor Enterprise includes configuration options that let you encrypt +communication between your Kapacitor Enterprise and InfluxDB Enterprise clusters. + + + +## Get started + +To get started with the **InfluxData 1.x** platform, see: + +- [Installation and Configuration](/platform/installation) +- [Getting Started](/platform/introduction/getting-started) + +To get started with the **InfluxDB 2.0** platform, see [**InfluxDB Cloud 2.0**](https://v2.docs.influxdata.com/v2.0/get-started/) or [**InfluxDB 2.0 OSS**](https://v2.docs.influxdata.com/v2.0/get-started/).
diff --git a/content/platform/introduction/getting-started.md b/content/platform/introduction/getting-started.md new file mode 100644 index 000000000..39069573b --- /dev/null +++ b/content/platform/introduction/getting-started.md @@ -0,0 +1,51 @@ +--- +title: Get started with the InfluxData Platform +description: placeholder +menu: + platform: + name: Get started + parent: Introduction +--- +## Overview + +![Getting started setup](/img/chronograf/v1.6/intro-gs-diagram.png) + +Before following the steps below, [download and install](/platform/install-and-deploy/install/) each component of the TICK stack, or [Install the InfluxData Sandbox](/platform/install-and-deploy/deploying/sandbox-install/). + +## Understand how Telegraf writes data to InfluxDB + +Once Telegraf is installed and started, it will send system metrics to InfluxDB by default, which automatically creates a ‘telegraf’ database. + +The configuration file for Telegraf specifies where metrics come from and where they go (inputs and outputs). In this example, we'll focus on CPU data, which is one of the default system metrics generated by Telegraf. For this example, it is worth noting some relevant values: + +* `[agent].interval` - declares the frequency at which system metrics will be sent to InfluxDB. +* `[[outputs.influxdb]]` - declares how to connect to InfluxDB and the destination database, which is the default ‘telegraf’ database. +* `[[inputs.cpu]]` - declares how to collect the system cpu metrics to be sent to InfluxDB. Enabled by default. + +For details about the configuration file, see [Get started with Telegraf](https://docs.influxdata.com/telegraf/latest/introduction/getting-started/). + +## Query data in InfluxDB + +As reviewed above, Telegraf is sending system data, including CPU usage, to InfluxDB. 
There are two ways you can query your InfluxDB data: + +* [In Chronograf with the Data Explorer.](https://docs.influxdata.com/chronograf/latest/guides/create-a-dashboard/#step-3-create-your-query) Use the builder to select from your existing data and allow Chronograf to format the query for you. Alternatively, manually enter and edit a query. You can move between using the builder and manually editing the query. +* [Using the command line interface.](https://docs.influxdata.com/influxdb/latest/query_language/data_exploration/) + + +**Query example:** +```sql +SELECT "usage_system", + "usage_user" +FROM "telegraf"."autogen"."cpu" +WHERE time > now() - 30m +``` + +## Visualize that data in a Chronograf dashboard + +Now that you've explored your data with queries, you can build a dashboard in Chronograf to visualize the data. For details, see [Create a dashboard](https://docs.influxdata.com/chronograf/latest/guides/create-a-dashboard) and [Using pre-created dashboards](https://docs.influxdata.com/chronograf/latest/guides/using-precreated-dashboards/). + +## Create an alert in Kapacitor based on that data + +Since InfluxDB is running on `localhost:8086`, Kapacitor finds it during start up and creates several subscriptions on InfluxDB. These subscriptions tell InfluxDB to send all the data it receives from Telegraf to Kapacitor. + +For step-by-step instructions on how to set up an alert in Kapacitor based on your data, see [Creating Chronograf alert rules](https://docs.influxdata.com/chronograf/latest/guides/create-alert-rules/). diff --git a/content/platform/monitoring/_index.md b/content/platform/monitoring/_index.md new file mode 100644 index 000000000..1f8baee24 --- /dev/null +++ b/content/platform/monitoring/_index.md @@ -0,0 +1,22 @@ +--- +title: Monitor the InfluxData Platform and containers +description: Use the InfluxData Platform to monitor InfluxDB health and containers. 
+aliases: + - /platform/monitoring/ +menu: + platform: + name: Monitor + weight: 30 +--- + + +## [Monitor the InfluxData Platform](/platform/monitoring/influxdata-platform) + +To monitor the health of the InfluxData Platform components, you can use +measurement statistics that are available internally or use a "watcher of watchers" +approach to collect statistics from remote InfluxDB servers and store them externally +from the systems that are being monitored. + +## [Monitor containers using the InfluxData Platform](/platform/monitoring/containers) + +The InfluxData Platform can be used to monitor containers and container orchestration. diff --git a/content/platform/monitoring/containers/_index.md b/content/platform/monitoring/containers/_index.md new file mode 100644 index 000000000..eab40f24f --- /dev/null +++ b/content/platform/monitoring/containers/_index.md @@ -0,0 +1,16 @@ +--- +title: Monitor containers using the InfluxData Platform +description: Use the InfluxData TICK stack to monitor containers, including Kubernetes and Docker +menu: + platform: + name: Monitor containers + weight: 2 + parent: Monitor +--- + +This is a new section of the documentation and new content will be added frequently +as support for the InfluxData Platform and containers rapidly evolves. + +## [Monitor Kubernetes using the InfluxData Platform](/platform/monitoring/containers/kubernetes/) + +You can use the InfluxData Platform to monitor your Kubernetes installations. diff --git a/content/platform/monitoring/containers/kubernetes.md b/content/platform/monitoring/containers/kubernetes.md new file mode 100644 index 000000000..078d1f375 --- /dev/null +++ b/content/platform/monitoring/containers/kubernetes.md @@ -0,0 +1,32 @@ +--- +title: Monitor Kubernetes using the InfluxData Platform +description: Use the InfluxData TICK stack to monitor Kubernetes. 
+menu: + platform: + name: Monitor Kubernetes + weight: 3 + parent: Monitor containers + draft: false +--- +## Monitor Kubernetes +The TICK stack is an easy and performant way to monitor the services that make up a Kubernetes cluster, whether you're running InfluxDB in a Kubernetes cluster or somewhere else. + +Why use the InfluxData + +### kube-influxdb Kubernetes monitoring project + +The [kube-influxdb](https://github.com/influxdata/kube-influxdb) project is a set of Helm charts to make collection and visualization of Kubernetes metrics easy. It uses Telegraf, the metrics collection agent, as the primary agent to collect metrics and events. + +[Read the kube-influxdb Getting Started guide.](https://github.com/influxdata/kube-influxdb/blob/master/docs/v1.0/getting_started.md) + +### Collect Kubernetes metrics with Telegraf + +The [Telegraf metrics collection agent](https://docs.influxdata.com/telegraf/v1.9/introduction/getting-started/) can collect many types of metrics in a Kubernetes cluster, like [Docker container metrics](https://github.com/influxdata/telegraf/blob/release-1.9/plugins/inputs/docker/README.md) and [stats from kubelets](https://github.com/influxdata/telegraf/tree/release-1.9/plugins/inputs/kubernetes). It can even scrape [Prometheus metrics API endpoints](https://github.com/influxdata/telegraf/tree/release-1.9/plugins/inputs/prometheus). Telegraf is used in the [kube-influxdb project](#kube-influxdb-kubernetes-monitoring-project) to collect metrics. + +[Read about setting up a Kubernetes monitoring architecture using Telegraf](https://www.influxdata.com/blog/monitoring-kubernetes-architecture/) + +### Prometheus remote read and write support + +InfluxDB supports the Prometheus remote read and write API for clusters that already use Prometheus for metrics collection but need a more flexible time series data store.
+ +[Read about the Prometheus remote read and write API support in InfluxDB](https://docs.influxdata.com/influxdb/v1.7/supported_protocols/prometheus/) diff --git a/content/platform/monitoring/influxdata-platform/_index.md b/content/platform/monitoring/influxdata-platform/_index.md new file mode 100644 index 000000000..8ce3706b8 --- /dev/null +++ b/content/platform/monitoring/influxdata-platform/_index.md @@ -0,0 +1,32 @@ +--- +title: Monitor the InfluxData Platform +description: How to use the InfluxData TICK stack to monitor itself and other TICK stacks in order to identify and alert on anomalies. +menu: + platform: + name: Monitor the InfluxData Platform + identifier: monitor-platform + weight: 1 + parent: Monitor +--- + +One of the primary use cases for the InfluxData Platform is as a server and infrastructure +monitoring solution. No matter what type of data you're using the TICK stack to collect and +store, it's important to monitor the health of your stack and identify any potential issues. + +The following pages provide information about setting up a TICK stack that monitors +another OSS or Enterprise TICK stack. They cover different potential monitoring strategies +and visualizing the monitoring data in a way that makes it easy to recognize, alert on, +and address anomalies as they happen. + +## [Internal versus external monitoring](/platform/monitoring/influxdata-platform/internal-vs-external) +An explanation of internal and external monitoring strategies for your Enterprise +or OSS TICK stack with the pros and cons of each. + +## [Set up an external monitor](/platform/monitoring/influxdata-platform/external-monitor-setup) +How to set up an external InfluxData TICK stack that monitors another Enterprise or OSS TICK stack. + +## [Dashboards for monitoring InfluxDB](/platform/monitoring/influxdata-platform/monitoring-dashboards) +Set up dashboards to help visualize and monitor the health and performance of your InfluxData TICK stack.
+ +## [Tools for monitoring InfluxDB](/platform/monitoring/influxdata-platform/tools) +The InfluxData platform provides tools to help you monitor and troubleshoot issues if they arise. diff --git a/content/platform/monitoring/influxdata-platform/external-monitor-setup.md b/content/platform/monitoring/influxdata-platform/external-monitor-setup.md new file mode 100644 index 000000000..fbfeceb23 --- /dev/null +++ b/content/platform/monitoring/influxdata-platform/external-monitor-setup.md @@ -0,0 +1,160 @@ +--- +title: Configure a watcher of watchers system to monitor InfluxDB servers +description: How to set up an external InfluxData TICK stack that monitors another Enterprise or OSS TICK stack. +aliases: + - /platform/monitoring/external-monitor-setup/ +menu: + platform: + name: Configure a watcher of watchers + parent: monitor-platform + weight: 4 +--- + +The flexibility and portability of InfluxData's TICK stack make it easy to use in different +monitoring solutions, including monitoring the TICK stack with another TICK stack. +This guide walks through setting up an external TICK stack monitor to which important +metrics are sent and monitored. + +The following terms are used throughout this guide: + +- **Primary** - The _monitored_ TICK stack or cluster for which uptime is most important. +- **Monitor** - The _monitoring_ TICK stack to which monitoring data is sent and processed. + +_This guide assumes a primary node or cluster is already running._ + +## Install a monitor +Install a separate TICK stack to act as your monitor. +Your monitor should be on hardware separate from your primary cluster. +Installation instructions for the TICK stack are provided in the [installation guides](/platform/install-and-deploy/). + +{{% note %}} +In order for your monitor to receive data from your primary cluster, the primary +must be able to connect to your monitor's API endpoint via HTTP or UDP. 
+{{% /note %}} + +## Install Telegraf on each node +[Install the `telegraf` agent](/telegraf/latest/introduction/installation/#installation) +on each node in your primary InfluxDB cluster you would like to monitor. + +### Send data collected by Telegraf to your monitor +[Generate a Telegraf configuration file](/telegraf/latest/introduction/installation/#configuration) +and modify the InfluxDB output `urls` setting to include the URL of your monitor's +InfluxDB API endpoint. + +_**telegraf.conf**_ +```toml +# ... + +[[outputs.influxdb]] + ## The full HTTP or UDP URL for your InfluxDB instance. + urls = ["http://monitor-url.com:8086"] + +# ... +``` + +### Configure Telegraf input plugins +By default, Telegraf is configured to collect the following system metrics from +the host machine: + +- CPU +- Disk +- Disk IO +- Memory +- Processes +- Swap +- System (load, number of CPUs, number of users, uptime, etc.) + +Use other [Telegraf input plugins](/telegraf/latest/plugins/inputs/) to collect +a variety of metrics. + +#### Monitor InfluxDB performance metrics +To monitor the internal performance of InfluxDB, enable the InfluxDB input plugin +in the Telegraf configuration files used to run Telegraf **on InfluxDB instances**. +The InfluxDB input plugin pulls [InfluxDB internal metrics](/platform/monitoring/influxdata-platform/tools/measurements-internal/) +from the local InfluxDB `/debug/vars` endpoint. + +```toml +# ... + +[[inputs.influxdb]] + # ... + ## Multiple URLs from which to read InfluxDB-formatted JSON + ## Default is "http://localhost:8086/debug/vars". + urls = [ + "http://localhost:8086/debug/vars" + ] + +# ... +``` + + + +#### Monitor Kapacitor performance metrics +To monitor the internal performance of Kapacitor, enable the Kapacitor input plugin +in the Telegraf configuration files used to run Telegraf **on Kapacitor instances**.
+The Kapacitor input plugin pulls [Kapacitor internal metrics](/platform/monitoring/influxdata-platform/tools/kapacitor-measurements/) +from the local Kapacitor `/kapacitor/v1/debug/vars` endpoint. + +```toml +# ... + +[[inputs.kapacitor]] + # ... + ## Multiple URLs from which to read Kapacitor-formatted JSON + ## Default is "http://localhost:9092/kapacitor/v1/debug/vars". + urls = [ + "http://localhost:9092/kapacitor/v1/debug/vars" + ] + +# ... +``` + +### (Optional) Namespace monitoring data +If Telegraf is running on your monitor instance, it will store your monitor's own +metrics in the `telegraf` database by default. +To keep your monitor's internal data separate from your other monitoring data, +configure your local Telegraf agent to write to a database other than `telegraf` using +the `database` setting under `[[outputs.influxdb]]` in your `telegraf.conf`. + +```toml +# ... + +[[outputs.influxdb]] + # ... + ## The target database for metrics; will be created as needed. + database = "monitor_local" + + # ... +``` + +### (Optional) Update primary hostnames +Telegraf's default behavior is to include a `host` tag on each data point +using the `os.hostname` provided by the host machine. +Customize the hostname by updating the `hostname` setting under the `[agent]` +section in your `telegraf.conf`. + +_**Example custom hostname in telegraf.conf**_ +```toml +[agent] + + # ... + + ## Override default hostname, if empty use os.Hostname() + hostname = "primary_influxdb_1" + + # ... +``` + +## Start Telegraf +With Telegraf installed and configured on each of your primary nodes, start Telegraf +using your custom configuration file. + +```bash +telegraf -config path/to/telegraf.conf +``` + +## Create Kapacitor monitoring alerts +Monitoring data should now be flowing from your primary cluster to your monitor +where it can be processed by your monitor's Kapacitor component.
+[Create Kapacitor alerts](/kapacitor/latest/working/alerts/) that alert you of issues +detected in any of the monitored metrics. diff --git a/content/platform/monitoring/influxdata-platform/internal-vs-external.md b/content/platform/monitoring/influxdata-platform/internal-vs-external.md new file mode 100644 index 000000000..39c03ee89 --- /dev/null +++ b/content/platform/monitoring/influxdata-platform/internal-vs-external.md @@ -0,0 +1,113 @@ +--- +title: Considerations for monitoring the InfluxData Platform +description: An explanation of internal and external monitoring strategies for your Enterprise or OSS TICK stack with the pros and cons of each. +aliases: + - /platform/monitoring/internal-vs-external/ +menu: + platform: + name: Configurations for monitoring + parent: monitor-platform + weight: 3 +--- + +One of the primary use cases for InfluxData's TICK stack is infrastructure monitoring, +including using the TICK stack to monitor itself or another TICK stack. +These are the two main approaches to Monitoring your TICK stack: + +- **[Internal monitoring](#internal-monitoring)** - A TICK stack that monitors itself. +- **["Watcher of watchers" approach](#the-watcher-of-watchers-approach)** - A TICK stack monitored by another TICK stack. + +## Internal monitoring + +{{% warn %}}Not recommended for production environments.{{% /warn %}} + +By default, the InfluxData platform is configured to monitor itself. +Telegraf collects metrics from the host on which it's running for things such as +CPU usage, memory usage, disk usage, etc., and stores them in the `telegraf` database in InfluxDB. +InfluxDB also reports performance metrics about itself, such as continuous query statistics, +internal goroutine statistics, write statistics, series cardinality, and others, +and stores them in the `_internal` database. 
+_For the recommendation about `_internal` databases, see [Disable the `_internal` database in production clusters](#disable-the-internal-database-in-production-clusters) below._ + +[Monitoring dashboards](/platform/monitoring/monitoring-dashboards) are available +that visualize the default metrics provided in each of these databases. +You can also [configure Kapacitor alerts](/kapacitor/latest/working/alerts/) +to monitor and alert on each of these metrics. + +### Pros of internal monitoring + +#### Simple setup +Internal monitoring requires no additional setup or configuration changes. +The TICK stack monitors itself out of the box. + +### Cons of internal monitoring + +#### No hardware separation + +When using internal monitoring, if your TICK stack goes offline, your monitor does as well. +Any configured alerts will not be sent and you will not be notified of any issues. +Because of this, **internal monitoring is not recommended for production use cases.** + +## The "watcher of watchers" approach + +{{% note %}} +Recommended for production environments. +{{% /note %}} + +A "watcher of watchers" approach for monitoring InfluxDB OSS and InfluxDB cluster +nodes offers monitoring of your InfluxDB resources while ensuring that the monitoring +statistics are available remotely in case of data loss. + +This usually takes the form of an Enterprise cluster being monitored by an OSS TICK stack. +It consists of Telegraf agents installed on each node in your primary cluster +reporting metrics for their respective hosts to a monitoring TICK stack installed +on a separate server or cluster. + +--- + +_For information about setting up an external monitoring TICK stack, see [Setup an external monitor](/platform/monitoring/external-monitor-setup)._ + +--- + +[Monitoring dashboards](/platform/monitoring/influxdata-platform/monitoring-dashboards) are available +that visualize the default metrics provided by the Telegraf agents. 
+You can also [configure Kapacitor alerts](/kapacitor/latest/working/alerts/) +to monitor and alert on each of these metrics. + +### Pros of external monitoring + +#### Hardware separation + +With a monitor running separately from your primary TICK stack, issues that occur in the primary stack will not affect the monitor. +If your primary TICK stack goes down or has issues, your monitor will be able to detect them and alert you. + +### Cons of external monitoring + +#### Slightly more setup + +There is more setup involved with external monitoring, but the benefits far +outweigh the extra time required, especially for production use cases. + +## Recommendations + +### Disable the `_internal` database in production clusters +InfluxData does **not** recommend using the `_internal` database in a production cluster. +It creates unnecessary overhead, particularly for busy clusters, that can overload an already loaded cluster. +Metrics stored in the `_internal` database primarily measure workload performance, +which should only be tested in non-production environments. + +To disable the `_internal` database, set [`store-enabled`](/influxdb/latest/administration/config/#monitoring-settings-monitor) +to `false` under the `[monitor]` section of your `influxdb.conf`. + +_**influxdb.conf**_ +```toml +# ... +[monitor] + + # ... + + # Whether to record statistics internally. + store-enabled = false + + #... +``` diff --git a/content/platform/monitoring/influxdata-platform/monitoring-dashboards.md b/content/platform/monitoring/influxdata-platform/monitoring-dashboards.md new file mode 100644 index 000000000..a648ae69b --- /dev/null +++ b/content/platform/monitoring/influxdata-platform/monitoring-dashboards.md @@ -0,0 +1,99 @@ +--- +title: Set up monitoring dashboards +description: > + Set up dashboards to visualize and monitor the health and performance of your + InfluxData TICK stack.
+aliases: + - /platform/monitoring/monitoring-dashboards/ + - /platform/monitoring/monitoring-dashboards/dashboard-oss-monitoring/ + - /platform/monitoring/monitoring-dashboards/dashboard-enterprise-monitoring/ +menu: + platform: + name: Monitoring dashboards + weight: 3 + parent: monitor-platform +aliases: + - /platform/monitoring/influxdata-platform/monitoring-dashboards/dashboard-enterprise-monitoring/ + - /platform/monitoring/influxdata-platform/monitoring-dashboards/dashboard-oss-monitoring/ +--- + + +The following dashboards provide visualizations of performance metrics for +InfluxDB open source (OSS), InfluxDB Enterprise, and Kapacitor. + +## Prebuilt dashboards +Chronograf provides prebuilt monitoring dashboards that use data from specific +Telegraf input plugins. To view prebuilt dashboards: + +1. Open Chronograf and click **Host List** in the navigation bar. +2. Each link in the **Apps** column is a prebuilt dashboard generated using metrics + from Telegraf input plugins. + Click a link to view the associated dashboard. + +## Import monitoring dashboards +Use the dashboards below to visualize and monitor key TICK stack metrics. +Download the dashboard file and import it into Chronograf. +For detailed instructions, see [Importing a dashboard](/chronograf/latest/administration/import-export-dashboards/#importing-a-dashboard). + +- [Monitor InfluxDB OSS](#monitor-influxdb-oss) +- [Monitor InfluxDB Enterprise](#monitor-influxdb-enterprise) +- [Monitor Kapacitor](#monitor-kapacitor) + +### Monitor InfluxDB OSS +Use the InfluxDB OSS Monitor dashboard to monitor InfluxDB OSS in Chronograf. 
+ +Download InfluxDB OSS Monitor dashboard + +The InfluxDB OSS Monitor dashboard uses data from the `_internal` database +_([not recommended for production](/platform/monitoring/influxdata-platform/internal-vs-external/#disable-the-internal-database-in-production-clusters))_ +or collected by the [Telegraf `influxdb` input plugin](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/influxdb). +This dashboard contains the following cells: + +- Queries Executed Per Minute +- HTTP Requests Per Minute +- Points Throughput Per Minute by Hostname +- Series Cardinality & Measurements by Database +- HTTP Request Duration (99th %) +- Heap Size +- Shard Write Errors +- Continuous Queries Executed Per Minute + +### Monitor InfluxDB Enterprise +Use the InfluxDB Enterprise Monitor dashboard to monitor InfluxDB Enterprise in Chronograf. + +Download InfluxDB Enterprise Monitor dashboard + +The InfluxDB Enterprise Monitor dashboard uses data from the `_internal` database +_([not recommended for production](/platform/monitoring/influxdata-platform/internal-vs-external/#disable-the-internal-database-in-production-clusters))_ +or collected by the [Telegraf `influxdb` input plugin](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/influxdb). +This dashboard contains the following cells: + +- Queries Executed Per Minute +- HTTP Requests Per Minute +- Points Throughput Per Minute by Hostname +- Series Cardinality & Measurements by Database +- HTTP Request Duration (99th %) +- Heap Size +- Shard Write Errors +- Continuous Queries Executed Per Minute +- Hinted HandOff Queue Size +- Anti-Entropy Errors & Jobs + +### Monitor Kapacitor +Use the Kapacitor Monitor dashboard to monitor Kapacitor in Chronograf. 
+ +Download Kapacitor Monitor dashboard + +The Kapacitor Monitor dashboard requires the Telegraf +[`mem`](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/mem), +[`cpu`](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/cpu), +[`system`](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/system), +and [`kapacitor`](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/kapacitor) +input plugins and contains the following cells: + +- Kapacitor Host RAM Usage +- Kapacitor Host CPU Usage +- Kapacitor Host Load (1, 5, 15) +- Number of Subscriptions +- Number of Running Tasks +- Data Ingest Rate diff --git a/content/platform/monitoring/influxdata-platform/tools/_index.md b/content/platform/monitoring/influxdata-platform/tools/_index.md new file mode 100644 index 000000000..5823c558d --- /dev/null +++ b/content/platform/monitoring/influxdata-platform/tools/_index.md @@ -0,0 +1,26 @@ +--- +title: Tools for monitoring the InfluxData Platform (TICK stack) +description: Use the "internal" measurements, SHOW DIAGNOSTICS, and SHOW STATS to monitor your InfluxData Platform. +aliases: + - /platform/monitoring/tools/ +menu: + platform: + name: Other monitoring tools + parent: monitor-platform + weight: 5 +--- + +The following tools are available to help monitor and troubleshoot the InfluxData platform. + +## [InfluxDB `_internal` measurements](/platform/monitoring/influxdata-platform/tools/measurements-internal) +Understand the InfluxDB `_internal` measurements and fields and use them to monitor +InfluxDB OSS and InfluxDB Enterprise servers. + +## [Kapacitor measurements](/platform/monitoring/influxdata-platform/tools/kapacitor-measurements) +Understand the Kapacitor internal metrics and use them to monitor Kapacitor. 
+ +## [SHOW DIAGNOSTICS statement](/platform/monitoring/influxdata-platform/tools/show-diagnostics) +Use the `SHOW DIAGNOSTICS` statement to get current InfluxDB instance information, including build details, uptime, hostname, server configuration, memory usage, and Go runtime diagnostics. + +## [SHOW STATS statement](/platform/monitoring/influxdata-platform/tools/show-stats) +Use the `SHOW STATS` statement for current measurement statistics of InfluxDB servers and available (enabled) components. diff --git a/content/platform/monitoring/influxdata-platform/tools/kapacitor-measurements.md b/content/platform/monitoring/influxdata-platform/tools/kapacitor-measurements.md new file mode 100644 index 000000000..5af50b00f --- /dev/null +++ b/content/platform/monitoring/influxdata-platform/tools/kapacitor-measurements.md @@ -0,0 +1,270 @@ +--- +title: Kapacitor measurements and fields +description: > + Understand internal Kapacitor measurements and fields and use them to monitor + Kapacitor servers. +aliases: + - /platform/monitoring/tools/kapacitor-measurements/ +menu: + platform: + name: Kapacitor measurements + parent: Other monitoring tools + weight: 2 +--- + +Kapacitor exposes internal performance metrics through the `/kapacitor/v1/debug/vars` endpoint. +Use the Telegraf `kapacitor` input plugin to capture these metrics and store them in InfluxDB. + +Enable the Kapacitor input plugin in your Telegraf configuration file: + +```toml +# ... +[[inputs.kapacitor]] + ## Multiple URLs from which to read Kapacitor-formatted JSON + ## Default is "http://localhost:9092/kapacitor/v1/debug/vars". + urls = [ + "http://localhost:9092/kapacitor/v1/debug/vars" + ] +# ... +``` + +### Visualize Kapacitor metrics +Use the [Kapacitor Monitor dashboard](/platform/monitoring/influxdata-platform/monitoring-dashboards/#monitor-kapacitor) +to visualize Kapacitor metrics.
+ +## Kapacitor measurements & fields +Kapacitor exposes the following measurements and fields through the +`/kapacitor/v1/debug/vars` endpoint. + +- [kapacitor](#kapacitor) + - [num_enabled_tasks](#num_enabled_tasks) + - [num_subscriptions](#num_subscriptions) + - [num_tasks](#num_tasks) +- [kapacitor_edges](#kapacitor_edges) + - [collected](#collected) + - [emitted](#emitted) +- [kapacitor_ingress](#kapacitor_ingress) + - [points_received](#points_received) +- [kapacitor_load](#kapacitor_load) + - [errors](#errors) +- [kapacitor_memstats](#kapacitor_memstats) + - [alloc_bytes](#alloc_bytes) + - [buck_hash_sys_bytes](#buck_hash_sys_bytes) + - [frees](#frees) + - [gc_sys_bytes](#gc_sys_bytes) + - [gcc_pu_fraction](#gcc_pu_fraction) + - [heap_alloc_bytes](#heap_alloc_bytes) + - [heap_idle_bytes](#heap_idle_bytes) + - [heap_in_use_bytes](#heap_in_use_bytes) + - [heap_objects](#heap_objects) + - [heap_released_bytes](#heap_released_bytes) + - [heap_sys_bytes](#heap_sys_bytes) + - [last_gc_ns](#last_gc_ns) + - [lookups](#lookups) + - [mallocs](#mallocs) + - [mcache_in_use_bytes](#mcache_in_use_bytes) + - [mcache_sys_bytes](#mcache_sys_bytes) + - [mspan_in_use_bytes](#mspan_in_use_bytes) + - [mspan_sys_bytes](#mspan_sys_bytes) + - [next_gc_ns](#next_gc_ns) + - [num_gc](#num_gc) + - [other_sys_bytes](#other_sys_bytes) + - [pause_total_ns](#pause_total_ns) + - [stack_in_use_bytes](#stack_in_use_bytes) + - [stack_sys_bytes](#stack_sys_bytes) + - [sys_bytes](#sys_bytes) + - [total_alloc_bytes](#total_alloc_bytes) +- [kapacitor_nodes](#kapacitor_nodes) + - [alerts_inhibited](#alerts_inhibited) + - [alerts_triggered](#alerts_triggered) + - [avg_exec_time_ns](#avg_exec_time_ns) + - [crits_triggered](#crits_triggered) + - [errors](#errors) + - [infos_triggered](#infos_triggered) + - [oks_triggered](#oks_triggered) + - [points_written](#points_written) + - [warns_triggered](#warns_triggered) + - [write_errors](#write_errors) +- [kapacitor_topics](#kapacitor_topics) + -
[collected](#collected) + +--- + +### kapacitor +The `kapacitor` measurement stores fields with information related to +[Kapacitor tasks](/kapacitor/latest/introduction/getting-started/#kapacitor-tasks) +and [subscriptions](/kapacitor/latest/administration/subscription-management/). + +#### num_enabled_tasks +The number of enabled Kapacitor tasks. + +#### num_subscriptions +The number of Kapacitor/InfluxDB subscriptions. + +#### num_tasks +The total number of Kapacitor tasks. + +--- + +### kapacitor_edges +The `kapacitor_edges` measurement stores fields with information related to +[edges](/kapacitor/latest/tick/introduction/#pipelines) +in Kapacitor TICKscripts. + +#### collected +The number of messages collected by TICKscript edges. + +#### emitted +The number of messages emitted by TICKscript edges. + +--- + +### kapacitor_ingress +The `kapacitor_ingress` measurement stores fields with information related to data +coming into Kapacitor. + +#### points_received +The number of points received by Kapacitor. + +--- + +### kapacitor_load +The `kapacitor_load` measurement stores fields with information related to the +[Kapacitor Load Directory service](/kapacitor/latest/guides/load_directory/). + +#### errors +The number of errors reported from the load directory service. + +--- + +### kapacitor_memstats +The `kapacitor_memstats` measurement stores fields related to Kapacitor memory usage. + +#### alloc_bytes +The number of bytes of memory allocated by Kapacitor that are still in use. + +#### buck_hash_sys_bytes +The number of bytes of memory used by the profiling bucket hash table. + +#### frees +The number of heap objects freed. + +#### gc_sys_bytes +The number of bytes of memory used for garbage collection system metadata. + +#### gcc_pu_fraction +The fraction of Kapacitor's available CPU time used by garbage collection since +Kapacitor started. + +#### heap_alloc_bytes +The number of reachable and unreachable heap objects garbage collection has +not freed. 
+ +#### heap_idle_bytes +The number of heap bytes waiting to be used. + +#### heap_in_use_bytes +The number of heap bytes in use. + +#### heap_objects +The number of allocated objects. + +#### heap_released_bytes +The number of heap bytes released to the operating system. + +#### heap_sys_bytes +The number of heap bytes obtained from `system`. + +#### last_gc_ns +The nanosecond epoch time of the last garbage collection. + +#### lookups +The total number of pointer lookups. + +#### mallocs +The total number of mallocs. + +#### mcache_in_use_bytes +The number of bytes in use by mcache structures. + +#### mcache_sys_bytes +The number of bytes used for mcache structures obtained from `system`. + +#### mspan_in_use_bytes +The number of bytes in use by mspan structures. + +#### mspan_sys_bytes +The number of bytes used for mspan structures obtained from `system`. + +#### next_gc_ns +The nanosecond epoch time of the next garbage collection. + +#### num_gc +The number of completed garbage collection cycles. + +#### other_sys_bytes +The number of bytes used for other system allocations. + +#### pause_total_ns +The total number of nanoseconds spent in garbage collection "stop-the-world" +pauses since Kapacitor started. + +#### stack_in_use_bytes +The number of bytes in use by the stack allocator. + +#### stack_sys_bytes +The number of bytes obtained from `system` for stack allocator. + +#### sys_bytes +The number of bytes of memory obtained from `system`. + +#### total_alloc_bytes +The total number of bytes allocated, even if freed. + +--- + +### kapacitor_nodes +The `kapacitor_nodes` measurement stores fields related to events that occur in +[TICKscript nodes](/kapacitor/latest/nodes/). + +#### alerts_inhibited +The total number of alerts inhibited by TICKscripts. + +#### alerts_triggered +The total number of alerts triggered by TICKscripts. + +#### avg_exec_time_ns +The average execution time of TICKscripts in nanoseconds. 
+ +#### crits_triggered +The number of critical (`crit`) alerts triggered by TICKscripts. + +#### errors +The number of errors caused by TICKscripts. + +#### infos_triggered +The number of info (`info`) alerts triggered by TICKscripts. + +#### oks_triggered +The number of ok (`ok`) alerts triggered by TICKscripts. + +#### points_written +The number of points written to InfluxDB or back to Kapacitor. + +#### warns_triggered +The number of warning (`warn`) alerts triggered by TICKscripts. + +#### working_cardinality +The total number of unique series processed. + +#### write_errors +The number of errors that occurred when writing to InfluxDB or other write endpoints. + +--- + +### kapacitor_topics +The `kapacitor_topics` measurement stores fields related to +[Kapacitor topics](/kapacitor/latest/working/using_alert_topics/). + +#### collected +The number of events collected by Kapacitor topics. diff --git a/content/platform/monitoring/influxdata-platform/tools/measurements-internal.md b/content/platform/monitoring/influxdata-platform/tools/measurements-internal.md new file mode 100644 index 000000000..64de3e1a6 --- /dev/null +++ b/content/platform/monitoring/influxdata-platform/tools/measurements-internal.md @@ -0,0 +1,995 @@ +--- +title: InfluxDB _internal measurements and fields +description: > + Use and understand the InfluxDB _internal measurements statistics and field keys + that monitor InfluxDB and InfluxDB Enterprise servers. +aliases: + - /platform/monitoring/tools/measurements-internal/ +menu: + platform: + name: InfluxDB _internal measurements + parent: Other monitoring tools + weight: 2 +--- + +By default, InfluxDB generates internal metrics and saves them to the `_internal` database. +Use these metrics to monitor InfluxDB and InfluxDB Enterprise and to create alerts to notify you when problems arise. + +### Disable the `_internal` database in production +InfluxData does **not** recommend using the `_internal` database in a production cluster.
+It creates unnecessary overhead, particularly for busy clusters, that can overload an already loaded cluster. +Metrics stored in the `_internal` database primarily measure workload performance +and should only be tested in non-production environments. + +To disable the `_internal` database, set [`store-enabled`](/influxdb/latest/administration/config/#monitoring-settings-monitor) +to `false` under the `[monitor]` section of your **InfluxDB configuration file**. + +```toml +# ... +[monitor] + # ... + # Whether to record statistics internally. + store-enabled = false + #... +``` + +### Store internal metrics in an external monitor +To monitor InfluxDB `_internal` metrics in a production cluster, use Telegraf +and the [`influxdb` input plugin](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/influxdb) +to capture these metrics from the InfluxDB `/debug/vars` endpoint and store them +in an external InfluxDB monitoring instance. +For more information, see [Configure a Watcher of Watchers](/platform/monitoring/influxdata-platform/external-monitor-setup/). + +{{% note %}} +When using the "Watcher of Watchers (WoW)" configuration, InfluxDB +metric field keys are prepended with `influxdb_`, but are otherwise identical +to those listed [below](#influxdb-internal-measurements-and-fields). +{{% /note %}} + +## Visualize InfluxDB internal metrics +Use the [InfluxDB OSS Monitor dashboard](/platform/monitoring/influxdata-platform/monitoring-dashboards/#monitor-influxdb-oss) +or the [InfluxDB Enterprise Monitor dashboard](/platform/monitoring/influxdata-platform/monitoring-dashboards/#monitor-influxdb-enterprise) +to visualize InfluxDB `_internal` metrics.
+ +## InfluxDB \_internal measurements and fields +{{% truncate %}} +- [ae](#ae-enterprise-only) (Enterprise only) + - [bytesRx](#bytesrx) + - [errors](#errors) + - [jobs](#jobs) + - [jobsActive](#jobsactive) +- [cluster](#cluster-enterprise-only) (Enterprise only) + - [copyShardReq](#copyshardreq) + - [createIteratorReq](#createiteratorreq) + - [expandSourcesReq](#expandsourcesreq) + - [fieldDimensionsReq](#fielddimensionsreq) + - [iteratorCostReq](#iteratorcostreq) + - [removeShardReq](#removeshardreq) + - [writeShardFail](#writeshardfail) + - [writeShardPointsReq](#writeshardpointsreq) + - [writeShardReq](#writeshardreq) +- [cq](#cq) + - [queryFail](#queryfail) + - [queryOk](#queryok) +- [database](#database) + - [numMeasurements](#nummeasurements) + - [numSeries](#numseries) +- [hh](#hh-enterprise-only) (Enterprise only) + - [writeShardReq](#writeshardreq) + - [writeShardReqPoints](#writeshardreqpoints) +- [hh_database](#hh-database-enterprise-only) (Enterprise only) + - [bytesRead](#bytesread) + - [bytesWritten](#byteswritten) + - [queueBytes](#queuebytes) + - [queueDepth](#queuedepth) + - [writeBlocked](#writeblocked) + - [writeDropped](#writedropped) + - [writeNodeReq](#writenodereq) + - [writeNodeReqFail](#writenodereqfail) + - [writeNodeReqPoints](#writenodereqpoints) + - [writeShardReq](#writeshardreq) + - [writeShardReqPoints](#writeshardreqpoints) +- [hh_processor](#hh-processor-enterprise-only) (Enterprise only) + - [bytesRead](#bytesread) + - [bytesWritten](#byteswritten) + - [queueBytes](#queuebytes) + - [queueDepth](#queuedepth) + - [writeBlocked](#writeblocked) + - [writeDropped](#writedropped) + - [writeNodeReq](#writenodereq) + - [writeNodeReqFail](#writenodereqfail) + - [writeNodeReqPoints](#writenodereqpoints) + - [writeShardReq](#writeshardreq) + - [writeShardReqPoints](#writeshardreqpoints) +- [httpd](#httpd) + - [authFail](#authfail) + - [clientError](#clienterror) + - [pingReq](#pingreq) + - [pointsWrittenDropped](#pointswrittendropped) + -
[pointsWrittenFail](#pointswrittenfail) + - [pointsWrittenOK](#pointswrittenok) + - [promReadReq](#promreadreq) + - [promWriteReq](#promwritereq) + - [fluxQueryReq](#fluxqueryreq) + - [fluxQueryReqDurationNs](#fluxqueryreqdurationns) + - [queryReq](#queryreq) + - [queryReqDurationNs](#queryreqdurationns) + - [queryRespBytes](#queryrespbytes) + - [recoveredPanics](#recoveredpanics) + - [req](#req) + - [reqActive](#reqactive) + - [reqDurationNs](#reqdurationns) + - [serverError](#servererror) + - [statusReq](#statusreq) + - [writeReq](#writereq) + - [writeReqActive](#writereqactive) + - [writeReqBytes](#writereqbytes) + - [writeReqDurationNs](#writereqdurationns) +- [queryExecutor](#queryexecutor) + - [queriesActive](#queriesactive) + - [queriesExecuted](#queriesexecuted) + - [queriesFinished](#queriesfinished) + - [queryDurationNs](#querydurationns) + - [recoveredPanics](#recoveredpanics) +- [rpc](#rpc-enterprise-only) (Enterprise only) + - [idleStreams](#idlestreams) + - [liveConnections](#liveconnections) + - [liveStreams](#livestreams) + - [rpcCalls](#rpccalls) + - [rpcFailures](#rpcfailures) + - [rpcReadBytes](#rpcreadbytes) + - [rpcRetries](#rpcretries) + - [rpcWriteBytes](#rpcwritebytes) + - [singleUse](#singleuse) + - [singleUseOpen](#singleuseopen) + - [totalConnections](#totalconnections) + - [totalStreams](#totalstreams) +- [runtime](#runtime) + - [Alloc](#alloc) + - [Frees](#frees) + - [HeapAlloc](#heapalloc) + - [HeapIdle](#heapidle) + - [HeapInUse](#heapinuse) + - [HeapObjects](#heapobjects) + - [HeapReleased](#heapreleased) + - [HeapSys](#heapsys) + - [Lookups](#lookups) + - [Mallocs](#mallocs) + - [NumGC](#numgc) + - [NumGoroutine](#numgoroutine) + - [PauseTotalNs](#pausetotalns) + - [Sys](#sys) + - [TotalAlloc](#totalalloc) +- [shard](#shard) + - [diskBytes](#diskbytes) + - [fieldsCreate](#fieldscreate) + - [seriesCreate](#seriescreate) + - [writeBytes](#writebytes) + - [writePointsDropped](#writepointsdropped) + - [writePointsErr](#writepointserr) + -
[writePointsOk](#writepointsok) + - [writeReq](#writereq) + - [writeReqErr](#writereqerr) + - [writeReqOk](#writereqok) +- [subscriber](#subscriber) + - [createFailures](#createfailures) + - [pointsWritten](#pointswritten) + - [writeFailures](#writefailures) +- [tsm1_cache](#tsm1-cache) + - [WALCompactionTimeMs](#walcompactiontimems) + - [cacheAgeMs](#cacheagems) + - [cachedBytes](#cachedbytes) + - [diskBytes](#diskbytes) + - [memBytes](#membytes) + - [snapshotCount](#snapshotcount) + - [writeDropped](#writedropped) + - [writeErr](#writeerr) + - [writeOk](#writeok) +- [tsm1_engine](#tsm1-engine) + - [cacheCompactionDuration](#cachecompactionduration) + - [cacheCompactionErr](#cachecompactionerr) + - [cacheCompactions](#cachecompactions) + - [cacheCompactionsActive](#cachecompactionsactive) + - [tsmFullCompactionDuration](#tsmfullcompactionduration) + - [tsmFullCompactionErr](#tsmfullcompactionerr) + - [tsmFullCompactionQueue](#tsmfullcompactionqueue) + - [tsmFullCompactions](#tsmfullcompactions) + - [tsmFullCompactionsActive](#tsmfullcompactionsactive) + - [tsmLevel1CompactionDuration](#tsmlevel1compactionduration) + - [tsmLevel1CompactionErr](#tsmlevel1compactionerr) + - [tsmLevel1CompactionQueue](#tsmlevel1compactionqueue) + - [tsmLevel1Compactions](#tsmlevel1compactions) + - [tsmLevel1CompactionsActive](#tsmlevel1compactionsactive) + - [tsmLevel2CompactionDuration](#tsmlevel2compactionduration) + - [tsmLevel2CompactionErr](#tsmlevel2compactionerr) + - [tsmLevel2CompactionQueue](#tsmlevel2compactionqueue) + - [tsmLevel2Compactions](#tsmlevel2compactions) + - [tsmLevel2CompactionsActive](#tsmlevel2compactionsactive) + - [tsmLevel3CompactionDuration](#tsmlevel3compactionduration) + - [tsmLevel3CompactionErr](#tsmlevel3compactionerr) + - [tsmLevel3CompactionQueue](#tsmlevel3compactionqueue) + - [tsmLevel3Compactions](#tsmlevel3compactions) + - [tsmLevel3CompactionsActive](#tsmlevel3compactionsactive) + - 
[tsmOptimizeCompactionDuration](#tsmoptimizecompactionduration) + - [tsmOptimizeCompactionErr](#tsmoptimizecompactionerr) + - [tsmOptimizeCompactionQueue](#tsmoptimizecompactionqueue) + - [tsmOptimizeCompactions](#tsmoptimizecompactions) + - [tsmOptimizeCompactionsActive](#tsmoptimizecompactionsactive) +- [tsm1_filestore](#tsm1-filestore) + - [diskBytes](#diskbytes) + - [numFiles](#numfiles) +- [tsm1_wal](#tsm1-wal) + - [currentSegmentDiskBytes](#currentsegmentdiskbytes) + - [oldSegmentsDiskBytes](#oldsegmentsdiskbytes) + - [writeErr](#writeerr) + - [writeOk](#writeok) +- [write](#write) + - [pointReq](#pointreq) + - [pointReqHH](#pointreqhh-enterprise-only) (Enterprise only) + - [pointReqLocal](#pointreqlocal-enterprise-only) (Enterprise only) + - [pointReqRemote](#pointreqremote-enterprise-only) (Enterprise only) + - [req](#req) + - [subWriteDrop](#subwritedrop) + - [subWriteOk](#subwriteok) + - [writeDrop](#writedrop) + - [writeError](#writeerror) + - [writeOk](#writeok) + - [writePartial](#writepartial-enterprise-only) (Enterprise only) + - [writeTimeout](#writetimeout) +{{% /truncate %}} + +### ae (Enterprise only) +The measurement statistics related to the Anti-Entropy (AE) engine in InfluxDB Enterprise clusters. + +#### bytesRx +The number of bytes received by the data node. + +#### errors +The total number of anti-entropy jobs that have resulted in errors. + +#### jobs +The total number of jobs executed by the data node. + +#### jobsActive +The number of active (currently executing) jobs. + +--- + +### cluster (Enterprise only) +The `cluster` measurement tracks statistics related to the clustering features of the data nodes in InfluxDB Enterprise. +The tags on the series indicate the source host of the stat. + +#### copyShardReq +The number of internal requests made to copy a shard from one data node to another. + +#### createIteratorReq +The number of read requests from other data nodes in the cluster.
+ +#### expandSourcesReq +The number of remote node requests made to find measurements on this node that match a particular regular expression. +Indicates a SELECT from a regex initiated on a different data node, which then sent an internal request to this node. +There is not currently a statistic tracking how many queries with a regex, instead of a fixed measurement, were initiated on a particular node. + +#### fieldDimensionsReq +The number of remote node requests for information about the fields and associated types, and tag keys of measurements on this data node. + +#### iteratorCostReq +The number of internal requests for iterator cost. + +#### removeShardReq +The number of internal requests to delete a shard from this data node. +Exclusively incremented by use of the `influxd-ctl remove shard` command. + +#### writeShardFail +The total number of internal write requests from a remote node that failed. +It's the cousin of InfluxDB shard stat `writeReqErr`. +A write request over HTTP is received by Node A. Node A does not have the shard locally, +so it creates an internal request to Node B instructing what to write and to which shard. +If Node B sees the request and if anything goes wrong, Node B increments its own `writeShardFail`. +Depending on what went wrong, in most circumstances Node B would also increment its `writeReqErr` stat inherited from InfluxDB OSS. +If Node A had the shard locally, there would be no internal request to write data +to a remote node, so `writeShardFail` would not be incremented. + +#### writeShardPointsReq +The number of points in every internal write request from any remote node, regardless of success. + +#### writeShardReq +The number of internal write requests from a remote data node, regardless of success. + +--- + +### cq +The measurement statistics related to continuous queries (CQs). + +#### queryFail +The total number of continuous queries that executed but failed. 
+ +#### queryOk +The total number of continuous queries that executed successfully. +Note that this value may be incremented in some cases where a CQ is initiated +but does not actually run, for example, due to misconfigured resample intervals. + +--- + +### database + +#### numMeasurements +The current number of measurements in the specified database. + +The series cardinality values are estimates, based on [HyperLogLog++ (HLL++)](https://github.com/influxdata/influxdb/blob/master/pkg/estimator/hll/hll.go). +The numbers returned by the estimates when there are thousands or millions of +measurements or series should be accurate within a relatively small margin of error. + +#### numSeries +The current series cardinality of the specified database. +The series cardinality values are estimates, based on [HyperLogLog++ (HLL++)](https://github.com/influxdata/influxdb/blob/master/pkg/estimator/hll/hll.go). +The numbers returned by the estimates when there are thousands or millions of +measurements or series should be accurate within a relatively small margin of error. + +--- + +### hh (Enterprise only) +The `hh` measurement statistics track events resulting in new hinted handoff (HH) +processors in InfluxDB Enterprise. +The `hh` measurement has one additional tag: + +- `path` - The path to the durable hinted handoff queue on disk. + +#### writeShardReq +The number of initial write requests handled by the hinted handoff engine for a remote node. +Subsequent write requests to this node, destined for the same remote node, do not increment this statistic. +This statistic resets to `0` upon restart of `influxd`, regardless of the state +the last time the process was alive. It is incremented when the HH "supersystem" +is instructed to enqueue a write for the node, and the "subsystem" for the destination +node doesn't exist and has to be created, and the "subsystem" created successfully.
+If HH files are on disk for a remote node at process startup, the branch that +increments this stat will not be reached. + +#### writeShardReqPoints +The number of write requests for each point in the initial request to the hinted +handoff engine for a remote node. + +--- + +### hh_database (Enterprise only) +The `hh_database` measurement aggregates all hinted handoff queues for a single database and node. +This allows accurate reporting of total queue size for a single database to a target node. + +The `hh_database` measurement has two additional tags: + +- `db` — The name of the database +- `node` — The node identifier + +#### bytesRead +The size, in bytes, of points read from the hinted handoff queue and sent to its destination data node. +Note that if the data node process is restarted while there is data in the HH queue, +`bytesRead` may settle to a number larger than `bytesWritten`. +Hinted handoff writes occur in concurrent batches as determined by the +[`retry-concurrency`](/enterprise_influxdb/latest/administration/configuration/#retry-concurrency-20) setting. +If an individual write succeeds, the metric is incremented. +If any write out of the whole batch fails, the entire batch is considered unsuccessful, +and every part of the batch will be retried later. This was not the intended behavior of this stat. +The other situation where `bytesRead` could be larger would be after a restart of the process. +Say at startup there were 1000 bytes still enqueued in HH from the previous run of the process. +Immediately after a restart, both `bytesRead` and `bytesWritten` are set to zero. +Assuming HH is properly depleted, and no future writes require HH, then the stats will read 1000 bytes read and 0 bytes written. + +{{% note %}} +Resets to zero after crash or restart, even if the HH queue was non-empty. +{{% /note %}} + +#### bytesWritten +The total number of bytes written to the hinted handoff queue. 
+Note that this statistic only tracks bytes written during the lifecycle of the current process. +Upon restart or a crash, this statistic resets to zero, even if the hinted handoff queue was not empty. + +#### queueBytes +The total number of bytes remaining in the hinted handoff queue. +This statistic should accurately and absolutely track the number of bytes of encoded +data waiting to be sent to the remote node. + +This statistic should remain correct across restarts, unlike `bytesRead` and `bytesWritten` (see [#780](https://github.com/influxdata/docs.influxdata.com/issues/780)). + +#### queueDepth +The total number of segments in the hinted handoff queue. The HH queue is a sequence of 10MB "segment" files. +This is a coarse-grained statistic that roughly represents the amount of data queued for a remote node. +The `queueDepth` values can give you a sense of when a queue is growing or shrinking. + +#### writeBlocked +The number of writes blocked because the number of concurrent HH requests exceeds the limit. + +#### writeDropped +The number of writes dropped from the HH queue because the write appeared to be corrupted. + +#### writeNodeReq +The total number of write requests that succeeded in writing a batch to the destination node. + +#### writeNodeReqFail +The total number of write requests that failed in writing a batch of data from the +hinted handoff queue to the destination node. + +#### writeNodeReqPoints +The total number of points successfully written from the HH queue to the destination node. + +#### writeShardReq +The total number of every write batch request enqueued into the hinted handoff queue. + +#### writeShardReqPoints +The total number of points enqueued into the hinted handoff queue. + +--- + +### hh_processor (Enterprise only) +The `hh_processor` measurement stores statistics for a single queue (shard). +In InfluxDB Enterprise, there is a hinted handoff processor on each data node.
+ +The `hh_processor` measurement has two additional tags: + +- `node` - The destination node for the recorded metrics. +- `path` - The path to the durable hinted handoff queue on disk. + +{{% note %}} +The `hh_processor` statistics against a host are only accurate for the lifecycle of the current process. +If the process crashes or restarts, `bytesRead` and `bytesWritten` are reset to zero, even if the HH queue was non-empty. +{{% /note %}} + +#### bytesRead +The size, in bytes, of points read from the hinted handoff queue and sent to its destination data node. +Note that if the data node process is restarted while there is data in the HH queue, +`bytesRead` may settle to a number larger than `bytesWritten`. +Hinted handoff writes occur in concurrent batches as determined by the +[`retry-concurrency`](/enterprise_influxdb/latest/administration/configuration/#retry-concurrency-20) setting. +If an individual write succeeds, the metric is incremented. +If any write out of the whole batch fails, the entire batch is considered unsuccessful, +and every part of the batch will be retried later. +This was not the intended behavior of this stat. +The other situation where `bytesRead` could be larger would be after a restart of the process. +Say at startup there were 1000 bytes still enqueued in HH from the previous run of the process. +Immediately after a restart, both `bytesRead` and `bytesWritten` are set to zero. +Assuming HH is properly depleted, and no future writes require HH, then the stats +will read 1000 bytes read and 0 bytes written. + +{{% note %}} +Resets to zero after crash or restart, even if the HH queue was non-empty. +{{% /note %}} + + +#### bytesWritten +The total number of bytes written to the hinted handoff queue. +Note that this statistic only tracks bytes written during the lifecycle of the current process. +Upon restart or a crash, this statistic resets to zero, even if the hinted handoff queue was not empty. 
+ +#### queueBytes +The total number of bytes remaining in the hinted handoff queue. +This statistic should accurately and absolutely track the number of bytes of encoded +data waiting to be sent to the remote node. + +This statistic should remain correct across restarts, unlike `bytesRead` and `bytesWritten` +(see [#780](https://github.com/influxdata/docs.influxdata.com/issues/780)). + +#### queueDepth +The total number of segments in the hinted handoff queue. The HH queue is a sequence of 10MB "segment" files. +This is a coarse-grained statistic that roughly represents the amount of data queued for a remote node. +The `queueDepth` values can give you a sense of when a queue is growing or shrinking. + +#### writeBlocked +The number of writes blocked because the number of concurrent HH requests exceeds the limit. + +#### writeDropped +The number of writes dropped from the HH queue because the write appeared to be corrupted. + +#### writeNodeReq +The total number of write requests that succeeded in writing a batch to the destination node. + +#### writeNodeReqFail +The total number of write requests that failed in writing a batch of data from the +hinted handoff queue to the destination node. + +#### writeNodeReqPoints +The total number of points successfully written from the HH queue to the destination node. + +#### writeShardReq +The total number of every write batch request enqueued into the hinted handoff queue. + +#### writeShardReqPoints +The total number of points enqueued into the hinted handoff queue. + +--- + +### httpd +The `httpd` measurement stores fields related to the InfluxDB HTTP server. + +#### authFail +The number of HTTP requests that were aborted due to authentication being required, +but not supplied or incorrect. + +#### clientError +The number of HTTP responses due to client errors, with a `4XX` HTTP status code. + +#### fluxQueryReq +The number of Flux query requests served.
+ +#### fluxQueryReqDurationNs +The duration (wall-time), in nanoseconds, spent executing Flux query requests. + +#### pingReq +The number of times InfluxDB HTTP server served the `/ping` HTTP endpoint. + +#### pointsWrittenDropped +The number of points dropped by the storage engine. + +#### pointsWrittenFail +The number of points accepted by the HTTP `/write` endpoint, but unable to be persisted. + +#### pointsWrittenOK +The number of points accepted by the HTTP `/write` endpoint and persisted successfully. + +#### promReadReq +The number of read requests to the Prometheus `/read` endpoint. + +#### promWriteReq +The number of write requests to the Prometheus `/write` endpoint. + +#### queryReq +The number of query requests. + +#### queryReqDurationNs +The total query request duration, in nanoseconds (ns). + +#### queryRespBytes +The total number of bytes returned in query responses. + +#### recoveredPanics +The total number of panics recovered by the HTTP handler. + +#### req +The total number of HTTP requests served. + +#### reqActive +The number of currently active requests. + +#### reqDurationNs +The duration (wall time), in nanoseconds, spent inside HTTP requests. + +#### serverError +The number of HTTP responses due to server errors. + +#### statusReq +The number of status requests served using the HTTP `/status` endpoint. + +#### writeReq +The number of write requests served using the HTTP `/write` endpoint. + +#### writeReqActive +The number of currently active write requests. + +#### writeReqBytes +The total number of bytes of line protocol data received by write requests, using the HTTP `/write` endpoint. + +#### writeReqDurationNs +The duration (wall time), in nanoseconds, of write requests served using the `/write` HTTP endpoint. + +--- + +### queryExecutor + +The `queryExecutor` statistics are related to usage of the Query Executor of the InfluxDB engine. + +#### queriesActive +The number of active queries currently being handled.
+ +#### queriesExecuted +The number of queries executed (started). + +#### queriesFinished +The number of queries that have finished executing. + +#### queryDurationNs +The duration (wall time), in nanoseconds, of every query executed. +If one query took 1000 ns from start to finish, and another query took 500 ns +from start to finish and ran before the first query finished, the statistic +would increase by 1500. + +#### recoveredPanics +The number of panics recovered by the Query Executor. + +--- + +### rpc (Enterprise only) + +The `rpc` measurement statistics are related to the use of RPC calls within InfluxDB Enterprise clusters. + +#### idleStreams +The number of idle multiplexed streams across all live TCP connections. + +#### liveConnections +The current number of live TCP connections to other nodes. + +#### liveStreams +The current number of live multiplexed streams across all live TCP connections. + +#### rpcCalls +The total number of RPC calls made to remote nodes. + +#### rpcFailures +The total number of RPC failures, which are RPCs that did not recover. + +#### rpcReadBytes +The total number of RPC bytes read. + +#### rpcRetries +The total number of RPC calls that retried at least once. + +#### rpcWriteBytes +The total number of RPC bytes written. + +#### singleUse +The total number of single-use connections opened using Dial. + +#### singleUseOpen +The number of single-use connections currently open. + +#### totalConnections +The total number of TCP connections that have been established. + +#### totalStreams +The total number of streams established. + +--- + +### runtime +The `runtime` measurement statistics include a subset of MemStats records, which are statistics about the Go memory allocator. +The `runtime` statistics can be useful to determine poor memory allocation strategies and related performance issues. 
+ +The [Go runtime package](https://golang.org/pkg/runtime/) contains operations that +interact with Go's runtime system, including functions used to control goroutines. +It also includes the low-level type information used by the [Go reflect package](https://golang.org/pkg/reflect/). + +#### Alloc +The currently allocated number of bytes of heap objects. + +#### Frees +The cumulative number of freed heap objects. + +#### HeapAlloc +The size, in bytes, of all heap objects. + +#### HeapIdle +The number of bytes of idle heap objects. + +#### HeapInUse +The number of bytes in in-use spans. + +#### HeapObjects +The number of allocated heap objects. + +#### HeapReleased +The number of bytes of physical memory returned to the OS. + +#### HeapSys +The number of bytes of heap memory obtained from the OS. +Measures the amount of virtual address space reserved for the heap. + +#### Lookups +The number of pointer lookups performed by the runtime. +Primarily useful for debugging runtime internals. + +#### Mallocs +The total number of heap objects allocated. +The total number of live objects is `Mallocs` minus [Frees](#frees). + +#### NumGC +The number of completed GC (garbage collection) cycles. + +#### NumGoroutine +The total number of goroutines. + +#### PauseTotalNs +The total duration, in nanoseconds, of total GC (garbage collection) pauses. + +#### Sys +The total number of bytes of memory obtained from the OS. +Measures the virtual address space reserved by the Go runtime for the heap, +stacks, and other internal data structures. + +#### TotalAlloc +The total number of bytes allocated for heap objects. +This statistic does not decrease when objects are freed. + +--- + +### shard +The `shard` measurement statistics are related to working with shards in InfluxDB OSS and InfluxDB Enterprise. + +#### diskBytes +The size, in bytes, of the shard, including the size of the data directory and the WAL directory. + +#### fieldsCreate +The number of fields created. 
+ +#### indexType +The type of index: `inmem` or `tsi1`. + +#### seriesCreate +The number of series created. + +#### writeBytes +The number of bytes written to the shard. + +#### writePointsDropped +The number of requests to write points that were dropped from a write. +Also, `httpd.pointsWrittenDropped` is incremented when a point is dropped from a write +(see [#780](https://github.com/influxdata/docs.influxdata.com/issues/780)). + +#### writePointsErr +The number of requests to write points that failed to be written due to errors. + +#### writePointsOk +The number of points written successfully. + +#### writeReq +The total number of write requests. + +#### writeReqErr +The total number of write requests that failed due to errors. + +#### writeReqOk +The total number of successful write requests. + +--- + +### subscriber +The `subscriber` measurement statistics are related to the usage of InfluxDB subscriptions. + +#### createFailures +The number of subscriptions that failed to be created. + +#### pointsWritten +The total number of points that were successfully written to subscribers. + +#### writeFailures +The total number of batches that failed to be written to subscribers. + +--- + +### tsm1_cache +The `tsm1_cache` measurement statistics are related to the usage of the TSM cache. +The following query example calculates various useful measurements related to the TSM cache. 
+ +```sql +SELECT + max(cacheAgeMs) / 1000.000 AS CacheAgeSeconds, + max(memBytes) AS MaxMemBytes, max(diskBytes) AS MaxDiskBytes, + max(snapshotCount) AS MaxSnapShotCount, + (last(cachedBytes) - first(cachedBytes)) / (last(WALCompactionTimeMs) - first(WALCompactionTimeMs)) - 1000.000 AS CompactedBytesPerSecond, + last(cachedBytes) AS CachedBytes, + (last(cachedBytes) - first(cachedBytes))/300 as CacheThroughputBytesPerSecond +FROM _internal.monitor.tsm1_cache +WHERE time > now() - 1h +GROUP BY time(5m), path +``` + +#### cacheAgeMs +The duration, in milliseconds, since the cache was last snapshotted at sample time. +This statistic indicates how busy the cache is. +Large numbers indicate a cache which is idle with respect to writes. + +#### cachedBytes +The total number of bytes that have been written into snapshots. +This statistic is updated during the creation of a snapshot. +The purpose of this statistic is to allow calculation of cache throughput between any two instances of time. +The ratio of the difference between two samples of this statistic divided by the +interval separating the samples is a measure of the cache throughput (more accurately, +the rate at which data is being snapshotted). When combined with the `diskBytes` +and `memBytes` statistics, it can also be used to calculate the rate at which data +is entering the cache and the rate at which it is being purged from the cache. +If the entry rate exceeds the exit rate for a sustained period of time, +there is an issue that needs to be addressed. + +#### diskBytes +The size, in bytes, of on-disk snapshots. + +#### memBytes +The size, in bytes, of in-memory cache. + +#### snapshotCount +The current level (number) of active snapshots. +In a healthy system, this number should be between 0 and 1. A system experiencing +transient write errors might expect to see this number rise. + +#### WALCompactionTimeMs +The duration, in milliseconds, that the commit lock is held while compacting snapshots. 
+The expression `(cachedBytes - diskBytes) / WALCompactionTimeMs` provides an indication +of how fast the WAL logs are being committed to TSM files. +The ratio of the difference between the start and end `WALCompactionTimeMs` values +for an interval divided by the length of the interval provides an indication of +how much of maximum cache throughput is being consumed. + +#### writeDropped +The total number of writes dropped due to timeouts. + +#### writeErr +The total number of writes that failed. + +#### writeOk +The total number of successful writes. + +--- + +### tsm1_engine +The `tsm1_engine` measurement statistics are related to the usage of a TSM storage +engine with compressed blocks. + +#### cacheCompactionDuration +The duration (wall time), in nanoseconds, spent in cache compactions. + +#### cacheCompactionErr +The number of cache compactions that have failed due to errors. + +#### cacheCompactions +The total number of cache compactions that have ever run. + +#### cacheCompactionsActive +The number of cache compactions that are currently running. + +#### tsmFullCompactionDuration +The duration (wall time), in nanoseconds, spent in full compactions. + +#### tsmFullCompactionErr +The total number of TSM full compactions that have failed due to errors. + +#### tsmFullCompactionQueue +The current number of pending TSM full compactions. + +#### tsmFullCompactions +The total number of TSM full compactions that have ever run. + +#### tsmFullCompactionsActive +The number of TSM full compactions currently running. + +#### tsmLevel1CompactionDuration +The duration (wall time), in nanoseconds, spent in TSM level 1 compactions. + +#### tsmLevel1CompactionErr +The total number of TSM level 1 compactions that have failed due to errors. + +#### tsmLevel1CompactionQueue +The current number of pending TSM level 1 compactions. + +#### tsmLevel1Compactions +The total number of TSM level 1 compactions that have ever run. 
+ +#### tsmLevel1CompactionsActive +The number of TSM level 1 compactions that are currently running. + +#### tsmLevel2CompactionDuration +The duration (wall time), in nanoseconds, spent in TSM level 2 compactions. + +#### tsmLevel2CompactionErr +The number of TSM level 2 compactions that have failed due to errors. + +#### tsmLevel2CompactionQueue +The current number of pending TSM level 2 compactions. + +#### tsmLevel2Compactions +The total number of TSM level 2 compactions that have ever run. + +#### tsmLevel2CompactionsActive +The number of TSM level 2 compactions that are currently running. + +#### tsmLevel3CompactionDuration +The duration (wall time), in nanoseconds, spent in TSM level 3 compactions. + +#### tsmLevel3CompactionErr +The number of TSM level 3 compactions that have failed due to errors. + +#### tsmLevel3CompactionQueue +The current number of pending TSM level 3 compactions. + +#### tsmLevel3Compactions +The total number of TSM level 3 compactions that have ever run. + +#### tsmLevel3CompactionsActive +The number of TSM level 3 compactions that are currently running. + +#### tsmOptimizeCompactionDuration +The duration (wall time), in nanoseconds, spent during TSM optimize compactions. + +#### tsmOptimizeCompactionErr +The total number of TSM optimize compactions that have failed due to errors. + +#### tsmOptimizeCompactionQueue +The current number of pending TSM optimize compactions. + +#### tsmOptimizeCompactions +The total number of TSM optimize compactions that have ever run. + +#### tsmOptimizeCompactionsActive +The number of TSM optimize compactions that are currently running. + +--- + +### tsm1_filestore +The `tsm1_filestore` measurement statistics are related to the usage of the TSM file store. + +#### diskBytes +The size, in bytes, of disk usage by the TSM file store. + +#### numFiles +The total number of files in the TSM file store. 
+ +--- + +### tsm1_wal +The `tsm1_wal` measurement statistics are related to the usage of the TSM Write Ahead Log (WAL). + +#### currentSegmentDiskBytes +The current size, in bytes, of the segment disk. + +#### oldSegmentDiskBytes +The size, in bytes, of the segment disk. + +#### writeErr +The number of writes that failed due to errors. + +#### writeOK +The number of writes that succeeded. + +--- + +### write +The `write` measurement statistics are about writes to the data node, regardless of the source of the write. + +#### pointReq +The total number of every point requested to be written to this data node. +Incoming writes have to make it through a couple of checks before reaching this +point (points parse correctly, correct authentication provided, etc.). +After these checks, this statistic should be incremented regardless of source +(HTTP, UDP, `_internal` stats, OpenTSDB plugin, etc.). + +#### pointReqHH (Enterprise only) +The total number of points received for write by this node and then enqueued into +hinted handoff for the destination node. + +#### pointReqLocal (Enterprise only) +The total number of point requests that have been attempted to be written into a +shard on the same (local) node. + +#### pointReqRemote (Enterprise only) +The total number of points received for write by this node but needed to be forwarded into a shard on a remote node. +The `pointReqRemote` statistic is incremented immediately before the remote write attempt, +which only happens if HH doesn't exist for that node. +Then if the write attempt fails, we check again if HH exists, and if so, add the point to HH instead. +This statistic does not distinguish between requests that are directly written to +the destination node versus enqueued into the hinted handoff queue for the destination node. + +#### req +The total number of batches of points requested to be written to this node. 
+ +#### subWriteDrop +The total number of batches of points that failed to be sent to the subscription dispatcher. + +#### subWriteOk +The total number of batches of points that were successfully sent to the subscription dispatcher. + +#### writeDrop +The total number of write requests for points that have been dropped due to timestamps +not matching any existing retention policies. + +#### writeError +The total number of batches of points that were not successfully written, +due to a failure to write to a local or remote shard. + +#### writeOk +The total number of batches of points written at the requested consistency level. + +#### writePartial (Enterprise only) +The total number of batches of points written to at least one node but did not meet +the requested consistency level. + +#### writeTimeout +The total number of write requests that failed to complete within the default write timeout duration. +This could indicate severely reduced or contentious disk I/O or a congested network to a remote node. +For a single write request that comes in over HTTP or another input method, `writeTimeout` +will be incremented by 1 if the entire batch is not written within the timeout period, +regardless of whether the points within the batch can be written locally or remotely. diff --git a/content/platform/monitoring/influxdata-platform/tools/show-diagnostics.md b/content/platform/monitoring/influxdata-platform/tools/show-diagnostics.md new file mode 100644 index 000000000..bd2c9cf69 --- /dev/null +++ b/content/platform/monitoring/influxdata-platform/tools/show-diagnostics.md @@ -0,0 +1,179 @@ +--- +title: Use the SHOW DIAGNOSTICS statement to monitor InfluxDB diagnostic information +description: Use the SHOW DIAGNOSTICS statement to monitor InfluxDB instances. 
+aliases: + - /platform/monitoring/tools/show-diagnostics/ +menu: + platform: + name: SHOW DIAGNOSTICS + parent: Other monitoring tools + weight: 3 +--- + +Diagnostic information includes mostly information about your InfluxDB server that is not necessarily numerical in format. This diagnostic information is not stored in the [`_internal`](/platform/monitoring/influxdata-platform/tools/measurements-internal/) database. + +To see InfluxDB server or node diagnostic information, you can use the [`SHOW DIAGNOSTICS`](/influxdb/latest/query_language/spec#show-diagnostics) statement. This statement returns InfluxDB instance information, including build details, uptime, hostname, server configuration, memory usage, and Go runtime diagnostics. This information is particularly useful to InfluxData Support, so be sure to include the output of this query anytime you file a support ticket or GitHub issue. + + +## `SHOW DIAGNOSTICS` measurement details + +The `SHOW DIAGNOSTICS` statement returns the following information. 
+ +#### build +- Branch +- Build Time +- Commit + +#### config +- bind-address +- reporting-disabled + +#### config-coordinator +- log-queries-after +- max-concurrent-queries +- max-select-buckets +- max-select-point +- max-select-series +- query-timeout +- write-timeout + +#### config-cqs +- enabled +- query-stats-enabled +- run-interval + +#### config-data +- cache-max-memory-size +- cache-snapshot-memory-size +- cache-snapshot-write-cold-duration +- compact-full-write-cold-duration +- dir +- max-concurrent-compactions +- max-series-per-database +- max-values-per-tag +- wal-dir +- wal-fsync-delay + + +#### config-httpd +- access-log-path +- bind-address +- enabled +- https-enabled +- max-connection-limit +- max-row-limit + +#### config-meta +- dir + +#### config-monitor +- store-database +- store-enabled +- store-interval + +#### config-precreator +- advance-period +- check-interval +- enabled + +#### config-retention +- check-interval +- enabled + +#### config-subscriber +- enabled +- http-timeout +- write-buffer-size +- write-concurrency + +#### network +- hostname + +#### runtime +- GOARCH +- GOMAXPROCS +- GOOS +- version + +#### system +- PID currentTime +- started +- uptime + + +## Example of `SHOW DIAGNOSTICS` output + +Here is an example of the output returned when running the `SHOW DIAGNOSTICS` statement on an InfluxDB OSS server. 
+ +``` +> show diagnostics +name: build +Branch Build Time Commit Version +------ ---------- ------ ------- +master 389de31c961831de0a9f4172173337d4a6193909 v1.6.3 + +name: config +bind-address reporting-disabled +------------ ------------------ +127.0.0.1:8088 false + +name: config-coordinator +log-queries-after max-concurrent-queries max-select-buckets max-select-point max-select-series query-timeout write-timeout +----------------- ---------------------- ------------------ ---------------- ----------------- ------------- ------------- +0s 0 0 0 0 0s 10s + +name: config-cqs +enabled query-stats-enabled run-interval +------- ------------------- ------------ +true false 1s + +name: config-data +cache-max-memory-size cache-snapshot-memory-size cache-snapshot-write-cold-duration compact-full-write-cold-duration dir max-concurrent-compactions max-series-per-database max-values-per-tag wal-dir wal-fsync-delay +--------------------- -------------------------- ---------------------------------- -------------------------------- --- -------------------------- ----------------------- ------------------ ------- --------------- +1073741824 26214400 10m0s 4h0m0s /usr/local/var/influxdb/data 0 1000000 100000 /usr/local/var/influxdb/wal 0s + +name: config-httpd +access-log-path bind-address enabled https-enabled max-connection-limit max-row-limit +--------------- ------------ ------- ------------- -------------------- ------------- + :8086 true false 0 0 + +name: config-meta +dir +--- +/usr/local/var/influxdb/meta + +name: config-monitor +store-database store-enabled store-interval +-------------- ------------- -------------- +_internal true 10s + +name: config-precreator +advance-period check-interval enabled +-------------- -------------- ------- +30m0s 10m0s true + +name: config-retention +check-interval enabled +-------------- ------- +30m0s true + +name: config-subscriber +enabled http-timeout write-buffer-size write-concurrency +------- ------------ ----------------- 
----------------- +true 30s 1000 40 + +name: network +hostname +-------- +influxdb-1.local + +name: runtime +GOARCH GOMAXPROCS GOOS version +------ ---------- ---- ------- +amd64 8 darwin go1.11 + +name: system +PID currentTime started uptime +--- ----------- ------- ------ +940 2018-10-15T15:07:47.435739Z 2018-10-15T06:03:34.002126Z 9h4m13.433613s +``` diff --git a/content/platform/monitoring/influxdata-platform/tools/show-stats.md b/content/platform/monitoring/influxdata-platform/tools/show-stats.md new file mode 100644 index 000000000..179238592 --- /dev/null +++ b/content/platform/monitoring/influxdata-platform/tools/show-stats.md @@ -0,0 +1,54 @@ +--- +title: Use the SHOW STATS statement to monitor InfluxDB statistics +description: Use the SHOW STATS statement to monitor InfluxDB instances. +aliases: + - /platform/monitoring/tools/show-stats/ +menu: + platform: + name: SHOW STATS + parent: Other monitoring tools + weight: 4 +--- + +## Using the `SHOW STATS` statement + +The InfluxQL [`SHOW STATS`](https://docs.influxdata.com/influxdb/v1.6/query_language/spec#show-stats) +statement returns detailed measurement statistics on InfluxDB servers and available (enabled) components. +Each component exporting statistics exports a measurement named after the module and various series that are associated with the measurement. +To monitor InfluxDB system measurements over time, you can use the `_internal` database. +For details on measurements available in the `_internal` database, +see [Measurements for monitoring InfluxDB OSS and InfluxDB Enterprise (`_internal`)](/platform/monitoring/influxdata-platform/tools/measurements-internal/). + +{{% note %}} +These statistics are all reset to zero when the InfluxDB service starts. 
+{{% /note %}} + +### The `SHOW STATS [FOR <component>]` option + +If you are interested in monitoring a specific InfluxDB component, use +`SHOW STATS FOR <component>` to limit the results of the `SHOW STATS` statement to a specified module. 
+The `SHOW STATS FOR <component>` statement displays detailed measurement statistics +about a subsystem within a running `influxd` service. + +If a component is specified, it must be single-quoted. In the following example, +the available statistics for the `httpd` module are returned. + +```sql +SHOW STATS FOR 'httpd' +``` + +### The `SHOW STATS FOR 'indexes'` option + +The `SHOW STATS` statement does not list index memory usage unless you use the `SHOW STATS FOR 'indexes'` statement. +This statement returns an estimate of memory use of all indexes. + +{{% note %}} +Index memory usage is not reported with the default `SHOW STATS` +statement because it is a potentially expensive operation. +{{% /note %}} + +## `SHOW STATS` measurement details +The `SHOW STATS` statement returns the same statistics captured in the `_internal` database, +but only for the instant you run the statement. + +For details on the measurement statistics returned, see [InfluxDB `_internal` measurements](/platform/monitoring/influxdata-platform/tools/measurements-internal/). diff --git a/content/platform/ops-guide/_index.md b/content/platform/ops-guide/_index.md new file mode 100644 index 000000000..039da9bb5 --- /dev/null +++ b/content/platform/ops-guide/_index.md @@ -0,0 +1,11 @@ +--- +title: Operations guide +description: placeholder +menu: + platform: + name: Operations guide + weight: 100 +draft: true +--- + +This is the ops guide diff --git a/content/platform/use-cases/_index.md b/content/platform/use-cases/_index.md new file mode 100644 index 000000000..33c9f1a1d --- /dev/null +++ b/content/platform/use-cases/_index.md @@ -0,0 +1,18 @@ +--- +title: InfluxData Platform use cases +description: Use-case-focused guides on how to use InfluxData's TICK stack to accomplish different tasks. +draft: true +menu: + platform: + name: Use cases + weight: 60 +--- + +Explore many ways to use the InfluxData Platform and its TICK stack components. 
+ +## [Integrate](/platform/use-cases/integrate/) +All the tools and client libraries needed to integrate with the TICK stack. + +## [Monitor](/platform/use-cases/monitor/) +Learn how to monitor 100+ applications and services using the TICK stack. + diff --git a/content/platform/use-cases/monitor-kubernetes-apps.md b/content/platform/use-cases/monitor-kubernetes-apps.md new file mode 100644 index 000000000..8ca7a3cd2 --- /dev/null +++ b/content/platform/use-cases/monitor-kubernetes-apps.md @@ -0,0 +1,11 @@ +--- +title: Monitoring applications on Kubernetes with the TICK Stack +description: placeholder +draft: true +menu: + platform: + name: Kubernetes Application Monitoring + parent: Use cases +--- + +Coming soon! diff --git a/content/platform/use-cases/monitor-kubernetes.md b/content/platform/use-cases/monitor-kubernetes.md new file mode 100644 index 000000000..8e9a952dc --- /dev/null +++ b/content/platform/use-cases/monitor-kubernetes.md @@ -0,0 +1,11 @@ +--- +title: Kubernetes monitoring with the TICK Stack +description: placeholder +draft: true +menu: + platform: + name: Kubernetes Monitoring + parent: Use cases +--- + +Coming soon! diff --git a/layouts/partials/sidebar.html b/layouts/partials/sidebar.html index 009efe703..a11bd7e61 100644 --- a/layouts/partials/sidebar.html +++ b/layouts/partials/sidebar.html @@ -2,12 +2,22 @@ {{ $productPathData := findRE "[^/]+.*?" 
.RelPermalink }} {{ $product := index $productPathData 0 }} {{ $currentVersion := index $productPathData 1 }} +{{ $scratch := newScratch }} + + +{{ $scratch.Set "menuKey" "menu"}} +{{ if eq $product "platform" }} + {{ $scratch.Set "menuKey" "platform" }} +{{ else }} + {{ $scratch.Set "menuKey" (print $product (replaceRE "[v.]" "_" $currentVersion)) }} +{{ end }} + +{{ $menuKey := $scratch.Get "menuKey" }} -{{ $scratch := newScratch }} {{ $scratch.Set "searchPlaceholder" "Search"}} -{{ if and (eq $currentVersion nil) (eq (len (findRE `v[1-2]\.` $currentVersion nil)) 0) }} +{{ if and (eq (index .Site.Data.products $product).name nil) (eq (len (findRE `v[1-2]\.` $currentVersion nil)) 0) }} {{ $scratch.Set "searchPlaceholder" "Search the docs" }} {{ else if (eq $currentVersion nil) }} {{ $scratch.Set "searchPlaceholder" (print "Search " (index .Site.Data.products $product).name) }} @@ -42,8 +52,9 @@ diff --git a/layouts/partials/topnav/product-selector.html b/layouts/partials/topnav/product-selector.html index 440979fe1..dd6ed66c3 100644 --- a/layouts/partials/topnav/product-selector.html +++ b/layouts/partials/topnav/product-selector.html @@ -3,8 +3,8 @@ {{ $currentVersion := index $productPathData 1 }}