From 07f29b2cf9c2e793cdda88dfcbb35fb8e1e62be7 Mon Sep 17 00:00:00 2001
From: Scott Anderson <scott@influxdata.com>
Date: Wed, 18 Mar 2026 16:49:40 -0600
Subject: [PATCH] chore(tc-heartbeat-status): update status evaluation docs

---
 content/telegraf/controller/agents/status.md  |  46 +-
 .../{cel => agent-status-eval}/_index.md      |  21 +-
 .../{cel => agent-status-eval}/examples.md    |  39 +-
 .../{cel => agent-status-eval}/functions.md   |  14 +-
 .../reference/agent-status-eval/variables.md  | 150 +++
 .../controller/reference/cel/variables.md     | 138 ---
 .../plans/2026-03-17-tc-cel-status.md         | 972 ++++++++++++++++++
 .../specs/2026-03-17-tc-cel-status-design.md  | 131 +++
 8 files changed, 1310 insertions(+), 201 deletions(-)
 rename content/telegraf/controller/reference/{cel => agent-status-eval}/_index.md (84%)
 rename content/telegraf/controller/reference/{cel => agent-status-eval}/examples.md (89%)
 rename content/telegraf/controller/reference/{cel => agent-status-eval}/functions.md (96%)
 create mode 100644 content/telegraf/controller/reference/agent-status-eval/variables.md
 delete mode 100644 content/telegraf/controller/reference/cel/variables.md
 create mode 100644 docs/superpowers/plans/2026-03-17-tc-cel-status.md
 create mode 100644 docs/superpowers/specs/2026-03-17-tc-cel-status-design.md

diff --git a/content/telegraf/controller/agents/status.md b/content/telegraf/controller/agents/status.md
index b96a09a31..06f3ab617 100644
--- a/content/telegraf/controller/agents/status.md
+++ b/content/telegraf/controller/agents/status.md
@@ -9,28 +9,34 @@ menu:
     parent: Manage agents
 weight: 104
 related:
-  - /telegraf/controller/reference/cel/
+  - /telegraf/controller/reference/agent-status-eval/, Agent status evaluation reference
   - /telegraf/controller/agents/reporting-rules/
-  - /telegraf/v1/output-plugins/heartbeat/
+  - /telegraf/v1/output-plugins/heartbeat/, Heartbeat output plugin
 ---
 
 Agent statuses reflect the health of a Telegraf instance based on runtime data.
 The Telegraf [heartbeat output plugin](/telegraf/v1/output-plugins/heartbeat/)
-evaluates [Common Expression Language (CEL)](/telegraf/controller/reference/cel/)
+evaluates [Common Expression Language (CEL)](/telegraf/controller/reference/agent-status-eval/)
 expressions against agent metrics, error counts, and plugin statistics to
 determine the status sent with each heartbeat.
 
+> [!Note]
+> #### Requires Telegraf v1.38.2+
+>
+> Agent status evaluation in the Heartbeat output plugin requires Telegraf
+> v1.38.2+.
+
 ## Status values
 
 {{% product-name %}} displays the following agent statuses:
 
-| Status | Source | Description |
-|:-------|:-------|:------------|
-| **Ok** | Heartbeat plugin | The agent is healthy. Set when the `ok` CEL expression evaluates to `true`. |
-| **Warn** | Heartbeat plugin | The agent has a potential issue. Set when the `warn` CEL expression evaluates to `true`. |
-| **Fail** | Heartbeat plugin | The agent has a critical problem. Set when the `fail` CEL expression evaluates to `true`. |
-| **Undefined** | Heartbeat plugin | No expression matched and the `default` is set to `undefined`, or the `initial` status is `undefined`. |
-| **Not Reporting** | {{% product-name "short" %}} | The agent has not sent a heartbeat within the [reporting rule](/telegraf/controller/agents/reporting-rules/) threshold. {{% product-name "short" %}} applies this status automatically. |
+| Status            | Source               | Description                                                                                                                                                                     |
+| :---------------- | :------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| **Ok**            | Heartbeat plugin     | The agent is healthy. Set when the `ok` CEL expression evaluates to `true`.                                                                                                     |
+| **Warn**          | Heartbeat plugin     | The agent has a potential issue. Set when the `warn` CEL expression evaluates to `true`.                                                                                        |
+| **Fail**          | Heartbeat plugin     | The agent has a critical problem. Set when the `fail` CEL expression evaluates to `true`.                                                                                       |
+| **Undefined**     | Heartbeat plugin     | No expression matched and the `default` is set to `undefined`, or the `initial` status is `undefined`.                                                                          |
+| **Not Reporting** | {{% product-name %}} | The agent has not sent a heartbeat within the [reporting rule](/telegraf/controller/agents/reporting-rules/) threshold. {{% product-name %}} applies this status automatically. |
 
 ## How status evaluation works
 
@@ -41,7 +47,7 @@ of the first expression that evaluates to `true`.
 
 For full details on evaluation flow, configuration options, and available
 variables and functions, see the
-[CEL expressions reference](/telegraf/controller/reference/cel/).
+[Agent status evaluation reference](/telegraf/controller/reference/agent-status-eval/).
 
 ## Configure agent statuses
 
@@ -61,7 +67,7 @@ If no metrics arrive, fall back to the `fail` status.
   instance_id = "&{agent_id}"
   token = "${INFLUX_TOKEN}"
   interval = "1m"
-  include = ["hostname", "statistics", "status"]
+  include = ["hostname", "statistics", "configs", "logs", "status"]
 
   [outputs.heartbeat.status]
     ok = "metrics > 0"
@@ -80,7 +86,7 @@ Warn when errors are logged, fail when the error count is high.
   instance_id = "&{agent_id}"
   token = "${INFLUX_TOKEN}"
   interval = "1m"
-  include = ["hostname", "statistics", "status"]
+  include = ["hostname", "statistics", "configs", "logs", "status"]
 
   [outputs.heartbeat.status]
     ok = "log_errors == 0 && log_warnings == 0"
@@ -102,7 +108,7 @@ Combine error count and buffer pressure signals.
   instance_id = "&{agent_id}"
   token = "${INFLUX_TOKEN}"
   interval = "1m"
-  include = ["hostname", "statistics", "status"]
+  include = ["hostname", "statistics", "configs", "logs", "status"]
 
   [outputs.heartbeat.status]
     ok = "metrics > 0 && log_errors == 0"
@@ -115,7 +121,7 @@ Combine error count and buffer pressure signals.
 
 For more examples including buffer health, plugin-specific checks, and
 time-based expressions, see
-[CEL expression examples](/telegraf/controller/reference/cel/examples/).
+[CEL expression examples](/telegraf/controller/reference/agent-status-eval/examples/).
 
 ## View an agent's status
 
@@ -125,13 +131,3 @@ time-based expressions, see
     select **View Details**.
 4.  The details page shows the reported status, reporting rule assignment, and
     the time of the last heartbeat.
-
-## Learn more
-
-- [CEL expressions reference](/telegraf/controller/reference/cel/)---Full
-  reference for CEL evaluation flow, configuration, variables, functions, and
-  examples.
-- [Heartbeat output plugin](/telegraf/v1/output-plugins/heartbeat/)---Plugin
-  configuration reference.
-- [Define reporting rules](/telegraf/controller/agents/reporting-rules/)---Configure
-  thresholds for the **Not Reporting** status.
diff --git a/content/telegraf/controller/reference/cel/_index.md b/content/telegraf/controller/reference/agent-status-eval/_index.md
similarity index 84%
rename from content/telegraf/controller/reference/cel/_index.md
rename to content/telegraf/controller/reference/agent-status-eval/_index.md
index bb7c02c00..bef40c47f 100644
--- a/content/telegraf/controller/reference/cel/_index.md
+++ b/content/telegraf/controller/reference/agent-status-eval/_index.md
@@ -1,11 +1,11 @@
 ---
-title: CEL expressions
+title: Agent status evaluation
 description: >
   Reference documentation for Common Expression Language (CEL) expressions used
-  to evaluate Telegraf agent status in {{% product-name %}}.
+  to evaluate Telegraf agent status.
 menu:
   telegraf_controller:
-    name: CEL expressions
+    name: Agent status evaluation
     parent: Reference
 weight: 107
 related:
@@ -13,21 +13,20 @@ related:
   - /telegraf/v1/output-plugins/heartbeat/
 ---
 
-[Common Expression Language (CEL)](https://cel.dev) is a lightweight expression
+The Telegraf [heartbeat output plugin](/telegraf/v1/output-plugins/heartbeat/)
+uses CEL expressions to evaluate agent status based on runtime data such as
+metric counts, error rates, and plugin statistics.
+[CEL (Common Expression Language)](https://cel.dev) is a lightweight expression
 language designed for evaluating simple conditions.
-{{% product-name %}} uses CEL expressions in the Telegraf
-[heartbeat output plugin](/telegraf/v1/output-plugins/heartbeat/) to evaluate
-agent status based on runtime data such as metric counts, error rates, and
-plugin statistics.
 
 ## How status evaluation works
 
 You define CEL expressions for three status levels in the
 `[outputs.heartbeat.status]` section of your Telegraf configuration:
 
-- **`ok`** — The agent is healthy.
-- **`warn`** — The agent has a potential issue.
-- **`fail`** — The agent has a critical problem.
+- **ok** — The agent is healthy.
+- **warn** — The agent has a potential issue.
+- **fail** — The agent has a critical problem.
 
 Each expression is a CEL program that returns a boolean value.
 Telegraf evaluates expressions in a configurable order (default:
diff --git a/content/telegraf/controller/reference/cel/examples.md b/content/telegraf/controller/reference/agent-status-eval/examples.md
similarity index 89%
rename from content/telegraf/controller/reference/cel/examples.md
rename to content/telegraf/controller/reference/agent-status-eval/examples.md
index 1276ddfb2..355eb2764 100644
--- a/content/telegraf/controller/reference/cel/examples.md
+++ b/content/telegraf/controller/reference/agent-status-eval/examples.md
@@ -1,17 +1,16 @@
 ---
 title: CEL expression examples
 description: >
-  Real-world examples of CEL expressions for evaluating Telegraf agent status
-  in {{% product-name %}}.
+  Real-world examples of CEL expressions for evaluating Telegraf agent status.
 menu:
   telegraf_controller:
     name: Examples
-    parent: CEL expressions
+    parent: Agent status evaluation
 weight: 203
 related:
   - /telegraf/controller/agents/status/
-  - /telegraf/controller/reference/cel/variables/
-  - /telegraf/controller/reference/cel/functions/
+  - /telegraf/controller/reference/agent-status-eval/variables/
+  - /telegraf/controller/reference/agent-status-eval/functions/
 ---
 
 Each example includes a scenario description, the CEL expression, a full
@@ -19,8 +18,8 @@ heartbeat plugin configuration block, and an explanation.
 
 For the full list of available variables and functions, see:
 
-- [CEL variables](/telegraf/controller/reference/cel/variables/)
-- [CEL functions and operators](/telegraf/controller/reference/cel/functions/)
+- [CEL variables](/telegraf/controller/reference/agent-status-eval/variables/)
+- [CEL functions and operators](/telegraf/controller/reference/agent-status-eval/functions/)
 
 ## Basic health check
 
@@ -30,7 +29,7 @@ the agent is healthy as long as metrics are flowing.
 
 **Expression:**
 
-```cel
+```js
 ok = "metrics > 0"
 ```
 
@@ -41,7 +40,7 @@ ok = "metrics > 0"
   url = "http://telegraf_controller.example.com/agents/heartbeat"
   instance_id = "agent-123"
   interval = "1m"
-  include = ["hostname", "statistics", "status"]
+  include = ["hostname", "statistics", "configs", "logs", "status"]
 
   [outputs.heartbeat.status]
     ok = "metrics > 0"
@@ -60,7 +59,7 @@ high.
 
 **Expressions:**
 
-```cel
+```js
 warn = "log_errors > 0"
 fail = "log_errors > 10"
 ```
@@ -72,7 +71,7 @@ fail = "log_errors > 10"
   url = "http://telegraf_controller.example.com/agents/heartbeat"
   instance_id = "agent-123"
   interval = "1m"
-  include = ["hostname", "statistics", "status"]
+  include = ["hostname", "statistics", "configs", "logs", "status"]
 
   [outputs.heartbeat.status]
     ok = "log_errors == 0 && log_warnings == 0"
@@ -94,7 +93,7 @@ indicating potential data backpressure.
 
 **Expression:**
 
-```cel
+```js
 warn = "outputs.influxdb_v2.exists(o, o.buffer_fullness > 0.8)"
 fail = "outputs.influxdb_v2.exists(o, o.buffer_fullness > 0.95)"
 ```
@@ -106,7 +105,7 @@ fail = "outputs.influxdb_v2.exists(o, o.buffer_fullness > 0.95)"
   url = "http://telegraf_controller.example.com/agents/heartbeat"
   instance_id = "agent-123"
   interval = "1m"
-  include = ["hostname", "statistics", "status"]
+  include = ["hostname", "statistics", "configs", "logs", "status"]
 
   [outputs.heartbeat.status]
     ok = "metrics > 0"
@@ -129,7 +128,7 @@ safe access patterns to avoid errors when the plugin is not configured.
 
 **Expression:**
 
-```cel
+```js
 warn = "has(inputs.cpu) && inputs.cpu.exists(i, i.errors > 0)"
 fail = "has(inputs.cpu) && inputs.cpu.exists(i, i.startup_errors > 0)"
 ```
@@ -141,7 +140,7 @@ fail = "has(inputs.cpu) && inputs.cpu.exists(i, i.startup_errors > 0)"
   url = "http://telegraf_controller.example.com/agents/heartbeat"
   instance_id = "agent-123"
   interval = "1m"
-  include = ["hostname", "statistics", "status"]
+  include = ["hostname", "statistics", "configs", "logs", "status"]
 
   [outputs.heartbeat.status]
     ok = "metrics > 0"
@@ -164,7 +163,7 @@ count combined with output buffer pressure.
 
 **Expression:**
 
-```cel
+```js
 fail = "log_errors > 5 && has(outputs.influxdb_v2) && outputs.influxdb_v2.exists(o, o.buffer_fullness > 0.9)"
 ```
 
@@ -175,7 +174,7 @@ fail = "log_errors > 5 && has(outputs.influxdb_v2) && outputs.influxdb_v2.exists
   url = "http://telegraf_controller.example.com/agents/heartbeat"
   instance_id = "agent-123"
   interval = "1m"
-  include = ["hostname", "statistics", "status"]
+  include = ["hostname", "statistics", "configs", "logs", "status"]
 
   [outputs.heartbeat.status]
     ok = "metrics > 0 && log_errors == 0"
@@ -198,7 +197,7 @@ threshold, indicating potential connectivity or performance issues.
 
 **Expression:**
 
-```cel
+```js
 warn = "now() - last_update > duration('10m')"
 fail = "now() - last_update > duration('30m')"
 ```
@@ -210,7 +209,7 @@ fail = "now() - last_update > duration('30m')"
   url = "http://telegraf_controller.example.com/agents/heartbeat"
   instance_id = "agent-123"
   interval = "1m"
-  include = ["hostname", "statistics", "status"]
+  include = ["hostname", "statistics", "configs", "logs", "status"]
 
   [outputs.heartbeat.status]
     ok = "metrics > 0"
@@ -239,7 +238,7 @@ before checking for healthy status.
   url = "http://telegraf_controller.example.com/agents/heartbeat"
   instance_id = "agent-123"
   interval = "1m"
-  include = ["hostname", "statistics", "status"]
+  include = ["hostname", "statistics", "configs", "logs", "status"]
 
   [outputs.heartbeat.status]
     ok = "metrics > 0 && log_errors == 0"
diff --git a/content/telegraf/controller/reference/cel/functions.md b/content/telegraf/controller/reference/agent-status-eval/functions.md
similarity index 96%
rename from content/telegraf/controller/reference/cel/functions.md
rename to content/telegraf/controller/reference/agent-status-eval/functions.md
index e4ffe3134..c5bfcf112 100644
--- a/content/telegraf/controller/reference/cel/functions.md
+++ b/content/telegraf/controller/reference/agent-status-eval/functions.md
@@ -2,11 +2,11 @@
 title: CEL functions and operators
 description: >
   Reference for functions and operators available in CEL expressions used to
-  evaluate Telegraf agent status in {{% product-name %}}.
+  evaluate Telegraf agent status.
 menu:
   telegraf_controller:
     name: Functions
-    parent: CEL expressions
+    parent: Agent status evaluation
 weight: 202
 ---
 
@@ -20,12 +20,12 @@ the following function libraries.
 Returns the current time.
 Use with `last_update` to calculate durations or detect stale data.
 
-```cel
+```js
 // True if more than 10 minutes since last heartbeat
 now() - last_update > duration('10m')
 ```
 
-```cel
+```js
 // True if more than 5 minutes since last heartbeat
 now() - last_update > duration('5m')
 ```
@@ -45,7 +45,7 @@ are available for numeric calculations.
 
 ### Example
 
-```cel
+```js
 // Warn if either errors or warnings exceed a threshold
 math.greatest(log_errors, log_warnings) > 5
 ```
@@ -59,7 +59,7 @@ These are useful when checking plugin `alias` or `id` fields.
 
 ### Example
 
-```cel
+```js
 // Check if any input plugin has an alias containing "critical"
 inputs.cpu.exists(i, has(i.alias) && i.alias.contains("critical"))
 ```
@@ -105,7 +105,7 @@ CEL supports standard operators for building expressions.
 
 ### Ternary operator
 
-```cel
+```js
 // Conditional expression
 log_errors > 10 ? true : false
 ```
diff --git a/content/telegraf/controller/reference/agent-status-eval/variables.md b/content/telegraf/controller/reference/agent-status-eval/variables.md
new file mode 100644
index 000000000..8861d2126
--- /dev/null
+++ b/content/telegraf/controller/reference/agent-status-eval/variables.md
@@ -0,0 +1,150 @@
+---
+title: CEL variables
+description: >
+  Reference for variables available in CEL expressions used to evaluate
+  Telegraf agent status.
+menu:
+  telegraf_controller:
+    name: Variables
+    parent: Agent status evaluation
+weight: 201
+---
+
+CEL expressions for agent status evaluation have access to variables that
+represent data collected by Telegraf since the last successful heartbeat message
+(unless noted otherwise).
+
+## Top-level variables
+
+| Variable       | Type | Description                                                                                           |
+| :------------- | :--- | :---------------------------------------------------------------------------------------------------- |
+| `metrics`      | int  | Number of metrics arriving at the heartbeat output plugin.                                            |
+| `log_errors`   | int  | Number of errors logged by the Telegraf instance.                                                     |
+| `log_warnings` | int  | Number of warnings logged by the Telegraf instance.                                                   |
+| `last_update`  | time | Timestamp of the last successful heartbeat message. Use with `now()` to calculate durations or rates. |
+| `agent`        | map  | Agent-level statistics. See [Agent statistics](#agent-statistics).                                    |
+| `inputs`       | map  | Input plugin statistics. See [Input plugin statistics](#input-plugin-statistics-inputs).              |
+| `outputs`      | map  | Output plugin statistics. See [Output plugin statistics](#output-plugin-statistics-outputs).          |
+
+## Agent statistics
+
+The `agent` variable is a map containing aggregate statistics for the entire
+Telegraf instance.
+These fields correspond to the `internal_agent` metric from the
+Telegraf [internal input plugin](/telegraf/v1/plugins/#input-internal).
+
+| Field                    | Type | Description                                         |
+| :----------------------- | :--- | :-------------------------------------------------- |
+| `agent.metrics_written`  | int  | Total metrics written by all output plugins.        |
+| `agent.metrics_rejected` | int  | Total metrics rejected by all output plugins.       |
+| `agent.metrics_dropped`  | int  | Total metrics dropped by all output plugins.        |
+| `agent.metrics_gathered` | int  | Total metrics collected by all input plugins.       |
+| `agent.gather_errors`    | int  | Total collection errors across all input plugins.   |
+| `agent.gather_timeouts`  | int  | Total collection timeouts across all input plugins. |
+
+### Example
+
+```js
+agent.gather_errors > 0
+```
+
+## Input plugin statistics (`inputs`)
+
+The `inputs` variable is a map where each key is a plugin type (for example,
+`cpu` for `inputs.cpu`) and the value is a **list** of plugin instances.
+Each entry in the list represents one configured instance of that plugin type.
+
+These fields correspond to the `internal_gather` metric from the Telegraf
+[internal input plugin](/telegraf/v1/plugins/#input-internal).
+
+| Field              | Type   | Description                                                                               |
+| :----------------- | :----- | :---------------------------------------------------------------------------------------- |
+| `id`               | string | Unique plugin identifier.                                                                 |
+| `alias`            | string | Alias set for the plugin. Only exists if an alias is defined in the plugin configuration. |
+| `errors`           | int    | Collection errors for this plugin instance.                                               |
+| `metrics_gathered` | int    | Number of metrics collected by this instance.                                             |
+| `gather_time_ns`   | int    | Time spent gathering metrics, in nanoseconds.                                             |
+| `gather_timeouts`  | int    | Number of timeouts during metric collection.                                              |
+| `startup_errors`   | int    | Number of times the plugin failed to start.                                               |
+
+### Access patterns
+
+Access a specific plugin type and iterate over its instances:
+
+```js
+// Check if any cpu input instance has errors
+inputs.cpu.exists(i, i.errors > 0)
+```
+
+```js
+// Access the first instance of the cpu input
+inputs.cpu[0].metrics_gathered
+```
+
+Use `has()` to safely check if a plugin type exists before accessing it:
+
+```js
+// Safe access — returns false if no cpu input is configured
+has(inputs.cpu) && inputs.cpu.exists(i, i.errors > 0)
+```
+
+## Output plugin statistics (`outputs`)
+
+The `outputs` variable is a map with the same structure as `inputs`.
+Each key is a plugin type (for example, `influxdb_v3` for `outputs.influxdb_v3`)
+and the value is a list of plugin instances.
+
+These fields correspond to the `internal_write` metric from the Telegraf
+[internal input plugin](/telegraf/v1/plugins/#input-internal).
+
+| Field              | Type   | Description                                                                                              |
+| :----------------- | :----- | :------------------------------------------------------------------------------------------------------- |
+| `id`               | string | Unique plugin identifier.                                                                                |
+| `alias`            | string | Alias set for the plugin. Only exists if an alias is defined in the plugin configuration.                |
+| `errors`           | int    | Write errors for this plugin instance.                                                                   |
+| `metrics_filtered` | int    | Number of metrics filtered by the output.                                                                |
+| `write_time_ns`    | int    | Time spent writing metrics, in nanoseconds.                                                              |
+| `startup_errors`   | int    | Number of times the plugin failed to start.                                                              |
+| `metrics_added`    | int    | Number of metrics added to the output buffer.                                                            |
+| `metrics_written`  | int    | Number of metrics written to the output destination.                                                     |
+| `metrics_rejected` | int    | Number of metrics rejected by the service or serialization.                                              |
+| `metrics_dropped`  | int    | Number of metrics dropped (for example, due to buffer fullness).                                         |
+| `buffer_size`      | int    | Current number of metrics in the output buffer.                                                          |
+| `buffer_limit`     | int    | Capacity of the output buffer. Irrelevant for disk-based buffers.                                        |
+| `buffer_fullness`  | float  | Ratio of metrics in the buffer to capacity. Can exceed `1.0` (greater than 100%) for disk-based buffers. |
+
+### Access patterns
+
+```js
+// Access the first instance of the InfluxDB v3 output plugin
+outputs.influxdb_v3[0].metrics_written
+```
+
+```js
+// Check if any InfluxDB v3 output has write errors
+outputs.influxdb_v3.exists(o, o.errors > 0)
+```
+
+```js
+// Check buffer fullness across all instances of an output
+outputs.influxdb_v3.exists(o, o.buffer_fullness > 0.8)
+```
+
+Use `has()` to safely check if a plugin type exists before accessing it:
+
+```js
+// Safe access — returns false if no InfluxDB v3 output is configured
+has(outputs.influxdb_v3) && outputs.influxdb_v3.exists(o, o.errors > 0)
+```
+
+## Accumulation behavior
+
+Unless noted otherwise, all variable values are **accumulated since the last
+successful heartbeat message**.
+Use the `last_update` variable with `now()` to calculate rates — for example:
+
+```js
+// True if the error rate exceeds 1 error per minute
+log_errors > 0 && duration.getMinutes(now() - last_update) > 0
+  && log_errors / duration.getMinutes(now() - last_update) > 1
+```
diff --git a/content/telegraf/controller/reference/cel/variables.md b/content/telegraf/controller/reference/cel/variables.md
deleted file mode 100644
index 5d498638d..000000000
--- a/content/telegraf/controller/reference/cel/variables.md
+++ /dev/null
@@ -1,138 +0,0 @@
----
-title: CEL variables
-description: >
-  Reference for variables available in CEL expressions used to evaluate
-  Telegraf agent status in {{% product-name %}}.
-menu:
-  telegraf_controller:
-    name: Variables
-    parent: CEL expressions
-weight: 201
----
-
-CEL expressions for agent status evaluation have access to variables that
-represent data collected by Telegraf since the last successful heartbeat message
-(unless noted otherwise).
-
-## Top-level variables
-
-| Variable | Type | Description |
-|:---------|:-----|:------------|
-| `metrics` | int | Number of metrics arriving at the heartbeat output plugin. |
-| `log_errors` | int | Number of errors logged by the Telegraf instance. |
-| `log_warnings` | int | Number of warnings logged by the Telegraf instance. |
-| `last_update` | time | Timestamp of the last successful heartbeat message. Use with `now()` to calculate durations or rates. |
-| `agent` | map | Agent-level statistics. See [Agent statistics](#agent-statistics). |
-| `inputs` | map | Input plugin statistics. See [Input plugin statistics](#input-plugin-statistics-inputs). |
-| `outputs` | map | Output plugin statistics. See [Output plugin statistics](#output-plugin-statistics-outputs). |
-
-## Agent statistics
-
-The `agent` variable is a map containing aggregate statistics for the entire
-Telegraf instance.
-These fields correspond to the `internal_agent` metric from the
-Telegraf [internal input plugin](/telegraf/v1/plugins/#input-internal).
-
-| Field | Type | Description |
-|:------|:-----|:------------|
-| `agent.metrics_written` | int | Total metrics written by all output plugins. |
-| `agent.metrics_rejected` | int | Total metrics rejected by all output plugins. |
-| `agent.metrics_dropped` | int | Total metrics dropped by all output plugins. |
-| `agent.metrics_gathered` | int | Total metrics collected by all input plugins. |
-| `agent.gather_errors` | int | Total collection errors across all input plugins. |
-| `agent.gather_timeouts` | int | Total collection timeouts across all input plugins. |
-
-### Example
-
-```cel
-agent.gather_errors > 0
-```
-
-## Input plugin statistics (`inputs`)
-
-The `inputs` variable is a map where each key is a plugin type (for example,
-`cpu` for `inputs.cpu`) and the value is a **list** of plugin instances.
-Each entry in the list represents one configured instance of that plugin type.
-
-These fields correspond to the `internal_gather` metric from the Telegraf
-[internal input plugin](/telegraf/v1/plugins/#input-internal).
-
-| Field | Type | Description |
-|:------|:-----|:------------|
-| `id` | string | Unique plugin identifier. |
-| `alias` | string | Alias set for the plugin. Only exists if an alias is defined in the plugin configuration. |
-| `errors` | int | Collection errors for this plugin instance. |
-| `metrics_gathered` | int | Number of metrics collected by this instance. |
-| `gather_time_ns` | int | Time spent gathering metrics, in nanoseconds. |
-| `gather_timeouts` | int | Number of timeouts during metric collection. |
-| `startup_errors` | int | Number of times the plugin failed to start. |
-
-### Access patterns
-
-Access a specific plugin type and iterate over its instances:
-
-```cel
-// Check if any cpu input instance has errors
-inputs.cpu.exists(i, i.errors > 0)
-```
-
-```cel
-// Access the first instance of the cpu input
-inputs.cpu[0].metrics_gathered
-```
-
-Use `has()` to safely check if a plugin type exists before accessing it:
-
-```cel
-// Safe access — returns false if no cpu input is configured
-has(inputs.cpu) && inputs.cpu.exists(i, i.errors > 0)
-```
-
-## Output plugin statistics (`outputs`)
-
-The `outputs` variable is a map with the same structure as `inputs`.
-Each key is a plugin type (for example, `influxdb_v2` for `outputs.influxdb_v2`)
-and the value is a list of plugin instances.
-
-These fields correspond to the `internal_write` metric from the Telegraf
-[internal input plugin](/telegraf/v1/plugins/#input-internal).
-
-| Field | Type | Description |
-|:------|:-----|:------------|
-| `id` | string | Unique plugin identifier. |
-| `alias` | string | Alias set for the plugin. Only exists if an alias is defined in the plugin configuration. |
-| `errors` | int | Write errors for this plugin instance. |
-| `metrics_filtered` | int | Number of metrics filtered by the output. |
-| `write_time_ns` | int | Time spent writing metrics, in nanoseconds. |
-| `startup_errors` | int | Number of times the plugin failed to start. |
-| `metrics_added` | int | Number of metrics added to the output buffer. |
-| `metrics_written` | int | Number of metrics written to the output destination. |
-| `metrics_rejected` | int | Number of metrics rejected by the service or serialization. |
-| `metrics_dropped` | int | Number of metrics dropped (for example, due to buffer fullness). |
-| `buffer_size` | int | Current number of metrics in the output buffer. |
-| `buffer_limit` | int | Capacity of the output buffer. Irrelevant for disk-based buffers. |
-| `buffer_fullness` | float | Ratio of metrics in the buffer to capacity. Can exceed `1.0` (greater than 100%) for disk-based buffers. |
-
-### Access patterns
-
-```cel
-// Check if any InfluxDB v2 output has write errors
-outputs.influxdb_v2.exists(o, o.errors > 0)
-```
-
-```cel
-// Check buffer fullness across all instances of an output
-outputs.influxdb_v2.exists(o, o.buffer_fullness > 0.8)
-```
-
-## Accumulation behavior
-
-Unless noted otherwise, all variable values are **accumulated since the last
-successful heartbeat message**.
-Use the `last_update` variable with `now()` to calculate rates — for example:
-
-```cel
-// True if the error rate exceeds 1 error per minute
-log_errors > 0 && duration.getMinutes(now() - last_update) > 0
-  && log_errors / duration.getMinutes(now() - last_update) > 1
-```
diff --git a/docs/superpowers/plans/2026-03-17-tc-cel-status.md b/docs/superpowers/plans/2026-03-17-tc-cel-status.md
new file mode 100644
index 000000000..dee794819
--- /dev/null
+++ b/docs/superpowers/plans/2026-03-17-tc-cel-status.md
@@ -0,0 +1,972 @@
+# Telegraf Controller: Agent Status & CEL Reference Implementation Plan
+
+> **For agentic workers:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Add comprehensive agent status configuration docs and a multi-page CEL expression reference to the Telegraf Controller documentation.
+
+**Architecture:** Update the existing status stub page with practical examples and create four new pages under `reference/cel/`. All content is documentation-only (Markdown). The CEL reference is self-contained and does not depend on the heartbeat plugin docs.
+
+**Tech Stack:** Hugo, Markdown, TOML (config examples), CEL (expression examples)
+
+**Spec:** `docs/superpowers/specs/2026-03-17-tc-cel-status-design.md`
+
+***
+
+## File Map
+
+| Action | File                                                     | Responsibility                                            |
+| ------ | -------------------------------------------------------- | --------------------------------------------------------- |
+| Modify | `content/telegraf/controller/agents/status.md`           | Practical guide: status values, config examples, UI steps |
+| Create | `content/telegraf/controller/reference/cel/_index.md`    | CEL overview, evaluation flow, config reference           |
+| Create | `content/telegraf/controller/reference/cel/variables.md` | All CEL variables: top-level, agent, inputs, outputs      |
+| Create | `content/telegraf/controller/reference/cel/functions.md` | CEL functions, operators, quick reference                 |
+| Create | `content/telegraf/controller/reference/cel/examples.md`  | Real-world CEL expression examples by scenario            |
+
+### Conventions (from existing TC docs)
+
+- **Menu:** All TC pages use `menu: telegraf_controller:`. Child pages use `parent:` matching the parent's `name`.
+- **Reference children:** Existing reference pages use `parent: Reference` with weights 101-110. The CEL section uses `parent: Reference` on `_index.md` with weight 107 (after authorization at 106, before EULA at 110). CEL child pages use `parent: CEL expressions`.
+- **Product name shortcode:** Use `{{% product-name %}}` for "Telegraf Controller" and `{{% product-name "short" %}}` for "Controller".
+- **Dynamic values shortcode:** Wrap TOML configs containing `&{...}` parameters with `{{% telegraf/dynamic-values %}}...{{% /telegraf/dynamic-values %}}`.
+- **Callouts:** Use `> [!Note]`, `> [!Important]`, `> [!Warning]` syntax.
+- **Semantic line feeds:** One sentence per line.
+
+***
+
+## Task 1: Create CEL reference index page
+
+**Files:**
+
+- Create: `content/telegraf/controller/reference/cel/_index.md`
+
+- [ ] **Step 1: Create the CEL reference index page**
+
+Create `content/telegraf/controller/reference/cel/_index.md` with the following content:
+
+````markdown
+---
+title: CEL expressions
+description: >
+  Reference documentation for Common Expression Language (CEL) expressions used
+  to evaluate Telegraf agent status in {{% product-name %}}.
+menu:
+  telegraf_controller:
+    name: CEL expressions
+    parent: Reference
+weight: 107
+related:
+  - /telegraf/controller/agents/status/
+  - /telegraf/v1/output-plugins/heartbeat/
+---
+
+[Common Expression Language (CEL)](https://cel.dev) is a lightweight expression
+language designed for evaluating simple conditions.
+{{% product-name %}} uses CEL expressions in the Telegraf
+[heartbeat output plugin](/telegraf/v1/output-plugins/heartbeat/) to evaluate
+agent status based on runtime data such as metric counts, error rates, and
+plugin statistics.
+
+## How status evaluation works
+
+You define CEL expressions for three status levels in the
+`[outputs.heartbeat.status]` section of your Telegraf configuration:
+
+- **`ok`** — The agent is healthy.
+- **`warn`** — The agent has a potential issue.
+- **`fail`** — The agent has a critical problem.
+
+Each expression is a CEL program that returns a boolean value.
+Telegraf evaluates expressions in a configurable order (default:
+`ok`, `warn`, `fail`) and assigns the status of the **first expression that
+evaluates to `true`**.
+
+If no expression evaluates to `true`, the `default` status is used
+(default: `"ok"`).
+
+### Initial status
+
+Use the `initial` setting to define a status before the first Telegraf flush
+cycle.
+If `initial` is not set or is empty, Telegraf evaluates the status expressions
+immediately, even before the first flush.
+
+### Evaluation order
+
+The `order` setting controls which expressions are evaluated and in what
+sequence.
+
+> [!Note]
+> If you omit a status from the `order` list, its expression is **not
+> evaluated**.
+
+## Configuration reference
+
+Configure status evaluation in the `[outputs.heartbeat.status]` section of the
+heartbeat output plugin.
+You must include `"status"` in the `include` list for status evaluation to take
+effect.
+
+```toml
+[[outputs.heartbeat]]
+  url = "http://telegraf_controller.example.com/agents/heartbeat"
+  instance_id = "agent-123"
+  interval = "1m"
+  include = ["hostname", "statistics", "status"]
+
+  [outputs.heartbeat.status]
+    ## CEL expressions that return a boolean.
+    ## The first expression that evaluates to true sets the status.
+    ok = "metrics > 0"
+    warn = "log_errors > 0"
+    fail = "log_errors > 10"
+
+    ## Evaluation order (default: ["ok", "warn", "fail"])
+    order = ["ok", "warn", "fail"]
+
+    ## Default status when no expression matches
+    ## Options: "ok", "warn", "fail", "undefined"
+    default = "ok"
+
+    ## Initial status before the first flush cycle
+    ## Options: "ok", "warn", "fail", "undefined", ""
+    # initial = ""
+```
+
+| Option    | Type            | Default                  | Description                                                                                                     |
+| :-------- | :-------------- | :----------------------- | :-------------------------------------------------------------------------------------------------------------- |
+| `ok`      | string (CEL)    | `"false"`                | Expression that, when `true`, sets status to **ok**.                                                            |
+| `warn`    | string (CEL)    | `"false"`                | Expression that, when `true`, sets status to **warn**.                                                          |
+| `fail`    | string (CEL)    | `"false"`                | Expression that, when `true`, sets status to **fail**.                                                          |
+| `order`   | list of strings | `["ok", "warn", "fail"]` | Order in which expressions are evaluated.                                                                       |
+| `default` | string          | `"ok"`                   | Status used when no expression evaluates to `true`. Options: `ok`, `warn`, `fail`, `undefined`.                 |
+| `initial` | string          | `""`                     | Status before the first flush. Options: `ok`, `warn`, `fail`, `undefined`, `""` (empty = evaluate expressions). |
+
+{{< children hlevel="h2" >}}
+
+````
+
+- [ ] **Step 2: Verify the file renders correctly**
+
+Run: `npx hugo server` and navigate to the CEL expressions reference page.
+Verify: page renders, navigation shows "CEL expressions" under "Reference", child page links appear.
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add content/telegraf/controller/reference/cel/_index.md
+git commit -m "feat(tc-cel): add CEL expressions reference index page"
+````
+
+***
+
+## Task 2: Create CEL variables reference page
+
+**Files:**
+
+- Create: `content/telegraf/controller/reference/cel/variables.md`
+
+- [ ] **Step 1: Create the variables reference page**
+
+Create `content/telegraf/controller/reference/cel/variables.md` with the following content:
+
+````markdown
+---
+title: CEL variables
+description: >
+  Reference for variables available in CEL expressions used to evaluate
+  Telegraf agent status in {{% product-name %}}.
+menu:
+  telegraf_controller:
+    name: Variables
+    parent: CEL expressions
+weight: 201
+---
+
+CEL expressions for agent status evaluation have access to variables that
+represent data collected by Telegraf since the last successful heartbeat message
+(unless noted otherwise).
+
+## Top-level variables
+
+| Variable | Type | Description |
+|:---------|:-----|:------------|
+| `metrics` | int | Number of metrics arriving at the heartbeat output plugin. |
+| `log_errors` | int | Number of errors logged by the Telegraf instance. |
+| `log_warnings` | int | Number of warnings logged by the Telegraf instance. |
+| `last_update` | time | Timestamp of the last successful heartbeat message. Use with `now()` to calculate durations or rates. |
+| `agent` | map | Agent-level statistics. See [Agent statistics](#agent-statistics). |
+| `inputs` | map | Input plugin statistics. See [Input plugin statistics](#input-plugin-statistics-inputs). |
+| `outputs` | map | Output plugin statistics. See [Output plugin statistics](#output-plugin-statistics-outputs). |
+
+## Agent statistics
+
+The `agent` variable is a map containing aggregate statistics for the entire
+Telegraf instance.
+These fields correspond to the `internal_agent` metric from the
+Telegraf [internal input plugin](/telegraf/v1/plugins/#input-internal).
+
+| Field | Type | Description |
+|:------|:-----|:------------|
+| `agent.metrics_written` | int | Total metrics written by all output plugins. |
+| `agent.metrics_rejected` | int | Total metrics rejected by all output plugins. |
+| `agent.metrics_dropped` | int | Total metrics dropped by all output plugins. |
+| `agent.metrics_gathered` | int | Total metrics collected by all input plugins. |
+| `agent.gather_errors` | int | Total collection errors across all input plugins. |
+| `agent.gather_timeouts` | int | Total collection timeouts across all input plugins. |
+
+### Example
+
+```cel
+agent.gather_errors > 0
+```
+
+## Input plugin statistics (`inputs`)
+
+The `inputs` variable is a map where each key is a plugin type (for example,
+`cpu` for `inputs.cpu`) and the value is a **list** of plugin instances.
+Each entry in the list represents one configured instance of that plugin type.
+
+These fields correspond to the `internal_gather` metric from the Telegraf
+[internal input plugin](/telegraf/v1/plugins/#input-internal).
+
+| Field              | Type   | Description                                                                               |
+| :----------------- | :----- | :---------------------------------------------------------------------------------------- |
+| `id`               | string | Unique plugin identifier.                                                                 |
+| `alias`            | string | Alias set for the plugin. Only exists if an alias is defined in the plugin configuration. |
+| `errors`           | int    | Collection errors for this plugin instance.                                               |
+| `metrics_gathered` | int    | Number of metrics collected by this instance.                                             |
+| `gather_time_ns`   | int    | Time spent gathering metrics, in nanoseconds.                                             |
+| `gather_timeouts`  | int    | Number of timeouts during metric collection.                                              |
+| `startup_errors`   | int    | Number of times the plugin failed to start.                                               |
+
+### Access patterns
+
+Access a specific plugin type and iterate over its instances:
+
+```cel
+// Check if any cpu input instance has errors
+inputs.cpu.exists(i, i.errors > 0)
+```
+
+```cel
+// Access the first instance of the cpu input
+inputs.cpu[0].metrics_gathered
+```
+
+Use `has()` to safely check if a plugin type exists before accessing it:
+
+```cel
+// Safe access — returns false if no cpu input is configured
+has(inputs.cpu) && inputs.cpu.exists(i, i.errors > 0)
+```
+
+## Output plugin statistics (`outputs`)
+
+The `outputs` variable is a map with the same structure as `inputs`.
+Each key is a plugin type (for example, `influxdb_v2` for `outputs.influxdb_v2`)
+and the value is a list of plugin instances.
+
+These fields correspond to the `internal_write` metric from the Telegraf
+[internal input plugin](/telegraf/v1/plugins/#input-internal).
+
+| Field              | Type   | Description                                                                                              |
+| :----------------- | :----- | :------------------------------------------------------------------------------------------------------- |
+| `id`               | string | Unique plugin identifier.                                                                                |
+| `alias`            | string | Alias set for the plugin. Only exists if an alias is defined in the plugin configuration.                |
+| `errors`           | int    | Write errors for this plugin instance.                                                                   |
+| `metrics_filtered` | int    | Number of metrics filtered by the output.                                                                |
+| `write_time_ns`    | int    | Time spent writing metrics, in nanoseconds.                                                              |
+| `startup_errors`   | int    | Number of times the plugin failed to start.                                                              |
+| `metrics_added`    | int    | Number of metrics added to the output buffer.                                                            |
+| `metrics_written`  | int    | Number of metrics written to the output destination.                                                     |
+| `metrics_rejected` | int    | Number of metrics rejected by the service or serialization.                                              |
+| `metrics_dropped`  | int    | Number of metrics dropped (for example, due to buffer fullness).                                         |
+| `buffer_size`      | int    | Current number of metrics in the output buffer.                                                          |
+| `buffer_limit`     | int    | Capacity of the output buffer. Irrelevant for disk-based buffers.                                        |
+| `buffer_fullness`  | float  | Ratio of metrics in the buffer to capacity. Can exceed `1.0` (greater than 100%) for disk-based buffers. |
+
+### Access patterns
+
+```cel
+// Check if any InfluxDB v2 output has write errors
+outputs.influxdb_v2.exists(o, o.errors > 0)
+```
+
+```cel
+// Check buffer fullness across all instances of an output
+outputs.influxdb_v2.exists(o, o.buffer_fullness > 0.8)
+```
+
+## Accumulation behavior
+
+Unless noted otherwise, all variable values are **accumulated since the last
+successful heartbeat message**.
+Use the `last_update` variable with `now()` to calculate rates — for example:
+
+```cel
+// True if the error rate exceeds 1 error per minute
+log_errors > 0 && (now() - last_update).getMinutes() > 0
+  && log_errors / (now() - last_update).getMinutes() > 1
+```
+
+````
+
+- [ ] **Step 2: Verify the file renders correctly**
+
+Run: `npx hugo server` and navigate to the Variables page under CEL expressions.
+Verify: page renders, tables display correctly, code blocks have proper syntax highlighting, navigation shows "Variables" under "CEL expressions".
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add content/telegraf/controller/reference/cel/variables.md
+git commit -m "feat(tc-cel): add CEL variables reference page"
+````
+
+***
+
+## Task 3: Create CEL functions reference page
+
+**Files:**
+
+- Create: `content/telegraf/controller/reference/cel/functions.md`
+
+- [ ] **Step 1: Create the functions reference page**
+
+Create `content/telegraf/controller/reference/cel/functions.md` with the following content:
+
+````markdown
+---
+title: CEL functions and operators
+description: >
+  Reference for functions and operators available in CEL expressions used to
+  evaluate Telegraf agent status in {{% product-name %}}.
+menu:
+  telegraf_controller:
+    name: Functions
+    parent: CEL expressions
+weight: 202
+---
+
+CEL expressions for agent status evaluation support built-in CEL operators and
+the following function libraries.
+
+## Time functions
+
+### `now()`
+
+Returns the current time.
+Use with `last_update` to calculate durations or detect stale data.
+
+```cel
+// True if more than 10 minutes since last heartbeat
+now() - last_update > duration('10m')
+```
+
+```cel
+// True if more than 5 minutes since last heartbeat
+now() - last_update > duration('5m')
+```
+
+## Math functions
+
+Math functions from the
+[CEL math library](https://github.com/google/cel-go/blob/master/ext/README.md#math)
+are available for numeric calculations.
+
+### Commonly used functions
+
+| Function                   | Description                 | Example                                    |
+| :------------------------- | :-------------------------- | :----------------------------------------- |
+| `math.greatest(a, b, ...)` | Returns the greatest value. | `math.greatest(log_errors, log_warnings)`  |
+| `math.least(a, b, ...)`    | Returns the least value.    | `math.least(agent.metrics_gathered, 1000)` |
+
+### Example
+
+```cel
+// Warn if either errors or warnings exceed a threshold
+math.greatest(log_errors, log_warnings) > 5
+```
+
+## String functions
+
+String functions from the
+[CEL strings library](https://github.com/google/cel-go/blob/master/ext/README.md#strings)
+are available for string operations.
+These are useful when checking plugin `alias` or `id` fields.
+
+### Example
+
+```cel
+// Check if any input plugin has an alias containing "critical"
+inputs.cpu.exists(i, has(i.alias) && i.alias.contains("critical"))
+```
+
+## Encoding functions
+
+Encoding functions from the
+[CEL encoder library](https://github.com/google/cel-go/blob/master/ext/README.md#encoders)
+are available for encoding and decoding values.
+
+## Operators
+
+CEL supports standard operators for building expressions.
+
+### Comparison operators
+
+| Operator | Description           | Example                        |
+| :------- | :-------------------- | :----------------------------- |
+| `==`     | Equal                 | `metrics == 0`                 |
+| `!=`     | Not equal             | `log_errors != 0`              |
+| `<`      | Less than             | `agent.metrics_gathered < 100` |
+| `<=`     | Less than or equal    | `log_warnings <= 5`            |
+| `>`      | Greater than          | `log_errors > 10`              |
+| `>=`     | Greater than or equal | `metrics >= 1000`              |
+
+### Logical operators
+
+| Operator | Description | Example                                  |
+| :------- | :---------- | :--------------------------------------- |
+| `&&`     | Logical AND | `log_errors > 0 && metrics == 0`         |
+| `\|\|`   | Logical OR  | `log_errors > 10 \|\| log_warnings > 50` |
+| `!`      | Logical NOT | `!(metrics > 0)`                         |
+
+### Arithmetic operators
+
+| Operator | Description    | Example                                          |
+| :------- | :------------- | :----------------------------------------------- |
+| `+`      | Addition       | `log_errors + log_warnings`                      |
+| `-`      | Subtraction    | `agent.metrics_gathered - agent.metrics_dropped` |
+| `*`      | Multiplication | `log_errors * 2`                                 |
+| `/`      | Division       | `agent.metrics_dropped / agent.metrics_gathered` |
+| `%`      | Modulo         | `metrics % 100`                                  |
+
+### Ternary operator
+
+```cel
+// Conditional expression
+log_errors > 10 ? true : false
+```
+
+### List operations
+
+| Function                 | Description                    | Example                                     |
+| :----------------------- | :----------------------------- | :------------------------------------------ |
+| `exists(var, condition)` | True if any element matches.   | `inputs.cpu.exists(i, i.errors > 0)`        |
+| `all(var, condition)`    | True if all elements match.    | `outputs.influxdb_v2.all(o, o.errors == 0)` |
+| `size()`                 | Number of elements.            | `inputs.cpu.size() > 0`                     |
+| `has()`                  | True if a field or key exists. | `has(inputs.cpu)`                           |
+
+````
+
+- [ ] **Step 2: Verify the file renders correctly**
+
+Run: `npx hugo server` and navigate to the Functions page under CEL expressions.
+Verify: page renders, tables display correctly, pipe characters in logical operators table render properly, navigation shows "Functions" under "CEL expressions".
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add content/telegraf/controller/reference/cel/functions.md
+git commit -m "feat(tc-cel): add CEL functions and operators reference page"
+````
+
+***
+
+## Task 4: Create CEL examples page
+
+**Files:**
+
+- Create: `content/telegraf/controller/reference/cel/examples.md`
+
+- [ ] **Step 1: Create the examples page**
+
+Create `content/telegraf/controller/reference/cel/examples.md` with the following content:
+
+````markdown
+---
+title: CEL expression examples
+description: >
+  Real-world examples of CEL expressions for evaluating Telegraf agent status
+  in {{% product-name %}}.
+menu:
+  telegraf_controller:
+    name: Examples
+    parent: CEL expressions
+weight: 203
+related:
+  - /telegraf/controller/agents/status/
+  - /telegraf/controller/reference/cel/variables/
+  - /telegraf/controller/reference/cel/functions/
+---
+
+Each example includes a scenario description, the CEL expression, a full
+heartbeat plugin configuration block, and an explanation.
+
+For the full list of available variables and functions, see:
+
+- [CEL variables](/telegraf/controller/reference/cel/variables/)
+- [CEL functions and operators](/telegraf/controller/reference/cel/functions/)
+
+## Basic health check
+
+**Scenario:** Report `ok` when Telegraf is actively processing metrics.
+Fall back to the `fail` status when no expression matches — this means
+the agent is reported as failing when metrics stop flowing.
+
+**Expression:**
+
+```cel
+ok = "metrics > 0"
+```
+
+**Configuration:**
+
+```toml
+[[outputs.heartbeat]]
+  url = "http://telegraf_controller.example.com/agents/heartbeat"
+  instance_id = "agent-123"
+  interval = "1m"
+  include = ["hostname", "statistics", "status"]
+
+  [outputs.heartbeat.status]
+    ok = "metrics > 0"
+    default = "fail"
+```
+
+**How it works:** If the heartbeat plugin received metrics since the last
+heartbeat, the status is `ok`.
+If no metrics arrived, no expression matches and the `default` status of `fail`
+is used, indicating the agent is not processing data.
+
+## Error rate monitoring
+
+**Scenario:** Warn when any errors are logged and fail when the error count is
+high.
+
+**Expressions:**
+
+```cel
+warn = "log_errors > 0"
+fail = "log_errors > 10"
+```
+
+**Configuration:**
+
+```toml
+[[outputs.heartbeat]]
+  url = "http://telegraf_controller.example.com/agents/heartbeat"
+  instance_id = "agent-123"
+  interval = "1m"
+  include = ["hostname", "statistics", "status"]
+
+  [outputs.heartbeat.status]
+    ok = "log_errors == 0 && log_warnings == 0"
+    warn = "log_errors > 0"
+    fail = "log_errors > 10"
+    order = ["fail", "warn", "ok"]
+    default = "ok"
+```
+
+**How it works:** Expressions are evaluated in `fail`, `warn`, `ok` order.
+If more than 10 errors occurred since the last heartbeat, the status is `fail`.
+If 1-10 errors occurred, the status is `warn`.
+If no errors or warnings occurred, the status is `ok`.
+
+## Buffer health
+
+**Scenario:** Warn when any `influxdb_v2` output buffer exceeds 80% fullness
+and fail when it exceeds 95%, indicating potential data backpressure.
+
+**Expressions:**
+
+```cel
+warn = "outputs.influxdb_v2.exists(o, o.buffer_fullness > 0.8)"
+fail = "outputs.influxdb_v2.exists(o, o.buffer_fullness > 0.95)"
+```
+
+**Configuration:**
+
+```toml
+[[outputs.heartbeat]]
+  url = "http://telegraf_controller.example.com/agents/heartbeat"
+  instance_id = "agent-123"
+  interval = "1m"
+  include = ["hostname", "statistics", "status"]
+
+  [outputs.heartbeat.status]
+    ok = "metrics > 0"
+    warn = "outputs.influxdb_v2.exists(o, o.buffer_fullness > 0.8)"
+    fail = "outputs.influxdb_v2.exists(o, o.buffer_fullness > 0.95)"
+    order = ["fail", "warn", "ok"]
+    default = "ok"
+```
+
+**How it works:** The `outputs.influxdb_v2` key contains a list of all
+`influxdb_v2` output plugin instances.
+The `exists()` function iterates over all instances and returns `true` if any
+instance's `buffer_fullness` exceeds the threshold.
+At 95% fullness, the status is `fail`; at 80%, `warn`; otherwise `ok`.
+
+## Plugin-specific checks
+
+**Scenario:** Monitor a specific input plugin for collection errors and use
+safe access patterns to avoid errors when the plugin is not configured.
+
+**Expressions:**
+
+```cel
+warn = "has(inputs.cpu) && inputs.cpu.exists(i, i.errors > 0)"
+fail = "has(inputs.cpu) && inputs.cpu.exists(i, i.startup_errors > 0)"
+```
+
+**Configuration:**
+
+```toml
+[[outputs.heartbeat]]
+  url = "http://telegraf_controller.example.com/agents/heartbeat"
+  instance_id = "agent-123"
+  interval = "1m"
+  include = ["hostname", "statistics", "status"]
+
+  [outputs.heartbeat.status]
+    ok = "metrics > 0"
+    warn = "has(inputs.cpu) && inputs.cpu.exists(i, i.errors > 0)"
+    fail = "has(inputs.cpu) && inputs.cpu.exists(i, i.startup_errors > 0)"
+    order = ["fail", "warn", "ok"]
+    default = "ok"
+```
+
+**How it works:** The `has()` function checks if the `cpu` key exists in the
+`inputs` map before attempting to access it.
+This prevents evaluation errors when the plugin is not configured.
+If the plugin has startup errors, the status is `fail`.
+If it has collection errors, the status is `warn`.
+
+## Composite conditions
+
+**Scenario:** Combine multiple signals to detect a degraded agent — high error
+count combined with output buffer pressure.
+
+**Expression:**
+
+```cel
+fail = "log_errors > 5 && has(outputs.influxdb_v2) && outputs.influxdb_v2.exists(o, o.buffer_fullness > 0.9)"
+```
+
+**Configuration:**
+
+```toml
+[[outputs.heartbeat]]
+  url = "http://telegraf_controller.example.com/agents/heartbeat"
+  instance_id = "agent-123"
+  interval = "1m"
+  include = ["hostname", "statistics", "status"]
+
+  [outputs.heartbeat.status]
+    ok = "metrics > 0 && log_errors == 0"
+    warn = "log_errors > 0 || (has(outputs.influxdb_v2) && outputs.influxdb_v2.exists(o, o.buffer_fullness > 0.8))"
+    fail = "log_errors > 5 && has(outputs.influxdb_v2) && outputs.influxdb_v2.exists(o, o.buffer_fullness > 0.9)"
+    order = ["fail", "warn", "ok"]
+    default = "ok"
+```
+
+**How it works:** The `fail` expression requires **both** a high error count
+**and** buffer pressure to trigger.
+The `warn` expression uses `||` to trigger on **either** condition independently.
+This layered approach avoids false alarms from transient spikes in a single
+metric.
+
+## Time-based expressions
+
+**Scenario:** Warn when the time since the last successful heartbeat exceeds a
+threshold, indicating potential connectivity or performance issues.
+
+**Expressions:**
+
+```cel
+warn = "now() - last_update > duration('10m')"
+fail = "now() - last_update > duration('30m')"
+```
+
+**Configuration:**
+
+```toml
+[[outputs.heartbeat]]
+  url = "http://telegraf_controller.example.com/agents/heartbeat"
+  instance_id = "agent-123"
+  interval = "1m"
+  include = ["hostname", "statistics", "status"]
+
+  [outputs.heartbeat.status]
+    ok = "metrics > 0"
+    warn = "now() - last_update > duration('10m')"
+    fail = "now() - last_update > duration('30m')"
+    order = ["fail", "warn", "ok"]
+    default = "undefined"
+    initial = "undefined"
+```
+
+**How it works:** The `now()` function returns the current time and
+`last_update` is the timestamp of the last successful heartbeat.
+Subtracting them produces a duration that can be compared against a threshold.
+The `initial` status is set to `undefined` so new agents don't immediately show
+a stale-data warning before their first successful heartbeat.
+
+## Custom evaluation order
+
+**Scenario:** Use fail-first evaluation to prioritize detecting critical issues
+before checking for healthy status.
+
+**Configuration:**
+
+```toml
+[[outputs.heartbeat]]
+  url = "http://telegraf_controller.example.com/agents/heartbeat"
+  instance_id = "agent-123"
+  interval = "1m"
+  include = ["hostname", "statistics", "status"]
+
+  [outputs.heartbeat.status]
+    ok = "metrics > 0 && log_errors == 0"
+    warn = "log_errors > 0"
+    fail = "log_errors > 10 || agent.metrics_dropped > 100"
+    order = ["fail", "warn", "ok"]
+    default = "undefined"
+```
+
+**How it works:** By setting `order = ["fail", "warn", "ok"]`, the most severe
+conditions are checked first.
+If the agent has more than 10 logged errors or has dropped more than 100
+metrics, the status is `fail` — regardless of whether the `ok` or `warn`
+expression would also match.
+This is the recommended order for production deployments where early detection
+of critical issues is important.
+
+````
+
+- [ ] **Step 2: Verify the file renders correctly**
+
+Run: `npx hugo server` and navigate to the Examples page under CEL expressions.
+Verify: page renders, all seven example sections display with correct TOML syntax highlighting, navigation shows "Examples" under "CEL expressions".
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add content/telegraf/controller/reference/cel/examples.md
+git commit -m "feat(tc-cel): add CEL expression examples page"
+````
+
+***
+
+## Task 5: Update the agent status page
+
+**Files:**
+
+- Modify: `content/telegraf/controller/agents/status.md`
+
+- [ ] **Step 1: Replace the status page content**
+
+Replace the full content of `content/telegraf/controller/agents/status.md` with the following:
+
+````markdown
+---
+title: Set agent statuses
+description: >
+  Configure agent status evaluation using CEL expressions in the Telegraf
+  heartbeat output plugin and view statuses in {{% product-name %}}.
+menu:
+  telegraf_controller:
+    name: Set agent statuses
+    parent: Manage agents
+weight: 104
+related:
+  - /telegraf/controller/reference/cel/
+  - /telegraf/controller/agents/reporting-rules/
+  - /telegraf/v1/output-plugins/heartbeat/
+---
+
+Agent statuses reflect the health of a Telegraf instance based on runtime data.
+The Telegraf [heartbeat output plugin](/telegraf/v1/output-plugins/heartbeat/)
+evaluates [Common Expression Language (CEL)](/telegraf/controller/reference/cel/)
+expressions against agent metrics, error counts, and plugin statistics to
+determine the status sent with each heartbeat.
+
+## Status values
+
+{{% product-name %}} displays the following agent statuses:
+
+| Status | Source | Description |
+|:-------|:-------|:------------|
+| **Ok** | Heartbeat plugin | The agent is healthy. Set when the `ok` CEL expression evaluates to `true`. |
+| **Warn** | Heartbeat plugin | The agent has a potential issue. Set when the `warn` CEL expression evaluates to `true`. |
+| **Fail** | Heartbeat plugin | The agent has a critical problem. Set when the `fail` CEL expression evaluates to `true`. |
+| **Undefined** | Heartbeat plugin | No expression matched and the `default` is set to `undefined`, or the `initial` status is `undefined`. |
+| **Not Reporting** | {{% product-name "short" %}} | The agent has not sent a heartbeat within the [reporting rule](/telegraf/controller/agents/reporting-rules/) threshold. {{% product-name "short" %}} applies this status automatically. |
+
+## How status evaluation works
+
+You define CEL expressions for `ok`, `warn`, and `fail` in the
+`[outputs.heartbeat.status]` section of your heartbeat plugin configuration.
+Telegraf evaluates expressions in a configurable order and assigns the status
+of the first expression that evaluates to `true`.
+
+For full details on evaluation flow, configuration options, and available
+variables and functions, see the
+[CEL expressions reference](/telegraf/controller/reference/cel/).
+
+## Configure agent statuses
+
+To configure status evaluation, add `"status"` to the `include` list in your
+heartbeat plugin configuration and define CEL expressions in the
+`[outputs.heartbeat.status]` section.
+
+### Example: Basic health check
+
+Report `ok` when metrics are flowing.
+If no metrics arrive, fall back to the `fail` status.
+
+{{% telegraf/dynamic-values %}}
+```toml
+[[outputs.heartbeat]]
+  url = "http://telegraf_controller.example.com/agents/heartbeat"
+  instance_id = "&{agent_id}"
+  token = "${INFLUX_TOKEN}"
+  interval = "1m"
+  include = ["hostname", "statistics", "status"]
+
+  [outputs.heartbeat.status]
+    ok = "metrics > 0"
+    default = "fail"
+```
+
+{{% /telegraf/dynamic-values %}}
+
+### Example: Error-based status
+
+Warn when errors are logged and fail when the error count is high.
+
+{{% telegraf/dynamic-values %}}
+
+```toml
+[[outputs.heartbeat]]
+  url = "http://telegraf_controller.example.com/agents/heartbeat"
+  instance_id = "&{agent_id}"
+  token = "${INFLUX_TOKEN}"
+  interval = "1m"
+  include = ["hostname", "statistics", "status"]
+
+  [outputs.heartbeat.status]
+    ok = "log_errors == 0 && log_warnings == 0"
+    warn = "log_errors > 0"
+    fail = "log_errors > 10"
+    order = ["fail", "warn", "ok"]
+    default = "ok"
+```
+
+{{% /telegraf/dynamic-values %}}
+
+### Example: Composite condition
+
+Combine error count and buffer pressure signals.
+
+{{% telegraf/dynamic-values %}}
+
+```toml
+[[outputs.heartbeat]]
+  url = "http://telegraf_controller.example.com/agents/heartbeat"
+  instance_id = "&{agent_id}"
+  token = "${INFLUX_TOKEN}"
+  interval = "1m"
+  include = ["hostname", "statistics", "status"]
+
+  [outputs.heartbeat.status]
+    ok = "metrics > 0 && log_errors == 0"
+    warn = "log_errors > 0 || (has(outputs.influxdb_v2) && outputs.influxdb_v2.exists(o, o.buffer_fullness > 0.8))"
+    fail = "log_errors > 5 && has(outputs.influxdb_v2) && outputs.influxdb_v2.exists(o, o.buffer_fullness > 0.9)"
+    order = ["fail", "warn", "ok"]
+    default = "ok"
+```
+
+{{% /telegraf/dynamic-values %}}
+
+For more examples including buffer health, plugin-specific checks, and
+time-based expressions, see
+[CEL expression examples](/telegraf/controller/reference/cel/examples/).
+
+## View an agent's status
+
+1. In {{% product-name %}}, go to **Agents**.
+2. Check the **Status** column for each agent.
+3. To see more details, click the **More button ({{% icon "tc-more" %}})** and
+   select **View Details**.
+4. The details page shows the reported status, reporting rule assignment, and
+   the time of the last heartbeat.
+
+## Learn more
+
+- [CEL expressions reference](/telegraf/controller/reference/cel/) — Full
+  reference for CEL evaluation flow, configuration, variables, functions, and
+  examples.
+- [Heartbeat output plugin](/telegraf/v1/output-plugins/heartbeat/) — Plugin
+  configuration reference.
+- [Define reporting rules](/telegraf/controller/agents/reporting-rules/) — Configure
+  thresholds for the **Not Reporting** status.
+
+````
+
+- [ ] **Step 2: Verify the file renders correctly**
+
+Run: `npx hugo server` and navigate to the "Set agent statuses" page under "Manage agents".
+Verify: page renders, status table displays correctly, all three example config blocks render with TOML syntax highlighting, cross-links resolve correctly, the "View an agent's status" section is preserved.
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add content/telegraf/controller/agents/status.md
+git commit -m "feat(tc-status): expand agent status page with CEL examples and configuration"
+````
+
+***
+
+## Task 6: Cross-link verification and final review
+
+**Files:**
+
+- All files from Tasks 1-5
+
+- [ ] **Step 1: Verify all cross-links**
+
+Run: `npx hugo server` and verify the following links resolve:
+
+1. Status page → CEL reference index: `/telegraf/controller/reference/cel/`
+2. Status page → Heartbeat plugin: `/telegraf/v1/output-plugins/heartbeat/`
+3. Status page → Reporting rules: `/telegraf/controller/agents/reporting-rules/`
+4. Status page → CEL examples: `/telegraf/controller/reference/cel/examples/`
+5. CEL index → Heartbeat plugin: `/telegraf/v1/output-plugins/heartbeat/`
+6. CEL examples → Variables: `/telegraf/controller/reference/cel/variables/`
+7. CEL examples → Functions: `/telegraf/controller/reference/cel/functions/`
+8. CEL examples → Status page: `/telegraf/controller/agents/status/`
+9. CEL variables → Internal input plugin: `/telegraf/v1/plugins/#input-internal`
+
+- [ ] **Step 2: Verify navigation structure**
+
+In the left nav, confirm:
+
+- "CEL expressions" appears under "Reference"
+
+- "Variables", "Functions", and "Examples" appear as children of "CEL expressions"
+
+- "Set agent statuses" remains under "Manage agents"
+
+- [ ] **Step 3: Run Vale linting**
+
+Run: `.ci/vale/vale.sh content/telegraf/controller/agents/status.md content/telegraf/controller/reference/cel/`
+Fix any errors or warnings. Suggestions can be evaluated but are not blocking.
+
+- [ ] **Step 4: Commit any linting fixes**
+
+```bash
+git add content/telegraf/controller/agents/status.md content/telegraf/controller/reference/cel/
+git commit -m "style(tc-cel): fix Vale linting issues"
+```
diff --git a/docs/superpowers/specs/2026-03-17-tc-cel-status-design.md b/docs/superpowers/specs/2026-03-17-tc-cel-status-design.md
new file mode 100644
index 000000000..3caf178b0
--- /dev/null
+++ b/docs/superpowers/specs/2026-03-17-tc-cel-status-design.md
@@ -0,0 +1,131 @@
+# Telegraf Controller: Agent Status & CEL Expression Reference
+
+**Date:** 2026-03-17
+**Status:** Approved
+**Scope:** Documentation content (no code changes)
+
+## Summary
+
+Add comprehensive agent status configuration documentation to Telegraf Controller docs. This includes updating the existing status page with practical examples and creating a new multi-page CEL expression reference in the reference section.
+
+## Deliverables
+
+### 1. Update existing status page
+
+**File:** `content/telegraf/controller/agents/status.md`
+
+Expand from the current stub into a practical guide with the following structure:
+
+1. **Intro** — What agent statuses are, the four status values (`ok`, `warn`, `fail`, `undefined`) plus the Controller-applied `Not Reporting` state.
+2. **How status evaluation works** — Brief explanation of CEL expressions, evaluation order, defaults, and initial status. Links to the CEL reference for full details.
+3. **Configure agent statuses** — Example heartbeat plugin config with `include = ["status"]` and the `[outputs.heartbeat.status]` section. 2-3 practical inline examples:
+   - Basic health check (ok when metrics are flowing)
+   - Error-based warning/failure
+   - Composite condition
+4. **View an agent's status** — Keep existing UI steps as-is.
+5. **Link to CEL reference** — Points users to the full reference for all variables, functions, and more examples.
+
+### 2. Create CEL expression reference (multi-page)
+
+New section under `content/telegraf/controller/reference/cel/`.
+
+#### `_index.md` — CEL Overview
+
+1. **Intro** — What CEL is (Common Expression Language), how the Telegraf heartbeat output plugin uses it to evaluate agent status, and how Telegraf Controller displays the reported status.
+2. **How status evaluation works** — Detailed evaluation flow:
+   - Expressions are defined for `ok`, `warn`, `fail` — each is a CEL program returning a boolean.
+   - Evaluation order is configurable via `order` (default: `["ok", "warn", "fail"]`).
+   - First expression evaluating to `true` sets the status.
+   - If none match, `default` status is used (default: `"ok"`).
+   - `initial` status can be set for the period before the first flush.
+3. **Configuration reference** — The `[outputs.heartbeat.status]` config block with all options: `ok`, `warn`, `fail`, `order`, `default`, `initial`.
+4. **Child page links** — Variables, Functions, Examples.
+
+#### `variables.md` — Variables Reference
+
+1. **Intro** — Variables represent data collected by Telegraf since the last successful heartbeat (unless noted otherwise).
+2. **Top-level variables** — Table or definition list:
+   - `metrics` (int) — metrics arriving at the heartbeat plugin
+   - `log_errors` (int) — errors logged
+   - `log_warnings` (int) — warnings logged
+   - `last_update` (time) — time of last successful heartbeat
+   - `agent` (map) — agent-level statistics
+   - `inputs` (map) — input plugin statistics
+   - `outputs` (map) — output plugin statistics
+3. **Agent statistics (`agent`)** — Map fields:
+   - `metrics_written`, `metrics_rejected`, `metrics_dropped`, `metrics_gathered`, `gather_errors`, `gather_timeouts`
+4. **Input plugin statistics (`inputs`)** — Map structure: key = plugin type (e.g., `cpu`), value = list of instances. Fields per instance:
+   - `id`, `alias`, `errors`, `metrics_gathered`, `gather_time_ns`, `gather_timeouts`, `startup_errors`
+5. **Output plugin statistics (`outputs`)** — Same map structure. Fields per instance:
+   - `id`, `alias`, `errors`, `metrics_filtered`, `write_time_ns`, `startup_errors`, `metrics_added`, `metrics_written`, `metrics_rejected`, `metrics_dropped`, `buffer_size`, `buffer_limit`, `buffer_fullness`
+6. **Note on accumulation** — Values accumulate since last successful heartbeat; `last_update` enables rate calculation.
+
+#### `functions.md` — Functions Reference
+
+1. **Intro** — CEL expressions support built-in CEL operators plus additional function libraries.
+2. **Time functions** — `now()` returns current time; usage with `last_update` for duration/rate calculations. Include usage example.
+3. **Math functions** — Link to CEL math library. Highlight commonly useful functions (e.g., `math.greatest()`, `math.least()`). Brief examples.
+4. **String functions** — Link to CEL strings library. Note usefulness for checking `alias` or `id` fields. Brief example.
+5. **Encoding functions** — Link to CEL encoder library. Brief note on relevance.
+6. **CEL operators reference** — Quick reference for comparison (`==`, `!=`, `<`, `>`), logical (`&&`, `||`, `!`), arithmetic (`+`, `-`, `*`, `/`), and ternary (`? :`) operators.
+
+#### `examples.md` — Examples
+
+Each example follows a consistent pattern: **scenario description → CEL expression(s) → full config block → explanation**.
+
+1. **Basic health check** — `ok` when metrics are flowing, `fail` otherwise.
+   - `ok = "metrics > 0"`
+2. **Error rate monitoring** — warn on logged errors, fail on high error count.
+   - `warn = "log_errors > 0"`, `fail = "log_errors > 10"`
+3. **Buffer health** — warn when any output buffer exceeds 80% fullness.
+   - Uses `outputs` map iteration to check `buffer_fullness` across plugin instances.
+4. **Plugin-specific checks** — check a specific input or output for errors.
+   - Demonstrates map access like `outputs.influxdb_v2.exists(o, o.errors > 0)` and safe access with `has()`.
+5. **Composite conditions** — combining multiple signals.
+   - `fail = "log_errors > 5 && has(outputs.influxdb_v2) && outputs.influxdb_v2.exists(o, o.buffer_fullness > 0.9)"`
+6. **Time-based expressions** — using `now()` and `last_update` for staleness.
+   - e.g., `warn = "now() - last_update > duration('10m')"`
+7. **Custom evaluation order** — shows `order = ["fail", "warn", "ok"]` for fail-first evaluation.
+
+## File Structure
+
+### New files
+
+```
+content/telegraf/controller/reference/
+  cel/
+    _index.md          — CEL overview, evaluation flow, config reference
+    variables.md       — All variables (top-level, agent, inputs, outputs)
+    functions.md       — Functions, operators, quick reference
+    examples.md        — Real-world examples by scenario
+```
+
+### Updated files
+
+```
+content/telegraf/controller/agents/status.md  — Expand from stub to practical guide
+```
+
+## Navigation / Menu Structure
+
+The CEL section nests under the existing `Reference` parent in the `telegraf_controller` menu:
+
+- **Reference** (existing)
+  - **CEL expressions** (`_index.md`)
+    - **Variables** (`variables.md`)
+    - **Functions** (`functions.md`)
+    - **Examples** (`examples.md`)
+
+## Cross-Linking Strategy
+
+- Status page → CEL reference `_index.md` for full details
+- Status page → heartbeat plugin for base config syntax
+- CEL examples page → status page for UI context
+- CEL variables/functions pages are **self-contained** (standalone, no dependency on heartbeat plugin docs)
+
+## Design Decisions
+
+1. **Standalone CEL reference** — The TC CEL reference is self-contained with its own variable and function documentation, independent of the heartbeat plugin page. Users configuring statuses in Controller shouldn't need to navigate to plugin docs for the variable reference.
+2. **Status page as practical guide** — Includes 2-3 inline examples for quick start; full reference lives in the CEL section.
+3. **Multi-page reference** — Keeps pages shorter and searchable. Variables, functions, and examples each get their own page. Function pages can be split further by category later if they grow large.
+4. **Consistent example format** — Every example includes scenario, expression, full config block, and explanation.