feat(testing): add link validation automation and improvements
- Add GitHub Actions for automated link validation on PRs - Implement incremental validation with caching (30-day TTL, configurable) - Add matrix generator for parallel validation strategy - Create comprehensive TESTING.md documentation - Add cache manager with configurable TTL via env var or CLI - Implement smart link extraction and validation - Add PR comment generator for broken link reports - Update Cypress tests to use incremental validation - Consolidate testing docs from CONTRIBUTING.md to TESTING.md Key improvements: - Cache-aware validation only checks changed content - Parallel execution for large changesets - Detailed PR comments with broken link reports - Support for LINK_CACHE_TTL_DAYS env var - Local testing with yarn test:links - Reduced false positives through intelligent cachingpull/6255/head
parent
d3f60a7d7d
commit
6a4e8827eb
|
|
@ -0,0 +1,87 @@
|
|||
# Composite action: downloads broken-link report artifacts, generates a PR
# comment from them, posts the comment, and fails the workflow when broken
# links were actually found.
name: 'Report Broken Links'
description: 'Downloads broken link reports, generates PR comment, and posts results'

inputs:
  github-token:
    description: 'GitHub token for posting comments'
    required: false
    default: ${{ github.token }}
  max-links-per-file:
    description: 'Maximum links to show per file in comment'
    required: false
    default: '20'
  include-success-message:
    description: 'Include success message when no broken links found'
    required: false
    default: 'true'

outputs:
  has-broken-links:
    description: 'Whether broken links were found (true/false)'
    value: ${{ steps.generate-comment.outputs.has-broken-links }}
  broken-link-count:
    description: 'Number of broken links found'
    value: ${{ steps.generate-comment.outputs.broken-link-count }}

runs:
  using: 'composite'
  steps:
    - name: Download broken link reports
      uses: actions/download-artifact@v4
      with:
        path: reports
      continue-on-error: true

    - name: Generate PR comment
      id: generate-comment
      run: |
        # Generate comment using our script.
        # comment-generator.js exits non-zero when broken links exist, so
        # tolerate failure here and inspect the output file instead.
        node .github/scripts/comment-generator.js \
          --max-links ${{ inputs.max-links-per-file }} \
          ${{ inputs.include-success-message == 'false' && '--no-success' || '' }} \
          --output-file comment.md \
          reports/ || echo "No reports found or errors occurred"

        # Check if comment file was created and has content
        if [[ -f comment.md && -s comment.md ]]; then
          echo "comment-generated=true" >> $GITHUB_OUTPUT

          # Count broken links by parsing the comment.
          # -m 1 limits grep to the first matching line so a multi-line value
          # can never corrupt $GITHUB_OUTPUT.
          broken_count=$(grep -o -m 1 "Found [0-9]* broken link" comment.md | grep -o "[0-9]*" || echo "0")
          echo "broken-link-count=$broken_count" >> $GITHUB_OUTPUT

          # A non-empty comment.md is NOT proof of broken links: when
          # include-success-message is enabled, a passing run also writes a
          # (success) comment. Derive has-broken-links from the parsed count
          # so successful runs are not reported as failures.
          if [[ "$broken_count" -gt 0 ]]; then
            echo "has-broken-links=true" >> $GITHUB_OUTPUT
          else
            echo "has-broken-links=false" >> $GITHUB_OUTPUT
          fi
        else
          echo "has-broken-links=false" >> $GITHUB_OUTPUT
          echo "broken-link-count=0" >> $GITHUB_OUTPUT
          echo "comment-generated=false" >> $GITHUB_OUTPUT
        fi
      shell: bash

    - name: Post PR comment
      if: steps.generate-comment.outputs.comment-generated == 'true'
      uses: actions/github-script@v7
      with:
        github-token: ${{ inputs.github-token }}
        script: |
          const fs = require('fs');

          if (fs.existsSync('comment.md')) {
            const comment = fs.readFileSync('comment.md', 'utf8');

            if (comment.trim()) {
              await github.rest.issues.createComment({
                issue_number: context.issue.number,
                owner: context.repo.owner,
                repo: context.repo.repo,
                body: comment
              });
            }
          }

    # Enforcement point: the generate step deliberately swallows the
    # comment-generator's non-zero exit, so the job is failed here instead.
    - name: Set workflow status
      if: steps.generate-comment.outputs.has-broken-links == 'true'
      run: |
        broken_count="${{ steps.generate-comment.outputs.broken-link-count }}"
        echo "::error::Found $broken_count broken link(s)"
        exit 1
      shell: bash
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
# Composite action: standard Node.js + Yarn setup shared by the
# documentation workflows.
name: 'Setup Documentation Environment'
description: 'Sets up Node.js environment and installs dependencies for documentation workflows'

runs:
  using: 'composite'
  steps:
    - name: Setup Node.js
      uses: actions/setup-node@v4
      with:
        node-version: '20'
        # Caches the Yarn package store between workflow runs.
        cache: 'yarn'

    - name: Install dependencies
      # NOTE(review): consider `yarn install --frozen-lockfile` (Yarn 1) or
      # `--immutable` (Yarn Berry) so CI fails on lockfile drift — confirm
      # which Yarn version this repo uses.
      run: yarn install
      shell: bash
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
# Composite action: runs browser-based (Cypress) link validation against the
# Hugo site for a given set of content files, with optional result caching.
name: 'Validate Links'
description: 'Runs e2e browser-based link validation tests against Hugo site using Cypress'

inputs:
  files:
    description: 'Space-separated list of files to validate'
    required: true
  product-name:
    description: 'Product name for reporting (optional)'
    required: false
    default: ''
  cache-enabled:
    description: 'Enable link validation caching'
    required: false
    default: 'true'
  cache-key:
    description: 'Cache key prefix for this validation run'
    required: false
    default: 'link-validation'

outputs:
  failed:
    description: 'Whether validation failed (true/false)'
    value: ${{ steps.validate.outputs.failed }}

runs:
  using: 'composite'
  steps:
    # Restore cached validation results keyed on content hashes; partial
    # matches fall back through the broader restore-keys prefixes.
    - name: Restore link validation cache
      if: inputs.cache-enabled == 'true'
      uses: actions/cache@v4
      with:
        path: .cache/link-validation
        key: ${{ inputs.cache-key }}-${{ runner.os }}-${{ hashFiles('content/**/*.md', 'content/**/*.html') }}
        restore-keys: |
          ${{ inputs.cache-key }}-${{ runner.os }}-
          ${{ inputs.cache-key }}-

    - name: Run link validation
      id: validate
      # NOTE(review): inputs.files is interpolated directly into the shell
      # command — file names containing spaces or shell metacharacters would
      # break or inject; confirm the upstream matrix generator sanitizes it.
      run: |
        echo "Testing files: ${{ inputs.files }}"
        if [[ -n "${{ inputs.product-name }}" ]]; then
          echo "Product: ${{ inputs.product-name }}"
        fi

        if [[ "${{ inputs.cache-enabled }}" == "true" ]]; then
          echo "📦 Cache enabled for this validation run"
        fi

        # Run the validation
        if node cypress/support/run-e2e-specs.js \
          --spec "cypress/e2e/content/article-links.cy.js" \
          ${{ inputs.files }}; then
          echo "failed=false" >> $GITHUB_OUTPUT
        else
          echo "failed=true" >> $GITHUB_OUTPUT
          exit 1
        fi
      shell: bash
      env:
        CI: true
        # Consumed by the validation tooling to toggle cache reads/writes.
        CACHE_ENABLED: ${{ inputs.cache-enabled }}

    # The validation script writes its report to /tmp; upload it only when
    # the validation step failed. The optional product-name suffix keeps
    # artifact names unique across matrix jobs.
    - name: Upload broken links report
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: broken-links-report${{ inputs.product-name && format('-{0}', inputs.product-name) || '' }}
        path: /tmp/broken_links_report.json
|
||||
|
|
@ -18,7 +18,7 @@ Ready to contribute? Here's the essential workflow:
|
|||
2. [Fork and clone](#fork-and-clone-influxdata-documentation-repository) this repository
|
||||
3. [Install dependencies](#development-environment-setup) (Node.js, Yarn, Docker)
|
||||
4. Make your changes following [style guidelines](#making-changes)
|
||||
5. [Test your changes](TESTING.md) (pre-commit and pre-push hooks run automatically)
|
||||
6. [Submit a pull request](#submission-process)
|
||||
|
||||
For detailed setup and reference information, see the sections below.
|
||||
|
|
@ -169,33 +169,30 @@ For more information about generating InfluxDB API documentation, see the
|
|||
|
||||
---
|
||||
|
||||
### Pre-commit Hooks
|
||||
## Testing & Quality Assurance
|
||||
|
||||
docs-v2 uses Lefthook to manage Git hooks that run during pre-commit and pre-push. The hooks run the scripts defined in `package.json` to lint Markdown and test code blocks.
|
||||
When you try to commit changes (`git commit`), Git runs the configured hooks automatically before completing the commit.
|
||||
For comprehensive testing information, including code block testing, link validation, style linting, and advanced testing procedures, see **[TESTING.md](TESTING.md)**.
|
||||
|
||||
#### Skip pre-commit hooks
|
||||
### Quick Testing Reference
|
||||
|
||||
```bash
|
||||
# Test code blocks
|
||||
yarn test:codeblocks:all
|
||||
|
||||
# Test links
|
||||
yarn test:links content/influxdb3/core/**/*.md
|
||||
|
||||
# Run style linting
|
||||
docker compose run -T vale content/**/*.md
|
||||
```
|
||||
|
||||
Pre-commit hooks run automatically when you commit changes, testing your staged files with Vale, Prettier, Cypress, and Pytest. To skip hooks if needed:
|
||||
|
||||
```sh
|
||||
git commit -m "<COMMIT_MESSAGE>" --no-verify
|
||||
```
|
||||
# ... (see full CONTRIBUTING.md for complete example)
|
||||
```python
|
||||
print("Hello, world!")
|
||||
```
|
||||
|
||||
# ... (see full CONTRIBUTING.md for complete example)
|
||||
```sh
|
||||
docker compose run -T vale --config=content/influxdb/cloud-dedicated/.vale.ini --minAlertLevel=error content/influxdb/cloud-dedicated/write-data/**/*.md
|
||||
```
|
||||
|
||||
|
||||
1. Install the [Vale VSCode](https://marketplace.visualstudio.com/items?itemName=ChrisChinchilla.vale-vscode) extension.
|
||||
2. In the extension settings, set the `Vale:Vale CLI:Path` value to `${workspaceFolder}/node_modules/.bin/vale`.
|
||||
|
||||
|
||||
_See full CONTRIBUTING.md for complete details._
|
||||
---
|
||||
|
||||
### Commit Guidelines
|
||||
|
||||
|
|
@ -229,10 +226,6 @@ _For the complete Complete Frontmatter Reference reference, see frontmatter-refe
|
|||
|
||||
_For the complete Shortcodes Reference, see shortcodes-reference.instructions.md._
|
||||
|
||||
### Detailed Testing Setup
|
||||
|
||||
_For the complete Detailed Testing Setup reference, see testing-setup.instructions.md._
|
||||
|
||||
#### Vale style linting configuration
|
||||
|
||||
docs-v2 includes Vale writing style linter configurations to enforce documentation writing style rules, guidelines, branding, and vocabulary terms.
|
||||
|
|
|
|||
|
|
@ -1186,3 +1186,4 @@ Replace the following:
|
|||
- {{% code-placeholder-key %}}`API_TOKEN`{{% /code-placeholder-key %}}: your [InfluxDB API token](/influxdb/v2/admin/tokens/)
|
||||
```
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -4,127 +4,12 @@ applyTo: "content/**/*.md, layouts/**/*.html"
|
|||
|
||||
### Detailed Testing Setup
|
||||
|
||||
#### Set up test scripts and credentials
|
||||
For comprehensive testing information, including:
|
||||
- Code block testing setup and configuration
|
||||
- Link validation testing procedures
|
||||
- Style linting with Vale
|
||||
- Pre-commit hooks and GitHub Actions integration
|
||||
- Advanced testing procedures and troubleshooting
|
||||
|
||||
Tests for code blocks require your InfluxDB credentials and other typical
|
||||
InfluxDB configuration.
|
||||
|
||||
To set up your docs-v2 instance to run tests locally, do the following:
|
||||
|
||||
1. **Set executable permissions on test scripts** in `./test/src`:
|
||||
|
||||
```sh
|
||||
chmod +x ./test/src/*.sh
|
||||
```
|
||||
|
||||
2. **Create credentials for tests**:
|
||||
|
||||
- Create databases, buckets, and tokens for the product(s) you're testing.
|
||||
- If you don't have access to a Clustered instance, you can use your
|
||||
Cloud Dedicated instance for testing in most cases. To avoid conflicts when
|
||||
running tests, create separate Cloud Dedicated and Clustered databases.
|
||||
|
||||
1. **Create .env.test**: Copy the `./test/env.test.example` file into each
|
||||
product directory to test and rename the file as `.env.test`--for example:
|
||||
|
||||
```sh
|
||||
./content/influxdb/cloud-dedicated/.env.test
|
||||
```
|
||||
|
||||
2. Inside each product's `.env.test` file, assign your InfluxDB credentials to
|
||||
environment variables:
|
||||
|
||||
- Include the usual `INFLUX_` environment variables
|
||||
- In
|
||||
`cloud-dedicated/.env.test` and `clustered/.env.test` files, also define the
|
||||
following variables:
|
||||
|
||||
- `ACCOUNT_ID`, `CLUSTER_ID`: You can find these values in your `influxctl`
|
||||
`config.toml` configuration file.
|
||||
- `MANAGEMENT_TOKEN`: Use the `influxctl management create` command to generate
|
||||
a long-lived management token to authenticate Management API requests
|
||||
|
||||
See the substitution
|
||||
patterns in `./test/src/prepare-content.sh` for the full list of variables you may need to define in your `.env.test` files.
|
||||
|
||||
3. For influxctl commands to run in tests, move or copy your `config.toml` file
|
||||
to the `./test` directory.
|
||||
|
||||
> [!Warning]
|
||||
>
|
||||
> - The database you configure in `.env.test` and any written data may
|
||||
be deleted during test runs.
|
||||
> - Don't add your `.env.test` files to Git. To prevent accidentally adding credentials to the docs-v2 repo,
|
||||
> Git is configured to ignore `.env*` files. Consider backing them up on your local machine in case of accidental deletion.
|
||||
|
||||
#### Test shell and python code blocks
|
||||
|
||||
[pytest-codeblocks](https://github.com/nschloe/pytest-codeblocks/tree/main) extracts code from python and shell Markdown code blocks and executes assertions for the code.
|
||||
If you don't assert a value (using a Python `assert` statement), `--codeblocks` considers a non-zero exit code to be a failure.
|
||||
|
||||
**Note**: `pytest --codeblocks` uses Python's `subprocess.run()` to execute shell code.
|
||||
|
||||
You can use this to test CLI and interpreter commands, regardless of programming
|
||||
language, as long as they return standard exit codes.
|
||||
|
||||
To make the documented output of a code block testable, precede it with the
|
||||
`<!--pytest-codeblocks:expected-output-->` tag and **omit the code block language
|
||||
descriptor**--for example, in your Markdown file:
|
||||
|
||||
##### Example markdown
|
||||
|
||||
```python
|
||||
print("Hello, world!")
|
||||
```
|
||||
|
||||
<!--pytest-codeblocks:expected-output-->
|
||||
|
||||
The next code block is treated as an assertion.
|
||||
If successful, the output is the following:
|
||||
|
||||
```
|
||||
Hello, world!
|
||||
```
|
||||
|
||||
For commands, such as `influxctl` CLI commands, that require launching an
|
||||
OAuth URL in a browser, wrap the command in a subshell and redirect the output
|
||||
to `/shared/urls.txt` in the container--for example:
|
||||
|
||||
```sh
|
||||
# Test the preceding command outside of the code block.
|
||||
# influxctl authentication requires TTY interaction--
|
||||
# output the auth URL to a file that the host can open.
|
||||
script -c "influxctl user list " \
|
||||
/dev/null > /shared/urls.txt
|
||||
```
|
||||
|
||||
You probably don't want to display this syntax in the docs, which unfortunately
|
||||
means you'd need to include the test block separately from the displayed code
|
||||
block.
|
||||
To hide it from users, wrap the code block inside an HTML comment.
|
||||
pytest-codeblocks will still collect and run the code block.
|
||||
|
||||
##### Mark tests to skip
|
||||
|
||||
pytest-codeblocks has features for skipping tests and marking blocks as failed.
|
||||
To learn more, see the pytest-codeblocks README and tests.
|
||||
|
||||
#### Troubleshoot tests
|
||||
|
||||
##### Pytest collected 0 items
|
||||
|
||||
Potential reasons:
|
||||
|
||||
- See the test discovery options in `pytest.ini`.
|
||||
- For Python code blocks, use the following delimiter:
|
||||
|
||||
```python
|
||||
# Codeblocks runs this block.
|
||||
```
|
||||
|
||||
`pytest --codeblocks` ignores code blocks that use the following:
|
||||
|
||||
```py
|
||||
# Codeblocks ignores this block.
|
||||
```
|
||||
Please refer to the main **[TESTING.md](../../TESTING.md)** file.
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,169 @@
|
|||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* Simple Cache Manager for Link Validation Results
|
||||
* Uses GitHub Actions cache API or local file storage
|
||||
*/
|
||||
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import crypto from 'crypto';
|
||||
import process from 'process';
|
||||
|
||||
// Bump CACHE_VERSION to invalidate all previously cached entries.
const CACHE_VERSION = 'v1';
const CACHE_KEY_PREFIX = 'link-validation';
// Default on-disk location for local (non-CI) cache entries.
const LOCAL_CACHE_DIR = path.join(process.cwd(), '.cache', 'link-validation');

/**
 * Simple cache for link-validation results, keyed on (file path, file hash).
 *
 * In GitHub Actions (GITHUB_ACTIONS env set and `useGitHubCache` not
 * explicitly false) the get/set methods are placeholders — the workflow is
 * expected to use actions/cache directly. Otherwise entries are stored as
 * JSON files under `localCacheDir` with a configurable TTL.
 */
class CacheManager {
  /**
   * @param {Object} [options]
   * @param {boolean} [options.useGitHubCache] - Pass false to force local file cache.
   * @param {string} [options.localCacheDir] - Directory for local cache files.
   * @param {number} [options.cacheTTLDays] - Entry lifetime in days (default 30,
   *   overridable via the LINK_CACHE_TTL_DAYS environment variable).
   */
  constructor(options = {}) {
    this.useGitHubCache =
      options.useGitHubCache !== false && process.env.GITHUB_ACTIONS;
    this.localCacheDir = options.localCacheDir || LOCAL_CACHE_DIR;

    // Configurable cache TTL — default 30 days, overridable via env var.
    // Parse with an explicit radix and reject non-numeric or non-positive
    // values so a bad env var cannot produce a negative/NaN max age.
    const envTTL = Number.parseInt(process.env.LINK_CACHE_TTL_DAYS, 10);
    this.cacheTTLDays = options.cacheTTLDays || (envTTL > 0 ? envTTL : 30);
    this.maxAge = this.cacheTTLDays * 24 * 60 * 60 * 1000;

    if (!this.useGitHubCache) {
      this.ensureLocalCacheDir();
    }
  }

  /** Create the local cache directory if it does not exist. */
  ensureLocalCacheDir() {
    if (!fs.existsSync(this.localCacheDir)) {
      fs.mkdirSync(this.localCacheDir, { recursive: true });
    }
  }

  /**
   * Build a cache key from the file path and content hash. The path is
   * hashed (first 8 hex chars) so keys stay filesystem-safe regardless of
   * path separators or length.
   */
  generateCacheKey(filePath, fileHash) {
    const pathHash = crypto
      .createHash('sha256')
      .update(filePath)
      .digest('hex')
      .substring(0, 8);
    return `${CACHE_KEY_PREFIX}-${CACHE_VERSION}-${pathHash}-${fileHash}`;
  }

  /**
   * Look up cached results for a file version.
   * @returns {Promise<*>} Cached results, or null on miss/expiry.
   */
  async get(filePath, fileHash) {
    if (this.useGitHubCache) {
      return await this.getFromGitHubCache(filePath, fileHash);
    } else {
      return await this.getFromLocalCache(filePath, fileHash);
    }
  }

  /**
   * Store results for a file version.
   * @returns {Promise<boolean>} true on success.
   */
  async set(filePath, fileHash, results) {
    if (this.useGitHubCache) {
      return await this.setToGitHubCache(filePath, fileHash, results);
    } else {
      return await this.setToLocalCache(filePath, fileHash, results);
    }
  }

  // Placeholder: in GitHub Actions the workflow uses actions/cache directly,
  // so in-process reads always miss.
  async getFromGitHubCache(filePath, fileHash) {
    return null;
  }

  // Placeholder: in GitHub Actions the workflow uses actions/cache directly,
  // so in-process writes are no-ops that report success.
  async setToGitHubCache(filePath, fileHash, results) {
    return true;
  }

  /** Read a local cache entry, enforcing the TTL and pruning stale/corrupt files. */
  async getFromLocalCache(filePath, fileHash) {
    const cacheKey = this.generateCacheKey(filePath, fileHash);
    const cacheFile = path.join(this.localCacheDir, `${cacheKey}.json`);

    if (!fs.existsSync(cacheFile)) {
      return null;
    }

    try {
      const content = fs.readFileSync(cacheFile, 'utf8');
      const cached = JSON.parse(content);

      // TTL check using configured cache duration
      const age = Date.now() - new Date(cached.cachedAt).getTime();

      if (age > this.maxAge) {
        // Expired — remove eagerly and report a miss.
        fs.unlinkSync(cacheFile);
        return null;
      }

      return cached.results;
    } catch {
      // Unreadable or malformed entry: clean it up and treat as a miss.
      try {
        fs.unlinkSync(cacheFile);
      } catch {
        // Ignore cleanup errors
      }
      return null;
    }
  }

  /** Write a local cache entry; returns false (with a warning) on failure. */
  async setToLocalCache(filePath, fileHash, results) {
    const cacheKey = this.generateCacheKey(filePath, fileHash);
    const cacheFile = path.join(this.localCacheDir, `${cacheKey}.json`);

    const cacheData = {
      filePath,
      fileHash,
      results,
      cachedAt: new Date().toISOString(),
    };

    try {
      fs.writeFileSync(cacheFile, JSON.stringify(cacheData, null, 2));
      return true;
    } catch (error) {
      // Cache writes are best-effort; a failure must not fail validation.
      console.warn(`Cache save failed: ${error.message}`);
      return false;
    }
  }

  /**
   * Delete local cache entries older than the TTL (by file mtime) and any
   * unreadable files. No-op under GitHub Actions.
   * @returns {Promise<{removed: number, note?: string}>}
   */
  async cleanup() {
    if (this.useGitHubCache) {
      return { removed: 0, note: 'GitHub Actions cache auto-managed' };
    }

    let removed = 0;
    if (!fs.existsSync(this.localCacheDir)) {
      return { removed };
    }

    const files = fs.readdirSync(this.localCacheDir);

    for (const file of files) {
      if (!file.endsWith('.json')) continue;

      const filePath = path.join(this.localCacheDir, file);
      try {
        const stat = fs.statSync(filePath);
        if (Date.now() - stat.mtime.getTime() > this.maxAge) {
          fs.unlinkSync(filePath);
          removed++;
        }
      } catch {
        // Remove corrupted files
        try {
          fs.unlinkSync(filePath);
          removed++;
        } catch {
          // Ignore errors
        }
      }
    }

    return { removed };
  }
}

export default CacheManager;
export { CacheManager };
|
||||
|
|
@ -0,0 +1,328 @@
|
|||
/**
|
||||
* Comment Generator for Link Validation Results
|
||||
* Standardizes PR comment generation across workflows
|
||||
* Includes cache performance metrics and optimization info
|
||||
*/
|
||||
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import process from 'process';
|
||||
|
||||
/**
 * Normalize broken link data from different report formats.
 *
 * Accepts either an array of entries or a single entry object (the JSDoc
 * contract is Object|Array; previously a bare object silently produced []).
 * Each entry is either a container — `{ sourceFile, links: [...] }` — or a
 * direct link object.
 *
 * @param {Object|Array} reportData - Raw report data
 * @returns {Array} - Normalized array of broken links
 */
function normalizeBrokenLinks(reportData) {
  if (!reportData) return [];

  // Generalize: treat a single report object as a one-element list so the
  // documented Object|Array contract actually holds. Arrays behave exactly
  // as before.
  const entries = Array.isArray(reportData) ? reportData : [reportData];
  const links = [];

  // Shared shape for both formats; the source file always comes from the
  // containing entry, the link fields from the link object itself.
  const toNormalized = (link, sourceFile) => ({
    sourceFile,
    url: link.url || link.href,
    linkText: link.linkText || link.url || link.href,
    status: link.status,
    error: link.error,
    type: link.type,
  });

  entries.forEach((item) => {
    const source = item.sourceFile || item.page || 'Unknown';
    if (item.links && Array.isArray(item.links)) {
      // Format: { sourceFile: "file.md", links: [...] }
      item.links.forEach((link) => links.push(toNormalized(link, source)));
    } else {
      // Format: direct link object
      links.push(toNormalized(item, source));
    }
  });

  return links;
}
|
||||
|
||||
/**
 * Group broken links by their source file.
 * Links without a `sourceFile` are collected under the key 'Unknown'.
 *
 * @param {Array} brokenLinks - Array of normalized broken links
 * @returns {Object} - Object keyed by source file, each value an array of links
 */
function groupLinksBySource(brokenLinks) {
  return brokenLinks.reduce((grouped, link) => {
    const source = link.sourceFile || 'Unknown';
    if (!grouped[source]) {
      grouped[source] = [];
    }
    grouped[source].push(link);
    return grouped;
  }, {});
}
|
||||
|
||||
/**
|
||||
* Generate markdown comment for PR
|
||||
* @param {Array} allBrokenLinks - Array of all broken links
|
||||
* @param {Object} options - Generation options
|
||||
* @returns {string} - Markdown comment content
|
||||
*/
|
||||
/**
 * Load cache statistics from a reports directory.
 * Reads `cache_statistics.json` from `reportsDir`; any read or parse failure
 * is logged as a warning and reported as "no stats".
 *
 * @param {string} reportsDir - Directory containing reports
 * @returns {Object|null} Cache statistics or null if not found
 */
function loadCacheStats(reportsDir) {
  const statsPath = path.join(reportsDir, 'cache_statistics.json');
  try {
    if (fs.existsSync(statsPath)) {
      return JSON.parse(fs.readFileSync(statsPath, 'utf8'));
    }
  } catch (error) {
    console.warn(`Warning: Could not load cache stats: ${error.message}`);
  }
  return null;
}
|
||||
|
||||
/**
 * Generate a markdown PR comment from normalized broken links.
 *
 * Layout: optional cache-performance header, then either a success section
 * (when there are no broken links) or a per-source-file listing capped at
 * `maxLinksPerFile` visible entries (overflow goes into a <details> block),
 * followed by an optional "Action Required" footer.
 *
 * @param {Array} allBrokenLinks - Normalized broken links (see normalizeBrokenLinks)
 * @param {Object} [options]
 * @param {boolean} [options.includeSuccessMessage=true] - Emit success text when no broken links.
 * @param {boolean} [options.includeStats=true] - Emit the "Found N broken link(s)" line.
 * @param {boolean} [options.includeActionRequired=true] - Emit the footer.
 * @param {number} [options.maxLinksPerFile=20] - Visible links per file before folding.
 * @param {Object} [options.cacheStats] - Pre-loaded cache stats; wins over reportsDir.
 * @param {string} [options.reportsDir] - Directory to load cache_statistics.json from.
 * @returns {string} Markdown comment; empty string on success when
 *   includeSuccessMessage is false.
 */
function generateComment(allBrokenLinks, options = {}) {
  const {
    includeSuccessMessage = true,
    includeStats = true,
    includeActionRequired = true,
    maxLinksPerFile = 20,
    cacheStats = null,
    reportsDir = null,
  } = options;

  // Load cache stats if reports directory is provided
  const actualCacheStats =
    cacheStats || (reportsDir ? loadCacheStats(reportsDir) : null);

  let comment = '';

  // Add cache performance metrics at the top
  // NOTE(review): assumes stats have numeric hitRate/cacheHits/cacheMisses
  // fields — confirm against the producer of cache_statistics.json.
  if (actualCacheStats) {
    comment += '## 📊 Link Validation Performance\n\n';
    comment += `- **Cache Hit Rate:** ${actualCacheStats.hitRate}%\n`;
    comment += `- **Files Cached:** ${actualCacheStats.cacheHits} (skipped validation)\n`;
    comment += `- **Files Validated:** ${actualCacheStats.cacheMisses}\n`;

    if (actualCacheStats.hitRate >= 50) {
      comment +=
        '- **Performance:** 🚀 Cache optimization saved significant validation time!\n';
    } else if (actualCacheStats.hitRate > 0) {
      comment +=
        '- **Performance:** ⚡ Some files were cached, improving validation speed\n';
    }
    comment += '\n';
  }

  // Success path: no broken links at all.
  if (!allBrokenLinks || allBrokenLinks.length === 0) {
    comment += '## ✅ Link Validation Passed\n\n';
    comment += 'All links in the changed files are valid!';

    // Strict === 100 intentionally requires a perfect cache run.
    if (actualCacheStats && actualCacheStats.hitRate === 100) {
      comment += '\n\n✨ **All files were cached** - no validation was needed!';
    }

    // When the caller opted out of success messages, suppress the whole
    // comment (including any cache header built above).
    return includeSuccessMessage ? comment : '';
  }

  comment += '## 🔗 Broken Links Found\n\n';

  if (includeStats) {
    comment += `Found ${allBrokenLinks.length} broken link(s) in the changed files:\n\n`;
  }

  // Group by source file
  const bySource = groupLinksBySource(allBrokenLinks);

  // Generate sections for each source file
  for (const [source, links] of Object.entries(bySource)) {
    comment += `### ${source}\n\n`;

    // Show at most maxLinksPerFile entries inline; fold the rest.
    const displayLinks = links.slice(0, maxLinksPerFile);
    const hiddenCount = links.length - displayLinks.length;

    displayLinks.forEach((link) => {
      const url = link.url || 'Unknown URL';
      const linkText = link.linkText || url;
      const status = link.status || 'Unknown';

      comment += `- [ ] **${linkText}** → \`${url}\`\n`;
      comment += ` - Status: ${status}\n`;

      if (link.type) {
        comment += ` - Type: ${link.type}\n`;
      }

      if (link.error) {
        comment += ` - Error: ${link.error}\n`;
      }

      comment += '\n';
    });

    // Overflow links get a compact one-line format inside <details>.
    if (hiddenCount > 0) {
      comment += `<details>\n<summary>... and ${hiddenCount} more broken link(s)</summary>\n\n`;

      links.slice(maxLinksPerFile).forEach((link) => {
        const url = link.url || 'Unknown URL';
        const linkText = link.linkText || url;
        const status = link.status || 'Unknown';

        comment += `- [ ] **${linkText}** → \`${url}\` (Status: ${status})\n`;
      });

      comment += '\n</details>\n\n';
    }
  }

  if (includeActionRequired) {
    comment += '\n---\n';
    comment +=
      '**Action Required:** Please fix the broken links before merging this PR.';
  }

  return comment;
}
|
||||
|
||||
/**
 * Load and merge broken link reports from downloaded artifacts.
 * Scans `reportsDir` for `broken-links-*` subdirectories, parses each
 * `broken_links_report.json`, and concatenates the normalized links.
 * Unreadable files or directories are logged and skipped.
 *
 * @param {string} reportsDir - Directory containing report artifacts
 * @returns {Array} - Array of all broken links
 */
function loadBrokenLinkReports(reportsDir) {
  const allBrokenLinks = [];

  if (!fs.existsSync(reportsDir)) {
    return allBrokenLinks;
  }

  try {
    const artifactDirs = fs
      .readdirSync(reportsDir)
      .filter((name) => name.startsWith('broken-links-'));

    for (const artifactDir of artifactDirs) {
      const reportPath = path.join(
        reportsDir,
        artifactDir,
        'broken_links_report.json'
      );
      if (!fs.existsSync(reportPath)) {
        continue;
      }

      try {
        const reportData = JSON.parse(fs.readFileSync(reportPath, 'utf8'));
        allBrokenLinks.push(...normalizeBrokenLinks(reportData));
      } catch (e) {
        console.error(`Error reading ${reportPath}: ${e.message}`);
      }
    }
  } catch (e) {
    console.error(
      `Error reading reports directory ${reportsDir}: ${e.message}`
    );
  }

  return allBrokenLinks;
}
|
||||
|
||||
/**
 * CLI interface for the comment generator.
 *
 * Parses CLI flags, loads broken-link reports from the given directory,
 * prints (or writes) the generated PR comment, and exits non-zero when
 * broken links are present so CI can fail the job.
 */
function main() {
  const args = process.argv.slice(2);

  if (args.includes('--help') || args.includes('-h')) {
    console.log(`
Usage: node comment-generator.js [options] <reports-dir>

Options:
  --no-success          Don't include success message when no broken links
  --no-stats            Don't include broken link statistics
  --no-action-required  Don't include action required message
  --max-links <n>       Maximum links to show per file (default: 20)
  --output-file <file>  Write comment to file instead of stdout
  --help, -h            Show this help message

Examples:
  node comment-generator.js reports/
  node comment-generator.js --max-links 10 --output-file comment.md reports/
`);
    process.exit(0);
  }

  // Parse arguments
  let reportsDir = '';
  const options = {
    includeSuccessMessage: true,
    includeStats: true,
    includeActionRequired: true,
    maxLinksPerFile: 20,
  };
  let outputFile = null;

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];

    if (arg === '--no-success') {
      options.includeSuccessMessage = false;
    } else if (arg === '--no-stats') {
      options.includeStats = false;
    } else if (arg === '--no-action-required') {
      options.includeActionRequired = false;
    } else if (arg === '--max-links' && i + 1 < args.length) {
      // Explicit radix, and reject non-positive/NaN values: previously a bad
      // value made maxLinksPerFile NaN, and links.slice(0, NaN) hid every
      // link from the report.
      const maxLinks = Number.parseInt(args[++i], 10);
      if (Number.isInteger(maxLinks) && maxLinks > 0) {
        options.maxLinksPerFile = maxLinks;
      } else {
        console.error('Error: --max-links expects a positive integer');
        process.exit(1);
      }
    } else if (arg === '--output-file' && i + 1 < args.length) {
      outputFile = args[++i];
    } else if (!arg.startsWith('--')) {
      // First bare argument wins as the reports directory.
      reportsDir = arg;
    }
  }

  if (!reportsDir) {
    console.error('Error: reports directory is required');
    process.exit(1);
  }

  // Load reports and generate comment with cache stats
  const brokenLinks = loadBrokenLinkReports(reportsDir);
  options.reportsDir = reportsDir;
  const comment = generateComment(brokenLinks, options);

  if (outputFile) {
    fs.writeFileSync(outputFile, comment);
    console.log(`Comment written to ${outputFile}`);
  } else {
    console.log(comment);
  }

  // Exit with error code if there are broken links
  if (brokenLinks.length > 0) {
    process.exit(1);
  }
}
||||
|
||||
// Run CLI if this file is executed directly
// NOTE(review): this comparison assumes a POSIX path in process.argv[1];
// on Windows the file:// URL form differs (drive letters, percent-encoding)
// — confirm whether url.pathToFileURL is needed for Windows support.
if (import.meta.url === `file://${process.argv[1]}`) {
  main();
}

export {
  generateComment,
  loadBrokenLinkReports,
  normalizeBrokenLinks,
  groupLinksBySource,
};
|
||||
|
|
@ -0,0 +1,228 @@
|
|||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* Incremental Link Validator
|
||||
* Combines link extraction and caching to validate only changed links
|
||||
*/
|
||||
|
||||
import { extractLinksFromFile } from './link-extractor.js';
|
||||
import { CacheManager } from './cache-manager.js';
|
||||
import process from 'process';
|
||||
|
||||
/**
|
||||
* Incremental validator that only validates changed content
|
||||
*/
|
||||
class IncrementalValidator {
|
||||
/**
 * @param {Object} [options] - Forwarded verbatim to CacheManager (TTL,
 *   cache directory, GitHub-vs-local storage selection).
 * @param {boolean} [options.validateExternal=true] - Opt out with explicit false.
 * @param {boolean} [options.validateInternal=true] - Opt out with explicit false.
 */
constructor(options = {}) {
  // All caching behavior (keys, TTL, storage backend) is delegated.
  this.cacheManager = new CacheManager(options);
  // `!== false` means anything other than an explicit false enables the flag.
  // NOTE(review): neither flag is referenced in the visible portion of this
  // class — confirm they are used by methods outside this view.
  this.validateExternal = options.validateExternal !== false;
  this.validateInternal = options.validateInternal !== false;
}
|
||||
|
||||
/**
 * Get validation strategy for a list of files.
 *
 * For each file, extracts its links and content hash, then consults the
 * cache: a hit means the file is unchanged (validation can be skipped);
 * a miss means the file is new or changed and its validation-needing links
 * are queued. Extraction/processing errors demote the file to `changed`
 * so it is always validated rather than silently skipped.
 *
 * @param {Array} filePaths - Array of file paths
 * @returns {Object} Validation strategy with files categorized:
 *   { unchanged: [...], changed: [...], newLinks: string[], total: number }
 */
async getValidationStrategy(filePaths) {
  const strategy = {
    unchanged: [], // Files that haven't changed (skip validation)
    changed: [], // Files that changed (need full validation)
    newLinks: [], // New links across all files (need validation)
    total: filePaths.length,
  };

  // Set deduplicates URLs that appear in more than one changed file.
  const allNewLinks = new Set();

  for (const filePath of filePaths) {
    try {
      // NOTE(review): assumes extractLinksFromFile returns
      // { fileHash, links: [{ url, needsValidation, ... }] } or a falsy
      // value on failure — confirm against link-extractor.js.
      const extractionResult = extractLinksFromFile(filePath);
      if (!extractionResult) {
        console.warn(`Could not extract links from ${filePath}`);
        continue;
      }

      const { fileHash, links } = extractionResult;

      // Check if we have cached results for this file version
      // (cache key is the (path, content-hash) pair, so any edit is a miss).
      const cachedResults = await this.cacheManager.get(filePath, fileHash);

      if (cachedResults) {
        // File unchanged, skip validation
        strategy.unchanged.push({
          filePath,
          fileHash,
          linkCount: links.length,
          cachedResults,
        });
      } else {
        // File changed or new, needs validation
        strategy.changed.push({
          filePath,
          fileHash,
          links: links.filter((link) => link.needsValidation),
          extractionResult,
        });

        // Collect all new links for batch validation
        links
          .filter((link) => link.needsValidation)
          .forEach((link) => allNewLinks.add(link.url));
      }
    } catch (error) {
      console.error(`Error processing ${filePath}: ${error.message}`);
      // Treat as changed file to ensure validation
      // (entry carries `error` instead of fileHash/links).
      strategy.changed.push({
        filePath,
        error: error.message,
      });
    }
  }

  strategy.newLinks = Array.from(allNewLinks);

  return strategy;
}
|
||||
|
||||
/**
|
||||
* Validate files using incremental strategy
|
||||
* @param {Array} filePaths - Files to validate
|
||||
* @returns {Object} Validation results
|
||||
*/
|
||||
async validateFiles(filePaths) {
|
||||
console.log(
|
||||
`📊 Analyzing ${filePaths.length} files for incremental validation...`
|
||||
);
|
||||
|
||||
const strategy = await this.getValidationStrategy(filePaths);
|
||||
|
||||
console.log(`✅ ${strategy.unchanged.length} files unchanged (cached)`);
|
||||
console.log(`🔄 ${strategy.changed.length} files need validation`);
|
||||
console.log(`🔗 ${strategy.newLinks.length} unique links to validate`);
|
||||
|
||||
const results = {
|
||||
validationStrategy: strategy,
|
||||
filesToValidate: strategy.changed.map((item) => ({
|
||||
filePath: item.filePath,
|
||||
linkCount: item.links ? item.links.length : 0,
|
||||
})),
|
||||
cacheStats: {
|
||||
cacheHits: strategy.unchanged.length,
|
||||
cacheMisses: strategy.changed.length,
|
||||
hitRate:
|
||||
strategy.total > 0
|
||||
? Math.round((strategy.unchanged.length / strategy.total) * 100)
|
||||
: 0,
|
||||
},
|
||||
};
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Store validation results in cache
|
||||
* @param {string} filePath - File path
|
||||
* @param {string} fileHash - File hash
|
||||
* @param {Object} validationResults - Results to cache
|
||||
* @returns {Promise<boolean>} Success status
|
||||
*/
|
||||
async cacheResults(filePath, fileHash, validationResults) {
|
||||
return await this.cacheManager.set(filePath, fileHash, validationResults);
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean up expired cache entries
|
||||
* @returns {Promise<Object>} Cleanup statistics
|
||||
*/
|
||||
async cleanupCache() {
|
||||
return await this.cacheManager.cleanup();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * CLI usage: parses flags, runs cache cleanup or incremental analysis, and
 * prints a human-readable summary plus the list of files that still need
 * validation (consumed downstream by Cypress).
 *
 * Exits 0 on success/help, 1 on invalid arguments.
 */
async function main() {
  const args = process.argv.slice(2);

  if (args.length === 0 || args[0] === '--help') {
    console.log(`
Incremental Link Validator

Usage:
  node incremental-validator.js [files...]  Analyze files for validation
  node incremental-validator.js --cleanup   Clean up expired cache
  node incremental-validator.js --help      Show this help

Options:
  --no-external     Don't validate external links
  --no-internal     Don't validate internal links
  --local           Use local cache instead of GitHub Actions cache
  --cache-ttl=DAYS  Set cache TTL in days (default: 30)

Examples:
  node incremental-validator.js content/**/*.md
  node incremental-validator.js --cache-ttl=7 content/**/*.md
  node incremental-validator.js --cleanup
`);
    process.exit(0);
  }

  if (args[0] === '--cleanup') {
    const validator = new IncrementalValidator();
    const stats = await validator.cleanupCache();
    console.log(`🧹 Cleaned up ${stats.removed} expired cache entries`);
    if (stats.note) console.log(`ℹ️ ${stats.note}`);
    return;
  }

  const options = {
    validateExternal: !args.includes('--no-external'),
    validateInternal: !args.includes('--no-internal'),
    useGitHubCache: !args.includes('--local'),
  };

  // Extract cache TTL option if provided
  const cacheTTLArg = args.find((arg) => arg.startsWith('--cache-ttl='));
  if (cacheTTLArg) {
    // Parse with an explicit radix and validate: previously
    // `parseInt(value)` could silently propagate NaN (e.g. for
    // `--cache-ttl=abc`) into the cache layer.
    const rawTTL = cacheTTLArg.split('=')[1];
    const ttlDays = Number.parseInt(rawTTL, 10);
    if (!Number.isInteger(ttlDays) || ttlDays <= 0) {
      console.error(`Invalid --cache-ttl value: ${rawTTL}`);
      process.exit(1);
    }
    options.cacheTTLDays = ttlDays;
  }

  const filePaths = args.filter((arg) => !arg.startsWith('--'));

  if (filePaths.length === 0) {
    console.error('No files specified for validation');
    process.exit(1);
  }

  const validator = new IncrementalValidator(options);
  const results = await validator.validateFiles(filePaths);

  console.log('\n📈 Validation Analysis Results:');
  console.log('================================');
  console.log(`Cache hit rate: ${results.cacheStats.hitRate}%`);
  console.log(`Files to validate: ${results.filesToValidate.length}`);

  if (results.filesToValidate.length > 0) {
    console.log('\nFiles needing validation:');
    results.filesToValidate.forEach((file) => {
      console.log(`  ${file.filePath} (${file.linkCount} links)`);
    });

    // Output files for Cypress to process
    console.log('\n# Files for Cypress validation (one per line):');
    results.filesToValidate.forEach((file) => {
      console.log(file.filePath);
    });
  } else {
    console.log('\n✨ All files are cached - no validation needed!');
  }
}
|
||||
|
||||
export default IncrementalValidator;
|
||||
export { IncrementalValidator };
|
||||
|
||||
// Run CLI if called directly
|
||||
if (import.meta.url === `file://${process.argv[1]}`) {
|
||||
main().catch(console.error);
|
||||
}
|
||||
|
|
@ -0,0 +1,473 @@
|
|||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* Link Extractor for Documentation Files
|
||||
* Extracts all links from markdown and HTML files with metadata for caching and incremental validation
|
||||
*/
|
||||
|
||||
import fs from 'fs';
|
||||
import crypto from 'crypto';
|
||||
import matter from 'gray-matter';
|
||||
import path from 'path';
|
||||
import process from 'process';
|
||||
|
||||
/**
 * Extract links from markdown content
 * @param {string} content - File content
 * @param {string} filePath - Path to the file
 * @returns {Array} Array of link objects with metadata
 */
function extractMarkdownLinks(content, filePath) {
  const links = [];
  const lines = content.split('\n');

  // Track reference-style link definitions
  const referenceLinks = new Map();

  // First pass: collect reference definitions ("[label]: url" lines).
  // Keys are lower-cased so reference lookups are case-insensitive.
  content.replace(/^\s*\[([^\]]+)\]:\s*(.+)$/gm, (match, ref, url) => {
    referenceLinks.set(ref.toLowerCase(), url.trim());
    return match;
  });

  // Process each line for links
  lines.forEach((line, lineIndex) => {
    const lineNumber = lineIndex + 1;

    // Standard markdown links: [text](url)
    let match;
    const standardLinkRegex = /\[([^\]]*)\]\(([^)]+)\)/g;
    while ((match = standardLinkRegex.exec(line)) !== null) {
      const linkText = match[1];
      const url = match[2];
      const columnStart = match.index;

      links.push({
        url: url.trim(),
        text: linkText,
        type: 'markdown',
        line: lineNumber,
        column: columnStart,
        context: line.trim(),
        hash: generateLinkHash(url.trim(), filePath, lineNumber),
      });
    }

    // Reference-style links: [text][ref]; an empty ref ("[text][]")
    // falls back to the link text as the reference key.
    const refLinkRegex = /\[([^\]]*)\]\[([^\]]*)\]/g;
    while ((match = refLinkRegex.exec(line)) !== null) {
      const linkText = match[1];
      const refKey = (match[2] || linkText).toLowerCase();
      const url = referenceLinks.get(refKey);

      // References with no matching definition are silently skipped.
      if (url) {
        const columnStart = match.index;
        links.push({
          url: url,
          text: linkText,
          type: 'markdown-reference',
          line: lineNumber,
          column: columnStart,
          context: line.trim(),
          reference: refKey,
          hash: generateLinkHash(url, filePath, lineNumber),
        });
      }
    }

    // Autolinks: <https://example.com>
    const autolinkRegex = /<(https?:\/\/[^>]+)>/g;
    while ((match = autolinkRegex.exec(line)) !== null) {
      const url = match[1];
      const columnStart = match.index;

      links.push({
        url: url,
        text: url,
        type: 'autolink',
        line: lineNumber,
        column: columnStart,
        context: line.trim(),
        hash: generateLinkHash(url, filePath, lineNumber),
      });
    }

    // Bare URLs (basic detection, avoid false positives)
    // NOTE(review): [^\s\)]+ keeps trailing punctuation (e.g. "," or ".")
    // as part of the URL — confirm whether that is intended.
    const bareUrlRegex = /(?:^|[\s\n])(https?:\/\/[^\s\)]+)/g;
    while ((match = bareUrlRegex.exec(line)) !== null) {
      const url = match[1];
      // match[0] may include the leading whitespace; offset past it so the
      // column points at the URL itself.
      const columnStart = match.index + match[0].length - url.length;

      // Skip if this URL is already captured in a proper markdown link
      // (same line, same URL, within a 10-column window — heuristic).
      const alreadyCaptured = links.some(
        (link) =>
          link.line === lineNumber &&
          Math.abs(link.column - columnStart) < 10 &&
          link.url === url
      );

      if (!alreadyCaptured) {
        links.push({
          url: url,
          text: url,
          type: 'bare-url',
          line: lineNumber,
          column: columnStart,
          context: line.trim(),
          hash: generateLinkHash(url, filePath, lineNumber),
        });
      }
    }
  });

  return links;
}
|
||||
|
||||
/**
 * Extract links from HTML content
 * @param {string} content - File content
 * @param {string} filePath - Path to the file
 * @returns {Array} Array of link objects with metadata
 */
function extractHtmlLinks(content, filePath) {
  const links = [];
  const lines = content.split('\n');

  lines.forEach((line, lineIndex) => {
    const lineNumber = lineIndex + 1;
    let match;

    // Matches the opening <a> tag only; any element text follows on the
    // same line after the tag.
    const htmlLinkRegex = /<a\s+[^>]*href\s*=\s*["']([^"']+)["'][^>]*>/gi;
    while ((match = htmlLinkRegex.exec(line)) !== null) {
      const url = match[1];
      const columnStart = match.index;

      // Extract the link text from the content following the opening tag.
      // (Previously the text was searched for inside match[0], which is
      // only the opening tag ending in '>', so it could never match and
      // the text always fell back to the URL.)
      const afterTag = line.slice(htmlLinkRegex.lastIndex);
      const textMatch = afterTag.match(/^([^<]*)</);
      const extractedText = textMatch ? textMatch[1].trim() : '';
      const linkText = extractedText || url;

      links.push({
        url: url,
        text: linkText,
        type: 'html',
        line: lineNumber,
        column: columnStart,
        context: line.trim(),
        hash: generateLinkHash(url, filePath, lineNumber),
      });
    }
  });

  return links;
}
|
||||
|
||||
/**
 * Generate a unique hash for a link occurrence.
 *
 * The fingerprint combines file path, line number, and trimmed URL, so the
 * same URL on different lines (or in different files) hashes differently.
 *
 * @param {string} url - The URL
 * @param {string} filePath - File path
 * @param {number} line - Line number
 * @returns {string} First 16 hex characters of the SHA-256 digest
 */
function generateLinkHash(url, filePath, line) {
  const fingerprint = [filePath, line, url.trim()].join(':');
  const digest = crypto.createHash('sha256').update(fingerprint).digest('hex');
  return digest.substring(0, 16);
}
|
||||
|
||||
/**
 * Generate a short content hash for a file.
 *
 * Used as the cache key component that identifies an exact file version.
 *
 * @param {string} content - File content
 * @returns {string} First 16 hex characters of the SHA-256 digest
 */
function generateFileHash(content) {
  const digest = crypto.createHash('sha256').update(content).digest('hex');
  return digest.substring(0, 16);
}
|
||||
|
||||
/**
 * Categorize link types for validation
 * @param {string} url - The URL to categorize
 * @returns {Object} Link category information:
 *   { category, needsValidation, [protocol] }
 */
function categorizeLinkType(url) {
  const trimmedUrl = url.trim();

  // External links
  if (trimmedUrl.startsWith('http://') || trimmedUrl.startsWith('https://')) {
    return {
      category: 'external',
      protocol: trimmedUrl.startsWith('https://') ? 'https' : 'http',
      needsValidation: true,
    };
  }

  // Internal absolute links
  if (trimmedUrl.startsWith('/')) {
    return {
      category: 'internal-absolute',
      needsValidation: true,
    };
  }

  // Special protocols (mailto:, tel:, ftp://, etc.).
  // Checked BEFORE the relative-link fallback: scheme-only URLs such as
  // "mailto:user@example.com" contain no "://" and were previously
  // misclassified as internal-relative, producing validation false
  // positives. The pattern is an RFC 3986 scheme: ALPHA *(ALPHA / DIGIT /
  // "+" / "-" / ".") followed by ":". (http/https already returned above.)
  if (/^[a-zA-Z][a-zA-Z0-9+.-]*:/.test(trimmedUrl)) {
    return {
      category: 'special-protocol',
      needsValidation: false,
    };
  }

  // Fragment/anchor links
  if (trimmedUrl.startsWith('#')) {
    return {
      category: 'fragment',
      needsValidation: true, // May need validation for internal page anchors
    };
  }

  // Relative links (./, ../, or bare scheme-less paths)
  if (
    trimmedUrl.startsWith('./') ||
    trimmedUrl.startsWith('../') ||
    !trimmedUrl.includes('://')
  ) {
    return {
      category: 'internal-relative',
      needsValidation: true,
    };
  }

  return {
    category: 'unknown',
    needsValidation: true,
  };
}
|
||||
|
||||
/**
 * Extract all links from a file
 * @param {string} filePath - Path to the file
 * @returns {Object} File analysis with links and metadata, or null when the
 *   file is unsupported or extraction fails (errors are logged, not thrown)
 */
function extractLinksFromFile(filePath) {
  try {
    if (!fs.existsSync(filePath)) {
      throw new Error(`File not found: ${filePath}`);
    }

    const content = fs.readFileSync(filePath, 'utf8');
    // Content hash identifies this exact file version for caching.
    const fileHash = generateFileHash(content);
    const extension = path.extname(filePath).toLowerCase();

    let links = [];
    let frontmatter = {};
    let bodyContent = content;

    // Parse frontmatter for .md files
    if (extension === '.md') {
      try {
        const parsed = matter(content);
        frontmatter = parsed.data || {};
        bodyContent = parsed.content;
      } catch (err) {
        // Malformed frontmatter: fall through with the raw content.
        console.warn(
          `Warning: Could not parse frontmatter in ${filePath}: ${err.message}`
        );
      }

      // Extract links from markdown content
      links = extractMarkdownLinks(bodyContent, filePath);
    } else if (extension === '.html') {
      // Extract links from HTML content
      links = extractHtmlLinks(content, filePath);
    } else {
      // Unsupported extension: null tells callers to skip this file.
      console.warn(`Warning: Unsupported file type for ${filePath}`);
      return null;
    }

    // Categorize and enhance links (category/needsValidation merged in).
    const enhancedLinks = links.map((link) => ({
      ...link,
      ...categorizeLinkType(link.url),
      filePath,
    }));

    // Calculate statistics
    const stats = {
      totalLinks: enhancedLinks.length,
      externalLinks: enhancedLinks.filter((l) => l.category === 'external')
        .length,
      internalLinks: enhancedLinks.filter((l) =>
        l.category.startsWith('internal')
      ).length,
      fragmentLinks: enhancedLinks.filter((l) => l.category === 'fragment')
        .length,
      linksNeedingValidation: enhancedLinks.filter((l) => l.needsValidation)
        .length,
    };

    return {
      filePath,
      fileHash,
      extension,
      frontmatter,
      links: enhancedLinks,
      stats,
      extractedAt: new Date().toISOString(),
    };
  } catch (error) {
    // All failures (including missing files) are reported and mapped to
    // null so batch callers can keep going.
    console.error(`Error extracting links from ${filePath}: ${error.message}`);
    return null;
  }
}
|
||||
|
||||
/**
 * Main function for CLI usage: parses flags, extracts links from each file,
 * optionally filters by category, and prints JSON, stats-only, or detailed
 * per-link output.
 */
function main() {
  const args = process.argv.slice(2);

  if (args.length === 0) {
    console.error('Usage: node link-extractor.js <file1> [file2] [...]');
    console.error('       node link-extractor.js --help');
    process.exit(1);
  }

  if (args[0] === '--help') {
    console.log(`
Link Extractor for Documentation Files

Usage:
  node link-extractor.js <file1> [file2] [...]  Extract links from files
  node link-extractor.js --help                 Show this help

Options:
  --json        Output results as JSON
  --stats-only  Show only statistics
  --filter TYPE Filter links by category (external, internal-absolute, internal-relative, fragment)

Examples:
  node link-extractor.js content/influxdb3/core/install.md
  node link-extractor.js --json content/**/*.md
  node link-extractor.js --stats-only --filter external content/influxdb3/**/*.md
`);
    process.exit(0);
  }

  const jsonOutput = args.includes('--json');
  const statsOnly = args.includes('--stats-only');
  const filterType = args.includes('--filter')
    ? args[args.indexOf('--filter') + 1]
    : null;

  // NOTE(review): excluding `arg !== filterType` also drops a *file* whose
  // name happens to equal the filter value (e.g. a file literally named
  // "external"); confirm this trade-off is acceptable.
  const files = args.filter(
    (arg) => !arg.startsWith('--') && arg !== filterType
  );
  const results = [];

  for (const filePath of files) {
    const result = extractLinksFromFile(filePath);
    if (result) {
      // Apply filter if specified
      if (filterType) {
        result.links = result.links.filter(
          (link) => link.category === filterType
        );
        // Recalculate stats after filtering
        result.stats = {
          totalLinks: result.links.length,
          externalLinks: result.links.filter((l) => l.category === 'external')
            .length,
          internalLinks: result.links.filter((l) =>
            l.category.startsWith('internal')
          ).length,
          fragmentLinks: result.links.filter((l) => l.category === 'fragment')
            .length,
          linksNeedingValidation: result.links.filter((l) => l.needsValidation)
            .length,
        };
      }

      // Only successful extractions are kept, so every entry in `results`
      // has a `stats` object for the reporting branches below.
      results.push(result);
    }
  }

  if (jsonOutput) {
    console.log(JSON.stringify(results, null, 2));
  } else if (statsOnly) {
    console.log('\nLink Extraction Statistics:');
    console.log('==========================');

    let totalFiles = 0;
    let totalLinks = 0;
    let totalExternal = 0;
    let totalInternal = 0;
    let totalFragment = 0;
    let totalNeedingValidation = 0;

    results.forEach((result) => {
      totalFiles++;
      totalLinks += result.stats.totalLinks;
      totalExternal += result.stats.externalLinks;
      totalInternal += result.stats.internalLinks;
      totalFragment += result.stats.fragmentLinks;
      totalNeedingValidation += result.stats.linksNeedingValidation;

      console.log(
        `${result.filePath}: ${result.stats.totalLinks} links (${result.stats.linksNeedingValidation} need validation)`
      );
    });

    console.log('\nSummary:');
    console.log(`  Total files: ${totalFiles}`);
    console.log(`  Total links: ${totalLinks}`);
    console.log(`  External links: ${totalExternal}`);
    console.log(`  Internal links: ${totalInternal}`);
    console.log(`  Fragment links: ${totalFragment}`);
    console.log(`  Links needing validation: ${totalNeedingValidation}`);
  } else {
    // Default: detailed per-file, per-link listing.
    results.forEach((result) => {
      console.log(`\nFile: ${result.filePath}`);
      console.log(`Hash: ${result.fileHash}`);
      console.log(`Links found: ${result.stats.totalLinks}`);
      console.log(
        `Links needing validation: ${result.stats.linksNeedingValidation}`
      );

      if (result.links.length > 0) {
        console.log('\nLinks:');
        result.links.forEach((link, index) => {
          console.log(`  ${index + 1}. [${link.category}] ${link.url}`);
          console.log(`     Line ${link.line}, Column ${link.column}`);
          console.log(`     Text: "${link.text}"`);
          console.log(`     Hash: ${link.hash}`);
          if (link.reference) {
            console.log(`     Reference: ${link.reference}`);
          }
          console.log('');
        });
      }
    });
  }
}
|
||||
|
||||
// Export functions for use as a module
export {
  extractLinksFromFile,
  extractMarkdownLinks,
  extractHtmlLinks,
  generateFileHash,
  generateLinkHash,
  categorizeLinkType,
};

// Run main function if called directly.
// NOTE(review): the file:// string comparison breaks on Windows paths;
// url.pathToFileURL(process.argv[1]).href is the portable form — confirm.
if (import.meta.url === `file://${process.argv[1]}`) {
  main();
}
|
||||
|
|
@ -0,0 +1,384 @@
|
|||
/**
|
||||
* Matrix Generator for Link Validation Workflows
|
||||
* Replaces complex bash scripting with maintainable JavaScript
|
||||
* Includes cache-aware optimization to skip validation of unchanged files
|
||||
*/
|
||||
|
||||
import { spawn } from 'child_process';
|
||||
import process from 'process';
|
||||
|
||||
// Product configuration mapping file paths to products.
// Keys are content-path prefixes (matched with a trailing slash appended by
// groupFilesByProduct); values hold the matrix job key and display name.
const PRODUCT_MAPPING = {
  'content/influxdb3/core': {
    key: 'influxdb3-core',
    name: 'InfluxDB 3 Core',
  },
  'content/influxdb3/enterprise': {
    key: 'influxdb3-enterprise',
    name: 'InfluxDB 3 Enterprise',
  },
  'content/influxdb3/cloud-dedicated': {
    key: 'influxdb3-cloud-dedicated',
    name: 'InfluxDB 3 Cloud Dedicated',
  },
  'content/influxdb3/cloud-serverless': {
    key: 'influxdb3-cloud-serverless',
    name: 'InfluxDB 3 Cloud Serverless',
  },
  'content/influxdb3/clustered': {
    key: 'influxdb3-clustered',
    name: 'InfluxDB 3 Clustered',
  },
  'content/influxdb3/explorer': {
    key: 'influxdb3-explorer',
    name: 'InfluxDB 3 Explorer',
  },
  'content/influxdb/v2': {
    key: 'influxdb-v2',
    name: 'InfluxDB v2',
  },
  'content/influxdb/cloud': {
    key: 'influxdb-cloud',
    name: 'InfluxDB Cloud',
  },
  'content/influxdb/v1': {
    key: 'influxdb-v1',
    name: 'InfluxDB v1',
  },
  'content/influxdb/enterprise_influxdb': {
    key: 'influxdb-enterprise-v1',
    name: 'InfluxDB Enterprise v1',
  },
  'content/telegraf': {
    key: 'telegraf',
    name: 'Telegraf',
  },
  'content/kapacitor': {
    key: 'kapacitor',
    name: 'Kapacitor',
  },
  'content/chronograf': {
    key: 'chronograf',
    name: 'Chronograf',
  },
  'content/flux': {
    key: 'flux',
    name: 'Flux',
  },
  'content/shared': {
    key: 'shared',
    name: 'Shared Content',
  },
  'api-docs': {
    key: 'api-docs',
    name: 'API Documentation',
  },
};
|
||||
|
||||
/**
 * Group files by product based on their path.
 * @param {string[]} files - Array of file paths
 * @returns {Object} - Object with product keys and arrays of files
 */
function groupFilesByProduct(files) {
  // Start every product with an empty bucket so callers can rely on each
  // key existing even when no file matched it.
  const grouped = {};
  for (const { key } of Object.values(PRODUCT_MAPPING)) {
    grouped[key] = [];
  }

  for (const file of files) {
    // First product whose path prefix (with trailing slash) matches wins.
    const entry = Object.entries(PRODUCT_MAPPING).find(([pathPrefix]) =>
      file.startsWith(`${pathPrefix}/`)
    );

    if (entry) {
      grouped[entry[1].key].push(file);
    } else if (file.startsWith('api-docs/')) {
      // Edge case kept from the original mapping: api-docs has no
      // trailing slash in its key.
      grouped['api-docs'].push(file);
    }
    // Files under no known prefix are intentionally left ungrouped.
  }

  return grouped;
}
|
||||
|
||||
/**
 * Run incremental validation analysis
 * @param {string[]} files - Array of file paths to analyze
 * @returns {Promise<Object>} - Incremental validation results
 */
async function runIncrementalAnalysis(files) {
  // Always resolves (never rejects): any failure degrades to "validate
  // every file" so the calling workflow cannot be broken by the cache layer.
  return new Promise((resolve) => {
    const child = spawn(
      'node',
      ['.github/scripts/incremental-validator.js', ...files],
      {
        stdio: ['pipe', 'pipe', 'pipe'],
        env: process.env,
      }
    );

    let stdout = '';
    let stderr = '';

    child.stdout.on('data', (data) => {
      stdout += data.toString();
    });

    child.stderr.on('data', (data) => {
      stderr += data.toString();
    });

    child.on('close', (code) => {
      if (code === 0) {
        try {
          // Parse the JSON output from the validation script.
          // NOTE(review): this expects a stdout line starting with '{', but
          // the incremental-validator CLI appears to print human-readable
          // text only — if so, this always falls back to validating every
          // file and the cache optimization never triggers. Confirm the
          // intended output contract between the two scripts.
          const lines = stdout.trim().split('\n');
          const jsonLine = lines.find((line) => line.startsWith('{'));

          if (jsonLine) {
            const results = JSON.parse(jsonLine);
            resolve(results);
          } else {
            // No parseable output: treat every file as needing validation.
            resolve({ filesToValidate: files.map((f) => ({ filePath: f })) });
          }
        } catch (error) {
          console.warn(
            `Warning: Could not parse incremental validation results: ${error.message}`
          );
          resolve({ filesToValidate: files.map((f) => ({ filePath: f })) });
        }
      } else {
        console.warn(
          `Incremental validation failed with code ${code}: ${stderr}`
        );
        resolve({ filesToValidate: files.map((f) => ({ filePath: f })) });
      }
    });

    child.on('error', (error) => {
      // Spawn failure (e.g. node missing): same full-validation fallback.
      console.warn(`Incremental validation error: ${error.message}`);
      resolve({ filesToValidate: files.map((f) => ({ filePath: f })) });
    });
  });
}
|
||||
|
||||
/**
 * Generate matrix configuration for GitHub Actions with cache awareness
 * @param {string[]} changedFiles - Array of changed file paths
 * @param {Object} options - Configuration options
 * @param {number} [options.maxConcurrentJobs=5] - Cap on parallel matrix jobs
 * @param {boolean} [options.forceSequential=false] - Force a single job
 * @param {number} [options.minFilesForParallel=10] - Parallel threshold
 * @param {boolean} [options.useCache=true] - Run cache-aware analysis first
 * @returns {Promise<Object>} - Matrix configuration object
 */
async function generateMatrix(changedFiles, options = {}) {
  const {
    maxConcurrentJobs = 5,
    forceSequential = false,
    minFilesForParallel = 10,
    useCache = true,
  } = options;

  // No changed files at all: nothing to validate.
  if (!changedFiles || changedFiles.length === 0) {
    return {
      strategy: 'none',
      hasChanges: false,
      matrix: { include: [] },
      cacheStats: { hitRate: 100, cacheHits: 0, cacheMisses: 0 },
    };
  }

  let filesToValidate = changedFiles;
  let cacheStats = {
    hitRate: 0,
    cacheHits: 0,
    cacheMisses: changedFiles.length,
  };

  // Run incremental analysis if cache is enabled
  if (useCache) {
    try {
      console.log(
        `🔍 Running cache analysis for ${changedFiles.length} files...`
      );
      const analysisResults = await runIncrementalAnalysis(changedFiles);

      if (analysisResults.filesToValidate) {
        filesToValidate = analysisResults.filesToValidate.map(
          (f) => f.filePath
        );
        cacheStats = analysisResults.cacheStats || cacheStats;

        console.log(
          `📊 Cache analysis complete: ${cacheStats.hitRate}% hit rate`
        );
        console.log(
          `✅ ${cacheStats.cacheHits} files cached, ${cacheStats.cacheMisses} need validation`
        );
      }
    } catch (error) {
      // Cache analysis is an optimization only — fall back to validating
      // everything rather than failing the workflow.
      console.warn(
        `Cache analysis failed: ${error.message}, proceeding without cache optimization`
      );
    }
  }

  // If no files need validation after cache analysis
  if (filesToValidate.length === 0) {
    return {
      strategy: 'cache-hit',
      hasChanges: false,
      matrix: { include: [] },
      cacheStats,
      message: '✨ All files are cached - no validation needed!',
    };
  }

  const productFiles = groupFilesByProduct(filesToValidate);
  const productsWithFiles = Object.entries(productFiles).filter(
    ([, files]) => files.length > 0
  );

  // Determine strategy based on file count and configuration
  const totalFiles = filesToValidate.length;
  const shouldUseParallel =
    !forceSequential &&
    totalFiles >= minFilesForParallel &&
    productsWithFiles.length > 1;

  if (shouldUseParallel) {
    // Parallel strategy: create matrix with products
    let matrixIncludes = productsWithFiles.map(([productKey, files]) => {
      const product = Object.values(PRODUCT_MAPPING).find(
        (p) => p.key === productKey
      );
      return {
        product: productKey,
        name: product?.name || productKey,
        files: files.join(' '),
        cacheEnabled: useCache,
      };
    });

    // Cap the number of jobs WITHOUT dropping files: previously the list
    // was sliced to maxConcurrentJobs, which silently skipped validation
    // for every product past the cap. Overflow products are now merged
    // into a single combined job instead.
    if (matrixIncludes.length > maxConcurrentJobs) {
      const kept = matrixIncludes.slice(0, maxConcurrentJobs - 1);
      const overflow = matrixIncludes.slice(maxConcurrentJobs - 1);
      kept.push({
        product: 'combined',
        name: 'Combined Products',
        files: overflow.map((entry) => entry.files).join(' '),
        cacheEnabled: useCache,
      });
      matrixIncludes = kept;
    }

    return {
      strategy: 'parallel',
      hasChanges: true,
      matrix: { include: matrixIncludes },
      cacheStats,
      originalFileCount: changedFiles.length,
      validationFileCount: filesToValidate.length,
    };
  } else {
    // Sequential strategy: single job with all files
    return {
      strategy: 'sequential',
      hasChanges: true,
      matrix: {
        include: [
          {
            product: 'all',
            name: 'All Files',
            files: filesToValidate.join(' '),
            cacheEnabled: useCache,
          },
        ],
      },
      cacheStats,
      originalFileCount: changedFiles.length,
      validationFileCount: filesToValidate.length,
    };
  }
}
|
||||
|
||||
/**
 * CLI interface for the matrix generator: parses flags and file arguments,
 * runs generateMatrix, and prints the result either as JSON or as
 * GitHub-Actions `key=value` output lines.
 *
 * Exits 0 on success/help, 1 on invalid arguments or generation failure.
 */
async function main() {
  const args = process.argv.slice(2);

  if (args.includes('--help') || args.includes('-h')) {
    console.log(`
Usage: node matrix-generator.js [options] <file1> <file2> ...

Options:
  --max-concurrent <n>      Maximum concurrent jobs (default: 5)
  --force-sequential        Force sequential execution
  --min-files-parallel <n>  Minimum files needed for parallel (default: 10)
  --output-format <format>  Output format: json, github (default: github)
  --no-cache                Disable cache-aware optimization
  --help, -h                Show this help message

Examples:
  node matrix-generator.js content/influxdb3/core/file1.md content/influxdb/v2/file2.md
  node matrix-generator.js --force-sequential content/shared/file.md
  node matrix-generator.js --no-cache --output-format json *.md
`);
    process.exit(0);
  }

  // Parse a positive-integer option value with an explicit radix; exits
  // with an error instead of letting NaN propagate into matrix options
  // (previously `parseInt(value)` had no radix and no validation).
  const parsePositiveInt = (flag, raw) => {
    const value = Number.parseInt(raw, 10);
    if (!Number.isInteger(value) || value <= 0) {
      console.error(`Invalid value for ${flag}: ${raw}`);
      process.exit(1);
    }
    return value;
  };

  // Parse options
  const options = {};
  const files = [];

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];

    if (arg === '--max-concurrent' && i + 1 < args.length) {
      options.maxConcurrentJobs = parsePositiveInt(arg, args[++i]);
    } else if (arg === '--force-sequential') {
      options.forceSequential = true;
    } else if (arg === '--min-files-parallel' && i + 1 < args.length) {
      options.minFilesForParallel = parsePositiveInt(arg, args[++i]);
    } else if (arg === '--output-format' && i + 1 < args.length) {
      options.outputFormat = args[++i];
    } else if (arg === '--no-cache') {
      options.useCache = false;
    } else if (!arg.startsWith('--')) {
      files.push(arg);
    }
  }

  try {
    const result = await generateMatrix(files, options);

    if (options.outputFormat === 'json') {
      console.log(JSON.stringify(result, null, 2));
    } else {
      // GitHub Actions format
      console.log(`strategy=${result.strategy}`);
      console.log(`has-changes=${result.hasChanges}`);
      console.log(`matrix=${JSON.stringify(result.matrix)}`);

      // Add cache statistics
      if (result.cacheStats) {
        console.log(`cache-hit-rate=${result.cacheStats.hitRate}`);
        console.log(`cache-hits=${result.cacheStats.cacheHits}`);
        console.log(`cache-misses=${result.cacheStats.cacheMisses}`);
      }

      if (result.originalFileCount !== undefined) {
        console.log(`original-file-count=${result.originalFileCount}`);
        console.log(`validation-file-count=${result.validationFileCount}`);
      }

      if (result.message) {
        console.log(`message=${result.message}`);
      }
    }
  } catch (error) {
    console.error(`Error generating matrix: ${error.message}`);
    process.exit(1);
  }
}
|
||||
|
||||
// Run CLI if this file is executed directly
// NOTE(review): this string comparison can fail on Windows (drive-letter
// casing, backslash separators) — consider url.fileURLToPath for a robust
// check; verify the target platforms before relying on it there.
if (import.meta.url === `file://${process.argv[1]}`) {
  main().catch(console.error);
}

// Public API: consumed by the GitHub Actions workflow and by tests.
export { generateMatrix, groupFilesByProduct, PRODUCT_MAPPING };
|
||||
|
|
@ -0,0 +1,140 @@
|
|||
# PR Link Validation Workflow
|
||||
# Provides basic and parallel workflows
|
||||
# with smart strategy selection based on change volume
|
||||
name: PR Link Validation
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- 'content/**/*.md'
|
||||
- 'content/**/*.html'
|
||||
- 'api-docs/**/*.yml'
|
||||
- 'assets/**/*.js'
|
||||
- 'layouts/**/*.html'
|
||||
|
||||
jobs:
|
||||
setup:
|
||||
name: Setup and Strategy Detection
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
strategy: ${{ steps.determine-strategy.outputs.strategy }}
|
||||
has-changes: ${{ steps.determine-strategy.outputs.has-changes }}
|
||||
matrix: ${{ steps.determine-strategy.outputs.matrix }}
|
||||
all-files: ${{ steps.changed-files.outputs.all_changed_files }}
|
||||
cache-hit-rate: ${{ steps.determine-strategy.outputs.cache-hit-rate }}
|
||||
cache-hits: ${{ steps.determine-strategy.outputs.cache-hits }}
|
||||
cache-misses: ${{ steps.determine-strategy.outputs.cache-misses }}
|
||||
original-file-count: ${{ steps.determine-strategy.outputs.original-file-count }}
|
||||
validation-file-count: ${{ steps.determine-strategy.outputs.validation-file-count }}
|
||||
cache-message: ${{ steps.determine-strategy.outputs.message }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup docs environment
|
||||
uses: ./.github/actions/setup-docs-env
|
||||
|
||||
- name: Get changed files
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@v41
|
||||
with:
|
||||
files: |
|
||||
content/**/*.md
|
||||
content/**/*.html
|
||||
api-docs/**/*.yml
|
||||
|
||||
- name: Determine validation strategy
|
||||
id: determine-strategy
|
||||
run: |
|
||||
if [[ "${{ steps.changed-files.outputs.any_changed }}" != "true" ]]; then
|
||||
echo "No relevant files changed"
|
||||
echo "strategy=none" >> $GITHUB_OUTPUT
|
||||
echo "has-changes=false" >> $GITHUB_OUTPUT
|
||||
echo "matrix={\"include\":[]}" >> $GITHUB_OUTPUT
|
||||
echo "cache-hit-rate=100" >> $GITHUB_OUTPUT
|
||||
echo "cache-hits=0" >> $GITHUB_OUTPUT
|
||||
echo "cache-misses=0" >> $GITHUB_OUTPUT
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Use our matrix generator with cache awareness
|
||||
files="${{ steps.changed-files.outputs.all_changed_files }}"
|
||||
|
||||
echo "🔍 Analyzing ${files} for cache-aware validation..."
|
||||
|
||||
# Generate matrix and capture outputs
|
||||
result=$(node .github/scripts/matrix-generator.js \
|
||||
--min-files-parallel 10 \
|
||||
--max-concurrent 5 \
|
||||
--output-format github \
|
||||
$files)
|
||||
|
||||
# Parse all outputs from matrix generator
|
||||
while IFS='=' read -r key value; do
|
||||
case "$key" in
|
||||
strategy|has-changes|cache-hit-rate|cache-hits|cache-misses|original-file-count|validation-file-count|message)
|
||||
echo "$key=$value" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
matrix)
|
||||
echo "matrix=$value" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
esac
|
||||
done <<< "$result"
|
||||
|
||||
# Extract values for logging
|
||||
strategy=$(echo "$result" | grep "^strategy=" | cut -d'=' -f2)
|
||||
cache_hit_rate=$(echo "$result" | grep "^cache-hit-rate=" | cut -d'=' -f2)
|
||||
cache_message=$(echo "$result" | grep "^message=" | cut -d'=' -f2-)
|
||||
|
||||
echo "📊 Selected strategy: $strategy"
|
||||
if [[ -n "$cache_hit_rate" ]]; then
|
||||
echo "📈 Cache hit rate: ${cache_hit_rate}%"
|
||||
fi
|
||||
if [[ -n "$cache_message" ]]; then
|
||||
echo "$cache_message"
|
||||
fi
|
||||
|
||||
validate:
|
||||
name: ${{ matrix.name }}
|
||||
needs: setup
|
||||
if: needs.setup.outputs.has-changes == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix: ${{ fromJson(needs.setup.outputs.matrix) }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup docs environment
|
||||
uses: ./.github/actions/setup-docs-env
|
||||
|
||||
- name: Validate links
|
||||
uses: ./.github/actions/validate-links
|
||||
with:
|
||||
files: ${{ matrix.files || needs.setup.outputs.all-files }}
|
||||
product-name: ${{ matrix.product }}
|
||||
cache-enabled: ${{ matrix.cacheEnabled || 'true' }}
|
||||
cache-key: link-validation-${{ github.event.pull_request.base.sha }}
|
||||
|
||||
report:
|
||||
name: Report Results
|
||||
needs: [setup, validate]
|
||||
if: always() && needs.setup.outputs.has-changes == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup docs environment
|
||||
uses: ./.github/actions/setup-docs-env
|
||||
|
||||
- name: Report broken links
|
||||
uses: ./.github/actions/report-broken-links
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
max-links-per-file: 20
|
||||
|
|
@ -21,6 +21,9 @@ See @.github/instructions/contributing.instructions.md for essential InfluxData
|
|||
documentation contributing guidelines, such as style and
|
||||
formatting, and commonly used shortcodes.
|
||||
|
||||
See @TESTING.md for comprehensive testing information, including code block
|
||||
testing, link validation, style linting, and advanced testing procedures.
|
||||
|
||||
See @.github/instructions/shortcodes-reference.instructions.md for detailed
|
||||
information about shortcodes used in this project.
|
||||
|
||||
|
|
|
|||
189
CONTRIBUTING.md
189
CONTRIBUTING.md
|
|
@ -9,7 +9,7 @@ Ready to contribute? Here's the essential workflow:
|
|||
2. [Fork and clone](#fork-and-clone-influxdata-documentation-repository) this repository
|
||||
3. [Install dependencies](#development-environment-setup) (Node.js, Yarn, Docker)
|
||||
4. Make your changes following [style guidelines](#making-changes)
|
||||
5. [Test your changes](#testing--quality-assurance) (pre-commit and pre-push hooks run automatically)
|
||||
5. [Test your changes](TESTING.md) (pre-commit and pre-push hooks run automatically)
|
||||
6. [Submit a pull request](#submission-process)
|
||||
|
||||
For detailed setup and reference information, see the sections below.
|
||||
|
|
@ -250,64 +250,29 @@ For more information about generating InfluxDB API documentation, see the
|
|||
|
||||
---
|
||||
|
||||
<!-- agent:instruct: condense -->
|
||||
## Testing & Quality Assurance
|
||||
|
||||
### Pre-commit Hooks
|
||||
For comprehensive testing information, including code block testing, link validation, style linting, and advanced testing procedures, see **[TESTING.md](TESTING.md)**.
|
||||
|
||||
docs-v2 uses Lefthook to manage Git hooks that run during pre-commit and pre-push. The hooks run the scripts defined in `package.json` to lint Markdown and test code blocks.
|
||||
When you try to commit changes (`git commit`), Git runs
|
||||
the commands configured in `lefthook.yml` which pass your **staged** files to Vale,
|
||||
Prettier, Cypress (for UI tests and link-checking), and Pytest (for testing Python and shell code in code blocks).
|
||||
### Quick Testing Reference
|
||||
|
||||
#### Skip pre-commit hooks
|
||||
```bash
|
||||
# Test code blocks
|
||||
yarn test:codeblocks:all
|
||||
|
||||
**We strongly recommend running linting and tests**, but you can skip them
|
||||
(and avoid installing dependencies)
|
||||
by including the `LEFTHOOK=0` environment variable or the `--no-verify` flag with
|
||||
your commit--for example:
|
||||
# Test links
|
||||
yarn test:links content/influxdb3/core/**/*.md
|
||||
|
||||
# Run style linting
|
||||
docker compose run -T vale content/**/*.md
|
||||
```
|
||||
|
||||
Pre-commit hooks run automatically when you commit changes, testing your staged files with Vale, Prettier, Cypress, and Pytest. To skip hooks if needed:
|
||||
|
||||
```sh
|
||||
git commit -m "<COMMIT_MESSAGE>" --no-verify
|
||||
```
|
||||
|
||||
```sh
|
||||
LEFTHOOK=0 git commit
|
||||
```
|
||||
|
||||
### Code Block Testing Overview
|
||||
|
||||
[pytest-codeblocks](https://github.com/nschloe/pytest-codeblocks/tree/main) extracts code from python and shell Markdown code blocks and executes assertions for the code.
|
||||
|
||||
**Basic example:**
|
||||
|
||||
```python
|
||||
print("Hello, world!")
|
||||
```
|
||||
|
||||
<!--pytest-codeblocks:expected-output-->
|
||||
|
||||
```
|
||||
Hello, world!
|
||||
```
|
||||
|
||||
For detailed testing setup and configuration, see [Detailed Testing Setup](#detailed-testing-setup).
|
||||
|
||||
### Style Linting (Vale)
|
||||
|
||||
docs-v2 includes Vale writing style linter configurations to enforce documentation writing style rules, guidelines, branding, and vocabulary terms.
|
||||
|
||||
**Basic usage:**
|
||||
|
||||
```sh
|
||||
docker compose run -T vale --config=content/influxdb/cloud-dedicated/.vale.ini --minAlertLevel=error content/influxdb/cloud-dedicated/write-data/**/*.md
|
||||
```
|
||||
|
||||
**VS Code integration:**
|
||||
|
||||
1. Install the [Vale VSCode](https://marketplace.visualstudio.com/items?itemName=ChrisChinchilla.vale-vscode) extension.
|
||||
2. In the extension settings, set the `Vale:Vale CLI:Path` value to `${workspaceFolder}/node_modules/.bin/vale`.
|
||||
|
||||
---
|
||||
|
||||
<!-- agent:instruct: condense -->
|
||||
|
|
@ -1720,132 +1685,6 @@ Replace the following:
|
|||
- {{% code-placeholder-key %}}`API_TOKEN`{{% /code-placeholder-key %}}: your [InfluxDB API token](/influxdb/v2/admin/tokens/)
|
||||
```
|
||||
|
||||
<!-- agent:instruct: extract testing-setup.instructions.md -->
|
||||
### Detailed Testing Setup
|
||||
|
||||
#### Set up test scripts and credentials
|
||||
|
||||
Tests for code blocks require your InfluxDB credentials and other typical
|
||||
InfluxDB configuration.
|
||||
|
||||
To set up your docs-v2 instance to run tests locally, do the following:
|
||||
|
||||
1. **Set executable permissions on test scripts** in `./test/src`:
|
||||
|
||||
```sh
|
||||
chmod +x ./test/src/*.sh
|
||||
```
|
||||
|
||||
2. **Create credentials for tests**:
|
||||
|
||||
- Create databases, buckets, and tokens for the product(s) you're testing.
|
||||
- If you don't have access to a Clustered instance, you can use your
|
||||
Cloud Dedicated instance for testing in most cases. To avoid conflicts when
|
||||
running tests, create separate Cloud Dedicated and Clustered databases.
|
||||
|
||||
1. **Create .env.test**: Copy the `./test/env.test.example` file into each
|
||||
product directory to test and rename the file as `.env.test`--for example:
|
||||
|
||||
```sh
|
||||
./content/influxdb/cloud-dedicated/.env.test
|
||||
```
|
||||
|
||||
2. Inside each product's `.env.test` file, assign your InfluxDB credentials to
|
||||
environment variables:
|
||||
|
||||
- Include the usual `INFLUX_` environment variables
|
||||
- In
|
||||
`cloud-dedicated/.env.test` and `clustered/.env.test` files, also define the
|
||||
following variables:
|
||||
|
||||
- `ACCOUNT_ID`, `CLUSTER_ID`: You can find these values in your `influxctl`
|
||||
`config.toml` configuration file.
|
||||
- `MANAGEMENT_TOKEN`: Use the `influxctl management create` command to generate
|
||||
a long-lived management token to authenticate Management API requests
|
||||
|
||||
See the substitution
|
||||
patterns in `./test/src/prepare-content.sh` for the full list of variables you may need to define in your `.env.test` files.
|
||||
|
||||
3. For influxctl commands to run in tests, move or copy your `config.toml` file
|
||||
to the `./test` directory.
|
||||
|
||||
> [!Warning]
|
||||
>
|
||||
> - The database you configure in `.env.test` and any written data may
|
||||
be deleted during test runs.
|
||||
> - Don't add your `.env.test` files to Git. To prevent accidentally adding credentials to the docs-v2 repo,
|
||||
> Git is configured to ignore `.env*` files. Consider backing them up on your local machine in case of accidental deletion.
|
||||
|
||||
#### Test shell and python code blocks
|
||||
|
||||
[pytest-codeblocks](https://github.com/nschloe/pytest-codeblocks/tree/main) extracts code from python and shell Markdown code blocks and executes assertions for the code.
|
||||
If you don't assert a value (using a Python `assert` statement), `--codeblocks` considers a non-zero exit code to be a failure.
|
||||
|
||||
**Note**: `pytest --codeblocks` uses Python's `subprocess.run()` to execute shell code.
|
||||
|
||||
You can use this to test CLI and interpreter commands, regardless of programming
|
||||
language, as long as they return standard exit codes.
|
||||
|
||||
To make the documented output of a code block testable, precede it with the
|
||||
`<!--pytest-codeblocks:expected-output-->` tag and **omit the code block language
|
||||
descriptor**--for example, in your Markdown file:
|
||||
|
||||
##### Example markdown
|
||||
|
||||
```python
|
||||
print("Hello, world!")
|
||||
```
|
||||
|
||||
<!--pytest-codeblocks:expected-output-->
|
||||
|
||||
The next code block is treated as an assertion.
|
||||
If successful, the output is the following:
|
||||
|
||||
```
|
||||
Hello, world!
|
||||
```
|
||||
|
||||
For commands, such as `influxctl` CLI commands, that require launching an
|
||||
OAuth URL in a browser, wrap the command in a subshell and redirect the output
|
||||
to `/shared/urls.txt` in the container--for example:
|
||||
|
||||
```sh
|
||||
# Test the preceding command outside of the code block.
|
||||
# influxctl authentication requires TTY interaction--
|
||||
# output the auth URL to a file that the host can open.
|
||||
script -c "influxctl user list " \
|
||||
/dev/null > /shared/urls.txt
|
||||
```
|
||||
|
||||
You probably don't want to display this syntax in the docs, which unfortunately
|
||||
means you'd need to include the test block separately from the displayed code
|
||||
block.
|
||||
To hide it from users, wrap the code block inside an HTML comment.
|
||||
pytest-codeblocks will still collect and run the code block.
|
||||
|
||||
##### Mark tests to skip
|
||||
|
||||
pytest-codeblocks has features for skipping tests and marking blocks as failed.
|
||||
To learn more, see the pytest-codeblocks README and tests.
|
||||
|
||||
#### Troubleshoot tests
|
||||
|
||||
##### Pytest collected 0 items
|
||||
|
||||
Potential reasons:
|
||||
|
||||
- See the test discovery options in `pytest.ini`.
|
||||
- For Python code blocks, use the following delimiter:
|
||||
|
||||
```python
|
||||
# Codeblocks runs this block.
|
||||
```
|
||||
|
||||
`pytest --codeblocks` ignores code blocks that use the following:
|
||||
|
||||
```py
|
||||
# Codeblocks ignores this block.
|
||||
```
|
||||
|
||||
<!-- agent:instruct: condense -->
|
||||
### Advanced Configuration
|
||||
|
|
|
|||
|
|
@ -11,6 +11,10 @@ This repository contains the InfluxDB 2.x documentation published at [docs.influ
|
|||
We welcome and encourage community contributions.
|
||||
For information about contributing to the InfluxData documentation, see [Contribution guidelines](CONTRIBUTING.md).
|
||||
|
||||
## Testing
|
||||
|
||||
For information about testing the documentation, including code block testing, link validation, and style linting, see [Testing guide](TESTING.md).
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
InfluxData takes security and our users' trust very seriously.
|
||||
|
|
|
|||
|
|
@ -0,0 +1,364 @@
|
|||
# Testing Guide for InfluxData Documentation
|
||||
|
||||
This guide covers all testing procedures for the InfluxData documentation, including code block testing, link validation, and style linting.
|
||||
|
||||
## Quick Start
|
||||
|
||||
1. **Prerequisites**: Install [Node.js](https://nodejs.org/en), [Yarn](https://yarnpkg.com/getting-started/install), and [Docker](https://docs.docker.com/get-docker/)
|
||||
2. **Install dependencies**: Run `yarn` to install all dependencies
|
||||
3. **Build test environment**: Run `docker build -t influxdata/docs-pytest:latest -f Dockerfile.pytest .`
|
||||
4. **Run tests**: Use any of the test commands below
|
||||
|
||||
## Test Types Overview
|
||||
|
||||
| Test Type | Purpose | Command |
|
||||
|-----------|---------|---------|
|
||||
| **Code blocks** | Validate shell/Python code examples | `yarn test:codeblocks:all` |
|
||||
| **Link validation** | Check internal/external links | `yarn test:links` |
|
||||
| **Style linting** | Enforce writing standards | `docker compose run -T vale` |
|
||||
| **E2E tests** | UI and functionality testing | `yarn test:e2e` |
|
||||
|
||||
## Code Block Testing
|
||||
|
||||
Code block testing validates that shell commands and Python scripts in documentation work correctly using [pytest-codeblocks](https://github.com/nschloe/pytest-codeblocks/tree/main).
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```bash
|
||||
# Test all code blocks
|
||||
yarn test:codeblocks:all
|
||||
|
||||
# Test specific products
|
||||
yarn test:codeblocks:cloud
|
||||
yarn test:codeblocks:v2
|
||||
yarn test:codeblocks:telegraf
|
||||
```
|
||||
|
||||
### Setup and Configuration
|
||||
|
||||
#### 1. Set executable permissions on test scripts
|
||||
|
||||
```sh
|
||||
chmod +x ./test/src/*.sh
|
||||
```
|
||||
|
||||
#### 2. Create test credentials
|
||||
|
||||
Create databases, buckets, and tokens for the product(s) you're testing.
|
||||
If you don't have access to a Clustered instance, you can use your Cloud Dedicated instance for testing in most cases. To avoid conflicts when running tests, create separate Cloud Dedicated and Clustered databases.
|
||||
|
||||
#### 3. Configure environment variables
|
||||
|
||||
Copy the `./test/env.test.example` file into each product directory and rename as `.env.test`:
|
||||
|
||||
```sh
|
||||
# Example locations
|
||||
./content/influxdb/cloud-dedicated/.env.test
|
||||
./content/influxdb3/clustered/.env.test
|
||||
```
|
||||
|
||||
Inside each product's `.env.test` file, assign your InfluxDB credentials:
|
||||
|
||||
- Include the usual `INFLUX_` environment variables
|
||||
- For `cloud-dedicated/.env.test` and `clustered/.env.test`, also define:
|
||||
- `ACCOUNT_ID`, `CLUSTER_ID`: Found in your `influxctl config.toml`
|
||||
- `MANAGEMENT_TOKEN`: Generate with `influxctl management create`
|
||||
|
||||
See `./test/src/prepare-content.sh` for the full list of variables you may need.
|
||||
|
||||
#### 4. Configure influxctl commands
|
||||
|
||||
For influxctl commands to run in tests, move or copy your `config.toml` file to the `./test` directory.
|
||||
|
||||
> [!Warning]
|
||||
> - The database you configure in `.env.test` and any written data may be deleted during test runs
|
||||
> - Don't add your `.env.test` files to Git. Git is configured to ignore `.env*` files to prevent accidentally committing credentials
|
||||
|
||||
### Writing Testable Code Blocks
|
||||
|
||||
#### Basic Example
|
||||
|
||||
```python
|
||||
print("Hello, world!")
|
||||
```
|
||||
|
||||
<!--pytest-codeblocks:expected-output-->
|
||||
|
||||
```
|
||||
Hello, world!
|
||||
```
|
||||
|
||||
#### Interactive Commands
|
||||
|
||||
For commands that require TTY interaction (like `influxctl` authentication), wrap the command in a subshell and redirect output:
|
||||
|
||||
```sh
|
||||
# Test the preceding command outside of the code block.
|
||||
# influxctl authentication requires TTY interaction--
|
||||
# output the auth URL to a file that the host can open.
|
||||
script -c "influxctl user list " \
|
||||
/dev/null > /shared/urls.txt
|
||||
```
|
||||
|
||||
To hide test blocks from users, wrap them in HTML comments. pytest-codeblocks will still collect and run them.
|
||||
|
||||
#### Skipping Tests
|
||||
|
||||
pytest-codeblocks has features for skipping tests and marking blocks as failed. See the [pytest-codeblocks README](https://github.com/nschloe/pytest-codeblocks/tree/main) for details.
|
||||
|
||||
### Troubleshooting
|
||||
|
||||
#### "Pytest collected 0 items"
|
||||
|
||||
Potential causes:
|
||||
- Check test discovery options in `pytest.ini`
|
||||
- Use `python` (not `py`) for Python code block language identifiers:
|
||||
```python
|
||||
# This works
|
||||
```
|
||||
vs
|
||||
```py
|
||||
# This is ignored
|
||||
```
|
||||
|
||||
## Link Validation Testing
|
||||
|
||||
Link validation uses Cypress for e2e browser-based testing against the Hugo site to ensure all internal and external links work correctly.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```bash
|
||||
# Test specific files
|
||||
yarn test:links content/influxdb3/core/**/*.md
|
||||
|
||||
# Test all links (may take a long time)
|
||||
yarn test:links
|
||||
|
||||
# Test by product (may take a long time)
|
||||
yarn test:links:v3
|
||||
yarn test:links:v2
|
||||
yarn test:links:telegraf
|
||||
yarn test:links:chronograf
|
||||
yarn test:links:kapacitor
|
||||
```
|
||||
|
||||
### How Link Validation Works
|
||||
|
||||
The tests:
|
||||
1. Start a Hugo development server
|
||||
2. Navigate to each page in a browser
|
||||
3. Check all links for validity
|
||||
4. Report broken or invalid links
|
||||
|
||||
### GitHub Actions Integration
|
||||
|
||||
#### Composite Action
|
||||
|
||||
The `.github/actions/validate-links/` composite action provides reusable link validation:
|
||||
|
||||
```yaml
|
||||
- uses: ./.github/actions/validate-links
|
||||
with:
|
||||
files: "content/influxdb3/core/file.md content/influxdb/v2/file2.md"
|
||||
product-name: "core"
|
||||
cache-enabled: "true"
|
||||
cache-key: "link-validation"
|
||||
```
|
||||
|
||||
#### Matrix Generator
|
||||
|
||||
The `.github/scripts/matrix-generator.js` script provides intelligent strategy selection:
|
||||
|
||||
- **Sequential validation**: For small changes (< 10 files) or single-product changes
|
||||
- **Parallel validation**: For large changes across multiple products (up to 5 concurrent jobs)
|
||||
|
||||
Test locally:
|
||||
|
||||
```bash
|
||||
node .github/scripts/matrix-generator.js content/influxdb3/core/file1.md content/influxdb/v2/file2.md
|
||||
```
|
||||
|
||||
Configuration options:
|
||||
- `--max-concurrent <n>`: Maximum parallel jobs (default: 5)
|
||||
- `--force-sequential`: Force sequential execution
|
||||
- `--min-files-parallel <n>`: Minimum files for parallel (default: 10)
|
||||
|
||||
### Caching for Link Validation
|
||||
|
||||
Link validation supports caching to improve performance:
|
||||
|
||||
- **Cache location**: `.cache/link-validation/` (local), GitHub Actions cache (CI)
|
||||
- **Cache keys**: Based on content file hashes
|
||||
- **TTL**: 30 days by default, configurable
|
||||
|
||||
#### Cache Configuration Options
|
||||
|
||||
```bash
|
||||
# Use 7-day cache for more frequent validation
|
||||
yarn test:links --cache-ttl=7 content/influxdb3/**/*.md
|
||||
|
||||
# Use 1-day cache via environment variable
|
||||
LINK_CACHE_TTL_DAYS=1 yarn test:links content/**/*.md
|
||||
|
||||
# Clean up expired cache entries
|
||||
node .github/scripts/incremental-validator.js --cleanup
|
||||
```
|
||||
|
||||
#### How Caching Works
|
||||
|
||||
- **Cache key**: Based on file path + content hash (file changes invalidate cache immediately)
|
||||
- **External links**: Cached for the TTL period since URLs rarely change
|
||||
- **Internal links**: Effectively cached until file content changes
|
||||
- **Automatic cleanup**: Expired entries are removed on access and via `--cleanup`
|
||||
|
||||
## Style Linting (Vale)
|
||||
|
||||
Style linting uses [Vale](https://vale.sh/) to enforce documentation writing standards, branding guidelines, and vocabulary consistency.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```bash
|
||||
# Basic linting with Docker
|
||||
docker compose run -T vale --config=content/influxdb/cloud-dedicated/.vale.ini --minAlertLevel=error content/influxdb/cloud-dedicated/write-data/**/*.md
|
||||
```
|
||||
|
||||
### VS Code Integration
|
||||
|
||||
1. Install the [Vale VSCode](https://marketplace.visualstudio.com/items?itemName=ChrisChinchilla.vale-vscode) extension
|
||||
2. Set the `Vale:Vale CLI:Path` setting to `${workspaceFolder}/node_modules/.bin/vale`
|
||||
|
||||
### Alert Levels
|
||||
|
||||
Vale can raise different alert levels:
|
||||
|
||||
- **Error**: Problems that can cause content to render incorrectly, violations of branding guidelines, rejected vocabulary terms
|
||||
- **Warning**: General style guide rules and best practices
|
||||
- **Suggestion**: Style preferences that may require refactoring or updates to an exceptions list
|
||||
|
||||
### Configuration
|
||||
|
||||
- **Styles**: `.ci/vale/styles/` contains configuration for the custom `InfluxDataDocs` style
|
||||
- **Vocabulary**: Add accepted/rejected terms to `.ci/vale/styles/config/vocabularies`
|
||||
- **Product-specific**: Configure per-product styles like `content/influxdb/cloud-dedicated/.vale.ini`
|
||||
|
||||
For more configuration details, see [Vale configuration](https://vale.sh/docs/topics/config).
|
||||
|
||||
## Pre-commit Hooks
|
||||
|
||||
docs-v2 uses [Lefthook](https://github.com/evilmartians/lefthook) to manage Git hooks that run automatically during pre-commit and pre-push.
|
||||
|
||||
### What Runs Automatically
|
||||
|
||||
When you run `git commit`, Git runs:
|
||||
- **Vale**: Style linting (if configured)
|
||||
- **Prettier**: Code formatting
|
||||
- **Cypress**: Link validation tests
|
||||
- **Pytest**: Code block tests
|
||||
|
||||
### Skipping Pre-commit Hooks
|
||||
|
||||
We strongly recommend running linting and tests, but you can skip them:
|
||||
|
||||
```sh
|
||||
# Skip with --no-verify flag
|
||||
git commit -m "<COMMIT_MESSAGE>" --no-verify
|
||||
|
||||
# Skip with environment variable
|
||||
LEFTHOOK=0 git commit
|
||||
```
|
||||
|
||||
## Advanced Testing
|
||||
|
||||
### E2E Testing
|
||||
|
||||
```bash
|
||||
# Run all E2E tests
|
||||
yarn test:e2e
|
||||
|
||||
# Run specific E2E specs
|
||||
node cypress/support/run-e2e-specs.js --spec "cypress/e2e/content/article-links.cy.js"
|
||||
```
|
||||
|
||||
### JavaScript Testing and Debugging
|
||||
|
||||
For JavaScript code in the documentation UI (`assets/js`):
|
||||
|
||||
#### Using Source Maps and Chrome DevTools
|
||||
|
||||
1. In VS Code, select Run > Start Debugging
|
||||
2. Select "Debug Docs (source maps)" configuration
|
||||
3. Set breakpoints in the `assets/js/ns-hugo-imp:` namespace
|
||||
|
||||
#### Using Debug Helpers
|
||||
|
||||
1. Import debug helpers in your JavaScript module:
|
||||
```js
|
||||
import { debugLog, debugBreak, debugInspect } from './utils/debug-helpers.js';
|
||||
```
|
||||
|
||||
2. Insert debug statements:
|
||||
```js
|
||||
const data = debugInspect(someData, 'Data');
|
||||
debugLog('Processing data', 'myFunction');
|
||||
debugBreak(); // Add breakpoint
|
||||
```
|
||||
|
||||
3. Start Hugo: `yarn hugo server`
|
||||
4. In VS Code, select "Debug JS (debug-helpers)" configuration
|
||||
|
||||
Remember to remove debug statements before committing.
|
||||
|
||||
## Docker Compose Services
|
||||
|
||||
Available test services:
|
||||
|
||||
```bash
|
||||
# All code block tests
|
||||
docker compose --profile test up
|
||||
|
||||
# Individual product tests
|
||||
docker compose run --rm cloud-pytest
|
||||
docker compose run --rm v2-pytest
|
||||
docker compose run --rm telegraf-pytest
|
||||
|
||||
# Stop monitoring services
|
||||
yarn test:codeblocks:stop-monitors
|
||||
```
|
||||
|
||||
## Testing Best Practices
|
||||
|
||||
### Code Block Examples
|
||||
|
||||
- Always test code examples before committing
|
||||
- Use realistic data and examples that users would encounter
|
||||
- Include proper error handling in examples
|
||||
- Format code to fit within 80 characters
|
||||
- Use long options in command-line examples (`--option` vs `-o`)
|
||||
|
||||
### Link Validation
|
||||
|
||||
- Test links regularly, especially after content restructuring
|
||||
- Use appropriate cache TTL settings for your validation needs
|
||||
- Monitor cache hit rates to optimize performance
|
||||
- Clean up expired cache entries periodically
|
||||
|
||||
### Style Guidelines
|
||||
|
||||
- Run Vale regularly to catch style issues early
|
||||
- Add accepted terms to vocabulary files rather than ignoring errors
|
||||
- Configure product-specific styles for branding consistency
|
||||
- Review suggestions periodically for content improvement opportunities
|
||||
|
||||
## Related Files
|
||||
|
||||
- **Configuration**: `pytest.ini`, `cypress.config.js`, `lefthook.yml`
|
||||
- **Docker**: `compose.yaml`, `Dockerfile.pytest`
|
||||
- **Scripts**: `.github/scripts/` directory
|
||||
- **Test data**: `./test/` directory
|
||||
- **Vale config**: `.ci/vale/styles/`
|
||||
|
||||
## Getting Help
|
||||
|
||||
- **GitHub Issues**: [docs-v2 issues](https://github.com/influxdata/docs-v2/issues)
|
||||
- **Good first issues**: [good-first-issue label](https://github.com/influxdata/docs-v2/issues?q=is%3Aissue+is%3Aopen+label%3Agood-first-issue)
|
||||
- **InfluxData CLA**: [Sign here](https://www.influxdata.com/legal/cla/) for substantial contributions
|
||||
|
|
@ -7,6 +7,8 @@ import {
|
|||
FIRST_BROKEN_LINK_FILE,
|
||||
initializeReport,
|
||||
readBrokenLinksReport,
|
||||
saveCacheStats,
|
||||
saveValidationStrategy,
|
||||
} from './cypress/support/link-reporter.js';
|
||||
|
||||
export default defineConfig({
|
||||
|
|
@ -177,6 +179,63 @@ export default defineConfig({
|
|||
return true;
|
||||
}
|
||||
},
|
||||
|
||||
// Cache and incremental validation tasks
|
||||
saveCacheStatistics(stats) {
|
||||
try {
|
||||
saveCacheStats(stats);
|
||||
return true;
|
||||
} catch (error) {
|
||||
console.error(`Error saving cache stats: ${error.message}`);
|
||||
return false;
|
||||
}
|
||||
},
|
||||
|
||||
saveValidationStrategy(strategy) {
|
||||
try {
|
||||
saveValidationStrategy(strategy);
|
||||
return true;
|
||||
} catch (error) {
|
||||
console.error(`Error saving validation strategy: ${error.message}`);
|
||||
return false;
|
||||
}
|
||||
},
|
||||
|
||||
runIncrementalValidation(filePaths) {
|
||||
return new Promise(async (resolve, reject) => {
|
||||
try {
|
||||
const { IncrementalValidator } = await import(
|
||||
'./.github/scripts/incremental-validator.js'
|
||||
);
|
||||
const validator = new IncrementalValidator();
|
||||
const results = await validator.validateFiles(filePaths);
|
||||
resolve(results);
|
||||
} catch (error) {
|
||||
console.error(`Incremental validation error: ${error.message}`);
|
||||
reject(error);
|
||||
}
|
||||
});
|
||||
},
|
||||
|
||||
cacheValidationResults(filePath, fileHash, results) {
|
||||
return new Promise(async (resolve, reject) => {
|
||||
try {
|
||||
const { IncrementalValidator } = await import(
|
||||
'./.github/scripts/incremental-validator.js'
|
||||
);
|
||||
const validator = new IncrementalValidator();
|
||||
const success = await validator.cacheResults(
|
||||
filePath,
|
||||
fileHash,
|
||||
results
|
||||
);
|
||||
resolve(success);
|
||||
} catch (error) {
|
||||
console.error(`Cache validation results error: ${error.message}`);
|
||||
reject(error);
|
||||
}
|
||||
});
|
||||
},
|
||||
});
|
||||
|
||||
// Load plugins file using dynamic import for ESM compatibility
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
/// <reference types="cypress" />
|
||||
|
||||
describe('Article', () => {
|
||||
const subjects = Cypress.env('test_subjects').split(',');
|
||||
let subjects = Cypress.env('test_subjects').split(',');
|
||||
let validationStrategy = null;
|
||||
|
||||
// Always use HEAD for downloads to avoid timeouts
|
||||
const useHeadForDownloads = true;
|
||||
|
||||
|
|
@ -9,6 +11,55 @@ describe('Article', () => {
|
|||
before(() => {
|
||||
// Initialize the broken links report
|
||||
cy.task('initializeBrokenLinksReport');
|
||||
|
||||
// Get source file paths for incremental validation
|
||||
const testSubjectsData = Cypress.env('test_subjects_data');
|
||||
let sourceFilePaths = subjects; // fallback to subjects if no data available
|
||||
|
||||
if (testSubjectsData) {
|
||||
try {
|
||||
const urlToSourceData = JSON.parse(testSubjectsData);
|
||||
// Extract source file paths from the structured data
|
||||
sourceFilePaths = urlToSourceData.map((item) => item.source);
|
||||
} catch (e) {
|
||||
console.warn(
|
||||
'Could not parse test_subjects_data, using subjects as fallback'
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Run incremental validation analysis with source file paths
|
||||
cy.task('runIncrementalValidation', sourceFilePaths).then((results) => {
|
||||
validationStrategy = results.validationStrategy;
|
||||
|
||||
// Save cache statistics and validation strategy for reporting
|
||||
cy.task('saveCacheStatistics', results.cacheStats);
|
||||
cy.task('saveValidationStrategy', validationStrategy);
|
||||
|
||||
// Update subjects to only test files that need validation
|
||||
if (results.filesToValidate.length > 0) {
|
||||
subjects = results.filesToValidate.map((file) => {
|
||||
// Convert file path to URL format (same logic as map-files-to-urls.js)
|
||||
let url = file.filePath.replace(/^content/, '');
|
||||
url = url.replace(/\/_index\.(html|md)$/, '/');
|
||||
url = url.replace(/\.md$/, '/');
|
||||
url = url.replace(/\.html$/, '/');
|
||||
if (!url.startsWith('/')) {
|
||||
url = '/' + url;
|
||||
}
|
||||
return url;
|
||||
});
|
||||
|
||||
cy.log(`📊 Cache Analysis: ${results.cacheStats.hitRate}% hit rate`);
|
||||
cy.log(
|
||||
`🔄 Testing ${subjects.length} pages (${results.cacheStats.cacheHits} cached)`
|
||||
);
|
||||
} else {
|
||||
// All files are cached, no validation needed
|
||||
subjects = [];
|
||||
cy.log('✨ All files cached - skipping validation');
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// Helper function to identify download links
|
||||
|
|
|
|||
|
|
@ -7,6 +7,8 @@ import fs from 'fs';
|
|||
export const BROKEN_LINKS_FILE = '/tmp/broken_links_report.json';
|
||||
export const FIRST_BROKEN_LINK_FILE = '/tmp/first_broken_link.json';
|
||||
const SOURCES_FILE = '/tmp/test_subjects_sources.json';
|
||||
const CACHE_STATS_FILE = '/tmp/cache_statistics.json';
|
||||
const VALIDATION_STRATEGY_FILE = '/tmp/validation_strategy.json';
|
||||
|
||||
/**
|
||||
* Reads the broken links report from the file system
|
||||
|
|
@ -69,6 +71,65 @@ function readSourcesMapping() {
|
|||
return {};
|
||||
}
|
||||
|
||||
/**
|
||||
* Read cache statistics from file
|
||||
* @returns {Object|null} Cache statistics or null if not found
|
||||
*/
|
||||
function readCacheStats() {
|
||||
try {
|
||||
if (fs.existsSync(CACHE_STATS_FILE)) {
|
||||
const content = fs.readFileSync(CACHE_STATS_FILE, 'utf8');
|
||||
return JSON.parse(content);
|
||||
}
|
||||
} catch (err) {
|
||||
console.warn(`Warning: Could not read cache stats: ${err.message}`);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read validation strategy from file
|
||||
* @returns {Object|null} Validation strategy or null if not found
|
||||
*/
|
||||
function readValidationStrategy() {
|
||||
try {
|
||||
if (fs.existsSync(VALIDATION_STRATEGY_FILE)) {
|
||||
const content = fs.readFileSync(VALIDATION_STRATEGY_FILE, 'utf8');
|
||||
return JSON.parse(content);
|
||||
}
|
||||
} catch (err) {
|
||||
console.warn(`Warning: Could not read validation strategy: ${err.message}`);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Save cache statistics for reporting
|
||||
* @param {Object} stats - Cache statistics to save
|
||||
*/
|
||||
export function saveCacheStats(stats) {
|
||||
try {
|
||||
fs.writeFileSync(CACHE_STATS_FILE, JSON.stringify(stats, null, 2));
|
||||
} catch (err) {
|
||||
console.warn(`Warning: Could not save cache stats: ${err.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Save validation strategy for reporting
|
||||
* @param {Object} strategy - Validation strategy to save
|
||||
*/
|
||||
export function saveValidationStrategy(strategy) {
|
||||
try {
|
||||
fs.writeFileSync(
|
||||
VALIDATION_STRATEGY_FILE,
|
||||
JSON.stringify(strategy, null, 2)
|
||||
);
|
||||
} catch (err) {
|
||||
console.warn(`Warning: Could not save validation strategy: ${err.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Formats and displays the broken links report to the console
|
||||
* @param {Array} brokenLinksReport - The report data to display
|
||||
|
|
@ -80,6 +141,26 @@ export function displayBrokenLinksReport(brokenLinksReport = null) {
|
|||
brokenLinksReport = readBrokenLinksReport();
|
||||
}
|
||||
|
||||
// Read cache statistics and validation strategy
|
||||
const cacheStats = readCacheStats();
|
||||
const validationStrategy = readValidationStrategy();
|
||||
|
||||
// Display cache performance first
|
||||
if (cacheStats) {
|
||||
console.log('\n📊 Cache Performance:');
|
||||
console.log('=====================');
|
||||
console.log(`Cache hit rate: ${cacheStats.hitRate}%`);
|
||||
console.log(`Files cached: ${cacheStats.cacheHits}`);
|
||||
console.log(`Files validated: ${cacheStats.cacheMisses}`);
|
||||
|
||||
if (validationStrategy) {
|
||||
console.log(`Total files analyzed: ${validationStrategy.total}`);
|
||||
console.log(
|
||||
`Links needing validation: ${validationStrategy.newLinks.length}`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Check both the report and first broken link file to determine if we have broken links
|
||||
const firstBrokenLink = readFirstBrokenLink();
|
||||
|
||||
|
|
@ -88,7 +169,7 @@ export function displayBrokenLinksReport(brokenLinksReport = null) {
|
|||
(!brokenLinksReport || brokenLinksReport.length === 0) &&
|
||||
!firstBrokenLink
|
||||
) {
|
||||
console.log('✅ No broken links detected in the validation report');
|
||||
console.log('\n✅ No broken links detected in the validation report');
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
16
lefthook.yml
16
lefthook.yml
|
|
@ -111,13 +111,15 @@ pre-push:
|
|||
node cypress/support/run-e2e-specs.js --spec "cypress/e2e/content/article-links.cy.js" content/example.md
|
||||
exit $?
|
||||
|
||||
e2e-links:
|
||||
tags: test,links
|
||||
glob: 'content/*.{md,html}'
|
||||
run: |
|
||||
echo "Running link checker for: {staged_files}"
|
||||
yarn test:links {staged_files}
|
||||
exit $?
|
||||
# Link validation runs in GitHub actions.
|
||||
# You can still run it locally for development.
|
||||
# e2e-links:
|
||||
# tags: test,links
|
||||
# glob: 'content/*.{md,html}'
|
||||
# run: |
|
||||
# echo "Running link checker for: {staged_files}"
|
||||
# yarn test:links {staged_files}
|
||||
# exit $?
|
||||
|
||||
# Manage Docker containers
|
||||
prune-legacy-containers:
|
||||
|
|
|
|||
Loading…
Reference in New Issue