chore(ci): Add config for new link validation tool (docs-tooling/link-checker)
chore(ci): Replaced PR link validation workflow with new workflow from docs-tooling/link-checker/.github-workflows-link-check.yml chore: organize .gitignore test: add content to trigger link-checker workflow This small change tests the pr-link-check.yml workflow feat: update link-checker workflow and documentation - Add production config with corrected User-Agent placement - Remove old link validation actions (replaced by link-checker) fix: update link-checker workflow configuration - Update Node.js version to 20 for dependency compatibility feat: use pre-built link-checker binary from docs-tooling releases - Replace building from source with downloading from releases - Use GitHub API to get latest release and binary - Maintain same artifact structure for downstream job fix: improve change detection in pr-link-check workflow - Use GitHub API for reliable PR file detection - Add debug output to show all changed files - Fix conditional logic for when jobs should run docs: update TESTING.md with binary distribution and automated GitHub Actions integration - Document pre-built binary download as recommended installation method - Explain automated PR link checking workflow for docs-v2 - Replace manual GitHub Actions example with automated integration details - Remove exaggerated language and specify actual exclusion types fix(ci): download link-checker binary from docs-v2 releases - Change binary source from private docs-tooling to public docs-v2 releases - Fixes GitHub Actions permission issues accessing private repos - Binary is now stored as a release asset on docs-v2 itself test: add test file with valid links to verify workflow passes test: remove temporary test file for link checker workflow The test file was only needed to verify the workflow functionality and should not be part of the documentation. 
docs: update TESTING.md to document docs-v2 binary distribution - Change primary installation method to download from docs-v2 releases - Explain that binary distribution enables reliable GitHub Actions access - Update automated workflow description to reflect docs-v2 release usage - Maintain build-from-source as alternative option refactor(ci): combine workflow into single job for cleaner PR display - Merge detect-changes, build-site, and download-link-checker into single job - All setup steps now run conditionally within one job - Cleaner PR display shows only 'Check links in affected files' - Maintains all functionality with improved UX fix(ci): exclude problematic URLs from link checking - Add reddit.com exclusions (blocks bots) - Add support.influxdata.com exclusion (SSL certificate issues in CI) - Prevents false positive failures in automated link checking
jts-link-checker
parent
d8095f50a6
commit
e10340b6ec
|
|
@ -0,0 +1,66 @@
|
|||
# Lychee link checker configuration
# Generated by link-checker
[lychee]
# Performance settings

# Maximum number of retries for failed checks
max_retries = 3

# Timeout for each link check (in seconds)
timeout = 30

# Maximum number of concurrent checks
max_concurrency = 128

skip_code_blocks = false

# HTTP settings
# Identify the tool to external services
user_agent = "Mozilla/5.0 (compatible; link-checker)"

# Accept these HTTP status codes as valid
accept = [200, 201, 202, 203, 204, 206, 301, 302, 303, 304, 307, 308]

# Skip these URL schemes
# NOTE(review): upstream lychee treats `scheme` as an allow-list (only the
# listed schemes are checked) - confirm link-checker reads it as a skip-list.
scheme = ["file", "mailto", "tel"]

# Exclude patterns (regex supported)
# Patterns are written as TOML literal strings so regex backslashes need no
# doubling. Host patterns end with a boundary group so they match the host
# itself and not longer names that merely start with it (for example,
# "localhost" must not also exclude "localhost.example.org").
exclude = [
    # Localhost URLs
    '^https?://localhost(?:[:/?#]|$)',
    '^https?://127\.0\.0\.1(?:[:/?#]|$)',

    # Common CI/CD environments (hosts under the .local TLD)
    '^https?://[^/?#]*\.local(?:[:/?#]|$)',

    # Example domains used in documentation
    '^https?://example\.(?:com|org|net)(?:[:/?#]|$)',

    # Placeholder URLs from code block filtering
    'https://example.com/REMOVED_FROM_CODE_BLOCK',
    'example.com/INLINE_CODE_URL',

    # URLs that require authentication
    '^https?://[^/?#]*\.slack\.com(?:[:/?#]|$)',
    '^https?://[^/?#]*\.atlassian\.net(?:[:/?#]|$)',

    # GitHub URLs (often fail due to rate limiting and bot detection)
    '^https?://github\.com(?:[:/?#]|$)',

    # Common documentation placeholders
    'YOUR_.*',
    'REPLACE_.*',
    '<.*>',
]

# Cache settings
# These must stay above the [headers] header: every key written after a table
# header belongs to that table, so placing them below would define
# headers.cache / headers.max_cache_age instead of checker options.
cache = true
max_cache_age = "1d"

# Request headers
[headers]
# Add custom headers here if needed
# "Authorization" = "Bearer $GITHUB_TOKEN"
|
||||
|
|
@ -0,0 +1,108 @@
|
|||
# Production Link Checker Configuration for InfluxData docs-v2
# Optimized for performance, reliability, and reduced false positives
[lychee]
# Performance settings

# Maximum number of retries for failed checks
max_retries = 3

# Timeout for each link check (in seconds)
timeout = 30

# Maximum number of concurrent checks
max_concurrency = 128

skip_code_blocks = false

# HTTP settings
# Identify the tool to external services
# NOTE(review): upstream lychee spells this option `user_agent`; confirm
# link-checker reads the quoted "User-Agent" key from this table.
"User-Agent" = "Mozilla/5.0 (compatible; influxdata-link-checker/1.0; +https://github.com/influxdata/docs-v2)"

# Accept these HTTP status codes as valid
accept = [200, 201, 202, 203, 204, 206, 301, 302, 303, 304, 307, 308]

# Skip these URL schemes
# NOTE(review): upstream lychee treats `scheme` as an allow-list (only the
# listed schemes are checked) - confirm link-checker reads it as a skip-list.
scheme = ["mailto", "tel"]

# Performance optimizations
cache = true
max_cache_age = "1h"

# Link extraction: in upstream lychee, include_verbatim = false leaves links
# inside verbatim sections (`pre`/`code` blocks) unchecked.
include_verbatim = false

# Exclusion patterns for docs-v2 (regex supported)
# Patterns are written as TOML literal strings so regex backslashes need no
# doubling. Host patterns end with a boundary group so they match the host
# itself and not longer names that merely start with it (for example,
# "localhost" must not also exclude "localhost.example.org").
exclude = [
    # Localhost URLs
    '^https?://localhost(?:[:/?#]|$)',
    '^https?://127\.0\.0\.1(?:[:/?#]|$)',

    # Common CI/CD environments (hosts under the .local TLD)
    '^https?://[^/?#]*\.local(?:[:/?#]|$)',

    # Example domains used in documentation
    '^https?://example\.(?:com|org|net)(?:[:/?#]|$)',

    # Placeholder URLs from code block filtering
    'https://example.com/REMOVED_FROM_CODE_BLOCK',
    'example.com/INLINE_CODE_URL',

    # URLs that require authentication
    '^https?://[^/?#]*\.slack\.com(?:[:/?#]|$)',
    '^https?://[^/?#]*\.atlassian\.net(?:[:/?#]|$)',

    # GitHub URLs (often fail due to rate limiting and bot detection)
    '^https?://github\.com(?:[:/?#]|$)',

    # Social media URLs (often block bots)
    '^https?://reddit\.com(?:[:/?#]|$)',
    '^https?://[^/?#]*\.reddit\.com(?:[:/?#]|$)',

    # InfluxData support URLs (certificate/SSL issues in CI)
    '^https?://support\.influxdata\.com(?:[:/?#]|$)',

    # Common documentation placeholders
    'YOUR_.*',
    'REPLACE_.*',
    '<.*>',
]

# Request headers
[headers]
# Add custom headers here if needed
# "Authorization" = "Bearer $GITHUB_TOKEN"
"Accept" = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
"Accept-Language" = "en-US,en;q=0.5"
"Accept-Encoding" = "gzip, deflate"
"DNT" = "1"
"Connection" = "keep-alive"
"Upgrade-Insecure-Requests" = "1"

[ci]
# CI-specific settings

[ci.github_actions]
output_format = "json"
create_annotations = true
fail_fast = false
max_annotations = 50 # Limit to avoid overwhelming PR comments

[ci.performance]
# Performance tuning for CI environment
parallel_requests = 32
connection_timeout = 10
read_timeout = 30

# Resource limits
max_memory_mb = 512
max_execution_time_minutes = 10

[reporting]
# Report configuration
include_fragments = false
verbose = false
no_progress = true # Disable progress bar in CI

# Summary settings
show_success_count = true
show_skipped_count = true
|
||||
|
|
@ -1,103 +0,0 @@
|
|||
name: 'Report Broken Links'
|
||||
description: 'Downloads broken link reports, generates PR comment, and posts results'
|
||||
|
||||
inputs:
|
||||
github-token:
|
||||
description: 'GitHub token for posting comments'
|
||||
required: false
|
||||
default: ${{ github.token }}
|
||||
max-links-per-file:
|
||||
description: 'Maximum links to show per file in comment'
|
||||
required: false
|
||||
default: '20'
|
||||
include-success-message:
|
||||
description: 'Include success message when no broken links found'
|
||||
required: false
|
||||
default: 'true'
|
||||
|
||||
outputs:
|
||||
has-broken-links:
|
||||
description: 'Whether broken links were found (true/false)'
|
||||
value: ${{ steps.generate-comment.outputs.has-broken-links }}
|
||||
broken-link-count:
|
||||
description: 'Number of broken links found'
|
||||
value: ${{ steps.generate-comment.outputs.broken-link-count }}
|
||||
|
||||
runs:
|
||||
using: 'composite'
|
||||
steps:
|
||||
- name: Download broken link reports
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: reports
|
||||
continue-on-error: true
|
||||
|
||||
- name: Generate PR comment
|
||||
id: generate-comment
|
||||
run: |
|
||||
# Generate comment using our script
|
||||
node .github/scripts/comment-generator.js \
|
||||
--max-links ${{ inputs.max-links-per-file }} \
|
||||
${{ inputs.include-success-message == 'false' && '--no-success' || '' }} \
|
||||
--output-file comment.md \
|
||||
reports/ || echo "No reports found or errors occurred"
|
||||
|
||||
# Check if comment file was created and has content
|
||||
if [[ -f comment.md && -s comment.md ]]; then
|
||||
echo "comment-generated=true" >> $GITHUB_OUTPUT
|
||||
|
||||
# Count broken links by parsing the comment
|
||||
broken_count=$(grep -o "Found [0-9]* broken link" comment.md | grep -o "[0-9]*" || echo "0")
|
||||
echo "broken-link-count=$broken_count" >> $GITHUB_OUTPUT
|
||||
|
||||
# Check if there are actually broken links (not just a success comment)
|
||||
if [[ "$broken_count" -gt 0 ]]; then
|
||||
echo "has-broken-links=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "has-broken-links=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
else
|
||||
echo "has-broken-links=false" >> $GITHUB_OUTPUT
|
||||
echo "broken-link-count=0" >> $GITHUB_OUTPUT
|
||||
echo "comment-generated=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
shell: bash
|
||||
|
||||
- name: Post PR comment
|
||||
if: steps.generate-comment.outputs.comment-generated == 'true'
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
github-token: ${{ inputs.github-token }}
|
||||
script: |
|
||||
const fs = require('fs');
|
||||
|
||||
if (fs.existsSync('comment.md')) {
|
||||
const comment = fs.readFileSync('comment.md', 'utf8');
|
||||
|
||||
if (comment.trim()) {
|
||||
await github.rest.issues.createComment({
|
||||
issue_number: context.issue.number,
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
body: comment
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
- name: Report validation results
|
||||
run: |
|
||||
has_broken_links="${{ steps.generate-comment.outputs.has-broken-links }}"
|
||||
broken_count="${{ steps.generate-comment.outputs.broken-link-count }}"
|
||||
|
||||
if [ "$has_broken_links" = "true" ]; then
|
||||
echo "::error::❌ Link validation failed: Found $broken_count broken link(s)"
|
||||
echo "Check the PR comment for detailed broken link information"
|
||||
exit 1
|
||||
else
|
||||
echo "::notice::✅ Link validation passed successfully"
|
||||
echo "All links in the changed files are valid"
|
||||
if [ "${{ steps.generate-comment.outputs.comment-generated }}" = "true" ]; then
|
||||
echo "PR comment posted with validation summary and cache statistics"
|
||||
fi
|
||||
fi
|
||||
shell: bash
|
||||
|
|
@ -1,106 +0,0 @@
|
|||
name: 'Validate Links'
|
||||
description: 'Runs e2e browser-based link validation tests against Hugo site using Cypress'
|
||||
|
||||
inputs:
|
||||
files:
|
||||
description: 'Space-separated list of files to validate'
|
||||
required: true
|
||||
product-name:
|
||||
description: 'Product name for reporting (optional)'
|
||||
required: false
|
||||
default: ''
|
||||
cache-enabled:
|
||||
description: 'Enable link validation caching'
|
||||
required: false
|
||||
default: 'true'
|
||||
cache-key:
|
||||
description: 'Cache key prefix for this validation run'
|
||||
required: false
|
||||
default: 'link-validation'
|
||||
timeout:
|
||||
description: 'Test timeout in seconds'
|
||||
required: false
|
||||
default: '900'
|
||||
|
||||
outputs:
|
||||
failed:
|
||||
description: 'Whether validation failed (true/false)'
|
||||
value: ${{ steps.validate.outputs.failed }}
|
||||
|
||||
runs:
|
||||
using: 'composite'
|
||||
steps:
|
||||
- name: Restore link validation cache
|
||||
if: inputs.cache-enabled == 'true'
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: .cache/link-validation
|
||||
key: ${{ inputs.cache-key }}-${{ runner.os }}-${{ hashFiles('content/**/*.md', 'content/**/*.html') }}
|
||||
restore-keys: |
|
||||
${{ inputs.cache-key }}-${{ runner.os }}-
|
||||
${{ inputs.cache-key }}-
|
||||
|
||||
- name: Run link validation
|
||||
shell: bash
|
||||
run: |
|
||||
# Set CI-specific environment variables
|
||||
export CI=true
|
||||
export GITHUB_ACTIONS=true
|
||||
export NODE_OPTIONS="--max-old-space-size=4096"
|
||||
|
||||
# Set test runner timeout for Hugo shutdown
|
||||
export HUGO_SHUTDOWN_TIMEOUT=5000
|
||||
|
||||
# Add timeout to prevent hanging (timeout command syntax: timeout DURATION COMMAND)
|
||||
timeout ${{ inputs.timeout }}s node cypress/support/run-e2e-specs.js ${{ inputs.files }} \
|
||||
--spec cypress/e2e/content/article-links.cy.js || {
|
||||
exit_code=$?
|
||||
|
||||
# Handle timeout specifically
|
||||
if [ $exit_code -eq 124 ]; then
|
||||
echo "::error::Link validation timed out after ${{ inputs.timeout }} seconds"
|
||||
echo "::notice::This may indicate Hugo server startup issues or very slow link validation"
|
||||
else
|
||||
echo "::error::Link validation failed with exit code $exit_code"
|
||||
fi
|
||||
|
||||
# Check for specific error patterns and logs (but don't dump full content)
|
||||
if [ -f /tmp/hugo_server.log ]; then
|
||||
echo "Hugo server log available for debugging"
|
||||
fi
|
||||
|
||||
if [ -f hugo.log ]; then
|
||||
echo "Additional Hugo log available for debugging"
|
||||
fi
|
||||
|
||||
if [ -f /tmp/broken_links_report.json ]; then
|
||||
# Only show summary, not full report (full report is uploaded as artifact)
|
||||
broken_count=$(grep -o '"url":' /tmp/broken_links_report.json | wc -l || echo "0")
|
||||
echo "Broken links report contains $broken_count entries"
|
||||
fi
|
||||
|
||||
exit $exit_code
|
||||
}
|
||||
|
||||
# Report success if we get here
|
||||
echo "::notice::✅ Link validation completed successfully"
|
||||
echo "No broken links detected in the tested files"
|
||||
|
||||
- name: Upload logs on failure
|
||||
if: failure()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: validation-logs-${{ inputs.product-name && inputs.product-name || 'default' }}
|
||||
path: |
|
||||
hugo.log
|
||||
/tmp/hugo_server.log
|
||||
if-no-files-found: ignore
|
||||
|
||||
|
||||
- name: Upload broken links report
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: broken-links-report${{ inputs.product-name && format('-{0}', inputs.product-name) || '' }}
|
||||
path: /tmp/broken_links_report.json
|
||||
if-no-files-found: ignore
|
||||
|
|
@ -0,0 +1,241 @@
|
|||
# Checks links in the rendered pages affected by a pull request.
#
# Flow: detect changed content/*.md files -> build the Hugo site -> download
# the pre-built link-checker binary -> map changed content files to public
# HTML files -> check links in those files -> annotate and summarize results.
name: Link Check PR Changes

on:
  pull_request:
    paths:
      - 'content/**/*.md'
      - 'data/**/*.yml'
      - 'layouts/**/*.html'
    types: [opened, synchronize, reopened]

# Least privilege: the job only reads repository contents, release assets,
# and the pull request's file list.
permissions:
  contents: read
  pull-requests: read

jobs:
  link-check:
    name: Check links in affected files
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Detect content changes
        id: detect
        # Context values are passed through the environment rather than
        # interpolated into the script, so they are never parsed as shell code.
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO: ${{ github.repository }}
          PR_NUMBER: ${{ github.event.number }}
          EVENT_NAME: ${{ github.event_name }}
          BASE_REF: ${{ github.base_ref }}
          HEAD_SHA: ${{ github.sha }}
          BEFORE_SHA: ${{ github.event.before }}
        run: |
          echo "🔍 Detecting changes between $BASE_REF and $HEAD_SHA"

          # For PRs, use the GitHub Files API to get changed files
          if [[ "$EVENT_NAME" == "pull_request" ]]; then
            echo "Using GitHub API to detect PR changes..."
            # The endpoint is paginated (30 files per page by default);
            # --paginate follows every page so large PRs are fully covered.
            gh api --paginate "repos/$REPO/pulls/$PR_NUMBER/files?per_page=100" \
              --jq '.[].filename' > all_changed_files.txt
          else
            echo "Using git diff to detect changes..."
            git diff --name-only "$BEFORE_SHA..$HEAD_SHA" > all_changed_files.txt
          fi

          # Filter for content markdown files
          CHANGED_FILES=$(grep '^content/.*\.md$' all_changed_files.txt || true)

          echo "📁 All changed files:"
          cat all_changed_files.txt
          echo ""
          echo "📝 Content markdown files:"
          echo "$CHANGED_FILES"

          if [[ -n "$CHANGED_FILES" ]]; then
            echo "✅ Found $(echo "$CHANGED_FILES" | wc -l) changed content file(s)"
            echo "has-changes=true" >> "$GITHUB_OUTPUT"
            echo "changed-content<<EOF" >> "$GITHUB_OUTPUT"
            echo "$CHANGED_FILES" >> "$GITHUB_OUTPUT"
            echo "EOF" >> "$GITHUB_OUTPUT"

            # Check if any shared content files were modified
            SHARED_CHANGES=$(echo "$CHANGED_FILES" | grep '^content/shared/' || true)
            if [[ -n "$SHARED_CHANGES" ]]; then
              echo "has-shared-content=true" >> "$GITHUB_OUTPUT"
              echo "🔄 Detected shared content changes: $SHARED_CHANGES"
            else
              echo "has-shared-content=false" >> "$GITHUB_OUTPUT"
            fi
          else
            echo "❌ No content changes detected"
            echo "has-changes=false" >> "$GITHUB_OUTPUT"
            echo "has-shared-content=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Skip if no content changes
        if: steps.detect.outputs.has-changes == 'false'
        run: |
          echo "No content changes detected in this PR - skipping link check"
          echo "✅ **No content changes detected** - link check skipped" >> "$GITHUB_STEP_SUMMARY"

      - name: Setup Node.js
        if: steps.detect.outputs.has-changes == 'true'
        uses: actions/setup-node@v4
        with:
          node-version: '20'
          cache: 'yarn'

      - name: Install dependencies
        if: steps.detect.outputs.has-changes == 'true'
        run: yarn install --frozen-lockfile

      - name: Build Hugo site
        if: steps.detect.outputs.has-changes == 'true'
        run: npx hugo --minify

      - name: Download link-checker binary
        if: steps.detect.outputs.has-changes == 'true'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          echo "Downloading link-checker binary from docs-v2 releases..."

          # Download from docs-v2's own releases (always accessible).
          # --fail turns HTTP errors into a non-zero exit instead of saving
          # the error body as if it were the release metadata.
          curl --fail -L -H "Accept: application/vnd.github+json" \
            -H "Authorization: Bearer $GH_TOKEN" \
            -o link-checker-info.json \
            "https://api.github.com/repos/influxdata/docs-v2/releases/tags/link-checker-v1.0.0"

          # Extract download URL for linux binary. Take the first match only,
          # so a release with several linux assets still yields a single URL.
          DOWNLOAD_URL=$(jq -r '[.assets[]? | select(.name | test("link-checker.*linux")) | .url] | first // empty' link-checker-info.json)

          if [[ -z "$DOWNLOAD_URL" ]]; then
            echo "❌ No linux binary found in release"
            echo "Available assets:"
            jq -r '.assets[]?.name' link-checker-info.json
            exit 1
          fi

          echo "📥 Downloading: $DOWNLOAD_URL"
          curl --fail -L -H "Accept: application/octet-stream" \
            -H "Authorization: Bearer $GH_TOKEN" \
            -o link-checker "$DOWNLOAD_URL"

          chmod +x link-checker
          ./link-checker --version

      - name: Verify link checker config exists
        if: steps.detect.outputs.has-changes == 'true'
        run: |
          if [[ ! -f .ci/link-checker/production.lycherc.toml ]]; then
            echo "❌ Configuration file .ci/link-checker/production.lycherc.toml not found"
            echo "Please copy production.lycherc.toml from docs-tooling/link-checker/"
            exit 1
          fi
          echo "✅ Using configuration: .ci/link-checker/production.lycherc.toml"

      - name: Map changed content to public files
        if: steps.detect.outputs.has-changes == 'true'
        id: mapping
        # File names come from the pull request and are untrusted: pass them
        # through the environment, never through ${{ }} inside the script.
        env:
          CHANGED_CONTENT: ${{ steps.detect.outputs.changed-content }}
        run: |
          echo "Mapping changed content files to public HTML files..."

          # Create temporary file with changed content files
          printf '%s\n' "$CHANGED_CONTENT" > changed-files.txt

          # Map content files to public files.
          # -d '\n' makes xargs treat each line as one path, so names with
          # spaces or quotes are passed through intact.
          PUBLIC_FILES=$(xargs -r -d '\n' ./link-checker map --existing-only < changed-files.txt)

          if [[ -n "$PUBLIC_FILES" ]]; then
            echo "Found affected public files:"
            echo "$PUBLIC_FILES"
            echo "public-files<<EOF" >> "$GITHUB_OUTPUT"
            echo "$PUBLIC_FILES" >> "$GITHUB_OUTPUT"
            echo "EOF" >> "$GITHUB_OUTPUT"

            # Count files for summary
            FILE_COUNT=$(echo "$PUBLIC_FILES" | wc -l)
            echo "file-count=$FILE_COUNT" >> "$GITHUB_OUTPUT"
          else
            echo "No public files found to check"
            echo "public-files=" >> "$GITHUB_OUTPUT"
            echo "file-count=0" >> "$GITHUB_OUTPUT"
          fi

      - name: Run link checker
        if: steps.detect.outputs.has-changes == 'true' && steps.mapping.outputs.public-files != ''
        id: link-check
        env:
          PUBLIC_FILES: ${{ steps.mapping.outputs.public-files }}
          FILE_COUNT: ${{ steps.mapping.outputs.file-count }}
        run: |
          echo "Checking links in $FILE_COUNT affected files..."

          # Create temporary file with public files list
          printf '%s\n' "$PUBLIC_FILES" > public-files.txt

          # Run link checker with detailed JSON output
          set +e  # Don't fail immediately on error; results are parsed first

          xargs -r -d '\n' ./link-checker check \
            --config .ci/link-checker/production.lycherc.toml \
            --format json \
            --output link-check-results.json < public-files.txt

          EXIT_CODE=$?

          if [[ -f link-check-results.json ]]; then
            # Parse results
            BROKEN_COUNT=$(jq -r '.summary.broken_count // 0' link-check-results.json)
            TOTAL_COUNT=$(jq -r '.summary.total_checked // 0' link-check-results.json)
            SUCCESS_RATE=$(jq -r '.summary.success_rate // 0' link-check-results.json)

            echo "broken-count=$BROKEN_COUNT" >> "$GITHUB_OUTPUT"
            echo "total-count=$TOTAL_COUNT" >> "$GITHUB_OUTPUT"
            echo "success-rate=$SUCCESS_RATE" >> "$GITHUB_OUTPUT"

            if [[ "$BROKEN_COUNT" -gt 0 ]]; then
              echo "❌ Found $BROKEN_COUNT broken links out of $TOTAL_COUNT total links"
              echo "check-result=failed" >> "$GITHUB_OUTPUT"
            else
              echo "✅ All $TOTAL_COUNT links are valid"
              echo "check-result=passed" >> "$GITHUB_OUTPUT"
            fi
          else
            echo "❌ Link check failed to generate results"
            echo "check-result=error" >> "$GITHUB_OUTPUT"
          fi

          # Propagate the checker's own exit status as the step result
          exit $EXIT_CODE

      - name: Process and report results
        # always(): report even when the link-check step above failed
        if: always() && steps.detect.outputs.has-changes == 'true' && steps.mapping.outputs.public-files != ''
        env:
          CHECK_RESULT: ${{ steps.link-check.outputs.check-result }}
          FILE_COUNT: ${{ steps.mapping.outputs.file-count }}
          TOTAL_COUNT: ${{ steps.link-check.outputs.total-count }}
          BROKEN_COUNT: ${{ steps.link-check.outputs.broken-count }}
          SUCCESS_RATE: ${{ steps.link-check.outputs.success-rate }}
        run: |
          if [[ -f link-check-results.json ]]; then
            # Create detailed error annotations for broken links
            if [[ "$CHECK_RESULT" == "failed" ]]; then
              echo "Creating error annotations for broken links..."

              jq -r '.broken_links[]? |
                "::error file=\(.file // "unknown"),line=\(.line // 1)::Broken link: \(.url) - \(.error // "Unknown error")"' \
                link-check-results.json || true
            fi

            # Generate summary comment
            cat >> "$GITHUB_STEP_SUMMARY" << EOF
          ## Link Check Results

          **Files Checked:** ${FILE_COUNT}
          **Total Links:** ${TOTAL_COUNT}
          **Broken Links:** ${BROKEN_COUNT}
          **Success Rate:** ${SUCCESS_RATE}%

          EOF

            if [[ "$CHECK_RESULT" == "failed" ]]; then
              echo "❌ **Link check failed** - see annotations above for details" >> "$GITHUB_STEP_SUMMARY"
            else
              echo "✅ **All links are valid**" >> "$GITHUB_STEP_SUMMARY"
            fi
          else
            echo "⚠️ **Link check could not complete** - no results file generated" >> "$GITHUB_STEP_SUMMARY"
          fi

      - name: Upload detailed results
        if: always() && steps.detect.outputs.has-changes == 'true' && steps.mapping.outputs.public-files != ''
        uses: actions/upload-artifact@v4
        with:
          name: link-check-results
          path: |
            link-check-results.json
            changed-files.txt
            public-files.txt
          retention-days: 30
|
||||
|
|
@ -1,148 +0,0 @@
|
|||
# PR Link Validation Workflow
|
||||
# Provides basic and parallel workflows
|
||||
# with smart strategy selection based on change volume
|
||||
name: PR Link Validation
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- 'content/**/*.md'
|
||||
- 'content/**/*.html'
|
||||
- 'api-docs/**/*.yml'
|
||||
- 'assets/**/*.js'
|
||||
- 'layouts/**/*.html'
|
||||
|
||||
jobs:
|
||||
# TEMPORARILY DISABLED - Remove this condition to re-enable link validation
|
||||
disabled-check:
|
||||
if: false # Set to true to re-enable the workflow
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- run: echo "Link validation is temporarily disabled"
|
||||
setup:
|
||||
name: Setup and Strategy Detection
|
||||
runs-on: ubuntu-latest
|
||||
if: false # TEMPORARILY DISABLED - Remove this condition to re-enable
|
||||
outputs:
|
||||
strategy: ${{ steps.determine-strategy.outputs.strategy }}
|
||||
has-changes: ${{ steps.determine-strategy.outputs.has-changes }}
|
||||
matrix: ${{ steps.determine-strategy.outputs.matrix }}
|
||||
all-files: ${{ steps.changed-files.outputs.all_changed_files }}
|
||||
cache-hit-rate: ${{ steps.determine-strategy.outputs.cache-hit-rate }}
|
||||
cache-hits: ${{ steps.determine-strategy.outputs.cache-hits }}
|
||||
cache-misses: ${{ steps.determine-strategy.outputs.cache-misses }}
|
||||
original-file-count: ${{ steps.determine-strategy.outputs.original-file-count }}
|
||||
validation-file-count: ${{ steps.determine-strategy.outputs.validation-file-count }}
|
||||
cache-message: ${{ steps.determine-strategy.outputs.message }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup docs environment
|
||||
uses: ./.github/actions/setup-docs-env
|
||||
|
||||
- name: Get changed files
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@v41
|
||||
with:
|
||||
files: |
|
||||
content/**/*.md
|
||||
content/**/*.html
|
||||
api-docs/**/*.yml
|
||||
|
||||
- name: Determine validation strategy
|
||||
id: determine-strategy
|
||||
run: |
|
||||
if [[ "${{ steps.changed-files.outputs.any_changed }}" != "true" ]]; then
|
||||
echo "No relevant files changed"
|
||||
echo "strategy=none" >> $GITHUB_OUTPUT
|
||||
echo "has-changes=false" >> $GITHUB_OUTPUT
|
||||
echo "matrix={\"include\":[]}" >> $GITHUB_OUTPUT
|
||||
echo "cache-hit-rate=100" >> $GITHUB_OUTPUT
|
||||
echo "cache-hits=0" >> $GITHUB_OUTPUT
|
||||
echo "cache-misses=0" >> $GITHUB_OUTPUT
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Use our matrix generator with cache awareness
|
||||
files="${{ steps.changed-files.outputs.all_changed_files }}"
|
||||
|
||||
echo "🔍 Analyzing ${files} for cache-aware validation..."
|
||||
|
||||
# Generate matrix and capture outputs
|
||||
result=$(node .github/scripts/matrix-generator.js \
|
||||
--min-files-parallel 10 \
|
||||
--max-concurrent 5 \
|
||||
--output-format github \
|
||||
$files)
|
||||
|
||||
# Parse all outputs from matrix generator
|
||||
while IFS='=' read -r key value; do
|
||||
case "$key" in
|
||||
strategy|has-changes|cache-hit-rate|cache-hits|cache-misses|original-file-count|validation-file-count|message)
|
||||
echo "$key=$value" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
matrix)
|
||||
echo "matrix=$value" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
esac
|
||||
done <<< "$result"
|
||||
|
||||
# Extract values for logging
|
||||
strategy=$(echo "$result" | grep "^strategy=" | cut -d'=' -f2)
|
||||
cache_hit_rate=$(echo "$result" | grep "^cache-hit-rate=" | cut -d'=' -f2)
|
||||
cache_message=$(echo "$result" | grep "^message=" | cut -d'=' -f2-)
|
||||
|
||||
echo "📊 Selected strategy: $strategy"
|
||||
if [[ -n "$cache_hit_rate" ]]; then
|
||||
echo "📈 Cache hit rate: ${cache_hit_rate}%"
|
||||
fi
|
||||
if [[ -n "$cache_message" ]]; then
|
||||
echo "$cache_message"
|
||||
fi
|
||||
|
||||
validate:
|
||||
name: ${{ matrix.name }}
|
||||
needs: setup
|
||||
if: false # TEMPORARILY DISABLED - Original condition: needs.setup.outputs.has-changes == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix: ${{ fromJson(needs.setup.outputs.matrix) }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup docs environment
|
||||
uses: ./.github/actions/setup-docs-env
|
||||
|
||||
- name: Validate links
|
||||
uses: ./.github/actions/validate-links
|
||||
with:
|
||||
files: ${{ matrix.files || needs.setup.outputs.all-files }}
|
||||
product-name: ${{ matrix.product }}
|
||||
cache-enabled: ${{ matrix.cacheEnabled || 'true' }}
|
||||
cache-key: link-validation-${{ hashFiles(matrix.files || needs.setup.outputs.all-files) }}
|
||||
timeout: 900
|
||||
|
||||
report:
|
||||
name: Report Results
|
||||
needs: [setup, validate]
|
||||
if: false # TEMPORARILY DISABLED - Original condition: always() && needs.setup.outputs.has-changes == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup docs environment
|
||||
uses: ./.github/actions/setup-docs-env
|
||||
|
||||
- name: Report broken links
|
||||
uses: ./.github/actions/report-broken-links
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
max-links-per-file: 20
|
||||
|
|
@ -3,11 +3,14 @@
|
|||
public
|
||||
.*.swp
|
||||
node_modules
|
||||
package-lock.json
|
||||
.config*
|
||||
**/.env*
|
||||
*.log
|
||||
/resources
|
||||
.hugo_build.lock
|
||||
|
||||
# Content generation
|
||||
/content/influxdb*/**/api/**/*.html
|
||||
!api-docs/**/.config.yml
|
||||
/api-docs/redoc-static.html*
|
||||
|
|
@ -16,18 +19,22 @@ node_modules
|
|||
!telegraf-build/templates
|
||||
!telegraf-build/scripts
|
||||
!telegraf-build/README.md
|
||||
|
||||
# CI/CD tool files
|
||||
/cypress/downloads/*
|
||||
/cypress/screenshots/*
|
||||
/cypress/videos/*
|
||||
.lycheecache
|
||||
test-results.xml
|
||||
/influxdb3cli-build-scripts/content
|
||||
tmp
|
||||
|
||||
# IDE files
|
||||
.vscode/*
|
||||
!.vscode/launch.json
|
||||
.idea
|
||||
**/config.toml
|
||||
package-lock.json
|
||||
tmp
|
||||
|
||||
# Context files for LLMs and AI tools
|
||||
# User context files for AI assistant tools
|
||||
.context/*
|
||||
!.context/README.md
|
||||
|
|
|
|||
219
TESTING.md
219
TESTING.md
|
|
@ -121,96 +121,169 @@ Potential causes:
|
|||
# This is ignored
|
||||
```
|
||||
|
||||
## Link Validation Testing
|
||||
## Link Validation with Link-Checker
|
||||
|
||||
Link validation uses Cypress for e2e browser-based testing against the Hugo site to ensure all internal and external links work correctly.
|
||||
Link validation uses the `link-checker` tool to validate internal and external links in documentation files.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
#### Installation
|
||||
|
||||
**Option 1: Download from docs-v2 releases (recommended)**
|
||||
|
||||
The link-checker binary is distributed via docs-v2 releases for reliable access from GitHub Actions workflows:
|
||||
|
||||
```bash
|
||||
# Test specific files
|
||||
yarn test:links content/influxdb3/core/**/*.md
|
||||
# Download binary from docs-v2 releases
|
||||
curl -L -o link-checker \
|
||||
https://github.com/influxdata/docs-v2/releases/download/link-checker-v1.0.0/link-checker-linux-x86_64
|
||||
chmod +x link-checker
|
||||
|
||||
# Test all links (may take a long time)
|
||||
yarn test:links
|
||||
|
||||
# Test by product (may take a long time)
|
||||
yarn test:links:v3
|
||||
yarn test:links:v2
|
||||
yarn test:links:telegraf
|
||||
yarn test:links:chronograf
|
||||
yarn test:links:kapacitor
|
||||
# Verify installation
|
||||
./link-checker --version
|
||||
```
|
||||
|
||||
### How Link Validation Works
|
||||
**Option 2: Build from source**
|
||||
|
||||
The tests:
|
||||
1. Start a Hugo development server
|
||||
2. Navigate to each page in a browser
|
||||
3. Check all links for validity
|
||||
4. Report broken or invalid links
|
||||
```bash
|
||||
# Clone and build link-checker
|
||||
git clone https://github.com/influxdata/docs-tooling.git
|
||||
cd docs-tooling/link-checker
|
||||
cargo build --release
|
||||
|
||||
# Copy binary to your PATH or use directly
|
||||
cp target/release/link-checker /usr/local/bin/
|
||||
```
|
||||
|
||||
#### Core Commands
|
||||
|
||||
```bash
|
||||
# Map content files to public HTML files
|
||||
link-checker map content/path/to/file.md
|
||||
|
||||
# Check links in HTML files
|
||||
link-checker check public/path/to/file.html
|
||||
|
||||
# Generate configuration file
|
||||
link-checker config
|
||||
```
|
||||
|
||||
### Content Mapping Workflows
|
||||
|
||||
#### Scenario 1: Map and check InfluxDB 3 Core content
|
||||
|
||||
```bash
|
||||
# Map Markdown files to HTML
|
||||
link-checker map content/influxdb3/core/get-started/
|
||||
|
||||
# Check links in mapped HTML files
|
||||
link-checker check public/influxdb3/core/get-started/
|
||||
```
|
||||
|
||||
#### Scenario 2: Map and check shared CLI content
|
||||
|
||||
```bash
|
||||
# Map shared content files
|
||||
link-checker map content/shared/influxdb3-cli/
|
||||
|
||||
# Check the mapped output files
|
||||
# (link-checker map outputs the HTML file paths)
|
||||
link-checker map content/shared/influxdb3-cli/ | \
|
||||
xargs link-checker check
|
||||
```
|
||||
|
||||
#### Scenario 3: Direct HTML checking
|
||||
|
||||
```bash
|
||||
# Check HTML files directly without mapping
|
||||
link-checker check public/influxdb3/core/get-started/
|
||||
```
|
||||
|
||||
#### Combined workflow for changed files
|
||||
|
||||
```bash
|
||||
# Check only files changed in the last commit
|
||||
git diff --name-only HEAD~1 HEAD | grep '\.md$' | \
|
||||
xargs link-checker map | \
|
||||
xargs link-checker check
|
||||
```
|
||||
|
||||
### Configuration Options
|
||||
|
||||
#### Local usage (default configuration)
|
||||
|
||||
```bash
|
||||
# Uses default settings or test.lycherc.toml if present
|
||||
link-checker check public/influxdb3/core/get-started/
|
||||
```
|
||||
|
||||
#### Production usage (GitHub Actions)
|
||||
|
||||
```bash
|
||||
# Use production configuration with comprehensive exclusions
|
||||
link-checker check \
|
||||
--config .ci/link-checker/production.lycherc.toml \
|
||||
public/influxdb3/core/get-started/
|
||||
```
|
||||
|
||||
### GitHub Actions Integration
|
||||
|
||||
#### Composite Action
|
||||
**Automated Integration (docs-v2)**
|
||||
|
||||
The `.github/actions/validate-links/` composite action provides reusable link validation:
|
||||
The docs-v2 repository includes automated link checking for pull requests:
|
||||
|
||||
- **Trigger**: Runs automatically on PRs that modify content files
|
||||
- **Binary distribution**: Downloads the latest pre-built binary from docs-v2 releases
|
||||
- **Smart detection**: Only checks files affected by PR changes
|
||||
- **Production config**: Uses optimized settings with exclusions for GitHub, social media, etc.
|
||||
- **Results reporting**: Broken links reported as GitHub annotations with detailed summaries
|
||||
|
||||
The workflow automatically:
|
||||
1. Detects content changes in PRs using the GitHub Files API
|
||||
2. Downloads the latest link-checker binary from docs-v2 releases
|
||||
3. Builds Hugo site and maps changed content to public HTML files
|
||||
4. Runs link checking with production configuration
|
||||
5. Reports results with annotations and step summaries
|
||||
|
||||
**Manual Integration (other repositories)**
|
||||
|
||||
For other repositories, you can integrate link checking manually:
|
||||
|
||||
```yaml
|
||||
- uses: ./.github/actions/validate-links
|
||||
with:
|
||||
files: "content/influxdb3/core/file.md content/influxdb/v2/file2.md"
|
||||
product-name: "core"
|
||||
cache-enabled: "true"
|
||||
cache-key: "link-validation"
|
||||
name: Link Check
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- 'content/**/*.md'
|
||||
|
||||
jobs:
|
||||
link-check:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Download link-checker
|
||||
run: |
|
||||
curl -L -o link-checker \
|
||||
https://github.com/influxdata/docs-v2/releases/download/link-checker-v1.0.0/link-checker-linux-x86_64
|
||||
chmod +x link-checker
|
||||
./link-checker --version
|
||||
|
||||
- name: Build Hugo site
|
||||
run: |
|
||||
npm install
|
||||
npx hugo --minify
|
||||
|
||||
- name: Check changed files
|
||||
run: |
|
||||
git diff --name-only origin/main HEAD | \
|
||||
grep '\.md$' | \
|
||||
xargs ./link-checker map | \
|
||||
xargs ./link-checker check \
|
||||
--config .ci/link-checker/production.lycherc.toml
|
||||
```
|
||||
|
||||
#### Matrix Generator
|
||||
|
||||
The `.github/scripts/matrix-generator.js` script provides intelligent strategy selection:
|
||||
|
||||
- **Sequential validation**: For small changes (< 10 files) or single-product changes
|
||||
- **Parallel validation**: For large changes across multiple products (up to 5 concurrent jobs)
|
||||
|
||||
Test locally:
|
||||
|
||||
```bash
|
||||
node .github/scripts/matrix-generator.js content/influxdb3/core/file1.md content/influxdb/v2/file2.md
|
||||
```
|
||||
|
||||
Configuration options:
|
||||
- `--max-concurrent <n>`: Maximum parallel jobs (default: 5)
|
||||
- `--force-sequential`: Force sequential execution
|
||||
- `--min-files-parallel <n>`: Minimum files for parallel (default: 10)
|
||||
|
||||
### Caching for Link Validation
|
||||
|
||||
Link validation supports caching to improve performance:
|
||||
|
||||
- **Cache location**: `.cache/link-validation/` (local), GitHub Actions cache (CI)
|
||||
- **Cache keys**: Based on content file hashes
|
||||
- **TTL**: 30 days by default, configurable
|
||||
|
||||
#### Cache Configuration Options
|
||||
|
||||
```bash
|
||||
# Use 7-day cache for more frequent validation
|
||||
yarn test:links --cache-ttl=7 content/influxdb3/**/*.md
|
||||
|
||||
# Use 1-day cache via environment variable
|
||||
LINK_CACHE_TTL_DAYS=1 yarn test:links content/**/*.md
|
||||
|
||||
# Clean up expired cache entries
|
||||
node .github/scripts/incremental-validator.js --cleanup
|
||||
```
|
||||
|
||||
#### How Caching Works
|
||||
|
||||
- **Cache key**: Based on file path + content hash (file changes invalidate cache immediately)
|
||||
- **External links**: Cached for the TTL period since URLs rarely change
|
||||
- **Internal links**: Effectively cached until file content changes
|
||||
- **Automatic cleanup**: Expired entries are removed on access and via `--cleanup`
|
||||
|
||||
## Style Linting (Vale)
|
||||
|
||||
Style linting uses [Vale](https://vale.sh/) to enforce documentation writing standards, branding guidelines, and vocabulary consistency.
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ prepend: |
|
|||
> [!Note]
|
||||
> InfluxDB 3 Core is purpose-built for real-time data monitoring and recent data.
|
||||
> InfluxDB 3 Enterprise builds on top of Core with support for historical data
|
||||
> analysis and extended features.
|
||||
> querying, high availability, read replicas, and more.
|
||||
> Enterprise will soon unlock
|
||||
> enhanced security, row-level deletions, an administration UI, and more.
|
||||
|
|
|
|||
Loading…
Reference in New Issue