diff --git a/.ci/link-checker/default.lycherc.toml b/.ci/link-checker/default.lycherc.toml new file mode 100644 index 000000000..22f97a0f9 --- /dev/null +++ b/.ci/link-checker/default.lycherc.toml @@ -0,0 +1,66 @@ +# Lychee link checker configuration +# Generated by link-checker +[lychee] +# Performance settings + +# Maximum number of retries for failed checks + +max_retries = 3 + +# Timeout for each link check (in seconds) +timeout = 30 + +# Maximum number of concurrent checks +max_concurrency = 128 + +skip_code_blocks = false + +# HTTP settings +# Identify the tool to external services +user_agent = "Mozilla/5.0 (compatible; link-checker)" + +# Accept these HTTP status codes as valid +accept = [200, 201, 202, 203, 204, 206, 301, 302, 303, 304, +307, 308] + +# Skip these URL schemes +scheme = ["file", "mailto", "tel"] + +# Exclude patterns (regex supported) +exclude = [ + # Localhost URLs + "^https?://localhost", + "^https?://127\\.0\\.0\\.1", + + # Common CI/CD environments + "^https?://.*\\.local", + + # Example domains used in documentation + "^https?://example\\.(com|org|net)", + + # Placeholder URLs from code block filtering + "https://example.com/REMOVED_FROM_CODE_BLOCK", + "example.com/INLINE_CODE_URL", + + # URLs that require authentication + "^https?://.*\\.slack\\.com", + "^https?://.*\\.atlassian\\.net", + + # GitHub URLs (often fail due to rate limiting and bot + # detection) + "^https?://github\\.com", + + # Common documentation placeholders + "YOUR_.*", + "REPLACE_.*", + "<.*>", +] + +# Request headers +[headers] +# Add custom headers here if needed +# "Authorization" = "Bearer $GITHUB_TOKEN" + +# Cache settings +cache = true +max_cache_age = "1d" \ No newline at end of file diff --git a/.ci/link-checker/production.lycherc.toml b/.ci/link-checker/production.lycherc.toml new file mode 100644 index 000000000..9b8be5aa3 --- /dev/null +++ b/.ci/link-checker/production.lycherc.toml @@ -0,0 +1,108 @@ +# Production Link Checker Configuration for InfluxData 
docs-v2 +# Optimized for performance, reliability, and reduced false positives +[lychee] +# Performance settings + +# Maximum number of retries for failed checks + +max_retries = 3 + +# Timeout for each link check (in seconds) +timeout = 30 + +# Maximum number of concurrent checks +max_concurrency = 128 + +skip_code_blocks = false + +# HTTP settings +# Identify the tool to external services +"User-Agent" = "Mozilla/5.0 (compatible; influxdata-link-checker/1.0; +https://github.com/influxdata/docs-v2)" +accept = [200, 201, 202, 203, 204, 206, 301, 302, 303, 304, 307, 308] + +# Skip these URL schemes +scheme = ["mailto", "tel"] + +# Performance optimizations +cache = true +max_cache_age = "1h" + +# Retry configuration for reliability +include_verbatim = false + +# Exclusion patterns for docs-v2 (regex supported) +exclude = [ + # Localhost URLs + "^https?://localhost", + "^https?://127\\.0\\.0\\.1", + + # Common CI/CD environments + "^https?://.*\\.local", + + # Example domains used in documentation + "^https?://example\\.(com|org|net)", + + # Placeholder URLs from code block filtering + "https://example.com/REMOVED_FROM_CODE_BLOCK", + "example.com/INLINE_CODE_URL", + + # URLs that require authentication + "^https?://.*\\.slack\\.com", + "^https?://.*\\.atlassian\\.net", + + # GitHub URLs (often fail due to rate limiting and bot + # detection) + "^https?://github\\.com", + + # Social media URLs (often block bots) + "^https?://reddit\\.com", + "^https?://.*\\.reddit\\.com", + + # InfluxData support URLs (certificate/SSL issues in CI) + "^https?://support\\.influxdata\\.com", + + # Common documentation placeholders + "YOUR_.*", + "REPLACE_.*", + "<.*>", +] + +# Request headers +[headers] +# Add custom headers here if needed +# "Authorization" = "Bearer $GITHUB_TOKEN" +"Accept" = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" +"Accept-Language" = "en-US,en;q=0.5" +"Accept-Encoding" = "gzip, deflate" +"DNT" = "1" +"Connection" = "keep-alive" 
+"Upgrade-Insecure-Requests" = "1" + +[ci] +# CI-specific settings + +[ci.github_actions] +output_format = "json" +create_annotations = true +fail_fast = false +max_annotations = 50 # Limit to avoid overwhelming PR comments + +[ci.performance] +# Performance tuning for CI environment +parallel_requests = 32 +connection_timeout = 10 +read_timeout = 30 + +# Resource limits +max_memory_mb = 512 +max_execution_time_minutes = 10 + +[reporting] +# Report configuration +include_fragments = false +verbose = false +no_progress = true # Disable progress bar in CI + +# Summary settings +show_success_count = true +show_skipped_count = true \ No newline at end of file diff --git a/.github/actions/report-broken-links/action.yml b/.github/actions/report-broken-links/action.yml deleted file mode 100644 index 9e95e5605..000000000 --- a/.github/actions/report-broken-links/action.yml +++ /dev/null @@ -1,103 +0,0 @@ -name: 'Report Broken Links' -description: 'Downloads broken link reports, generates PR comment, and posts results' - -inputs: - github-token: - description: 'GitHub token for posting comments' - required: false - default: ${{ github.token }} - max-links-per-file: - description: 'Maximum links to show per file in comment' - required: false - default: '20' - include-success-message: - description: 'Include success message when no broken links found' - required: false - default: 'true' - -outputs: - has-broken-links: - description: 'Whether broken links were found (true/false)' - value: ${{ steps.generate-comment.outputs.has-broken-links }} - broken-link-count: - description: 'Number of broken links found' - value: ${{ steps.generate-comment.outputs.broken-link-count }} - -runs: - using: 'composite' - steps: - - name: Download broken link reports - uses: actions/download-artifact@v4 - with: - path: reports - continue-on-error: true - - - name: Generate PR comment - id: generate-comment - run: | - # Generate comment using our script - node .github/scripts/comment-generator.js 
\ - --max-links ${{ inputs.max-links-per-file }} \ - ${{ inputs.include-success-message == 'false' && '--no-success' || '' }} \ - --output-file comment.md \ - reports/ || echo "No reports found or errors occurred" - - # Check if comment file was created and has content - if [[ -f comment.md && -s comment.md ]]; then - echo "comment-generated=true" >> $GITHUB_OUTPUT - - # Count broken links by parsing the comment - broken_count=$(grep -o "Found [0-9]* broken link" comment.md | grep -o "[0-9]*" || echo "0") - echo "broken-link-count=$broken_count" >> $GITHUB_OUTPUT - - # Check if there are actually broken links (not just a success comment) - if [[ "$broken_count" -gt 0 ]]; then - echo "has-broken-links=true" >> $GITHUB_OUTPUT - else - echo "has-broken-links=false" >> $GITHUB_OUTPUT - fi - else - echo "has-broken-links=false" >> $GITHUB_OUTPUT - echo "broken-link-count=0" >> $GITHUB_OUTPUT - echo "comment-generated=false" >> $GITHUB_OUTPUT - fi - shell: bash - - - name: Post PR comment - if: steps.generate-comment.outputs.comment-generated == 'true' - uses: actions/github-script@v7 - with: - github-token: ${{ inputs.github-token }} - script: | - const fs = require('fs'); - - if (fs.existsSync('comment.md')) { - const comment = fs.readFileSync('comment.md', 'utf8'); - - if (comment.trim()) { - await github.rest.issues.createComment({ - issue_number: context.issue.number, - owner: context.repo.owner, - repo: context.repo.repo, - body: comment - }); - } - } - - - name: Report validation results - run: | - has_broken_links="${{ steps.generate-comment.outputs.has-broken-links }}" - broken_count="${{ steps.generate-comment.outputs.broken-link-count }}" - - if [ "$has_broken_links" = "true" ]; then - echo "::error::❌ Link validation failed: Found $broken_count broken link(s)" - echo "Check the PR comment for detailed broken link information" - exit 1 - else - echo "::notice::✅ Link validation passed successfully" - echo "All links in the changed files are valid" - if [ "${{ 
steps.generate-comment.outputs.comment-generated }}" = "true" ]; then - echo "PR comment posted with validation summary and cache statistics" - fi - fi - shell: bash \ No newline at end of file diff --git a/.github/actions/validate-links/action.yml b/.github/actions/validate-links/action.yml deleted file mode 100644 index cf180556c..000000000 --- a/.github/actions/validate-links/action.yml +++ /dev/null @@ -1,106 +0,0 @@ -name: 'Validate Links' -description: 'Runs e2e browser-based link validation tests against Hugo site using Cypress' - -inputs: - files: - description: 'Space-separated list of files to validate' - required: true - product-name: - description: 'Product name for reporting (optional)' - required: false - default: '' - cache-enabled: - description: 'Enable link validation caching' - required: false - default: 'true' - cache-key: - description: 'Cache key prefix for this validation run' - required: false - default: 'link-validation' - timeout: - description: 'Test timeout in seconds' - required: false - default: '900' - -outputs: - failed: - description: 'Whether validation failed (true/false)' - value: ${{ steps.validate.outputs.failed }} - -runs: - using: 'composite' - steps: - - name: Restore link validation cache - if: inputs.cache-enabled == 'true' - uses: actions/cache@v4 - with: - path: .cache/link-validation - key: ${{ inputs.cache-key }}-${{ runner.os }}-${{ hashFiles('content/**/*.md', 'content/**/*.html') }} - restore-keys: | - ${{ inputs.cache-key }}-${{ runner.os }}- - ${{ inputs.cache-key }}- - - - name: Run link validation - shell: bash - run: | - # Set CI-specific environment variables - export CI=true - export GITHUB_ACTIONS=true - export NODE_OPTIONS="--max-old-space-size=4096" - - # Set test runner timeout for Hugo shutdown - export HUGO_SHUTDOWN_TIMEOUT=5000 - - # Add timeout to prevent hanging (timeout command syntax: timeout DURATION COMMAND) - timeout ${{ inputs.timeout }}s node cypress/support/run-e2e-specs.js ${{ inputs.files 
}} \ - --spec cypress/e2e/content/article-links.cy.js || { - exit_code=$? - - # Handle timeout specifically - if [ $exit_code -eq 124 ]; then - echo "::error::Link validation timed out after ${{ inputs.timeout }} seconds" - echo "::notice::This may indicate Hugo server startup issues or very slow link validation" - else - echo "::error::Link validation failed with exit code $exit_code" - fi - - # Check for specific error patterns and logs (but don't dump full content) - if [ -f /tmp/hugo_server.log ]; then - echo "Hugo server log available for debugging" - fi - - if [ -f hugo.log ]; then - echo "Additional Hugo log available for debugging" - fi - - if [ -f /tmp/broken_links_report.json ]; then - # Only show summary, not full report (full report is uploaded as artifact) - broken_count=$(grep -o '"url":' /tmp/broken_links_report.json | wc -l || echo "0") - echo "Broken links report contains $broken_count entries" - fi - - exit $exit_code - } - - # Report success if we get here - echo "::notice::✅ Link validation completed successfully" - echo "No broken links detected in the tested files" - - - name: Upload logs on failure - if: failure() - uses: actions/upload-artifact@v4 - with: - name: validation-logs-${{ inputs.product-name && inputs.product-name || 'default' }} - path: | - hugo.log - /tmp/hugo_server.log - if-no-files-found: ignore - - - - name: Upload broken links report - if: always() - uses: actions/upload-artifact@v4 - with: - name: broken-links-report${{ inputs.product-name && format('-{0}', inputs.product-name) || '' }} - path: /tmp/broken_links_report.json - if-no-files-found: ignore \ No newline at end of file diff --git a/.github/workflows/pr-link-check.yml b/.github/workflows/pr-link-check.yml new file mode 100644 index 000000000..b0764089a --- /dev/null +++ b/.github/workflows/pr-link-check.yml @@ -0,0 +1,241 @@ +name: Link Check PR Changes + +on: + pull_request: + paths: + - 'content/**/*.md' + - 'data/**/*.yml' + - 'layouts/**/*.html' + types: 
[opened, synchronize, reopened] + +jobs: + link-check: + name: Check links in affected files + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Detect content changes + id: detect + run: | + echo "🔍 Detecting changes between ${{ github.base_ref }} and ${{ github.sha }}" + + # For PRs, use the GitHub Files API to get changed files + if [[ "${{ github.event_name }}" == "pull_request" ]]; then + echo "Using GitHub API to detect PR changes..." + curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + "https://api.github.com/repos/${{ github.repository }}/pulls/${{ github.event.number }}/files" \ + | jq -r '.[].filename' > all_changed_files.txt + else + echo "Using git diff to detect changes..." + git diff --name-only ${{ github.event.before }}..${{ github.sha }} > all_changed_files.txt + fi + + # Filter for content markdown files + CHANGED_FILES=$(grep '^content/.*\.md$' all_changed_files.txt || true) + + echo "📁 All changed files:" + cat all_changed_files.txt + echo "" + echo "📝 Content markdown files:" + echo "$CHANGED_FILES" + + if [[ -n "$CHANGED_FILES" ]]; then + echo "✅ Found $(echo "$CHANGED_FILES" | wc -l) changed content file(s)" + echo "has-changes=true" >> $GITHUB_OUTPUT + echo "changed-content<<EOF" >> $GITHUB_OUTPUT + echo "$CHANGED_FILES" >> $GITHUB_OUTPUT + echo "EOF" >> $GITHUB_OUTPUT + + # Check if any shared content files were modified + SHARED_CHANGES=$(echo "$CHANGED_FILES" | grep '^content/shared/' || true) + if [[ -n "$SHARED_CHANGES" ]]; then + echo "has-shared-content=true" >> $GITHUB_OUTPUT + echo "🔄 Detected shared content changes: $SHARED_CHANGES" + else + echo "has-shared-content=false" >> $GITHUB_OUTPUT + fi + else + echo "❌ No content changes detected" + echo "has-changes=false" >> $GITHUB_OUTPUT + echo "has-shared-content=false" >> $GITHUB_OUTPUT + fi + + - name: Skip if no content changes + if: steps.detect.outputs.has-changes == 'false' + run: | + echo 
"No content changes detected in this PR - skipping link check" + echo "✅ **No content changes detected** - link check skipped" >> $GITHUB_STEP_SUMMARY + + - name: Setup Node.js + if: steps.detect.outputs.has-changes == 'true' + uses: actions/setup-node@v4 + with: + node-version: '20' + cache: 'yarn' + + - name: Install dependencies + if: steps.detect.outputs.has-changes == 'true' + run: yarn install --frozen-lockfile + + - name: Build Hugo site + if: steps.detect.outputs.has-changes == 'true' + run: npx hugo --minify + + - name: Download link-checker binary + if: steps.detect.outputs.has-changes == 'true' + run: | + echo "Downloading link-checker binary from docs-v2 releases..." + + # Download from docs-v2's own releases (always accessible) + curl -L -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \ + -o link-checker-info.json \ + "https://api.github.com/repos/influxdata/docs-v2/releases/tags/link-checker-v1.0.0" + + # Extract download URL for linux binary + DOWNLOAD_URL=$(jq -r '.assets[] | select(.name | test("link-checker.*linux")) | .url' link-checker-info.json) + + if [[ "$DOWNLOAD_URL" == "null" || -z "$DOWNLOAD_URL" ]]; then + echo "❌ No linux binary found in release" + echo "Available assets:" + jq -r '.assets[].name' link-checker-info.json + exit 1 + fi + + echo "📥 Downloading: $DOWNLOAD_URL" + curl -L -H "Accept: application/octet-stream" \ + -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \ + -o link-checker "$DOWNLOAD_URL" + + chmod +x link-checker + ./link-checker --version + + - name: Verify link checker config exists + if: steps.detect.outputs.has-changes == 'true' + run: | + if [[ ! 
-f .ci/link-checker/production.lycherc.toml ]]; then + echo "❌ Configuration file .ci/link-checker/production.lycherc.toml not found" + echo "Please copy production.lycherc.toml from docs-tooling/link-checker/" + exit 1 + fi + echo "✅ Using configuration: .ci/link-checker/production.lycherc.toml" + + - name: Map changed content to public files + if: steps.detect.outputs.has-changes == 'true' + id: mapping + run: | + echo "Mapping changed content files to public HTML files..." + + # Create temporary file with changed content files + echo "${{ steps.detect.outputs.changed-content }}" > changed-files.txt + + # Map content files to public files + PUBLIC_FILES=$(cat changed-files.txt | xargs -r ./link-checker map --existing-only) + + if [[ -n "$PUBLIC_FILES" ]]; then + echo "Found affected public files:" + echo "$PUBLIC_FILES" + echo "public-files<<EOF" >> $GITHUB_OUTPUT + echo "$PUBLIC_FILES" >> $GITHUB_OUTPUT + echo "EOF" >> $GITHUB_OUTPUT + + # Count files for summary + FILE_COUNT=$(echo "$PUBLIC_FILES" | wc -l) + echo "file-count=$FILE_COUNT" >> $GITHUB_OUTPUT + else + echo "No public files found to check" + echo "public-files=" >> $GITHUB_OUTPUT + echo "file-count=0" >> $GITHUB_OUTPUT + fi + + - name: Run link checker + if: steps.detect.outputs.has-changes == 'true' && steps.mapping.outputs.public-files != '' + id: link-check + run: | + echo "Checking links in ${{ steps.mapping.outputs.file-count }} affected files..." + + # Create temporary file with public files list + echo "${{ steps.mapping.outputs.public-files }}" > public-files.txt + + # Run link checker with detailed JSON output + set +e # Don't fail immediately on error + + cat public-files.txt | xargs -r ./link-checker check \ + --config .ci/link-checker/production.lycherc.toml \ + --format json \ + --output link-check-results.json + + EXIT_CODE=$?
+ + if [[ -f link-check-results.json ]]; then + # Parse results + BROKEN_COUNT=$(jq -r '.summary.broken_count // 0' link-check-results.json) + TOTAL_COUNT=$(jq -r '.summary.total_checked // 0' link-check-results.json) + SUCCESS_RATE=$(jq -r '.summary.success_rate // 0' link-check-results.json) + + echo "broken-count=$BROKEN_COUNT" >> $GITHUB_OUTPUT + echo "total-count=$TOTAL_COUNT" >> $GITHUB_OUTPUT + echo "success-rate=$SUCCESS_RATE" >> $GITHUB_OUTPUT + + if [[ $BROKEN_COUNT -gt 0 ]]; then + echo "❌ Found $BROKEN_COUNT broken links out of $TOTAL_COUNT total links" + echo "check-result=failed" >> $GITHUB_OUTPUT + else + echo "✅ All $TOTAL_COUNT links are valid" + echo "check-result=passed" >> $GITHUB_OUTPUT + fi + else + echo "❌ Link check failed to generate results" + echo "check-result=error" >> $GITHUB_OUTPUT + fi + + exit $EXIT_CODE + + - name: Process and report results + if: always() && steps.detect.outputs.has-changes == 'true' && steps.mapping.outputs.public-files != '' + run: | + if [[ -f link-check-results.json ]]; then + # Create detailed error annotations for broken links + if [[ "${{ steps.link-check.outputs.check-result }}" == "failed" ]]; then + echo "Creating error annotations for broken links..." + + jq -r '.broken_links[]? 
| + "::error file=\(.file // "unknown"),line=\(.line // 1)::Broken link: \(.url) - \(.error // "Unknown error")"' \ + link-check-results.json || true + fi + + # Generate summary comment + cat >> $GITHUB_STEP_SUMMARY << 'EOF' + ## Link Check Results + + **Files Checked:** ${{ steps.mapping.outputs.file-count }} + **Total Links:** ${{ steps.link-check.outputs.total-count }} + **Broken Links:** ${{ steps.link-check.outputs.broken-count }} + **Success Rate:** ${{ steps.link-check.outputs.success-rate }}% + + EOF + + if [[ "${{ steps.link-check.outputs.check-result }}" == "failed" ]]; then + echo "❌ **Link check failed** - see annotations above for details" >> $GITHUB_STEP_SUMMARY + else + echo "✅ **All links are valid**" >> $GITHUB_STEP_SUMMARY + fi + else + echo "⚠️ **Link check could not complete** - no results file generated" >> $GITHUB_STEP_SUMMARY + fi + + - name: Upload detailed results + if: always() && steps.detect.outputs.has-changes == 'true' && steps.mapping.outputs.public-files != '' + uses: actions/upload-artifact@v4 + with: + name: link-check-results + path: | + link-check-results.json + changed-files.txt + public-files.txt + retention-days: 30 \ No newline at end of file diff --git a/.github/workflows/pr-link-validation.yml b/.github/workflows/pr-link-validation.yml deleted file mode 100644 index 8d6a8a735..000000000 --- a/.github/workflows/pr-link-validation.yml +++ /dev/null @@ -1,148 +0,0 @@ -# PR Link Validation Workflow -# Provides basic and parallel workflows -# with smart strategy selection based on change volume -name: PR Link Validation - -on: - pull_request: - paths: - - 'content/**/*.md' - - 'content/**/*.html' - - 'api-docs/**/*.yml' - - 'assets/**/*.js' - - 'layouts/**/*.html' - -jobs: - # TEMPORARILY DISABLED - Remove this condition to re-enable link validation - disabled-check: - if: false # Set to true to re-enable the workflow - runs-on: ubuntu-latest - steps: - - run: echo "Link validation is temporarily disabled" - setup: - name: Setup 
and Strategy Detection - runs-on: ubuntu-latest - if: false # TEMPORARILY DISABLED - Remove this condition to re-enable - outputs: - strategy: ${{ steps.determine-strategy.outputs.strategy }} - has-changes: ${{ steps.determine-strategy.outputs.has-changes }} - matrix: ${{ steps.determine-strategy.outputs.matrix }} - all-files: ${{ steps.changed-files.outputs.all_changed_files }} - cache-hit-rate: ${{ steps.determine-strategy.outputs.cache-hit-rate }} - cache-hits: ${{ steps.determine-strategy.outputs.cache-hits }} - cache-misses: ${{ steps.determine-strategy.outputs.cache-misses }} - original-file-count: ${{ steps.determine-strategy.outputs.original-file-count }} - validation-file-count: ${{ steps.determine-strategy.outputs.validation-file-count }} - cache-message: ${{ steps.determine-strategy.outputs.message }} - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Setup docs environment - uses: ./.github/actions/setup-docs-env - - - name: Get changed files - id: changed-files - uses: tj-actions/changed-files@v41 - with: - files: | - content/**/*.md - content/**/*.html - api-docs/**/*.yml - - - name: Determine validation strategy - id: determine-strategy - run: | - if [[ "${{ steps.changed-files.outputs.any_changed }}" != "true" ]]; then - echo "No relevant files changed" - echo "strategy=none" >> $GITHUB_OUTPUT - echo "has-changes=false" >> $GITHUB_OUTPUT - echo "matrix={\"include\":[]}" >> $GITHUB_OUTPUT - echo "cache-hit-rate=100" >> $GITHUB_OUTPUT - echo "cache-hits=0" >> $GITHUB_OUTPUT - echo "cache-misses=0" >> $GITHUB_OUTPUT - exit 0 - fi - - # Use our matrix generator with cache awareness - files="${{ steps.changed-files.outputs.all_changed_files }}" - - echo "🔍 Analyzing ${files} for cache-aware validation..." 
- - # Generate matrix and capture outputs - result=$(node .github/scripts/matrix-generator.js \ - --min-files-parallel 10 \ - --max-concurrent 5 \ - --output-format github \ - $files) - - # Parse all outputs from matrix generator - while IFS='=' read -r key value; do - case "$key" in - strategy|has-changes|cache-hit-rate|cache-hits|cache-misses|original-file-count|validation-file-count|message) - echo "$key=$value" >> $GITHUB_OUTPUT - ;; - matrix) - echo "matrix=$value" >> $GITHUB_OUTPUT - ;; - esac - done <<< "$result" - - # Extract values for logging - strategy=$(echo "$result" | grep "^strategy=" | cut -d'=' -f2) - cache_hit_rate=$(echo "$result" | grep "^cache-hit-rate=" | cut -d'=' -f2) - cache_message=$(echo "$result" | grep "^message=" | cut -d'=' -f2-) - - echo "📊 Selected strategy: $strategy" - if [[ -n "$cache_hit_rate" ]]; then - echo "📈 Cache hit rate: ${cache_hit_rate}%" - fi - if [[ -n "$cache_message" ]]; then - echo "$cache_message" - fi - - validate: - name: ${{ matrix.name }} - needs: setup - if: false # TEMPORARILY DISABLED - Original condition: needs.setup.outputs.has-changes == 'true' - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: ${{ fromJson(needs.setup.outputs.matrix) }} - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Setup docs environment - uses: ./.github/actions/setup-docs-env - - - name: Validate links - uses: ./.github/actions/validate-links - with: - files: ${{ matrix.files || needs.setup.outputs.all-files }} - product-name: ${{ matrix.product }} - cache-enabled: ${{ matrix.cacheEnabled || 'true' }} - cache-key: link-validation-${{ hashFiles(matrix.files || needs.setup.outputs.all-files) }} - timeout: 900 - - report: - name: Report Results - needs: [setup, validate] - if: false # TEMPORARILY DISABLED - Original condition: always() && needs.setup.outputs.has-changes == 'true' - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Setup 
docs environment - uses: ./.github/actions/setup-docs-env - - - name: Report broken links - uses: ./.github/actions/report-broken-links - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - max-links-per-file: 20 \ No newline at end of file diff --git a/.gitignore b/.gitignore index 0d9d333c3..32765da72 100644 --- a/.gitignore +++ b/.gitignore @@ -3,11 +3,14 @@ public .*.swp node_modules +package-lock.json .config* **/.env* *.log /resources .hugo_build.lock + +# Content generation /content/influxdb*/**/api/**/*.html !api-docs/**/.config.yml /api-docs/redoc-static.html* @@ -16,18 +19,22 @@ node_modules !telegraf-build/templates !telegraf-build/scripts !telegraf-build/README.md + +# CI/CD tool files /cypress/downloads/* /cypress/screenshots/* /cypress/videos/* +.lycheecache test-results.xml /influxdb3cli-build-scripts/content +tmp + +# IDE files .vscode/* !.vscode/launch.json .idea **/config.toml -package-lock.json -tmp -# Context files for LLMs and AI tools +# User context files for AI assistant tools .context/* !.context/README.md diff --git a/TESTING.md b/TESTING.md index 44a5006ae..e0a2f6f78 100644 --- a/TESTING.md +++ b/TESTING.md @@ -121,96 +121,169 @@ Potential causes: # This is ignored ``` -## Link Validation Testing +## Link Validation with Link-Checker -Link validation uses Cypress for e2e browser-based testing against the Hugo site to ensure all internal and external links work correctly. +Link validation uses the `link-checker` tool to validate internal and external links in documentation files. 
### Basic Usage +#### Installation + +**Option 1: Download from docs-v2 releases (recommended)** + +The link-checker binary is distributed via docs-v2 releases for reliable access from GitHub Actions workflows: + ```bash -# Test specific files -yarn test:links content/influxdb3/core/**/*.md +# Download binary from docs-v2 releases +curl -L -o link-checker \ + https://github.com/influxdata/docs-v2/releases/download/link-checker-v1.0.0/link-checker-linux-x86_64 +chmod +x link-checker -# Test all links (may take a long time) -yarn test:links - -# Test by product (may take a long time) -yarn test:links:v3 -yarn test:links:v2 -yarn test:links:telegraf -yarn test:links:chronograf -yarn test:links:kapacitor +# Verify installation +./link-checker --version ``` -### How Link Validation Works +**Option 2: Build from source** -The tests: -1. Start a Hugo development server -2. Navigate to each page in a browser -3. Check all links for validity -4. Report broken or invalid links +```bash +# Clone and build link-checker +git clone https://github.com/influxdata/docs-tooling.git +cd docs-tooling/link-checker +cargo build --release + +# Copy binary to your PATH or use directly +cp target/release/link-checker /usr/local/bin/ +``` + +#### Core Commands + +```bash +# Map content files to public HTML files +link-checker map content/path/to/file.md + +# Check links in HTML files +link-checker check public/path/to/file.html + +# Generate configuration file +link-checker config +``` + +### Content Mapping Workflows + +#### Scenario 1: Map and check InfluxDB 3 Core content + +```bash +# Map Markdown files to HTML +link-checker map content/influxdb3/core/get-started/ + +# Check links in mapped HTML files +link-checker check public/influxdb3/core/get-started/ +``` + +#### Scenario 2: Map and check shared CLI content + +```bash +# Map shared content files +link-checker map content/shared/influxdb3-cli/ + +# Check the mapped output files +# (link-checker map outputs the HTML file paths) 
+link-checker map content/shared/influxdb3-cli/ | \ + xargs link-checker check +``` + +#### Scenario 3: Direct HTML checking + +```bash +# Check HTML files directly without mapping +link-checker check public/influxdb3/core/get-started/ +``` + +#### Combined workflow for changed files + +```bash +# Check only files changed in the last commit +git diff --name-only HEAD~1 HEAD | grep '\.md$' | \ + xargs link-checker map | \ + xargs link-checker check +``` + +### Configuration Options + +#### Local usage (default configuration) + +```bash +# Uses default settings or test.lycherc.toml if present +link-checker check public/influxdb3/core/get-started/ +``` + +#### Production usage (GitHub Actions) + +```bash +# Use production configuration with comprehensive exclusions +link-checker check \ + --config .ci/link-checker/production.lycherc.toml \ + public/influxdb3/core/get-started/ +``` ### GitHub Actions Integration -#### Composite Action +**Automated Integration (docs-v2)** -The `.github/actions/validate-links/` composite action provides reusable link validation: +The docs-v2 repository includes automated link checking for pull requests: + +- **Trigger**: Runs automatically on PRs that modify content files +- **Binary distribution**: Downloads latest pre-built binary from docs-v2 releases +- **Smart detection**: Only checks files affected by PR changes +- **Production config**: Uses optimized settings with exclusions for GitHub, social media, etc. +- **Results reporting**: Broken links reported as GitHub annotations with detailed summaries + +The workflow automatically: +1. Detects content changes in PRs using GitHub Files API +2. Downloads latest link-checker binary from docs-v2 releases +3. Builds Hugo site and maps changed content to public HTML files +4. Runs link checking with production configuration +5. 
Reports results with annotations and step summaries + +**Manual Integration (other repositories)** + +For other repositories, you can integrate link checking manually: ```yaml -- uses: ./.github/actions/validate-links - with: - files: "content/influxdb3/core/file.md content/influxdb/v2/file2.md" - product-name: "core" - cache-enabled: "true" - cache-key: "link-validation" +name: Link Check +on: + pull_request: + paths: + - 'content/**/*.md' + +jobs: + link-check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Download link-checker + run: | + curl -L -o link-checker \ + https://github.com/influxdata/docs-tooling/releases/latest/download/link-checker-linux-x86_64 + chmod +x link-checker + cp target/release/link-checker ../../link-checker + cd ../.. + + - name: Build Hugo site + run: | + npm install + npx hugo --minify + + - name: Check changed files + run: | + git diff --name-only origin/main HEAD | \ + grep '\.md$' | \ + xargs ./link-checker map | \ + xargs ./link-checker check \ + --config .ci/link-checker/production.lycherc.toml ``` -#### Matrix Generator - -The `.github/scripts/matrix-generator.js` script provides intelligent strategy selection: - -- **Sequential validation**: For small changes (< 10 files) or single-product changes -- **Parallel validation**: For large changes across multiple products (up to 5 concurrent jobs) - -Test locally: - -```bash -node .github/scripts/matrix-generator.js content/influxdb3/core/file1.md content/influxdb/v2/file2.md -``` - -Configuration options: -- `--max-concurrent `: Maximum parallel jobs (default: 5) -- `--force-sequential`: Force sequential execution -- `--min-files-parallel `: Minimum files for parallel (default: 10) - -### Caching for Link Validation - -Link validation supports caching to improve performance: - -- **Cache location**: `.cache/link-validation/` (local), GitHub Actions cache (CI) -- **Cache keys**: Based on content file hashes -- **TTL**: 30 days by default, configurable - 
-#### Cache Configuration Options - -```bash -# Use 7-day cache for more frequent validation -yarn test:links --cache-ttl=7 content/influxdb3/**/*.md - -# Use 1-day cache via environment variable -LINK_CACHE_TTL_DAYS=1 yarn test:links content/**/*.md - -# Clean up expired cache entries -node .github/scripts/incremental-validator.js --cleanup -``` - -#### How Caching Works - -- **Cache key**: Based on file path + content hash (file changes invalidate cache immediately) -- **External links**: Cached for the TTL period since URLs rarely change -- **Internal links**: Effectively cached until file content changes -- **Automatic cleanup**: Expired entries are removed on access and via `--cleanup` - ## Style Linting (Vale) Style linting uses [Vale](https://vale.sh/) to enforce documentation writing standards, branding guidelines, and vocabulary consistency. diff --git a/content/influxdb3/core/get-started/_index.md b/content/influxdb3/core/get-started/_index.md index 16398f32f..72cbc7746 100644 --- a/content/influxdb3/core/get-started/_index.md +++ b/content/influxdb3/core/get-started/_index.md @@ -18,6 +18,7 @@ prepend: | > [!Note] > InfluxDB 3 Core is purpose-built for real-time data monitoring and recent data. > InfluxDB 3 Enterprise builds on top of Core with support for historical data + > analysis and extended features, > querying, high availability, read replicas, and more. > Enterprise will soon unlock > enhanced security, row-level deletions, an administration UI, and more.