# Link Check PR Changes
#
# Checks links in the rendered HTML pages affected by a pull request:
#   1. Detect which content Markdown files the PR changed.
#   2. Build the Hugo site and download the link-checker binary.
#   3. Map the changed content files to their public HTML files.
#   4. Run link-checker on those files, then publish a job summary,
#      inline annotations, and the raw results as an artifact.
#
# Security note: values derived from the PR (file names, refs) are passed to
# shell scripts through `env:` and never interpolated with `${{ }}` inside
# `run:` blocks, because file names in a PR are attacker-controlled.
name: Link Check PR Changes

on:
  pull_request:
    paths:
      - 'content/**/*.md'
      - 'data/**/*.yml'
      - 'layouts/**/*.html'
    types: [opened, synchronize, reopened]

# Least privilege: the job only reads repository contents/releases and the
# list of files in the pull request.
permissions:
  contents: read
  pull-requests: read

jobs:
  link-check:
    name: Check links in affected files
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # Outputs:
      #   has-changes         'true' when at least one content/**/*.md changed
      #   changed-content     newline-separated list of those files
      #   has-shared-content  'true' when any of them is under content/shared/
      - name: Detect content changes
        id: detect
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          EVENT_NAME: ${{ github.event_name }}
          REPO: ${{ github.repository }}
          PR_NUMBER: ${{ github.event.number }}
          BASE_REF: ${{ github.base_ref }}
          BEFORE_SHA: ${{ github.event.before }}
          HEAD_SHA: ${{ github.sha }}
        run: |
          echo "🔍 Detecting changes between ${BASE_REF} and ${HEAD_SHA}"

          # For PRs, use the GitHub Files API to get changed files.
          # The endpoint is paginated, so --paginate is required to see
          # every file of a large PR.
          if [[ "$EVENT_NAME" == "pull_request" ]]; then
            echo "Using GitHub API to detect PR changes..."
            gh api --paginate \
              "repos/${REPO}/pulls/${PR_NUMBER}/files?per_page=100" \
              --jq '.[].filename' > all_changed_files.txt
          else
            # Not reachable with the current pull_request-only trigger;
            # kept as a fallback for other event types.
            echo "Using git diff to detect changes..."
            git diff --name-only "${BEFORE_SHA}..${HEAD_SHA}" > all_changed_files.txt
          fi

          # Filter for content markdown files
          CHANGED_FILES=$(grep '^content/.*\.md$' all_changed_files.txt || true)

          echo "📁 All changed files:"
          cat all_changed_files.txt
          echo ""
          echo "📝 Content markdown files:"
          echo "$CHANGED_FILES"

          if [[ -n "$CHANGED_FILES" ]]; then
            echo "✅ Found $(echo "$CHANGED_FILES" | wc -l) changed content file(s)"
            {
              echo "has-changes=true"
              # Multi-line output uses the heredoc-style delimiter syntax.
              echo "changed-content<<EOF"
              echo "$CHANGED_FILES"
              echo "EOF"
            } >> "$GITHUB_OUTPUT"

            # Check if any shared content files were modified
            SHARED_CHANGES=$(echo "$CHANGED_FILES" | grep '^content/shared/' || true)
            if [[ -n "$SHARED_CHANGES" ]]; then
              echo "has-shared-content=true" >> "$GITHUB_OUTPUT"
              echo "🔄 Detected shared content changes: $SHARED_CHANGES"
            else
              echo "has-shared-content=false" >> "$GITHUB_OUTPUT"
            fi
          else
            echo "❌ No content changes detected"
            {
              echo "has-changes=false"
              echo "has-shared-content=false"
            } >> "$GITHUB_OUTPUT"
          fi

      - name: Skip if no content changes
        if: steps.detect.outputs.has-changes == 'false'
        run: |
          echo "No content changes detected in this PR - skipping link check"
          echo "✅ **No content changes detected** - link check skipped" >> "$GITHUB_STEP_SUMMARY"

      - name: Setup Node.js
        if: steps.detect.outputs.has-changes == 'true'
        uses: actions/setup-node@v4
        with:
          node-version: '20'
          cache: 'yarn'

      - name: Install dependencies
        if: steps.detect.outputs.has-changes == 'true'
        run: yarn install --frozen-lockfile

      - name: Build Hugo site
        if: steps.detect.outputs.has-changes == 'true'
        run: npx hugo --minify

      - name: Download link-checker binary
        if: steps.detect.outputs.has-changes == 'true'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # Release tag in influxdata/docs-v2 that carries the binary.
          LINK_CHECKER_TAG: link-checker-v1.5.0
        run: |
          echo "Downloading link-checker binary from docs-v2 releases..."

          # Download from docs-v2's own releases (always accessible).
          # -f makes curl fail on HTTP errors instead of saving the error body.
          curl -fsSL -H "Accept: application/vnd.github+json" \
            -H "Authorization: Bearer ${GH_TOKEN}" \
            -o link-checker-info.json \
            "https://api.github.com/repos/influxdata/docs-v2/releases/tags/${LINK_CHECKER_TAG}"

          # Extract download URL for linux binary
          DOWNLOAD_URL=$(jq -r '.assets[] | select(.name | test("link-checker.*linux")) | .url' link-checker-info.json)

          if [[ "$DOWNLOAD_URL" == "null" || -z "$DOWNLOAD_URL" ]]; then
            echo "❌ No linux binary found in release"
            echo "Available assets:"
            jq -r '.assets[].name' link-checker-info.json
            exit 1
          fi

          echo "📥 Downloading: $DOWNLOAD_URL"
          curl -fsSL -H "Accept: application/octet-stream" \
            -H "Authorization: Bearer ${GH_TOKEN}" \
            -o link-checker "$DOWNLOAD_URL"

          chmod +x link-checker
          ./link-checker --version

      - name: Verify link checker config exists
        if: steps.detect.outputs.has-changes == 'true'
        run: |
          if [[ ! -f .ci/link-checker/production.lycherc.toml ]]; then
            echo "❌ Configuration file .ci/link-checker/production.lycherc.toml not found"
            echo "Please copy production.lycherc.toml from docs-tooling/link-checker/"
            exit 1
          fi
          echo "✅ Using configuration: .ci/link-checker/production.lycherc.toml"

      # Outputs:
      #   public-files  newline-separated public HTML files to check ('' if none)
      #   file-count    number of entries in public-files
      - name: Map changed content to public files
        if: steps.detect.outputs.has-changes == 'true'
        id: mapping
        env:
          CHANGED_CONTENT: ${{ steps.detect.outputs.changed-content }}
        run: |
          echo "Mapping changed content files to public HTML files..."

          # Create temporary file with changed content files
          printf '%s\n' "$CHANGED_CONTENT" > changed-files.txt

          # Map content files to public files.
          # -d '\n' keeps paths containing spaces or quotes intact.
          PUBLIC_FILES=$(xargs -r -d '\n' ./link-checker map --existing-only < changed-files.txt)

          if [[ -n "$PUBLIC_FILES" ]]; then
            echo "Found affected public files:"
            echo "$PUBLIC_FILES"

            # Count files for summary
            FILE_COUNT=$(echo "$PUBLIC_FILES" | wc -l)
            {
              echo "public-files<<EOF"
              echo "$PUBLIC_FILES"
              echo "EOF"
              echo "file-count=$FILE_COUNT"
            } >> "$GITHUB_OUTPUT"
          else
            echo "No public files found to check"
            {
              echo "public-files="
              echo "file-count=0"
            } >> "$GITHUB_OUTPUT"
          fi

      # Outputs: error-count, warning-count, total-count, success-rate and
      # check-result ('passed' | 'failed' | 'error').
      - name: Run link checker
        if: steps.detect.outputs.has-changes == 'true' && steps.mapping.outputs.public-files != ''
        id: link-check
        env:
          FILE_COUNT: ${{ steps.mapping.outputs.file-count }}
          PUBLIC_FILES: ${{ steps.mapping.outputs.public-files }}
        run: |
          echo "Checking links in ${FILE_COUNT} affected files..."

          # Create temporary file with public files list
          printf '%s\n' "$PUBLIC_FILES" > public-files.txt

          # Run link checker with detailed JSON output
          set +e  # Don't fail immediately on error

          xargs -r -d '\n' ./link-checker check \
            --config .ci/link-checker/production.lycherc.toml \
            --format json \
            --output link-check-results.json < public-files.txt

          EXIT_CODE=$?

          if [[ -f link-check-results.json ]]; then
            # Parse the summary fields from the link-checker JSON report
            ERROR_COUNT=$(jq -r '.summary.error_count // 0' link-check-results.json)
            WARNING_COUNT=$(jq -r '.summary.warning_count // 0' link-check-results.json)
            TOTAL_COUNT=$(jq -r '.summary.total_checked // 0' link-check-results.json)
            SUCCESS_RATE=$(jq -r '.summary.success_rate // 0' link-check-results.json)

            # Reclassify file-not-found warnings as errors.
            # link-checker classifies missing local files as warnings (no HTTP status code),
            # but these represent genuinely broken internal links and should fail CI.
            FILE_NOT_FOUND_COUNT=$(jq '[.warnings[]? | select((.error // "") | test("Cannot find file"))] | length' link-check-results.json 2>/dev/null || echo 0)

            if [[ $FILE_NOT_FOUND_COUNT -gt 0 ]]; then
              echo "⚠️ Found $FILE_NOT_FOUND_COUNT missing local file(s) — reclassifying as errors"
              # Move file-not-found entries from warnings to errors.
              # Only replace the results file when jq succeeded, so a jq
              # failure cannot clobber the report with an empty file.
              jq '
                .errors = ((.errors // []) + [.warnings[]? | select((.error // "") | test("Cannot find file")) | .severity = "error"])
                | .warnings = [.warnings[]? | select((.error // "") | test("Cannot find file") | not)]
                | .summary.error_count = (.errors | length)
                | .summary.warning_count = (.warnings | length)
              ' link-check-results.json > link-check-results-fixed.json \
                && mv link-check-results-fixed.json link-check-results.json

              ERROR_COUNT=$(jq -r '.summary.error_count // 0' link-check-results.json)
              WARNING_COUNT=$(jq -r '.summary.warning_count // 0' link-check-results.json)
            fi

            {
              echo "error-count=$ERROR_COUNT"
              echo "warning-count=$WARNING_COUNT"
              echo "total-count=$TOTAL_COUNT"
              echo "success-rate=$SUCCESS_RATE"
            } >> "$GITHUB_OUTPUT"

            if [[ $ERROR_COUNT -gt 0 ]]; then
              echo "❌ Found $ERROR_COUNT broken links out of $TOTAL_COUNT total links"
              echo "check-result=failed" >> "$GITHUB_OUTPUT"
              # Reclassified warnings must fail the job even when
              # link-checker itself exited successfully.
              if [[ $EXIT_CODE -eq 0 ]]; then
                EXIT_CODE=1
              fi
            else
              echo "✅ All $TOTAL_COUNT links are valid ($WARNING_COUNT warnings)"
              echo "check-result=passed" >> "$GITHUB_OUTPUT"
            fi
          else
            echo "❌ Link check failed to generate results"
            echo "check-result=error" >> "$GITHUB_OUTPUT"
          fi

          exit $EXIT_CODE

      - name: Process and report results
        if: always() && steps.detect.outputs.has-changes == 'true' && steps.mapping.outputs.public-files != ''
        env:
          FILE_COUNT: ${{ steps.mapping.outputs.file-count }}
          TOTAL_COUNT: ${{ steps.link-check.outputs.total-count }}
          ERROR_COUNT: ${{ steps.link-check.outputs.error-count }}
          WARNING_COUNT: ${{ steps.link-check.outputs.warning-count }}
          SUCCESS_RATE: ${{ steps.link-check.outputs.success-rate }}
          CHECK_RESULT: ${{ steps.link-check.outputs.check-result }}
        run: |
          # Map a public HTML path to the content file it was most likely
          # rendered from. Hugo renders both a section `_index.md` and a
          # regular page `<name>.md` (or leaf bundle `<name>/index.md`) to
          # `<name>/index.html`, so prefer whichever source actually exists
          # and fall back to the `_index.md` guess.
          to_content_path() {
            local guess base
            guess=$(echo "$1" | sed -E 's|^(.*/)?public/|content/|' | sed 's|/index\.html$|/_index.md|')
            if [[ "$guess" == */_index.md && ! -f "$guess" ]]; then
              base="${guess%/_index.md}"
              if [[ -f "${base}.md" ]]; then
                guess="${base}.md"
              elif [[ -f "${base}/index.md" ]]; then
                guess="${base}/index.md"
              fi
            fi
            echo "$guess"
          }

          if [[ -f link-check-results.json ]]; then
            # Generate summary header
            {
              echo "## Link Check Results"
              echo ""
              echo "| Metric | Value |"
              echo "|--------|-------|"
              echo "| Files Checked | ${FILE_COUNT} |"
              echo "| Total Links | ${TOTAL_COUNT} |"
              echo "| Errors | ${ERROR_COUNT} |"
              echo "| Warnings | ${WARNING_COUNT} |"
              echo "| Success Rate | ${SUCCESS_RATE}% |"
              echo ""
            } >> "$GITHUB_STEP_SUMMARY"

            # Report broken links (errors) with annotations
            if [[ "${CHECK_RESULT}" == "failed" ]]; then
              {
                echo "### Broken Links"
                echo ""
                echo "| Severity | Source File | Broken URL | Error |"
                echo "|----------|------------|------------|-------|"
              } >> "$GITHUB_STEP_SUMMARY"

              # Process errors (these fail CI)
              jq -c '.errors[]?' link-check-results.json 2>/dev/null | while read -r entry; do
                URL=$(echo "$entry" | jq -r '.url // "unknown"')
                ERROR=$(echo "$entry" | jq -r '.error // "Unknown error"')
                FILE=$(echo "$entry" | jq -r '.file // "unknown"')
                LINE=$(echo "$entry" | jq -r '.line // empty')

                # Map public path to content path for annotations
                CONTENT_FILE=$(to_content_path "$FILE")

                # Create GitHub annotation (goes to stdout, not the summary)
                if [[ -n "$LINE" && "$LINE" != "null" ]]; then
                  echo "::error file=${CONTENT_FILE},line=${LINE}::Broken link: ${URL} (${ERROR})"
                else
                  echo "::error file=${CONTENT_FILE}::Broken link: ${URL} (${ERROR})"
                fi

                # Add row to summary table; escape '|' so it cannot break
                # the Markdown table, and cap the error text at 80 chars.
                SAFE_URL=$(echo "$URL" | sed 's/|/\\|/g')
                SAFE_ERROR=$(echo "$ERROR" | sed 's/|/\\|/g' | cut -c1-80)
                echo "| 🔴 error | \`${CONTENT_FILE}\` | ${SAFE_URL} | ${SAFE_ERROR} |" >> "$GITHUB_STEP_SUMMARY"
              done

              {
                echo ""
                echo "---"
                echo "❌ **Link check failed** — fix the broken links listed above before merging."
              } >> "$GITHUB_STEP_SUMMARY"
            else
              echo "✅ **All links are valid**" >> "$GITHUB_STEP_SUMMARY"
            fi

            # Report warnings (don't fail CI, but useful context)
            WARNING_ARRAY_LEN=$(jq '.warnings | length' link-check-results.json 2>/dev/null || echo 0)
            if [[ "$WARNING_ARRAY_LEN" -gt 0 ]]; then
              {
                echo ""
                echo "<details>"
                echo "<summary>⚠️ ${WARNING_ARRAY_LEN} warning(s) (do not fail CI)</summary>"
                echo ""
                echo "| Source File | URL | Issue |"
                echo "|------------|-----|-------|"
              } >> "$GITHUB_STEP_SUMMARY"

              # Only the first 20 warnings are listed to keep the summary short.
              jq -c '.warnings[]?' link-check-results.json 2>/dev/null | head -20 | while read -r entry; do
                URL=$(echo "$entry" | jq -r '.url // "unknown"')
                ERROR=$(echo "$entry" | jq -r '.error // "Unknown"')
                FILE=$(echo "$entry" | jq -r '.file // "unknown"')
                CONTENT_FILE=$(to_content_path "$FILE")
                SAFE_URL=$(echo "$URL" | sed 's/|/\\|/g')
                SAFE_ERROR=$(echo "$ERROR" | sed 's/|/\\|/g' | cut -c1-80)
                echo "| \`${CONTENT_FILE}\` | ${SAFE_URL} | ${SAFE_ERROR} |" >> "$GITHUB_STEP_SUMMARY"
              done

              if [[ "$WARNING_ARRAY_LEN" -gt 20 ]]; then
                {
                  echo ""
                  echo "_Showing first 20 of ${WARNING_ARRAY_LEN} warnings. Download the artifact for full results._"
                } >> "$GITHUB_STEP_SUMMARY"
              fi

              {
                echo ""
                echo "</details>"
              } >> "$GITHUB_STEP_SUMMARY"
            fi

            # Add helpful tips
            {
              echo ""
              echo "<details>"
              echo "<summary>💡 Troubleshooting Tips</summary>"
              echo ""
              echo "- **404 errors**: The linked page doesn't exist. Check for typos or update the link."
              echo "- **Relative links**: Use relative paths starting with \`/\` for internal links."
              echo "- **Anchors**: Ensure heading anchors match the linked fragment exactly."
              echo "- **Warnings**: External sites may be temporarily unavailable — these don't fail CI."
              echo ""
              echo "</details>"
            } >> "$GITHUB_STEP_SUMMARY"
          else
            echo "⚠️ **Link check could not complete** — no results file generated" >> "$GITHUB_STEP_SUMMARY"
          fi

      - name: Upload detailed results
        if: always() && steps.detect.outputs.has-changes == 'true' && steps.mapping.outputs.public-files != ''
        uses: actions/upload-artifact@v4
        with:
          name: link-check-results
          path: |
            link-check-results.json
            changed-files.txt
            public-files.txt
          retention-days: 30