From 6a4e8827ebc825c96872ceef1d746173a1232c70 Mon Sep 17 00:00:00 2001 From: Jason Stirnaman Date: Mon, 28 Jul 2025 15:21:28 -0500 Subject: [PATCH] feat(testing): add link validation automation and improvements - Add GitHub Actions for automated link validation on PRs - Implement incremental validation with caching (30-day TTL, configurable) - Add matrix generator for parallel validation strategy - Create comprehensive TESTING.md documentation - Add cache manager with configurable TTL via env var or CLI - Implement smart link extraction and validation - Add PR comment generator for broken link reports - Update Cypress tests to use incremental validation - Consolidate testing docs from CONTRIBUTING.md to TESTING.md Key improvements: - Cache-aware validation only checks changed content - Parallel execution for large changesets - Detailed PR comments with broken link reports - Support for LINK_CACHE_TTL_DAYS env var - Local testing with yarn test:links - Reduced false positives through intelligent caching --- .../actions/report-broken-links/action.yml | 87 ++++ .github/actions/setup-docs-env/action.yml | 15 + .github/actions/validate-links/action.yml | 70 +++ .../instructions/contributing.instructions.md | 41 +- .../shortcodes-reference.instructions.md | 1 + .../testing-setup.instructions.md | 129 +---- .github/scripts/cache-manager.js | 169 +++++++ .github/scripts/comment-generator.js | 328 ++++++++++++ .github/scripts/incremental-validator.js | 228 +++++++++ .github/scripts/link-extractor.js | 473 ++++++++++++++++++ .github/scripts/matrix-generator.js | 384 ++++++++++++++ .github/workflows/pr-link-validation.yml | 140 ++++++ CLAUDE.md | 3 + CONTRIBUTING.md | 189 +------ README.md | 4 + TESTING.md | 364 ++++++++++++++ cypress.config.js | 59 +++ cypress/e2e/content/article-links.cy.js | 53 +- cypress/support/link-reporter.js | 83 ++- lefthook.yml | 16 +- 20 files changed, 2506 insertions(+), 330 deletions(-) create mode 100644 
.github/actions/report-broken-links/action.yml create mode 100644 .github/actions/setup-docs-env/action.yml create mode 100644 .github/actions/validate-links/action.yml create mode 100644 .github/scripts/cache-manager.js create mode 100644 .github/scripts/comment-generator.js create mode 100644 .github/scripts/incremental-validator.js create mode 100644 .github/scripts/link-extractor.js create mode 100644 .github/scripts/matrix-generator.js create mode 100644 .github/workflows/pr-link-validation.yml create mode 100644 TESTING.md diff --git a/.github/actions/report-broken-links/action.yml b/.github/actions/report-broken-links/action.yml new file mode 100644 index 000000000..ea65819d3 --- /dev/null +++ b/.github/actions/report-broken-links/action.yml @@ -0,0 +1,87 @@ +name: 'Report Broken Links' +description: 'Downloads broken link reports, generates PR comment, and posts results' + +inputs: + github-token: + description: 'GitHub token for posting comments' + required: false + default: ${{ github.token }} + max-links-per-file: + description: 'Maximum links to show per file in comment' + required: false + default: '20' + include-success-message: + description: 'Include success message when no broken links found' + required: false + default: 'true' + +outputs: + has-broken-links: + description: 'Whether broken links were found (true/false)' + value: ${{ steps.generate-comment.outputs.has-broken-links }} + broken-link-count: + description: 'Number of broken links found' + value: ${{ steps.generate-comment.outputs.broken-link-count }} + +runs: + using: 'composite' + steps: + - name: Download broken link reports + uses: actions/download-artifact@v4 + with: + path: reports + continue-on-error: true + + - name: Generate PR comment + id: generate-comment + run: | + # Generate comment using our script + node .github/scripts/comment-generator.js \ + --max-links ${{ inputs.max-links-per-file }} \ + ${{ inputs.include-success-message == 'false' && '--no-success' || '' }} \ + 
--output-file comment.md \ + reports/ || echo "No reports found or errors occurred" + + # Check if comment file was created and has content + if [[ -f comment.md && -s comment.md ]]; then + echo "has-broken-links=true" >> $GITHUB_OUTPUT + + # Count broken links by parsing the comment + broken_count=$(grep -o "Found [0-9]* broken link" comment.md | grep -o "[0-9]*" || echo "0") + echo "broken-link-count=$broken_count" >> $GITHUB_OUTPUT + echo "comment-generated=true" >> $GITHUB_OUTPUT + else + echo "has-broken-links=false" >> $GITHUB_OUTPUT + echo "broken-link-count=0" >> $GITHUB_OUTPUT + echo "comment-generated=false" >> $GITHUB_OUTPUT + fi + shell: bash + + - name: Post PR comment + if: steps.generate-comment.outputs.comment-generated == 'true' + uses: actions/github-script@v7 + with: + github-token: ${{ inputs.github-token }} + script: | + const fs = require('fs'); + + if (fs.existsSync('comment.md')) { + const comment = fs.readFileSync('comment.md', 'utf8'); + + if (comment.trim()) { + await github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: comment + }); + } + } + + - name: Set workflow status + if: steps.generate-comment.outputs.has-broken-links == 'true' + run: | + broken_count="${{ steps.generate-comment.outputs.broken-link-count }}" + echo "::error::Found $broken_count broken link(s)" + exit 1 + shell: bash \ No newline at end of file diff --git a/.github/actions/setup-docs-env/action.yml b/.github/actions/setup-docs-env/action.yml new file mode 100644 index 000000000..b5ed01120 --- /dev/null +++ b/.github/actions/setup-docs-env/action.yml @@ -0,0 +1,15 @@ +name: 'Setup Documentation Environment' +description: 'Sets up Node.js environment and installs dependencies for documentation workflows' + +runs: + using: 'composite' + steps: + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '20' + cache: 'yarn' + + - name: Install dependencies + run: yarn 
install + shell: bash \ No newline at end of file diff --git a/.github/actions/validate-links/action.yml b/.github/actions/validate-links/action.yml new file mode 100644 index 000000000..369f4e14a --- /dev/null +++ b/.github/actions/validate-links/action.yml @@ -0,0 +1,70 @@ +name: 'Validate Links' +description: 'Runs e2e browser-based link validation tests against Hugo site using Cypress' + +inputs: + files: + description: 'Space-separated list of files to validate' + required: true + product-name: + description: 'Product name for reporting (optional)' + required: false + default: '' + cache-enabled: + description: 'Enable link validation caching' + required: false + default: 'true' + cache-key: + description: 'Cache key prefix for this validation run' + required: false + default: 'link-validation' + +outputs: + failed: + description: 'Whether validation failed (true/false)' + value: ${{ steps.validate.outputs.failed }} + +runs: + using: 'composite' + steps: + - name: Restore link validation cache + if: inputs.cache-enabled == 'true' + uses: actions/cache@v4 + with: + path: .cache/link-validation + key: ${{ inputs.cache-key }}-${{ runner.os }}-${{ hashFiles('content/**/*.md', 'content/**/*.html') }} + restore-keys: | + ${{ inputs.cache-key }}-${{ runner.os }}- + ${{ inputs.cache-key }}- + + - name: Run link validation + id: validate + run: | + echo "Testing files: ${{ inputs.files }}" + if [[ -n "${{ inputs.product-name }}" ]]; then + echo "Product: ${{ inputs.product-name }}" + fi + + if [[ "${{ inputs.cache-enabled }}" == "true" ]]; then + echo "๐Ÿ“ฆ Cache enabled for this validation run" + fi + + # Run the validation + if node cypress/support/run-e2e-specs.js \ + --spec "cypress/e2e/content/article-links.cy.js" \ + ${{ inputs.files }}; then + echo "failed=false" >> $GITHUB_OUTPUT + else + echo "failed=true" >> $GITHUB_OUTPUT + exit 1 + fi + shell: bash + env: + CI: true + CACHE_ENABLED: ${{ inputs.cache-enabled }} + + - name: Upload broken links report + if: 
failure() + uses: actions/upload-artifact@v4 + with: + name: broken-links-report${{ inputs.product-name && format('-{0}', inputs.product-name) || '' }} + path: /tmp/broken_links_report.json \ No newline at end of file diff --git a/.github/instructions/contributing.instructions.md b/.github/instructions/contributing.instructions.md index 6c33b7eba..f80ec35d0 100644 --- a/.github/instructions/contributing.instructions.md +++ b/.github/instructions/contributing.instructions.md @@ -18,7 +18,7 @@ Ready to contribute? Here's the essential workflow: 2. [Fork and clone](#fork-and-clone-influxdata-documentation-repository) this repository 3. [Install dependencies](#development-environment-setup) (Node.js, Yarn, Docker) 4. Make your changes following [style guidelines](#making-changes) -5. [Test your changes](#testing--quality-assurance) (pre-commit and pre-push hooks run automatically) +5. [Test your changes](TESTING.md) (pre-commit and pre-push hooks run automatically) 6. [Submit a pull request](#submission-process) For detailed setup and reference information, see the sections below. @@ -169,33 +169,30 @@ For more information about generating InfluxDB API documentation, see the --- -### Pre-commit Hooks +## Testing & Quality Assurance -docs-v2 uses Lefthook to manage Git hooks that run during pre-commit and pre-push. The hooks run the scripts defined in `package.json` to lint Markdown and test code blocks. -When you try to commit changes (`git commit`), Git runs +For comprehensive testing information, including code block testing, link validation, style linting, and advanced testing procedures, see **[TESTING.md](TESTING.md)**. 
-#### Skip pre-commit hooks +### Quick Testing Reference +```bash +# Test code blocks +yarn test:codeblocks:all + +# Test links +yarn test:links content/influxdb3/core/**/*.md + +# Run style linting +docker compose run -T vale content/**/*.md +``` + +Pre-commit hooks run automatically when you commit changes, testing your staged files with Vale, Prettier, Cypress, and Pytest. To skip hooks if needed: ```sh git commit -m "" --no-verify ``` -# ... (see full CONTRIBUTING.md for complete example) -```python -print("Hello, world!") -``` -# ... (see full CONTRIBUTING.md for complete example) -```sh -docker compose run -T vale --config=content/influxdb/cloud-dedicated/.vale.ini --minAlertLevel=error content/influxdb/cloud-dedicated/write-data/**/*.md -``` - - -1. Install the [Vale VSCode](https://marketplace.visualstudio.com/items?itemName=ChrisChinchilla.vale-vscode) extension. -2. In the extension settings, set the `Vale:Vale CLI:Path` value to `${workspaceFolder}/node_modules/.bin/vale`. - - -_See full CONTRIBUTING.md for complete details._ +--- ### Commit Guidelines @@ -229,10 +226,6 @@ _For the complete Complete Frontmatter Reference reference, see frontmatter-refe _For the complete Complete Shortcodes Reference reference, see shortcodes-reference.instructions.md._ -### Detailed Testing Setup - -_For the complete Detailed Testing Setup reference, see testing-setup.instructions.md._ - #### Vale style linting configuration docs-v2 includes Vale writing style linter configurations to enforce documentation writing style rules, guidelines, branding, and vocabulary terms. 
diff --git a/.github/instructions/shortcodes-reference.instructions.md b/.github/instructions/shortcodes-reference.instructions.md index 099a9a9f8..098ae1e07 100644 --- a/.github/instructions/shortcodes-reference.instructions.md +++ b/.github/instructions/shortcodes-reference.instructions.md @@ -1186,3 +1186,4 @@ Replace the following: - {{% code-placeholder-key %}}`API_TOKEN`{{% /code-placeholder-key %}}: your [InfluxDB API token](/influxdb/v2/admin/tokens/) ``` + diff --git a/.github/instructions/testing-setup.instructions.md b/.github/instructions/testing-setup.instructions.md index d30f3bcc9..7c0b9509b 100644 --- a/.github/instructions/testing-setup.instructions.md +++ b/.github/instructions/testing-setup.instructions.md @@ -4,127 +4,12 @@ applyTo: "content/**/*.md, layouts/**/*.html" ### Detailed Testing Setup -#### Set up test scripts and credentials +For comprehensive testing information, including: +- Code block testing setup and configuration +- Link validation testing procedures +- Style linting with Vale +- Pre-commit hooks and GitHub Actions integration +- Advanced testing procedures and troubleshooting -Tests for code blocks require your InfluxDB credentials and other typical -InfluxDB configuration. - -To set up your docs-v2 instance to run tests locally, do the following: - -1. **Set executable permissions on test scripts** in `./test/src`: - - ```sh - chmod +x ./test/src/*.sh - ``` - -2. **Create credentials for tests**: - - - Create databases, buckets, and tokens for the product(s) you're testing. - - If you don't have access to a Clustered instance, you can use your -Cloud Dedicated instance for testing in most cases. To avoid conflicts when - running tests, create separate Cloud Dedicated and Clustered databases. - -1. **Create .env.test**: Copy the `./test/env.test.example` file into each - product directory to test and rename the file as `.env.test`--for example: - - ```sh - ./content/influxdb/cloud-dedicated/.env.test - ``` - -2. 
Inside each product's `.env.test` file, assign your InfluxDB credentials to - environment variables: - - - Include the usual `INFLUX_` environment variables - - In - `cloud-dedicated/.env.test` and `clustered/.env.test` files, also define the - following variables: - - - `ACCOUNT_ID`, `CLUSTER_ID`: You can find these values in your `influxctl` - `config.toml` configuration file. - - `MANAGEMENT_TOKEN`: Use the `influxctl management create` command to generate - a long-lived management token to authenticate Management API requests - - See the substitution - patterns in `./test/src/prepare-content.sh` for the full list of variables you may need to define in your `.env.test` files. - -3. For influxctl commands to run in tests, move or copy your `config.toml` file - to the `./test` directory. - -> [!Warning] -> -> - The database you configure in `.env.test` and any written data may -be deleted during test runs. -> - Don't add your `.env.test` files to Git. To prevent accidentally adding credentials to the docs-v2 repo, -> Git is configured to ignore `.env*` files. Consider backing them up on your local machine in case of accidental deletion. - -#### Test shell and python code blocks - -[pytest-codeblocks](https://github.com/nschloe/pytest-codeblocks/tree/main) extracts code from python and shell Markdown code blocks and executes assertions for the code. -If you don't assert a value (using a Python `assert` statement), `--codeblocks` considers a non-zero exit code to be a failure. - -**Note**: `pytest --codeblocks` uses Python's `subprocess.run()` to execute shell code. - -You can use this to test CLI and interpreter commands, regardless of programming -language, as long as they return standard exit codes. 
- -To make the documented output of a code block testable, precede it with the -`` tag and **omit the code block language -descriptor**--for example, in your Markdown file: - -##### Example markdown - -```python -print("Hello, world!") -``` - - - -The next code block is treated as an assertion. -If successful, the output is the following: - -``` -Hello, world! -``` - -For commands, such as `influxctl` CLI commands, that require launching an -OAuth URL in a browser, wrap the command in a subshell and redirect the output -to `/shared/urls.txt` in the container--for example: - -```sh -# Test the preceding command outside of the code block. -# influxctl authentication requires TTY interaction-- -# output the auth URL to a file that the host can open. -script -c "influxctl user list " \ - /dev/null > /shared/urls.txt -``` - -You probably don't want to display this syntax in the docs, which unfortunately -means you'd need to include the test block separately from the displayed code -block. -To hide it from users, wrap the code block inside an HTML comment. -pytest-codeblocks will still collect and run the code block. - -##### Mark tests to skip - -pytest-codeblocks has features for skipping tests and marking blocks as failed. -To learn more, see the pytest-codeblocks README and tests. - -#### Troubleshoot tests - -##### Pytest collected 0 items - -Potential reasons: - -- See the test discovery options in `pytest.ini`. -- For Python code blocks, use the following delimiter: - - ```python - # Codeblocks runs this block. - ``` - - `pytest --codeblocks` ignores code blocks that use the following: - - ```py - # Codeblocks ignores this block. - ``` +Please refer to the main **[TESTING.md](../../TESTING.md)** file. 
diff --git a/.github/scripts/cache-manager.js b/.github/scripts/cache-manager.js new file mode 100644 index 000000000..b7c2c2e6c --- /dev/null +++ b/.github/scripts/cache-manager.js @@ -0,0 +1,169 @@ +#!/usr/bin/env node + +/** + * Simple Cache Manager for Link Validation Results + * Uses GitHub Actions cache API or local file storage + */ + +import fs from 'fs'; +import path from 'path'; +import crypto from 'crypto'; +import process from 'process'; + +const CACHE_VERSION = 'v1'; +const CACHE_KEY_PREFIX = 'link-validation'; +const LOCAL_CACHE_DIR = path.join(process.cwd(), '.cache', 'link-validation'); + +/** + * Simple cache interface + */ +class CacheManager { + constructor(options = {}) { + this.useGitHubCache = + options.useGitHubCache !== false && process.env.GITHUB_ACTIONS; + this.localCacheDir = options.localCacheDir || LOCAL_CACHE_DIR; + + // Configurable cache TTL - default 30 days, support environment variable + this.cacheTTLDays = + options.cacheTTLDays || parseInt(process.env.LINK_CACHE_TTL_DAYS) || 30; + this.maxAge = this.cacheTTLDays * 24 * 60 * 60 * 1000; + + if (!this.useGitHubCache) { + this.ensureLocalCacheDir(); + } + } + + ensureLocalCacheDir() { + if (!fs.existsSync(this.localCacheDir)) { + fs.mkdirSync(this.localCacheDir, { recursive: true }); + } + } + + generateCacheKey(filePath, fileHash) { + const pathHash = crypto + .createHash('sha256') + .update(filePath) + .digest('hex') + .substring(0, 8); + return `${CACHE_KEY_PREFIX}-${CACHE_VERSION}-${pathHash}-${fileHash}`; + } + + async get(filePath, fileHash) { + if (this.useGitHubCache) { + return await this.getFromGitHubCache(filePath, fileHash); + } else { + return await this.getFromLocalCache(filePath, fileHash); + } + } + + async set(filePath, fileHash, results) { + if (this.useGitHubCache) { + return await this.setToGitHubCache(filePath, fileHash, results); + } else { + return await this.setToLocalCache(filePath, fileHash, results); + } + } + + async getFromGitHubCache(filePath, fileHash) 
{ + // For GitHub Actions, we'll use the actions/cache action directly + // in the workflow, so this is a placeholder + return null; + } + + async setToGitHubCache(filePath, fileHash, results) { + // For GitHub Actions, we'll use the actions/cache action directly + // in the workflow, so this is a placeholder + return true; + } + + async getFromLocalCache(filePath, fileHash) { + const cacheKey = this.generateCacheKey(filePath, fileHash); + const cacheFile = path.join(this.localCacheDir, `${cacheKey}.json`); + + if (!fs.existsSync(cacheFile)) { + return null; + } + + try { + const content = fs.readFileSync(cacheFile, 'utf8'); + const cached = JSON.parse(content); + + // TTL check using configured cache duration + const age = Date.now() - new Date(cached.cachedAt).getTime(); + + if (age > this.maxAge) { + fs.unlinkSync(cacheFile); + return null; + } + + return cached.results; + } catch (error) { + // Clean up corrupted cache + try { + fs.unlinkSync(cacheFile); + } catch { + // Ignore cleanup errors + } + return null; + } + } + + async setToLocalCache(filePath, fileHash, results) { + const cacheKey = this.generateCacheKey(filePath, fileHash); + const cacheFile = path.join(this.localCacheDir, `${cacheKey}.json`); + + const cacheData = { + filePath, + fileHash, + results, + cachedAt: new Date().toISOString(), + }; + + try { + fs.writeFileSync(cacheFile, JSON.stringify(cacheData, null, 2)); + return true; + } catch (error) { + console.warn(`Cache save failed: ${error.message}`); + return false; + } + } + + async cleanup() { + if (this.useGitHubCache) { + return { removed: 0, note: 'GitHub Actions cache auto-managed' }; + } + + let removed = 0; + if (!fs.existsSync(this.localCacheDir)) { + return { removed }; + } + + const files = fs.readdirSync(this.localCacheDir); + + for (const file of files) { + if (!file.endsWith('.json')) continue; + + const filePath = path.join(this.localCacheDir, file); + try { + const stat = fs.statSync(filePath); + if (Date.now() - 
stat.mtime.getTime() > this.maxAge) { + fs.unlinkSync(filePath); + removed++; + } + } catch { + // Remove corrupted files + try { + fs.unlinkSync(filePath); + removed++; + } catch { + // Ignore errors + } + } + } + + return { removed }; + } +} + +export default CacheManager; +export { CacheManager }; diff --git a/.github/scripts/comment-generator.js b/.github/scripts/comment-generator.js new file mode 100644 index 000000000..7b1658b55 --- /dev/null +++ b/.github/scripts/comment-generator.js @@ -0,0 +1,328 @@ +/** + * Comment Generator for Link Validation Results + * Standardizes PR comment generation across workflows + * Includes cache performance metrics and optimization info + */ + +import fs from 'fs'; +import path from 'path'; +import process from 'process'; + +/** + * Normalize broken link data from different report formats + * @param {Object|Array} reportData - Raw report data + * @returns {Array} - Normalized array of broken links + */ +function normalizeBrokenLinks(reportData) { + if (!reportData) return []; + + let links = []; + + if (Array.isArray(reportData)) { + reportData.forEach((item) => { + if (item.links && Array.isArray(item.links)) { + // Format: { sourceFile: "file.md", links: [...] 
} + item.links.forEach((link) => { + links.push({ + sourceFile: item.sourceFile || item.page || 'Unknown', + url: link.url || link.href, + linkText: link.linkText || link.url || link.href, + status: link.status, + error: link.error, + type: link.type, + }); + }); + } else { + // Format: direct link object + links.push({ + sourceFile: item.sourceFile || item.page || 'Unknown', + url: item.url || item.href, + linkText: item.linkText || item.url || item.href, + status: item.status, + error: item.error, + type: item.type, + }); + } + }); + } + + return links; +} + +/** + * Group broken links by source file + * @param {Array} brokenLinks - Array of normalized broken links + * @returns {Object} - Object with source files as keys + */ +function groupLinksBySource(brokenLinks) { + const bySource = {}; + + brokenLinks.forEach((link) => { + const source = link.sourceFile || 'Unknown'; + if (!bySource[source]) { + bySource[source] = []; + } + bySource[source].push(link); + }); + + return bySource; +} + +/** + * Generate markdown comment for PR + * @param {Array} allBrokenLinks - Array of all broken links + * @param {Object} options - Generation options + * @returns {string} - Markdown comment content + */ +/** + * Load cache statistics from reports directory + * @param {string} reportsDir - Directory containing reports + * @returns {Object|null} Cache statistics or null if not found + */ +function loadCacheStats(reportsDir) { + try { + const cacheStatsFile = path.join(reportsDir, 'cache_statistics.json'); + if (fs.existsSync(cacheStatsFile)) { + const content = fs.readFileSync(cacheStatsFile, 'utf8'); + return JSON.parse(content); + } + } catch (error) { + console.warn(`Warning: Could not load cache stats: ${error.message}`); + } + return null; +} + +function generateComment(allBrokenLinks, options = {}) { + const { + includeSuccessMessage = true, + includeStats = true, + includeActionRequired = true, + maxLinksPerFile = 20, + cacheStats = null, + reportsDir = null, + } = 
options; + + // Load cache stats if reports directory is provided + const actualCacheStats = + cacheStats || (reportsDir ? loadCacheStats(reportsDir) : null); + + let comment = ''; + + // Add cache performance metrics at the top + if (actualCacheStats) { + comment += '## ๐Ÿ“Š Link Validation Performance\n\n'; + comment += `- **Cache Hit Rate:** ${actualCacheStats.hitRate}%\n`; + comment += `- **Files Cached:** ${actualCacheStats.cacheHits} (skipped validation)\n`; + comment += `- **Files Validated:** ${actualCacheStats.cacheMisses}\n`; + + if (actualCacheStats.hitRate >= 50) { + comment += + '- **Performance:** ๐Ÿš€ Cache optimization saved significant validation time!\n'; + } else if (actualCacheStats.hitRate > 0) { + comment += + '- **Performance:** โšก Some files were cached, improving validation speed\n'; + } + comment += '\n'; + } + + if (!allBrokenLinks || allBrokenLinks.length === 0) { + comment += '## โœ… Link Validation Passed\n\n'; + comment += 'All links in the changed files are valid!'; + + if (actualCacheStats && actualCacheStats.hitRate === 100) { + comment += '\n\nโœจ **All files were cached** - no validation was needed!'; + } + + return includeSuccessMessage ? 
comment : ''; + } + + comment += '## ๐Ÿ”— Broken Links Found\n\n'; + + if (includeStats) { + comment += `Found ${allBrokenLinks.length} broken link(s) in the changed files:\n\n`; + } + + // Group by source file + const bySource = groupLinksBySource(allBrokenLinks); + + // Generate sections for each source file + for (const [source, links] of Object.entries(bySource)) { + comment += `### ${source}\n\n`; + + const displayLinks = links.slice(0, maxLinksPerFile); + const hiddenCount = links.length - displayLinks.length; + + displayLinks.forEach((link) => { + const url = link.url || 'Unknown URL'; + const linkText = link.linkText || url; + const status = link.status || 'Unknown'; + + comment += `- [ ] **${linkText}** โ†’ \`${url}\`\n`; + comment += ` - Status: ${status}\n`; + + if (link.type) { + comment += ` - Type: ${link.type}\n`; + } + + if (link.error) { + comment += ` - Error: ${link.error}\n`; + } + + comment += '\n'; + }); + + if (hiddenCount > 0) { + comment += `
\n... and ${hiddenCount} more broken link(s)\n\n`; + + links.slice(maxLinksPerFile).forEach((link) => { + const url = link.url || 'Unknown URL'; + const linkText = link.linkText || url; + const status = link.status || 'Unknown'; + + comment += `- [ ] **${linkText}** โ†’ \`${url}\` (Status: ${status})\n`; + }); + + comment += '\n
\n\n'; + } + } + + if (includeActionRequired) { + comment += '\n---\n'; + comment += + '**Action Required:** Please fix the broken links before merging this PR.'; + } + + return comment; +} + +/** + * Load and merge broken link reports from artifacts + * @param {string} reportsDir - Directory containing report artifacts + * @returns {Array} - Array of all broken links + */ +function loadBrokenLinkReports(reportsDir) { + const allBrokenLinks = []; + + if (!fs.existsSync(reportsDir)) { + return allBrokenLinks; + } + + try { + const reportDirs = fs.readdirSync(reportsDir); + + for (const dir of reportDirs) { + if (dir.startsWith('broken-links-')) { + const reportPath = path.join( + reportsDir, + dir, + 'broken_links_report.json' + ); + + if (fs.existsSync(reportPath)) { + try { + const reportContent = fs.readFileSync(reportPath, 'utf8'); + const reportData = JSON.parse(reportContent); + const normalizedLinks = normalizeBrokenLinks(reportData); + allBrokenLinks.push(...normalizedLinks); + } catch (e) { + console.error(`Error reading ${reportPath}: ${e.message}`); + } + } + } + } + } catch (e) { + console.error( + `Error reading reports directory ${reportsDir}: ${e.message}` + ); + } + + return allBrokenLinks; +} + +/** + * CLI interface for the comment generator + */ +function main() { + const args = process.argv.slice(2); + + if (args.includes('--help') || args.includes('-h')) { + console.log(` +Usage: node comment-generator.js [options] + +Options: + --no-success Don't include success message when no broken links + --no-stats Don't include broken link statistics + --no-action-required Don't include action required message + --max-links Maximum links to show per file (default: 20) + --output-file Write comment to file instead of stdout + --help, -h Show this help message + +Examples: + node comment-generator.js reports/ + node comment-generator.js --max-links 10 --output-file comment.md reports/ +`); + process.exit(0); + } + + // Parse arguments + let reportsDir = ''; 
+ const options = { + includeSuccessMessage: true, + includeStats: true, + includeActionRequired: true, + maxLinksPerFile: 20, + }; + let outputFile = null; + + for (let i = 0; i < args.length; i++) { + const arg = args[i]; + + if (arg === '--no-success') { + options.includeSuccessMessage = false; + } else if (arg === '--no-stats') { + options.includeStats = false; + } else if (arg === '--no-action-required') { + options.includeActionRequired = false; + } else if (arg === '--max-links' && i + 1 < args.length) { + options.maxLinksPerFile = parseInt(args[++i]); + } else if (arg === '--output-file' && i + 1 < args.length) { + outputFile = args[++i]; + } else if (!arg.startsWith('--')) { + reportsDir = arg; + } + } + + if (!reportsDir) { + console.error('Error: reports directory is required'); + process.exit(1); + } + + // Load reports and generate comment with cache stats + const brokenLinks = loadBrokenLinkReports(reportsDir); + options.reportsDir = reportsDir; + const comment = generateComment(brokenLinks, options); + + if (outputFile) { + fs.writeFileSync(outputFile, comment); + console.log(`Comment written to ${outputFile}`); + } else { + console.log(comment); + } + + // Exit with error code if there are broken links + if (brokenLinks.length > 0) { + process.exit(1); + } +} + +// Run CLI if this file is executed directly +if (import.meta.url === `file://${process.argv[1]}`) { + main(); +} + +export { + generateComment, + loadBrokenLinkReports, + normalizeBrokenLinks, + groupLinksBySource, +}; diff --git a/.github/scripts/incremental-validator.js b/.github/scripts/incremental-validator.js new file mode 100644 index 000000000..1ca0b3d04 --- /dev/null +++ b/.github/scripts/incremental-validator.js @@ -0,0 +1,228 @@ +#!/usr/bin/env node + +/** + * Incremental Link Validator + * Combines link extraction and caching to validate only changed links + */ + +import { extractLinksFromFile } from './link-extractor.js'; +import { CacheManager } from './cache-manager.js'; 
+import process from 'process'; + +/** + * Incremental validator that only validates changed content + */ +class IncrementalValidator { + constructor(options = {}) { + this.cacheManager = new CacheManager(options); + this.validateExternal = options.validateExternal !== false; + this.validateInternal = options.validateInternal !== false; + } + + /** + * Get validation strategy for a list of files + * @param {Array} filePaths - Array of file paths + * @returns {Object} Validation strategy with files categorized + */ + async getValidationStrategy(filePaths) { + const strategy = { + unchanged: [], // Files that haven't changed (skip validation) + changed: [], // Files that changed (need full validation) + newLinks: [], // New links across all files (need validation) + total: filePaths.length, + }; + + const allNewLinks = new Set(); + + for (const filePath of filePaths) { + try { + const extractionResult = extractLinksFromFile(filePath); + if (!extractionResult) { + console.warn(`Could not extract links from ${filePath}`); + continue; + } + + const { fileHash, links } = extractionResult; + + // Check if we have cached results for this file version + const cachedResults = await this.cacheManager.get(filePath, fileHash); + + if (cachedResults) { + // File unchanged, skip validation + strategy.unchanged.push({ + filePath, + fileHash, + linkCount: links.length, + cachedResults, + }); + } else { + // File changed or new, needs validation + strategy.changed.push({ + filePath, + fileHash, + links: links.filter((link) => link.needsValidation), + extractionResult, + }); + + // Collect all new links for batch validation + links + .filter((link) => link.needsValidation) + .forEach((link) => allNewLinks.add(link.url)); + } + } catch (error) { + console.error(`Error processing ${filePath}: ${error.message}`); + // Treat as changed file to ensure validation + strategy.changed.push({ + filePath, + error: error.message, + }); + } + } + + strategy.newLinks = Array.from(allNewLinks); + + 
return strategy; + } + + /** + * Validate files using incremental strategy + * @param {Array} filePaths - Files to validate + * @returns {Object} Validation results + */ + async validateFiles(filePaths) { + console.log( + `📊 Analyzing ${filePaths.length} files for incremental validation...` + ); + + const strategy = await this.getValidationStrategy(filePaths); + + console.log(`✅ ${strategy.unchanged.length} files unchanged (cached)`); + console.log(`🔄 ${strategy.changed.length} files need validation`); + console.log(`🔗 ${strategy.newLinks.length} unique links to validate`); + + const results = { + validationStrategy: strategy, + filesToValidate: strategy.changed.map((item) => ({ + filePath: item.filePath, + linkCount: item.links ? item.links.length : 0, + })), + cacheStats: { + cacheHits: strategy.unchanged.length, + cacheMisses: strategy.changed.length, + hitRate: + strategy.total > 0 + ? Math.round((strategy.unchanged.length / strategy.total) * 100) + : 0, + }, + }; + + return results; + } + + /** + * Store validation results in cache + * @param {string} filePath - File path + * @param {string} fileHash - File hash + * @param {Object} validationResults - Results to cache + * @returns {Promise} Success status + */ + async cacheResults(filePath, fileHash, validationResults) { + return await this.cacheManager.set(filePath, fileHash, validationResults); + } + + /** + * Clean up expired cache entries + * @returns {Promise} Cleanup statistics + */ + async cleanupCache() { + return await this.cacheManager.cleanup(); + } +} + +/** + * CLI usage + */ +async function main() { + const args = process.argv.slice(2); + + if (args.length === 0 || args[0] === '--help') { + console.log(` +Incremental Link Validator + +Usage: + node incremental-validator.js [files...] 
Analyze files for validation + node incremental-validator.js --cleanup Clean up expired cache + node incremental-validator.js --help Show this help + +Options: + --no-external Don't validate external links + --no-internal Don't validate internal links + --local Use local cache instead of GitHub Actions cache + --cache-ttl=DAYS Set cache TTL in days (default: 30) + +Examples: + node incremental-validator.js content/**/*.md + node incremental-validator.js --cache-ttl=7 content/**/*.md + node incremental-validator.js --cleanup +`); + process.exit(0); + } + + if (args[0] === '--cleanup') { + const validator = new IncrementalValidator(); + const stats = await validator.cleanupCache(); + console.log(`🧹 Cleaned up ${stats.removed} expired cache entries`); + if (stats.note) console.log(`ℹ️ ${stats.note}`); + return; + } + + const options = { + validateExternal: !args.includes('--no-external'), + validateInternal: !args.includes('--no-internal'), + useGitHubCache: !args.includes('--local'), + }; + + // Extract cache TTL option if provided + const cacheTTLArg = args.find((arg) => arg.startsWith('--cache-ttl=')); + if (cacheTTLArg) { + options.cacheTTLDays = parseInt(cacheTTLArg.split('=')[1]); + } + + const filePaths = args.filter((arg) => !arg.startsWith('--')); + + if (filePaths.length === 0) { + console.error('No files specified for validation'); + process.exit(1); + } + + const validator = new IncrementalValidator(options); + const results = await validator.validateFiles(filePaths); + + console.log('\n📈 Validation Analysis Results:'); + console.log('================================'); + console.log(`Cache hit rate: ${results.cacheStats.hitRate}%`); + console.log(`Files to validate: ${results.filesToValidate.length}`); + + if (results.filesToValidate.length > 0) { + console.log('\nFiles needing validation:'); + results.filesToValidate.forEach((file) => { + console.log(` ${file.filePath} (${file.linkCount} links)`); + }); + + // Output files for Cypress to 
process + console.log('\n# Files for Cypress validation (one per line):'); + results.filesToValidate.forEach((file) => { + console.log(file.filePath); + }); + } else { + console.log('\nโœจ All files are cached - no validation needed!'); + } +} + +export default IncrementalValidator; +export { IncrementalValidator }; + +// Run CLI if called directly +if (import.meta.url === `file://${process.argv[1]}`) { + main().catch(console.error); +} diff --git a/.github/scripts/link-extractor.js b/.github/scripts/link-extractor.js new file mode 100644 index 000000000..453004822 --- /dev/null +++ b/.github/scripts/link-extractor.js @@ -0,0 +1,473 @@ +#!/usr/bin/env node + +/** + * Link Extractor for Documentation Files + * Extracts all links from markdown and HTML files with metadata for caching and incremental validation + */ + +import fs from 'fs'; +import crypto from 'crypto'; +import matter from 'gray-matter'; +import path from 'path'; +import process from 'process'; + +/** + * Extract links from markdown content + * @param {string} content - File content + * @param {string} filePath - Path to the file + * @returns {Array} Array of link objects with metadata + */ +function extractMarkdownLinks(content, filePath) { + const links = []; + const lines = content.split('\n'); + + // Track reference-style link definitions + const referenceLinks = new Map(); + + // First pass: collect reference definitions + content.replace(/^\s*\[([^\]]+)\]:\s*(.+)$/gm, (match, ref, url) => { + referenceLinks.set(ref.toLowerCase(), url.trim()); + return match; + }); + + // Process each line for links + lines.forEach((line, lineIndex) => { + const lineNumber = lineIndex + 1; + + // Standard markdown links + let match; + const standardLinkRegex = /\[([^\]]*)\]\(([^)]+)\)/g; + while ((match = standardLinkRegex.exec(line)) !== null) { + const linkText = match[1]; + const url = match[2]; + const columnStart = match.index; + + links.push({ + url: url.trim(), + text: linkText, + type: 'markdown', + line: 
lineNumber, + column: columnStart, + context: line.trim(), + hash: generateLinkHash(url.trim(), filePath, lineNumber), + }); + } + + // Reference-style links + const refLinkRegex = /\[([^\]]*)\]\[([^\]]*)\]/g; + while ((match = refLinkRegex.exec(line)) !== null) { + const linkText = match[1]; + const refKey = (match[2] || linkText).toLowerCase(); + const url = referenceLinks.get(refKey); + + if (url) { + const columnStart = match.index; + links.push({ + url: url, + text: linkText, + type: 'markdown-reference', + line: lineNumber, + column: columnStart, + context: line.trim(), + reference: refKey, + hash: generateLinkHash(url, filePath, lineNumber), + }); + } + } + + // Autolinks + const autolinkRegex = /<(https?:\/\/[^>]+)>/g; + while ((match = autolinkRegex.exec(line)) !== null) { + const url = match[1]; + const columnStart = match.index; + + links.push({ + url: url, + text: url, + type: 'autolink', + line: lineNumber, + column: columnStart, + context: line.trim(), + hash: generateLinkHash(url, filePath, lineNumber), + }); + } + + // Bare URLs (basic detection, avoid false positives) + const bareUrlRegex = /(?:^|[\s\n])(https?:\/\/[^\s\)]+)/g; + while ((match = bareUrlRegex.exec(line)) !== null) { + const url = match[1]; + const columnStart = match.index + match[0].length - url.length; + + // Skip if this URL is already captured in a proper markdown link + const alreadyCaptured = links.some( + (link) => + link.line === lineNumber && + Math.abs(link.column - columnStart) < 10 && + link.url === url + ); + + if (!alreadyCaptured) { + links.push({ + url: url, + text: url, + type: 'bare-url', + line: lineNumber, + column: columnStart, + context: line.trim(), + hash: generateLinkHash(url, filePath, lineNumber), + }); + } + } + }); + + return links; +} + +/** + * Extract links from HTML content + * @param {string} content - File content + * @param {string} filePath - Path to the file + * @returns {Array} Array of link objects with metadata + */ +function 
extractHtmlLinks(content, filePath) { + const links = []; + const lines = content.split('\n'); + + lines.forEach((line, lineIndex) => { + const lineNumber = lineIndex + 1; + let match; + + const htmlLinkRegex = /]*href\s*=\s*["']([^"']+)["'][^>]*>/gi; + while ((match = htmlLinkRegex.exec(line)) !== null) { + const url = match[1]; + const columnStart = match.index; + + // Extract link text if possible + const fullMatch = match[0]; + const textMatch = fullMatch.match(/>([^<]*) ({ + ...link, + ...categorizeLinkType(link.url), + filePath, + })); + + // Calculate statistics + const stats = { + totalLinks: enhancedLinks.length, + externalLinks: enhancedLinks.filter((l) => l.category === 'external') + .length, + internalLinks: enhancedLinks.filter((l) => + l.category.startsWith('internal') + ).length, + fragmentLinks: enhancedLinks.filter((l) => l.category === 'fragment') + .length, + linksNeedingValidation: enhancedLinks.filter((l) => l.needsValidation) + .length, + }; + + return { + filePath, + fileHash, + extension, + frontmatter, + links: enhancedLinks, + stats, + extractedAt: new Date().toISOString(), + }; + } catch (error) { + console.error(`Error extracting links from ${filePath}: ${error.message}`); + return null; + } +} + +/** + * Main function for CLI usage + */ +function main() { + const args = process.argv.slice(2); + + if (args.length === 0) { + console.error('Usage: node link-extractor.js [file2] [...]'); + console.error(' node link-extractor.js --help'); + process.exit(1); + } + + if (args[0] === '--help') { + console.log(` +Link Extractor for Documentation Files + +Usage: + node link-extractor.js [file2] [...] 
Extract links from files + node link-extractor.js --help Show this help + +Options: + --json Output results as JSON + --stats-only Show only statistics + --filter TYPE Filter links by category (external, internal-absolute, internal-relative, fragment) + +Examples: + node link-extractor.js content/influxdb3/core/install.md + node link-extractor.js --json content/**/*.md + node link-extractor.js --stats-only --filter external content/influxdb3/**/*.md +`); + process.exit(0); + } + + const jsonOutput = args.includes('--json'); + const statsOnly = args.includes('--stats-only'); + const filterType = args.includes('--filter') + ? args[args.indexOf('--filter') + 1] + : null; + + const files = args.filter( + (arg) => !arg.startsWith('--') && arg !== filterType + ); + const results = []; + + for (const filePath of files) { + const result = extractLinksFromFile(filePath); + if (result) { + // Apply filter if specified + if (filterType) { + result.links = result.links.filter( + (link) => link.category === filterType + ); + // Recalculate stats after filtering + result.stats = { + totalLinks: result.links.length, + externalLinks: result.links.filter((l) => l.category === 'external') + .length, + internalLinks: result.links.filter((l) => + l.category.startsWith('internal') + ).length, + fragmentLinks: result.links.filter((l) => l.category === 'fragment') + .length, + linksNeedingValidation: result.links.filter((l) => l.needsValidation) + .length, + }; + } + + results.push(result); + } + } + + if (jsonOutput) { + console.log(JSON.stringify(results, null, 2)); + } else if (statsOnly) { + console.log('\nLink Extraction Statistics:'); + console.log('=========================='); + + let totalFiles = 0; + let totalLinks = 0; + let totalExternal = 0; + let totalInternal = 0; + let totalFragment = 0; + let totalNeedingValidation = 0; + + results.forEach((result) => { + totalFiles++; + totalLinks += result.stats.totalLinks; + totalExternal += result.stats.externalLinks; + totalInternal 
+= result.stats.internalLinks; + totalFragment += result.stats.fragmentLinks; + totalNeedingValidation += result.stats.linksNeedingValidation; + + console.log( + `${result.filePath}: ${result.stats.totalLinks} links (${result.stats.linksNeedingValidation} need validation)` + ); + }); + + console.log('\nSummary:'); + console.log(` Total files: ${totalFiles}`); + console.log(` Total links: ${totalLinks}`); + console.log(` External links: ${totalExternal}`); + console.log(` Internal links: ${totalInternal}`); + console.log(` Fragment links: ${totalFragment}`); + console.log(` Links needing validation: ${totalNeedingValidation}`); + } else { + results.forEach((result) => { + console.log(`\nFile: ${result.filePath}`); + console.log(`Hash: ${result.fileHash}`); + console.log(`Links found: ${result.stats.totalLinks}`); + console.log( + `Links needing validation: ${result.stats.linksNeedingValidation}` + ); + + if (result.links.length > 0) { + console.log('\nLinks:'); + result.links.forEach((link, index) => { + console.log(` ${index + 1}. 
[${link.category}] ${link.url}`); + console.log(` Line ${link.line}, Column ${link.column}`); + console.log(` Text: "${link.text}"`); + console.log(` Hash: ${link.hash}`); + if (link.reference) { + console.log(` Reference: ${link.reference}`); + } + console.log(''); + }); + } + }); + } +} + +// Export functions for use as a module +export { + extractLinksFromFile, + extractMarkdownLinks, + extractHtmlLinks, + generateFileHash, + generateLinkHash, + categorizeLinkType, +}; + +// Run main function if called directly +if (import.meta.url === `file://${process.argv[1]}`) { + main(); +} diff --git a/.github/scripts/matrix-generator.js b/.github/scripts/matrix-generator.js new file mode 100644 index 000000000..65b546241 --- /dev/null +++ b/.github/scripts/matrix-generator.js @@ -0,0 +1,384 @@ +/** + * Matrix Generator for Link Validation Workflows + * Replaces complex bash scripting with maintainable JavaScript + * Includes cache-aware optimization to skip validation of unchanged files + */ + +import { spawn } from 'child_process'; +import process from 'process'; + +// Product configuration mapping file paths to products +const PRODUCT_MAPPING = { + 'content/influxdb3/core': { + key: 'influxdb3-core', + name: 'InfluxDB 3 Core', + }, + 'content/influxdb3/enterprise': { + key: 'influxdb3-enterprise', + name: 'InfluxDB 3 Enterprise', + }, + 'content/influxdb3/cloud-dedicated': { + key: 'influxdb3-cloud-dedicated', + name: 'InfluxDB 3 Cloud Dedicated', + }, + 'content/influxdb3/cloud-serverless': { + key: 'influxdb3-cloud-serverless', + name: 'InfluxDB 3 Cloud Serverless', + }, + 'content/influxdb3/clustered': { + key: 'influxdb3-clustered', + name: 'InfluxDB 3 Clustered', + }, + 'content/influxdb3/explorer': { + key: 'influxdb3-explorer', + name: 'InfluxDB 3 Explorer', + }, + 'content/influxdb/v2': { + key: 'influxdb-v2', + name: 'InfluxDB v2', + }, + 'content/influxdb/cloud': { + key: 'influxdb-cloud', + name: 'InfluxDB Cloud', + }, + 'content/influxdb/v1': { + key: 
'influxdb-v1', + name: 'InfluxDB v1', + }, + 'content/influxdb/enterprise_influxdb': { + key: 'influxdb-enterprise-v1', + name: 'InfluxDB Enterprise v1', + }, + 'content/telegraf': { + key: 'telegraf', + name: 'Telegraf', + }, + 'content/kapacitor': { + key: 'kapacitor', + name: 'Kapacitor', + }, + 'content/chronograf': { + key: 'chronograf', + name: 'Chronograf', + }, + 'content/flux': { + key: 'flux', + name: 'Flux', + }, + 'content/shared': { + key: 'shared', + name: 'Shared Content', + }, + 'api-docs': { + key: 'api-docs', + name: 'API Documentation', + }, +}; + +/** + * Group files by product based on their path + * @param {string[]} files - Array of file paths + * @returns {Object} - Object with product keys and arrays of files + */ +function groupFilesByProduct(files) { + const productFiles = {}; + + // Initialize all products + Object.values(PRODUCT_MAPPING).forEach((product) => { + productFiles[product.key] = []; + }); + + files.forEach((file) => { + let matched = false; + + // Check each product mapping + for (const [pathPrefix, product] of Object.entries(PRODUCT_MAPPING)) { + if (file.startsWith(pathPrefix + '/')) { + productFiles[product.key].push(file); + matched = true; + break; + } + } + + // Handle edge case for api-docs (no trailing slash) + if (!matched && file.startsWith('api-docs/')) { + productFiles['api-docs'].push(file); + } + }); + + return productFiles; +} + +/** + * Run incremental validation analysis + * @param {string[]} files - Array of file paths to analyze + * @returns {Promise} - Incremental validation results + */ +async function runIncrementalAnalysis(files) { + return new Promise((resolve) => { + const child = spawn( + 'node', + ['.github/scripts/incremental-validator.js', ...files], + { + stdio: ['pipe', 'pipe', 'pipe'], + env: process.env, + } + ); + + let stdout = ''; + let stderr = ''; + + child.stdout.on('data', (data) => { + stdout += data.toString(); + }); + + child.stderr.on('data', (data) => { + stderr += data.toString(); 
+ }); + + child.on('close', (code) => { + if (code === 0) { + try { + // Parse the JSON output from the validation script + const lines = stdout.trim().split('\n'); + const jsonLine = lines.find((line) => line.startsWith('{')); + + if (jsonLine) { + const results = JSON.parse(jsonLine); + resolve(results); + } else { + resolve({ filesToValidate: files.map((f) => ({ filePath: f })) }); + } + } catch (error) { + console.warn( + `Warning: Could not parse incremental validation results: ${error.message}` + ); + resolve({ filesToValidate: files.map((f) => ({ filePath: f })) }); + } + } else { + console.warn( + `Incremental validation failed with code ${code}: ${stderr}` + ); + resolve({ filesToValidate: files.map((f) => ({ filePath: f })) }); + } + }); + + child.on('error', (error) => { + console.warn(`Incremental validation error: ${error.message}`); + resolve({ filesToValidate: files.map((f) => ({ filePath: f })) }); + }); + }); +} + +/** + * Generate matrix configuration for GitHub Actions with cache awareness + * @param {string[]} changedFiles - Array of changed file paths + * @param {Object} options - Configuration options + * @returns {Promise} - Matrix configuration object + */ +async function generateMatrix(changedFiles, options = {}) { + const { + maxConcurrentJobs = 5, + forceSequential = false, + minFilesForParallel = 10, + useCache = true, + } = options; + + if (!changedFiles || changedFiles.length === 0) { + return { + strategy: 'none', + hasChanges: false, + matrix: { include: [] }, + cacheStats: { hitRate: 100, cacheHits: 0, cacheMisses: 0 }, + }; + } + + let filesToValidate = changedFiles; + let cacheStats = { + hitRate: 0, + cacheHits: 0, + cacheMisses: changedFiles.length, + }; + + // Run incremental analysis if cache is enabled + if (useCache) { + try { + console.log( + `๐Ÿ” Running cache analysis for ${changedFiles.length} files...` + ); + const analysisResults = await runIncrementalAnalysis(changedFiles); + + if (analysisResults.filesToValidate) { 
+ filesToValidate = analysisResults.filesToValidate.map( + (f) => f.filePath + ); + cacheStats = analysisResults.cacheStats || cacheStats; + + console.log( + `๐Ÿ“Š Cache analysis complete: ${cacheStats.hitRate}% hit rate` + ); + console.log( + `โœ… ${cacheStats.cacheHits} files cached, ${cacheStats.cacheMisses} need validation` + ); + } + } catch (error) { + console.warn( + `Cache analysis failed: ${error.message}, proceeding without cache optimization` + ); + } + } + + // If no files need validation after cache analysis + if (filesToValidate.length === 0) { + return { + strategy: 'cache-hit', + hasChanges: false, + matrix: { include: [] }, + cacheStats, + message: 'โœจ All files are cached - no validation needed!', + }; + } + + const productFiles = groupFilesByProduct(filesToValidate); + const productsWithFiles = Object.entries(productFiles).filter( + ([key, files]) => files.length > 0 + ); + + // Determine strategy based on file count and configuration + const totalFiles = filesToValidate.length; + const shouldUseParallel = + !forceSequential && + totalFiles >= minFilesForParallel && + productsWithFiles.length > 1; + + if (shouldUseParallel) { + // Parallel strategy: create matrix with products + const matrixIncludes = productsWithFiles.map(([productKey, files]) => { + const product = Object.values(PRODUCT_MAPPING).find( + (p) => p.key === productKey + ); + return { + product: productKey, + name: product?.name || productKey, + files: files.join(' '), + cacheEnabled: useCache, + }; + }); + + return { + strategy: 'parallel', + hasChanges: true, + matrix: { include: matrixIncludes.slice(0, maxConcurrentJobs) }, + cacheStats, + originalFileCount: changedFiles.length, + validationFileCount: filesToValidate.length, + }; + } else { + // Sequential strategy: single job with all files + return { + strategy: 'sequential', + hasChanges: true, + matrix: { + include: [ + { + product: 'all', + name: 'All Files', + files: filesToValidate.join(' '), + cacheEnabled: useCache, + 
}, + ], + }, + cacheStats, + originalFileCount: changedFiles.length, + validationFileCount: filesToValidate.length, + }; + } +} + +/** + * CLI interface for the matrix generator + */ +async function main() { + const args = process.argv.slice(2); + + if (args.includes('--help') || args.includes('-h')) { + console.log(` +Usage: node matrix-generator.js [options] ... + +Options: + --max-concurrent Maximum concurrent jobs (default: 5) + --force-sequential Force sequential execution + --min-files-parallel Minimum files needed for parallel (default: 10) + --output-format Output format: json, github (default: github) + --no-cache Disable cache-aware optimization + --help, -h Show this help message + +Examples: + node matrix-generator.js content/influxdb3/core/file1.md content/influxdb/v2/file2.md + node matrix-generator.js --force-sequential content/shared/file.md + node matrix-generator.js --no-cache --output-format json *.md +`); + process.exit(0); + } + + // Parse options + const options = {}; + const files = []; + + for (let i = 0; i < args.length; i++) { + const arg = args[i]; + + if (arg === '--max-concurrent' && i + 1 < args.length) { + options.maxConcurrentJobs = parseInt(args[++i]); + } else if (arg === '--force-sequential') { + options.forceSequential = true; + } else if (arg === '--min-files-parallel' && i + 1 < args.length) { + options.minFilesForParallel = parseInt(args[++i]); + } else if (arg === '--output-format' && i + 1 < args.length) { + options.outputFormat = args[++i]; + } else if (arg === '--no-cache') { + options.useCache = false; + } else if (!arg.startsWith('--')) { + files.push(arg); + } + } + + try { + const result = await generateMatrix(files, options); + + if (options.outputFormat === 'json') { + console.log(JSON.stringify(result, null, 2)); + } else { + // GitHub Actions format + console.log(`strategy=${result.strategy}`); + console.log(`has-changes=${result.hasChanges}`); + console.log(`matrix=${JSON.stringify(result.matrix)}`); + + // Add 
cache statistics + if (result.cacheStats) { + console.log(`cache-hit-rate=${result.cacheStats.hitRate}`); + console.log(`cache-hits=${result.cacheStats.cacheHits}`); + console.log(`cache-misses=${result.cacheStats.cacheMisses}`); + } + + if (result.originalFileCount !== undefined) { + console.log(`original-file-count=${result.originalFileCount}`); + console.log(`validation-file-count=${result.validationFileCount}`); + } + + if (result.message) { + console.log(`message=${result.message}`); + } + } + } catch (error) { + console.error(`Error generating matrix: ${error.message}`); + process.exit(1); + } +} + +// Run CLI if this file is executed directly +if (import.meta.url === `file://${process.argv[1]}`) { + main().catch(console.error); +} + +export { generateMatrix, groupFilesByProduct, PRODUCT_MAPPING }; diff --git a/.github/workflows/pr-link-validation.yml b/.github/workflows/pr-link-validation.yml new file mode 100644 index 000000000..1527f57e0 --- /dev/null +++ b/.github/workflows/pr-link-validation.yml @@ -0,0 +1,140 @@ +# PR Link Validation Workflow +# Provides basic and parallel workflows +# with smart strategy selection based on change volume +name: PR Link Validation + +on: + pull_request: + paths: + - 'content/**/*.md' + - 'content/**/*.html' + - 'api-docs/**/*.yml' + - 'assets/**/*.js' + - 'layouts/**/*.html' + +jobs: + setup: + name: Setup and Strategy Detection + runs-on: ubuntu-latest + outputs: + strategy: ${{ steps.determine-strategy.outputs.strategy }} + has-changes: ${{ steps.determine-strategy.outputs.has-changes }} + matrix: ${{ steps.determine-strategy.outputs.matrix }} + all-files: ${{ steps.changed-files.outputs.all_changed_files }} + cache-hit-rate: ${{ steps.determine-strategy.outputs.cache-hit-rate }} + cache-hits: ${{ steps.determine-strategy.outputs.cache-hits }} + cache-misses: ${{ steps.determine-strategy.outputs.cache-misses }} + original-file-count: ${{ steps.determine-strategy.outputs.original-file-count }} + validation-file-count: 
${{ steps.determine-strategy.outputs.validation-file-count }} + cache-message: ${{ steps.determine-strategy.outputs.message }} + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup docs environment + uses: ./.github/actions/setup-docs-env + + - name: Get changed files + id: changed-files + uses: tj-actions/changed-files@v41 + with: + files: | + content/**/*.md + content/**/*.html + api-docs/**/*.yml + + - name: Determine validation strategy + id: determine-strategy + run: | + if [[ "${{ steps.changed-files.outputs.any_changed }}" != "true" ]]; then + echo "No relevant files changed" + echo "strategy=none" >> $GITHUB_OUTPUT + echo "has-changes=false" >> $GITHUB_OUTPUT + echo "matrix={\"include\":[]}" >> $GITHUB_OUTPUT + echo "cache-hit-rate=100" >> $GITHUB_OUTPUT + echo "cache-hits=0" >> $GITHUB_OUTPUT + echo "cache-misses=0" >> $GITHUB_OUTPUT + exit 0 + fi + + # Use our matrix generator with cache awareness + files="${{ steps.changed-files.outputs.all_changed_files }}" + + echo "๐Ÿ” Analyzing ${files} for cache-aware validation..." 
+ + # Generate matrix and capture outputs + result=$(node .github/scripts/matrix-generator.js \ + --min-files-parallel 10 \ + --max-concurrent 5 \ + --output-format github \ + $files) + + # Parse all outputs from matrix generator + while IFS='=' read -r key value; do + case "$key" in + strategy|has-changes|cache-hit-rate|cache-hits|cache-misses|original-file-count|validation-file-count|message) + echo "$key=$value" >> $GITHUB_OUTPUT + ;; + matrix) + echo "matrix=$value" >> $GITHUB_OUTPUT + ;; + esac + done <<< "$result" + + # Extract values for logging + strategy=$(echo "$result" | grep "^strategy=" | cut -d'=' -f2) + cache_hit_rate=$(echo "$result" | grep "^cache-hit-rate=" | cut -d'=' -f2) + cache_message=$(echo "$result" | grep "^message=" | cut -d'=' -f2-) + + echo "๐Ÿ“Š Selected strategy: $strategy" + if [[ -n "$cache_hit_rate" ]]; then + echo "๐Ÿ“ˆ Cache hit rate: ${cache_hit_rate}%" + fi + if [[ -n "$cache_message" ]]; then + echo "$cache_message" + fi + + validate: + name: ${{ matrix.name }} + needs: setup + if: needs.setup.outputs.has-changes == 'true' + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: ${{ fromJson(needs.setup.outputs.matrix) }} + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup docs environment + uses: ./.github/actions/setup-docs-env + + - name: Validate links + uses: ./.github/actions/validate-links + with: + files: ${{ matrix.files || needs.setup.outputs.all-files }} + product-name: ${{ matrix.product }} + cache-enabled: ${{ matrix.cacheEnabled || 'true' }} + cache-key: link-validation-${{ github.event.pull_request.base.sha }} + + report: + name: Report Results + needs: [setup, validate] + if: always() && needs.setup.outputs.has-changes == 'true' + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup docs environment + uses: ./.github/actions/setup-docs-env + + - name: Report broken links + uses: 
./.github/actions/report-broken-links + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + max-links-per-file: 20 \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md index 3f795d372..d606868cc 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -21,6 +21,9 @@ See @.github/instructions/contributing.instructions.md for essential InfluxData documentation contributing guidelines, such as style and formatting, and commonly used shortcodes. +See @TESTING.md for comprehensive testing information, including code block +testing, link validation, style linting, and advanced testing procedures. + See @.github/instructions/shortcodes-reference.instructions.md for detailed information about shortcodes used in this project. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d4943f104..096ded745 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -9,7 +9,7 @@ Ready to contribute? Here's the essential workflow: 2. [Fork and clone](#fork-and-clone-influxdata-documentation-repository) this repository 3. [Install dependencies](#development-environment-setup) (Node.js, Yarn, Docker) 4. Make your changes following [style guidelines](#making-changes) -5. [Test your changes](#testing--quality-assurance) (pre-commit and pre-push hooks run automatically) +5. [Test your changes](TESTING.md) (pre-commit and pre-push hooks run automatically) 6. [Submit a pull request](#submission-process) For detailed setup and reference information, see the sections below. @@ -250,64 +250,29 @@ For more information about generating InfluxDB API documentation, see the --- - ## Testing & Quality Assurance -### Pre-commit Hooks +For comprehensive testing information, including code block testing, link validation, style linting, and advanced testing procedures, see **[TESTING.md](TESTING.md)**. -docs-v2 uses Lefthook to manage Git hooks that run during pre-commit and pre-push. The hooks run the scripts defined in `package.json` to lint Markdown and test code blocks. 
-When you try to commit changes (`git commit`), Git runs -the commands configured in `lefthook.yml` which pass your **staged** files to Vale, -Prettier, Cypress (for UI tests and link-checking), and Pytest (for testing Python and shell code in code blocks). +### Quick Testing Reference -#### Skip pre-commit hooks +```bash +# Test code blocks +yarn test:codeblocks:all -**We strongly recommend running linting and tests**, but you can skip them -(and avoid installing dependencies) -by including the `LEFTHOOK=0` environment variable or the `--no-verify` flag with -your commit--for example: +# Test links +yarn test:links content/influxdb3/core/**/*.md + +# Run style linting +docker compose run -T vale content/**/*.md +``` + +Pre-commit hooks run automatically when you commit changes, testing your staged files with Vale, Prettier, Cypress, and Pytest. To skip hooks if needed: ```sh git commit -m "" --no-verify ``` -```sh -LEFTHOOK=0 git commit -``` - -### Code Block Testing Overview - -[pytest-codeblocks](https://github.com/nschloe/pytest-codeblocks/tree/main) extracts code from python and shell Markdown code blocks and executes assertions for the code. - -**Basic example:** - -```python -print("Hello, world!") -``` - - - -``` -Hello, world! -``` - -For detailed testing setup and configuration, see [Detailed Testing Setup](#detailed-testing-setup). - -### Style Linting (Vale) - -docs-v2 includes Vale writing style linter configurations to enforce documentation writing style rules, guidelines, branding, and vocabulary terms. - -**Basic usage:** - -```sh -docker compose run -T vale --config=content/influxdb/cloud-dedicated/.vale.ini --minAlertLevel=error content/influxdb/cloud-dedicated/write-data/**/*.md -``` - -**VS Code integration:** - -1. Install the [Vale VSCode](https://marketplace.visualstudio.com/items?itemName=ChrisChinchilla.vale-vscode) extension. -2. In the extension settings, set the `Vale:Vale CLI:Path` value to `${workspaceFolder}/node_modules/.bin/vale`. 
- --- @@ -1720,132 +1685,6 @@ Replace the following: - {{% code-placeholder-key %}}`API_TOKEN`{{% /code-placeholder-key %}}: your [InfluxDB API token](/influxdb/v2/admin/tokens/) ``` - -### Detailed Testing Setup - -#### Set up test scripts and credentials - -Tests for code blocks require your InfluxDB credentials and other typical -InfluxDB configuration. - -To set up your docs-v2 instance to run tests locally, do the following: - -1. **Set executable permissions on test scripts** in `./test/src`: - - ```sh - chmod +x ./test/src/*.sh - ``` - -2. **Create credentials for tests**: - - - Create databases, buckets, and tokens for the product(s) you're testing. - - If you don't have access to a Clustered instance, you can use your -Cloud Dedicated instance for testing in most cases. To avoid conflicts when - running tests, create separate Cloud Dedicated and Clustered databases. - -1. **Create .env.test**: Copy the `./test/env.test.example` file into each - product directory to test and rename the file as `.env.test`--for example: - - ```sh - ./content/influxdb/cloud-dedicated/.env.test - ``` - -2. Inside each product's `.env.test` file, assign your InfluxDB credentials to - environment variables: - - - Include the usual `INFLUX_` environment variables - - In - `cloud-dedicated/.env.test` and `clustered/.env.test` files, also define the - following variables: - - - `ACCOUNT_ID`, `CLUSTER_ID`: You can find these values in your `influxctl` - `config.toml` configuration file. - - `MANAGEMENT_TOKEN`: Use the `influxctl management create` command to generate - a long-lived management token to authenticate Management API requests - - See the substitution - patterns in `./test/src/prepare-content.sh` for the full list of variables you may need to define in your `.env.test` files. - -3. For influxctl commands to run in tests, move or copy your `config.toml` file - to the `./test` directory. 
- -> [!Warning] -> -> - The database you configure in `.env.test` and any written data may -be deleted during test runs. -> - Don't add your `.env.test` files to Git. To prevent accidentally adding credentials to the docs-v2 repo, -> Git is configured to ignore `.env*` files. Consider backing them up on your local machine in case of accidental deletion. - -#### Test shell and python code blocks - -[pytest-codeblocks](https://github.com/nschloe/pytest-codeblocks/tree/main) extracts code from python and shell Markdown code blocks and executes assertions for the code. -If you don't assert a value (using a Python `assert` statement), `--codeblocks` considers a non-zero exit code to be a failure. - -**Note**: `pytest --codeblocks` uses Python's `subprocess.run()` to execute shell code. - -You can use this to test CLI and interpreter commands, regardless of programming -language, as long as they return standard exit codes. - -To make the documented output of a code block testable, precede it with the -`` tag and **omit the code block language -descriptor**--for example, in your Markdown file: - -##### Example markdown - -```python -print("Hello, world!") -``` - - - -The next code block is treated as an assertion. -If successful, the output is the following: - -``` -Hello, world! -``` - -For commands, such as `influxctl` CLI commands, that require launching an -OAuth URL in a browser, wrap the command in a subshell and redirect the output -to `/shared/urls.txt` in the container--for example: - -```sh -# Test the preceding command outside of the code block. -# influxctl authentication requires TTY interaction-- -# output the auth URL to a file that the host can open. -script -c "influxctl user list " \ - /dev/null > /shared/urls.txt -``` - -You probably don't want to display this syntax in the docs, which unfortunately -means you'd need to include the test block separately from the displayed code -block. -To hide it from users, wrap the code block inside an HTML comment. 
-pytest-codeblocks will still collect and run the code block. - -##### Mark tests to skip - -pytest-codeblocks has features for skipping tests and marking blocks as failed. -To learn more, see the pytest-codeblocks README and tests. - -#### Troubleshoot tests - -##### Pytest collected 0 items - -Potential reasons: - -- See the test discovery options in `pytest.ini`. -- For Python code blocks, use the following delimiter: - - ```python - # Codeblocks runs this block. - ``` - - `pytest --codeblocks` ignores code blocks that use the following: - - ```py - # Codeblocks ignores this block. - ``` ### Advanced Configuration diff --git a/README.md b/README.md index d24564c76..ab99436fd 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,10 @@ This repository contains the InfluxDB 2.x documentation published at [docs.influ We welcome and encourage community contributions. For information about contributing to the InfluxData documentation, see [Contribution guidelines](CONTRIBUTING.md). +## Testing + +For information about testing the documentation, including code block testing, link validation, and style linting, see [Testing guide](TESTING.md). + ## Reporting a Vulnerability InfluxData takes security and our users' trust very seriously. diff --git a/TESTING.md b/TESTING.md new file mode 100644 index 000000000..44a5006ae --- /dev/null +++ b/TESTING.md @@ -0,0 +1,364 @@ +# Testing Guide for InfluxData Documentation + +This guide covers all testing procedures for the InfluxData documentation, including code block testing, link validation, and style linting. + +## Quick Start + +1. **Prerequisites**: Install [Node.js](https://nodejs.org/en), [Yarn](https://yarnpkg.com/getting-started/install), and [Docker](https://docs.docker.com/get-docker/) +2. **Install dependencies**: Run `yarn` to install all dependencies +3. **Build test environment**: Run `docker build -t influxdata/docs-pytest:latest -f Dockerfile.pytest .` +4. 
**Run tests**: Use any of the test commands below + +## Test Types Overview + +| Test Type | Purpose | Command | +|-----------|---------|---------| +| **Code blocks** | Validate shell/Python code examples | `yarn test:codeblocks:all` | +| **Link validation** | Check internal/external links | `yarn test:links` | +| **Style linting** | Enforce writing standards | `docker compose run -T vale` | +| **E2E tests** | UI and functionality testing | `yarn test:e2e` | + +## Code Block Testing + +Code block testing validates that shell commands and Python scripts in documentation work correctly using [pytest-codeblocks](https://github.com/nschloe/pytest-codeblocks/tree/main). + +### Basic Usage + +```bash +# Test all code blocks +yarn test:codeblocks:all + +# Test specific products +yarn test:codeblocks:cloud +yarn test:codeblocks:v2 +yarn test:codeblocks:telegraf +``` + +### Setup and Configuration + +#### 1. Set executable permissions on test scripts + +```sh +chmod +x ./test/src/*.sh +``` + +#### 2. Create test credentials + +Create databases, buckets, and tokens for the product(s) you're testing. +If you don't have access to a Clustered instance, you can use your Cloud Dedicated instance for testing in most cases. + +#### 3. Configure environment variables + +Copy the `./test/env.test.example` file into each product directory and rename as `.env.test`: + +```sh +# Example locations +./content/influxdb/cloud-dedicated/.env.test +./content/influxdb3/clustered/.env.test +``` + +Inside each product's `.env.test` file, assign your InfluxDB credentials: + +- Include the usual `INFLUX_` environment variables +- For `cloud-dedicated/.env.test` and `clustered/.env.test`, also define: + - `ACCOUNT_ID`, `CLUSTER_ID`: Found in your `influxctl config.toml` + - `MANAGEMENT_TOKEN`: Generate with `influxctl management create` + +See `./test/src/prepare-content.sh` for the full list of variables you may need. + +#### 4. 
Configure influxctl commands + +For influxctl commands to run in tests, move or copy your `config.toml` file to the `./test` directory. + +> [!Warning] +> - The database you configure in `.env.test` and any written data may be deleted during test runs +> - Don't add your `.env.test` files to Git. Git is configured to ignore `.env*` files to prevent accidentally committing credentials + +### Writing Testable Code Blocks + +#### Basic Example + +```python +print("Hello, world!") +``` + + + +``` +Hello, world! +``` + +#### Interactive Commands + +For commands that require TTY interaction (like `influxctl` authentication), wrap the command in a subshell and redirect output: + +```sh +# Test the preceding command outside of the code block. +# influxctl authentication requires TTY interaction-- +# output the auth URL to a file that the host can open. +script -c "influxctl user list " \ + /dev/null > /shared/urls.txt +``` + +To hide test blocks from users, wrap them in HTML comments. pytest-codeblocks will still collect and run them. + +#### Skipping Tests + +pytest-codeblocks has features for skipping tests and marking blocks as failed. See the [pytest-codeblocks README](https://github.com/nschloe/pytest-codeblocks/tree/main) for details. + +### Troubleshooting + +#### "Pytest collected 0 items" + +Potential causes: +- Check test discovery options in `pytest.ini` +- Use `python` (not `py`) for Python code block language identifiers: + ```python + # This works + ``` + vs + ```py + # This is ignored + ``` + +## Link Validation Testing + +Link validation uses Cypress for e2e browser-based testing against the Hugo site to ensure all internal and external links work correctly. 
+ +### Basic Usage + +```bash +# Test specific files +yarn test:links content/influxdb3/core/**/*.md + +# Test all links (may take a long time) +yarn test:links + +# Test by product (may take a long time) +yarn test:links:v3 +yarn test:links:v2 +yarn test:links:telegraf +yarn test:links:chronograf +yarn test:links:kapacitor +``` + +### How Link Validation Works + +The tests: +1. Start a Hugo development server +2. Navigate to each page in a browser +3. Check all links for validity +4. Report broken or invalid links + +### GitHub Actions Integration + +#### Composite Action + +The `.github/actions/validate-links/` composite action provides reusable link validation: + +```yaml +- uses: ./.github/actions/validate-links + with: + files: "content/influxdb3/core/file.md content/influxdb/v2/file2.md" + product-name: "core" + cache-enabled: "true" + cache-key: "link-validation" +``` + +#### Matrix Generator + +The `.github/scripts/matrix-generator.js` script provides intelligent strategy selection: + +- **Sequential validation**: For small changes (< 10 files) or single-product changes +- **Parallel validation**: For large changes across multiple products (up to 5 concurrent jobs) + +Test locally: + +```bash +node .github/scripts/matrix-generator.js content/influxdb3/core/file1.md content/influxdb/v2/file2.md +``` + +Configuration options: +- `--max-concurrent `: Maximum parallel jobs (default: 5) +- `--force-sequential`: Force sequential execution +- `--min-files-parallel `: Minimum files for parallel (default: 10) + +### Caching for Link Validation + +Link validation supports caching to improve performance: + +- **Cache location**: `.cache/link-validation/` (local), GitHub Actions cache (CI) +- **Cache keys**: Based on content file hashes +- **TTL**: 30 days by default, configurable + +#### Cache Configuration Options + +```bash +# Use 7-day cache for more frequent validation +yarn test:links --cache-ttl=7 content/influxdb3/**/*.md + +# Use 1-day cache via environment 
variable +LINK_CACHE_TTL_DAYS=1 yarn test:links content/**/*.md + +# Clean up expired cache entries +node .github/scripts/incremental-validator.js --cleanup +``` + +#### How Caching Works + +- **Cache key**: Based on file path + content hash (file changes invalidate cache immediately) +- **External links**: Cached for the TTL period since URLs rarely change +- **Internal links**: Effectively cached until file content changes +- **Automatic cleanup**: Expired entries are removed on access and via `--cleanup` + +## Style Linting (Vale) + +Style linting uses [Vale](https://vale.sh/) to enforce documentation writing standards, branding guidelines, and vocabulary consistency. + +### Basic Usage + +```bash +# Basic linting with Docker +docker compose run -T vale --config=content/influxdb/cloud-dedicated/.vale.ini --minAlertLevel=error content/influxdb/cloud-dedicated/write-data/**/*.md +``` + +### VS Code Integration + +1. Install the [Vale VSCode](https://marketplace.visualstudio.com/items?itemName=ChrisChinchilla.vale-vscode) extension +2. Set the `Vale:Vale CLI:Path` setting to `${workspaceFolder}/node_modules/.bin/vale` + +### Alert Levels + +Vale can raise different alert levels: + +- **Error**: Problems that can cause content to render incorrectly, violations of branding guidelines, rejected vocabulary terms +- **Warning**: General style guide rules and best practices +- **Suggestion**: Style preferences that may require refactoring or updates to an exceptions list + +### Configuration + +- **Styles**: `.ci/vale/styles/` contains configuration for the custom `InfluxDataDocs` style +- **Vocabulary**: Add accepted/rejected terms to `.ci/vale/styles/config/vocabularies` +- **Product-specific**: Configure per-product styles like `content/influxdb/cloud-dedicated/.vale.ini` + +For more configuration details, see [Vale configuration](https://vale.sh/docs/topics/config). 
+ +## Pre-commit Hooks + +docs-v2 uses [Lefthook](https://github.com/evilmartians/lefthook) to manage Git hooks that run automatically during pre-commit and pre-push. + +### What Runs Automatically + +When you run `git commit`, Git runs: +- **Vale**: Style linting (if configured) +- **Prettier**: Code formatting +- **Cypress**: Link validation tests +- **Pytest**: Code block tests + +### Skipping Pre-commit Hooks + +We strongly recommend running linting and tests, but you can skip them: + +```sh +# Skip with --no-verify flag +git commit -m "" --no-verify + +# Skip with environment variable +LEFTHOOK=0 git commit +``` + +## Advanced Testing + +### E2E Testing + +```bash +# Run all E2E tests +yarn test:e2e + +# Run specific E2E specs +node cypress/support/run-e2e-specs.js --spec "cypress/e2e/content/article-links.cy.js" +``` + +### JavaScript Testing and Debugging + +For JavaScript code in the documentation UI (`assets/js`): + +#### Using Source Maps and Chrome DevTools + +1. In VS Code, select Run > Start Debugging +2. Select "Debug Docs (source maps)" configuration +3. Set breakpoints in the `assets/js/ns-hugo-imp:` namespace + +#### Using Debug Helpers + +1. Import debug helpers in your JavaScript module: + ```js + import { debugLog, debugBreak, debugInspect } from './utils/debug-helpers.js'; + ``` + +2. Insert debug statements: + ```js + const data = debugInspect(someData, 'Data'); + debugLog('Processing data', 'myFunction'); + debugBreak(); // Add breakpoint + ``` + +3. Start Hugo: `yarn hugo server` +4. In VS Code, select "Debug JS (debug-helpers)" configuration + +Remember to remove debug statements before committing. 
+ +## Docker Compose Services + +Available test services: + +```bash +# All code block tests +docker compose --profile test up + +# Individual product tests +docker compose run --rm cloud-pytest +docker compose run --rm v2-pytest +docker compose run --rm telegraf-pytest + +# Stop monitoring services +yarn test:codeblocks:stop-monitors +``` + +## Testing Best Practices + +### Code Block Examples + +- Always test code examples before committing +- Use realistic data and examples that users would encounter +- Include proper error handling in examples +- Format code to fit within 80 characters +- Use long options in command-line examples (`--option` vs `-o`) + +### Link Validation + +- Test links regularly, especially after content restructuring +- Use appropriate cache TTL settings for your validation needs +- Monitor cache hit rates to optimize performance +- Clean up expired cache entries periodically + +### Style Guidelines + +- Run Vale regularly to catch style issues early +- Add accepted terms to vocabulary files rather than ignoring errors +- Configure product-specific styles for branding consistency +- Review suggestions periodically for content improvement opportunities + +## Related Files + +- **Configuration**: `pytest.ini`, `cypress.config.js`, `lefthook.yml` +- **Docker**: `compose.yaml`, `Dockerfile.pytest` +- **Scripts**: `.github/scripts/` directory +- **Test data**: `./test/` directory +- **Vale config**: `.ci/vale/styles/` + +## Getting Help + +- **GitHub Issues**: [docs-v2 issues](https://github.com/influxdata/docs-v2/issues) +- **Good first issues**: [good-first-issue label](https://github.com/influxdata/docs-v2/issues?q=is%3Aissue+is%3Aopen+label%3Agood-first-issue) +- **InfluxData CLA**: [Sign here](https://www.influxdata.com/legal/cla/) for substantial contributions \ No newline at end of file diff --git a/cypress.config.js b/cypress.config.js index f1b1655c8..88ae2e2dd 100644 --- a/cypress.config.js +++ b/cypress.config.js @@ -7,6 +7,8 @@ 
import { FIRST_BROKEN_LINK_FILE, initializeReport, readBrokenLinksReport, + saveCacheStats, + saveValidationStrategy, } from './cypress/support/link-reporter.js'; export default defineConfig({ @@ -177,6 +179,63 @@ export default defineConfig({ return true; } }, + + // Cache and incremental validation tasks + saveCacheStatistics(stats) { + try { + saveCacheStats(stats); + return true; + } catch (error) { + console.error(`Error saving cache stats: ${error.message}`); + return false; + } + }, + + saveValidationStrategy(strategy) { + try { + saveValidationStrategy(strategy); + return true; + } catch (error) { + console.error(`Error saving validation strategy: ${error.message}`); + return false; + } + }, + + runIncrementalValidation(filePaths) { + return new Promise(async (resolve, reject) => { + try { + const { IncrementalValidator } = await import( + './.github/scripts/incremental-validator.js' + ); + const validator = new IncrementalValidator(); + const results = await validator.validateFiles(filePaths); + resolve(results); + } catch (error) { + console.error(`Incremental validation error: ${error.message}`); + reject(error); + } + }); + }, + + cacheValidationResults(filePath, fileHash, results) { + return new Promise(async (resolve, reject) => { + try { + const { IncrementalValidator } = await import( + './.github/scripts/incremental-validator.js' + ); + const validator = new IncrementalValidator(); + const success = await validator.cacheResults( + filePath, + fileHash, + results + ); + resolve(success); + } catch (error) { + console.error(`Cache validation results error: ${error.message}`); + reject(error); + } + }); + }, }); // Load plugins file using dynamic import for ESM compatibility diff --git a/cypress/e2e/content/article-links.cy.js b/cypress/e2e/content/article-links.cy.js index 3b9ef7b01..83ee3a058 100644 --- a/cypress/e2e/content/article-links.cy.js +++ b/cypress/e2e/content/article-links.cy.js @@ -1,7 +1,9 @@ /// describe('Article', () => { - const 
subjects = Cypress.env('test_subjects').split(','); + let subjects = Cypress.env('test_subjects').split(','); + let validationStrategy = null; + // Always use HEAD for downloads to avoid timeouts const useHeadForDownloads = true; @@ -9,6 +11,55 @@ describe('Article', () => { before(() => { // Initialize the broken links report cy.task('initializeBrokenLinksReport'); + + // Get source file paths for incremental validation + const testSubjectsData = Cypress.env('test_subjects_data'); + let sourceFilePaths = subjects; // fallback to subjects if no data available + + if (testSubjectsData) { + try { + const urlToSourceData = JSON.parse(testSubjectsData); + // Extract source file paths from the structured data + sourceFilePaths = urlToSourceData.map((item) => item.source); + } catch (e) { + console.warn( + 'Could not parse test_subjects_data, using subjects as fallback' + ); + } + } + + // Run incremental validation analysis with source file paths + cy.task('runIncrementalValidation', sourceFilePaths).then((results) => { + validationStrategy = results.validationStrategy; + + // Save cache statistics and validation strategy for reporting + cy.task('saveCacheStatistics', results.cacheStats); + cy.task('saveValidationStrategy', validationStrategy); + + // Update subjects to only test files that need validation + if (results.filesToValidate.length > 0) { + subjects = results.filesToValidate.map((file) => { + // Convert file path to URL format (same logic as map-files-to-urls.js) + let url = file.filePath.replace(/^content/, ''); + url = url.replace(/\/_index\.(html|md)$/, '/'); + url = url.replace(/\.md$/, '/'); + url = url.replace(/\.html$/, '/'); + if (!url.startsWith('/')) { + url = '/' + url; + } + return url; + }); + + cy.log(`๐Ÿ“Š Cache Analysis: ${results.cacheStats.hitRate}% hit rate`); + cy.log( + `๐Ÿ”„ Testing ${subjects.length} pages (${results.cacheStats.cacheHits} cached)` + ); + } else { + // All files are cached, no validation needed + subjects = []; + 
cy.log('โœจ All files cached - skipping validation'); + } + }); }); // Helper function to identify download links diff --git a/cypress/support/link-reporter.js b/cypress/support/link-reporter.js index 39097cefe..5dc947ef1 100644 --- a/cypress/support/link-reporter.js +++ b/cypress/support/link-reporter.js @@ -7,6 +7,8 @@ import fs from 'fs'; export const BROKEN_LINKS_FILE = '/tmp/broken_links_report.json'; export const FIRST_BROKEN_LINK_FILE = '/tmp/first_broken_link.json'; const SOURCES_FILE = '/tmp/test_subjects_sources.json'; +const CACHE_STATS_FILE = '/tmp/cache_statistics.json'; +const VALIDATION_STRATEGY_FILE = '/tmp/validation_strategy.json'; /** * Reads the broken links report from the file system @@ -69,6 +71,65 @@ function readSourcesMapping() { return {}; } +/** + * Read cache statistics from file + * @returns {Object|null} Cache statistics or null if not found + */ +function readCacheStats() { + try { + if (fs.existsSync(CACHE_STATS_FILE)) { + const content = fs.readFileSync(CACHE_STATS_FILE, 'utf8'); + return JSON.parse(content); + } + } catch (err) { + console.warn(`Warning: Could not read cache stats: ${err.message}`); + } + return null; +} + +/** + * Read validation strategy from file + * @returns {Object|null} Validation strategy or null if not found + */ +function readValidationStrategy() { + try { + if (fs.existsSync(VALIDATION_STRATEGY_FILE)) { + const content = fs.readFileSync(VALIDATION_STRATEGY_FILE, 'utf8'); + return JSON.parse(content); + } + } catch (err) { + console.warn(`Warning: Could not read validation strategy: ${err.message}`); + } + return null; +} + +/** + * Save cache statistics for reporting + * @param {Object} stats - Cache statistics to save + */ +export function saveCacheStats(stats) { + try { + fs.writeFileSync(CACHE_STATS_FILE, JSON.stringify(stats, null, 2)); + } catch (err) { + console.warn(`Warning: Could not save cache stats: ${err.message}`); + } +} + +/** + * Save validation strategy for reporting + * @param {Object} 
strategy - Validation strategy to save + */ +export function saveValidationStrategy(strategy) { + try { + fs.writeFileSync( + VALIDATION_STRATEGY_FILE, + JSON.stringify(strategy, null, 2) + ); + } catch (err) { + console.warn(`Warning: Could not save validation strategy: ${err.message}`); + } +} + /** * Formats and displays the broken links report to the console * @param {Array} brokenLinksReport - The report data to display @@ -80,6 +141,26 @@ export function displayBrokenLinksReport(brokenLinksReport = null) { brokenLinksReport = readBrokenLinksReport(); } + // Read cache statistics and validation strategy + const cacheStats = readCacheStats(); + const validationStrategy = readValidationStrategy(); + + // Display cache performance first + if (cacheStats) { + console.log('\n๐Ÿ“Š Cache Performance:'); + console.log('====================='); + console.log(`Cache hit rate: ${cacheStats.hitRate}%`); + console.log(`Files cached: ${cacheStats.cacheHits}`); + console.log(`Files validated: ${cacheStats.cacheMisses}`); + + if (validationStrategy) { + console.log(`Total files analyzed: ${validationStrategy.total}`); + console.log( + `Links needing validation: ${validationStrategy.newLinks.length}` + ); + } + } + // Check both the report and first broken link file to determine if we have broken links const firstBrokenLink = readFirstBrokenLink(); @@ -88,7 +169,7 @@ export function displayBrokenLinksReport(brokenLinksReport = null) { (!brokenLinksReport || brokenLinksReport.length === 0) && !firstBrokenLink ) { - console.log('โœ… No broken links detected in the validation report'); + console.log('\nโœ… No broken links detected in the validation report'); return 0; } diff --git a/lefthook.yml b/lefthook.yml index c7ef301d4..68face524 100644 --- a/lefthook.yml +++ b/lefthook.yml @@ -111,13 +111,15 @@ pre-push: node cypress/support/run-e2e-specs.js --spec "cypress/e2e/content/article-links.cy.js" content/example.md exit $? 
- e2e-links: - tags: test,links - glob: 'content/*.{md,html}' - run: | - echo "Running link checker for: {staged_files}" - yarn test:links {staged_files} - exit $? + # Link validation runs in GitHub actions. + # You can still run it locally for development. + # e2e-links: + # tags: test,links + # glob: 'content/*.{md,html}' + # run: | + # echo "Running link checker for: {staged_files}" + # yarn test:links {staged_files} + # exit $? # Manage Docker containers prune-legacy-containers: