feat(testing): add link validation automation and improvements
- Add GitHub Actions for automated link validation on PRs - Implement incremental validation with caching (30-day TTL, configurable) - Add matrix generator for parallel validation strategy - Create comprehensive TESTING.md documentation - Add cache manager with configurable TTL via env var or CLI - Implement smart link extraction and validation - Add PR comment generator for broken link reports - Update Cypress tests to use incremental validation - Consolidate testing docs from CONTRIBUTING.md to TESTING.md Key improvements: - Cache-aware validation only checks changed content - Parallel execution for large changesets - Detailed PR comments with broken link reports - Support for LINK_CACHE_TTL_DAYS env var - Local testing with yarn test:links - Reduced false positives through intelligent cachingpull/6255/head
parent
d3f60a7d7d
commit
6a4e8827eb
|
|
@ -0,0 +1,87 @@
|
|||
# Composite action: downloads broken-link report artifacts, generates a PR
# comment from them, posts the comment, and fails the workflow when broken
# links were actually found.
name: 'Report Broken Links'
description: 'Downloads broken link reports, generates PR comment, and posts results'

inputs:
  github-token:
    description: 'GitHub token for posting comments'
    required: false
    default: ${{ github.token }}
  max-links-per-file:
    description: 'Maximum links to show per file in comment'
    required: false
    default: '20'
  include-success-message:
    description: 'Include success message when no broken links found'
    required: false
    default: 'true'

outputs:
  has-broken-links:
    description: 'Whether broken links were found (true/false)'
    value: ${{ steps.generate-comment.outputs.has-broken-links }}
  broken-link-count:
    description: 'Number of broken links found'
    value: ${{ steps.generate-comment.outputs.broken-link-count }}

runs:
  using: 'composite'
  steps:
    - name: Download broken link reports
      uses: actions/download-artifact@v4
      with:
        path: reports
      continue-on-error: true

    - name: Generate PR comment
      id: generate-comment
      run: |
        # Generate comment using our script.
        # comment-generator.js exits non-zero when broken links exist, so
        # tolerate failure here and inspect the output file instead.
        node .github/scripts/comment-generator.js \
          --max-links ${{ inputs.max-links-per-file }} \
          ${{ inputs.include-success-message == 'false' && '--no-success' || '' }} \
          --output-file comment.md \
          reports/ || echo "No reports found or errors occurred"

        # Check if comment file was created and has content
        if [[ -f comment.md && -s comment.md ]]; then
          echo "comment-generated=true" >> $GITHUB_OUTPUT

          # Count broken links by parsing the comment.
          # -m 1 limits grep to the first matching line so a multi-line value
          # can never corrupt $GITHUB_OUTPUT.
          broken_count=$(grep -o -m 1 "Found [0-9]* broken link" comment.md | grep -o "[0-9]*" || echo "0")
          echo "broken-link-count=$broken_count" >> $GITHUB_OUTPUT

          # A non-empty comment.md is NOT proof of broken links: when
          # include-success-message is enabled, a passing run also writes a
          # (success) comment. Derive has-broken-links from the parsed count
          # so successful runs are not reported as failures.
          if [[ "$broken_count" -gt 0 ]]; then
            echo "has-broken-links=true" >> $GITHUB_OUTPUT
          else
            echo "has-broken-links=false" >> $GITHUB_OUTPUT
          fi
        else
          echo "has-broken-links=false" >> $GITHUB_OUTPUT
          echo "broken-link-count=0" >> $GITHUB_OUTPUT
          echo "comment-generated=false" >> $GITHUB_OUTPUT
        fi
      shell: bash

    - name: Post PR comment
      if: steps.generate-comment.outputs.comment-generated == 'true'
      uses: actions/github-script@v7
      with:
        github-token: ${{ inputs.github-token }}
        script: |
          const fs = require('fs');

          if (fs.existsSync('comment.md')) {
            const comment = fs.readFileSync('comment.md', 'utf8');

            if (comment.trim()) {
              await github.rest.issues.createComment({
                issue_number: context.issue.number,
                owner: context.repo.owner,
                repo: context.repo.repo,
                body: comment
              });
            }
          }

    # Enforcement point: the generate step deliberately swallows the
    # comment-generator's non-zero exit, so the job is failed here instead.
    - name: Set workflow status
      if: steps.generate-comment.outputs.has-broken-links == 'true'
      run: |
        broken_count="${{ steps.generate-comment.outputs.broken-link-count }}"
        echo "::error::Found $broken_count broken link(s)"
        exit 1
      shell: bash
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
# Composite action: standard Node.js + Yarn setup shared by the
# documentation workflows.
name: 'Setup Documentation Environment'
description: 'Sets up Node.js environment and installs dependencies for documentation workflows'

runs:
  using: 'composite'
  steps:
    - name: Setup Node.js
      uses: actions/setup-node@v4
      with:
        node-version: '20'
        # Caches the Yarn package store between workflow runs.
        cache: 'yarn'

    - name: Install dependencies
      # NOTE(review): consider `yarn install --frozen-lockfile` (Yarn 1) or
      # `--immutable` (Yarn Berry) so CI fails on lockfile drift — confirm
      # which Yarn version this repo uses.
      run: yarn install
      shell: bash
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
# Composite action: runs browser-based (Cypress) link validation against the
# Hugo site for a given set of content files, with optional result caching.
name: 'Validate Links'
description: 'Runs e2e browser-based link validation tests against Hugo site using Cypress'

inputs:
  files:
    description: 'Space-separated list of files to validate'
    required: true
  product-name:
    description: 'Product name for reporting (optional)'
    required: false
    default: ''
  cache-enabled:
    description: 'Enable link validation caching'
    required: false
    default: 'true'
  cache-key:
    description: 'Cache key prefix for this validation run'
    required: false
    default: 'link-validation'

outputs:
  failed:
    description: 'Whether validation failed (true/false)'
    value: ${{ steps.validate.outputs.failed }}

runs:
  using: 'composite'
  steps:
    # Restore cached validation results keyed on content hashes; partial
    # matches fall back through the broader restore-keys prefixes.
    - name: Restore link validation cache
      if: inputs.cache-enabled == 'true'
      uses: actions/cache@v4
      with:
        path: .cache/link-validation
        key: ${{ inputs.cache-key }}-${{ runner.os }}-${{ hashFiles('content/**/*.md', 'content/**/*.html') }}
        restore-keys: |
          ${{ inputs.cache-key }}-${{ runner.os }}-
          ${{ inputs.cache-key }}-

    - name: Run link validation
      id: validate
      # NOTE(review): inputs.files is interpolated directly into the shell
      # command — file names containing spaces or shell metacharacters would
      # break or inject; confirm the upstream matrix generator sanitizes it.
      run: |
        echo "Testing files: ${{ inputs.files }}"
        if [[ -n "${{ inputs.product-name }}" ]]; then
          echo "Product: ${{ inputs.product-name }}"
        fi

        if [[ "${{ inputs.cache-enabled }}" == "true" ]]; then
          echo "📦 Cache enabled for this validation run"
        fi

        # Run the validation
        if node cypress/support/run-e2e-specs.js \
          --spec "cypress/e2e/content/article-links.cy.js" \
          ${{ inputs.files }}; then
          echo "failed=false" >> $GITHUB_OUTPUT
        else
          echo "failed=true" >> $GITHUB_OUTPUT
          exit 1
        fi
      shell: bash
      env:
        CI: true
        # Consumed by the validation tooling to toggle cache reads/writes.
        CACHE_ENABLED: ${{ inputs.cache-enabled }}

    # The validation script writes its report to /tmp; upload it only when
    # the validation step failed. The optional product-name suffix keeps
    # artifact names unique across matrix jobs.
    - name: Upload broken links report
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: broken-links-report${{ inputs.product-name && format('-{0}', inputs.product-name) || '' }}
        path: /tmp/broken_links_report.json
|
||||
|
|
@ -18,7 +18,7 @@ Ready to contribute? Here's the essential workflow:
|
|||
2. [Fork and clone](#fork-and-clone-influxdata-documentation-repository) this repository
|
||||
3. [Install dependencies](#development-environment-setup) (Node.js, Yarn, Docker)
|
||||
4. Make your changes following [style guidelines](#making-changes)
|
||||
5. [Test your changes](TESTING.md) (pre-commit and pre-push hooks run automatically)
|
||||
6. [Submit a pull request](#submission-process)
|
||||
|
||||
For detailed setup and reference information, see the sections below.
|
||||
|
|
@ -169,33 +169,30 @@ For more information about generating InfluxDB API documentation, see the
|
|||
|
||||
---
|
||||
|
||||
### Pre-commit Hooks
|
||||
## Testing & Quality Assurance
|
||||
|
||||
docs-v2 uses Lefthook to manage Git hooks that run during pre-commit and pre-push. The hooks run the scripts defined in `package.json` to lint Markdown and test code blocks.
|
||||
When you try to commit changes (`git commit`), Git runs the configured hooks automatically before completing the commit.
|
||||
For comprehensive testing information, including code block testing, link validation, style linting, and advanced testing procedures, see **[TESTING.md](TESTING.md)**.
|
||||
|
||||
#### Skip pre-commit hooks
|
||||
### Quick Testing Reference
|
||||
|
||||
```bash
|
||||
# Test code blocks
|
||||
yarn test:codeblocks:all
|
||||
|
||||
# Test links
|
||||
yarn test:links content/influxdb3/core/**/*.md
|
||||
|
||||
# Run style linting
|
||||
docker compose run -T vale content/**/*.md
|
||||
```
|
||||
|
||||
Pre-commit hooks run automatically when you commit changes, testing your staged files with Vale, Prettier, Cypress, and Pytest. To skip hooks if needed:
|
||||
|
||||
```sh
|
||||
git commit -m "<COMMIT_MESSAGE>" --no-verify
|
||||
```
|
||||
# ... (see full CONTRIBUTING.md for complete example)
|
||||
```python
|
||||
print("Hello, world!")
|
||||
```
|
||||
|
||||
# ... (see full CONTRIBUTING.md for complete example)
|
||||
```sh
|
||||
docker compose run -T vale --config=content/influxdb/cloud-dedicated/.vale.ini --minAlertLevel=error content/influxdb/cloud-dedicated/write-data/**/*.md
|
||||
```
|
||||
|
||||
|
||||
1. Install the [Vale VSCode](https://marketplace.visualstudio.com/items?itemName=ChrisChinchilla.vale-vscode) extension.
|
||||
2. In the extension settings, set the `Vale:Vale CLI:Path` value to `${workspaceFolder}/node_modules/.bin/vale`.
|
||||
|
||||
|
||||
_See full CONTRIBUTING.md for complete details._
|
||||
---
|
||||
|
||||
### Commit Guidelines
|
||||
|
||||
|
|
@ -229,10 +226,6 @@ _For the complete Complete Frontmatter Reference reference, see frontmatter-refe
|
|||
|
||||
_For the complete Shortcodes Reference, see shortcodes-reference.instructions.md._
|
||||
|
||||
### Detailed Testing Setup
|
||||
|
||||
_For the complete Detailed Testing Setup reference, see testing-setup.instructions.md._
|
||||
|
||||
#### Vale style linting configuration
|
||||
|
||||
docs-v2 includes Vale writing style linter configurations to enforce documentation writing style rules, guidelines, branding, and vocabulary terms.
|
||||
|
|
|
|||
|
|
@ -1186,3 +1186,4 @@ Replace the following:
|
|||
- {{% code-placeholder-key %}}`API_TOKEN`{{% /code-placeholder-key %}}: your [InfluxDB API token](/influxdb/v2/admin/tokens/)
|
||||
```
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -4,127 +4,12 @@ applyTo: "content/**/*.md, layouts/**/*.html"
|
|||
|
||||
### Detailed Testing Setup
|
||||
|
||||
#### Set up test scripts and credentials
|
||||
For comprehensive testing information, including:
|
||||
- Code block testing setup and configuration
|
||||
- Link validation testing procedures
|
||||
- Style linting with Vale
|
||||
- Pre-commit hooks and GitHub Actions integration
|
||||
- Advanced testing procedures and troubleshooting
|
||||
|
||||
Tests for code blocks require your InfluxDB credentials and other typical
|
||||
InfluxDB configuration.
|
||||
|
||||
To set up your docs-v2 instance to run tests locally, do the following:
|
||||
|
||||
1. **Set executable permissions on test scripts** in `./test/src`:
|
||||
|
||||
```sh
|
||||
chmod +x ./test/src/*.sh
|
||||
```
|
||||
|
||||
2. **Create credentials for tests**:
|
||||
|
||||
- Create databases, buckets, and tokens for the product(s) you're testing.
|
||||
- If you don't have access to a Clustered instance, you can use your
|
||||
Cloud Dedicated instance for testing in most cases. To avoid conflicts when
|
||||
running tests, create separate Cloud Dedicated and Clustered databases.
|
||||
|
||||
1. **Create .env.test**: Copy the `./test/env.test.example` file into each
|
||||
product directory to test and rename the file as `.env.test`--for example:
|
||||
|
||||
```sh
|
||||
./content/influxdb/cloud-dedicated/.env.test
|
||||
```
|
||||
|
||||
2. Inside each product's `.env.test` file, assign your InfluxDB credentials to
|
||||
environment variables:
|
||||
|
||||
- Include the usual `INFLUX_` environment variables
|
||||
- In
|
||||
`cloud-dedicated/.env.test` and `clustered/.env.test` files, also define the
|
||||
following variables:
|
||||
|
||||
- `ACCOUNT_ID`, `CLUSTER_ID`: You can find these values in your `influxctl`
|
||||
`config.toml` configuration file.
|
||||
- `MANAGEMENT_TOKEN`: Use the `influxctl management create` command to generate
|
||||
a long-lived management token to authenticate Management API requests
|
||||
|
||||
See the substitution
|
||||
patterns in `./test/src/prepare-content.sh` for the full list of variables you may need to define in your `.env.test` files.
|
||||
|
||||
3. For influxctl commands to run in tests, move or copy your `config.toml` file
|
||||
to the `./test` directory.
|
||||
|
||||
> [!Warning]
|
||||
>
|
||||
> - The database you configure in `.env.test` and any written data may
|
||||
be deleted during test runs.
|
||||
> - Don't add your `.env.test` files to Git. To prevent accidentally adding credentials to the docs-v2 repo,
|
||||
> Git is configured to ignore `.env*` files. Consider backing them up on your local machine in case of accidental deletion.
|
||||
|
||||
#### Test shell and python code blocks
|
||||
|
||||
[pytest-codeblocks](https://github.com/nschloe/pytest-codeblocks/tree/main) extracts code from python and shell Markdown code blocks and executes assertions for the code.
|
||||
If you don't assert a value (using a Python `assert` statement), `--codeblocks` considers a non-zero exit code to be a failure.
|
||||
|
||||
**Note**: `pytest --codeblocks` uses Python's `subprocess.run()` to execute shell code.
|
||||
|
||||
You can use this to test CLI and interpreter commands, regardless of programming
|
||||
language, as long as they return standard exit codes.
|
||||
|
||||
To make the documented output of a code block testable, precede it with the
|
||||
`<!--pytest-codeblocks:expected-output-->` tag and **omit the code block language
|
||||
descriptor**--for example, in your Markdown file:
|
||||
|
||||
##### Example markdown
|
||||
|
||||
```python
|
||||
print("Hello, world!")
|
||||
```
|
||||
|
||||
<!--pytest-codeblocks:expected-output-->
|
||||
|
||||
The next code block is treated as an assertion.
|
||||
If successful, the output is the following:
|
||||
|
||||
```
|
||||
Hello, world!
|
||||
```
|
||||
|
||||
For commands, such as `influxctl` CLI commands, that require launching an
|
||||
OAuth URL in a browser, wrap the command in a subshell and redirect the output
|
||||
to `/shared/urls.txt` in the container--for example:
|
||||
|
||||
```sh
|
||||
# Test the preceding command outside of the code block.
|
||||
# influxctl authentication requires TTY interaction--
|
||||
# output the auth URL to a file that the host can open.
|
||||
script -c "influxctl user list " \
|
||||
/dev/null > /shared/urls.txt
|
||||
```
|
||||
|
||||
You probably don't want to display this syntax in the docs, which unfortunately
|
||||
means you'd need to include the test block separately from the displayed code
|
||||
block.
|
||||
To hide it from users, wrap the code block inside an HTML comment.
|
||||
pytest-codeblocks will still collect and run the code block.
|
||||
|
||||
##### Mark tests to skip
|
||||
|
||||
pytest-codeblocks has features for skipping tests and marking blocks as failed.
|
||||
To learn more, see the pytest-codeblocks README and tests.
|
||||
|
||||
#### Troubleshoot tests
|
||||
|
||||
##### Pytest collected 0 items
|
||||
|
||||
Potential reasons:
|
||||
|
||||
- See the test discovery options in `pytest.ini`.
|
||||
- For Python code blocks, use the following delimiter:
|
||||
|
||||
```python
|
||||
# Codeblocks runs this block.
|
||||
```
|
||||
|
||||
`pytest --codeblocks` ignores code blocks that use the following:
|
||||
|
||||
```py
|
||||
# Codeblocks ignores this block.
|
||||
```
|
||||
Please refer to the main **[TESTING.md](../../TESTING.md)** file.
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,169 @@
|
|||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* Simple Cache Manager for Link Validation Results
|
||||
* Uses GitHub Actions cache API or local file storage
|
||||
*/
|
||||
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import crypto from 'crypto';
|
||||
import process from 'process';
|
||||
|
||||
// Bump CACHE_VERSION to invalidate all previously cached entries.
const CACHE_VERSION = 'v1';
const CACHE_KEY_PREFIX = 'link-validation';
// Default on-disk location for local (non-CI) cache entries.
const LOCAL_CACHE_DIR = path.join(process.cwd(), '.cache', 'link-validation');

/**
 * Simple cache for link-validation results, keyed on (file path, file hash).
 *
 * In GitHub Actions (GITHUB_ACTIONS env set and `useGitHubCache` not
 * explicitly false) the get/set methods are placeholders — the workflow is
 * expected to use actions/cache directly. Otherwise entries are stored as
 * JSON files under `localCacheDir` with a configurable TTL.
 */
class CacheManager {
  /**
   * @param {Object} [options]
   * @param {boolean} [options.useGitHubCache] - Pass false to force local file cache.
   * @param {string} [options.localCacheDir] - Directory for local cache files.
   * @param {number} [options.cacheTTLDays] - Entry lifetime in days (default 30,
   *   overridable via the LINK_CACHE_TTL_DAYS environment variable).
   */
  constructor(options = {}) {
    this.useGitHubCache =
      options.useGitHubCache !== false && process.env.GITHUB_ACTIONS;
    this.localCacheDir = options.localCacheDir || LOCAL_CACHE_DIR;

    // Configurable cache TTL — default 30 days, overridable via env var.
    // Parse with an explicit radix and reject non-numeric or non-positive
    // values so a bad env var cannot produce a negative/NaN max age.
    const envTTL = Number.parseInt(process.env.LINK_CACHE_TTL_DAYS, 10);
    this.cacheTTLDays = options.cacheTTLDays || (envTTL > 0 ? envTTL : 30);
    this.maxAge = this.cacheTTLDays * 24 * 60 * 60 * 1000;

    if (!this.useGitHubCache) {
      this.ensureLocalCacheDir();
    }
  }

  /** Create the local cache directory if it does not exist. */
  ensureLocalCacheDir() {
    if (!fs.existsSync(this.localCacheDir)) {
      fs.mkdirSync(this.localCacheDir, { recursive: true });
    }
  }

  /**
   * Build a cache key from the file path and content hash. The path is
   * hashed (first 8 hex chars) so keys stay filesystem-safe regardless of
   * path separators or length.
   */
  generateCacheKey(filePath, fileHash) {
    const pathHash = crypto
      .createHash('sha256')
      .update(filePath)
      .digest('hex')
      .substring(0, 8);
    return `${CACHE_KEY_PREFIX}-${CACHE_VERSION}-${pathHash}-${fileHash}`;
  }

  /**
   * Look up cached results for a file version.
   * @returns {Promise<*>} Cached results, or null on miss/expiry.
   */
  async get(filePath, fileHash) {
    if (this.useGitHubCache) {
      return await this.getFromGitHubCache(filePath, fileHash);
    } else {
      return await this.getFromLocalCache(filePath, fileHash);
    }
  }

  /**
   * Store results for a file version.
   * @returns {Promise<boolean>} true on success.
   */
  async set(filePath, fileHash, results) {
    if (this.useGitHubCache) {
      return await this.setToGitHubCache(filePath, fileHash, results);
    } else {
      return await this.setToLocalCache(filePath, fileHash, results);
    }
  }

  // Placeholder: in GitHub Actions the workflow uses actions/cache directly,
  // so in-process reads always miss.
  async getFromGitHubCache(filePath, fileHash) {
    return null;
  }

  // Placeholder: in GitHub Actions the workflow uses actions/cache directly,
  // so in-process writes are no-ops that report success.
  async setToGitHubCache(filePath, fileHash, results) {
    return true;
  }

  /** Read a local cache entry, enforcing the TTL and pruning stale/corrupt files. */
  async getFromLocalCache(filePath, fileHash) {
    const cacheKey = this.generateCacheKey(filePath, fileHash);
    const cacheFile = path.join(this.localCacheDir, `${cacheKey}.json`);

    if (!fs.existsSync(cacheFile)) {
      return null;
    }

    try {
      const content = fs.readFileSync(cacheFile, 'utf8');
      const cached = JSON.parse(content);

      // TTL check using configured cache duration
      const age = Date.now() - new Date(cached.cachedAt).getTime();

      if (age > this.maxAge) {
        // Expired — remove eagerly and report a miss.
        fs.unlinkSync(cacheFile);
        return null;
      }

      return cached.results;
    } catch {
      // Unreadable or malformed entry: clean it up and treat as a miss.
      try {
        fs.unlinkSync(cacheFile);
      } catch {
        // Ignore cleanup errors
      }
      return null;
    }
  }

  /** Write a local cache entry; returns false (with a warning) on failure. */
  async setToLocalCache(filePath, fileHash, results) {
    const cacheKey = this.generateCacheKey(filePath, fileHash);
    const cacheFile = path.join(this.localCacheDir, `${cacheKey}.json`);

    const cacheData = {
      filePath,
      fileHash,
      results,
      cachedAt: new Date().toISOString(),
    };

    try {
      fs.writeFileSync(cacheFile, JSON.stringify(cacheData, null, 2));
      return true;
    } catch (error) {
      // Cache writes are best-effort; a failure must not fail validation.
      console.warn(`Cache save failed: ${error.message}`);
      return false;
    }
  }

  /**
   * Delete local cache entries older than the TTL (by file mtime) and any
   * unreadable files. No-op under GitHub Actions.
   * @returns {Promise<{removed: number, note?: string}>}
   */
  async cleanup() {
    if (this.useGitHubCache) {
      return { removed: 0, note: 'GitHub Actions cache auto-managed' };
    }

    let removed = 0;
    if (!fs.existsSync(this.localCacheDir)) {
      return { removed };
    }

    const files = fs.readdirSync(this.localCacheDir);

    for (const file of files) {
      if (!file.endsWith('.json')) continue;

      const filePath = path.join(this.localCacheDir, file);
      try {
        const stat = fs.statSync(filePath);
        if (Date.now() - stat.mtime.getTime() > this.maxAge) {
          fs.unlinkSync(filePath);
          removed++;
        }
      } catch {
        // Remove corrupted files
        try {
          fs.unlinkSync(filePath);
          removed++;
        } catch {
          // Ignore errors
        }
      }
    }

    return { removed };
  }
}

export default CacheManager;
export { CacheManager };
|
||||
|
|
@ -0,0 +1,328 @@
|
|||
/**
|
||||
* Comment Generator for Link Validation Results
|
||||
* Standardizes PR comment generation across workflows
|
||||
* Includes cache performance metrics and optimization info
|
||||
*/
|
||||
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import process from 'process';
|
||||
|
||||
/**
 * Normalize broken link data from different report formats.
 *
 * Accepts either an array of entries or a single entry object (the JSDoc
 * contract is Object|Array; previously a bare object silently produced []).
 * Each entry is either a container — `{ sourceFile, links: [...] }` — or a
 * direct link object.
 *
 * @param {Object|Array} reportData - Raw report data
 * @returns {Array} - Normalized array of broken links
 */
function normalizeBrokenLinks(reportData) {
  if (!reportData) return [];

  // Generalize: treat a single report object as a one-element list so the
  // documented Object|Array contract actually holds. Arrays behave exactly
  // as before.
  const entries = Array.isArray(reportData) ? reportData : [reportData];
  const links = [];

  // Shared shape for both formats; the source file always comes from the
  // containing entry, the link fields from the link object itself.
  const toNormalized = (link, sourceFile) => ({
    sourceFile,
    url: link.url || link.href,
    linkText: link.linkText || link.url || link.href,
    status: link.status,
    error: link.error,
    type: link.type,
  });

  entries.forEach((item) => {
    const source = item.sourceFile || item.page || 'Unknown';
    if (item.links && Array.isArray(item.links)) {
      // Format: { sourceFile: "file.md", links: [...] }
      item.links.forEach((link) => links.push(toNormalized(link, source)));
    } else {
      // Format: direct link object
      links.push(toNormalized(item, source));
    }
  });

  return links;
}
|
||||
|
||||
/**
 * Group broken links by their source file.
 * Links without a `sourceFile` are collected under the key 'Unknown'.
 *
 * @param {Array} brokenLinks - Array of normalized broken links
 * @returns {Object} - Object keyed by source file, each value an array of links
 */
function groupLinksBySource(brokenLinks) {
  return brokenLinks.reduce((grouped, link) => {
    const source = link.sourceFile || 'Unknown';
    if (!grouped[source]) {
      grouped[source] = [];
    }
    grouped[source].push(link);
    return grouped;
  }, {});
}
|
||||
|
||||
/**
|
||||
* Generate markdown comment for PR
|
||||
* @param {Array} allBrokenLinks - Array of all broken links
|
||||
* @param {Object} options - Generation options
|
||||
* @returns {string} - Markdown comment content
|
||||
*/
|
||||
/**
 * Load cache statistics from a reports directory.
 * Reads `cache_statistics.json` from `reportsDir`; any read or parse failure
 * is logged as a warning and reported as "no stats".
 *
 * @param {string} reportsDir - Directory containing reports
 * @returns {Object|null} Cache statistics or null if not found
 */
function loadCacheStats(reportsDir) {
  const statsPath = path.join(reportsDir, 'cache_statistics.json');
  try {
    if (fs.existsSync(statsPath)) {
      return JSON.parse(fs.readFileSync(statsPath, 'utf8'));
    }
  } catch (error) {
    console.warn(`Warning: Could not load cache stats: ${error.message}`);
  }
  return null;
}
|
||||
|
||||
/**
 * Generate a markdown PR comment from normalized broken links.
 *
 * Layout: optional cache-performance header, then either a success section
 * (when there are no broken links) or a per-source-file listing capped at
 * `maxLinksPerFile` visible entries (overflow goes into a <details> block),
 * followed by an optional "Action Required" footer.
 *
 * @param {Array} allBrokenLinks - Normalized broken links (see normalizeBrokenLinks)
 * @param {Object} [options]
 * @param {boolean} [options.includeSuccessMessage=true] - Emit success text when no broken links.
 * @param {boolean} [options.includeStats=true] - Emit the "Found N broken link(s)" line.
 * @param {boolean} [options.includeActionRequired=true] - Emit the footer.
 * @param {number} [options.maxLinksPerFile=20] - Visible links per file before folding.
 * @param {Object} [options.cacheStats] - Pre-loaded cache stats; wins over reportsDir.
 * @param {string} [options.reportsDir] - Directory to load cache_statistics.json from.
 * @returns {string} Markdown comment; empty string on success when
 *   includeSuccessMessage is false.
 */
function generateComment(allBrokenLinks, options = {}) {
  const {
    includeSuccessMessage = true,
    includeStats = true,
    includeActionRequired = true,
    maxLinksPerFile = 20,
    cacheStats = null,
    reportsDir = null,
  } = options;

  // Load cache stats if reports directory is provided
  const actualCacheStats =
    cacheStats || (reportsDir ? loadCacheStats(reportsDir) : null);

  let comment = '';

  // Add cache performance metrics at the top
  // NOTE(review): assumes stats have numeric hitRate/cacheHits/cacheMisses
  // fields — confirm against the producer of cache_statistics.json.
  if (actualCacheStats) {
    comment += '## 📊 Link Validation Performance\n\n';
    comment += `- **Cache Hit Rate:** ${actualCacheStats.hitRate}%\n`;
    comment += `- **Files Cached:** ${actualCacheStats.cacheHits} (skipped validation)\n`;
    comment += `- **Files Validated:** ${actualCacheStats.cacheMisses}\n`;

    if (actualCacheStats.hitRate >= 50) {
      comment +=
        '- **Performance:** 🚀 Cache optimization saved significant validation time!\n';
    } else if (actualCacheStats.hitRate > 0) {
      comment +=
        '- **Performance:** ⚡ Some files were cached, improving validation speed\n';
    }
    comment += '\n';
  }

  // Success path: no broken links at all.
  if (!allBrokenLinks || allBrokenLinks.length === 0) {
    comment += '## ✅ Link Validation Passed\n\n';
    comment += 'All links in the changed files are valid!';

    // Strict === 100 intentionally requires a perfect cache run.
    if (actualCacheStats && actualCacheStats.hitRate === 100) {
      comment += '\n\n✨ **All files were cached** - no validation was needed!';
    }

    // When the caller opted out of success messages, suppress the whole
    // comment (including any cache header built above).
    return includeSuccessMessage ? comment : '';
  }

  comment += '## 🔗 Broken Links Found\n\n';

  if (includeStats) {
    comment += `Found ${allBrokenLinks.length} broken link(s) in the changed files:\n\n`;
  }

  // Group by source file
  const bySource = groupLinksBySource(allBrokenLinks);

  // Generate sections for each source file
  for (const [source, links] of Object.entries(bySource)) {
    comment += `### ${source}\n\n`;

    // Show at most maxLinksPerFile entries inline; fold the rest.
    const displayLinks = links.slice(0, maxLinksPerFile);
    const hiddenCount = links.length - displayLinks.length;

    displayLinks.forEach((link) => {
      const url = link.url || 'Unknown URL';
      const linkText = link.linkText || url;
      const status = link.status || 'Unknown';

      comment += `- [ ] **${linkText}** → \`${url}\`\n`;
      comment += ` - Status: ${status}\n`;

      if (link.type) {
        comment += ` - Type: ${link.type}\n`;
      }

      if (link.error) {
        comment += ` - Error: ${link.error}\n`;
      }

      comment += '\n';
    });

    // Overflow links get a compact one-line format inside <details>.
    if (hiddenCount > 0) {
      comment += `<details>\n<summary>... and ${hiddenCount} more broken link(s)</summary>\n\n`;

      links.slice(maxLinksPerFile).forEach((link) => {
        const url = link.url || 'Unknown URL';
        const linkText = link.linkText || url;
        const status = link.status || 'Unknown';

        comment += `- [ ] **${linkText}** → \`${url}\` (Status: ${status})\n`;
      });

      comment += '\n</details>\n\n';
    }
  }

  if (includeActionRequired) {
    comment += '\n---\n';
    comment +=
      '**Action Required:** Please fix the broken links before merging this PR.';
  }

  return comment;
}
|
||||
|
||||
/**
 * Load and merge broken link reports from downloaded artifacts.
 * Scans `reportsDir` for `broken-links-*` subdirectories, parses each
 * `broken_links_report.json`, and concatenates the normalized links.
 * Unreadable files or directories are logged and skipped.
 *
 * @param {string} reportsDir - Directory containing report artifacts
 * @returns {Array} - Array of all broken links
 */
function loadBrokenLinkReports(reportsDir) {
  const allBrokenLinks = [];

  if (!fs.existsSync(reportsDir)) {
    return allBrokenLinks;
  }

  try {
    const artifactDirs = fs
      .readdirSync(reportsDir)
      .filter((name) => name.startsWith('broken-links-'));

    for (const artifactDir of artifactDirs) {
      const reportPath = path.join(
        reportsDir,
        artifactDir,
        'broken_links_report.json'
      );
      if (!fs.existsSync(reportPath)) {
        continue;
      }

      try {
        const reportData = JSON.parse(fs.readFileSync(reportPath, 'utf8'));
        allBrokenLinks.push(...normalizeBrokenLinks(reportData));
      } catch (e) {
        console.error(`Error reading ${reportPath}: ${e.message}`);
      }
    }
  } catch (e) {
    console.error(
      `Error reading reports directory ${reportsDir}: ${e.message}`
    );
  }

  return allBrokenLinks;
}
|
||||
|
||||
/**
 * CLI interface for the comment generator.
 *
 * Parses CLI flags, loads broken-link reports from the given directory,
 * prints (or writes) the generated PR comment, and exits non-zero when
 * broken links are present so CI can fail the job.
 */
function main() {
  const args = process.argv.slice(2);

  if (args.includes('--help') || args.includes('-h')) {
    console.log(`
Usage: node comment-generator.js [options] <reports-dir>

Options:
  --no-success          Don't include success message when no broken links
  --no-stats            Don't include broken link statistics
  --no-action-required  Don't include action required message
  --max-links <n>       Maximum links to show per file (default: 20)
  --output-file <file>  Write comment to file instead of stdout
  --help, -h            Show this help message

Examples:
  node comment-generator.js reports/
  node comment-generator.js --max-links 10 --output-file comment.md reports/
`);
    process.exit(0);
  }

  // Parse arguments
  let reportsDir = '';
  const options = {
    includeSuccessMessage: true,
    includeStats: true,
    includeActionRequired: true,
    maxLinksPerFile: 20,
  };
  let outputFile = null;

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];

    if (arg === '--no-success') {
      options.includeSuccessMessage = false;
    } else if (arg === '--no-stats') {
      options.includeStats = false;
    } else if (arg === '--no-action-required') {
      options.includeActionRequired = false;
    } else if (arg === '--max-links' && i + 1 < args.length) {
      // Explicit radix, and reject non-positive/NaN values: previously a bad
      // value made maxLinksPerFile NaN, and links.slice(0, NaN) hid every
      // link from the report.
      const maxLinks = Number.parseInt(args[++i], 10);
      if (Number.isInteger(maxLinks) && maxLinks > 0) {
        options.maxLinksPerFile = maxLinks;
      } else {
        console.error('Error: --max-links expects a positive integer');
        process.exit(1);
      }
    } else if (arg === '--output-file' && i + 1 < args.length) {
      outputFile = args[++i];
    } else if (!arg.startsWith('--')) {
      // First bare argument wins as the reports directory.
      reportsDir = arg;
    }
  }

  if (!reportsDir) {
    console.error('Error: reports directory is required');
    process.exit(1);
  }

  // Load reports and generate comment with cache stats
  const brokenLinks = loadBrokenLinkReports(reportsDir);
  options.reportsDir = reportsDir;
  const comment = generateComment(brokenLinks, options);

  if (outputFile) {
    fs.writeFileSync(outputFile, comment);
    console.log(`Comment written to ${outputFile}`);
  } else {
    console.log(comment);
  }

  // Exit with error code if there are broken links
  if (brokenLinks.length > 0) {
    process.exit(1);
  }
}
||||
|
||||
// Run CLI if this file is executed directly
// NOTE(review): this comparison assumes a POSIX path in process.argv[1];
// on Windows the file:// URL form differs (drive letters, percent-encoding)
// — confirm whether url.pathToFileURL is needed for Windows support.
if (import.meta.url === `file://${process.argv[1]}`) {
  main();
}

export {
  generateComment,
  loadBrokenLinkReports,
  normalizeBrokenLinks,
  groupLinksBySource,
};
|
||||
|
|
@ -0,0 +1,228 @@
|
|||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* Incremental Link Validator
|
||||
* Combines link extraction and caching to validate only changed links
|
||||
*/
|
||||
|
||||
import { extractLinksFromFile } from './link-extractor.js';
|
||||
import { CacheManager } from './cache-manager.js';
|
||||
import process from 'process';
|
||||
|
||||
/**
|
||||
* Incremental validator that only validates changed content
|
||||
*/
|
||||
class IncrementalValidator {
|
||||
/**
 * @param {Object} [options] - Forwarded verbatim to CacheManager (TTL,
 *   cache directory, GitHub-vs-local storage selection).
 * @param {boolean} [options.validateExternal=true] - Opt out with explicit false.
 * @param {boolean} [options.validateInternal=true] - Opt out with explicit false.
 */
constructor(options = {}) {
  // All caching behavior (keys, TTL, storage backend) is delegated.
  this.cacheManager = new CacheManager(options);
  // `!== false` means anything other than an explicit false enables the flag.
  // NOTE(review): neither flag is referenced in the visible portion of this
  // class — confirm they are used by methods outside this view.
  this.validateExternal = options.validateExternal !== false;
  this.validateInternal = options.validateInternal !== false;
}
|
||||
|
||||
/**
 * Get validation strategy for a list of files.
 *
 * For each file, extracts its links and content hash, then consults the
 * cache: a hit means the file is unchanged (validation can be skipped);
 * a miss means the file is new or changed and its validation-needing links
 * are queued. Extraction/processing errors demote the file to `changed`
 * so it is always validated rather than silently skipped.
 *
 * @param {Array} filePaths - Array of file paths
 * @returns {Object} Validation strategy with files categorized:
 *   { unchanged: [...], changed: [...], newLinks: string[], total: number }
 */
async getValidationStrategy(filePaths) {
  const strategy = {
    unchanged: [], // Files that haven't changed (skip validation)
    changed: [], // Files that changed (need full validation)
    newLinks: [], // New links across all files (need validation)
    total: filePaths.length,
  };

  // Set deduplicates URLs that appear in more than one changed file.
  const allNewLinks = new Set();

  for (const filePath of filePaths) {
    try {
      // NOTE(review): assumes extractLinksFromFile returns
      // { fileHash, links: [{ url, needsValidation, ... }] } or a falsy
      // value on failure — confirm against link-extractor.js.
      const extractionResult = extractLinksFromFile(filePath);
      if (!extractionResult) {
        console.warn(`Could not extract links from ${filePath}`);
        continue;
      }

      const { fileHash, links } = extractionResult;

      // Check if we have cached results for this file version
      // (cache key is the (path, content-hash) pair, so any edit is a miss).
      const cachedResults = await this.cacheManager.get(filePath, fileHash);

      if (cachedResults) {
        // File unchanged, skip validation
        strategy.unchanged.push({
          filePath,
          fileHash,
          linkCount: links.length,
          cachedResults,
        });
      } else {
        // File changed or new, needs validation
        strategy.changed.push({
          filePath,
          fileHash,
          links: links.filter((link) => link.needsValidation),
          extractionResult,
        });

        // Collect all new links for batch validation
        links
          .filter((link) => link.needsValidation)
          .forEach((link) => allNewLinks.add(link.url));
      }
    } catch (error) {
      console.error(`Error processing ${filePath}: ${error.message}`);
      // Treat as changed file to ensure validation
      // (entry carries `error` instead of fileHash/links).
      strategy.changed.push({
        filePath,
        error: error.message,
      });
    }
  }

  strategy.newLinks = Array.from(allNewLinks);

  return strategy;
}
|
||||
|
||||
/**
|
||||
* Validate files using incremental strategy
|
||||
* @param {Array} filePaths - Files to validate
|
||||
* @returns {Object} Validation results
|
||||
*/
|
||||
async validateFiles(filePaths) {
|
||||
console.log(
|
||||
`📊 Analyzing ${filePaths.length} files for incremental validation...`
|
||||
);
|
||||
|
||||
const strategy = await this.getValidationStrategy(filePaths);
|
||||
|
||||
console.log(`✅ ${strategy.unchanged.length} files unchanged (cached)`);
|
||||
console.log(`🔄 ${strategy.changed.length} files need validation`);
|
||||
console.log(`🔗 ${strategy.newLinks.length} unique links to validate`);
|
||||
|
||||
const results = {
|
||||
validationStrategy: strategy,
|
||||
filesToValidate: strategy.changed.map((item) => ({
|
||||
filePath: item.filePath,
|
||||
linkCount: item.links ? item.links.length : 0,
|
||||
})),
|
||||
cacheStats: {
|
||||
cacheHits: strategy.unchanged.length,
|
||||
cacheMisses: strategy.changed.length,
|
||||
hitRate:
|
||||
strategy.total > 0
|
||||
? Math.round((strategy.unchanged.length / strategy.total) * 100)
|
||||
: 0,
|
||||
},
|
||||
};
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Store validation results in cache
|
||||
* @param {string} filePath - File path
|
||||
* @param {string} fileHash - File hash
|
||||
* @param {Object} validationResults - Results to cache
|
||||
* @returns {Promise<boolean>} Success status
|
||||
*/
|
||||
async cacheResults(filePath, fileHash, validationResults) {
|
||||
return await this.cacheManager.set(filePath, fileHash, validationResults);
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean up expired cache entries
|
||||
* @returns {Promise<Object>} Cleanup statistics
|
||||
*/
|
||||
async cleanupCache() {
|
||||
return await this.cacheManager.cleanup();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * CLI usage: parses flags, runs cache cleanup or incremental analysis, and
 * prints a human-readable summary plus the list of files that still need
 * validation (consumed downstream by Cypress).
 *
 * Exits 0 on success/help, 1 on invalid arguments.
 */
async function main() {
  const args = process.argv.slice(2);

  if (args.length === 0 || args[0] === '--help') {
    console.log(`
Incremental Link Validator

Usage:
  node incremental-validator.js [files...]  Analyze files for validation
  node incremental-validator.js --cleanup   Clean up expired cache
  node incremental-validator.js --help      Show this help

Options:
  --no-external     Don't validate external links
  --no-internal     Don't validate internal links
  --local           Use local cache instead of GitHub Actions cache
  --cache-ttl=DAYS  Set cache TTL in days (default: 30)

Examples:
  node incremental-validator.js content/**/*.md
  node incremental-validator.js --cache-ttl=7 content/**/*.md
  node incremental-validator.js --cleanup
`);
    process.exit(0);
  }

  if (args[0] === '--cleanup') {
    const validator = new IncrementalValidator();
    const stats = await validator.cleanupCache();
    console.log(`🧹 Cleaned up ${stats.removed} expired cache entries`);
    if (stats.note) console.log(`ℹ️ ${stats.note}`);
    return;
  }

  const options = {
    validateExternal: !args.includes('--no-external'),
    validateInternal: !args.includes('--no-internal'),
    useGitHubCache: !args.includes('--local'),
  };

  // Extract cache TTL option if provided
  const cacheTTLArg = args.find((arg) => arg.startsWith('--cache-ttl='));
  if (cacheTTLArg) {
    // Parse with an explicit radix and validate: previously
    // `parseInt(value)` could silently propagate NaN (e.g. for
    // `--cache-ttl=abc`) into the cache layer.
    const rawTTL = cacheTTLArg.split('=')[1];
    const ttlDays = Number.parseInt(rawTTL, 10);
    if (!Number.isInteger(ttlDays) || ttlDays <= 0) {
      console.error(`Invalid --cache-ttl value: ${rawTTL}`);
      process.exit(1);
    }
    options.cacheTTLDays = ttlDays;
  }

  const filePaths = args.filter((arg) => !arg.startsWith('--'));

  if (filePaths.length === 0) {
    console.error('No files specified for validation');
    process.exit(1);
  }

  const validator = new IncrementalValidator(options);
  const results = await validator.validateFiles(filePaths);

  console.log('\n📈 Validation Analysis Results:');
  console.log('================================');
  console.log(`Cache hit rate: ${results.cacheStats.hitRate}%`);
  console.log(`Files to validate: ${results.filesToValidate.length}`);

  if (results.filesToValidate.length > 0) {
    console.log('\nFiles needing validation:');
    results.filesToValidate.forEach((file) => {
      console.log(`  ${file.filePath} (${file.linkCount} links)`);
    });

    // Output files for Cypress to process
    console.log('\n# Files for Cypress validation (one per line):');
    results.filesToValidate.forEach((file) => {
      console.log(file.filePath);
    });
  } else {
    console.log('\n✨ All files are cached - no validation needed!');
  }
}
|
||||
|
||||
export default IncrementalValidator;
|
||||
export { IncrementalValidator };
|
||||
|
||||
// Run CLI if called directly
|
||||
if (import.meta.url === `file://${process.argv[1]}`) {
|
||||
main().catch(console.error);
|
||||
}
|
||||
|
|
@ -0,0 +1,473 @@
|
|||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* Link Extractor for Documentation Files
|
||||
* Extracts all links from markdown and HTML files with metadata for caching and incremental validation
|
||||
*/
|
||||
|
||||
import fs from 'fs';
|
||||
import crypto from 'crypto';
|
||||
import matter from 'gray-matter';
|
||||
import path from 'path';
|
||||
import process from 'process';
|
||||
|
||||
/**
 * Extract links from markdown content
 * @param {string} content - File content
 * @param {string} filePath - Path to the file
 * @returns {Array} Array of link objects with metadata
 */
function extractMarkdownLinks(content, filePath) {
  const links = [];
  const lines = content.split('\n');

  // Track reference-style link definitions
  const referenceLinks = new Map();

  // First pass: collect reference definitions ("[label]: url" lines).
  // Keys are lower-cased so reference lookups are case-insensitive.
  content.replace(/^\s*\[([^\]]+)\]:\s*(.+)$/gm, (match, ref, url) => {
    referenceLinks.set(ref.toLowerCase(), url.trim());
    return match;
  });

  // Process each line for links
  lines.forEach((line, lineIndex) => {
    const lineNumber = lineIndex + 1;

    // Standard markdown links: [text](url)
    let match;
    const standardLinkRegex = /\[([^\]]*)\]\(([^)]+)\)/g;
    while ((match = standardLinkRegex.exec(line)) !== null) {
      const linkText = match[1];
      const url = match[2];
      const columnStart = match.index;

      links.push({
        url: url.trim(),
        text: linkText,
        type: 'markdown',
        line: lineNumber,
        column: columnStart,
        context: line.trim(),
        hash: generateLinkHash(url.trim(), filePath, lineNumber),
      });
    }

    // Reference-style links: [text][ref]; an empty ref ("[text][]")
    // falls back to the link text as the reference key.
    const refLinkRegex = /\[([^\]]*)\]\[([^\]]*)\]/g;
    while ((match = refLinkRegex.exec(line)) !== null) {
      const linkText = match[1];
      const refKey = (match[2] || linkText).toLowerCase();
      const url = referenceLinks.get(refKey);

      // References with no matching definition are silently skipped.
      if (url) {
        const columnStart = match.index;
        links.push({
          url: url,
          text: linkText,
          type: 'markdown-reference',
          line: lineNumber,
          column: columnStart,
          context: line.trim(),
          reference: refKey,
          hash: generateLinkHash(url, filePath, lineNumber),
        });
      }
    }

    // Autolinks: <https://example.com>
    const autolinkRegex = /<(https?:\/\/[^>]+)>/g;
    while ((match = autolinkRegex.exec(line)) !== null) {
      const url = match[1];
      const columnStart = match.index;

      links.push({
        url: url,
        text: url,
        type: 'autolink',
        line: lineNumber,
        column: columnStart,
        context: line.trim(),
        hash: generateLinkHash(url, filePath, lineNumber),
      });
    }

    // Bare URLs (basic detection, avoid false positives)
    // NOTE(review): [^\s\)]+ keeps trailing punctuation (e.g. "," or ".")
    // as part of the URL — confirm whether that is intended.
    const bareUrlRegex = /(?:^|[\s\n])(https?:\/\/[^\s\)]+)/g;
    while ((match = bareUrlRegex.exec(line)) !== null) {
      const url = match[1];
      // match[0] may include the leading whitespace; offset past it so the
      // column points at the URL itself.
      const columnStart = match.index + match[0].length - url.length;

      // Skip if this URL is already captured in a proper markdown link
      // (same line, same URL, within a 10-column window — heuristic).
      const alreadyCaptured = links.some(
        (link) =>
          link.line === lineNumber &&
          Math.abs(link.column - columnStart) < 10 &&
          link.url === url
      );

      if (!alreadyCaptured) {
        links.push({
          url: url,
          text: url,
          type: 'bare-url',
          line: lineNumber,
          column: columnStart,
          context: line.trim(),
          hash: generateLinkHash(url, filePath, lineNumber),
        });
      }
    }
  });

  return links;
}
|
||||
|
||||
/**
 * Extract links from HTML content
 * @param {string} content - File content
 * @param {string} filePath - Path to the file
 * @returns {Array} Array of link objects with metadata
 */
function extractHtmlLinks(content, filePath) {
  const links = [];
  const lines = content.split('\n');

  lines.forEach((line, lineIndex) => {
    const lineNumber = lineIndex + 1;
    let match;

    // Matches the opening <a> tag only; any element text follows on the
    // same line after the tag.
    const htmlLinkRegex = /<a\s+[^>]*href\s*=\s*["']([^"']+)["'][^>]*>/gi;
    while ((match = htmlLinkRegex.exec(line)) !== null) {
      const url = match[1];
      const columnStart = match.index;

      // Extract the link text from the content following the opening tag.
      // (Previously the text was searched for inside match[0], which is
      // only the opening tag ending in '>', so it could never match and
      // the text always fell back to the URL.)
      const afterTag = line.slice(htmlLinkRegex.lastIndex);
      const textMatch = afterTag.match(/^([^<]*)</);
      const extractedText = textMatch ? textMatch[1].trim() : '';
      const linkText = extractedText || url;

      links.push({
        url: url,
        text: linkText,
        type: 'html',
        line: lineNumber,
        column: columnStart,
        context: line.trim(),
        hash: generateLinkHash(url, filePath, lineNumber),
      });
    }
  });

  return links;
}
|
||||
|
||||
/**
 * Generate a unique hash for a link occurrence.
 *
 * The fingerprint combines file path, line number, and trimmed URL, so the
 * same URL on different lines (or in different files) hashes differently.
 *
 * @param {string} url - The URL
 * @param {string} filePath - File path
 * @param {number} line - Line number
 * @returns {string} First 16 hex characters of the SHA-256 digest
 */
function generateLinkHash(url, filePath, line) {
  const fingerprint = [filePath, line, url.trim()].join(':');
  const digest = crypto.createHash('sha256').update(fingerprint).digest('hex');
  return digest.substring(0, 16);
}
|
||||
|
||||
/**
 * Generate a short content hash for a file.
 *
 * Used as the cache key component that identifies an exact file version.
 *
 * @param {string} content - File content
 * @returns {string} First 16 hex characters of the SHA-256 digest
 */
function generateFileHash(content) {
  const digest = crypto.createHash('sha256').update(content).digest('hex');
  return digest.substring(0, 16);
}
|
||||
|
||||
/**
 * Categorize link types for validation
 * @param {string} url - The URL to categorize
 * @returns {Object} Link category information:
 *   { category, needsValidation, [protocol] }
 */
function categorizeLinkType(url) {
  const trimmedUrl = url.trim();

  // External links
  if (trimmedUrl.startsWith('http://') || trimmedUrl.startsWith('https://')) {
    return {
      category: 'external',
      protocol: trimmedUrl.startsWith('https://') ? 'https' : 'http',
      needsValidation: true,
    };
  }

  // Internal absolute links
  if (trimmedUrl.startsWith('/')) {
    return {
      category: 'internal-absolute',
      needsValidation: true,
    };
  }

  // Special protocols (mailto:, tel:, ftp://, etc.).
  // Checked BEFORE the relative-link fallback: scheme-only URLs such as
  // "mailto:user@example.com" contain no "://" and were previously
  // misclassified as internal-relative, producing validation false
  // positives. The pattern is an RFC 3986 scheme: ALPHA *(ALPHA / DIGIT /
  // "+" / "-" / ".") followed by ":". (http/https already returned above.)
  if (/^[a-zA-Z][a-zA-Z0-9+.-]*:/.test(trimmedUrl)) {
    return {
      category: 'special-protocol',
      needsValidation: false,
    };
  }

  // Fragment/anchor links
  if (trimmedUrl.startsWith('#')) {
    return {
      category: 'fragment',
      needsValidation: true, // May need validation for internal page anchors
    };
  }

  // Relative links (./, ../, or bare scheme-less paths)
  if (
    trimmedUrl.startsWith('./') ||
    trimmedUrl.startsWith('../') ||
    !trimmedUrl.includes('://')
  ) {
    return {
      category: 'internal-relative',
      needsValidation: true,
    };
  }

  return {
    category: 'unknown',
    needsValidation: true,
  };
}
|
||||
|
||||
/**
 * Extract all links from a file
 * @param {string} filePath - Path to the file
 * @returns {Object} File analysis with links and metadata, or null when the
 *   file is unsupported or extraction fails (errors are logged, not thrown)
 */
function extractLinksFromFile(filePath) {
  try {
    if (!fs.existsSync(filePath)) {
      throw new Error(`File not found: ${filePath}`);
    }

    const content = fs.readFileSync(filePath, 'utf8');
    // Content hash identifies this exact file version for caching.
    const fileHash = generateFileHash(content);
    const extension = path.extname(filePath).toLowerCase();

    let links = [];
    let frontmatter = {};
    let bodyContent = content;

    // Parse frontmatter for .md files
    if (extension === '.md') {
      try {
        const parsed = matter(content);
        frontmatter = parsed.data || {};
        bodyContent = parsed.content;
      } catch (err) {
        // Malformed frontmatter: fall through with the raw content.
        console.warn(
          `Warning: Could not parse frontmatter in ${filePath}: ${err.message}`
        );
      }

      // Extract links from markdown content
      links = extractMarkdownLinks(bodyContent, filePath);
    } else if (extension === '.html') {
      // Extract links from HTML content
      links = extractHtmlLinks(content, filePath);
    } else {
      // Unsupported extension: null tells callers to skip this file.
      console.warn(`Warning: Unsupported file type for ${filePath}`);
      return null;
    }

    // Categorize and enhance links (category/needsValidation merged in).
    const enhancedLinks = links.map((link) => ({
      ...link,
      ...categorizeLinkType(link.url),
      filePath,
    }));

    // Calculate statistics
    const stats = {
      totalLinks: enhancedLinks.length,
      externalLinks: enhancedLinks.filter((l) => l.category === 'external')
        .length,
      internalLinks: enhancedLinks.filter((l) =>
        l.category.startsWith('internal')
      ).length,
      fragmentLinks: enhancedLinks.filter((l) => l.category === 'fragment')
        .length,
      linksNeedingValidation: enhancedLinks.filter((l) => l.needsValidation)
        .length,
    };

    return {
      filePath,
      fileHash,
      extension,
      frontmatter,
      links: enhancedLinks,
      stats,
      extractedAt: new Date().toISOString(),
    };
  } catch (error) {
    // All failures (including missing files) are reported and mapped to
    // null so batch callers can keep going.
    console.error(`Error extracting links from ${filePath}: ${error.message}`);
    return null;
  }
}
|
||||
|
||||
/**
 * Main function for CLI usage: parses flags, extracts links from each file,
 * optionally filters by category, and prints JSON, stats-only, or detailed
 * per-link output.
 */
function main() {
  const args = process.argv.slice(2);

  if (args.length === 0) {
    console.error('Usage: node link-extractor.js <file1> [file2] [...]');
    console.error('       node link-extractor.js --help');
    process.exit(1);
  }

  if (args[0] === '--help') {
    console.log(`
Link Extractor for Documentation Files

Usage:
  node link-extractor.js <file1> [file2] [...]  Extract links from files
  node link-extractor.js --help                 Show this help

Options:
  --json        Output results as JSON
  --stats-only  Show only statistics
  --filter TYPE Filter links by category (external, internal-absolute, internal-relative, fragment)

Examples:
  node link-extractor.js content/influxdb3/core/install.md
  node link-extractor.js --json content/**/*.md
  node link-extractor.js --stats-only --filter external content/influxdb3/**/*.md
`);
    process.exit(0);
  }

  const jsonOutput = args.includes('--json');
  const statsOnly = args.includes('--stats-only');
  const filterType = args.includes('--filter')
    ? args[args.indexOf('--filter') + 1]
    : null;

  // NOTE(review): excluding `arg !== filterType` also drops a *file* whose
  // name happens to equal the filter value (e.g. a file literally named
  // "external"); confirm this trade-off is acceptable.
  const files = args.filter(
    (arg) => !arg.startsWith('--') && arg !== filterType
  );
  const results = [];

  for (const filePath of files) {
    const result = extractLinksFromFile(filePath);
    if (result) {
      // Apply filter if specified
      if (filterType) {
        result.links = result.links.filter(
          (link) => link.category === filterType
        );
        // Recalculate stats after filtering
        result.stats = {
          totalLinks: result.links.length,
          externalLinks: result.links.filter((l) => l.category === 'external')
            .length,
          internalLinks: result.links.filter((l) =>
            l.category.startsWith('internal')
          ).length,
          fragmentLinks: result.links.filter((l) => l.category === 'fragment')
            .length,
          linksNeedingValidation: result.links.filter((l) => l.needsValidation)
            .length,
        };
      }

      // Only successful extractions are kept, so every entry in `results`
      // has a `stats` object for the reporting branches below.
      results.push(result);
    }
  }

  if (jsonOutput) {
    console.log(JSON.stringify(results, null, 2));
  } else if (statsOnly) {
    console.log('\nLink Extraction Statistics:');
    console.log('==========================');

    let totalFiles = 0;
    let totalLinks = 0;
    let totalExternal = 0;
    let totalInternal = 0;
    let totalFragment = 0;
    let totalNeedingValidation = 0;

    results.forEach((result) => {
      totalFiles++;
      totalLinks += result.stats.totalLinks;
      totalExternal += result.stats.externalLinks;
      totalInternal += result.stats.internalLinks;
      totalFragment += result.stats.fragmentLinks;
      totalNeedingValidation += result.stats.linksNeedingValidation;

      console.log(
        `${result.filePath}: ${result.stats.totalLinks} links (${result.stats.linksNeedingValidation} need validation)`
      );
    });

    console.log('\nSummary:');
    console.log(`  Total files: ${totalFiles}`);
    console.log(`  Total links: ${totalLinks}`);
    console.log(`  External links: ${totalExternal}`);
    console.log(`  Internal links: ${totalInternal}`);
    console.log(`  Fragment links: ${totalFragment}`);
    console.log(`  Links needing validation: ${totalNeedingValidation}`);
  } else {
    // Default: detailed per-file, per-link listing.
    results.forEach((result) => {
      console.log(`\nFile: ${result.filePath}`);
      console.log(`Hash: ${result.fileHash}`);
      console.log(`Links found: ${result.stats.totalLinks}`);
      console.log(
        `Links needing validation: ${result.stats.linksNeedingValidation}`
      );

      if (result.links.length > 0) {
        console.log('\nLinks:');
        result.links.forEach((link, index) => {
          console.log(`  ${index + 1}. [${link.category}] ${link.url}`);
          console.log(`     Line ${link.line}, Column ${link.column}`);
          console.log(`     Text: "${link.text}"`);
          console.log(`     Hash: ${link.hash}`);
          if (link.reference) {
            console.log(`     Reference: ${link.reference}`);
          }
          console.log('');
        });
      }
    });
  }
}
|
||||
|
||||
// Export functions for use as a module
export {
  extractLinksFromFile,
  extractMarkdownLinks,
  extractHtmlLinks,
  generateFileHash,
  generateLinkHash,
  categorizeLinkType,
};

// Run main function if called directly.
// NOTE(review): the file:// string comparison breaks on Windows paths;
// url.pathToFileURL(process.argv[1]).href is the portable form — confirm.
if (import.meta.url === `file://${process.argv[1]}`) {
  main();
}
|
||||
|
|
@ -0,0 +1,384 @@
|
|||
/**
|
||||
* Matrix Generator for Link Validation Workflows
|
||||
* Replaces complex bash scripting with maintainable JavaScript
|
||||
* Includes cache-aware optimization to skip validation of unchanged files
|
||||
*/
|
||||
|
||||
import { spawn } from 'child_process';
|
||||
import process from 'process';
|
||||
|
||||
// Product configuration mapping file paths to products.
// Keys are content-path prefixes (matched with a trailing slash appended by
// groupFilesByProduct); values hold the matrix job key and display name.
const PRODUCT_MAPPING = {
  'content/influxdb3/core': {
    key: 'influxdb3-core',
    name: 'InfluxDB 3 Core',
  },
  'content/influxdb3/enterprise': {
    key: 'influxdb3-enterprise',
    name: 'InfluxDB 3 Enterprise',
  },
  'content/influxdb3/cloud-dedicated': {
    key: 'influxdb3-cloud-dedicated',
    name: 'InfluxDB 3 Cloud Dedicated',
  },
  'content/influxdb3/cloud-serverless': {
    key: 'influxdb3-cloud-serverless',
    name: 'InfluxDB 3 Cloud Serverless',
  },
  'content/influxdb3/clustered': {
    key: 'influxdb3-clustered',
    name: 'InfluxDB 3 Clustered',
  },
  'content/influxdb3/explorer': {
    key: 'influxdb3-explorer',
    name: 'InfluxDB 3 Explorer',
  },
  'content/influxdb/v2': {
    key: 'influxdb-v2',
    name: 'InfluxDB v2',
  },
  'content/influxdb/cloud': {
    key: 'influxdb-cloud',
    name: 'InfluxDB Cloud',
  },
  'content/influxdb/v1': {
    key: 'influxdb-v1',
    name: 'InfluxDB v1',
  },
  'content/influxdb/enterprise_influxdb': {
    key: 'influxdb-enterprise-v1',
    name: 'InfluxDB Enterprise v1',
  },
  'content/telegraf': {
    key: 'telegraf',
    name: 'Telegraf',
  },
  'content/kapacitor': {
    key: 'kapacitor',
    name: 'Kapacitor',
  },
  'content/chronograf': {
    key: 'chronograf',
    name: 'Chronograf',
  },
  'content/flux': {
    key: 'flux',
    name: 'Flux',
  },
  'content/shared': {
    key: 'shared',
    name: 'Shared Content',
  },
  'api-docs': {
    key: 'api-docs',
    name: 'API Documentation',
  },
};
|
||||
|
||||
/**
 * Group files by product based on their path.
 * @param {string[]} files - Array of file paths
 * @returns {Object} - Object with product keys and arrays of files
 */
function groupFilesByProduct(files) {
  // Start every product with an empty bucket so callers can rely on each
  // key existing even when no file matched it.
  const grouped = {};
  for (const { key } of Object.values(PRODUCT_MAPPING)) {
    grouped[key] = [];
  }

  for (const file of files) {
    // First product whose path prefix (with trailing slash) matches wins.
    const entry = Object.entries(PRODUCT_MAPPING).find(([pathPrefix]) =>
      file.startsWith(`${pathPrefix}/`)
    );

    if (entry) {
      grouped[entry[1].key].push(file);
    } else if (file.startsWith('api-docs/')) {
      // Edge case kept from the original mapping: api-docs has no
      // trailing slash in its key.
      grouped['api-docs'].push(file);
    }
    // Files under no known prefix are intentionally left ungrouped.
  }

  return grouped;
}
|
||||
|
||||
/**
 * Run incremental validation analysis
 * @param {string[]} files - Array of file paths to analyze
 * @returns {Promise<Object>} - Incremental validation results
 */
async function runIncrementalAnalysis(files) {
  // Always resolves (never rejects): any failure degrades to "validate
  // every file" so the calling workflow cannot be broken by the cache layer.
  return new Promise((resolve) => {
    const child = spawn(
      'node',
      ['.github/scripts/incremental-validator.js', ...files],
      {
        stdio: ['pipe', 'pipe', 'pipe'],
        env: process.env,
      }
    );

    let stdout = '';
    let stderr = '';

    child.stdout.on('data', (data) => {
      stdout += data.toString();
    });

    child.stderr.on('data', (data) => {
      stderr += data.toString();
    });

    child.on('close', (code) => {
      if (code === 0) {
        try {
          // Parse the JSON output from the validation script.
          // NOTE(review): this expects a stdout line starting with '{', but
          // the incremental-validator CLI appears to print human-readable
          // text only — if so, this always falls back to validating every
          // file and the cache optimization never triggers. Confirm the
          // intended output contract between the two scripts.
          const lines = stdout.trim().split('\n');
          const jsonLine = lines.find((line) => line.startsWith('{'));

          if (jsonLine) {
            const results = JSON.parse(jsonLine);
            resolve(results);
          } else {
            // No parseable output: treat every file as needing validation.
            resolve({ filesToValidate: files.map((f) => ({ filePath: f })) });
          }
        } catch (error) {
          console.warn(
            `Warning: Could not parse incremental validation results: ${error.message}`
          );
          resolve({ filesToValidate: files.map((f) => ({ filePath: f })) });
        }
      } else {
        console.warn(
          `Incremental validation failed with code ${code}: ${stderr}`
        );
        resolve({ filesToValidate: files.map((f) => ({ filePath: f })) });
      }
    });

    child.on('error', (error) => {
      // Spawn failure (e.g. node missing): same full-validation fallback.
      console.warn(`Incremental validation error: ${error.message}`);
      resolve({ filesToValidate: files.map((f) => ({ filePath: f })) });
    });
  });
}
|
||||
|
||||
/**
 * Generate matrix configuration for GitHub Actions with cache awareness
 * @param {string[]} changedFiles - Array of changed file paths
 * @param {Object} options - Configuration options
 * @param {number} [options.maxConcurrentJobs=5] - Cap on parallel matrix jobs
 * @param {boolean} [options.forceSequential=false] - Force a single job
 * @param {number} [options.minFilesForParallel=10] - Parallel threshold
 * @param {boolean} [options.useCache=true] - Run cache-aware analysis first
 * @returns {Promise<Object>} - Matrix configuration object
 */
async function generateMatrix(changedFiles, options = {}) {
  const {
    maxConcurrentJobs = 5,
    forceSequential = false,
    minFilesForParallel = 10,
    useCache = true,
  } = options;

  // No changed files at all: nothing to validate.
  if (!changedFiles || changedFiles.length === 0) {
    return {
      strategy: 'none',
      hasChanges: false,
      matrix: { include: [] },
      cacheStats: { hitRate: 100, cacheHits: 0, cacheMisses: 0 },
    };
  }

  let filesToValidate = changedFiles;
  let cacheStats = {
    hitRate: 0,
    cacheHits: 0,
    cacheMisses: changedFiles.length,
  };

  // Run incremental analysis if cache is enabled
  if (useCache) {
    try {
      console.log(
        `🔍 Running cache analysis for ${changedFiles.length} files...`
      );
      const analysisResults = await runIncrementalAnalysis(changedFiles);

      if (analysisResults.filesToValidate) {
        filesToValidate = analysisResults.filesToValidate.map(
          (f) => f.filePath
        );
        cacheStats = analysisResults.cacheStats || cacheStats;

        console.log(
          `📊 Cache analysis complete: ${cacheStats.hitRate}% hit rate`
        );
        console.log(
          `✅ ${cacheStats.cacheHits} files cached, ${cacheStats.cacheMisses} need validation`
        );
      }
    } catch (error) {
      // Cache analysis is an optimization only — fall back to validating
      // everything rather than failing the workflow.
      console.warn(
        `Cache analysis failed: ${error.message}, proceeding without cache optimization`
      );
    }
  }

  // If no files need validation after cache analysis
  if (filesToValidate.length === 0) {
    return {
      strategy: 'cache-hit',
      hasChanges: false,
      matrix: { include: [] },
      cacheStats,
      message: '✨ All files are cached - no validation needed!',
    };
  }

  const productFiles = groupFilesByProduct(filesToValidate);
  const productsWithFiles = Object.entries(productFiles).filter(
    ([, files]) => files.length > 0
  );

  // Determine strategy based on file count and configuration
  const totalFiles = filesToValidate.length;
  const shouldUseParallel =
    !forceSequential &&
    totalFiles >= minFilesForParallel &&
    productsWithFiles.length > 1;

  if (shouldUseParallel) {
    // Parallel strategy: create matrix with products
    let matrixIncludes = productsWithFiles.map(([productKey, files]) => {
      const product = Object.values(PRODUCT_MAPPING).find(
        (p) => p.key === productKey
      );
      return {
        product: productKey,
        name: product?.name || productKey,
        files: files.join(' '),
        cacheEnabled: useCache,
      };
    });

    // Cap the number of jobs WITHOUT dropping files: previously the list
    // was sliced to maxConcurrentJobs, which silently skipped validation
    // for every product past the cap. Overflow products are now merged
    // into a single combined job instead.
    if (matrixIncludes.length > maxConcurrentJobs) {
      const kept = matrixIncludes.slice(0, maxConcurrentJobs - 1);
      const overflow = matrixIncludes.slice(maxConcurrentJobs - 1);
      kept.push({
        product: 'combined',
        name: 'Combined Products',
        files: overflow.map((entry) => entry.files).join(' '),
        cacheEnabled: useCache,
      });
      matrixIncludes = kept;
    }

    return {
      strategy: 'parallel',
      hasChanges: true,
      matrix: { include: matrixIncludes },
      cacheStats,
      originalFileCount: changedFiles.length,
      validationFileCount: filesToValidate.length,
    };
  } else {
    // Sequential strategy: single job with all files
    return {
      strategy: 'sequential',
      hasChanges: true,
      matrix: {
        include: [
          {
            product: 'all',
            name: 'All Files',
            files: filesToValidate.join(' '),
            cacheEnabled: useCache,
          },
        ],
      },
      cacheStats,
      originalFileCount: changedFiles.length,
      validationFileCount: filesToValidate.length,
    };
  }
}
|
||||
|
||||
/**
 * CLI interface for the matrix generator: parses flags and file arguments,
 * runs generateMatrix, and prints the result either as JSON or as
 * GitHub-Actions `key=value` output lines.
 *
 * Exits 0 on success/help, 1 on invalid arguments or generation failure.
 */
async function main() {
  const args = process.argv.slice(2);

  if (args.includes('--help') || args.includes('-h')) {
    console.log(`
Usage: node matrix-generator.js [options] <file1> <file2> ...

Options:
  --max-concurrent <n>      Maximum concurrent jobs (default: 5)
  --force-sequential        Force sequential execution
  --min-files-parallel <n>  Minimum files needed for parallel (default: 10)
  --output-format <format>  Output format: json, github (default: github)
  --no-cache                Disable cache-aware optimization
  --help, -h                Show this help message

Examples:
  node matrix-generator.js content/influxdb3/core/file1.md content/influxdb/v2/file2.md
  node matrix-generator.js --force-sequential content/shared/file.md
  node matrix-generator.js --no-cache --output-format json *.md
`);
    process.exit(0);
  }

  // Parse a positive-integer option value with an explicit radix; exits
  // with an error instead of letting NaN propagate into matrix options
  // (previously `parseInt(value)` had no radix and no validation).
  const parsePositiveInt = (flag, raw) => {
    const value = Number.parseInt(raw, 10);
    if (!Number.isInteger(value) || value <= 0) {
      console.error(`Invalid value for ${flag}: ${raw}`);
      process.exit(1);
    }
    return value;
  };

  // Parse options
  const options = {};
  const files = [];

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];

    if (arg === '--max-concurrent' && i + 1 < args.length) {
      options.maxConcurrentJobs = parsePositiveInt(arg, args[++i]);
    } else if (arg === '--force-sequential') {
      options.forceSequential = true;
    } else if (arg === '--min-files-parallel' && i + 1 < args.length) {
      options.minFilesForParallel = parsePositiveInt(arg, args[++i]);
    } else if (arg === '--output-format' && i + 1 < args.length) {
      options.outputFormat = args[++i];
    } else if (arg === '--no-cache') {
      options.useCache = false;
    } else if (!arg.startsWith('--')) {
      files.push(arg);
    }
  }

  try {
    const result = await generateMatrix(files, options);

    if (options.outputFormat === 'json') {
      console.log(JSON.stringify(result, null, 2));
    } else {
      // GitHub Actions format
      console.log(`strategy=${result.strategy}`);
      console.log(`has-changes=${result.hasChanges}`);
      console.log(`matrix=${JSON.stringify(result.matrix)}`);

      // Add cache statistics
      if (result.cacheStats) {
        console.log(`cache-hit-rate=${result.cacheStats.hitRate}`);
        console.log(`cache-hits=${result.cacheStats.cacheHits}`);
        console.log(`cache-misses=${result.cacheStats.cacheMisses}`);
      }

      if (result.originalFileCount !== undefined) {
        console.log(`original-file-count=${result.originalFileCount}`);
        console.log(`validation-file-count=${result.validationFileCount}`);
      }

      if (result.message) {
        console.log(`message=${result.message}`);
      }
    }
  } catch (error) {
    console.error(`Error generating matrix: ${error.message}`);
    process.exit(1);
  }
}
|
||||
|
||||
// Run CLI if this file is executed directly
// NOTE(review): this string comparison can fail on Windows (drive-letter
// casing, backslash separators) — consider url.fileURLToPath for a robust
// check; verify the target platforms before relying on it there.
if (import.meta.url === `file://${process.argv[1]}`) {
  main().catch(console.error);
}

// Public API: consumed by the GitHub Actions workflow and by tests.
export { generateMatrix, groupFilesByProduct, PRODUCT_MAPPING };
|
||||
|
|
@ -0,0 +1,140 @@
|
|||
# PR Link Validation Workflow
|
||||
# Provides basic and parallel workflows
|
||||
# with smart strategy selection based on change volume
|
||||
name: PR Link Validation
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- 'content/**/*.md'
|
||||
- 'content/**/*.html'
|
||||
- 'api-docs/**/*.yml'
|
||||
- 'assets/**/*.js'
|
||||
- 'layouts/**/*.html'
|
||||
|
||||
jobs:
|
||||
setup:
|
||||
name: Setup and Strategy Detection
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
strategy: ${{ steps.determine-strategy.outputs.strategy }}
|
||||
has-changes: ${{ steps.determine-strategy.outputs.has-changes }}
|
||||
matrix: ${{ steps.determine-strategy.outputs.matrix }}
|
||||
all-files: ${{ steps.changed-files.outputs.all_changed_files }}
|
||||
cache-hit-rate: ${{ steps.determine-strategy.outputs.cache-hit-rate }}
|
||||
cache-hits: ${{ steps.determine-strategy.outputs.cache-hits }}
|
||||
cache-misses: ${{ steps.determine-strategy.outputs.cache-misses }}
|
||||
original-file-count: ${{ steps.determine-strategy.outputs.original-file-count }}
|
||||
validation-file-count: ${{ steps.determine-strategy.outputs.validation-file-count }}
|
||||
cache-message: ${{ steps.determine-strategy.outputs.message }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup docs environment
|
||||
uses: ./.github/actions/setup-docs-env
|
||||
|
||||
- name: Get changed files
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@v41
|
||||
with:
|
||||
files: |
|
||||
content/**/*.md
|
||||
content/**/*.html
|
||||
api-docs/**/*.yml
|
||||
|
||||
- name: Determine validation strategy
|
||||
id: determine-strategy
|
||||
run: |
|
||||
if [[ "${{ steps.changed-files.outputs.any_changed }}" != "true" ]]; then
|
||||
echo "No relevant files changed"
|
||||
echo "strategy=none" >> $GITHUB_OUTPUT
|
||||
echo "has-changes=false" >> $GITHUB_OUTPUT
|
||||
echo "matrix={\"include\":[]}" >> $GITHUB_OUTPUT
|
||||
echo "cache-hit-rate=100" >> $GITHUB_OUTPUT
|
||||
echo "cache-hits=0" >> $GITHUB_OUTPUT
|
||||
echo "cache-misses=0" >> $GITHUB_OUTPUT
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Use our matrix generator with cache awareness
|
||||
files="${{ steps.changed-files.outputs.all_changed_files }}"
|
||||
|
||||
echo "🔍 Analyzing ${files} for cache-aware validation..."
|
||||
|
||||
# Generate matrix and capture outputs
|
||||
result=$(node .github/scripts/matrix-generator.js \
|
||||
--min-files-parallel 10 \
|
||||
--max-concurrent 5 \
|
||||
--output-format github \
|
||||
$files)
|
||||
|
||||
# Parse all outputs from matrix generator
|
||||
while IFS='=' read -r key value; do
|
||||
case "$key" in
|
||||
strategy|has-changes|cache-hit-rate|cache-hits|cache-misses|original-file-count|validation-file-count|message)
|
||||
echo "$key=$value" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
matrix)
|
||||
echo "matrix=$value" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
esac
|
||||
done <<< "$result"
|
||||
|
||||
# Extract values for logging
|
||||
strategy=$(echo "$result" | grep "^strategy=" | cut -d'=' -f2)
|
||||
cache_hit_rate=$(echo "$result" | grep "^cache-hit-rate=" | cut -d'=' -f2)
|
||||
cache_message=$(echo "$result" | grep "^message=" | cut -d'=' -f2-)
|
||||
|
||||
echo "📊 Selected strategy: $strategy"
|
||||
if [[ -n "$cache_hit_rate" ]]; then
|
||||
echo "📈 Cache hit rate: ${cache_hit_rate}%"
|
||||
fi
|
||||
if [[ -n "$cache_message" ]]; then
|
||||
echo "$cache_message"
|
||||
fi
|
||||
|
||||
validate:
|
||||
name: ${{ matrix.name }}
|
||||
needs: setup
|
||||
if: needs.setup.outputs.has-changes == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix: ${{ fromJson(needs.setup.outputs.matrix) }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup docs environment
|
||||
uses: ./.github/actions/setup-docs-env
|
||||
|
||||
- name: Validate links
|
||||
uses: ./.github/actions/validate-links
|
||||
with:
|
||||
files: ${{ matrix.files || needs.setup.outputs.all-files }}
|
||||
product-name: ${{ matrix.product }}
|
||||
cache-enabled: ${{ matrix.cacheEnabled || 'true' }}
|
||||
cache-key: link-validation-${{ github.event.pull_request.base.sha }}
|
||||
|
||||
report:
|
||||
name: Report Results
|
||||
needs: [setup, validate]
|
||||
if: always() && needs.setup.outputs.has-changes == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup docs environment
|
||||
uses: ./.github/actions/setup-docs-env
|
||||
|
||||
- name: Report broken links
|
||||
uses: ./.github/actions/report-broken-links
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
max-links-per-file: 20
|
||||
|
|
@ -21,6 +21,9 @@ See @.github/instructions/contributing.instructions.md for essential InfluxData
|
|||
documentation contributing guidelines, such as style and
|
||||
formatting, and commonly used shortcodes.
|
||||
|
||||
See @TESTING.md for comprehensive testing information, including code block
|
||||
testing, link validation, style linting, and advanced testing procedures.
|
||||
|
||||
See @.github/instructions/shortcodes-reference.instructions.md for detailed
|
||||
information about shortcodes used in this project.
|
||||
|
||||
|
|
|
|||
189
CONTRIBUTING.md
189
CONTRIBUTING.md
|
|
@ -9,7 +9,7 @@ Ready to contribute? Here's the essential workflow:
|
|||
2. [Fork and clone](#fork-and-clone-influxdata-documentation-repository) this repository
|
||||
3. [Install dependencies](#development-environment-setup) (Node.js, Yarn, Docker)
|
||||
4. Make your changes following [style guidelines](#making-changes)
|
||||
5. [Test your changes](#testing--quality-assurance) (pre-commit and pre-push hooks run automatically)
|
||||
5. [Test your changes](TESTING.md) (pre-commit and pre-push hooks run automatically)
|
||||
6. [Submit a pull request](#submission-process)
|
||||
|
||||
For detailed setup and reference information, see the sections below.
|
||||
|
|
@ -250,64 +250,29 @@ For more information about generating InfluxDB API documentation, see the
|
|||
|
||||
---
|
||||
|
||||
<!-- agent:instruct: condense -->
|
||||
## Testing & Quality Assurance
|
||||
|
||||
### Pre-commit Hooks
|
||||
For comprehensive testing information, including code block testing, link validation, style linting, and advanced testing procedures, see **[TESTING.md](TESTING.md)**.
|
||||
|
||||
docs-v2 uses Lefthook to manage Git hooks that run during pre-commit and pre-push. The hooks run the scripts defined in `package.json` to lint Markdown and test code blocks.
|
||||
When you try to commit changes (`git commit`), Git runs
|
||||
the commands configured in `lefthook.yml` which pass your **staged** files to Vale,
|
||||
Prettier, Cypress (for UI tests and link-checking), and Pytest (for testing Python and shell code in code blocks).
|
||||
### Quick Testing Reference
|
||||
|
||||
#### Skip pre-commit hooks
|
||||
```bash
|
||||
# Test code blocks
|
||||
yarn test:codeblocks:all
|
||||
|
||||
**We strongly recommend running linting and tests**, but you can skip them
|
||||
(and avoid installing dependencies)
|
||||
by including the `LEFTHOOK=0` environment variable or the `--no-verify` flag with
|
||||
your commit--for example:
|
||||
# Test links
|
||||
yarn test:links content/influxdb3/core/**/*.md
|
||||
|
||||
# Run style linting
|
||||
docker compose run -T vale content/**/*.md
|
||||
```
|
||||
|
||||
Pre-commit hooks run automatically when you commit changes, testing your staged files with Vale, Prettier, Cypress, and Pytest. To skip hooks if needed:
|
||||
|
||||
```sh
|
||||
git commit -m "<COMMIT_MESSAGE>" --no-verify
|
||||
```
|
||||
|
||||
```sh
|
||||
LEFTHOOK=0 git commit
|
||||
```
|
||||
|
||||
### Code Block Testing Overview
|
||||
|
||||
[pytest-codeblocks](https://github.com/nschloe/pytest-codeblocks/tree/main) extracts code from python and shell Markdown code blocks and executes assertions for the code.
|
||||
|
||||
**Basic example:**
|
||||
|
||||
```python
|
||||
print("Hello, world!")
|
||||
```
|
||||
|
||||
<!--pytest-codeblocks:expected-output-->
|
||||
|
||||
```
|
||||
Hello, world!
|
||||
```
|
||||
|
||||
For detailed testing setup and configuration, see [Detailed Testing Setup](#detailed-testing-setup).
|
||||
|
||||
### Style Linting (Vale)
|
||||
|
||||
docs-v2 includes Vale writing style linter configurations to enforce documentation writing style rules, guidelines, branding, and vocabulary terms.
|
||||
|
||||
**Basic usage:**
|
||||
|
||||
```sh
|
||||
docker compose run -T vale --config=content/influxdb/cloud-dedicated/.vale.ini --minAlertLevel=error content/influxdb/cloud-dedicated/write-data/**/*.md
|
||||
```
|
||||
|
||||
**VS Code integration:**
|
||||
|
||||
1. Install the [Vale VSCode](https://marketplace.visualstudio.com/items?itemName=ChrisChinchilla.vale-vscode) extension.
|
||||
2. In the extension settings, set the `Vale:Vale CLI:Path` value to `${workspaceFolder}/node_modules/.bin/vale`.
|
||||
|
||||
---
|
||||
|
||||
<!-- agent:instruct: condense -->
|
||||
|
|
@ -1720,132 +1685,6 @@ Replace the following:
|
|||
- {{% code-placeholder-key %}}`API_TOKEN`{{% /code-placeholder-key %}}: your [InfluxDB API token](/influxdb/v2/admin/tokens/)
|
||||
```
|
||||
|
||||
<!-- agent:instruct: extract testing-setup.instructions.md -->
|
||||
### Detailed Testing Setup
|
||||
|
||||
#### Set up test scripts and credentials
|
||||
|
||||
Tests for code blocks require your InfluxDB credentials and other typical
|
||||
InfluxDB configuration.
|
||||
|
||||
To set up your docs-v2 instance to run tests locally, do the following:
|
||||
|
||||
1. **Set executable permissions on test scripts** in `./test/src`:
|
||||
|
||||
```sh
|
||||
chmod +x ./test/src/*.sh
|
||||
```
|
||||
|
||||
2. **Create credentials for tests**:
|
||||
|
||||
- Create databases, buckets, and tokens for the product(s) you're testing.
|
||||
- If you don't have access to a Clustered instance, you can use your
|
||||
Cloud Dedicated instance for testing in most cases. To avoid conflicts when
|
||||
running tests, create separate Cloud Dedicated and Clustered databases.
|
||||
|
||||
1. **Create .env.test**: Copy the `./test/env.test.example` file into each
|
||||
product directory to test and rename the file as `.env.test`--for example:
|
||||
|
||||
```sh
|
||||
./content/influxdb/cloud-dedicated/.env.test
|
||||
```
|
||||
|
||||
2. Inside each product's `.env.test` file, assign your InfluxDB credentials to
|
||||
environment variables:
|
||||
|
||||
- Include the usual `INFLUX_` environment variables
|
||||
- In
|
||||
`cloud-dedicated/.env.test` and `clustered/.env.test` files, also define the
|
||||
following variables:
|
||||
|
||||
- `ACCOUNT_ID`, `CLUSTER_ID`: You can find these values in your `influxctl`
|
||||
`config.toml` configuration file.
|
||||
- `MANAGEMENT_TOKEN`: Use the `influxctl management create` command to generate
|
||||
a long-lived management token to authenticate Management API requests
|
||||
|
||||
See the substitution
|
||||
patterns in `./test/src/prepare-content.sh` for the full list of variables you may need to define in your `.env.test` files.
|
||||
|
||||
3. For influxctl commands to run in tests, move or copy your `config.toml` file
|
||||
to the `./test` directory.
|
||||
|
||||
> [!Warning]
|
||||
>
|
||||
> - The database you configure in `.env.test` and any written data may
|
||||
be deleted during test runs.
|
||||
> - Don't add your `.env.test` files to Git. To prevent accidentally adding credentials to the docs-v2 repo,
|
||||
> Git is configured to ignore `.env*` files. Consider backing them up on your local machine in case of accidental deletion.
|
||||
|
||||
#### Test shell and python code blocks
|
||||
|
||||
[pytest-codeblocks](https://github.com/nschloe/pytest-codeblocks/tree/main) extracts code from python and shell Markdown code blocks and executes assertions for the code.
|
||||
If you don't assert a value (using a Python `assert` statement), `--codeblocks` considers a non-zero exit code to be a failure.
|
||||
|
||||
**Note**: `pytest --codeblocks` uses Python's `subprocess.run()` to execute shell code.
|
||||
|
||||
You can use this to test CLI and interpreter commands, regardless of programming
|
||||
language, as long as they return standard exit codes.
|
||||
|
||||
To make the documented output of a code block testable, precede it with the
|
||||
`<!--pytest-codeblocks:expected-output-->` tag and **omit the code block language
|
||||
descriptor**--for example, in your Markdown file:
|
||||
|
||||
##### Example markdown
|
||||
|
||||
```python
|
||||
print("Hello, world!")
|
||||
```
|
||||
|
||||
<!--pytest-codeblocks:expected-output-->
|
||||
|
||||
The next code block is treated as an assertion.
|
||||
If successful, the output is the following:
|
||||
|
||||
```
|
||||
Hello, world!
|
||||
```
|
||||
|
||||
For commands, such as `influxctl` CLI commands, that require launching an
|
||||
OAuth URL in a browser, wrap the command in a subshell and redirect the output
|
||||
to `/shared/urls.txt` in the container--for example:
|
||||
|
||||
```sh
|
||||
# Test the preceding command outside of the code block.
|
||||
# influxctl authentication requires TTY interaction--
|
||||
# output the auth URL to a file that the host can open.
|
||||
script -c "influxctl user list " \
|
||||
/dev/null > /shared/urls.txt
|
||||
```
|
||||
|
||||
You probably don't want to display this syntax in the docs, which unfortunately
|
||||
means you'd need to include the test block separately from the displayed code
|
||||
block.
|
||||
To hide it from users, wrap the code block inside an HTML comment.
|
||||
pytest-codeblocks will still collect and run the code block.
|
||||
|
||||
##### Mark tests to skip
|
||||
|
||||
pytest-codeblocks has features for skipping tests and marking blocks as failed.
|
||||
To learn more, see the pytest-codeblocks README and tests.
|
||||
|
||||
#### Troubleshoot tests
|
||||
|
||||
##### Pytest collected 0 items
|
||||
|
||||
Potential reasons:
|
||||
|
||||
- See the test discovery options in `pytest.ini`.
|
||||
- For Python code blocks, use the following delimiter:
|
||||
|
||||
```python
|
||||
# Codeblocks runs this block.
|
||||
```
|
||||
|
||||
`pytest --codeblocks` ignores code blocks that use the following:
|
||||
|
||||
```py
|
||||
# Codeblocks ignores this block.
|
||||
```
|
||||
|
||||
<!-- agent:instruct: condense -->
|
||||
### Advanced Configuration
|
||||
|
|
|
|||
|
|
@ -11,6 +11,10 @@ This repository contains the InfluxDB 2.x documentation published at [docs.influ
|
|||
We welcome and encourage community contributions.
|
||||
For information about contributing to the InfluxData documentation, see [Contribution guidelines](CONTRIBUTING.md).
|
||||
|
||||
## Testing
|
||||
|
||||
For information about testing the documentation, including code block testing, link validation, and style linting, see [Testing guide](TESTING.md).
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
InfluxData takes security and our users' trust very seriously.
|
||||
|
|
|
|||
|
|
@ -0,0 +1,364 @@
|
|||
# Testing Guide for InfluxData Documentation
|
||||
|
||||
This guide covers all testing procedures for the InfluxData documentation, including code block testing, link validation, and style linting.
|
||||
|
||||
## Quick Start
|
||||
|
||||
1. **Prerequisites**: Install [Node.js](https://nodejs.org/en), [Yarn](https://yarnpkg.com/getting-started/install), and [Docker](https://docs.docker.com/get-docker/)
|
||||
2. **Install dependencies**: Run `yarn` to install all dependencies
|
||||
3. **Build test environment**: Run `docker build -t influxdata/docs-pytest:latest -f Dockerfile.pytest .`
|
||||
4. **Run tests**: Use any of the test commands below
|
||||
|
||||
## Test Types Overview
|
||||
|
||||
| Test Type | Purpose | Command |
|
||||
|-----------|---------|---------|
|
||||
| **Code blocks** | Validate shell/Python code examples | `yarn test:codeblocks:all` |
|
||||
| **Link validation** | Check internal/external links | `yarn test:links` |
|
||||
| **Style linting** | Enforce writing standards | `docker compose run -T vale` |
|
||||
| **E2E tests** | UI and functionality testing | `yarn test:e2e` |
|
||||
|
||||
## Code Block Testing
|
||||
|
||||
Code block testing validates that shell commands and Python scripts in documentation work correctly using [pytest-codeblocks](https://github.com/nschloe/pytest-codeblocks/tree/main).
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```bash
|
||||
# Test all code blocks
|
||||
yarn test:codeblocks:all
|
||||
|
||||
# Test specific products
|
||||
yarn test:codeblocks:cloud
|
||||
yarn test:codeblocks:v2
|
||||
yarn test:codeblocks:telegraf
|
||||
```
|
||||
|
||||
### Setup and Configuration
|
||||
|
||||
#### 1. Set executable permissions on test scripts
|
||||
|
||||
```sh
|
||||
chmod +x ./test/src/*.sh
|
||||
```
|
||||
|
||||
#### 2. Create test credentials
|
||||
|
||||
Create databases, buckets, and tokens for the product(s) you're testing.
|
||||
If you don't have access to a Clustered instance, you can use your Cloud Dedicated instance for testing in most cases. To avoid conflicts when running tests, create separate Cloud Dedicated and Clustered databases.
|
||||
|
||||
#### 3. Configure environment variables
|
||||
|
||||
Copy the `./test/env.test.example` file into each product directory and rename as `.env.test`:
|
||||
|
||||
```sh
|
||||
# Example locations
|
||||
./content/influxdb/cloud-dedicated/.env.test
|
||||
./content/influxdb3/clustered/.env.test
|
||||
```
|
||||
|
||||
Inside each product's `.env.test` file, assign your InfluxDB credentials:
|
||||
|
||||
- Include the usual `INFLUX_` environment variables
|
||||
- For `cloud-dedicated/.env.test` and `clustered/.env.test`, also define:
|
||||
- `ACCOUNT_ID`, `CLUSTER_ID`: Found in your `influxctl config.toml`
|
||||
- `MANAGEMENT_TOKEN`: Generate with `influxctl management create`
|
||||
|
||||
See `./test/src/prepare-content.sh` for the full list of variables you may need.
|
||||
|
||||
#### 4. Configure influxctl commands
|
||||
|
||||
For influxctl commands to run in tests, move or copy your `config.toml` file to the `./test` directory.
|
||||
|
||||
> [!Warning]
|
||||
> - The database you configure in `.env.test` and any written data may be deleted during test runs
|
||||
> - Don't add your `.env.test` files to Git. Git is configured to ignore `.env*` files to prevent accidentally committing credentials
|
||||
|
||||
### Writing Testable Code Blocks
|
||||
|
||||
#### Basic Example
|
||||
|
||||
```python
|
||||
print("Hello, world!")
|
||||
```
|
||||
|
||||
<!--pytest-codeblocks:expected-output-->
|
||||
|
||||
```
|
||||
Hello, world!
|
||||
```
|
||||
|
||||
#### Interactive Commands
|
||||
|
||||
For commands that require TTY interaction (like `influxctl` authentication), wrap the command in a subshell and redirect output:
|
||||
|
||||
```sh
|
||||
# Test the preceding command outside of the code block.
|
||||
# influxctl authentication requires TTY interaction--
|
||||
# output the auth URL to a file that the host can open.
|
||||
script -c "influxctl user list " \
|
||||
/dev/null > /shared/urls.txt
|
||||
```
|
||||
|
||||
To hide test blocks from users, wrap them in HTML comments. pytest-codeblocks will still collect and run them.
|
||||
|
||||
#### Skipping Tests
|
||||
|
||||
pytest-codeblocks has features for skipping tests and marking blocks as failed. See the [pytest-codeblocks README](https://github.com/nschloe/pytest-codeblocks/tree/main) for details.
|
||||
|
||||
### Troubleshooting
|
||||
|
||||
#### "Pytest collected 0 items"
|
||||
|
||||
Potential causes:
|
||||
- Check test discovery options in `pytest.ini`
|
||||
- Use `python` (not `py`) for Python code block language identifiers:
|
||||
```python
|
||||
# This works
|
||||
```
|
||||
vs
|
||||
```py
|
||||
# This is ignored
|
||||
```
|
||||
|
||||
## Link Validation Testing
|
||||
|
||||
Link validation uses Cypress for e2e browser-based testing against the Hugo site to ensure all internal and external links work correctly.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```bash
|
||||
# Test specific files
|
||||
yarn test:links content/influxdb3/core/**/*.md
|
||||
|
||||
# Test all links (may take a long time)
|
||||
yarn test:links
|
||||
|
||||
# Test by product (may take a long time)
|
||||
yarn test:links:v3
|
||||
yarn test:links:v2
|
||||
yarn test:links:telegraf
|
||||
yarn test:links:chronograf
|
||||
yarn test:links:kapacitor
|
||||
```
|
||||
|
||||
### How Link Validation Works
|
||||
|
||||
The tests:
|
||||
1. Start a Hugo development server
|
||||
2. Navigate to each page in a browser
|
||||
3. Check all links for validity
|
||||
4. Report broken or invalid links
|
||||
|
||||
### GitHub Actions Integration
|
||||
|
||||
#### Composite Action
|
||||
|
||||
The `.github/actions/validate-links/` composite action provides reusable link validation:
|
||||
|
||||
```yaml
|
||||
- uses: ./.github/actions/validate-links
|
||||
with:
|
||||
files: "content/influxdb3/core/file.md content/influxdb/v2/file2.md"
|
||||
product-name: "core"
|
||||
cache-enabled: "true"
|
||||
cache-key: "link-validation"
|
||||
```
|
||||
|
||||
#### Matrix Generator
|
||||
|
||||
The `.github/scripts/matrix-generator.js` script provides intelligent strategy selection:
|
||||
|
||||
- **Sequential validation**: For small changes (< 10 files) or single-product changes
|
||||
- **Parallel validation**: For large changes across multiple products (up to 5 concurrent jobs)
|
||||
|
||||
Test locally:
|
||||
|
||||
```bash
|
||||
node .github/scripts/matrix-generator.js content/influxdb3/core/file1.md content/influxdb/v2/file2.md
|
||||
```
|
||||
|
||||
Configuration options:
|
||||
- `--max-concurrent <n>`: Maximum parallel jobs (default: 5)
|
||||
- `--force-sequential`: Force sequential execution
|
||||
- `--min-files-parallel <n>`: Minimum files for parallel (default: 10)
|
||||
|
||||
### Caching for Link Validation
|
||||
|
||||
Link validation supports caching to improve performance:
|
||||
|
||||
- **Cache location**: `.cache/link-validation/` (local), GitHub Actions cache (CI)
|
||||
- **Cache keys**: Based on content file hashes
|
||||
- **TTL**: 30 days by default, configurable
|
||||
|
||||
#### Cache Configuration Options
|
||||
|
||||
```bash
|
||||
# Use 7-day cache for more frequent validation
|
||||
yarn test:links --cache-ttl=7 content/influxdb3/**/*.md
|
||||
|
||||
# Use 1-day cache via environment variable
|
||||
LINK_CACHE_TTL_DAYS=1 yarn test:links content/**/*.md
|
||||
|
||||
# Clean up expired cache entries
|
||||
node .github/scripts/incremental-validator.js --cleanup
|
||||
```
|
||||
|
||||
#### How Caching Works
|
||||
|
||||
- **Cache key**: Based on file path + content hash (file changes invalidate cache immediately)
|
||||
- **External links**: Cached for the TTL period since URLs rarely change
|
||||
- **Internal links**: Effectively cached until file content changes
|
||||
- **Automatic cleanup**: Expired entries are removed on access and via `--cleanup`
|
||||
|
||||
## Style Linting (Vale)
|
||||
|
||||
Style linting uses [Vale](https://vale.sh/) to enforce documentation writing standards, branding guidelines, and vocabulary consistency.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```bash
|
||||
# Basic linting with Docker
|
||||
docker compose run -T vale --config=content/influxdb/cloud-dedicated/.vale.ini --minAlertLevel=error content/influxdb/cloud-dedicated/write-data/**/*.md
|
||||
```
|
||||
|
||||
### VS Code Integration
|
||||
|
||||
1. Install the [Vale VSCode](https://marketplace.visualstudio.com/items?itemName=ChrisChinchilla.vale-vscode) extension
|
||||
2. Set the `Vale:Vale CLI:Path` setting to `${workspaceFolder}/node_modules/.bin/vale`
|
||||
|
||||
### Alert Levels
|
||||
|
||||
Vale can raise different alert levels:
|
||||
|
||||
- **Error**: Problems that can cause content to render incorrectly, violations of branding guidelines, rejected vocabulary terms
|
||||
- **Warning**: General style guide rules and best practices
|
||||
- **Suggestion**: Style preferences that may require refactoring or updates to an exceptions list
|
||||
|
||||
### Configuration
|
||||
|
||||
- **Styles**: `.ci/vale/styles/` contains configuration for the custom `InfluxDataDocs` style
|
||||
- **Vocabulary**: Add accepted/rejected terms to `.ci/vale/styles/config/vocabularies`
|
||||
- **Product-specific**: Configure per-product styles like `content/influxdb/cloud-dedicated/.vale.ini`
|
||||
|
||||
For more configuration details, see [Vale configuration](https://vale.sh/docs/topics/config).
|
||||
|
||||
## Pre-commit Hooks
|
||||
|
||||
docs-v2 uses [Lefthook](https://github.com/evilmartians/lefthook) to manage Git hooks that run automatically during pre-commit and pre-push.
|
||||
|
||||
### What Runs Automatically
|
||||
|
||||
When you run `git commit`, Git runs:
|
||||
- **Vale**: Style linting (if configured)
|
||||
- **Prettier**: Code formatting
|
||||
- **Cypress**: Link validation tests
|
||||
- **Pytest**: Code block tests
|
||||
|
||||
### Skipping Pre-commit Hooks
|
||||
|
||||
We strongly recommend running linting and tests, but you can skip them:
|
||||
|
||||
```sh
|
||||
# Skip with --no-verify flag
|
||||
git commit -m "<COMMIT_MESSAGE>" --no-verify
|
||||
|
||||
# Skip with environment variable
|
||||
LEFTHOOK=0 git commit
|
||||
```
|
||||
|
||||
## Advanced Testing
|
||||
|
||||
### E2E Testing
|
||||
|
||||
```bash
|
||||
# Run all E2E tests
|
||||
yarn test:e2e
|
||||
|
||||
# Run specific E2E specs
|
||||
node cypress/support/run-e2e-specs.js --spec "cypress/e2e/content/article-links.cy.js"
|
||||
```
|
||||
|
||||
### JavaScript Testing and Debugging
|
||||
|
||||
For JavaScript code in the documentation UI (`assets/js`):
|
||||
|
||||
#### Using Source Maps and Chrome DevTools
|
||||
|
||||
1. In VS Code, select Run > Start Debugging
|
||||
2. Select "Debug Docs (source maps)" configuration
|
||||
3. Set breakpoints in the `assets/js/ns-hugo-imp:` namespace
|
||||
|
||||
#### Using Debug Helpers
|
||||
|
||||
1. Import debug helpers in your JavaScript module:
|
||||
```js
|
||||
import { debugLog, debugBreak, debugInspect } from './utils/debug-helpers.js';
|
||||
```
|
||||
|
||||
2. Insert debug statements:
|
||||
```js
|
||||
const data = debugInspect(someData, 'Data');
|
||||
debugLog('Processing data', 'myFunction');
|
||||
debugBreak(); // Add breakpoint
|
||||
```
|
||||
|
||||
3. Start Hugo: `yarn hugo server`
|
||||
4. In VS Code, select "Debug JS (debug-helpers)" configuration
|
||||
|
||||
Remember to remove debug statements before committing.
|
||||
|
||||
## Docker Compose Services
|
||||
|
||||
Available test services:
|
||||
|
||||
```bash
|
||||
# All code block tests
|
||||
docker compose --profile test up
|
||||
|
||||
# Individual product tests
|
||||
docker compose run --rm cloud-pytest
|
||||
docker compose run --rm v2-pytest
|
||||
docker compose run --rm telegraf-pytest
|
||||
|
||||
# Stop monitoring services
|
||||
yarn test:codeblocks:stop-monitors
|
||||
```
|
||||
|
||||
## Testing Best Practices
|
||||
|
||||
### Code Block Examples
|
||||
|
||||
- Always test code examples before committing
|
||||
- Use realistic data and examples that users would encounter
|
||||
- Include proper error handling in examples
|
||||
- Format code to fit within 80 characters
|
||||
- Use long options in command-line examples (`--option` vs `-o`)
|
||||
|
||||
### Link Validation
|
||||
|
||||
- Test links regularly, especially after content restructuring
|
||||
- Use appropriate cache TTL settings for your validation needs
|
||||
- Monitor cache hit rates to optimize performance
|
||||
- Clean up expired cache entries periodically
|
||||
|
||||
### Style Guidelines
|
||||
|
||||
- Run Vale regularly to catch style issues early
|
||||
- Add accepted terms to vocabulary files rather than ignoring errors
|
||||
- Configure product-specific styles for branding consistency
|
||||
- Review suggestions periodically for content improvement opportunities
|
||||
|
||||
## Related Files
|
||||
|
||||
- **Configuration**: `pytest.ini`, `cypress.config.js`, `lefthook.yml`
|
||||
- **Docker**: `compose.yaml`, `Dockerfile.pytest`
|
||||
- **Scripts**: `.github/scripts/` directory
|
||||
- **Test data**: `./test/` directory
|
||||
- **Vale config**: `.ci/vale/styles/`
|
||||
|
||||
## Getting Help
|
||||
|
||||
- **GitHub Issues**: [docs-v2 issues](https://github.com/influxdata/docs-v2/issues)
|
||||
- **Good first issues**: [good-first-issue label](https://github.com/influxdata/docs-v2/issues?q=is%3Aissue+is%3Aopen+label%3Agood-first-issue)
|
||||
- **InfluxData CLA**: [Sign here](https://www.influxdata.com/legal/cla/) for substantial contributions
|
||||
|
|
@ -7,6 +7,8 @@ import {
|
|||
FIRST_BROKEN_LINK_FILE,
|
||||
initializeReport,
|
||||
readBrokenLinksReport,
|
||||
saveCacheStats,
|
||||
saveValidationStrategy,
|
||||
} from './cypress/support/link-reporter.js';
|
||||
|
||||
export default defineConfig({
|
||||
|
|
@ -177,6 +179,63 @@ export default defineConfig({
|
|||
return true;
|
||||
}
|
||||
},
|
||||
|
||||
// Cache and incremental validation tasks
|
||||
saveCacheStatistics(stats) {
|
||||
try {
|
||||
saveCacheStats(stats);
|
||||
return true;
|
||||
} catch (error) {
|
||||
console.error(`Error saving cache stats: ${error.message}`);
|
||||
return false;
|
||||
}
|
||||
},
|
||||
|
||||
saveValidationStrategy(strategy) {
|
||||
try {
|
||||
saveValidationStrategy(strategy);
|
||||
return true;
|
||||
} catch (error) {
|
||||
console.error(`Error saving validation strategy: ${error.message}`);
|
||||
return false;
|
||||
}
|
||||
},
|
||||
|
||||
runIncrementalValidation(filePaths) {
|
||||
return new Promise(async (resolve, reject) => {
|
||||
try {
|
||||
const { IncrementalValidator } = await import(
|
||||
'./.github/scripts/incremental-validator.js'
|
||||
);
|
||||
const validator = new IncrementalValidator();
|
||||
const results = await validator.validateFiles(filePaths);
|
||||
resolve(results);
|
||||
} catch (error) {
|
||||
console.error(`Incremental validation error: ${error.message}`);
|
||||
reject(error);
|
||||
}
|
||||
});
|
||||
},
|
||||
|
||||
cacheValidationResults(filePath, fileHash, results) {
|
||||
return new Promise(async (resolve, reject) => {
|
||||
try {
|
||||
const { IncrementalValidator } = await import(
|
||||
'./.github/scripts/incremental-validator.js'
|
||||
);
|
||||
const validator = new IncrementalValidator();
|
||||
const success = await validator.cacheResults(
|
||||
filePath,
|
||||
fileHash,
|
||||
results
|
||||
);
|
||||
resolve(success);
|
||||
} catch (error) {
|
||||
console.error(`Cache validation results error: ${error.message}`);
|
||||
reject(error);
|
||||
}
|
||||
});
|
||||
},
|
||||
});
|
||||
|
||||
// Load plugins file using dynamic import for ESM compatibility
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
/// <reference types="cypress" />
|
||||
|
||||
describe('Article', () => {
|
||||
const subjects = Cypress.env('test_subjects').split(',');
|
||||
let subjects = Cypress.env('test_subjects').split(',');
|
||||
let validationStrategy = null;
|
||||
|
||||
// Always use HEAD for downloads to avoid timeouts
|
||||
const useHeadForDownloads = true;
|
||||
|
||||
|
|
@ -9,6 +11,55 @@ describe('Article', () => {
|
|||
before(() => {
|
||||
// Initialize the broken links report
|
||||
cy.task('initializeBrokenLinksReport');
|
||||
|
||||
// Get source file paths for incremental validation
|
||||
const testSubjectsData = Cypress.env('test_subjects_data');
|
||||
let sourceFilePaths = subjects; // fallback to subjects if no data available
|
||||
|
||||
if (testSubjectsData) {
|
||||
try {
|
||||
const urlToSourceData = JSON.parse(testSubjectsData);
|
||||
// Extract source file paths from the structured data
|
||||
sourceFilePaths = urlToSourceData.map((item) => item.source);
|
||||
} catch (e) {
|
||||
console.warn(
|
||||
'Could not parse test_subjects_data, using subjects as fallback'
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Run incremental validation analysis with source file paths
|
||||
cy.task('runIncrementalValidation', sourceFilePaths).then((results) => {
|
||||
validationStrategy = results.validationStrategy;
|
||||
|
||||
// Save cache statistics and validation strategy for reporting
|
||||
cy.task('saveCacheStatistics', results.cacheStats);
|
||||
cy.task('saveValidationStrategy', validationStrategy);
|
||||
|
||||
// Update subjects to only test files that need validation
|
||||
if (results.filesToValidate.length > 0) {
|
||||
subjects = results.filesToValidate.map((file) => {
|
||||
// Convert file path to URL format (same logic as map-files-to-urls.js)
|
||||
let url = file.filePath.replace(/^content/, '');
|
||||
url = url.replace(/\/_index\.(html|md)$/, '/');
|
||||
url = url.replace(/\.md$/, '/');
|
||||
url = url.replace(/\.html$/, '/');
|
||||
if (!url.startsWith('/')) {
|
||||
url = '/' + url;
|
||||
}
|
||||
return url;
|
||||
});
|
||||
|
||||
cy.log(`📊 Cache Analysis: ${results.cacheStats.hitRate}% hit rate`);
|
||||
cy.log(
|
||||
`🔄 Testing ${subjects.length} pages (${results.cacheStats.cacheHits} cached)`
|
||||
);
|
||||
} else {
|
||||
// All files are cached, no validation needed
|
||||
subjects = [];
|
||||
cy.log('✨ All files cached - skipping validation');
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// Helper function to identify download links
|
||||
|
|
|
|||
|
|
@ -7,6 +7,8 @@ import fs from 'fs';
|
|||
export const BROKEN_LINKS_FILE = '/tmp/broken_links_report.json';
|
||||
export const FIRST_BROKEN_LINK_FILE = '/tmp/first_broken_link.json';
|
||||
const SOURCES_FILE = '/tmp/test_subjects_sources.json';
|
||||
const CACHE_STATS_FILE = '/tmp/cache_statistics.json';
|
||||
const VALIDATION_STRATEGY_FILE = '/tmp/validation_strategy.json';
|
||||
|
||||
/**
|
||||
* Reads the broken links report from the file system
|
||||
|
|
@ -69,6 +71,65 @@ function readSourcesMapping() {
|
|||
return {};
|
||||
}
|
||||
|
||||
/**
|
||||
* Read cache statistics from file
|
||||
* @returns {Object|null} Cache statistics or null if not found
|
||||
*/
|
||||
function readCacheStats() {
|
||||
try {
|
||||
if (fs.existsSync(CACHE_STATS_FILE)) {
|
||||
const content = fs.readFileSync(CACHE_STATS_FILE, 'utf8');
|
||||
return JSON.parse(content);
|
||||
}
|
||||
} catch (err) {
|
||||
console.warn(`Warning: Could not read cache stats: ${err.message}`);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read validation strategy from file
|
||||
* @returns {Object|null} Validation strategy or null if not found
|
||||
*/
|
||||
function readValidationStrategy() {
|
||||
try {
|
||||
if (fs.existsSync(VALIDATION_STRATEGY_FILE)) {
|
||||
const content = fs.readFileSync(VALIDATION_STRATEGY_FILE, 'utf8');
|
||||
return JSON.parse(content);
|
||||
}
|
||||
} catch (err) {
|
||||
console.warn(`Warning: Could not read validation strategy: ${err.message}`);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Save cache statistics for reporting
|
||||
* @param {Object} stats - Cache statistics to save
|
||||
*/
|
||||
export function saveCacheStats(stats) {
|
||||
try {
|
||||
fs.writeFileSync(CACHE_STATS_FILE, JSON.stringify(stats, null, 2));
|
||||
} catch (err) {
|
||||
console.warn(`Warning: Could not save cache stats: ${err.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Save validation strategy for reporting
|
||||
* @param {Object} strategy - Validation strategy to save
|
||||
*/
|
||||
export function saveValidationStrategy(strategy) {
|
||||
try {
|
||||
fs.writeFileSync(
|
||||
VALIDATION_STRATEGY_FILE,
|
||||
JSON.stringify(strategy, null, 2)
|
||||
);
|
||||
} catch (err) {
|
||||
console.warn(`Warning: Could not save validation strategy: ${err.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Formats and displays the broken links report to the console
|
||||
* @param {Array} brokenLinksReport - The report data to display
|
||||
|
|
@ -80,6 +141,26 @@ export function displayBrokenLinksReport(brokenLinksReport = null) {
|
|||
brokenLinksReport = readBrokenLinksReport();
|
||||
}
|
||||
|
||||
// Read cache statistics and validation strategy
|
||||
const cacheStats = readCacheStats();
|
||||
const validationStrategy = readValidationStrategy();
|
||||
|
||||
// Display cache performance first
|
||||
if (cacheStats) {
|
||||
console.log('\n📊 Cache Performance:');
|
||||
console.log('=====================');
|
||||
console.log(`Cache hit rate: ${cacheStats.hitRate}%`);
|
||||
console.log(`Files cached: ${cacheStats.cacheHits}`);
|
||||
console.log(`Files validated: ${cacheStats.cacheMisses}`);
|
||||
|
||||
if (validationStrategy) {
|
||||
console.log(`Total files analyzed: ${validationStrategy.total}`);
|
||||
console.log(
|
||||
`Links needing validation: ${validationStrategy.newLinks.length}`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Check both the report and first broken link file to determine if we have broken links
|
||||
const firstBrokenLink = readFirstBrokenLink();
|
||||
|
||||
|
|
@ -88,7 +169,7 @@ export function displayBrokenLinksReport(brokenLinksReport = null) {
|
|||
(!brokenLinksReport || brokenLinksReport.length === 0) &&
|
||||
!firstBrokenLink
|
||||
) {
|
||||
console.log('✅ No broken links detected in the validation report');
|
||||
console.log('\n✅ No broken links detected in the validation report');
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
16
lefthook.yml
16
lefthook.yml
|
|
@ -111,13 +111,15 @@ pre-push:
|
|||
node cypress/support/run-e2e-specs.js --spec "cypress/e2e/content/article-links.cy.js" content/example.md
|
||||
exit $?
|
||||
|
||||
e2e-links:
|
||||
tags: test,links
|
||||
glob: 'content/*.{md,html}'
|
||||
run: |
|
||||
echo "Running link checker for: {staged_files}"
|
||||
yarn test:links {staged_files}
|
||||
exit $?
|
||||
# Link validation runs in GitHub actions.
|
||||
# You can still run it locally for development.
|
||||
# e2e-links:
|
||||
# tags: test,links
|
||||
# glob: 'content/*.{md,html}'
|
||||
# run: |
|
||||
# echo "Running link checker for: {staged_files}"
|
||||
# yarn test:links {staged_files}
|
||||
# exit $?
|
||||
|
||||
# Manage Docker containers
|
||||
prune-legacy-containers:
|
||||
|
|
|
|||
Loading…
Reference in New Issue