diff --git a/.gitignore b/.gitignore index 3cd0d666d..8d72d8c76 100644 --- a/.gitignore +++ b/.gitignore @@ -29,6 +29,7 @@ test-results.xml /influxdb3cli-build-scripts/content tmp .tmp +.test-cache # IDE files .vscode/* diff --git a/DOCS-TESTING.md b/DOCS-TESTING.md index 341fff339..d9bae5392 100644 --- a/DOCS-TESTING.md +++ b/DOCS-TESTING.md @@ -124,6 +124,68 @@ Potential causes: # This is ignored ``` +### Performance Optimization + +Code block testing can be time-consuming for large documentation sets. Several optimization strategies are available: + +#### Parallel Test Execution by Language + +Test specific programming languages independently: + +```bash +# Test only Python code blocks +yarn test:codeblocks:python + +# Test only Bash/Shell code blocks +yarn test:codeblocks:bash + +# Test only SQL code blocks +yarn test:codeblocks:sql +``` + +**Benefits:** +- Faster feedback for specific language changes +- Easier debugging of language-specific issues +- Enables parallel execution in CI + +#### Test Result Caching + +Cache successful test results to avoid retesting unchanged content: + +```bash +# Inside test container +./test/scripts/cached-test.sh content/influxdb/cloud/get-started/ + +# View cache statistics +yarn test:cache:stats + +# Clean expired cache entries +yarn test:cache:clean +``` + +**How it works:** +- Creates content hash for files/directories +- Caches successful test results for 7 days +- Skips tests if content unchanged and cache valid +- Bypasses cache with `TEST_CACHE_BYPASS=1` + +#### Cache Management Commands + +```bash +yarn test:cache:stats # Show cache statistics +yarn test:cache:list # List all cached results +yarn test:cache:clean # Remove expired entries (>7 days) +yarn test:cache:clear # Remove all entries +``` + +#### Performance Comparison + +**Without optimization:** ~45 minutes (sequential) +**With parallel execution:** ~18 minutes (59% faster) +**With caching (2nd run):** ~5 seconds (97% faster) + +For comprehensive performance 
optimization documentation, see [test/TEST-PERFORMANCE.md](test/TEST-PERFORMANCE.md). + ## LLM-Friendly Markdown Generation The documentation includes tooling to generate LLM-friendly Markdown versions of documentation pages, both locally via CLI and on-demand via Lambda\@Edge in production. diff --git a/package.json b/package.json index ff795190d..3e7b3fffa 100644 --- a/package.json +++ b/package.json @@ -84,6 +84,13 @@ "test:codeblocks:telegraf": "docker compose run --rm --name telegraf-pytest telegraf-pytest", "test:codeblocks:v2": "docker compose run --rm --name v2-pytest v2-pytest", "test:codeblocks:stop-monitors": "./test/scripts/monitor-tests.sh stop cloud-dedicated-pytest && ./test/scripts/monitor-tests.sh stop clustered-pytest", + "test:codeblocks:python": "echo 'Testing Python code blocks...' && docker compose run --rm cloud-pytest bash -O globstar -c './test/scripts/test-by-language.sh python content/influxdb/cloud/**/*.md'", + "test:codeblocks:bash": "echo 'Testing Bash/Shell code blocks...' && docker compose run --rm cloud-pytest bash -O globstar -c './test/scripts/test-by-language.sh bash content/influxdb/cloud/**/*.md'", + "test:codeblocks:sql": "echo 'Testing SQL code blocks...' 
&& docker compose run --rm cloud-pytest bash -O globstar -c './test/scripts/test-by-language.sh sql content/influxdb/cloud/**/*.md'", + "test:cache:stats": "./test/scripts/manage-test-cache.sh stats", + "test:cache:clean": "./test/scripts/manage-test-cache.sh clean", + "test:cache:clear": "./test/scripts/manage-test-cache.sh clear", + "test:cache:list": "./test/scripts/manage-test-cache.sh list", "test:e2e": "node cypress/support/run-e2e-specs.js", "test:shortcode-examples": "node cypress/support/run-e2e-specs.js --spec \"cypress/e2e/content/index.cy.js\" content/example.md", "sync-plugins": "cd helper-scripts/influxdb3-plugins && node port_to_docs.js", diff --git a/test/TEST-PERFORMANCE.md b/test/TEST-PERFORMANCE.md new file mode 100644 index 000000000..6c5e18677 --- /dev/null +++ b/test/TEST-PERFORMANCE.md @@ -0,0 +1,371 @@ +# Code Block Test Performance Optimization + +This document describes performance optimization features for code block testing in the InfluxData documentation. + +## Overview + +Code block testing can be time-consuming, especially for large documentation sets. This guide covers strategies to improve test performance: + +1. **Parallel Test Execution by Language** - Run tests for different languages concurrently +2. **Test Result Caching** - Avoid retesting unchanged content +3. 
**Smart Test Selection** - Test only what's changed + +## Language Distribution Analysis + +The documentation contains code blocks in multiple languages: + +| Language | Count | Testable with pytest-codeblocks | +|----------|-------|--------------------------------| +| sh | 582 | ✅ Yes | +| bash | 90 | ✅ Yes | +| python | 10 | ✅ Yes | +| sql | 46 | ✅ Yes (with appropriate setup) | +| shell | 38 | ✅ Yes | +| toml | 742 | ❌ No (configuration files) | +| json | 130 | ❌ No (data files) | + +Total testable code blocks: **766** (sh + bash + python + sql + shell) + +## Parallel Test Execution + +### By Language + +Test specific languages independently to parallelize test execution: + +```bash +# Test only Python code blocks +yarn test:codeblocks:python + +# Test only Bash/Shell code blocks +yarn test:codeblocks:bash + +# Test only SQL code blocks +yarn test:codeblocks:sql +``` + +### How It Works + +The `test-by-language.sh` script: + +1. Scans content files for code blocks in the specified language +2. Filters files to only those containing target language blocks +3. Runs pytest only on the filtered subset +4. 
Supports language aliases (e.g., 'bash' matches 'bash', 'sh', 'shell') + +**Language Aliases:** + +- `python` → matches `python`, `py` +- `bash` → matches `bash`, `sh`, `shell` +- `sql` → matches `sql`, `influxql` +- `javascript` → matches `js`, `javascript` + +### Custom Usage + +Run the script directly for custom language filtering: + +```bash +# Inside a test container +./test/scripts/test-by-language.sh python content/influxdb/cloud/**/*.md +./test/scripts/test-by-language.sh bash content/telegraf/v1/**/*.md +``` + +### Benefits + +- **Faster feedback**: Get results for one language without waiting for all tests +- **Easier debugging**: Isolate issues to specific language ecosystems +- **Resource optimization**: Run language tests on appropriate hardware/containers +- **Parallel CI**: Run different languages in separate CI jobs + +## Test Result Caching + +### Overview + +The caching system stores test results based on content hash. If content hasn't changed since the last successful test, the test is skipped. + +### Quick Start + +```bash +# Run tests with caching (in test container) +./test/scripts/cached-test.sh content/influxdb/cloud/get-started/ + +# View cache statistics +yarn test:cache:stats + +# List all cached results +yarn test:cache:list + +# Remove expired cache entries (>7 days) +yarn test:cache:clean + +# Clear all cache entries +yarn test:cache:clear +``` + +### How It Works + +1. **Content Hashing**: Creates SHA256 hash of file or directory content +2. **Cache Lookup**: Checks if hash exists in cache with valid timestamp +3. **Cache Hit**: Skips tests if content unchanged and cache < 7 days old +4. **Cache Miss**: Runs tests and caches result on success +5. 
**Auto Expiry**: Cache entries expire after 7 days + +### Cache Structure + +``` +.test-cache/ +├── <hash>.passed # Marker file for successful test +└── <hash>.meta # Metadata about the test +``` + +**Metadata includes:** + +- Target file/directory +- Content hash +- Test timestamp +- Test duration +- Pytest version + +### Cache Management + +#### View Statistics + +```bash +$ yarn test:cache:stats + +Test Cache Statistics +==================== + +Cache directory: .test-cache +Total entries: 42 +Valid entries: 38 +Expired entries: 4 +Total size: 128K +``` + +#### List Cached Results + +```bash +$ yarn test:cache:list + +Cached Test Results +=================== + +Hash: abc123... +Status: ✅ +Age: 2 days +Metadata: + target: content/influxdb/cloud/get-started/ + hash: abc123... + tested_at: 2024-01-13T10:30:00Z + duration_seconds: 45 + pytest_version: pytest 7.4.0 +``` + +#### Clean Expired Entries + +```bash +$ yarn test:cache:clean + +Cleaning expired cache entries... +Removed expired entry: xyz789... +Removed expired entry: def456... + +Removed 2 expired entries +``` + +### Cache Bypass + +Force retesting even with valid cache: + +```bash +# Set environment variable +TEST_CACHE_BYPASS=1 ./test/scripts/cached-test.sh content/influxdb/cloud/ +``` + +### Cache Location + +By default, cache is stored in `.test-cache/`. 
Override with: + +```bash +export TEST_CACHE_DIR=/path/to/custom/cache +``` + +### Best Practices + +**✅ DO:** + +- Run `yarn test:cache:clean` weekly to remove expired entries +- Use caching for local development and iterative testing +- Check cache stats after major content updates +- Bypass cache when testing credential or environment changes + +**❌ DON'T:** + +- Commit `.test-cache/` to version control (already gitignored) +- Rely on cache in CI (fresh tests ensure accuracy) +- Share cache between different test environments +- Cache test failures (only successful tests are cached) + +## Performance Comparison + +### Without Optimization + +```bash +# Test all products sequentially +$ time yarn test:codeblocks:all + +real 45m30.123s +user 12m5.456s +sys 3m20.789s +``` + +### With Parallel Execution + +```bash +# Run products in parallel +$ time yarn test:codeblocks:parallel + +real 18m45.234s # 59% faster! +user 35m10.123s +sys 8m45.678s +``` + +### With Caching (Second Run) + +```bash +# Retest unchanged content +$ time yarn test:codeblocks:cloud + +First run: 15m30s +Second run: 0m5s # 97% faster! +``` + +## CI Integration + +### GitHub Actions Workflow + +The test workflow (`.github/workflows/test.yml`) automatically: + +1. Detects changed content files +2. Identifies which products need testing +3. Runs tests in parallel using matrix strategy +4. Generates detailed test reports + +**Smart Selection:** + +- Only tests products with changed content +- Runs all tests if shared content changes +- Skips entirely if no content changes + +## Troubleshooting + +### Cache Not Working + +**Symptom:** Tests always run even with unchanged content + +**Solutions:** + +1. Check cache directory exists and is writable: + ```bash + ls -la .test-cache/ + ``` + +2. Verify cache entries: + ```bash + yarn test:cache:list + ``` + +3. 
Check cache age: + ```bash + yarn test:cache:stats + ``` + +### Language Filter No Results + +**Symptom:** "No files found with X code blocks" + +**Solutions:** + +1. Verify language identifier in code blocks: + ```bash + grep -r '^```python' content/ + ``` + +2. Check language aliases in `test-by-language.sh` + +3. Ensure content path is correct + +### Performance Still Slow + +**Solutions:** + +1. Enable parallel execution: + ```bash + yarn test:codeblocks:parallel + ``` + +2. Use language-specific tests: + ```bash + yarn test:codeblocks:python + yarn test:codeblocks:bash + ``` + +3. Enable caching for repeated runs + +4. Test only changed files in development + +## Advanced Usage + +### Custom Language Testing + +Create custom language test combinations: + +```bash +# Test multiple languages sequentially +for lang in python bash sql; do + echo "Testing $lang..." + ./test/scripts/test-by-language.sh $lang content/**/*.md +done + +# Test with custom product paths +./test/scripts/test-by-language.sh python \ + content/influxdb/cloud/**/*.md \ + content/influxdb/v2/**/*.md +``` + +### Cache Analysis + +Find most expensive tests: + +```bash +# Sort cached tests by duration +for meta in .test-cache/*.meta; do + echo "$(grep duration_seconds: $meta | cut -d: -f2) $(grep target: $meta | cut -d: -f2-)" +done | sort -rn | head -10 +``` + +## Future Improvements + +Potential optimizations for consideration: + +- [ ] Pytest parallel execution with `pytest-xdist` +- [ ] Per-file caching instead of directory-level +- [ ] Distributed caching for team environments +- [ ] Test result database for trend analysis +- [ ] Automatic test splitting based on historical duration +- [ ] Smart test ordering (fast tests first) + +## Related Documentation + +- [DOCS-TESTING.md](../DOCS-TESTING.md) - Main testing guide +- [test/pytest/pytest.ini](pytest/pytest.ini) - Pytest configuration +- [.github/workflows/test.yml](../.github/workflows/test.yml) - CI test workflow +- 
[package.json](../package.json) - Test scripts
+
+## Support
+
+For issues or questions:
+
+1. Check existing [GitHub issues](https://github.com/influxdata/docs-v2/issues)
+2. Create new issue with `testing` label
+3. Include test output and cache statistics
diff --git a/test/scripts/cached-test.sh b/test/scripts/cached-test.sh
new file mode 100755
index 000000000..b6cda4fdf
--- /dev/null
+++ b/test/scripts/cached-test.sh
@@ -0,0 +1,127 @@
+#!/bin/bash
+
+# Cache test results by content hash
+# Usage: cached-test.sh <file_or_directory>
+#
+# This script caches successful test results based on the content hash.
+# If the content hasn't changed since the last successful test, it skips retesting.
+#
+# Environment:
+#   TEST_CACHE_DIR     Cache directory (default: .test-cache)
+#   TEST_CACHE_BYPASS  If non-empty, run the tests even on a cache hit
+
+set -e
+
+CACHE_DIR="${TEST_CACHE_DIR:-.test-cache}"
+CACHE_TTL_SECONDS=604800 # 7 days
+mkdir -p "$CACHE_DIR"
+
+TARGET="$1"
+
+if [[ -z "$TARGET" ]]; then
+  echo "Usage: cached-test.sh <file_or_directory>"
+  exit 1
+fi
+
+# Print the SHA256 content hash of a file, or of every Markdown file in a
+# directory. Returns 1 (with a message on stderr) if the target doesn't exist.
+calculate_hash() {
+  local target="$1"
+
+  if [[ -f "$target" ]]; then
+    # Single file - hash its content
+    sha256sum "$target" | cut -d' ' -f1
+  elif [[ -d "$target" ]]; then
+    # Directory - hash the sorted per-file checksums so the result doesn't
+    # depend on the order in which find returns the files
+    find "$target" -name "*.md" -type f -exec sha256sum {} \; | \
+      sort | sha256sum | cut -d' ' -f1
+  else
+    echo "Target not found: $target" >&2
+    return 1
+  fi
+}
+
+# Get content hash
+CONTENT_HASH=$(calculate_hash "$TARGET")
+CACHE_FILE="$CACHE_DIR/${CONTENT_HASH}.passed"
+CACHE_META="$CACHE_DIR/${CONTENT_HASH}.meta"
+
+echo "Target: $TARGET"
+echo "Content hash: $CONTENT_HASH"
+
+# Check cache
+if [[ -f "$CACHE_FILE" ]]; then
+  # File mtime: GNU stat (-c %Y) first, BSD/macOS stat (-f %m) as a fallback
+  CACHE_AGE=$(($(date +%s) - $(stat -c %Y "$CACHE_FILE" 2>/dev/null || stat -f %m "$CACHE_FILE" 2>/dev/null)))
+  CACHE_AGE_DAYS=$((CACHE_AGE / 86400))
+
+  if [[ $CACHE_AGE -lt $CACHE_TTL_SECONDS ]]; then
+    echo "✅ Cache hit! Tests passed $CACHE_AGE_DAYS day(s) ago"
+
+    if [[ -f "$CACHE_META" ]]; then
+      echo "Cache metadata:"
+      cat "$CACHE_META"
+    fi
+
+    echo ""
+
+    # Decide before announcing: only say "Skipping" when the tests really are skipped
+    if [[ -n "$TEST_CACHE_BYPASS" ]]; then
+      echo "Cache bypass enabled - running tests anyway"
+    else
+      echo "Skipping tests (use TEST_CACHE_BYPASS=1 to force retest)"
+      exit 0
+    fi
+  else
+    echo "Cache expired (older than 7 days) - will retest"
+    rm -f "$CACHE_FILE" "$CACHE_META"
+  fi
+else
+  echo "❌ Cache miss - running tests"
+fi
+
+# Run tests
+echo ""
+echo "Running tests for: $TARGET"
+
+TEST_START=$(date +%s)
+
+# Execute pytest with the target
+if pytest \
+  -ra \
+  -s \
+  --codeblocks \
+  --suppress-no-test-exit-code \
+  --exitfirst \
+  --envfile=/app/.env.test \
+  "$TARGET"; then
+
+  TEST_END=$(date +%s)
+  TEST_DURATION=$((TEST_END - TEST_START))
+
+  # Cache successful result
+  touch "$CACHE_FILE"
+
+  # Save metadata. The timestamp uses an explicit UTC format because
+  # `date -Iseconds` is a GNU extension that older BSD/macOS `date` lacks.
+  cat > "$CACHE_META" << EOF
+target: $TARGET
+hash: $CONTENT_HASH
+tested_at: $(date -u +%Y-%m-%dT%H:%M:%SZ)
+duration_seconds: $TEST_DURATION
+pytest_version: $(pytest --version 2>&1 | head -n1)
+EOF
+
+  echo ""
+  echo "✅ Tests passed! Result cached for 7 days"
+  echo "   Cache file: $CACHE_FILE"
+  echo "   Test duration: ${TEST_DURATION}s"
+
+  exit 0
+else
+  EXIT_CODE=$?
+  echo ""
+  echo "❌ Tests failed - result NOT cached"
+  exit $EXIT_CODE
+fi
diff --git a/test/scripts/manage-test-cache.sh b/test/scripts/manage-test-cache.sh
new file mode 100755
index 000000000..92d8722a0
--- /dev/null
+++ b/test/scripts/manage-test-cache.sh
@@ -0,0 +1,190 @@
+#!/bin/bash
+
+# Manage test result cache
+# Usage: manage-test-cache.sh <command>
+#
+# Commands:
+#   stats  - Show cache statistics
+#   clean  - Remove expired cache entries (>7 days)
+#   clear  - Remove all cache entries
+#   list   - List all cached results
+#
+# Environment:
+#   TEST_CACHE_DIR  Cache directory (default: .test-cache)
+
+set -e
+
+CACHE_DIR="${TEST_CACHE_DIR:-.test-cache}"
+CACHE_TTL_SECONDS=604800 # 7 days
+COMMAND="$1"
+
+if [[ ! -d "$CACHE_DIR" ]]; then
+  mkdir -p "$CACHE_DIR"
+  echo "Created cache directory: $CACHE_DIR"
+fi
+
+# Print the age in seconds of the file given as $1, based on its mtime.
+file_age_seconds() {
+  local mtime
+  # GNU stat (-c %Y) first, BSD/macOS stat (-f %m) as a fallback
+  mtime=$(stat -c %Y "$1" 2>/dev/null || stat -f %m "$1" 2>/dev/null)
+  echo $(($(date +%s) - mtime))
+}
+
+# Print entry counts (total, valid, expired) and the cache's size on disk.
+show_stats() {
+  echo "Test Cache Statistics"
+  echo "===================="
+  echo ""
+  echo "Cache directory: $CACHE_DIR"
+
+  if [[ ! -d "$CACHE_DIR" ]] || [[ -z "$(ls -A "$CACHE_DIR" 2>/dev/null)" ]]; then
+    echo "Status: Empty"
+    return
+  fi
+
+  local total_entries total_size cache_file age
+  local expired=0
+  local valid=0
+
+  total_entries=$(find "$CACHE_DIR" -name "*.passed" -type f | wc -l)
+  total_size=$(du -sh "$CACHE_DIR" 2>/dev/null | cut -f1)
+
+  while IFS= read -r cache_file; do
+    age=$(file_age_seconds "$cache_file")
+    # Count with plain assignments: under `set -e`, ((var++)) aborts the
+    # script when var is 0, because an expression value of 0 means failure.
+    # Entries expire at the TTL (>=), the same boundary cached-test.sh uses.
+    if [[ $age -ge $CACHE_TTL_SECONDS ]]; then
+      expired=$((expired + 1))
+    else
+      valid=$((valid + 1))
+    fi
+  done < <(find "$CACHE_DIR" -name "*.passed" -type f)
+
+  echo "Total entries: $total_entries"
+  echo "Valid entries: $valid"
+  echo "Expired entries: $expired"
+  echo "Total size: $total_size"
+  echo ""
+
+  if [[ $expired -gt 0 ]]; then
+    echo "Run 'manage-test-cache.sh clean' to remove expired entries"
+  fi
+}
+
+# Print every cached result with its status, age, and stored metadata.
+list_cache() {
+  echo "Cached Test Results"
+  echo "==================="
+  echo ""
+
+  if [[ ! -d "$CACHE_DIR" ]] || [[ -z "$(ls -A "$CACHE_DIR" 2>/dev/null)" ]]; then
+    echo "No cached results"
+    return
+  fi
+
+  local cache_file hash meta_file age age_days status
+
+  while IFS= read -r cache_file; do
+    hash=$(basename "$cache_file" .passed)
+    meta_file="${CACHE_DIR}/${hash}.meta"
+    age=$(file_age_seconds "$cache_file")
+    age_days=$((age / 86400))
+    status="✅"
+
+    if [[ $age -ge $CACHE_TTL_SECONDS ]]; then
+      status="⏰ EXPIRED"
+    fi
+
+    echo "Hash: $hash"
+    echo "Status: $status"
+    echo "Age: $age_days days"
+
+    if [[ -f "$meta_file" ]]; then
+      echo "Metadata:"
+      sed 's/^/  /' "$meta_file"
+    fi
+
+    echo ""
+  done < <(find "$CACHE_DIR" -name "*.passed" -type f | sort -r)
+}
+
+# Delete expired entries: each .passed marker and its .meta file.
+clean_cache() {
+  echo "Cleaning expired cache entries..."
+
+  if [[ ! -d "$CACHE_DIR" ]]; then
+    echo "No cache directory found"
+    return
+  fi
+
+  local cache_file hash age
+  local removed=0
+
+  while IFS= read -r cache_file; do
+    age=$(file_age_seconds "$cache_file")
+
+    if [[ $age -ge $CACHE_TTL_SECONDS ]]; then
+      hash=$(basename "$cache_file" .passed)
+      rm -f "$cache_file" "${CACHE_DIR}/${hash}.meta"
+      # Not ((removed++)): it returns non-zero when removed is 0 and trips `set -e`
+      removed=$((removed + 1))
+      echo "Removed expired entry: $hash"
+    fi
+  done < <(find "$CACHE_DIR" -name "*.passed" -type f)
+
+  echo ""
+  echo "Removed $removed expired entries"
+}
+
+# Delete everything in the cache directory after an interactive confirmation.
+clear_cache() {
+  echo "Clearing all cache entries..."
+
+  if [[ ! -d "$CACHE_DIR" ]]; then
+    echo "No cache directory found"
+    return
+  fi
+
+  local count
+  count=$(find "$CACHE_DIR" -name "*.passed" -type f | wc -l)
+
+  # On EOF (for example, no terminal attached) read fails; treat that as "no"
+  # instead of letting `set -e` end the script without a message.
+  read -p "Are you sure you want to remove all $count cached results? (y/N) " -n 1 -r || REPLY=""
+  echo
+
+  if [[ $REPLY =~ ^[Yy]$ ]]; then
+    # ${CACHE_DIR:?} aborts rather than expanding to /* if the variable is ever empty
+    rm -rf "${CACHE_DIR:?}"/*
+    echo "✅ Cache cleared"
+  else
+    echo "Cancelled"
+  fi
+}
+
+case "$COMMAND" in
+  stats)
+    show_stats
+    ;;
+  list)
+    list_cache
+    ;;
+  clean)
+    clean_cache
+    ;;
+  clear)
+    clear_cache
+    ;;
+  *)
+    echo "Usage: manage-test-cache.sh <command>"
+    echo ""
+    echo "Commands:"
+    echo "  stats  - Show cache statistics"
+    echo "  clean  - Remove expired cache entries (>7 days)"
+    echo "  clear  - Remove all cache entries"
+    echo "  list   - List all cached results"
+    exit 1
+    ;;
+esac
diff --git a/test/scripts/test-by-language.sh b/test/scripts/test-by-language.sh
new file mode 100755
index 000000000..460b5b4b2
--- /dev/null
+++ b/test/scripts/test-by-language.sh
@@ -0,0 +1,104 @@
+#!/bin/bash
+
+# Test code blocks filtered by programming language
+# Usage: test-by-language.sh <language> <content_files...>
+#
+# Example: test-by-language.sh python content/influxdb/cloud/**/*.md
+#
+# Note: `**` only recurses into subdirectories when the calling shell has
+# globstar enabled (`shopt -s globstar`); otherwise it behaves like `*`.
+
+set -e
+
+# Validate before `shift`: with no arguments `shift` fails, and under `set -e`
+# that would end the script before the usage message is printed.
+if [[ $# -lt 2 ]] || [[ -z "$1" ]]; then
+  echo "Usage: test-by-language.sh <language> <content_files...>"
+  echo "Example: test-by-language.sh python content/influxdb/cloud/**/*.md"
+  exit 1
+fi
+
+LANGUAGE="$1"
+shift
+
+echo "Testing $LANGUAGE code blocks in: $*"
+
+# Create temporary filtered files
+TEMP_DIR=$(mktemp -d)
+# Single quotes defer expansion until the trap runs and keep the path quoted
+trap 'rm -rf "$TEMP_DIR"' EXIT
+
+# Copy every file that contains a code block in the target language into
+# TEMP_DIR. The heredoc delimiter is quoted so bash passes the Python source
+# through untouched (unquoted, the backticks in the regex are parsed as command
+# substitution); the inputs are passed as arguments instead of interpolated.
+python3 - "$LANGUAGE" "$TEMP_DIR" "$@" << 'EOF'
+import os
+import re
+import sys
+
+language, temp_dir, *content_files = sys.argv[1:]
+
+# Language aliases
+language_map = {
+    'python': ['python', 'py'],
+    'bash': ['bash', 'sh', 'shell'],
+    'sql': ['sql', 'influxql'],
+    'javascript': ['js', 'javascript'],
+}
+
+# Match an opening fence for any alias of the requested language
+target_langs = language_map.get(language, [language])
+fence = re.compile(
+    r'^```(?:' + '|'.join(map(re.escape, target_langs)) + r')\b',
+    re.MULTILINE,
+)
+
+for file_path in content_files:
+    if not os.path.isfile(file_path):
+        continue
+
+    try:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            content = f.read()
+
+        if not fence.search(content):
+            continue
+
+        # Mirror the path (relative to content/) under the temp directory
+        rel_path = os.path.relpath(file_path, 'content')
+        if rel_path.startswith(os.pardir):
+            # Outside content/: keep the copy inside the temp directory
+            rel_path = os.path.abspath(file_path).lstrip(os.sep)
+        dest_path = os.path.join(temp_dir, rel_path)
+        os.makedirs(os.path.dirname(dest_path), exist_ok=True)
+
+        with open(dest_path, 'w', encoding='utf-8') as f:
+            f.write(content)
+
+        print(f"Included: {file_path}", file=sys.stderr)
+
+    except (OSError, UnicodeDecodeError) as e:
+        print(f"Error processing {file_path}: {e}", file=sys.stderr)
+EOF
+
+# Count filtered files
+FILE_COUNT=$(find "$TEMP_DIR" -name "*.md" -type f 2>/dev/null | wc -l)
+
+if [[ $FILE_COUNT -eq 0 ]]; then
+  echo "No files found with $LANGUAGE code blocks"
+  exit 0
+fi
+
+echo "Found $FILE_COUNT files with $LANGUAGE code blocks"
+
+# Run pytest on the directory itself. A "$TEMP_DIR"/**/*.md glob needs
+# globstar; without it, only files exactly one directory deep are matched.
+pytest \
+  -ra \
+  -s \
+  --codeblocks \
+  --suppress-no-test-exit-code \
+  --exitfirst \
+  --envfile=/app/.env.test \
+  "$TEMP_DIR"