diff --git a/cypress/e2e/content/article-links.cy.js b/cypress/e2e/content/article-links.cy.js
index a8a89f07b..0ce8d4677 100644
--- a/cypress/e2e/content/article-links.cy.js
+++ b/cypress/e2e/content/article-links.cy.js
@@ -6,8 +6,8 @@ describe('Article', () => {
         .split(',')
         .filter((s) => s.trim() !== '')
     : [];
-  let validationStrategy = null;
-  let shouldSkipAllTests = false; // Flag to skip tests when all files are cached
+
+  // Cache will be checked during test execution at the URL level
 
   // Always use HEAD for downloads to avoid timeouts
   const useHeadForDownloads = true;
@@ -16,6 +16,42 @@ describe('Article', () => {
   before(() => {
     // Initialize the broken links report
     cy.task('initializeBrokenLinksReport');
+
+    // Clean up expired cache entries
+    cy.task('cleanupCache').then((cleaned) => {
+      if (cleaned > 0) {
+        cy.log(`🧹 Cleaned up ${cleaned} expired cache entries`);
+      }
+    });
+  });
+
+  // Display cache statistics after all tests complete
+  after(() => {
+    cy.task('getCacheStats').then((stats) => {
+      cy.log('📊 Link Validation Cache Statistics:');
+      cy.log(` • Cache hits: ${stats.hits}`);
+      cy.log(` • Cache misses: ${stats.misses}`);
+      cy.log(` • New entries stored: ${stats.stores}`);
+      cy.log(` • Hit rate: ${stats.hitRate}`);
+      cy.log(` • Total validations: ${stats.total}`);
+
+      if (stats.total > 0) {
+        const message = stats.hits > 0
+          ? `✨ Cache optimization saved ${stats.hits} link validations`
+          : '🔄 No cache hits - all links were validated fresh';
+        cy.log(message);
+      }
+
+      // Save cache statistics for the reporter to display
+      cy.task('saveCacheStatsForReporter', {
+        hitRate: parseFloat(stats.hitRate.replace('%', '')),
+        cacheHits: stats.hits,
+        cacheMisses: stats.misses,
+        totalValidations: stats.total,
+        newEntriesStored: stats.stores,
+        cleanups: stats.cleanups
+      });
+    });
   });
 
   // Helper function to identify download links
@@ -57,8 +93,45 @@ describe('Article', () => {
     return hasDownloadExtension || isFromDownloadDomain;
   }
 
-  // Helper function to make appropriate request based on link type
+  // Helper function for handling failed links
+  function handleFailedLink(url, status, type, redirectChain = '', linkText = '', pageUrl = '') {
+    // Report the broken link
+    cy.task('reportBrokenLink', {
+      url: url + redirectChain,
+      status,
+      type,
+      linkText,
+      page: pageUrl,
+    });
+
+    // Throw error for broken links
+    throw new Error(
+      `BROKEN ${type.toUpperCase()} LINK: ${url} (status: ${status})${redirectChain} on ${pageUrl}`
+    );
+  }
+
+  // Helper function to test a link with cache integration
   function testLink(href, linkText = '', pageUrl) {
+    // Single lookup: a separate isLinkCached call would count each hit twice
+    return cy.task('getLinkCache', href).then((cachedResult) => {
+      if (!cachedResult) {
+        // Not cached (task returned null), perform actual validation
+        return performLinkValidation(href, linkText, pageUrl);
+      }
+
+      cy.log(`✅ Cache hit: ${href}`);
+      const cachedStatus = cachedResult.result && cachedResult.result.status;
+      if (cachedStatus >= 400) {
+        // Cached result shows this link is broken
+        const cachedType = cachedResult.result.type || 'cached';
+        handleFailedLink(href, cachedStatus, cachedType, '', linkText, pageUrl);
+      }
+      // For successful cached results, just return - no further action needed
+    });
+  }
+
+  // Helper function to perform actual link validation and cache the result
+  function performLinkValidation(href, linkText = '', pageUrl) {
     // Common request options for both methods
     const requestOptions = {
       failOnStatusCode: true,
@@ -68,196 +141,78 @@ describe('Article', () => {
       retryOnStatusCodeFailure: true, // Retry on 5xx errors
     };
 
-    function handleFailedLink(url, status, type, redirectChain = '') {
-      // Report the broken link
-      cy.task('reportBrokenLink', {
-        url: url + redirectChain,
-        status,
-        type,
-        linkText,
-        page: pageUrl,
-      });
-
-      // Throw error for broken links
-      throw new Error(
-        `BROKEN ${type.toUpperCase()} LINK: ${url} (status: ${status})${redirectChain} on ${pageUrl}`
-      );
-    }
 
     if (useHeadForDownloads && isDownloadLink(href)) {
       cy.log(`** Testing download link with HEAD: ${href} **`);
-      cy.request({
+      return cy.request({
         method: 'HEAD',
         url: href,
         ...requestOptions,
       }).then((response) => {
+        // Prepare result for caching
+        const result = {
+          status: response.status,
+          type: 'download',
+          timestamp: new Date().toISOString()
+        };
+
         // Check final status after following any redirects
         if (response.status >= 400) {
-          // Build redirect info string if available
           const redirectInfo =
             response.redirects && response.redirects.length > 0
               ? ` (redirected to: ${response.redirects.join(' -> ')})`
               : '';
-
-          handleFailedLink(href, response.status, 'download', redirectInfo);
+
+          // Cache the failed result
+          cy.task('setLinkCache', { url: href, result });
+          handleFailedLink(href, response.status, 'download', redirectInfo, linkText, pageUrl);
+        } else {
+          // Cache the successful result
+          cy.task('setLinkCache', { url: href, result });
         }
       });
     } else {
       cy.log(`** Testing link: ${href} **`);
-      cy.log(JSON.stringify(requestOptions));
-      cy.request({
+      return cy.request({
         url: href,
         ...requestOptions,
       }).then((response) => {
-        // Check final status after following any redirects
+        // Prepare result for caching
+        const result = {
+          status: response.status,
+          type: 'regular',
+          timestamp: new Date().toISOString()
+        };
+
         if (response.status >= 400) {
-          // Build redirect info string if available
           const redirectInfo =
             response.redirects && response.redirects.length > 0
               ? ` (redirected to: ${response.redirects.join(' -> ')})`
               : '';
-
-          handleFailedLink(href, response.status, 'regular', redirectInfo);
+
+          // Cache the failed result
+          cy.task('setLinkCache', { url: href, result });
+          handleFailedLink(href, response.status, 'regular', redirectInfo, linkText, pageUrl);
+        } else {
+          // Cache the successful result
+          cy.task('setLinkCache', { url: href, result });
         }
       });
     }
   }
 
-  // Test implementation for subjects
-  // Add debugging information about test subjects
+  // Test setup validation
   it('Test Setup Validation', function () {
-    cy.log(`📋 Initial Test Configuration:`);
-    cy.log(` • Initial test subjects count: ${subjects.length}`);
-
-    // Get source file paths for incremental validation
-    const testSubjectsData = Cypress.env('test_subjects_data');
-    let sourceFilePaths = subjects; // fallback to subjects if no data available
-
-    if (testSubjectsData) {
-      try {
-        const urlToSourceData = JSON.parse(testSubjectsData);
-        // Extract source file paths from the structured data
-        sourceFilePaths = urlToSourceData.map((item) => item.source);
-        cy.log(` • Source files to analyze: ${sourceFilePaths.length}`);
-      } catch (e) {
-        cy.log(
-          '⚠️ Could not parse test_subjects_data, using subjects as fallback'
-        );
-        sourceFilePaths = subjects;
-      }
-    }
-
-    // Only run incremental validation if we have source file paths
-    if (sourceFilePaths.length > 0) {
-      cy.log('🔄 Running incremental validation analysis...');
-      cy.log(
-        ` • Analyzing ${sourceFilePaths.length} files: ${sourceFilePaths.join(', ')}`
-      );
-
-      // Run incremental validation with proper error handling
-      cy.task('runIncrementalValidation', sourceFilePaths).then((results) => {
-        if (!results) {
-          cy.log('⚠️ No results returned from incremental validation');
-          cy.log(
-            '🔄 Falling back to test all provided subjects without cache optimization'
-          );
-          return;
-        }
-
-        // Check if results have expected structure
-        if (!results.validationStrategy || !results.cacheStats) {
-          cy.log('⚠️ Incremental validation results missing expected fields');
-          cy.log(` • Results: ${JSON.stringify(results)}`);
-          cy.log(
-            '🔄 Falling back to test all provided subjects without cache optimization'
-          );
-          return;
-        }
-
-        validationStrategy = results.validationStrategy;
-
-        // Save cache statistics and validation strategy for reporting
-        cy.task('saveCacheStatistics', results.cacheStats);
-        cy.task('saveValidationStrategy', validationStrategy);
-
-        // Update subjects to only test files that need validation
-        if (results.filesToValidate && results.filesToValidate.length > 0) {
-          // Convert file paths to URLs using shared utility via Cypress task
-          const urlPromises = results.filesToValidate.map((file) =>
-            cy.task('filePathToUrl', file.filePath)
-          );
-
-          cy.wrap(Promise.all(urlPromises)).then((urls) => {
-            subjects = urls;
-
-            cy.log(
-              `📊 Cache Analysis: ${results.cacheStats.hitRate}% hit rate`
-            );
-            cy.log(
-              `🔄 Testing ${subjects.length} pages (${results.cacheStats.cacheHits} cached)`
-            );
-            cy.log('✅ Incremental validation completed - ready to test');
-          });
-        } else {
-          // All files are cached, no validation needed
-          shouldSkipAllTests = true; // Set flag to skip all tests
-          cy.log('✨ All files cached - will skip all validation tests');
-          cy.log(
-            `📊 Cache hit rate: ${results.cacheStats.hitRate}% (${results.cacheStats.cacheHits}/${results.cacheStats.totalFiles} files cached)`
-          );
-          cy.log('🎯 No new validation needed - this is the expected outcome');
-          cy.log('⏭️ All link validation tests will be skipped');
-        }
-      });
-    } else {
-      cy.log('⚠️ No source file paths available, using all provided subjects');
-
-      // Set a simple validation strategy when no source data is available
-      validationStrategy = {
-        noSourceData: true,
-        unchanged: [],
-        changed: [],
-        total: subjects.length,
-      };
-
-      cy.log(
-        `📋 Testing ${subjects.length} pages without incremental validation`
-      );
-    }
-
-    // Check for truly problematic scenarios
-    if (!validationStrategy && subjects.length === 0) {
-      const testSubjectsData = Cypress.env('test_subjects_data');
-      if (
-        !testSubjectsData ||
-        testSubjectsData === '' ||
-        testSubjectsData === '[]'
-      ) {
-        cy.log('❌ Critical setup issue detected:');
-        cy.log(' • No validation strategy');
-        cy.log(' • No test subjects');
-        cy.log(' • No test subjects data');
-        cy.log(' This indicates a fundamental configuration problem');
-
-        // Only fail in this truly problematic case
-        throw new Error(
-          'Critical test setup failure: No strategy, subjects, or data available'
-        );
-      }
-    }
-
-    // Always pass if we get to this point - the setup is valid
-    cy.log('✅ Test setup validation completed successfully');
+    cy.log(`📋 Test Configuration:`);
+    cy.log(` • Test subjects: ${subjects.length}`);
+    cy.log(` • Cache: URL-level caching with 30-day TTL`);
+    cy.log(` • Link validation: Internal, anchor, and allowed external links`);
+
+    cy.log('✅ Test setup validation completed');
   });
 
   subjects.forEach((subject) => {
     it(`${subject} has valid internal links`, function () {
-      // Skip test if all files are cached
-      if (shouldSkipAllTests) {
-        cy.log('✅ All files cached - skipping internal links test');
-        this.skip();
-        return;
-      }
 
       // Add error handling for page visit failures
       cy.visit(`${subject}`, { timeout: 20000 }).then(() => {
@@ -291,12 +246,6 @@ describe('Article', () => {
     });
 
     it(`${subject} has valid anchor links`, function () {
-      // Skip test if all files are cached
-      if (shouldSkipAllTests) {
-        cy.log('✅ All files cached - skipping anchor links test');
-        this.skip();
-        return;
-      }
 
       cy.visit(`${subject}`).then(() => {
         cy.log(`✅ Successfully loaded page for anchor testing: ${subject}`);
@@ -351,12 +300,6 @@ describe('Article', () => {
     });
 
     it(`${subject} has valid external links`, function () {
-      // Skip test if all files are cached
-      if (shouldSkipAllTests) {
-        cy.log('✅ All files cached - skipping external links test');
-        this.skip();
-        return;
-      }
 
       // Check if we should skip external links entirely
       if (Cypress.env('skipExternalLinks') === true) {
diff --git a/cypress/support/hugo-server.js b/cypress/support/hugo-server.js
index d4e4c7361..8121e3d31 100644
--- a/cypress/support/hugo-server.js
+++ b/cypress/support/hugo-server.js
@@ -8,6 +8,7 @@ import process from 'process';
 export const HUGO_ENVIRONMENT = 'testing';
 export const HUGO_PORT = 1315;
 export const HUGO_LOG_FILE = '/tmp/hugo_server.log';
+export const HUGO_SHUTDOWN_TIMEOUT = 5000; // 5 second timeout for graceful shutdown
 
 /**
  * Check if a port is already in use
diff --git a/cypress/support/link-cache.js b/cypress/support/link-cache.js
new file mode 100644
index 000000000..9df6db0d6
--- /dev/null
+++ b/cypress/support/link-cache.js
@@ -0,0 +1,213 @@
+/**
+ * Link Cache Manager for Cypress Tests
+ * Manages caching of link validation results at the URL level
+ */
+
+import fs from 'fs';
+import path from 'path';
+import crypto from 'crypto';
+
+const CACHE_VERSION = 'v2';
+const CACHE_KEY_PREFIX = 'link-validation';
+const LOCAL_CACHE_DIR = path.join(process.cwd(), '.cache', 'link-validation');
+
+/**
+ * Cache manager for individual link validation results
+ */
+export class LinkCacheManager {
+  constructor(options = {}) {
+    this.localCacheDir = options.localCacheDir || LOCAL_CACHE_DIR;
+
+    // Configurable cache TTL - option, then LINK_CACHE_TTL_DAYS, then 30 days
+    this.cacheTTLDays =
+      options.cacheTTLDays || Number.parseInt(process.env.LINK_CACHE_TTL_DAYS, 10) || 30;
+    this.maxAge = this.cacheTTLDays * 24 * 60 * 60 * 1000;
+
+    this.ensureLocalCacheDir();
+
+    // Track cache statistics
+    this.stats = {
+      hits: 0,
+      misses: 0,
+      stores: 0,
+      cleanups: 0
+    };
+  }
+
+  ensureLocalCacheDir() {
+    if (!fs.existsSync(this.localCacheDir)) {
+      fs.mkdirSync(this.localCacheDir, { recursive: true });
+    }
+  }
+
+  /**
+   * Generate cache key for a URL
+   * @param {string} url - The URL to cache
+   * @returns {string} Cache key
+   */
+  generateCacheKey(url) {
+    const urlHash = crypto
+      .createHash('sha256')
+      .update(url)
+      .digest('hex')
+      .substring(0, 16);
+    return `${CACHE_KEY_PREFIX}-${CACHE_VERSION}-${urlHash}`;
+  }
+
+  /**
+   * Get cache file path for a URL
+   * @param {string} url - The URL
+   * @returns {string} File path
+   */
+  getCacheFilePath(url) {
+    const cacheKey = this.generateCacheKey(url);
+    return path.join(this.localCacheDir, `${cacheKey}.json`);
+  }
+
+  /**
+   * Look up the cached validation result for a URL (updates hit/miss stats)
+   * @param {string} url - The URL to check
+   * @returns {Object|null} Cached entry, or null when missing, expired or corrupted
+   */
+  get(url) {
+    const cacheFile = this.getCacheFilePath(url);
+
+    if (!fs.existsSync(cacheFile)) {
+      this.stats.misses++;
+      return null;
+    }
+
+    try {
+      const content = fs.readFileSync(cacheFile, 'utf8');
+      const cached = JSON.parse(content);
+
+      // TTL check - a missing/unparseable cachedAt gives NaN and counts as expired
+      const age = Date.now() - new Date(cached.cachedAt).getTime();
+
+      if (Number.isNaN(age) || age > this.maxAge) {
+        fs.unlinkSync(cacheFile);
+        this.stats.misses++;
+        this.stats.cleanups++;
+        return null;
+      }
+
+      this.stats.hits++;
+      return cached;
+    } catch (error) {
+      // Clean up corrupted cache
+      try {
+        fs.unlinkSync(cacheFile);
+        this.stats.cleanups++;
+      } catch {
+        // Ignore cleanup errors
+      }
+      this.stats.misses++;
+      return null;
+    }
+  }
+
+  /**
+   * Store validation result for a URL
+   * @param {string} url - The URL
+   * @param {Object} result - Validation result
+   * @returns {boolean} True if successfully cached, false otherwise
+   */
+  set(url, result) {
+    const cacheFile = this.getCacheFilePath(url);
+
+    const cacheData = {
+      url,
+      result,
+      cachedAt: new Date().toISOString(),
+      ttl: new Date(Date.now() + this.maxAge).toISOString()
+    };
+
+    try {
+      fs.writeFileSync(cacheFile, JSON.stringify(cacheData, null, 2));
+      this.stats.stores++;
+      return true;
+    } catch (error) {
+      console.warn(`Failed to cache validation result for ${url}: ${error.message}`);
+      return false;
+    }
+  }
+
+  /**
+   * Check if a URL is cached and valid (delegates to get(), so it updates stats)
+   * @param {string} url - The URL to check
+   * @returns {boolean} True if cached and valid
+   */
+  isCached(url) {
+    return this.get(url) !== null;
+  }
+
+  /**
+   * Get cache statistics
+   * @returns {Object} Cache statistics
+   */
+  getStats() {
+    const total = this.stats.hits + this.stats.misses;
+    const hitRate = total > 0 ? (this.stats.hits / total * 100).toFixed(1) : 0;
+
+    return {
+      ...this.stats,
+      total,
+      hitRate: `${hitRate}%`
+    };
+  }
+
+  /**
+   * Clean up expired cache entries
+   * @returns {number} Number of entries cleaned up
+   */
+  cleanup() {
+    let cleaned = 0;
+
+    try {
+      const files = fs.readdirSync(this.localCacheDir);
+      const cacheFiles = files.filter(file =>
+        file.startsWith(CACHE_KEY_PREFIX) && file.endsWith('.json')
+      );
+
+      for (const file of cacheFiles) {
+        const filePath = path.join(this.localCacheDir, file);
+
+        try {
+          const content = fs.readFileSync(filePath, 'utf8');
+          const cached = JSON.parse(content);
+
+          const age = Date.now() - new Date(cached.cachedAt).getTime();
+
+          if (Number.isNaN(age) || age > this.maxAge) {
+            fs.unlinkSync(filePath);
+            cleaned++;
+          }
+        } catch {
+          // Remove corrupted files; force keeps a vanished file from aborting the sweep
+          fs.rmSync(filePath, { force: true });
+          cleaned++;
+        }
+      }
+    } catch (error) {
+      console.warn(`Cache cleanup failed: ${error.message}`);
+    }
+
+    this.stats.cleanups += cleaned;
+    return cleaned;
+  }
+}
+
+/**
+ * Cypress task helper to integrate cache with Cypress tasks
+ */
+export const createCypressCacheTasks = (options = {}) => {
+  const cache = new LinkCacheManager(options);
+
+  return {
+    getLinkCache: (url) => cache.get(url),
+    setLinkCache: ({ url, result }) => cache.set(url, result),
+    isLinkCached: (url) => cache.isCached(url),
+    getCacheStats: () => cache.getStats(),
+    cleanupCache: () => cache.cleanup()
+  };
+};
\ No newline at end of file
diff --git a/cypress/support/link-reporter.js b/cypress/support/link-reporter.js
index 5dc947ef1..fa514c7ef 100644
--- a/cypress/support/link-reporter.js
+++ b/cypress/support/link-reporter.js
@@ -147,18 +147,32 @@ export function displayBrokenLinksReport(brokenLinksReport = null) {
 
   // Display cache performance first
   if (cacheStats) {
-    console.log('\n📊 Cache Performance:');
-    console.log('=====================');
+    console.log('\n📊 Link Validation Cache Performance:');
+    console.log('=======================================');
     console.log(`Cache hit rate: ${cacheStats.hitRate}%`);
-    console.log(`Files cached: ${cacheStats.cacheHits}`);
-    console.log(`Files validated: ${cacheStats.cacheMisses}`);
+    console.log(`Cache hits: ${cacheStats.cacheHits}`);
+    console.log(`Cache misses: ${cacheStats.cacheMisses}`);
+    console.log(`Total validations: ${cacheStats.totalValidations || cacheStats.cacheHits + cacheStats.cacheMisses}`);
+    console.log(`New entries stored: ${cacheStats.newEntriesStored || 0}`);
+
+    if (cacheStats.cleanups > 0) {
+      console.log(`Expired entries cleaned: ${cacheStats.cleanups}`);
+    }
+
+    if (cacheStats.totalValidations > 0) {
+      const message = cacheStats.cacheHits > 0
+        ? `✨ Cache optimization saved ${cacheStats.cacheHits} link validations`
+        : '🔄 No cache hits - all links were validated fresh';
+      console.log(message);
+    }
 
     if (validationStrategy) {
-      console.log(`Total files analyzed: ${validationStrategy.total}`);
+      console.log(`Files analyzed: ${validationStrategy.total}`);
       console.log(
         `Links needing validation: ${validationStrategy.newLinks.length}`
       );
     }
+    console.log(''); // Add spacing after cache stats
   }
 
   // Check both the report and first broken link file to determine if we have broken links
diff --git a/cypress/support/run-e2e-specs.js b/cypress/support/run-e2e-specs.js
index 9601202c3..d39dfb4a2 100644
--- a/cypress/support/run-e2e-specs.js
+++ b/cypress/support/run-e2e-specs.js
@@ -44,6 +44,7 @@ import {
   HUGO_ENVIRONMENT,
   HUGO_PORT,
   HUGO_LOG_FILE,
+  HUGO_SHUTDOWN_TIMEOUT,
   startHugoServer,
   waitForHugoReady,
 } from './hugo-server.js';